summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2024-02-14 10:45:07 +0100
committerIngo Molnar <mingo@kernel.org>2024-02-14 10:45:07 +0100
commit03c11eb3b16dc0058589751dfd91f254be2be613 (patch)
treee5f2889212fec0bb0babdce9abd781ab487e246a /net
parentde8c6a352131f642b82474abe0cbb5dd26a7e081 (diff)
parent841c35169323cd833294798e58b9bf63fa4fa1de (diff)
Merge tag 'v6.8-rc4' into x86/percpu, to resolve conflicts and refresh the branch
Conflicts: arch/x86/include/asm/percpu.h arch/x86/include/asm/text-patching.h Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'net')
-rw-r--r--net/802/fddi.c1
-rw-r--r--net/802/garp.c1
-rw-r--r--net/802/mrp.c1
-rw-r--r--net/802/p8022.c1
-rw-r--r--net/802/psnap.c1
-rw-r--r--net/802/stp.c1
-rw-r--r--net/8021q/vlan.c1
-rw-r--r--net/8021q/vlan_core.c9
-rw-r--r--net/8021q/vlan_dev.c15
-rw-r--r--net/8021q/vlan_netlink.c4
-rw-r--r--net/9p/client.c8
-rw-r--r--net/9p/protocol.c17
-rw-r--r--net/9p/trans_fd.c21
-rw-r--r--net/9p/trans_xen.c15
-rw-r--r--net/Kconfig13
-rw-r--r--net/Makefile1
-rw-r--r--net/appletalk/Kconfig30
-rw-r--r--net/appletalk/aarp.c2
-rw-r--r--net/appletalk/ddp.c61
-rw-r--r--net/atm/atm_sysfs.c2
-rw-r--r--net/atm/common.c1
-rw-r--r--net/atm/ioctl.c7
-rw-r--r--net/atm/lec.c1
-rw-r--r--net/ax25/Kconfig16
-rw-r--r--net/ax25/af_ax25.c2
-rw-r--r--net/batman-adv/Makefile1
-rw-r--r--net/batman-adv/bridge_loop_avoidance.c2
-rw-r--r--net/batman-adv/fragmentation.c8
-rw-r--r--net/batman-adv/gateway_client.c2
-rw-r--r--net/batman-adv/main.c5
-rw-r--r--net/batman-adv/main.h2
-rw-r--r--net/batman-adv/multicast.c132
-rw-r--r--net/batman-adv/multicast.h30
-rw-r--r--net/batman-adv/multicast_forw.c1178
-rw-r--r--net/batman-adv/netlink.c2
-rw-r--r--net/batman-adv/originator.c28
-rw-r--r--net/batman-adv/originator.h3
-rw-r--r--net/batman-adv/routing.c70
-rw-r--r--net/batman-adv/routing.h11
-rw-r--r--net/batman-adv/soft-interface.c18
-rw-r--r--net/batman-adv/types.h70
-rw-r--r--net/bluetooth/af_bluetooth.c7
-rw-r--r--net/bluetooth/amp.c3
-rw-r--r--net/bluetooth/hci_conn.c246
-rw-r--r--net/bluetooth/hci_core.c17
-rw-r--r--net/bluetooth/hci_debugfs.c12
-rw-r--r--net/bluetooth/hci_event.c182
-rw-r--r--net/bluetooth/hci_request.h2
-rw-r--r--net/bluetooth/hci_sock.c3
-rw-r--r--net/bluetooth/hci_sync.c174
-rw-r--r--net/bluetooth/hci_sysfs.c23
-rw-r--r--net/bluetooth/iso.c242
-rw-r--r--net/bluetooth/l2cap_core.c24
-rw-r--r--net/bluetooth/l2cap_sock.c2
-rw-r--r--net/bluetooth/lib.c69
-rw-r--r--net/bluetooth/mgmt.c42
-rw-r--r--net/bluetooth/msft.c20
-rw-r--r--net/bluetooth/rfcomm/tty.c2
-rw-r--r--net/bluetooth/smp.c10
-rw-r--r--net/bpf/bpf_dummy_struct_ops.c38
-rw-r--r--net/bpf/test_run.c23
-rw-r--r--net/bpfilter/.gitignore2
-rw-r--r--net/bpfilter/Kconfig23
-rw-r--r--net/bpfilter/Makefile20
-rw-r--r--net/bpfilter/bpfilter_kern.c136
-rw-r--r--net/bpfilter/bpfilter_umh_blob.S7
-rw-r--r--net/bpfilter/main.c64
-rw-r--r--net/bpfilter/msgfmt.h17
-rw-r--r--net/bridge/br.c1
-rw-r--r--net/bridge/br_cfm_netlink.c2
-rw-r--r--net/bridge/br_device.c4
-rw-r--r--net/bridge/br_fdb.c71
-rw-r--r--net/bridge/br_forward.c4
-rw-r--r--net/bridge/br_input.c6
-rw-r--r--net/bridge/br_mdb.c317
-rw-r--r--net/bridge/br_multicast.c25
-rw-r--r--net/bridge/br_netfilter_hooks.c142
-rw-r--r--net/bridge/br_netfilter_ipv6.c20
-rw-r--r--net/bridge/br_netlink.c17
-rw-r--r--net/bridge/br_private.h40
-rw-r--r--net/bridge/netfilter/ebtable_broute.c1
-rw-r--r--net/bridge/netfilter/ebtable_filter.c1
-rw-r--r--net/bridge/netfilter/ebtable_nat.c1
-rw-r--r--net/bridge/netfilter/ebtables.c1
-rw-r--r--net/bridge/netfilter/nf_conntrack_bridge.c3
-rw-r--r--net/caif/caif_dev.c1
-rw-r--r--net/caif/caif_socket.c1
-rw-r--r--net/caif/caif_usb.c1
-rw-r--r--net/caif/chnl_net.c1
-rw-r--r--net/can/isotp.c19
-rw-r--r--net/can/j1939/socket.c12
-rw-r--r--net/can/raw.c5
-rw-r--r--net/ceph/messenger.c82
-rw-r--r--net/ceph/messenger_v1.c97
-rw-r--r--net/ceph/messenger_v2.c293
-rw-r--r--net/ceph/mon_client.c2
-rw-r--r--net/ceph/osd_client.c334
-rw-r--r--net/compat.c2
-rw-r--r--net/core/Makefile3
-rw-r--r--net/core/bpf_sk_storage.c3
-rw-r--r--net/core/datagram.c75
-rw-r--r--net/core/dev.c397
-rw-r--r--net/core/dev.h15
-rw-r--r--net/core/dev_addr_lists_test.c1
-rw-r--r--net/core/dev_ioctl.c9
-rw-r--r--net/core/drop_monitor.c4
-rw-r--r--net/core/dst.c10
-rw-r--r--net/core/fib_rules.c4
-rw-r--r--net/core/filter.c207
-rw-r--r--net/core/flow_dissector.c5
-rw-r--r--net/core/gso_test.c274
-rw-r--r--net/core/link_watch.c8
-rw-r--r--net/core/neighbour.c90
-rw-r--r--net/core/net-sysfs.c17
-rw-r--r--net/core/net_namespace.c49
-rw-r--r--net/core/netclassid_cgroup.c6
-rw-r--r--net/core/netdev-genl-gen.c110
-rw-r--r--net/core/netdev-genl-gen.h16
-rw-r--r--net/core/netdev-genl.c354
-rw-r--r--net/core/page_pool.c150
-rw-r--r--net/core/page_pool_priv.h12
-rw-r--r--net/core/page_pool_user.c410
-rw-r--r--net/core/pktgen.c125
-rw-r--r--net/core/request_sock.c3
-rw-r--r--net/core/rtnetlink.c254
-rw-r--r--net/core/scm.c8
-rw-r--r--net/core/selftests.c10
-rw-r--r--net/core/skbuff.c218
-rw-r--r--net/core/skmsg.c14
-rw-r--r--net/core/sock.c290
-rw-r--r--net/core/sock_map.c42
-rw-r--r--net/core/stream.c14
-rw-r--r--net/core/sysctl_net_core.c15
-rw-r--r--net/core/xdp.c43
-rw-r--r--net/dccp/ipv4.c17
-rw-r--r--net/dccp/ipv6.c29
-rw-r--r--net/dccp/timer.c4
-rw-r--r--net/devlink/core.c227
-rw-r--r--net/devlink/dev.c97
-rw-r--r--net/devlink/devl_internal.h178
-rw-r--r--net/devlink/dpipe.c14
-rw-r--r--net/devlink/health.c454
-rw-r--r--net/devlink/linecard.c88
-rw-r--r--net/devlink/netlink.c517
-rw-r--r--net/devlink/netlink_gen.c771
-rw-r--r--net/devlink/netlink_gen.h71
-rw-r--r--net/devlink/param.c19
-rw-r--r--net/devlink/port.c76
-rw-r--r--net/devlink/rate.c11
-rw-r--r--net/devlink/region.c17
-rw-r--r--net/devlink/resource.c4
-rw-r--r--net/devlink/sb.c17
-rw-r--r--net/devlink/trap.c27
-rw-r--r--net/dns_resolver/Kconfig2
-rw-r--r--net/dns_resolver/dns_key.c25
-rw-r--r--net/dsa/Makefile6
-rw-r--r--net/dsa/conduit.c (renamed from net/dsa/master.c)118
-rw-r--r--net/dsa/conduit.h22
-rw-r--r--net/dsa/dsa.c224
-rw-r--r--net/dsa/dsa.h12
-rw-r--r--net/dsa/master.h22
-rw-r--r--net/dsa/netlink.c22
-rw-r--r--net/dsa/port.c144
-rw-r--r--net/dsa/port.h7
-rw-r--r--net/dsa/slave.h69
-rw-r--r--net/dsa/switch.c20
-rw-r--r--net/dsa/switch.h8
-rw-r--r--net/dsa/tag.c10
-rw-r--r--net/dsa/tag.h26
-rw-r--r--net/dsa/tag_8021q.c22
-rw-r--r--net/dsa/tag_8021q.h2
-rw-r--r--net/dsa/tag_ar9331.c5
-rw-r--r--net/dsa/tag_brcm.c15
-rw-r--r--net/dsa/tag_dsa.c7
-rw-r--r--net/dsa/tag_gswip.c5
-rw-r--r--net/dsa/tag_hellcreek.c5
-rw-r--r--net/dsa/tag_ksz.c21
-rw-r--r--net/dsa/tag_lan9303.c5
-rw-r--r--net/dsa/tag_mtk.c5
-rw-r--r--net/dsa/tag_none.c7
-rw-r--r--net/dsa/tag_ocelot.c23
-rw-r--r--net/dsa/tag_ocelot_8021q.c13
-rw-r--r--net/dsa/tag_qca.c7
-rw-r--r--net/dsa/tag_rtl4_a.c12
-rw-r--r--net/dsa/tag_rtl8_4.c7
-rw-r--r--net/dsa/tag_rzn1_a5psw.c5
-rw-r--r--net/dsa/tag_sja1105.c31
-rw-r--r--net/dsa/tag_trailer.c5
-rw-r--r--net/dsa/tag_xrs700x.c5
-rw-r--r--net/dsa/user.c (renamed from net/dsa/slave.c)1503
-rw-r--r--net/dsa/user.h69
-rw-r--r--net/ethtool/common.c39
-rw-r--r--net/ethtool/features.c9
-rw-r--r--net/ethtool/ioctl.c198
-rw-r--r--net/ethtool/netlink.c1
-rw-r--r--net/ethtool/plca.c45
-rw-r--r--net/ethtool/rings.c12
-rw-r--r--net/ethtool/rss.c24
-rw-r--r--net/handshake/genl.c2
-rw-r--r--net/handshake/handshake-test.c14
-rw-r--r--net/handshake/netlink.c50
-rw-r--r--net/handshake/tlshd.c6
-rw-r--r--net/hsr/hsr_device.c71
-rw-r--r--net/hsr/hsr_forward.c5
-rw-r--r--net/hsr/hsr_framereg.c4
-rw-r--r--net/hsr/hsr_main.c1
-rw-r--r--net/hsr/hsr_main.h2
-rw-r--r--net/ieee802154/Makefile2
-rw-r--r--net/ieee802154/core.c24
-rw-r--r--net/ieee802154/nl802154.c249
-rw-r--r--net/ieee802154/pan.c109
-rw-r--r--net/ieee802154/rdev-ops.h30
-rw-r--r--net/ieee802154/trace.h38
-rw-r--r--net/ife/ife.c1
-rw-r--r--net/ipv4/Kconfig17
-rw-r--r--net/ipv4/Makefile4
-rw-r--r--net/ipv4/af_inet.c33
-rw-r--r--net/ipv4/ah4.c19
-rw-r--r--net/ipv4/bpf_tcp_ca.c69
-rw-r--r--net/ipv4/bpfilter/Makefile2
-rw-r--r--net/ipv4/bpfilter/sockopt.c71
-rw-r--r--net/ipv4/datagram.c6
-rw-r--r--net/ipv4/devinet.c10
-rw-r--r--net/ipv4/esp4.c6
-rw-r--r--net/ipv4/esp4_offload.c6
-rw-r--r--net/ipv4/fib_rules.c6
-rw-r--r--net/ipv4/fib_semantics.c20
-rw-r--r--net/ipv4/fib_trie.c8
-rw-r--r--net/ipv4/fou_bpf.c6
-rw-r--r--net/ipv4/icmp.c2
-rw-r--r--net/ipv4/igmp.c11
-rw-r--r--net/ipv4/inet_connection_sock.c126
-rw-r--r--net/ipv4/inet_diag.c91
-rw-r--r--net/ipv4/inet_hashtables.c145
-rw-r--r--net/ipv4/inet_timewait_sock.c21
-rw-r--r--net/ipv4/ip_forward.c5
-rw-r--r--net/ipv4/ip_gre.c11
-rw-r--r--net/ipv4/ip_input.c3
-rw-r--r--net/ipv4/ip_output.c38
-rw-r--r--net/ipv4/ip_sockglue.c246
-rw-r--r--net/ipv4/ip_tunnel_core.c2
-rw-r--r--net/ipv4/ip_vti.c4
-rw-r--r--net/ipv4/ipmr.c16
-rw-r--r--net/ipv4/netfilter.c2
-rw-r--r--net/ipv4/netfilter/iptable_mangle.c9
-rw-r--r--net/ipv4/netfilter/iptable_nat.c1
-rw-r--r--net/ipv4/netfilter/iptable_raw.c1
-rw-r--r--net/ipv4/netfilter/nf_defrag_ipv4.c1
-rw-r--r--net/ipv4/netfilter/nf_nat_snmp_basic.asn18
-rw-r--r--net/ipv4/netfilter/nf_reject_ipv4.c10
-rw-r--r--net/ipv4/ping.c15
-rw-r--r--net/ipv4/proc.c8
-rw-r--r--net/ipv4/raw.c21
-rw-r--r--net/ipv4/raw_diag.c1
-rw-r--r--net/ipv4/route.c63
-rw-r--r--net/ipv4/syncookies.c239
-rw-r--r--net/ipv4/sysctl_net_ipv4.c35
-rw-r--r--net/ipv4/tcp.c459
-rw-r--r--net/ipv4/tcp_ao.c2408
-rw-r--r--net/ipv4/tcp_bbr.c13
-rw-r--r--net/ipv4/tcp_bpf.c16
-rw-r--r--net/ipv4/tcp_diag.c1
-rw-r--r--net/ipv4/tcp_input.c298
-rw-r--r--net/ipv4/tcp_ipv4.c385
-rw-r--r--net/ipv4/tcp_lp.c2
-rw-r--r--net/ipv4/tcp_metrics.c22
-rw-r--r--net/ipv4/tcp_minisocks.c69
-rw-r--r--net/ipv4/tcp_output.c360
-rw-r--r--net/ipv4/tcp_recovery.c2
-rw-r--r--net/ipv4/tcp_sigpool.c357
-rw-r--r--net/ipv4/tcp_timer.c67
-rw-r--r--net/ipv4/udp.c157
-rw-r--r--net/ipv4/udp_diag.c1
-rw-r--r--net/ipv4/udp_offload.c4
-rw-r--r--net/ipv4/udp_tunnel_core.c51
-rw-r--r--net/ipv4/udp_tunnel_nic.c11
-rw-r--r--net/ipv4/udplite.c1
-rw-r--r--net/ipv4/xfrm4_input.c99
-rw-r--r--net/ipv6/Makefile1
-rw-r--r--net/ipv6/addrconf.c47
-rw-r--r--net/ipv6/addrconf_core.c21
-rw-r--r--net/ipv6/af_inet6.c23
-rw-r--r--net/ipv6/ah6.c19
-rw-r--r--net/ipv6/datagram.c21
-rw-r--r--net/ipv6/esp6.c6
-rw-r--r--net/ipv6/esp6_offload.c10
-rw-r--r--net/ipv6/exthdrs_offload.c11
-rw-r--r--net/ipv6/fib6_rules.c4
-rw-r--r--net/ipv6/icmp.c14
-rw-r--r--net/ipv6/inet6_connection_sock.c2
-rw-r--r--net/ipv6/ioam6_iptunnel.c2
-rw-r--r--net/ipv6/ip6_fib.c61
-rw-r--r--net/ipv6/ip6_flowlabel.c8
-rw-r--r--net/ipv6/ip6_input.c3
-rw-r--r--net/ipv6/ip6_offload.c76
-rw-r--r--net/ipv6/ip6_output.c174
-rw-r--r--net/ipv6/ip6_tunnel.c47
-rw-r--r--net/ipv6/ip6_udp_tunnel.c75
-rw-r--r--net/ipv6/ip6_vti.c4
-rw-r--r--net/ipv6/ip6mr.c4
-rw-r--r--net/ipv6/ipv6_sockglue.c378
-rw-r--r--net/ipv6/mcast.c15
-rw-r--r--net/ipv6/ndisc.c6
-rw-r--r--net/ipv6/netfilter.c2
-rw-r--r--net/ipv6/netfilter/ip6table_mangle.c9
-rw-r--r--net/ipv6/netfilter/ip6table_nat.c1
-rw-r--r--net/ipv6/netfilter/ip6table_raw.c1
-rw-r--r--net/ipv6/netfilter/nf_defrag_ipv6_hooks.c1
-rw-r--r--net/ipv6/netfilter/nf_reject_ipv6.c12
-rw-r--r--net/ipv6/ping.c16
-rw-r--r--net/ipv6/proc.c3
-rw-r--r--net/ipv6/raw.c24
-rw-r--r--net/ipv6/route.c15
-rw-r--r--net/ipv6/syncookies.c108
-rw-r--r--net/ipv6/tcp_ao.c168
-rw-r--r--net/ipv6/tcp_ipv6.c417
-rw-r--r--net/ipv6/udp.c72
-rw-r--r--net/ipv6/udplite.c1
-rw-r--r--net/ipv6/xfrm6_input.c107
-rw-r--r--net/ipv6/xfrm6_output.c2
-rw-r--r--net/ipv6/xfrm6_policy.c4
-rw-r--r--net/iucv/iucv.c6
-rw-r--r--net/kcm/kcmsock.c18
-rw-r--r--net/l2tp/l2tp_core.c6
-rw-r--r--net/l2tp/l2tp_eth.c34
-rw-r--r--net/l2tp/l2tp_ip6.c12
-rw-r--r--net/llc/af_llc.c26
-rw-r--r--net/llc/llc_core.c7
-rw-r--r--net/llc/llc_input.c10
-rw-r--r--net/llc/llc_s_ac.c3
-rw-r--r--net/llc/llc_station.c3
-rw-r--r--net/mac80211/Kconfig12
-rw-r--r--net/mac80211/Makefile4
-rw-r--r--net/mac80211/agg-rx.c63
-rw-r--r--net/mac80211/agg-tx.c63
-rw-r--r--net/mac80211/airtime.c10
-rw-r--r--net/mac80211/cfg.c508
-rw-r--r--net/mac80211/chan.c169
-rw-r--r--net/mac80211/debugfs.c12
-rw-r--r--net/mac80211/debugfs_key.c20
-rw-r--r--net/mac80211/debugfs_netdev.c246
-rw-r--r--net/mac80211/debugfs_netdev.h14
-rw-r--r--net/mac80211/debugfs_sta.c76
-rw-r--r--net/mac80211/driver-ops.c64
-rw-r--r--net/mac80211/driver-ops.h190
-rw-r--r--net/mac80211/drop.h49
-rw-r--r--net/mac80211/ethtool.c20
-rw-r--r--net/mac80211/ht.c59
-rw-r--r--net/mac80211/ibss.c108
-rw-r--r--net/mac80211/ieee80211_i.h262
-rw-r--r--net/mac80211/iface.c180
-rw-r--r--net/mac80211/key.c168
-rw-r--r--net/mac80211/key.h11
-rw-r--r--net/mac80211/link.c66
-rw-r--r--net/mac80211/main.c95
-rw-r--r--net/mac80211/mesh.c32
-rw-r--r--net/mac80211/mesh_hwmp.c4
-rw-r--r--net/mac80211/mesh_pathtbl.c30
-rw-r--r--net/mac80211/mesh_plink.c24
-rw-r--r--net/mac80211/mesh_ps.c6
-rw-r--r--net/mac80211/mesh_sync.c4
-rw-r--r--net/mac80211/mlme.c913
-rw-r--r--net/mac80211/ocb.c19
-rw-r--r--net/mac80211/offchannel.c120
-rw-r--r--net/mac80211/pm.c13
-rw-r--r--net/mac80211/rc80211_minstrel_ht.c7
-rw-r--r--net/mac80211/rx.c135
-rw-r--r--net/mac80211/s1g.c15
-rw-r--r--net/mac80211/scan.c306
-rw-r--r--net/mac80211/spectmgmt.c13
-rw-r--r--net/mac80211/sta_info.c186
-rw-r--r--net/mac80211/sta_info.h28
-rw-r--r--net/mac80211/status.c111
-rw-r--r--net/mac80211/tdls.c104
-rw-r--r--net/mac80211/tests/Makefile3
-rw-r--r--net/mac80211/tests/elems.c101
-rw-r--r--net/mac80211/tests/mfp.c286
-rw-r--r--net/mac80211/tests/module.c10
-rw-r--r--net/mac80211/trace.h36
-rw-r--r--net/mac80211/tx.c92
-rw-r--r--net/mac80211/util.c279
-rw-r--r--net/mac80211/vht.c16
-rw-r--r--net/mac80211/wbrf.c93
-rw-r--r--net/mac80211/wep.c9
-rw-r--r--net/mac80211/wpa.c42
-rw-r--r--net/mac802154/cfg.c175
-rw-r--r--net/mac802154/ieee802154_i.h27
-rw-r--r--net/mac802154/main.c2
-rw-r--r--net/mac802154/rx.c36
-rw-r--r--net/mac802154/scan.c407
-rw-r--r--net/mctp/route.c22
-rw-r--r--net/mptcp/Makefile3
-rw-r--r--net/mptcp/crypto_test.c1
-rw-r--r--net/mptcp/ctrl.c16
-rw-r--r--net/mptcp/fastopen.c1
-rw-r--r--net/mptcp/mib.c1
-rw-r--r--net/mptcp/mib.h8
-rw-r--r--net/mptcp/mptcp_diag.c1
-rw-r--r--net/mptcp/mptcp_pm_gen.c179
-rw-r--r--net/mptcp/mptcp_pm_gen.h58
-rw-r--r--net/mptcp/options.c12
-rw-r--r--net/mptcp/pm.c2
-rw-r--r--net/mptcp/pm_netlink.c126
-rw-r--r--net/mptcp/pm_userspace.c101
-rw-r--r--net/mptcp/protocol.c504
-rw-r--r--net/mptcp/protocol.h170
-rw-r--r--net/mptcp/sockopt.c105
-rw-r--r--net/mptcp/subflow.c153
-rw-r--r--net/mptcp/token_test.c1
-rw-r--r--net/ncsi/internal.h7
-rw-r--r--net/ncsi/ncsi-cmd.c3
-rw-r--r--net/ncsi/ncsi-manage.c29
-rw-r--r--net/ncsi/ncsi-netlink.c4
-rw-r--r--net/ncsi/ncsi-pkt.h17
-rw-r--r--net/ncsi/ncsi-rsp.c67
-rw-r--r--net/netfilter/core.c6
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_gen.h16
-rw-r--r--net/netfilter/ipset/ip_set_core.c59
-rw-r--r--net/netfilter/ipset/ip_set_hash_gen.h20
-rw-r--r--net/netfilter/ipset/ip_set_hash_netiface.c8
-rw-r--r--net/netfilter/ipset/ip_set_hash_netportnet.c1
-rw-r--r--net/netfilter/ipset/ip_set_list_set.c13
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c1
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c1
-rw-r--r--net/netfilter/ipvs/ip_vs_dh.c1
-rw-r--r--net/netfilter/ipvs/ip_vs_est.c1
-rw-r--r--net/netfilter/ipvs/ip_vs_fo.c1
-rw-r--r--net/netfilter/ipvs/ip_vs_ftp.c1
-rw-r--r--net/netfilter/ipvs/ip_vs_lblc.c1
-rw-r--r--net/netfilter/ipvs/ip_vs_lblcr.c1
-rw-r--r--net/netfilter/ipvs/ip_vs_lc.c1
-rw-r--r--net/netfilter/ipvs/ip_vs_nq.c1
-rw-r--r--net/netfilter/ipvs/ip_vs_ovf.c1
-rw-r--r--net/netfilter/ipvs/ip_vs_pe_sip.c1
-rw-r--r--net/netfilter/ipvs/ip_vs_rr.c1
-rw-r--r--net/netfilter/ipvs/ip_vs_sed.c1
-rw-r--r--net/netfilter/ipvs/ip_vs_sh.c1
-rw-r--r--net/netfilter/ipvs/ip_vs_sync.c26
-rw-r--r--net/netfilter/ipvs/ip_vs_twos.c1
-rw-r--r--net/netfilter/ipvs/ip_vs_wlc.c1
-rw-r--r--net/netfilter/ipvs/ip_vs_wrr.c1
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c4
-rw-r--r--net/netfilter/nf_bpf_link.c10
-rw-r--r--net/netfilter/nf_conntrack_bpf.c8
-rw-r--r--net/netfilter/nf_conntrack_broadcast.c1
-rw-r--r--net/netfilter/nf_conntrack_core.c76
-rw-r--r--net/netfilter/nf_conntrack_extend.c4
-rw-r--r--net/netfilter/nf_conntrack_helper.c7
-rw-r--r--net/netfilter/nf_conntrack_labels.c17
-rw-r--r--net/netfilter/nf_conntrack_netlink.c19
-rw-r--r--net/netfilter/nf_conntrack_proto.c1
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c45
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c17
-rw-r--r--net/netfilter/nf_flow_table_core.c14
-rw-r--r--net/netfilter/nf_log.c7
-rw-r--r--net/netfilter/nf_log_syslog.c13
-rw-r--r--net/netfilter/nf_nat_bpf.c6
-rw-r--r--net/netfilter/nf_nat_core.c1
-rw-r--r--net/netfilter/nf_nat_ovs.c3
-rw-r--r--net/netfilter/nf_nat_proto.c71
-rw-r--r--net/netfilter/nf_nat_redirect.c27
-rw-r--r--net/netfilter/nf_queue.c6
-rw-r--r--net/netfilter/nf_synproxy_core.c6
-rw-r--r--net/netfilter/nf_tables_api.c1026
-rw-r--r--net/netfilter/nf_tables_core.c10
-rw-r--r--net/netfilter/nf_tables_trace.c8
-rw-r--r--net/netfilter/nfnetlink_log.c10
-rw-r--r--net/netfilter/nfnetlink_osf.c9
-rw-r--r--net/netfilter/nfnetlink_queue.c22
-rw-r--r--net/netfilter/nft_byteorder.c5
-rw-r--r--net/netfilter/nft_chain_filter.c11
-rw-r--r--net/netfilter/nft_chain_nat.c1
-rw-r--r--net/netfilter/nft_compat.c29
-rw-r--r--net/netfilter/nft_ct.c27
-rw-r--r--net/netfilter/nft_dynset.c36
-rw-r--r--net/netfilter/nft_exthdr.c46
-rw-r--r--net/netfilter/nft_fib.c9
-rw-r--r--net/netfilter/nft_flow_offload.c5
-rw-r--r--net/netfilter/nft_fwd_netdev.c1
-rw-r--r--net/netfilter/nft_immediate.c2
-rw-r--r--net/netfilter/nft_inner.c1
-rw-r--r--net/netfilter/nft_limit.c42
-rw-r--r--net/netfilter/nft_meta.c2
-rw-r--r--net/netfilter/nft_nat.c5
-rw-r--r--net/netfilter/nft_payload.c15
-rw-r--r--net/netfilter/nft_rt.c5
-rw-r--r--net/netfilter/nft_set_bitmap.c53
-rw-r--r--net/netfilter/nft_set_hash.c128
-rw-r--r--net/netfilter/nft_set_pipapo.c211
-rw-r--r--net/netfilter/nft_set_pipapo.h24
-rw-r--r--net/netfilter/nft_set_pipapo_avx2.c19
-rw-r--r--net/netfilter/nft_set_rbtree.c265
-rw-r--r--net/netfilter/nft_socket.c5
-rw-r--r--net/netfilter/nft_synproxy.c7
-rw-r--r--net/netfilter/nft_tproxy.c5
-rw-r--r--net/netfilter/nft_tunnel.c1
-rw-r--r--net/netfilter/nft_xfrm.c5
-rw-r--r--net/netfilter/xt_owner.c16
-rw-r--r--net/netfilter/xt_physdev.c2
-rw-r--r--net/netfilter/xt_recent.c2
-rw-r--r--net/netfilter/xt_sctp.c2
-rw-r--r--net/netfilter/xt_u32.c21
-rw-r--r--net/netlabel/netlabel_calipso.c49
-rw-r--r--net/netlink/af_netlink.c16
-rw-r--r--net/netlink/diag.c1
-rw-r--r--net/netlink/genetlink.c152
-rw-r--r--net/netlink/policy.c29
-rw-r--r--net/netrom/af_netrom.c2
-rw-r--r--net/nfc/digital_core.c1
-rw-r--r--net/nfc/llcp_core.c71
-rw-r--r--net/nfc/llcp_sock.c5
-rw-r--r--net/nfc/nci/core.c10
-rw-r--r--net/nfc/nci/spi.c3
-rw-r--r--net/openvswitch/actions.c27
-rw-r--r--net/openvswitch/conntrack.c2
-rw-r--r--net/openvswitch/flow_table.c7
-rw-r--r--net/openvswitch/flow_table.h2
-rw-r--r--net/openvswitch/meter.h4
-rw-r--r--net/packet/af_packet.c44
-rw-r--r--net/packet/diag.c1
-rw-r--r--net/packet/internal.h4
-rw-r--r--net/psample/psample.c3
-rw-r--r--net/qrtr/ns.c4
-rw-r--r--net/rds/af_rds.c2
-rw-r--r--net/rds/rdma_transport.c12
-rw-r--r--net/rds/tcp_connect.c4
-rw-r--r--net/rds/tcp_listen.c4
-rw-r--r--net/rfkill/core.c41
-rw-r--r--net/rfkill/rfkill-gpio.c12
-rw-r--r--net/rose/af_rose.c45
-rw-r--r--net/rxrpc/af_rxrpc.c62
-rw-r--r--net/rxrpc/ar-internal.h44
-rw-r--r--net/rxrpc/call_event.c12
-rw-r--r--net/rxrpc/call_object.c22
-rw-r--r--net/rxrpc/conn_client.c17
-rw-r--r--net/rxrpc/conn_event.c10
-rw-r--r--net/rxrpc/conn_object.c2
-rw-r--r--net/rxrpc/conn_service.c3
-rw-r--r--net/rxrpc/input.c176
-rw-r--r--net/rxrpc/local_object.c15
-rw-r--r--net/rxrpc/net_ns.c4
-rw-r--r--net/rxrpc/output.c14
-rw-r--r--net/rxrpc/peer_object.c58
-rw-r--r--net/rxrpc/proc.c78
-rw-r--r--net/rxrpc/rxkad.c6
-rw-r--r--net/rxrpc/sendmsg.c11
-rw-r--r--net/sched/Makefile1
-rw-r--r--net/sched/act_api.c251
-rw-r--r--net/sched/act_bpf.c2
-rw-r--r--net/sched/act_connmark.c2
-rw-r--r--net/sched/act_csum.c4
-rw-r--r--net/sched/act_ct.c116
-rw-r--r--net/sched/act_ctinfo.c2
-rw-r--r--net/sched/act_gact.c2
-rw-r--r--net/sched/act_gate.c3
-rw-r--r--net/sched/act_ife.c2
-rw-r--r--net/sched/act_ipt.c464
-rw-r--r--net/sched/act_mirred.c266
-rw-r--r--net/sched/act_mpls.c2
-rw-r--r--net/sched/act_nat.c2
-rw-r--r--net/sched/act_pedit.c2
-rw-r--r--net/sched/act_police.c2
-rw-r--r--net/sched/act_sample.c2
-rw-r--r--net/sched/act_simple.c2
-rw-r--r--net/sched/act_skbedit.c2
-rw-r--r--net/sched/act_skbmod.c2
-rw-r--r--net/sched/act_tunnel_key.c2
-rw-r--r--net/sched/act_vlan.c2
-rw-r--r--net/sched/cls_api.c114
-rw-r--r--net/sched/cls_basic.c1
-rw-r--r--net/sched/cls_cgroup.c1
-rw-r--r--net/sched/cls_flower.c23
-rw-r--r--net/sched/cls_fw.c1
-rw-r--r--net/sched/cls_route.c38
-rw-r--r--net/sched/cls_u32.c39
-rw-r--r--net/sched/em_meta.c2
-rw-r--r--net/sched/em_text.c4
-rw-r--r--net/sched/sch_api.c79
-rw-r--r--net/sched/sch_cbs.c5
-rw-r--r--net/sched/sch_choke.c1
-rw-r--r--net/sched/sch_drr.c1
-rw-r--r--net/sched/sch_etf.c1
-rw-r--r--net/sched/sch_ets.c1
-rw-r--r--net/sched/sch_fifo.c1
-rw-r--r--net/sched/sch_fq.c385
-rw-r--r--net/sched/sch_fq_pie.c29
-rw-r--r--net/sched/sch_frag.c4
-rw-r--r--net/sched/sch_generic.c18
-rw-r--r--net/sched/sch_gred.c1
-rw-r--r--net/sched/sch_hfsc.c19
-rw-r--r--net/sched/sch_htb.c1
-rw-r--r--net/sched/sch_ingress.c1
-rw-r--r--net/sched/sch_mqprio.c1
-rw-r--r--net/sched/sch_mqprio_lib.c1
-rw-r--r--net/sched/sch_multiq.c1
-rw-r--r--net/sched/sch_netem.c3
-rw-r--r--net/sched/sch_plug.c3
-rw-r--r--net/sched/sch_prio.c1
-rw-r--r--net/sched/sch_qfq.c27
-rw-r--r--net/sched/sch_red.c1
-rw-r--r--net/sched/sch_sfq.c1
-rw-r--r--net/sched/sch_skbprio.c1
-rw-r--r--net/sched/sch_taprio.c3
-rw-r--r--net/sched/sch_tbf.c1
-rw-r--r--net/sched/sch_teql.c1
-rw-r--r--net/sctp/associola.c3
-rw-r--r--net/sctp/diag.c1
-rw-r--r--net/sctp/ipv6.c9
-rw-r--r--net/sctp/proc.c2
-rw-r--r--net/sctp/protocol.c4
-rw-r--r--net/sctp/sm_make_chunk.c2
-rw-r--r--net/sctp/socket.c24
-rw-r--r--net/smc/Kconfig1
-rw-r--r--net/smc/af_smc.c141
-rw-r--r--net/smc/smc.h16
-rw-r--r--net/smc/smc_cdc.c11
-rw-r--r--net/smc/smc_clc.c334
-rw-r--r--net/smc/smc_clc.h71
-rw-r--r--net/smc/smc_close.c5
-rw-r--r--net/smc/smc_core.c45
-rw-r--r--net/smc/smc_core.h18
-rw-r--r--net/smc/smc_diag.c15
-rw-r--r--net/smc/smc_ib.c9
-rw-r--r--net/smc/smc_ib.h2
-rw-r--r--net/smc/smc_ism.c50
-rw-r--r--net/smc/smc_ism.h30
-rw-r--r--net/smc/smc_pnet.c4
-rw-r--r--net/smc/smc_stats.h17
-rw-r--r--net/smc/smc_sysctl.c24
-rw-r--r--net/smc/smc_sysctl.h2
-rw-r--r--net/smc/smc_tx.c30
-rw-r--r--net/socket.c173
-rw-r--r--net/sunrpc/auth.c34
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c1
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_crypto.c2
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_mech.c1
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_unseal.c2
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c16
-rw-r--r--net/sunrpc/auth_tls.c4
-rw-r--r--net/sunrpc/backchannel_rqst.c13
-rw-r--r--net/sunrpc/clnt.c96
-rw-r--r--net/sunrpc/rpc_pipe.c2
-rw-r--r--net/sunrpc/rpcb_clnt.c4
-rw-r--r--net/sunrpc/sunrpc_syms.c1
-rw-r--r--net/sunrpc/svc.c173
-rw-r--r--net/sunrpc/svc_xprt.c273
-rw-r--r--net/sunrpc/svcauth.c16
-rw-r--r--net/sunrpc/svcsock.c18
-rw-r--r--net/sunrpc/xprt.c35
-rw-r--r--net/sunrpc/xprtmultipath.c19
-rw-r--r--net/sunrpc/xprtrdma/backchannel.c6
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma.c32
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_backchannel.c11
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_recvfrom.c210
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_rw.c450
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c96
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c36
-rw-r--r--net/sunrpc/xprtrdma/verbs.c2
-rw-r--r--net/sunrpc/xprtsock.c20
-rw-r--r--net/tipc/bearer.c6
-rw-r--r--net/tipc/crypto.c4
-rw-r--r--net/tipc/diag.c1
-rw-r--r--net/tipc/link.c19
-rw-r--r--net/tipc/netlink.c4
-rw-r--r--net/tipc/netlink_compat.c3
-rw-r--r--net/tls/tls.h11
-rw-r--r--net/tls/tls_device.c101
-rw-r--r--net/tls/tls_device_fallback.c23
-rw-r--r--net/tls/tls_main.c72
-rw-r--r--net/tls/tls_sw.c228
-rw-r--r--net/unix/af_unix.c85
-rw-r--r--net/unix/diag.c3
-rw-r--r--net/unix/garbage.c11
-rw-r--r--net/unix/scm.c10
-rw-r--r--net/unix/unix_bpf.c20
-rw-r--r--net/vmw_vsock/af_vsock.c75
-rw-r--r--net/vmw_vsock/diag.c1
-rw-r--r--net/vmw_vsock/hyperv_transport.c4
-rw-r--r--net/vmw_vsock/virtio_transport.c118
-rw-r--r--net/vmw_vsock/virtio_transport_common.c369
-rw-r--r--net/vmw_vsock/vsock_loopback.c7
-rw-r--r--net/wireless/Kconfig10
-rw-r--r--net/wireless/Makefile5
-rw-r--r--net/wireless/ap.c24
-rw-r--r--net/wireless/certs/wens.hex87
-rw-r--r--net/wireless/chan.c148
-rw-r--r--net/wireless/core.c95
-rw-r--r--net/wireless/core.h86
-rw-r--r--net/wireless/debugfs.c160
-rw-r--r--net/wireless/ibss.c76
-rw-r--r--net/wireless/lib80211_crypt_tkip.c12
-rw-r--r--net/wireless/mesh.c28
-rw-r--r--net/wireless/mlme.c29
-rw-r--r--net/wireless/nl80211.c990
-rw-r--r--net/wireless/nl80211.h7
-rw-r--r--net/wireless/ocb.c43
-rw-r--r--net/wireless/pmsr.c4
-rw-r--r--net/wireless/rdev-ops.h28
-rw-r--r--net/wireless/reg.c107
-rw-r--r--net/wireless/reg.h21
-rw-r--r--net/wireless/scan.c419
-rw-r--r--net/wireless/sme.c84
-rw-r--r--net/wireless/sysfs.c4
-rw-r--r--net/wireless/tests/Makefile3
-rw-r--r--net/wireless/tests/fragmentation.c157
-rw-r--r--net/wireless/tests/module.c10
-rw-r--r--net/wireless/tests/scan.c625
-rw-r--r--net/wireless/tests/util.c56
-rw-r--r--net/wireless/tests/util.h66
-rw-r--r--net/wireless/trace.h108
-rw-r--r--net/wireless/util.c116
-rw-r--r--net/wireless/wext-compat.c47
-rw-r--r--net/wireless/wext-sme.c59
-rw-r--r--net/x25/af_x25.c16
-rw-r--r--net/x25/x25_facilities.c14
-rw-r--r--net/x25/x25_out.c2
-rw-r--r--net/xdp/xdp_umem.c11
-rw-r--r--net/xdp/xsk.c127
-rw-r--r--net/xdp/xsk_buff_pool.c18
-rw-r--r--net/xdp/xsk_diag.c4
-rw-r--r--net/xdp/xsk_queue.c10
-rw-r--r--net/xdp/xsk_queue.h19
-rw-r--r--net/xfrm/Kconfig1
-rw-r--r--net/xfrm/Makefile1
-rw-r--r--net/xfrm/xfrm_algo.c19
-rw-r--r--net/xfrm/xfrm_input.c6
-rw-r--r--net/xfrm/xfrm_interface_bpf.c6
-rw-r--r--net/xfrm/xfrm_interface_core.c26
-rw-r--r--net/xfrm/xfrm_policy.c330
-rw-r--r--net/xfrm/xfrm_state_bpf.c134
730 files changed, 30444 insertions, 13599 deletions
diff --git a/net/802/fddi.c b/net/802/fddi.c
index 7533ce26ba5f..888379ae35ec 100644
--- a/net/802/fddi.c
+++ b/net/802/fddi.c
@@ -175,4 +175,5 @@ struct net_device *alloc_fddidev(int sizeof_priv)
}
EXPORT_SYMBOL(alloc_fddidev);
+MODULE_DESCRIPTION("Core routines for FDDI network devices");
MODULE_LICENSE("GPL");
diff --git a/net/802/garp.c b/net/802/garp.c
index ab24b21fbb49..6a743d004301 100644
--- a/net/802/garp.c
+++ b/net/802/garp.c
@@ -21,6 +21,7 @@
static unsigned int garp_join_time __read_mostly = 200;
module_param(garp_join_time, uint, 0644);
MODULE_PARM_DESC(garp_join_time, "Join time in ms (default 200ms)");
+MODULE_DESCRIPTION("IEEE 802.1D Generic Attribute Registration Protocol (GARP)");
MODULE_LICENSE("GPL");
static const struct garp_state_trans {
diff --git a/net/802/mrp.c b/net/802/mrp.c
index eafc21ecc287..3154d7409493 100644
--- a/net/802/mrp.c
+++ b/net/802/mrp.c
@@ -26,6 +26,7 @@ static unsigned int mrp_periodic_time __read_mostly = 1000;
module_param(mrp_periodic_time, uint, 0644);
MODULE_PARM_DESC(mrp_periodic_time, "Periodic time in ms (default 1s)");
+MODULE_DESCRIPTION("IEEE 802.1Q Multiple Registration Protocol (MRP)");
MODULE_LICENSE("GPL");
static const u8
diff --git a/net/802/p8022.c b/net/802/p8022.c
index 79c23173116c..78c25168d7c9 100644
--- a/net/802/p8022.c
+++ b/net/802/p8022.c
@@ -60,4 +60,5 @@ void unregister_8022_client(struct datalink_proto *proto)
EXPORT_SYMBOL(register_8022_client);
EXPORT_SYMBOL(unregister_8022_client);
+MODULE_DESCRIPTION("Support for 802.2 demultiplexing off Ethernet");
MODULE_LICENSE("GPL");
diff --git a/net/802/psnap.c b/net/802/psnap.c
index 1406bfdbda13..fca9d454905f 100644
--- a/net/802/psnap.c
+++ b/net/802/psnap.c
@@ -160,4 +160,5 @@ void unregister_snap_client(struct datalink_proto *proto)
kfree(proto);
}
+MODULE_DESCRIPTION("SNAP data link layer. Derived from 802.2");
MODULE_LICENSE("GPL");
diff --git a/net/802/stp.c b/net/802/stp.c
index d550d9f88f60..03c9f75e92c9 100644
--- a/net/802/stp.c
+++ b/net/802/stp.c
@@ -98,4 +98,5 @@ void stp_proto_unregister(const struct stp_proto *proto)
}
EXPORT_SYMBOL_GPL(stp_proto_unregister);
+MODULE_DESCRIPTION("SAP demux for IEEE 802.1D Spanning Tree Protocol (STP)");
MODULE_LICENSE("GPL");
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index e40aa3e3641c..e45187b88220 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -738,5 +738,6 @@ static void __exit vlan_cleanup_module(void)
module_init(vlan_proto_init);
module_exit(vlan_cleanup_module);
+MODULE_DESCRIPTION("802.1Q/802.1ad VLAN Protocol");
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_VERSION);
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 0beb44f2fe1f..f00158234505 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -407,6 +407,8 @@ int vlan_vids_add_by_dev(struct net_device *dev,
return 0;
list_for_each_entry(vid_info, &vlan_info->vid_list, list) {
+ if (!vlan_hw_filter_capable(by_dev, vid_info->proto))
+ continue;
err = vlan_vid_add(dev, vid_info->proto, vid_info->vid);
if (err)
goto unwind;
@@ -417,6 +419,8 @@ unwind:
list_for_each_entry_continue_reverse(vid_info,
&vlan_info->vid_list,
list) {
+ if (!vlan_hw_filter_capable(by_dev, vid_info->proto))
+ continue;
vlan_vid_del(dev, vid_info->proto, vid_info->vid);
}
@@ -436,8 +440,11 @@ void vlan_vids_del_by_dev(struct net_device *dev,
if (!vlan_info)
return;
- list_for_each_entry(vid_info, &vlan_info->vid_list, list)
+ list_for_each_entry(vid_info, &vlan_info->vid_list, list) {
+ if (!vlan_hw_filter_capable(by_dev, vid_info->proto))
+ continue;
vlan_vid_del(dev, vid_info->proto, vid_info->vid);
+ }
}
EXPORT_SYMBOL(vlan_vids_del_by_dev);
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 2a7f1b15714a..407b2335f091 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -702,20 +702,7 @@ static int vlan_ethtool_get_ts_info(struct net_device *dev,
struct ethtool_ts_info *info)
{
const struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
- const struct ethtool_ops *ops = vlan->real_dev->ethtool_ops;
- struct phy_device *phydev = vlan->real_dev->phydev;
-
- if (phy_has_tsinfo(phydev)) {
- return phy_ts_info(phydev, info);
- } else if (ops->get_ts_info) {
- return ops->get_ts_info(vlan->real_dev, info);
- } else {
- info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE |
- SOF_TIMESTAMPING_SOFTWARE;
- info->phc_index = -1;
- }
-
- return 0;
+ return ethtool_get_ts_info_by_layer(vlan->real_dev, info);
}
static void vlan_dev_get_stats64(struct net_device *dev,
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index 214532173536..a3b68243fd4b 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -118,12 +118,16 @@ static int vlan_changelink(struct net_device *dev, struct nlattr *tb[],
}
if (data[IFLA_VLAN_INGRESS_QOS]) {
nla_for_each_nested(attr, data[IFLA_VLAN_INGRESS_QOS], rem) {
+ if (nla_type(attr) != IFLA_VLAN_QOS_MAPPING)
+ continue;
m = nla_data(attr);
vlan_dev_set_ingress_priority(dev, m->to, m->from);
}
}
if (data[IFLA_VLAN_EGRESS_QOS]) {
nla_for_each_nested(attr, data[IFLA_VLAN_EGRESS_QOS], rem) {
+ if (nla_type(attr) != IFLA_VLAN_QOS_MAPPING)
+ continue;
m = nla_data(attr);
err = vlan_dev_set_egress_priority(dev, m->from, m->to);
if (err)
diff --git a/net/9p/client.c b/net/9p/client.c
index 86bbc7147fc1..e265a0ca6bdd 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -540,12 +540,14 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
return 0;
if (!p9_is_proto_dotl(c)) {
- char *ename;
+ char *ename = NULL;
err = p9pdu_readf(&req->rc, c->proto_version, "s?d",
&ename, &ecode);
- if (err)
+ if (err) {
+ kfree(ename);
goto out_err;
+ }
if (p9_is_proto_dotu(c) && ecode < 512)
err = -ecode;
@@ -1979,7 +1981,7 @@ struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid,
goto error;
}
p9_debug(P9_DEBUG_9P,
- ">>> TXATTRWALK file_fid %d, attr_fid %d name %s\n",
+ ">>> TXATTRWALK file_fid %d, attr_fid %d name '%s'\n",
file_fid->fid, attr_fid->fid, attr_name);
req = p9_client_rpc(clnt, P9_TXATTRWALK, "dds",
diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index 4e3a2a1ffcb3..0e6603b1ec90 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -394,6 +394,8 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
uint16_t *nwname = va_arg(ap, uint16_t *);
char ***wnames = va_arg(ap, char ***);
+ *wnames = NULL;
+
errcode = p9pdu_readf(pdu, proto_version,
"w", nwname);
if (!errcode) {
@@ -403,6 +405,8 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
GFP_NOFS);
if (!*wnames)
errcode = -ENOMEM;
+ else
+ (*wnames)[0] = NULL;
}
if (!errcode) {
@@ -414,8 +418,10 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
proto_version,
"s",
&(*wnames)[i]);
- if (errcode)
+ if (errcode) {
+ (*wnames)[i] = NULL;
break;
+ }
}
}
@@ -423,11 +429,14 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
if (*wnames) {
int i;
- for (i = 0; i < *nwname; i++)
+ for (i = 0; i < *nwname; i++) {
+ if (!(*wnames)[i])
+ break;
kfree((*wnames)[i]);
+ }
+ kfree(*wnames);
+ *wnames = NULL;
}
- kfree(*wnames);
- *wnames = NULL;
}
}
break;
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index c4015f30f9fa..1a3948b8c493 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -671,10 +671,14 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req)
p9_debug(P9_DEBUG_TRANS, "mux %p task %p tcall %p id %d\n",
m, current, &req->tc, req->tc.id);
- if (m->err < 0)
- return m->err;
spin_lock(&m->req_lock);
+
+ if (m->err < 0) {
+ spin_unlock(&m->req_lock);
+ return m->err;
+ }
+
WRITE_ONCE(req->status, REQ_STATUS_UNSENT);
list_add_tail(&req->req_list, &m->unsent_req_list);
spin_unlock(&m->req_lock);
@@ -832,14 +836,21 @@ static int p9_fd_open(struct p9_client *client, int rfd, int wfd)
goto out_free_ts;
if (!(ts->rd->f_mode & FMODE_READ))
goto out_put_rd;
- /* prevent workers from hanging on IO when fd is a pipe */
- ts->rd->f_flags |= O_NONBLOCK;
+ /* Prevent workers from hanging on IO when fd is a pipe.
+ * It's technically possible for userspace or concurrent mounts to
+ * modify this flag concurrently, which will likely result in a
+ * broken filesystem. However, just having bad flags here should
+ * not crash the kernel or cause any other sort of bug, so mark this
+ * particular data race as intentional so that tooling (like KCSAN)
+ * can allow it and detect further problems.
+ */
+ data_race(ts->rd->f_flags |= O_NONBLOCK);
ts->wr = fget(wfd);
if (!ts->wr)
goto out_put_rd;
if (!(ts->wr->f_mode & FMODE_WRITE))
goto out_put_wr;
- ts->wr->f_flags |= O_NONBLOCK;
+ data_race(ts->wr->f_flags |= O_NONBLOCK);
client->trans = ts;
client->status = Connected;
diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c
index 1fffe2bed5b0..dfdbe1ca5338 100644
--- a/net/9p/trans_xen.c
+++ b/net/9p/trans_xen.c
@@ -54,7 +54,6 @@ struct xen_9pfs_front_priv {
char *tag;
struct p9_client *client;
- int num_rings;
struct xen_9pfs_dataring *rings;
};
@@ -131,7 +130,7 @@ static int p9_xen_request(struct p9_client *client, struct p9_req_t *p9_req)
if (list_entry_is_head(priv, &xen_9pfs_devs, list))
return -EINVAL;
- num = p9_req->tc.tag % priv->num_rings;
+ num = p9_req->tc.tag % XEN_9PFS_NUM_RINGS;
ring = &priv->rings[num];
again:
@@ -279,7 +278,7 @@ static void xen_9pfs_front_free(struct xen_9pfs_front_priv *priv)
list_del(&priv->list);
write_unlock(&xen_9pfs_lock);
- for (i = 0; i < priv->num_rings; i++) {
+ for (i = 0; i < XEN_9PFS_NUM_RINGS; i++) {
struct xen_9pfs_dataring *ring = &priv->rings[i];
cancel_work_sync(&ring->work);
@@ -408,15 +407,14 @@ static int xen_9pfs_front_init(struct xenbus_device *dev)
if (p9_xen_trans.maxsize > XEN_FLEX_RING_SIZE(max_ring_order))
p9_xen_trans.maxsize = XEN_FLEX_RING_SIZE(max_ring_order) / 2;
- priv->num_rings = XEN_9PFS_NUM_RINGS;
- priv->rings = kcalloc(priv->num_rings, sizeof(*priv->rings),
+ priv->rings = kcalloc(XEN_9PFS_NUM_RINGS, sizeof(*priv->rings),
GFP_KERNEL);
if (!priv->rings) {
kfree(priv);
return -ENOMEM;
}
- for (i = 0; i < priv->num_rings; i++) {
+ for (i = 0; i < XEN_9PFS_NUM_RINGS; i++) {
priv->rings[i].priv = priv;
ret = xen_9pfs_front_alloc_dataring(dev, &priv->rings[i],
max_ring_order);
@@ -434,10 +432,11 @@ static int xen_9pfs_front_init(struct xenbus_device *dev)
if (ret)
goto error_xenbus;
ret = xenbus_printf(xbt, dev->nodename, "num-rings", "%u",
- priv->num_rings);
+ XEN_9PFS_NUM_RINGS);
if (ret)
goto error_xenbus;
- for (i = 0; i < priv->num_rings; i++) {
+
+ for (i = 0; i < XEN_9PFS_NUM_RINGS; i++) {
char str[16];
BUILD_BUG_ON(XEN_9PFS_NUM_RINGS > 9);
diff --git a/net/Kconfig b/net/Kconfig
index d532ec33f1fe..4adc47d0c9c2 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -233,8 +233,6 @@ source "net/bridge/netfilter/Kconfig"
endif
-source "net/bpfilter/Kconfig"
-
source "net/dccp/Kconfig"
source "net/sctp/Kconfig"
source "net/rds/Kconfig"
@@ -246,7 +244,7 @@ source "net/bridge/Kconfig"
source "net/dsa/Kconfig"
source "net/8021q/Kconfig"
source "net/llc/Kconfig"
-source "drivers/net/appletalk/Kconfig"
+source "net/appletalk/Kconfig"
source "net/x25/Kconfig"
source "net/lapb/Kconfig"
source "net/phonet/Kconfig"
@@ -508,4 +506,13 @@ config NETDEV_ADDR_LIST_TEST
default KUNIT_ALL_TESTS
depends on KUNIT
+config NET_TEST
+ tristate "KUnit tests for networking" if !KUNIT_ALL_TESTS
+ depends on KUNIT
+ default KUNIT_ALL_TESTS
+ help
+ KUnit tests covering core networking infra, such as sk_buff.
+
+ If unsure, say N.
+
endif # if NET
diff --git a/net/Makefile b/net/Makefile
index 4c4dc535453d..b06b5539e7a6 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -19,7 +19,6 @@ obj-$(CONFIG_TLS) += tls/
obj-$(CONFIG_XFRM) += xfrm/
obj-$(CONFIG_UNIX_SCM) += unix/
obj-y += ipv6/
-obj-$(CONFIG_BPFILTER) += bpfilter/
obj-$(CONFIG_PACKET) += packet/
obj-$(CONFIG_NET_KEY) += key/
obj-$(CONFIG_BRIDGE) += bridge/
diff --git a/net/appletalk/Kconfig b/net/appletalk/Kconfig
new file mode 100644
index 000000000000..041141abf925
--- /dev/null
+++ b/net/appletalk/Kconfig
@@ -0,0 +1,30 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Appletalk configuration
+#
+config ATALK
+ tristate "Appletalk protocol support"
+ select LLC
+ help
+ AppleTalk is the protocol that Apple computers can use to communicate
+ on a network. If your Linux box is connected to such a network and you
+ wish to connect to it, say Y. You will need to use the netatalk package
+ so that your Linux box can act as a print and file server for Macs as
+ well as access AppleTalk printers. Check out
+ <http://www.zettabyte.net/netatalk/> on the WWW for details.
+ EtherTalk is the name used for AppleTalk over Ethernet and the
+ cheaper and slower LocalTalk is AppleTalk over a proprietary Apple
+ network using serial links. EtherTalk and LocalTalk are fully
+ supported by Linux.
+
+ General information about how to connect Linux, Windows machines and
+ Macs is on the WWW at <http://www.eats.com/linux_mac_win.html>. The
+ NET3-4-HOWTO, available from
+ <http://www.tldp.org/docs.html#howto>, contains valuable
+ information as well.
+
+ To compile this driver as a module, choose M here: the module will be
+ called appletalk. You almost certainly want to compile it as a
+ module so you can restart your AppleTalk stack without rebooting
+ your machine. I hear that the GNU boycott of Apple is over, so
+ even politically correct people are allowed to say Y here.
diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c
index c7236daa2415..9fa0b246902b 100644
--- a/net/appletalk/aarp.c
+++ b/net/appletalk/aarp.c
@@ -664,7 +664,7 @@ out_unlock:
sendit:
if (skb->sk)
- skb->priority = skb->sk->sk_priority;
+ skb->priority = READ_ONCE(skb->sk->sk_priority);
if (dev_queue_xmit(skb))
goto drop;
sent:
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 8978fb6212ff..198f5ba2feae 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1284,39 +1284,6 @@ out:
return err;
}
-#if IS_ENABLED(CONFIG_IPDDP)
-static __inline__ int is_ip_over_ddp(struct sk_buff *skb)
-{
- return skb->data[12] == 22;
-}
-
-static int handle_ip_over_ddp(struct sk_buff *skb)
-{
- struct net_device *dev = __dev_get_by_name(&init_net, "ipddp0");
- struct net_device_stats *stats;
-
- /* This needs to be able to handle ipddp"N" devices */
- if (!dev) {
- kfree_skb(skb);
- return NET_RX_DROP;
- }
-
- skb->protocol = htons(ETH_P_IP);
- skb_pull(skb, 13);
- skb->dev = dev;
- skb_reset_transport_header(skb);
-
- stats = netdev_priv(dev);
- stats->rx_packets++;
- stats->rx_bytes += skb->len + 13;
- return netif_rx(skb); /* Send the SKB up to a higher place. */
-}
-#else
-/* make it easy for gcc to optimize this test out, i.e. kill the code */
-#define is_ip_over_ddp(skb) 0
-#define handle_ip_over_ddp(skb) 0
-#endif
-
static int atalk_route_packet(struct sk_buff *skb, struct net_device *dev,
struct ddpehdr *ddp, __u16 len_hops, int origlen)
{
@@ -1480,9 +1447,6 @@ static int atalk_rcv(struct sk_buff *skb, struct net_device *dev,
return atalk_route_packet(skb, dev, ddp, len_hops, origlen);
}
- /* if IP over DDP is not selected this code will be optimized out */
- if (is_ip_over_ddp(skb))
- return handle_ip_over_ddp(skb);
/*
* Which socket - atalk_search_socket() looks for a *full match*
* of the <net, node, port> tuple.
@@ -1617,7 +1581,7 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
}
/* Build a packet */
- SOCK_DEBUG(sk, "SK %p: Got address.\n", sk);
+ net_dbg_ratelimited("SK %p: Got address.\n", sk);
/* For headers */
size = sizeof(struct ddpehdr) + len + ddp_dl->header_length;
@@ -1638,7 +1602,7 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
dev = rt->dev;
- SOCK_DEBUG(sk, "SK %p: Size needed %d, device %s\n",
+ net_dbg_ratelimited("SK %p: Size needed %d, device %s\n",
sk, size, dev->name);
hard_header_len = dev->hard_header_len;
@@ -1667,7 +1631,7 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
skb_reserve(skb, hard_header_len);
skb->dev = dev;
- SOCK_DEBUG(sk, "SK %p: Begin build.\n", sk);
+ net_dbg_ratelimited("SK %p: Begin build.\n", sk);
ddp = skb_put(skb, sizeof(struct ddpehdr));
ddp->deh_len_hops = htons(len + sizeof(*ddp));
@@ -1678,7 +1642,7 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
ddp->deh_dport = usat->sat_port;
ddp->deh_sport = at->src_port;
- SOCK_DEBUG(sk, "SK %p: Copy user data (%zd bytes).\n", sk, len);
+ net_dbg_ratelimited("SK %p: Copy user data (%zd bytes).\n", sk, len);
err = memcpy_from_msg(skb_put(skb, len), msg, len);
if (err) {
@@ -1702,7 +1666,7 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
if (skb2) {
loopback = 1;
- SOCK_DEBUG(sk, "SK %p: send out(copy).\n", sk);
+ net_dbg_ratelimited("SK %p: send out(copy).\n", sk);
/*
* If it fails it is queued/sent above in the aarp queue
*/
@@ -1711,7 +1675,7 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
}
if (dev->flags & IFF_LOOPBACK || loopback) {
- SOCK_DEBUG(sk, "SK %p: Loop back.\n", sk);
+ net_dbg_ratelimited("SK %p: Loop back.\n", sk);
/* loop back */
skb_orphan(skb);
if (ddp->deh_dnode == ATADDR_BCAST) {
@@ -1725,7 +1689,7 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
}
ddp_dl->request(ddp_dl, skb, dev->dev_addr);
} else {
- SOCK_DEBUG(sk, "SK %p: send out.\n", sk);
+ net_dbg_ratelimited("SK %p: send out.\n", sk);
if (rt->flags & RTF_GATEWAY) {
gsat.sat_addr = rt->gateway;
usat = &gsat;
@@ -1736,7 +1700,7 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
*/
aarp_send_ddp(dev, skb, &usat->sat_addr, NULL);
}
- SOCK_DEBUG(sk, "SK %p: Done write (%zd).\n", sk, len);
+ net_dbg_ratelimited("SK %p: Done write (%zd).\n", sk, len);
out:
release_sock(sk);
@@ -1811,15 +1775,14 @@ static int atalk_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
break;
}
case TIOCINQ: {
- /*
- * These two are safe on a single CPU system as only
- * user tasks fiddle here
- */
- struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
+ struct sk_buff *skb;
long amount = 0;
+ spin_lock_irq(&sk->sk_receive_queue.lock);
+ skb = skb_peek(&sk->sk_receive_queue);
if (skb)
amount = skb->len - sizeof(struct ddpehdr);
+ spin_unlock_irq(&sk->sk_receive_queue.lock);
rc = put_user(amount, (int __user *)argp);
break;
}
diff --git a/net/atm/atm_sysfs.c b/net/atm/atm_sysfs.c
index 466353b3dde4..54e7fb1a4ee5 100644
--- a/net/atm/atm_sysfs.c
+++ b/net/atm/atm_sysfs.c
@@ -116,8 +116,6 @@ static int atm_uevent(const struct device *cdev, struct kobj_uevent_env *env)
return -ENODEV;
adev = to_atm_dev(cdev);
- if (!adev)
- return -ENODEV;
if (add_uevent_var(env, "NAME=%s%d", adev->type, adev->number))
return -ENOMEM;
diff --git a/net/atm/common.c b/net/atm/common.c
index f7019df41c3e..2a1ec014e901 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -890,6 +890,7 @@ subsys_initcall(atm_init);
module_exit(atm_exit);
+MODULE_DESCRIPTION("Asynchronous Transfer Mode (ATM) networking core");
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_ATMPVC);
MODULE_ALIAS_NETPROTO(PF_ATMSVC);
diff --git a/net/atm/ioctl.c b/net/atm/ioctl.c
index 838ebf0cabbf..f81f8d56f5c0 100644
--- a/net/atm/ioctl.c
+++ b/net/atm/ioctl.c
@@ -73,14 +73,17 @@ static int do_vcc_ioctl(struct socket *sock, unsigned int cmd,
case SIOCINQ:
{
struct sk_buff *skb;
+ int amount;
if (sock->state != SS_CONNECTED) {
error = -EINVAL;
goto done;
}
+ spin_lock_irq(&sk->sk_receive_queue.lock);
skb = skb_peek(&sk->sk_receive_queue);
- error = put_user(skb ? skb->len : 0,
- (int __user *)argp) ? -EFAULT : 0;
+ amount = skb ? skb->len : 0;
+ spin_unlock_irq(&sk->sk_receive_queue.lock);
+ error = put_user(amount, (int __user *)argp) ? -EFAULT : 0;
goto done;
}
case ATM_SETSC:
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 6257bf12e5a0..ffef658862db 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -2234,4 +2234,5 @@ out:
spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
}
+MODULE_DESCRIPTION("ATM LAN Emulation (LANE) support");
MODULE_LICENSE("GPL");
diff --git a/net/ax25/Kconfig b/net/ax25/Kconfig
index d3a9843a043d..fdb666607f10 100644
--- a/net/ax25/Kconfig
+++ b/net/ax25/Kconfig
@@ -10,7 +10,7 @@ menuconfig HAMRADIO
If you want to connect your Linux box to an amateur radio, answer Y
here. You want to read <https://www.tapr.org/>
and more specifically about AX.25 on Linux
- <http://www.linux-ax25.org/>.
+ <https://linux-ax25.in-berlin.de>.
Note that the answer to this question won't directly affect the
kernel: saying N will just cause the configurator to skip all
@@ -61,7 +61,7 @@ config AX25_DAMA_SLAVE
configuration. Linux cannot yet act as a DAMA server. This option
only compiles DAMA slave support into the kernel. It still needs to
be enabled at runtime. For more about DAMA see
- <http://www.linux-ax25.org>. If unsure, say Y.
+ <https://linux-ax25.in-berlin.de>. If unsure, say Y.
# placeholder until implemented
config AX25_DAMA_MASTER
@@ -87,9 +87,9 @@ config NETROM
A comprehensive listing of all the software for Linux amateur radio
users as well as information about how to configure an AX.25 port is
contained in the Linux Ham Wiki, available from
- <http://www.linux-ax25.org>. You also might want to check out the
- file <file:Documentation/networking/ax25.rst>. More information about
- digital amateur radio in general is on the WWW at
+ <https://linux-ax25.in-berlin.de>. You also might want to check out
+ the file <file:Documentation/networking/ax25.rst>. More information
+ about digital amateur radio in general is on the WWW at
<https://www.tapr.org/>.
To compile this driver as a module, choose M here: the
@@ -106,9 +106,9 @@ config ROSE
A comprehensive listing of all the software for Linux amateur radio
users as well as information about how to configure an AX.25 port is
contained in the Linux Ham Wiki, available from
- <http://www.linux-ax25.org>. You also might want to check out the
- file <file:Documentation/networking/ax25.rst>. More information about
- digital amateur radio in general is on the WWW at
+ <https://linux-ax25.in-berlin.de>. You also might want to check out
+ the file <file:Documentation/networking/ax25.rst>. More information
+ about digital amateur radio in general is on the WWW at
<https://www.tapr.org/>.
To compile this driver as a module, choose M here: the
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 5db805d5f74d..558e158c98d0 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -939,7 +939,7 @@ struct sock *ax25_make_new(struct sock *osk, struct ax25_dev *ax25_dev)
sock_init_data(NULL, sk);
sk->sk_type = osk->sk_type;
- sk->sk_priority = osk->sk_priority;
+ sk->sk_priority = READ_ONCE(osk->sk_priority);
sk->sk_protocol = osk->sk_protocol;
sk->sk_rcvbuf = osk->sk_rcvbuf;
sk->sk_sndbuf = osk->sk_sndbuf;
diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile
index 3bd0760c76a2..b51d8b071b56 100644
--- a/net/batman-adv/Makefile
+++ b/net/batman-adv/Makefile
@@ -20,6 +20,7 @@ batman-adv-y += hash.o
batman-adv-$(CONFIG_BATMAN_ADV_DEBUG) += log.o
batman-adv-y += main.o
batman-adv-$(CONFIG_BATMAN_ADV_MCAST) += multicast.o
+batman-adv-$(CONFIG_BATMAN_ADV_MCAST) += multicast_forw.o
batman-adv-y += netlink.o
batman-adv-$(CONFIG_BATMAN_ADV_NC) += network-coding.o
batman-adv-y += originator.o
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 37ce6cfb3520..5f46ca3d4bb8 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -20,7 +20,6 @@
#include <linux/if_vlan.h>
#include <linux/jhash.h>
#include <linux/jiffies.h>
-#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/lockdep.h>
@@ -31,6 +30,7 @@
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
+#include <linux/sprintf.h>
#include <linux/stddef.h>
#include <linux/string.h>
#include <linux/workqueue.h>
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index c120c7c6d25f..757c084ac2d1 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -25,7 +25,6 @@
#include "hard-interface.h"
#include "originator.h"
-#include "routing.h"
#include "send.h"
/**
@@ -351,18 +350,14 @@ bool batadv_frag_skb_fwd(struct sk_buff *skb,
struct batadv_orig_node *orig_node_src)
{
struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface);
- struct batadv_orig_node *orig_node_dst;
struct batadv_neigh_node *neigh_node = NULL;
struct batadv_frag_packet *packet;
u16 total_size;
bool ret = false;
packet = (struct batadv_frag_packet *)skb->data;
- orig_node_dst = batadv_orig_hash_find(bat_priv, packet->dest);
- if (!orig_node_dst)
- goto out;
- neigh_node = batadv_find_router(bat_priv, orig_node_dst, recv_if);
+ neigh_node = batadv_orig_to_router(bat_priv, packet->dest, recv_if);
if (!neigh_node)
goto out;
@@ -381,7 +376,6 @@ bool batadv_frag_skb_fwd(struct sk_buff *skb,
}
out:
- batadv_orig_node_put(orig_node_dst);
batadv_neigh_node_put(neigh_node);
return ret;
}
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index d26124bc27e1..0ddd8b4b3f4c 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -18,7 +18,6 @@
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
-#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/lockdep.h>
@@ -29,6 +28,7 @@
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
+#include <linux/sprintf.h>
#include <linux/stddef.h>
#include <linux/udp.h>
#include <net/sock.h>
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index e8a449915566..5fc754b0b3f7 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -6,6 +6,7 @@
#include "main.h"
+#include <linux/array_size.h>
#include <linux/atomic.h>
#include <linux/build_bug.h>
#include <linux/byteorder/generic.h>
@@ -20,7 +21,6 @@
#include <linux/init.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
-#include <linux/kernel.h>
#include <linux/kobject.h>
#include <linux/kref.h>
#include <linux/list.h>
@@ -33,6 +33,7 @@
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
+#include <linux/sprintf.h>
#include <linux/stddef.h>
#include <linux/string.h>
#include <linux/workqueue.h>
@@ -532,6 +533,8 @@ static void batadv_recv_handler_init(void)
/* broadcast packet */
batadv_rx_handler[BATADV_BCAST] = batadv_recv_bcast_packet;
+ /* multicast packet */
+ batadv_rx_handler[BATADV_MCAST] = batadv_recv_mcast_packet;
/* unicast packets ... */
/* unicast with 4 addresses packet */
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 10007c5894a1..870dcd7f1786 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -13,7 +13,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2023.3"
+#define BATADV_SOURCE_VERSION "2024.0"
#endif
/* B.A.T.M.A.N. parameters */
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index 315394f12c55..14088c4ff2f6 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -25,7 +25,6 @@
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/jiffies.h>
-#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/lockdep.h>
#include <linux/netdevice.h>
@@ -36,6 +35,7 @@
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
+#include <linux/sprintf.h>
#include <linux/stddef.h>
#include <linux/string.h>
#include <linux/types.h>
@@ -236,6 +236,37 @@ static u8 batadv_mcast_mla_rtr_flags_get(struct batadv_priv *bat_priv,
}
/**
+ * batadv_mcast_mla_forw_flags_get() - get multicast forwarding flags
+ * @bat_priv: the bat priv with all the soft interface information
+ *
+ * Checks if all active hard interfaces have an MTU larger or equal to 1280
+ * bytes (IPv6 minimum MTU).
+ *
+ * Return: BATADV_MCAST_HAVE_MC_PTYPE_CAPA if yes, BATADV_NO_FLAGS otherwise.
+ */
+static u8 batadv_mcast_mla_forw_flags_get(struct batadv_priv *bat_priv)
+{
+ const struct batadv_hard_iface *hard_iface;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
+ if (hard_iface->if_status != BATADV_IF_ACTIVE)
+ continue;
+
+ if (hard_iface->soft_iface != bat_priv->soft_iface)
+ continue;
+
+ if (hard_iface->net_dev->mtu < IPV6_MIN_MTU) {
+ rcu_read_unlock();
+ return BATADV_NO_FLAGS;
+ }
+ }
+ rcu_read_unlock();
+
+ return BATADV_MCAST_HAVE_MC_PTYPE_CAPA;
+}
+
+/**
* batadv_mcast_mla_flags_get() - get the new multicast flags
* @bat_priv: the bat priv with all the soft interface information
*
@@ -256,6 +287,7 @@ batadv_mcast_mla_flags_get(struct batadv_priv *bat_priv)
mla_flags.enabled = 1;
mla_flags.tvlv_flags |= batadv_mcast_mla_rtr_flags_get(bat_priv,
bridge);
+ mla_flags.tvlv_flags |= batadv_mcast_mla_forw_flags_get(bat_priv);
if (!bridge)
return mla_flags;
@@ -806,23 +838,25 @@ static void batadv_mcast_flags_log(struct batadv_priv *bat_priv, u8 flags)
{
bool old_enabled = bat_priv->mcast.mla_flags.enabled;
u8 old_flags = bat_priv->mcast.mla_flags.tvlv_flags;
- char str_old_flags[] = "[.... . ]";
+ char str_old_flags[] = "[.... . .]";
- sprintf(str_old_flags, "[%c%c%c%s%s]",
+ sprintf(str_old_flags, "[%c%c%c%s%s%c]",
(old_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES) ? 'U' : '.',
(old_flags & BATADV_MCAST_WANT_ALL_IPV4) ? '4' : '.',
(old_flags & BATADV_MCAST_WANT_ALL_IPV6) ? '6' : '.',
!(old_flags & BATADV_MCAST_WANT_NO_RTR4) ? "R4" : ". ",
- !(old_flags & BATADV_MCAST_WANT_NO_RTR6) ? "R6" : ". ");
+ !(old_flags & BATADV_MCAST_WANT_NO_RTR6) ? "R6" : ". ",
+ !(old_flags & BATADV_MCAST_HAVE_MC_PTYPE_CAPA) ? 'P' : '.');
batadv_dbg(BATADV_DBG_MCAST, bat_priv,
- "Changing multicast flags from '%s' to '[%c%c%c%s%s]'\n",
+ "Changing multicast flags from '%s' to '[%c%c%c%s%s%c]'\n",
old_enabled ? str_old_flags : "<undefined>",
(flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES) ? 'U' : '.',
(flags & BATADV_MCAST_WANT_ALL_IPV4) ? '4' : '.',
(flags & BATADV_MCAST_WANT_ALL_IPV6) ? '6' : '.',
!(flags & BATADV_MCAST_WANT_NO_RTR4) ? "R4" : ". ",
- !(flags & BATADV_MCAST_WANT_NO_RTR6) ? "R6" : ". ");
+ !(flags & BATADV_MCAST_WANT_NO_RTR6) ? "R6" : ". ",
+ !(flags & BATADV_MCAST_HAVE_MC_PTYPE_CAPA) ? 'P' : '.');
}
/**
@@ -1136,16 +1170,61 @@ static int batadv_mcast_forw_rtr_count(struct batadv_priv *bat_priv,
}
/**
+ * batadv_mcast_forw_mode_by_count() - get forwarding mode by count
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the multicast packet to check
+ * @vid: the vlan identifier
+ * @is_routable: stores whether the destination is routable
+ * @count: the number of originators the multicast packet need to be sent to
+ *
+ * For a multicast packet with multiple destination originators, checks which
+ * mode to use. For BATADV_FORW_MCAST it also encapsulates the packet with a
+ * complete batman-adv multicast header.
+ *
+ * Return:
+ * BATADV_FORW_MCAST: If all nodes have multicast packet routing
+ * capabilities and an MTU >= 1280 on all hard interfaces (including us)
+ * and the encapsulated multicast packet with all destination addresses
+ * would still fit into an 1280 bytes batman-adv multicast packet
+ * (excluding the outer ethernet frame) and we could successfully push
+ * the full batman-adv multicast packet header.
+ * BATADV_FORW_UCASTS: If the packet cannot be sent in a batman-adv
+ * multicast packet and the amount of batman-adv unicast packets needed
+ * is smaller or equal to the configured multicast fanout.
+ * BATADV_FORW_BCAST: Otherwise.
+ */
+static enum batadv_forw_mode
+batadv_mcast_forw_mode_by_count(struct batadv_priv *bat_priv,
+ struct sk_buff *skb, unsigned short vid,
+ int is_routable, int count)
+{
+ unsigned int mcast_hdrlen = batadv_mcast_forw_packet_hdrlen(count);
+ u8 own_tvlv_flags = bat_priv->mcast.mla_flags.tvlv_flags;
+
+ if (!atomic_read(&bat_priv->mcast.num_no_mc_ptype_capa) &&
+ own_tvlv_flags & BATADV_MCAST_HAVE_MC_PTYPE_CAPA &&
+ skb->len + mcast_hdrlen <= IPV6_MIN_MTU &&
+ batadv_mcast_forw_push(bat_priv, skb, vid, is_routable, count))
+ return BATADV_FORW_MCAST;
+
+ if (count <= atomic_read(&bat_priv->multicast_fanout))
+ return BATADV_FORW_UCASTS;
+
+ return BATADV_FORW_BCAST;
+}
+
+/**
* batadv_mcast_forw_mode() - check on how to forward a multicast packet
* @bat_priv: the bat priv with all the soft interface information
* @skb: the multicast packet to check
+ * @vid: the vlan identifier
* @is_routable: stores whether the destination is routable
*
* Return: The forwarding mode as enum batadv_forw_mode.
*/
enum batadv_forw_mode
batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
- int *is_routable)
+ unsigned short vid, int *is_routable)
{
int ret, tt_count, ip_count, unsnoop_count, total_count;
bool is_unsnoopable = false;
@@ -1175,10 +1254,8 @@ batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
else if (unsnoop_count)
return BATADV_FORW_BCAST;
- if (total_count <= atomic_read(&bat_priv->multicast_fanout))
- return BATADV_FORW_UCASTS;
-
- return BATADV_FORW_BCAST;
+ return batadv_mcast_forw_mode_by_count(bat_priv, skb, vid, *is_routable,
+ total_count);
}
/**
@@ -1739,6 +1816,31 @@ static void batadv_mcast_want_rtr6_update(struct batadv_priv *bat_priv,
}
/**
+ * batadv_mcast_have_mc_ptype_update() - update multicast packet type counter
+ * @bat_priv: the bat priv with all the soft interface information
+ * @orig: the orig_node which multicast state might have changed of
+ * @mcast_flags: flags indicating the new multicast state
+ *
+ * If the BATADV_MCAST_HAVE_MC_PTYPE_CAPA flag of this originator, orig, has
+ * toggled then this method updates the counter accordingly.
+ */
+static void batadv_mcast_have_mc_ptype_update(struct batadv_priv *bat_priv,
+ struct batadv_orig_node *orig,
+ u8 mcast_flags)
+{
+ lockdep_assert_held(&orig->mcast_handler_lock);
+
+ /* switched from flag set to unset */
+ if (!(mcast_flags & BATADV_MCAST_HAVE_MC_PTYPE_CAPA) &&
+ orig->mcast_flags & BATADV_MCAST_HAVE_MC_PTYPE_CAPA)
+ atomic_inc(&bat_priv->mcast.num_no_mc_ptype_capa);
+ /* switched from flag unset to set */
+ else if (mcast_flags & BATADV_MCAST_HAVE_MC_PTYPE_CAPA &&
+ !(orig->mcast_flags & BATADV_MCAST_HAVE_MC_PTYPE_CAPA))
+ atomic_dec(&bat_priv->mcast.num_no_mc_ptype_capa);
+}
+
+/**
* batadv_mcast_tvlv_flags_get() - get multicast flags from an OGM TVLV
* @enabled: whether the originator has multicast TVLV support enabled
* @tvlv_value: tvlv buffer containing the multicast flags
@@ -1806,6 +1908,7 @@ static void batadv_mcast_tvlv_ogm_handler(struct batadv_priv *bat_priv,
batadv_mcast_want_ipv6_update(bat_priv, orig, mcast_flags);
batadv_mcast_want_rtr4_update(bat_priv, orig, mcast_flags);
batadv_mcast_want_rtr6_update(bat_priv, orig, mcast_flags);
+ batadv_mcast_have_mc_ptype_update(bat_priv, orig, mcast_flags);
orig->mcast_flags = mcast_flags;
spin_unlock_bh(&orig->mcast_handler_lock);
@@ -1820,6 +1923,10 @@ void batadv_mcast_init(struct batadv_priv *bat_priv)
batadv_tvlv_handler_register(bat_priv, batadv_mcast_tvlv_ogm_handler,
NULL, NULL, BATADV_TVLV_MCAST, 2,
BATADV_TVLV_HANDLER_OGM_CIFNOTFND);
+ batadv_tvlv_handler_register(bat_priv, NULL, NULL,
+ batadv_mcast_forw_tracker_tvlv_handler,
+ BATADV_TVLV_MCAST_TRACKER, 1,
+ BATADV_TVLV_HANDLER_OGM_CIFNOTFND);
INIT_DELAYED_WORK(&bat_priv->mcast.work, batadv_mcast_mla_update);
batadv_mcast_start_timer(bat_priv);
@@ -2068,6 +2175,7 @@ void batadv_mcast_free(struct batadv_priv *bat_priv)
cancel_delayed_work_sync(&bat_priv->mcast.work);
batadv_tvlv_container_unregister(bat_priv, BATADV_TVLV_MCAST, 2);
+ batadv_tvlv_handler_unregister(bat_priv, BATADV_TVLV_MCAST_TRACKER, 1);
batadv_tvlv_handler_unregister(bat_priv, BATADV_TVLV_MCAST, 2);
/* safely calling outside of worker, as worker was canceled above */
@@ -2091,6 +2199,8 @@ void batadv_mcast_purge_orig(struct batadv_orig_node *orig)
BATADV_MCAST_WANT_NO_RTR4);
batadv_mcast_want_rtr6_update(bat_priv, orig,
BATADV_MCAST_WANT_NO_RTR6);
+ batadv_mcast_have_mc_ptype_update(bat_priv, orig,
+ BATADV_MCAST_HAVE_MC_PTYPE_CAPA);
spin_unlock_bh(&orig->mcast_handler_lock);
}
diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h
index a9770d8d6d36..d97ee51d26f2 100644
--- a/net/batman-adv/multicast.h
+++ b/net/batman-adv/multicast.h
@@ -11,6 +11,7 @@
#include <linux/netlink.h>
#include <linux/skbuff.h>
+#include <linux/types.h>
/**
* enum batadv_forw_mode - the way a packet should be forwarded as
@@ -28,6 +29,12 @@ enum batadv_forw_mode {
*/
BATADV_FORW_UCASTS,
+ /**
+ * @BATADV_FORW_MCAST: forward the packet to some nodes via a
+ * batman-adv multicast packet
+ */
+ BATADV_FORW_MCAST,
+
/** @BATADV_FORW_NONE: don't forward, drop it */
BATADV_FORW_NONE,
};
@@ -36,7 +43,7 @@ enum batadv_forw_mode {
enum batadv_forw_mode
batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
- int *is_routable);
+ unsigned short vid, int *is_routable);
int batadv_mcast_forw_send(struct batadv_priv *bat_priv, struct sk_buff *skb,
unsigned short vid, int is_routable);
@@ -52,11 +59,23 @@ void batadv_mcast_free(struct batadv_priv *bat_priv);
void batadv_mcast_purge_orig(struct batadv_orig_node *orig_node);
+/* multicast_forw.c */
+
+int batadv_mcast_forw_tracker_tvlv_handler(struct batadv_priv *bat_priv,
+ struct sk_buff *skb);
+
+unsigned int batadv_mcast_forw_packet_hdrlen(unsigned int num_dests);
+
+bool batadv_mcast_forw_push(struct batadv_priv *bat_priv, struct sk_buff *skb,
+ unsigned short vid, int is_routable, int count);
+
+int batadv_mcast_forw_mcsend(struct batadv_priv *bat_priv, struct sk_buff *skb);
+
#else
static inline enum batadv_forw_mode
batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
- int *is_routable)
+ unsigned short vid, int *is_routable)
{
return BATADV_FORW_BCAST;
}
@@ -94,6 +113,13 @@ static inline void batadv_mcast_purge_orig(struct batadv_orig_node *orig_node)
{
}
+static inline int batadv_mcast_forw_mcsend(struct batadv_priv *bat_priv,
+ struct sk_buff *skb)
+{
+ kfree_skb(skb);
+ return NET_XMIT_DROP;
+}
+
#endif /* CONFIG_BATMAN_ADV_MCAST */
#endif /* _NET_BATMAN_ADV_MULTICAST_H_ */
diff --git a/net/batman-adv/multicast_forw.c b/net/batman-adv/multicast_forw.c
new file mode 100644
index 000000000000..fafd6ba8c056
--- /dev/null
+++ b/net/batman-adv/multicast_forw.c
@@ -0,0 +1,1178 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) B.A.T.M.A.N. contributors:
+ *
+ * Linus Lüssing
+ */
+
+#include "multicast.h"
+#include "main.h"
+
+#include <linux/bug.h>
+#include <linux/build_bug.h>
+#include <linux/byteorder/generic.h>
+#include <linux/compiler.h>
+#include <linux/errno.h>
+#include <linux/etherdevice.h>
+#include <linux/gfp.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <linux/ipv6.h>
+#include <linux/limits.h>
+#include <linux/netdevice.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+#include <linux/skbuff.h>
+#include <linux/stddef.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <uapi/linux/batadv_packet.h>
+
+#include "bridge_loop_avoidance.h"
+#include "originator.h"
+#include "send.h"
+#include "translation-table.h"
+
+#define batadv_mcast_forw_tracker_for_each_dest(dest, num_dests) \
+ for (; num_dests; num_dests--, (dest) += ETH_ALEN)
+
+#define batadv_mcast_forw_tracker_for_each_dest2(dest1, dest2, num_dests) \
+ for (; num_dests; num_dests--, (dest1) += ETH_ALEN, (dest2) += ETH_ALEN)
+
+/**
+ * batadv_mcast_forw_skb_push() - skb_push and memorize amount of pushed bytes
+ * @skb: the skb to push onto
+ * @size: the amount of bytes to push
+ * @len: stores the total amount of bytes pushed
+ *
+ * Performs an skb_push() onto the given skb and adds the amount of pushed bytes
+ * to the given len pointer.
+ *
+ * Return: the return value of the skb_push() call.
+ */
+static void *batadv_mcast_forw_skb_push(struct sk_buff *skb, size_t size,
+ unsigned short *len)
+{
+ *len += size;
+ return skb_push(skb, size);
+}
+
+/**
+ * batadv_mcast_forw_push_padding() - push 2 padding bytes to skb's front
+ * @skb: the skb to push onto
+ * @tvlv_len: stores the amount of currently pushed TVLV bytes
+ *
+ * Pushes two padding bytes to the front of the given skb.
+ *
+ * Return: On success a pointer to the first byte of the two pushed padding
+ * bytes within the skb. NULL otherwise.
+ */
+static char *
+batadv_mcast_forw_push_padding(struct sk_buff *skb, unsigned short *tvlv_len)
+{
+ const int pad_len = 2;
+ char *padding;
+
+ if (skb_headroom(skb) < pad_len)
+ return NULL;
+
+ padding = batadv_mcast_forw_skb_push(skb, pad_len, tvlv_len);
+ memset(padding, 0, pad_len);
+
+ return padding;
+}
+
+/**
+ * batadv_mcast_forw_push_est_padding() - push padding bytes if necessary
+ * @skb: the skb to potentially push the padding onto
+ * @count: the (estimated) number of originators the multicast packet needs to
+ * be sent to
+ * @tvlv_len: stores the amount of currently pushed TVLV bytes
+ *
+ * If the number of destination entries is even then this adds two
+ * padding bytes to the end of the tracker TVLV.
+ *
+ * Return: true on success or if no padding is needed, false otherwise.
+ */
+static bool
+batadv_mcast_forw_push_est_padding(struct sk_buff *skb, int count,
+ unsigned short *tvlv_len)
+{
+ if (!(count % 2) && !batadv_mcast_forw_push_padding(skb, tvlv_len))
+ return false;
+
+ return true;
+}
+
+/**
+ * batadv_mcast_forw_orig_entry() - get orig_node from an hlist node
+ * @node: the hlist node to get the orig_node from
+ * @entry_offset: the offset of the hlist node within the orig_node struct
+ *
+ * Return: The orig_node containing the hlist node on success, NULL on error.
+ */
+static struct batadv_orig_node *
+batadv_mcast_forw_orig_entry(struct hlist_node *node,
+ size_t entry_offset)
+{
+ /* sanity check */
+ switch (entry_offset) {
+ case offsetof(struct batadv_orig_node, mcast_want_all_ipv4_node):
+ case offsetof(struct batadv_orig_node, mcast_want_all_ipv6_node):
+ case offsetof(struct batadv_orig_node, mcast_want_all_rtr4_node):
+ case offsetof(struct batadv_orig_node, mcast_want_all_rtr6_node):
+ break;
+ default:
+ WARN_ON(1);
+ return NULL;
+ }
+
+ return (struct batadv_orig_node *)((void *)node - entry_offset);
+}
+
+/**
+ * batadv_mcast_forw_push_dest() - push an originator MAC address onto an skb
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the skb to push the destination address onto
+ * @vid: the vlan identifier
+ * @orig_node: the originator node to get the MAC address from
+ * @num_dests: a pointer to store the number of pushed addresses in
+ * @tvlv_len: stores the amount of currently pushed TVLV bytes
+ *
+ * If the orig_node is a BLA backbone gateway, if there is not enough skb
+ * headroom available or if num_dests is already at its maximum (65535) then
+ * neither the skb nor num_dests is changed. Otherwise the originator's MAC
+ * address is pushed onto the given skb and num_dests incremented by one.
+ *
+ * Return: true if the orig_node is a backbone gateway or if an orig address
+ * was pushed successfully, false otherwise.
+ */
+static bool batadv_mcast_forw_push_dest(struct batadv_priv *bat_priv,
+ struct sk_buff *skb, unsigned short vid,
+ struct batadv_orig_node *orig_node,
+ unsigned short *num_dests,
+ unsigned short *tvlv_len)
+{
+ BUILD_BUG_ON(sizeof_field(struct batadv_tvlv_mcast_tracker, num_dests)
+ != sizeof(__be16));
+
+ /* Avoid sending to other BLA gateways - they already got the frame from
+ * the LAN side we share with them.
+ * TODO: Refactor to take BLA into account earlier in mode check.
+ */
+ if (batadv_bla_is_backbone_gw_orig(bat_priv, orig_node->orig, vid))
+ return true;
+
+ if (skb_headroom(skb) < ETH_ALEN || *num_dests == U16_MAX)
+ return false;
+
+ batadv_mcast_forw_skb_push(skb, ETH_ALEN, tvlv_len);
+ ether_addr_copy(skb->data, orig_node->orig);
+ (*num_dests)++;
+
+ return true;
+}
+
+/**
+ * batadv_mcast_forw_push_dests_list() - push originators from list onto an skb
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the skb to push the destination addresses onto
+ * @vid: the vlan identifier
+ * @head: the list to gather originators from
+ * @entry_offset: offset of an hlist node in an orig_node structure
+ * @num_dests: a pointer to store the number of pushed addresses in
+ * @tvlv_len: stores the amount of currently pushed TVLV bytes
+ *
+ * Push the MAC addresses of all originators in the given list onto the given
+ * skb.
+ *
+ * Return: true on success, false otherwise.
+ */
+static int batadv_mcast_forw_push_dests_list(struct batadv_priv *bat_priv,
+ struct sk_buff *skb,
+ unsigned short vid,
+ struct hlist_head *head,
+ size_t entry_offset,
+ unsigned short *num_dests,
+ unsigned short *tvlv_len)
+{
+ struct hlist_node *node;
+ struct batadv_orig_node *orig_node;
+
+ rcu_read_lock();
+ __hlist_for_each_rcu(node, head) {
+ orig_node = batadv_mcast_forw_orig_entry(node, entry_offset);
+ if (!orig_node ||
+ !batadv_mcast_forw_push_dest(bat_priv, skb, vid, orig_node,
+ num_dests, tvlv_len)) {
+ rcu_read_unlock();
+ return false;
+ }
+ }
+ rcu_read_unlock();
+
+ return true;
+}
+
+/**
+ * batadv_mcast_forw_push_tt() - push originators with interest through TT
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the skb to push the destination addresses onto
+ * @vid: the vlan identifier
+ * @num_dests: a pointer to store the number of pushed addresses in
+ * @tvlv_len: stores the amount of currently pushed TVLV bytes
+ *
+ * Push the MAC addresses of all originators which have indicated interest in
+ * this multicast packet through the translation table onto the given skb.
+ *
+ * Return: true on success, false otherwise.
+ */
+static bool
+batadv_mcast_forw_push_tt(struct batadv_priv *bat_priv, struct sk_buff *skb,
+ unsigned short vid, unsigned short *num_dests,
+ unsigned short *tvlv_len)
+{
+ struct batadv_tt_orig_list_entry *orig_entry;
+
+ struct batadv_tt_global_entry *tt_global;
+ const u8 *addr = eth_hdr(skb)->h_dest;
+
+ /* ok */
+ int ret = true;
+
+ tt_global = batadv_tt_global_hash_find(bat_priv, addr, vid);
+ if (!tt_global)
+ goto out;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(orig_entry, &tt_global->orig_list, list) {
+ if (!batadv_mcast_forw_push_dest(bat_priv, skb, vid,
+ orig_entry->orig_node,
+ num_dests, tvlv_len)) {
+ ret = false;
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ batadv_tt_global_entry_put(tt_global);
+
+out:
+ return ret;
+}
+
+/**
+ * batadv_mcast_forw_push_want_all() - push originators with want-all flag
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the skb to push the destination addresses onto
+ * @vid: the vlan identifier
+ * @num_dests: a pointer to store the number of pushed addresses in
+ * @tvlv_len: stores the amount of currently pushed TVLV bytes
+ *
+ * Push the MAC addresses of all originators which have indicated interest in
+ * this multicast packet through the want-all flag onto the given skb.
+ *
+ * Return: true on success, false otherwise.
+ */
+static bool batadv_mcast_forw_push_want_all(struct batadv_priv *bat_priv,
+ struct sk_buff *skb,
+ unsigned short vid,
+ unsigned short *num_dests,
+ unsigned short *tvlv_len)
+{
+ struct hlist_head *head = NULL;
+ size_t offset;
+ int ret;
+
+ switch (eth_hdr(skb)->h_proto) {
+ case htons(ETH_P_IP):
+ head = &bat_priv->mcast.want_all_ipv4_list;
+ offset = offsetof(struct batadv_orig_node,
+ mcast_want_all_ipv4_node);
+ break;
+ case htons(ETH_P_IPV6):
+ head = &bat_priv->mcast.want_all_ipv6_list;
+ offset = offsetof(struct batadv_orig_node,
+ mcast_want_all_ipv6_node);
+ break;
+ default:
+ return false;
+ }
+
+ ret = batadv_mcast_forw_push_dests_list(bat_priv, skb, vid, head,
+ offset, num_dests, tvlv_len);
+ if (!ret)
+ return false;
+
+ return true;
+}
+
+/**
+ * batadv_mcast_forw_push_want_rtr() - push originators with want-router flag
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the skb to push the destination addresses onto
+ * @vid: the vlan identifier
+ * @num_dests: a pointer to store the number of pushed addresses in
+ * @tvlv_len: stores the amount of currently pushed TVLV bytes
+ *
+ * Push the MAC addresses of all originators which have indicated interest in
+ * this multicast packet through the want-all-rtr flag onto the given skb.
+ *
+ * Return: true on success, false otherwise.
+ */
+static bool batadv_mcast_forw_push_want_rtr(struct batadv_priv *bat_priv,
+ struct sk_buff *skb,
+ unsigned short vid,
+ unsigned short *num_dests,
+ unsigned short *tvlv_len)
+{
+ struct hlist_head *head = NULL;
+ size_t offset;
+ int ret;
+
+ switch (eth_hdr(skb)->h_proto) {
+ case htons(ETH_P_IP):
+ head = &bat_priv->mcast.want_all_rtr4_list;
+ offset = offsetof(struct batadv_orig_node,
+ mcast_want_all_rtr4_node);
+ break;
+ case htons(ETH_P_IPV6):
+ head = &bat_priv->mcast.want_all_rtr6_list;
+ offset = offsetof(struct batadv_orig_node,
+ mcast_want_all_rtr6_node);
+ break;
+ default:
+ return false;
+ }
+
+ ret = batadv_mcast_forw_push_dests_list(bat_priv, skb, vid, head,
+ offset, num_dests, tvlv_len);
+ if (!ret)
+ return false;
+
+ return true;
+}
+
+/**
+ * batadv_mcast_forw_scrape() - remove bytes within skb data
+ * @skb: the skb to remove bytes from
+ * @offset: the offset from the skb data from which to scrape
+ * @len: the amount of bytes to scrape starting from the offset
+ *
+ * Scrapes/removes len bytes from the given skb at the given offset from the
+ * skb data.
+ *
+ * Caller needs to ensure that the region from the skb data's start up
+ * to/including the to be removed bytes are linearized.
+ */
+static void batadv_mcast_forw_scrape(struct sk_buff *skb,
+ unsigned short offset,
+ unsigned short len)
+{
+ char *to, *from;
+
+ SKB_LINEAR_ASSERT(skb);
+
+ to = skb_pull(skb, len);
+ from = to - len;
+
+ memmove(to, from, offset);
+}
+
+/**
+ * batadv_mcast_forw_push_scrape_padding() - remove TVLV padding
+ * @skb: the skb to potentially adjust the TVLV's padding on
+ * @tvlv_len: stores the amount of currently pushed TVLV bytes
+ *
+ * Remove two padding bytes from the end of the multicast tracker TVLV,
+ * from before the payload data.
+ *
+ * Caller needs to ensure that the TVLV bytes are linearized.
+ */
+static void batadv_mcast_forw_push_scrape_padding(struct sk_buff *skb,
+ unsigned short *tvlv_len)
+{
+ const int pad_len = 2;
+
+ batadv_mcast_forw_scrape(skb, *tvlv_len - pad_len, pad_len);
+ *tvlv_len -= pad_len;
+}
+
+/**
+ * batadv_mcast_forw_push_insert_padding() - insert TVLV padding
+ * @skb: the skb to potentially adjust the TVLV's padding on
+ * @tvlv_len: stores the amount of currently pushed TVLV bytes
+ *
+ * Inserts two padding bytes at the end of the multicast tracker TVLV,
+ * before the payload data in the given skb.
+ *
+ * Return: true on success, false otherwise.
+ */
+static bool batadv_mcast_forw_push_insert_padding(struct sk_buff *skb,
+ unsigned short *tvlv_len)
+{
+ unsigned short offset = *tvlv_len;
+ char *to, *from = skb->data;
+
+ to = batadv_mcast_forw_push_padding(skb, tvlv_len);
+ if (!to)
+ return false;
+
+ memmove(to, from, offset);
+ memset(to + offset, 0, *tvlv_len - offset);
+ return true;
+}
+
+/**
+ * batadv_mcast_forw_push_adjust_padding() - adjust padding if necessary
+ * @skb: the skb to potentially adjust the TVLV's padding on
+ * @count: the estimated number of originators the multicast packet needs to
+ * be sent to
+ * @num_dests_pushed: the number of originators that were actually added to the
+ * multicast packet's tracker TVLV
+ * @tvlv_len: stores the amount of currently pushed TVLV bytes
+ *
+ * Adjusts the padding in the multicast packet's tracker TVLV depending on the
+ * initially estimated amount of destinations versus the amount of destinations
+ * that were actually added to the tracker TVLV.
+ *
+ * If the initial estimate was correct or at least the oddness was the same then
+ * no padding adjustment is performed.
+ * If the initially estimated number was even, so padding was initially added,
+ * but it turned out to be odd then padding is removed.
+ * If the initially estimated number was odd, so no padding was initially added,
+ * but it turned out to be even then padding is added.
+ *
+ * Return: true if no padding adjustment is needed or the adjustment was
+ * successful, false otherwise.
+ */
+static bool
+batadv_mcast_forw_push_adjust_padding(struct sk_buff *skb, int *count,
+ unsigned short num_dests_pushed,
+ unsigned short *tvlv_len)
+{
+ int ret = true;
+
+ if (likely((num_dests_pushed % 2) == (*count % 2)))
+ goto out;
+
+ /**
+ * estimated even number of destinations, but turned out to be odd
+ * -> remove padding
+ */
+ if (!(*count % 2) && (num_dests_pushed % 2))
+ batadv_mcast_forw_push_scrape_padding(skb, tvlv_len);
+ /**
+ * estimated odd number of destinations, but turned out to be even
+ * -> add padding
+ */
+ else if ((*count % 2) && (!(num_dests_pushed % 2)))
+ ret = batadv_mcast_forw_push_insert_padding(skb, tvlv_len);
+
+out:
+ *count = num_dests_pushed;
+ return ret;
+}
+
+/**
+ * batadv_mcast_forw_push_dests() - push originator addresses onto an skb
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the skb to push the destination addresses onto
+ * @vid: the vlan identifier
+ * @is_routable: indicates whether the destination is routable
+ * @count: the number of originators the multicast packet needs to be sent to
+ * @tvlv_len: stores the amount of currently pushed TVLV bytes
+ *
+ * Push the MAC addresses of all originators which have indicated interest in
+ * this multicast packet onto the given skb.
+ *
+ * Return: -ENOMEM if there is not enough skb headroom available. Otherwise, on
+ * success 0.
+ */
+static int
+batadv_mcast_forw_push_dests(struct batadv_priv *bat_priv, struct sk_buff *skb,
+ unsigned short vid, int is_routable, int *count,
+ unsigned short *tvlv_len)
+{
+ unsigned short num_dests = 0;
+
+ if (!batadv_mcast_forw_push_est_padding(skb, *count, tvlv_len))
+ goto err;
+
+ if (!batadv_mcast_forw_push_tt(bat_priv, skb, vid, &num_dests,
+ tvlv_len))
+ goto err;
+
+ if (!batadv_mcast_forw_push_want_all(bat_priv, skb, vid, &num_dests,
+ tvlv_len))
+ goto err;
+
+ if (is_routable &&
+ !batadv_mcast_forw_push_want_rtr(bat_priv, skb, vid, &num_dests,
+ tvlv_len))
+ goto err;
+
+ if (!batadv_mcast_forw_push_adjust_padding(skb, count, num_dests,
+ tvlv_len))
+ goto err;
+
+ return 0;
+err:
+ return -ENOMEM;
+}
+
+/**
+ * batadv_mcast_forw_push_tracker() - push a multicast tracker TVLV header
+ * @skb: the skb to push the tracker TVLV onto
+ * @num_dests: the number of destination addresses to set in the header
+ * @tvlv_len: stores the amount of currently pushed TVLV bytes
+ *
+ * Pushes a multicast tracker TVLV header onto the given skb, including the
+ * generic TVLV header but excluding the destination MAC addresses.
+ *
+ * The provided num_dests value is taken into consideration to set the
+ * num_dests field in the tracker header and to set the appropriate TVLV length
+ * value fields.
+ *
+ * Return: -ENOMEM if there is not enough skb headroom available. Otherwise, on
+ * success 0.
+ */
+static int batadv_mcast_forw_push_tracker(struct sk_buff *skb, int num_dests,
+ unsigned short *tvlv_len)
+{
+ struct batadv_tvlv_mcast_tracker *mcast_tracker;
+ struct batadv_tvlv_hdr *tvlv_hdr;
+ unsigned int tvlv_value_len;
+
+ if (skb_headroom(skb) < sizeof(*mcast_tracker) + sizeof(*tvlv_hdr))
+ return -ENOMEM;
+
+ tvlv_value_len = sizeof(*mcast_tracker) + *tvlv_len;
+ if (tvlv_value_len + sizeof(*tvlv_hdr) > U16_MAX)
+ return -ENOMEM;
+
+ batadv_mcast_forw_skb_push(skb, sizeof(*mcast_tracker), tvlv_len);
+ mcast_tracker = (struct batadv_tvlv_mcast_tracker *)skb->data;
+ mcast_tracker->num_dests = htons(num_dests);
+
+ skb_reset_network_header(skb);
+
+ batadv_mcast_forw_skb_push(skb, sizeof(*tvlv_hdr), tvlv_len);
+ tvlv_hdr = (struct batadv_tvlv_hdr *)skb->data;
+ tvlv_hdr->type = BATADV_TVLV_MCAST_TRACKER;
+ tvlv_hdr->version = 1;
+ tvlv_hdr->len = htons(tvlv_value_len);
+
+ return 0;
+}
+
+/**
+ * batadv_mcast_forw_push_tvlvs() - push a multicast tracker TVLV onto an skb
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the skb to push the tracker TVLV onto
+ * @vid: the vlan identifier
+ * @is_routable: indicates whether the destination is routable
+ * @count: the number of originators the multicast packet needs to be sent to
+ * @tvlv_len: stores the amount of currently pushed TVLV bytes
+ *
+ * Pushes a multicast tracker TVLV onto the given skb, including the collected
+ * destination MAC addresses and the generic TVLV header.
+ *
+ * Return: -ENOMEM if there is not enough skb headroom available. Otherwise, on
+ * success 0.
+ */
+static int
+batadv_mcast_forw_push_tvlvs(struct batadv_priv *bat_priv, struct sk_buff *skb,
+ unsigned short vid, int is_routable, int count,
+ unsigned short *tvlv_len)
+{
+ int ret;
+
+ ret = batadv_mcast_forw_push_dests(bat_priv, skb, vid, is_routable,
+ &count, tvlv_len);
+ if (ret < 0)
+ return ret;
+
+ ret = batadv_mcast_forw_push_tracker(skb, count, tvlv_len);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+/**
+ * batadv_mcast_forw_push_hdr() - push a multicast packet header onto an skb
+ * @skb: the skb to push the header onto
+ * @tvlv_len: the total TVLV length value to set in the header
+ *
+ * Pushes a batman-adv multicast packet header onto the given skb and sets
+ * the provided total TVLV length value in it.
+ *
+ * Caller needs to ensure enough skb headroom is available.
+ *
+ * Return: -ENOMEM if there is not enough skb headroom available. Otherwise, on
+ * success 0.
+ */
+static int
+batadv_mcast_forw_push_hdr(struct sk_buff *skb, unsigned short tvlv_len)
+{
+ struct batadv_mcast_packet *mcast_packet;
+
+ if (skb_headroom(skb) < sizeof(*mcast_packet))
+ return -ENOMEM;
+
+ skb_push(skb, sizeof(*mcast_packet));
+
+ mcast_packet = (struct batadv_mcast_packet *)skb->data;
+ mcast_packet->version = BATADV_COMPAT_VERSION;
+ mcast_packet->ttl = BATADV_TTL;
+ mcast_packet->packet_type = BATADV_MCAST;
+ mcast_packet->reserved = 0;
+ mcast_packet->tvlv_len = htons(tvlv_len);
+
+ return 0;
+}
+
+/**
+ * batadv_mcast_forw_scrub_dests() - scrub destinations in a tracker TVLV
+ * @bat_priv: the bat priv with all the soft interface information
+ * @comp_neigh: next hop neighbor to scrub+collect destinations for
+ * @dest: start MAC entry in original skb's tracker TVLV
+ * @next_dest: start MAC entry in to be sent skb's tracker TVLV
+ * @num_dests: number of remaining destination MAC entries to iterate over
+ *
+ * This sorts destination entries into either the original batman-adv
+ * multicast packet or the skb (copy) that is going to be sent to comp_neigh
+ * next.
+ *
+ * In preparation for the next, to be (unicast) transmitted batman-adv multicast
+ * packet skb to be sent to the given neighbor node, tries to collect all
+ * originator MAC addresses that have the given neighbor node as their next hop
+ * in the to be transmitted skb (copy), which next_dest points into. That is we
+ * zero all destination entries in next_dest which do not have comp_neigh as
+ * their next hop. And zero all destination entries in the original skb that
+ * would have comp_neigh as their next hop (to avoid redundant transmissions and
+ * duplicated payload later).
+ */
+static void
+batadv_mcast_forw_scrub_dests(struct batadv_priv *bat_priv,
+ struct batadv_neigh_node *comp_neigh, u8 *dest,
+ u8 *next_dest, u16 num_dests)
+{
+ struct batadv_neigh_node *next_neigh;
+
+ /* skip first entry, this is what we are comparing with */
+ eth_zero_addr(dest);
+ dest += ETH_ALEN;
+ next_dest += ETH_ALEN;
+ num_dests--;
+
+ batadv_mcast_forw_tracker_for_each_dest2(dest, next_dest, num_dests) {
+ if (is_zero_ether_addr(next_dest))
+ continue;
+
+ /* sanity check, we expect unicast destinations */
+ if (is_multicast_ether_addr(next_dest)) {
+ eth_zero_addr(dest);
+ eth_zero_addr(next_dest);
+ continue;
+ }
+
+ next_neigh = batadv_orig_to_router(bat_priv, next_dest, NULL);
+ if (!next_neigh) {
+ eth_zero_addr(next_dest);
+ continue;
+ }
+
+ if (!batadv_compare_eth(next_neigh->addr, comp_neigh->addr)) {
+ eth_zero_addr(next_dest);
+ batadv_neigh_node_put(next_neigh);
+ continue;
+ }
+
+ /* found an entry for our next packet to transmit, so remove it
+ * from the original packet
+ */
+ eth_zero_addr(dest);
+ batadv_neigh_node_put(next_neigh);
+ }
+}
+
+/**
+ * batadv_mcast_forw_shrink_fill() - swap slot with next non-zero destination
+ * @slot: the to be filled zero-MAC destination entry in a tracker TVLV
+ * @num_dests_slot: remaining entries in tracker TVLV from/including slot
+ *
+ * Searches for the next non-zero-MAC destination entry in a tracker TVLV after
+ * the given slot pointer. And if found, swaps it with the zero-MAC destination
+ * entry which the slot points to.
+ *
+ * Return: true if slot was swapped/filled successfully, false otherwise.
+ */
+static bool batadv_mcast_forw_shrink_fill(u8 *slot, u16 num_dests_slot)
+{
+ u16 num_dests_filler;
+ u8 *filler;
+
+ /* sanity check, should not happen */
+ if (!num_dests_slot)
+ return false;
+
+ num_dests_filler = num_dests_slot - 1;
+ filler = slot + ETH_ALEN;
+
+ /* find a candidate to fill the empty slot */
+ batadv_mcast_forw_tracker_for_each_dest(filler, num_dests_filler) {
+ if (is_zero_ether_addr(filler))
+ continue;
+
+ ether_addr_copy(slot, filler);
+ eth_zero_addr(filler);
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * batadv_mcast_forw_shrink_pack_dests() - pack destinations of a tracker TVLV
+ * @skb: the batman-adv multicast packet to compact destinations in
+ *
+ * Compacts the originator destination MAC addresses in the multicast tracker
+ * TVLV of the given multicast packet. This is done by moving all non-zero
+ * MAC addresses in direction of the skb head and all zero MAC addresses in skb
+ * tail direction, within the multicast tracker TVLV.
+ *
+ * Return: The number of consecutive zero MAC address destinations which are
+ * now at the end of the multicast tracker TVLV.
+ */
+static int batadv_mcast_forw_shrink_pack_dests(struct sk_buff *skb)
+{
+ struct batadv_tvlv_mcast_tracker *mcast_tracker;
+ unsigned char *skb_net_hdr;
+ u16 num_dests_slot;
+ u8 *slot;
+
+ skb_net_hdr = skb_network_header(skb);
+ mcast_tracker = (struct batadv_tvlv_mcast_tracker *)skb_net_hdr;
+ num_dests_slot = ntohs(mcast_tracker->num_dests);
+
+ slot = (u8 *)mcast_tracker + sizeof(*mcast_tracker);
+
+ batadv_mcast_forw_tracker_for_each_dest(slot, num_dests_slot) {
+ /* find an empty slot */
+ if (!is_zero_ether_addr(slot))
+ continue;
+
+ if (!batadv_mcast_forw_shrink_fill(slot, num_dests_slot))
+ /* could not find a filler, so we successfully packed
+ * and can stop - and must not reduce num_dests_slot!
+ */
+ break;
+ }
+
+ /* num_dests_slot is now the amount of reduced, zeroed
+ * destinations at the end of the tracker TVLV
+ */
+ return num_dests_slot;
+}
+
+/**
+ * batadv_mcast_forw_shrink_align_offset() - get new alignment offset
+ * @num_dests_old: the old, to be updated amount of destination nodes
+ * @num_dests_reduce: the number of destinations that were removed
+ *
+ * Calculates the amount of potential extra alignment offset that is needed to
+ * adjust the TVLV padding after the change in destination nodes.
+ *
+ * Return:
+ * 0: If no change to padding is needed.
+ * 2: If padding needs to be removed.
+ * -2: If padding needs to be added.
+ */
+static short
+batadv_mcast_forw_shrink_align_offset(unsigned int num_dests_old,
+ unsigned int num_dests_reduce)
+{
+ /* even amount of removed destinations -> no alignment change */
+ if (!(num_dests_reduce % 2))
+ return 0;
+
+ /* even to odd amount of destinations -> remove padding */
+ if (!(num_dests_old % 2))
+ return 2;
+
+ /* odd to even amount of destinations -> add padding */
+ return -2;
+}
+
+/**
+ * batadv_mcast_forw_shrink_update_headers() - update shrunk mc packet headers
+ * @skb: the batman-adv multicast packet to update headers of
+ * @num_dests_reduce: the number of destinations that were removed
+ *
+ * This updates any fields of a batman-adv multicast packet that are affected
+ * by the reduced number of destinations in the multicast tracket TVLV. In
+ * particular this updates:
+ *
+ * The num_dest field of the multicast tracker TVLV.
+ * The TVLV length field of the according generic TVLV header.
+ * The batman-adv multicast packet's total TVLV length field.
+ *
+ * Return: The offset in skb's tail direction at which the new batman-adv
+ * multicast packet header needs to start.
+ */
+static unsigned int
+batadv_mcast_forw_shrink_update_headers(struct sk_buff *skb,
+ unsigned int num_dests_reduce)
+{
+ struct batadv_tvlv_mcast_tracker *mcast_tracker;
+ struct batadv_mcast_packet *mcast_packet;
+ struct batadv_tvlv_hdr *tvlv_hdr;
+ unsigned char *skb_net_hdr;
+ unsigned int offset;
+ short align_offset;
+ u16 num_dests;
+
+ skb_net_hdr = skb_network_header(skb);
+ mcast_tracker = (struct batadv_tvlv_mcast_tracker *)skb_net_hdr;
+ num_dests = ntohs(mcast_tracker->num_dests);
+
+ align_offset = batadv_mcast_forw_shrink_align_offset(num_dests,
+ num_dests_reduce);
+ offset = ETH_ALEN * num_dests_reduce + align_offset;
+ num_dests -= num_dests_reduce;
+
+ /* update tracker header */
+ mcast_tracker->num_dests = htons(num_dests);
+
+ /* update tracker's tvlv header's length field */
+ tvlv_hdr = (struct batadv_tvlv_hdr *)(skb_network_header(skb) -
+ sizeof(*tvlv_hdr));
+ tvlv_hdr->len = htons(ntohs(tvlv_hdr->len) - offset);
+
+ /* update multicast packet header's tvlv length field */
+ mcast_packet = (struct batadv_mcast_packet *)skb->data;
+ mcast_packet->tvlv_len = htons(ntohs(mcast_packet->tvlv_len) - offset);
+
+ return offset;
+}
+
+/**
+ * batadv_mcast_forw_shrink_move_headers() - move multicast headers by offset
+ * @skb: the batman-adv multicast packet to move headers for
+ * @offset: a non-negative offset to move headers by, towards the skb tail
+ *
+ * Moves the batman-adv multicast packet header, its multicast tracker TVLV and
+ * any TVLVs in between by the given offset in direction towards the tail.
+ */
+static void
+batadv_mcast_forw_shrink_move_headers(struct sk_buff *skb, unsigned int offset)
+{
+ struct batadv_tvlv_mcast_tracker *mcast_tracker;
+ unsigned char *skb_net_hdr;
+ unsigned int len;
+ u16 num_dests;
+
+ skb_net_hdr = skb_network_header(skb);
+ mcast_tracker = (struct batadv_tvlv_mcast_tracker *)skb_net_hdr;
+ num_dests = ntohs(mcast_tracker->num_dests);
+ len = skb_network_offset(skb) + sizeof(*mcast_tracker);
+ len += num_dests * ETH_ALEN;
+
+ batadv_mcast_forw_scrape(skb, len, offset);
+}
+
+/**
+ * batadv_mcast_forw_shrink_tracker() - remove zero addresses in a tracker tvlv
+ * @skb: the batman-adv multicast packet to (potentially) shrink
+ *
+ * Removes all destinations with a zero MAC addresses (00:00:00:00:00:00) from
+ * the given batman-adv multicast packet's tracker TVLV and updates headers
+ * accordingly to maintain a valid batman-adv multicast packet.
+ */
+static void batadv_mcast_forw_shrink_tracker(struct sk_buff *skb)
+{
+ unsigned int offset;
+ u16 dests_reduced;
+
+ dests_reduced = batadv_mcast_forw_shrink_pack_dests(skb);
+ if (!dests_reduced)
+ return;
+
+ offset = batadv_mcast_forw_shrink_update_headers(skb, dests_reduced);
+ batadv_mcast_forw_shrink_move_headers(skb, offset);
+}
+
+/**
+ * batadv_mcast_forw_packet() - forward a batman-adv multicast packet
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the received or locally generated batman-adv multicast packet
+ * @local_xmit: indicates that the packet was locally generated and not received
+ *
+ * Parses the tracker TVLV of a batman-adv multicast packet and forwards the
+ * packet as indicated in this TVLV.
+ *
+ * Caller needs to set the skb network header to the start of the multicast
+ * tracker TVLV (excluding the generic TVLV header) and the skb transport header
+ * to the next byte after this multicast tracker TVLV.
+ *
+ * Caller needs to free the skb.
+ *
+ * Return: NET_RX_SUCCESS or NET_RX_DROP on success or a negative error
+ * code on failure. NET_RX_SUCCESS if the received packet is supposed to be
+ * decapsulated and forwarded to the own soft interface, NET_RX_DROP otherwise.
+ */
+static int batadv_mcast_forw_packet(struct batadv_priv *bat_priv,
+ struct sk_buff *skb, bool local_xmit)
+{
+ struct batadv_tvlv_mcast_tracker *mcast_tracker;
+ struct batadv_neigh_node *neigh_node;
+ unsigned long offset, num_dests_off;
+ struct sk_buff *nexthop_skb;
+ unsigned char *skb_net_hdr;
+ bool local_recv = false;
+ unsigned int tvlv_len;
+ bool xmitted = false;
+ u8 *dest, *next_dest;
+ u16 num_dests;
+ int ret;
+
+ /* (at least) TVLV part needs to be linearized */
+ SKB_LINEAR_ASSERT(skb);
+
+ /* check if num_dests is within skb length */
+ num_dests_off = offsetof(struct batadv_tvlv_mcast_tracker, num_dests);
+ if (num_dests_off > skb_network_header_len(skb))
+ return -EINVAL;
+
+ skb_net_hdr = skb_network_header(skb);
+ mcast_tracker = (struct batadv_tvlv_mcast_tracker *)skb_net_hdr;
+ num_dests = ntohs(mcast_tracker->num_dests);
+
+ dest = (u8 *)mcast_tracker + sizeof(*mcast_tracker);
+
+ /* check if full tracker tvlv is within skb length */
+ tvlv_len = sizeof(*mcast_tracker) + ETH_ALEN * num_dests;
+ if (tvlv_len > skb_network_header_len(skb))
+ return -EINVAL;
+
+ /* invalidate checksum: */
+ skb->ip_summed = CHECKSUM_NONE;
+
+ batadv_mcast_forw_tracker_for_each_dest(dest, num_dests) {
+ if (is_zero_ether_addr(dest))
+ continue;
+
+ /* only unicast originator addresses supported */
+ if (is_multicast_ether_addr(dest)) {
+ eth_zero_addr(dest);
+ continue;
+ }
+
+ if (batadv_is_my_mac(bat_priv, dest)) {
+ eth_zero_addr(dest);
+ local_recv = true;
+ continue;
+ }
+
+ neigh_node = batadv_orig_to_router(bat_priv, dest, NULL);
+ if (!neigh_node) {
+ eth_zero_addr(dest);
+ continue;
+ }
+
+ nexthop_skb = skb_copy(skb, GFP_ATOMIC);
+ if (!nexthop_skb) {
+ batadv_neigh_node_put(neigh_node);
+ return -ENOMEM;
+ }
+
+ offset = dest - skb->data;
+ next_dest = nexthop_skb->data + offset;
+
+ batadv_mcast_forw_scrub_dests(bat_priv, neigh_node, dest,
+ next_dest, num_dests);
+ batadv_mcast_forw_shrink_tracker(nexthop_skb);
+
+ batadv_inc_counter(bat_priv, BATADV_CNT_MCAST_TX);
+ batadv_add_counter(bat_priv, BATADV_CNT_MCAST_TX_BYTES,
+ nexthop_skb->len + ETH_HLEN);
+ xmitted = true;
+ ret = batadv_send_unicast_skb(nexthop_skb, neigh_node);
+
+ batadv_neigh_node_put(neigh_node);
+
+ if (ret < 0)
+ return ret;
+ }
+
+ if (xmitted) {
+ if (local_xmit) {
+ batadv_inc_counter(bat_priv, BATADV_CNT_MCAST_TX_LOCAL);
+ batadv_add_counter(bat_priv,
+ BATADV_CNT_MCAST_TX_LOCAL_BYTES,
+ skb->len -
+ skb_transport_offset(skb));
+ } else {
+ batadv_inc_counter(bat_priv, BATADV_CNT_MCAST_FWD);
+ batadv_add_counter(bat_priv, BATADV_CNT_MCAST_FWD_BYTES,
+ skb->len + ETH_HLEN);
+ }
+ }
+
+ if (local_recv)
+ return NET_RX_SUCCESS;
+ else
+ return NET_RX_DROP;
+}
+
+/**
+ * batadv_mcast_forw_tracker_tvlv_handler() - handle an mcast tracker tvlv
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the received batman-adv multicast packet
+ *
+ * Parses the tracker TVLV of an incoming batman-adv multicast packet and
+ * forwards the packet as indicated in this TVLV.
+ *
+ * Caller needs to set the skb network header to the start of the multicast
+ * tracker TVLV (excluding the generic TVLV header) and the skb transport header
+ * to the next byte after this multicast tracker TVLV.
+ *
+ * Caller needs to free the skb.
+ *
+ * Return: NET_RX_SUCCESS or NET_RX_DROP on success or a negative error
+ * code on failure. NET_RX_SUCCESS if the received packet is supposed to be
+ * decapsulated and forwarded to the own soft interface, NET_RX_DROP otherwise.
+ */
+int batadv_mcast_forw_tracker_tvlv_handler(struct batadv_priv *bat_priv,
+ struct sk_buff *skb)
+{
+ return batadv_mcast_forw_packet(bat_priv, skb, false);
+}
+
+/**
+ * batadv_mcast_forw_packet_hdrlen() - multicast packet header length
+ * @num_dests: number of destination nodes
+ *
+ * Calculates the total batman-adv multicast packet header length for a given
+ * number of destination nodes (excluding the outer ethernet frame).
+ *
+ * Return: The calculated total batman-adv multicast packet header length.
+ */
+unsigned int batadv_mcast_forw_packet_hdrlen(unsigned int num_dests)
+{
+ /**
+ * If the number of destination entries is even then we need to add
+ * two byte padding to the tracker TVLV.
+ */
+ int padding = (!(num_dests % 2)) ? 2 : 0;
+
+ return padding + num_dests * ETH_ALEN +
+ sizeof(struct batadv_tvlv_mcast_tracker) +
+ sizeof(struct batadv_tvlv_hdr) +
+ sizeof(struct batadv_mcast_packet);
+}
+
+/**
+ * batadv_mcast_forw_expand_head() - expand headroom for an mcast packet
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the multicast packet to send
+ *
+ * Tries to expand an skb's headroom so that its head to tail is 1298
+ * bytes (minimum IPv6 MTU + vlan ethernet header size) large.
+ *
+ * Return: -EINVAL if the given skb's length is too large or -ENOMEM on memory
+ * allocation failure. Otherwise, on success, zero is returned.
+ */
+static int batadv_mcast_forw_expand_head(struct batadv_priv *bat_priv,
+ struct sk_buff *skb)
+{
+ int hdr_size = VLAN_ETH_HLEN + IPV6_MIN_MTU - skb->len;
+
+ /* TODO: Could be tightened to actual number of destination nodes?
+ * But it's tricky, number of destinations might have increased since
+ * we last checked.
+ */
+ if (hdr_size < 0) {
+ /* batadv_mcast_forw_mode_check_count() should ensure we do not
+ * end up here
+ */
+ WARN_ON(1);
+ return -EINVAL;
+ }
+
+ if (skb_headroom(skb) < hdr_size &&
+ pskb_expand_head(skb, hdr_size, 0, GFP_ATOMIC) < 0)
+ return -ENOMEM;
+
+ return 0;
+}
+
+/**
+ * batadv_mcast_forw_push() - encapsulate skb in a batman-adv multicast packet
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the multicast packet to encapsulate and send
+ * @vid: the vlan identifier
+ * @is_routable: indicates whether the destination is routable
+ * @count: the number of originators the multicast packet needs to be sent to
+ *
+ * Encapsulates the given multicast packet in a batman-adv multicast packet.
+ * A multicast tracker TVLV with destination originator addresses for any node
+ * that signaled interest in it, that is either via the translation table or the
+ * according want-all flags, is attached accordingly.
+ *
+ * Return: true on success, false otherwise.
+ */
+bool batadv_mcast_forw_push(struct batadv_priv *bat_priv, struct sk_buff *skb,
+ unsigned short vid, int is_routable, int count)
+{
+ unsigned short tvlv_len = 0;
+ int ret;
+
+ if (batadv_mcast_forw_expand_head(bat_priv, skb) < 0)
+ goto err;
+
+ skb_reset_transport_header(skb);
+
+ ret = batadv_mcast_forw_push_tvlvs(bat_priv, skb, vid, is_routable,
+ count, &tvlv_len);
+ if (ret < 0)
+ goto err;
+
+ ret = batadv_mcast_forw_push_hdr(skb, tvlv_len);
+ if (ret < 0)
+ goto err;
+
+ return true;
+
+err:
+ if (tvlv_len)
+ skb_pull(skb, tvlv_len);
+
+ return false;
+}
+
+/**
+ * batadv_mcast_forw_mcsend() - send a self prepared batman-adv multicast packet
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the multicast packet to encapsulate and send
+ *
+ * Transmits a batman-adv multicast packet that was locally prepared and
+ * consumes/frees it.
+ *
+ * Return: NET_XMIT_DROP on memory allocation failure. NET_XMIT_SUCCESS
+ * otherwise.
+ */
+int batadv_mcast_forw_mcsend(struct batadv_priv *bat_priv,
+ struct sk_buff *skb)
+{
+ int ret = batadv_mcast_forw_packet(bat_priv, skb, true);
+
+ if (ret < 0) {
+ kfree_skb(skb);
+ return NET_XMIT_DROP;
+ }
+
+ consume_skb(skb);
+ return NET_XMIT_SUCCESS;
+}
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index 0c64d81a7761..1f7ed9d4f6fd 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -7,6 +7,7 @@
#include "netlink.h"
#include "main.h"
+#include <linux/array_size.h>
#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/bug.h>
@@ -20,7 +21,6 @@
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/init.h>
-#include <linux/kernel.h>
#include <linux/limits.h>
#include <linux/list.h>
#include <linux/minmax.h>
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 34903df4fe93..71c143d4b6d0 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -312,6 +312,33 @@ batadv_orig_router_get(struct batadv_orig_node *orig_node,
}
/**
+ * batadv_orig_to_router() - get next hop neighbor to an orig address
+ * @bat_priv: the bat priv with all the soft interface information
+ * @orig_addr: the originator MAC address to search the best next hop router for
+ * @if_outgoing: the interface where the payload packet has been received or
+ * the OGM should be sent to
+ *
+ * Return: A neighbor node which is the best router towards the given originator
+ * address.
+ */
+struct batadv_neigh_node *
+batadv_orig_to_router(struct batadv_priv *bat_priv, u8 *orig_addr,
+ struct batadv_hard_iface *if_outgoing)
+{
+ struct batadv_neigh_node *neigh_node;
+ struct batadv_orig_node *orig_node;
+
+ orig_node = batadv_orig_hash_find(bat_priv, orig_addr);
+ if (!orig_node)
+ return NULL;
+
+ neigh_node = batadv_find_router(bat_priv, orig_node, if_outgoing);
+ batadv_orig_node_put(orig_node);
+
+ return neigh_node;
+}
+
+/**
* batadv_orig_ifinfo_get() - find the ifinfo from an orig_node
* @orig_node: the orig node to be queried
* @if_outgoing: the interface for which the ifinfo should be acquired
@@ -942,6 +969,7 @@ struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv,
#ifdef CONFIG_BATMAN_ADV_MCAST
orig_node->mcast_flags = BATADV_MCAST_WANT_NO_RTR4;
orig_node->mcast_flags |= BATADV_MCAST_WANT_NO_RTR6;
+ orig_node->mcast_flags |= BATADV_MCAST_HAVE_MC_PTYPE_CAPA;
INIT_HLIST_NODE(&orig_node->mcast_want_all_unsnoopables_node);
INIT_HLIST_NODE(&orig_node->mcast_want_all_ipv4_node);
INIT_HLIST_NODE(&orig_node->mcast_want_all_ipv6_node);
diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h
index ea3d69e4e670..db0c55128170 100644
--- a/net/batman-adv/originator.h
+++ b/net/batman-adv/originator.h
@@ -36,6 +36,9 @@ void batadv_neigh_node_release(struct kref *ref);
struct batadv_neigh_node *
batadv_orig_router_get(struct batadv_orig_node *orig_node,
const struct batadv_hard_iface *if_outgoing);
+struct batadv_neigh_node *
+batadv_orig_to_router(struct batadv_priv *bat_priv, u8 *orig_addr,
+ struct batadv_hard_iface *if_outgoing);
struct batadv_neigh_ifinfo *
batadv_neigh_ifinfo_new(struct batadv_neigh_node *neigh,
struct batadv_hard_iface *if_outgoing);
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 163cd43c4821..f1061985149f 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -1270,3 +1270,73 @@ out:
batadv_orig_node_put(orig_node);
return ret;
}
+
+#ifdef CONFIG_BATMAN_ADV_MCAST
+/**
+ * batadv_recv_mcast_packet() - process received batman-adv multicast packet
+ * @skb: the received batman-adv multicast packet
+ * @recv_if: interface that the skb is received on
+ *
+ * Parses the given, received batman-adv multicast packet. Depending on the
+ * contents of its TVLV forwards it and/or decapsulates it to hand it to the
+ * soft interface.
+ *
+ * Return: NET_RX_DROP if the skb is not consumed, NET_RX_SUCCESS otherwise.
+ */
+int batadv_recv_mcast_packet(struct sk_buff *skb,
+ struct batadv_hard_iface *recv_if)
+{
+ struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface);
+ struct batadv_mcast_packet *mcast_packet;
+ int hdr_size = sizeof(*mcast_packet);
+ unsigned char *tvlv_buff;
+ int ret = NET_RX_DROP;
+ u16 tvlv_buff_len;
+
+ if (batadv_check_unicast_packet(bat_priv, skb, hdr_size) < 0)
+ goto free_skb;
+
+ /* create a copy of the skb, if needed, to modify it. */
+ if (skb_cow(skb, ETH_HLEN) < 0)
+ goto free_skb;
+
+ /* packet needs to be linearized to access the tvlv content */
+ if (skb_linearize(skb) < 0)
+ goto free_skb;
+
+ mcast_packet = (struct batadv_mcast_packet *)skb->data;
+ if (mcast_packet->ttl-- < 2)
+ goto free_skb;
+
+ tvlv_buff = (unsigned char *)(skb->data + hdr_size);
+ tvlv_buff_len = ntohs(mcast_packet->tvlv_len);
+
+ if (tvlv_buff_len > skb->len - hdr_size)
+ goto free_skb;
+
+ ret = batadv_tvlv_containers_process(bat_priv, BATADV_MCAST, NULL, skb,
+ tvlv_buff, tvlv_buff_len);
+ if (ret >= 0) {
+ batadv_inc_counter(bat_priv, BATADV_CNT_MCAST_RX);
+ batadv_add_counter(bat_priv, BATADV_CNT_MCAST_RX_BYTES,
+ skb->len + ETH_HLEN);
+ }
+
+ hdr_size += tvlv_buff_len;
+
+ if (ret == NET_RX_SUCCESS && (skb->len - hdr_size >= ETH_HLEN)) {
+ batadv_inc_counter(bat_priv, BATADV_CNT_MCAST_RX_LOCAL);
+ batadv_add_counter(bat_priv, BATADV_CNT_MCAST_RX_LOCAL_BYTES,
+ skb->len - hdr_size);
+
+ batadv_interface_rx(bat_priv->soft_iface, skb, hdr_size, NULL);
+ /* skb was consumed */
+ skb = NULL;
+ }
+
+free_skb:
+ kfree_skb(skb);
+
+ return ret;
+}
+#endif /* CONFIG_BATMAN_ADV_MCAST */
diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h
index afd15b3879f1..e9849f032a24 100644
--- a/net/batman-adv/routing.h
+++ b/net/batman-adv/routing.h
@@ -27,6 +27,17 @@ int batadv_recv_frag_packet(struct sk_buff *skb,
struct batadv_hard_iface *iface);
int batadv_recv_bcast_packet(struct sk_buff *skb,
struct batadv_hard_iface *recv_if);
+#ifdef CONFIG_BATMAN_ADV_MCAST
+int batadv_recv_mcast_packet(struct sk_buff *skb,
+ struct batadv_hard_iface *recv_if);
+#else
+static inline int batadv_recv_mcast_packet(struct sk_buff *skb,
+ struct batadv_hard_iface *recv_if)
+{
+ kfree_skb(skb);
+ return NET_RX_DROP;
+}
+#endif
int batadv_recv_unicast_tvlv(struct sk_buff *skb,
struct batadv_hard_iface *recv_if);
int batadv_recv_unhandled_unicast_packet(struct sk_buff *skb,
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 1bf1232a4f75..89c51b3cf430 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -301,12 +301,13 @@ static netdev_tx_t batadv_interface_tx(struct sk_buff *skb,
send:
if (do_bcast && !is_broadcast_ether_addr(ethhdr->h_dest)) {
- forw_mode = batadv_mcast_forw_mode(bat_priv, skb,
+ forw_mode = batadv_mcast_forw_mode(bat_priv, skb, vid,
&mcast_is_routable);
switch (forw_mode) {
case BATADV_FORW_BCAST:
break;
case BATADV_FORW_UCASTS:
+ case BATADV_FORW_MCAST:
do_bcast = false;
break;
case BATADV_FORW_NONE:
@@ -365,6 +366,8 @@ send:
} else if (forw_mode == BATADV_FORW_UCASTS) {
ret = batadv_mcast_forw_send(bat_priv, skb, vid,
mcast_is_routable);
+ } else if (forw_mode == BATADV_FORW_MCAST) {
+ ret = batadv_mcast_forw_mcsend(bat_priv, skb);
} else {
if (batadv_dat_snoop_outgoing_arp_request(bat_priv,
skb))
@@ -762,6 +765,7 @@ static int batadv_softif_init_late(struct net_device *dev)
atomic_set(&bat_priv->mcast.num_want_all_unsnoopables, 0);
atomic_set(&bat_priv->mcast.num_want_all_ipv4, 0);
atomic_set(&bat_priv->mcast.num_want_all_ipv6, 0);
+ atomic_set(&bat_priv->mcast.num_no_mc_ptype_capa, 0);
#endif
atomic_set(&bat_priv->gw.mode, BATADV_GW_MODE_OFF);
atomic_set(&bat_priv->gw.bandwidth_down, 100);
@@ -925,6 +929,18 @@ static const struct {
{ "tt_response_rx" },
{ "tt_roam_adv_tx" },
{ "tt_roam_adv_rx" },
+#ifdef CONFIG_BATMAN_ADV_MCAST
+ { "mcast_tx" },
+ { "mcast_tx_bytes" },
+ { "mcast_tx_local" },
+ { "mcast_tx_local_bytes" },
+ { "mcast_rx" },
+ { "mcast_rx_bytes" },
+ { "mcast_rx_local" },
+ { "mcast_rx_local_bytes" },
+ { "mcast_fwd" },
+ { "mcast_fwd_bytes" },
+#endif
#ifdef CONFIG_BATMAN_ADV_DAT
{ "dat_get_tx" },
{ "dat_get_rx" },
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 17d5ea1d8e84..00840d5784fe 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -862,6 +862,70 @@ enum batadv_counters {
*/
BATADV_CNT_TT_ROAM_ADV_RX,
+#ifdef CONFIG_BATMAN_ADV_MCAST
+ /**
+ * @BATADV_CNT_MCAST_TX: transmitted batman-adv multicast packets
+ * counter
+ */
+ BATADV_CNT_MCAST_TX,
+
+ /**
+ * @BATADV_CNT_MCAST_TX_BYTES: transmitted batman-adv multicast packets
+ * bytes counter
+ */
+ BATADV_CNT_MCAST_TX_BYTES,
+
+ /**
+ * @BATADV_CNT_MCAST_TX_LOCAL: counter for multicast packets which
+ * were locally encapsulated and transmitted as batman-adv multicast
+ * packets
+ */
+ BATADV_CNT_MCAST_TX_LOCAL,
+
+ /**
+ * @BATADV_CNT_MCAST_TX_LOCAL_BYTES: bytes counter for multicast packets
+ * which were locally encapsulated and transmitted as batman-adv
+ * multicast packets
+ */
+ BATADV_CNT_MCAST_TX_LOCAL_BYTES,
+
+ /**
+ * @BATADV_CNT_MCAST_RX: received batman-adv multicast packet counter
+ */
+ BATADV_CNT_MCAST_RX,
+
+ /**
+ * @BATADV_CNT_MCAST_RX_BYTES: received batman-adv multicast packet
+ * bytes counter
+ */
+ BATADV_CNT_MCAST_RX_BYTES,
+
+ /**
+ * @BATADV_CNT_MCAST_RX_LOCAL: counter for received batman-adv multicast
+ * packets which were forwarded to the local soft interface
+ */
+ BATADV_CNT_MCAST_RX_LOCAL,
+
+ /**
+ * @BATADV_CNT_MCAST_RX_LOCAL_BYTES: bytes counter for received
+ * batman-adv multicast packets which were forwarded to the local soft
+ * interface
+ */
+ BATADV_CNT_MCAST_RX_LOCAL_BYTES,
+
+ /**
+ * @BATADV_CNT_MCAST_FWD: counter for received batman-adv multicast
+ * packets which were forwarded to other, neighboring nodes
+ */
+ BATADV_CNT_MCAST_FWD,
+
+ /**
+ * @BATADV_CNT_MCAST_FWD_BYTES: bytes counter for received batman-adv
+ * multicast packets which were forwarded to other, neighboring nodes
+ */
+ BATADV_CNT_MCAST_FWD_BYTES,
+#endif
+
#ifdef CONFIG_BATMAN_ADV_DAT
/**
* @BATADV_CNT_DAT_GET_TX: transmitted dht GET traffic packet counter
@@ -1279,6 +1343,12 @@ struct batadv_priv_mcast {
atomic_t num_want_all_rtr6;
/**
+ * @num_no_mc_ptype_capa: counter for number of nodes without the
+ * BATADV_MCAST_HAVE_MC_PTYPE_CAPA flag
+ */
+ atomic_t num_no_mc_ptype_capa;
+
+ /**
* @want_lists_lock: lock for protecting modifications to mcasts
* want_all_{unsnoopables,ipv4,ipv6}_list (traversals are rcu-locked)
*/
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 336a76165454..b93464ac3517 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -309,11 +309,14 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
if (flags & MSG_OOB)
return -EOPNOTSUPP;
+ lock_sock(sk);
+
skb = skb_recv_datagram(sk, flags, &err);
if (!skb) {
if (sk->sk_shutdown & RCV_SHUTDOWN)
- return 0;
+ err = 0;
+ release_sock(sk);
return err;
}
@@ -343,6 +346,8 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
skb_free_datagram(sk, skb);
+ release_sock(sk);
+
if (flags & MSG_TRUNC)
copied = skblen;
diff --git a/net/bluetooth/amp.c b/net/bluetooth/amp.c
index 2134f92bd7ac..5d698f19868c 100644
--- a/net/bluetooth/amp.c
+++ b/net/bluetooth/amp.c
@@ -109,7 +109,7 @@ struct hci_conn *phylink_add(struct hci_dev *hdev, struct amp_mgr *mgr,
struct hci_conn *hcon;
u8 role = out ? HCI_ROLE_MASTER : HCI_ROLE_SLAVE;
- hcon = hci_conn_add(hdev, AMP_LINK, dst, role);
+ hcon = hci_conn_add(hdev, AMP_LINK, dst, role, __next_handle(mgr));
if (!hcon)
return NULL;
@@ -117,7 +117,6 @@ struct hci_conn *phylink_add(struct hci_dev *hdev, struct amp_mgr *mgr,
hcon->state = BT_CONNECT;
hcon->attempt++;
- hcon->handle = __next_handle(mgr);
hcon->remote_id = remote_id;
hcon->amp_mgr = amp_mgr_get(mgr);
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 9d5057cef30a..a41d2693f4d8 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -153,6 +153,9 @@ static void hci_conn_cleanup(struct hci_conn *conn)
hci_conn_hash_del(hdev, conn);
+ if (HCI_CONN_HANDLE_UNSET(conn->handle))
+ ida_free(&hdev->unset_handle_ida, conn->handle);
+
if (conn->cleanup)
conn->cleanup(conn);
@@ -169,13 +172,11 @@ static void hci_conn_cleanup(struct hci_conn *conn)
hdev->notify(hdev, HCI_NOTIFY_CONN_DEL);
}
- hci_conn_del_sysfs(conn);
-
debugfs_remove_recursive(conn->debugfs);
- hci_dev_put(hdev);
+ hci_conn_del_sysfs(conn);
- hci_conn_put(conn);
+ hci_dev_put(hdev);
}
static void hci_acl_create_connection(struct hci_conn *conn)
@@ -299,6 +300,13 @@ static int configure_datapath_sync(struct hci_dev *hdev, struct bt_codec *codec)
__u8 vnd_len, *vnd_data = NULL;
struct hci_op_configure_data_path *cmd = NULL;
+ if (!codec->data_path || !hdev->get_codec_config_data)
+ return 0;
+
+ /* Do not take me as error */
+ if (!hdev->get_codec_config_data)
+ return 0;
+
err = hdev->get_codec_config_data(hdev, ESCO_LINK, codec, &vnd_len,
&vnd_data);
if (err < 0)
@@ -344,9 +352,7 @@ static int hci_enhanced_setup_sync(struct hci_dev *hdev, void *data)
bt_dev_dbg(hdev, "hcon %p", conn);
- /* for offload use case, codec needs to configured before opening SCO */
- if (conn->codec.data_path)
- configure_datapath_sync(hdev, &conn->codec);
+ configure_datapath_sync(hdev, &conn->codec);
conn->state = BT_CONNECT;
conn->out = true;
@@ -759,6 +765,7 @@ static int terminate_big_sync(struct hci_dev *hdev, void *data)
bt_dev_dbg(hdev, "big 0x%2.2x bis 0x%2.2x", d->big, d->bis);
+ hci_disable_per_advertising_sync(hdev, d->bis);
hci_remove_ext_adv_instance_sync(hdev, d->bis, NULL);
/* Only terminate BIG if it has been created */
@@ -814,6 +821,17 @@ static int big_terminate_sync(struct hci_dev *hdev, void *data)
return 0;
}
+static void find_bis(struct hci_conn *conn, void *data)
+{
+ struct iso_list_data *d = data;
+
+ /* Ignore if BIG doesn't match */
+ if (d->big != conn->iso_qos.bcast.big)
+ return;
+
+ d->count++;
+}
+
static int hci_le_big_terminate(struct hci_dev *hdev, u8 big, struct hci_conn *conn)
{
struct iso_list_data *d;
@@ -825,10 +843,27 @@ static int hci_le_big_terminate(struct hci_dev *hdev, u8 big, struct hci_conn *c
if (!d)
return -ENOMEM;
+ memset(d, 0, sizeof(*d));
d->big = big;
d->sync_handle = conn->sync_handle;
- d->pa_sync_term = test_and_clear_bit(HCI_CONN_PA_SYNC, &conn->flags);
- d->big_sync_term = test_and_clear_bit(HCI_CONN_BIG_SYNC, &conn->flags);
+
+ if (test_and_clear_bit(HCI_CONN_PA_SYNC, &conn->flags)) {
+ hci_conn_hash_list_flag(hdev, find_bis, ISO_LINK,
+ HCI_CONN_PA_SYNC, d);
+
+ if (!d->count)
+ d->pa_sync_term = true;
+
+ d->count = 0;
+ }
+
+ if (test_and_clear_bit(HCI_CONN_BIG_SYNC, &conn->flags)) {
+ hci_conn_hash_list_flag(hdev, find_bis, ISO_LINK,
+ HCI_CONN_BIG_SYNC, d);
+
+ if (!d->count)
+ d->big_sync_term = true;
+ }
ret = hci_cmd_sync_queue(hdev, big_terminate_sync, d,
terminate_big_destroy);
@@ -864,12 +899,6 @@ static void bis_cleanup(struct hci_conn *conn)
hci_le_terminate_big(hdev, conn);
} else {
- bis = hci_conn_hash_lookup_big_any_dst(hdev,
- conn->iso_qos.bcast.big);
-
- if (bis)
- return;
-
hci_le_big_terminate(hdev, conn->iso_qos.bcast.big,
conn);
}
@@ -928,31 +957,18 @@ static void cis_cleanup(struct hci_conn *conn)
hci_le_remove_cig(hdev, conn->iso_qos.ucast.cig);
}
-static u16 hci_conn_hash_alloc_unset(struct hci_dev *hdev)
+static int hci_conn_hash_alloc_unset(struct hci_dev *hdev)
{
- struct hci_conn_hash *h = &hdev->conn_hash;
- struct hci_conn *c;
- u16 handle = HCI_CONN_HANDLE_MAX + 1;
-
- rcu_read_lock();
-
- list_for_each_entry_rcu(c, &h->list, list) {
- /* Find the first unused handle */
- if (handle == 0xffff || c->handle != handle)
- break;
- handle++;
- }
- rcu_read_unlock();
-
- return handle;
+ return ida_alloc_range(&hdev->unset_handle_ida, HCI_CONN_HANDLE_MAX + 1,
+ U16_MAX, GFP_ATOMIC);
}
struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
- u8 role)
+ u8 role, u16 handle)
{
struct hci_conn *conn;
- BT_DBG("%s dst %pMR", hdev->name, dst);
+ bt_dev_dbg(hdev, "dst %pMR handle 0x%4.4x", dst, handle);
conn = kzalloc(sizeof(*conn), GFP_KERNEL);
if (!conn)
@@ -960,7 +976,7 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
bacpy(&conn->dst, dst);
bacpy(&conn->src, &hdev->bdaddr);
- conn->handle = hci_conn_hash_alloc_unset(hdev);
+ conn->handle = handle;
conn->hdev = hdev;
conn->type = type;
conn->role = role;
@@ -973,6 +989,7 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
conn->rssi = HCI_RSSI_INVALID;
conn->tx_power = HCI_TX_POWER_INVALID;
conn->max_tx_power = HCI_TX_POWER_INVALID;
+ conn->sync_handle = HCI_SYNC_HANDLE_INVALID;
set_bit(HCI_CONN_POWER_SAVE, &conn->flags);
conn->disc_timeout = HCI_DISCONN_TIMEOUT;
@@ -1044,6 +1061,20 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
return conn;
}
+struct hci_conn *hci_conn_add_unset(struct hci_dev *hdev, int type,
+ bdaddr_t *dst, u8 role)
+{
+ int handle;
+
+ bt_dev_dbg(hdev, "dst %pMR", dst);
+
+ handle = hci_conn_hash_alloc_unset(hdev);
+ if (unlikely(handle < 0))
+ return NULL;
+
+ return hci_conn_add(hdev, type, dst, role, handle);
+}
+
static void hci_conn_cleanup_child(struct hci_conn *conn, u8 reason)
{
if (!reason)
@@ -1060,8 +1091,9 @@ static void hci_conn_cleanup_child(struct hci_conn *conn, u8 reason)
hci_conn_failed(conn, reason);
break;
case ISO_LINK:
- if (conn->state != BT_CONNECTED &&
- !test_bit(HCI_CONN_CREATE_CIS, &conn->flags))
+ if ((conn->state != BT_CONNECTED &&
+ !test_bit(HCI_CONN_CREATE_CIS, &conn->flags)) ||
+ test_bit(HCI_CONN_BIG_CREATED, &conn->flags))
hci_conn_failed(conn, reason);
break;
}
@@ -1247,6 +1279,12 @@ void hci_conn_failed(struct hci_conn *conn, u8 status)
break;
}
+ /* In case of BIG/PA sync failed, clear conn flags so that
+ * the conns will be correctly cleaned up by ISO layer
+ */
+ test_and_clear_bit(HCI_CONN_BIG_SYNC_FAILED, &conn->flags);
+ test_and_clear_bit(HCI_CONN_PA_SYNC_FAILED, &conn->flags);
+
conn->state = BT_CLOSED;
hci_connect_cfm(conn, status);
hci_conn_del(conn);
@@ -1274,6 +1312,9 @@ u8 hci_conn_set_handle(struct hci_conn *conn, u16 handle)
if (conn->abort_reason)
return conn->abort_reason;
+ if (HCI_CONN_HANDLE_UNSET(conn->handle))
+ ida_free(&hdev->unset_handle_ida, conn->handle);
+
conn->handle = handle;
return 0;
@@ -1381,7 +1422,7 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
if (conn) {
bacpy(&conn->dst, dst);
} else {
- conn = hci_conn_add(hdev, LE_LINK, dst, role);
+ conn = hci_conn_add_unset(hdev, LE_LINK, dst, role);
if (!conn)
return ERR_PTR(-ENOMEM);
hci_conn_hold(conn);
@@ -1486,6 +1527,18 @@ static int qos_set_bis(struct hci_dev *hdev, struct bt_iso_qos *qos)
/* Allocate BIS if not set */
if (qos->bcast.bis == BT_ISO_QOS_BIS_UNSET) {
+ if (qos->bcast.big != BT_ISO_QOS_BIG_UNSET) {
+ conn = hci_conn_hash_lookup_big(hdev, qos->bcast.big);
+
+ if (conn) {
+ /* If the BIG handle is already matched to an advertising
+ * handle, do not allocate a new one.
+ */
+ qos->bcast.bis = conn->iso_qos.bcast.bis;
+ return 0;
+ }
+ }
+
/* Find an unused adv set to advertise BIS, skip instance 0x00
* since it is reserved as general purpose set.
*/
@@ -1546,7 +1599,7 @@ static struct hci_conn *hci_add_bis(struct hci_dev *hdev, bdaddr_t *dst,
memcmp(conn->le_per_adv_data, base, base_len)))
return ERR_PTR(-EADDRINUSE);
- conn = hci_conn_add(hdev, ISO_LINK, dst, HCI_ROLE_MASTER);
+ conn = hci_conn_add_unset(hdev, ISO_LINK, dst, HCI_ROLE_MASTER);
if (!conn)
return ERR_PTR(-ENOMEM);
@@ -1590,7 +1643,7 @@ struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst,
BT_DBG("requesting refresh of dst_addr");
- conn = hci_conn_add(hdev, LE_LINK, dst, HCI_ROLE_MASTER);
+ conn = hci_conn_add_unset(hdev, LE_LINK, dst, HCI_ROLE_MASTER);
if (!conn)
return ERR_PTR(-ENOMEM);
@@ -1627,9 +1680,18 @@ struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst,
return ERR_PTR(-EOPNOTSUPP);
}
+ /* Reject outgoing connection to device with same BD ADDR against
+ * CVE-2020-26555
+ */
+ if (!bacmp(&hdev->bdaddr, dst)) {
+ bt_dev_dbg(hdev, "Reject connection with same BD_ADDR %pMR\n",
+ dst);
+ return ERR_PTR(-ECONNREFUSED);
+ }
+
acl = hci_conn_hash_lookup_ba(hdev, ACL_LINK, dst);
if (!acl) {
- acl = hci_conn_add(hdev, ACL_LINK, dst, HCI_ROLE_MASTER);
+ acl = hci_conn_add_unset(hdev, ACL_LINK, dst, HCI_ROLE_MASTER);
if (!acl)
return ERR_PTR(-ENOMEM);
}
@@ -1689,7 +1751,7 @@ struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, bdaddr_t *dst,
sco = hci_conn_hash_lookup_ba(hdev, type, dst);
if (!sco) {
- sco = hci_conn_add(hdev, type, dst, HCI_ROLE_MASTER);
+ sco = hci_conn_add_unset(hdev, type, dst, HCI_ROLE_MASTER);
if (!sco) {
hci_conn_drop(acl);
return ERR_PTR(-ENOMEM);
@@ -1881,7 +1943,7 @@ struct hci_conn *hci_bind_cis(struct hci_dev *hdev, bdaddr_t *dst,
cis = hci_conn_hash_lookup_cis(hdev, dst, dst_type, qos->ucast.cig,
qos->ucast.cis);
if (!cis) {
- cis = hci_conn_add(hdev, ISO_LINK, dst, HCI_ROLE_MASTER);
+ cis = hci_conn_add_unset(hdev, ISO_LINK, dst, HCI_ROLE_MASTER);
if (!cis)
return ERR_PTR(-ENOMEM);
cis->cleanup = cis_cleanup;
@@ -2130,7 +2192,7 @@ int hci_le_big_create_sync(struct hci_dev *hdev, struct hci_conn *hcon,
} pdu;
int err;
- if (num_bis > sizeof(pdu.bis))
+ if (num_bis < 0x01 || num_bis > sizeof(pdu.bis))
return -EINVAL;
err = qos_set_big(hdev, qos);
@@ -2172,7 +2234,17 @@ struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst,
__u8 base_len, __u8 *base)
{
struct hci_conn *conn;
+ struct hci_conn *parent;
__u8 eir[HCI_MAX_PER_AD_LENGTH];
+ struct hci_link *link;
+
+ /* Look for any BIS that is open for rebinding */
+ conn = hci_conn_hash_lookup_big_state(hdev, qos->bcast.big, BT_OPEN);
+ if (conn) {
+ memcpy(qos, &conn->iso_qos, sizeof(*qos));
+ conn->state = BT_CONNECTED;
+ return conn;
+ }
if (base_len && base)
base_len = eir_append_service_data(eir, 0, 0x1851,
@@ -2200,6 +2272,20 @@ struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst,
conn->iso_qos = *qos;
conn->state = BT_BOUND;
+ /* Link BISes together */
+ parent = hci_conn_hash_lookup_big(hdev,
+ conn->iso_qos.bcast.big);
+ if (parent && parent != conn) {
+ link = hci_conn_link(parent, conn);
+ if (!link) {
+ hci_conn_drop(conn);
+ return ERR_PTR(-ENOLINK);
+ }
+
+ /* Link takes the refcount */
+ hci_conn_drop(conn);
+ }
+
return conn;
}
@@ -2231,6 +2317,9 @@ struct hci_conn *hci_connect_bis(struct hci_dev *hdev, bdaddr_t *dst,
if (IS_ERR(conn))
return conn;
+ if (conn->state == BT_CONNECTED)
+ return conn;
+
data.big = qos->bcast.big;
data.bis = qos->bcast.bis;
@@ -2365,12 +2454,10 @@ static int hci_conn_auth(struct hci_conn *conn, __u8 sec_level, __u8 auth_type)
hci_send_cmd(conn->hdev, HCI_OP_AUTH_REQUESTED,
sizeof(cp), &cp);
- /* If we're already encrypted set the REAUTH_PEND flag,
- * otherwise set the ENCRYPT_PEND.
+ /* Set the ENCRYPT_PEND to trigger encryption after
+ * authentication.
*/
- if (test_bit(HCI_CONN_ENCRYPT, &conn->flags))
- set_bit(HCI_CONN_REAUTH_PEND, &conn->flags);
- else
+ if (!test_bit(HCI_CONN_ENCRYPT, &conn->flags))
set_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags);
}
@@ -2413,34 +2500,41 @@ int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type,
if (!test_bit(HCI_CONN_AUTH, &conn->flags))
goto auth;
- /* An authenticated FIPS approved combination key has sufficient
- * security for security level 4. */
- if (conn->key_type == HCI_LK_AUTH_COMBINATION_P256 &&
- sec_level == BT_SECURITY_FIPS)
- goto encrypt;
-
- /* An authenticated combination key has sufficient security for
- security level 3. */
- if ((conn->key_type == HCI_LK_AUTH_COMBINATION_P192 ||
- conn->key_type == HCI_LK_AUTH_COMBINATION_P256) &&
- sec_level == BT_SECURITY_HIGH)
- goto encrypt;
-
- /* An unauthenticated combination key has sufficient security for
- security level 1 and 2. */
- if ((conn->key_type == HCI_LK_UNAUTH_COMBINATION_P192 ||
- conn->key_type == HCI_LK_UNAUTH_COMBINATION_P256) &&
- (sec_level == BT_SECURITY_MEDIUM || sec_level == BT_SECURITY_LOW))
- goto encrypt;
-
- /* A combination key has always sufficient security for the security
- levels 1 or 2. High security level requires the combination key
- is generated using maximum PIN code length (16).
- For pre 2.1 units. */
- if (conn->key_type == HCI_LK_COMBINATION &&
- (sec_level == BT_SECURITY_MEDIUM || sec_level == BT_SECURITY_LOW ||
- conn->pin_length == 16))
- goto encrypt;
+ switch (conn->key_type) {
+ case HCI_LK_AUTH_COMBINATION_P256:
+ /* An authenticated FIPS approved combination key has
+ * sufficient security for security level 4 or lower.
+ */
+ if (sec_level <= BT_SECURITY_FIPS)
+ goto encrypt;
+ break;
+ case HCI_LK_AUTH_COMBINATION_P192:
+ /* An authenticated combination key has sufficient security for
+ * security level 3 or lower.
+ */
+ if (sec_level <= BT_SECURITY_HIGH)
+ goto encrypt;
+ break;
+ case HCI_LK_UNAUTH_COMBINATION_P192:
+ case HCI_LK_UNAUTH_COMBINATION_P256:
+ /* An unauthenticated combination key has sufficient security
+ * for security level 2 or lower.
+ */
+ if (sec_level <= BT_SECURITY_MEDIUM)
+ goto encrypt;
+ break;
+ case HCI_LK_COMBINATION:
+ /* A combination key has always sufficient security for the
+ * security levels 2 or lower. High security level requires the
+ * combination key is generated using maximum PIN code length
+ * (16). For pre 2.1 units.
+ */
+ if (sec_level <= BT_SECURITY_MEDIUM || conn->pin_length == 16)
+ goto encrypt;
+ break;
+ default:
+ break;
+ }
auth:
if (test_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags))
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index a5992f1b3c9b..65601aa52e0d 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -2535,6 +2535,8 @@ struct hci_dev *hci_alloc_dev_priv(int sizeof_priv)
mutex_init(&hdev->lock);
mutex_init(&hdev->req_lock);
+ ida_init(&hdev->unset_handle_ida);
+
INIT_LIST_HEAD(&hdev->mesh_pending);
INIT_LIST_HEAD(&hdev->mgmt_pending);
INIT_LIST_HEAD(&hdev->reject_list);
@@ -2617,7 +2619,11 @@ int hci_register_dev(struct hci_dev *hdev)
if (id < 0)
return id;
- snprintf(hdev->name, sizeof(hdev->name), "hci%d", id);
+ error = dev_set_name(&hdev->dev, "hci%u", id);
+ if (error)
+ return error;
+
+ hdev->name = dev_name(&hdev->dev);
hdev->id = id;
BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus);
@@ -2639,8 +2645,6 @@ int hci_register_dev(struct hci_dev *hdev)
if (!IS_ERR_OR_NULL(bt_debugfs))
hdev->debugfs = debugfs_create_dir(hdev->name, bt_debugfs);
- dev_set_name(&hdev->dev, "%s", hdev->name);
-
error = device_add(&hdev->dev);
if (error < 0)
goto err_wqueue;
@@ -2784,8 +2788,10 @@ void hci_release_dev(struct hci_dev *hdev)
hci_conn_params_clear_all(hdev);
hci_discovery_filter_clear(hdev);
hci_blocked_keys_clear(hdev);
+ hci_codec_list_clear(&hdev->local_codecs);
hci_dev_unlock(hdev);
+ ida_destroy(&hdev->unset_handle_ida);
ida_simple_remove(&hci_index_ida, hdev->id);
kfree_skb(hdev->sent_cmd);
kfree_skb(hdev->recv_event);
@@ -3418,7 +3424,12 @@ static void hci_link_tx_to(struct hci_dev *hdev, __u8 type)
if (c->type == type && c->sent) {
bt_dev_err(hdev, "killing stalled connection %pMR",
&c->dst);
+ /* hci_disconnect might sleep, so, we have to release
+ * the RCU read lock before calling it.
+ */
+ rcu_read_unlock();
hci_disconnect(c, HCI_ERROR_REMOTE_USER_TERM);
+ rcu_read_lock();
}
}
diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c
index 6b7741f6e95b..233453807b50 100644
--- a/net/bluetooth/hci_debugfs.c
+++ b/net/bluetooth/hci_debugfs.c
@@ -1046,10 +1046,12 @@ static int min_key_size_set(void *data, u64 val)
{
struct hci_dev *hdev = data;
- if (val > hdev->le_max_key_size || val < SMP_MIN_ENC_KEY_SIZE)
+ hci_dev_lock(hdev);
+ if (val > hdev->le_max_key_size || val < SMP_MIN_ENC_KEY_SIZE) {
+ hci_dev_unlock(hdev);
return -EINVAL;
+ }
- hci_dev_lock(hdev);
hdev->le_min_key_size = val;
hci_dev_unlock(hdev);
@@ -1074,10 +1076,12 @@ static int max_key_size_set(void *data, u64 val)
{
struct hci_dev *hdev = data;
- if (val > SMP_MAX_ENC_KEY_SIZE || val < hdev->le_min_key_size)
+ hci_dev_lock(hdev);
+ if (val > SMP_MAX_ENC_KEY_SIZE || val < hdev->le_min_key_size) {
+ hci_dev_unlock(hdev);
return -EINVAL;
+ }
- hci_dev_lock(hdev);
hdev->le_max_key_size = val;
hci_dev_unlock(hdev);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 35f251041eeb..ef8c3bed7361 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -26,6 +26,8 @@
/* Bluetooth HCI event handling. */
#include <asm/unaligned.h>
+#include <linux/crypto.h>
+#include <crypto/algapi.h>
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>
@@ -33,6 +35,7 @@
#include "hci_request.h"
#include "hci_debugfs.h"
+#include "hci_codec.h"
#include "a2mp.h"
#include "amp.h"
#include "smp.h"
@@ -513,6 +516,9 @@ static u8 hci_cc_read_class_of_dev(struct hci_dev *hdev, void *data,
{
struct hci_rp_read_class_of_dev *rp = data;
+ if (WARN_ON(!hdev))
+ return HCI_ERROR_UNSPECIFIED;
+
bt_dev_dbg(hdev, "status 0x%2.2x", rp->status);
if (rp->status)
@@ -744,9 +750,23 @@ static u8 hci_cc_read_enc_key_size(struct hci_dev *hdev, void *data,
} else {
conn->enc_key_size = rp->key_size;
status = 0;
+
+ if (conn->enc_key_size < hdev->min_enc_key_size) {
+ /* As slave role, the conn->state has been set to
+ * BT_CONNECTED and l2cap conn req might not be received
+ * yet, at this moment the l2cap layer almost does
+ * nothing with the non-zero status.
+ * So we also clear encrypt related bits, and then the
+ * handler of l2cap conn req will get the right secure
+ * state at a later time.
+ */
+ status = HCI_ERROR_AUTH_FAILURE;
+ clear_bit(HCI_CONN_ENCRYPT, &conn->flags);
+ clear_bit(HCI_CONN_AES_CCM, &conn->flags);
+ }
}
- hci_encrypt_cfm(conn, 0);
+ hci_encrypt_cfm(conn, status);
done:
hci_dev_unlock(hdev);
@@ -817,8 +837,6 @@ static u8 hci_cc_write_auth_payload_timeout(struct hci_dev *hdev, void *data,
if (!rp->status)
conn->auth_payload_timeout = get_unaligned_le16(sent + 2);
- hci_encrypt_cfm(conn, 0);
-
unlock:
hci_dev_unlock(hdev);
@@ -2301,7 +2319,8 @@ static void hci_cs_inquiry(struct hci_dev *hdev, __u8 status)
return;
}
- set_bit(HCI_INQUIRY, &hdev->flags);
+ if (hci_sent_cmd_data(hdev, HCI_OP_INQUIRY))
+ set_bit(HCI_INQUIRY, &hdev->flags);
}
static void hci_cs_create_conn(struct hci_dev *hdev, __u8 status)
@@ -2332,8 +2351,8 @@ static void hci_cs_create_conn(struct hci_dev *hdev, __u8 status)
}
} else {
if (!conn) {
- conn = hci_conn_add(hdev, ACL_LINK, &cp->bdaddr,
- HCI_ROLE_MASTER);
+ conn = hci_conn_add_unset(hdev, ACL_LINK, &cp->bdaddr,
+ HCI_ROLE_MASTER);
if (!conn)
bt_dev_err(hdev, "no memory for new connection");
}
@@ -3148,8 +3167,8 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data,
hci_bdaddr_list_lookup_with_flags(&hdev->accept_list,
&ev->bdaddr,
BDADDR_BREDR)) {
- conn = hci_conn_add(hdev, ev->link_type, &ev->bdaddr,
- HCI_ROLE_SLAVE);
+ conn = hci_conn_add_unset(hdev, ev->link_type,
+ &ev->bdaddr, HCI_ROLE_SLAVE);
if (!conn) {
bt_dev_err(hdev, "no memory for new conn");
goto unlock;
@@ -3267,6 +3286,16 @@ static void hci_conn_request_evt(struct hci_dev *hdev, void *data,
bt_dev_dbg(hdev, "bdaddr %pMR type 0x%x", &ev->bdaddr, ev->link_type);
+ /* Reject incoming connection from device with same BD ADDR against
+ * CVE-2020-26555
+ */
+ if (hdev && !bacmp(&hdev->bdaddr, &ev->bdaddr)) {
+ bt_dev_dbg(hdev, "Reject connection with same BD_ADDR %pMR\n",
+ &ev->bdaddr);
+ hci_reject_conn(hdev, &ev->bdaddr);
+ return;
+ }
+
mask |= hci_proto_connect_ind(hdev, &ev->bdaddr, ev->link_type,
&flags);
@@ -3304,8 +3333,8 @@ static void hci_conn_request_evt(struct hci_dev *hdev, void *data,
conn = hci_conn_hash_lookup_ba(hdev, ev->link_type,
&ev->bdaddr);
if (!conn) {
- conn = hci_conn_add(hdev, ev->link_type, &ev->bdaddr,
- HCI_ROLE_SLAVE);
+ conn = hci_conn_add_unset(hdev, ev->link_type, &ev->bdaddr,
+ HCI_ROLE_SLAVE);
if (!conn) {
bt_dev_err(hdev, "no memory for new connection");
goto unlock;
@@ -3471,14 +3500,8 @@ static void hci_auth_complete_evt(struct hci_dev *hdev, void *data,
if (!ev->status) {
clear_bit(HCI_CONN_AUTH_FAILURE, &conn->flags);
-
- if (!hci_conn_ssp_enabled(conn) &&
- test_bit(HCI_CONN_REAUTH_PEND, &conn->flags)) {
- bt_dev_info(hdev, "re-auth of legacy device is not possible.");
- } else {
- set_bit(HCI_CONN_AUTH, &conn->flags);
- conn->sec_level = conn->pending_sec_level;
- }
+ set_bit(HCI_CONN_AUTH, &conn->flags);
+ conn->sec_level = conn->pending_sec_level;
} else {
if (ev->status == HCI_ERROR_PIN_OR_KEY_MISSING)
set_bit(HCI_CONN_AUTH_FAILURE, &conn->flags);
@@ -3487,7 +3510,6 @@ static void hci_auth_complete_evt(struct hci_dev *hdev, void *data,
}
clear_bit(HCI_CONN_AUTH_PEND, &conn->flags);
- clear_bit(HCI_CONN_REAUTH_PEND, &conn->flags);
if (conn->state == BT_CONFIG) {
if (!ev->status && hci_conn_ssp_enabled(conn)) {
@@ -3670,12 +3692,8 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, void *data,
cp.handle = cpu_to_le16(conn->handle);
cp.timeout = cpu_to_le16(hdev->auth_payload_timeout);
if (hci_send_cmd(conn->hdev, HCI_OP_WRITE_AUTH_PAYLOAD_TO,
- sizeof(cp), &cp)) {
+ sizeof(cp), &cp))
bt_dev_err(hdev, "write auth payload timeout failed");
- goto notify;
- }
-
- goto unlock;
}
notify:
@@ -4741,6 +4759,15 @@ static void hci_link_key_notify_evt(struct hci_dev *hdev, void *data,
if (!conn)
goto unlock;
+ /* Ignore NULL link key against CVE-2020-26555 */
+ if (!crypto_memneq(ev->link_key, ZERO_KEY, HCI_LINK_KEY_SIZE)) {
+ bt_dev_dbg(hdev, "Ignore NULL link key (ZERO KEY) for %pMR",
+ &ev->bdaddr);
+ hci_disconnect(conn, HCI_ERROR_AUTH_FAILURE);
+ hci_conn_drop(conn);
+ goto unlock;
+ }
+
hci_conn_hold(conn);
conn->disc_timeout = HCI_DISCONN_TIMEOUT;
hci_conn_drop(conn);
@@ -5273,8 +5300,8 @@ static u8 bredr_oob_data_present(struct hci_conn *conn)
* available, then do not declare that OOB data is
* present.
*/
- if (!memcmp(data->rand256, ZERO_KEY, 16) ||
- !memcmp(data->hash256, ZERO_KEY, 16))
+ if (!crypto_memneq(data->rand256, ZERO_KEY, 16) ||
+ !crypto_memneq(data->hash256, ZERO_KEY, 16))
return 0x00;
return 0x02;
@@ -5284,8 +5311,8 @@ static u8 bredr_oob_data_present(struct hci_conn *conn)
* not supported by the hardware, then check that if
* P-192 data values are present.
*/
- if (!memcmp(data->rand192, ZERO_KEY, 16) ||
- !memcmp(data->hash192, ZERO_KEY, 16))
+ if (!crypto_memneq(data->rand192, ZERO_KEY, 16) ||
+ !crypto_memneq(data->hash192, ZERO_KEY, 16))
return 0x00;
return 0x01;
@@ -5302,7 +5329,7 @@ static void hci_io_capa_request_evt(struct hci_dev *hdev, void *data,
hci_dev_lock(hdev);
conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr);
- if (!conn)
+ if (!conn || !hci_conn_ssp_enabled(conn))
goto unlock;
hci_conn_hold(conn);
@@ -5549,7 +5576,7 @@ static void hci_simple_pair_complete_evt(struct hci_dev *hdev, void *data,
hci_dev_lock(hdev);
conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr);
- if (!conn)
+ if (!conn || !hci_conn_ssp_enabled(conn))
goto unlock;
/* Reset the authentication requirement to unknown */
@@ -5868,7 +5895,7 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
if (status)
goto unlock;
- conn = hci_conn_add(hdev, LE_LINK, bdaddr, role);
+ conn = hci_conn_add_unset(hdev, LE_LINK, bdaddr, role);
if (!conn) {
bt_dev_err(hdev, "no memory for new connection");
goto unlock;
@@ -5930,17 +5957,11 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
conn->dst_type = ev_bdaddr_type(hdev, conn->dst_type, NULL);
- if (handle > HCI_CONN_HANDLE_MAX) {
- bt_dev_err(hdev, "Invalid handle: 0x%4.4x > 0x%4.4x", handle,
- HCI_CONN_HANDLE_MAX);
- status = HCI_ERROR_INVALID_PARAMETERS;
- }
-
/* All connection failure handling is taken care of by the
* hci_conn_failed function which is triggered by the HCI
* request completion callbacks used for connecting.
*/
- if (status)
+ if (status || hci_conn_set_handle(conn, handle))
goto unlock;
/* Drop the connection if it has been aborted */
@@ -5964,7 +5985,6 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
mgmt_device_connected(hdev, conn, NULL, 0);
conn->sec_level = BT_SECURITY_LOW;
- conn->handle = handle;
conn->state = BT_CONFIG;
/* Store current advertising instance as connection advertising instance
@@ -6581,7 +6601,7 @@ static void hci_le_pa_sync_estabilished_evt(struct hci_dev *hdev, void *data,
struct hci_ev_le_pa_sync_established *ev = data;
int mask = hdev->link_mode;
__u8 flags = 0;
- struct hci_conn *bis;
+ struct hci_conn *pa_sync;
bt_dev_dbg(hdev, "status 0x%2.2x", ev->status);
@@ -6598,20 +6618,19 @@ static void hci_le_pa_sync_estabilished_evt(struct hci_dev *hdev, void *data,
if (!(flags & HCI_PROTO_DEFER))
goto unlock;
- /* Add connection to indicate the PA sync event */
- bis = hci_conn_add(hdev, ISO_LINK, BDADDR_ANY,
- HCI_ROLE_SLAVE);
+ if (ev->status) {
+ /* Add connection to indicate the failed PA sync event */
+ pa_sync = hci_conn_add_unset(hdev, ISO_LINK, BDADDR_ANY,
+ HCI_ROLE_SLAVE);
- if (!bis)
- goto unlock;
+ if (!pa_sync)
+ goto unlock;
- if (ev->status)
- set_bit(HCI_CONN_PA_SYNC_FAILED, &bis->flags);
- else
- set_bit(HCI_CONN_PA_SYNC, &bis->flags);
+ set_bit(HCI_CONN_PA_SYNC_FAILED, &pa_sync->flags);
- /* Notify connection to iso layer */
- hci_connect_cfm(bis, ev->status);
+ /* Notify iso layer */
+ hci_connect_cfm(pa_sync, ev->status);
+ }
unlock:
hci_dev_unlock(hdev);
@@ -6998,12 +7017,12 @@ static void hci_le_cis_req_evt(struct hci_dev *hdev, void *data,
cis = hci_conn_hash_lookup_handle(hdev, cis_handle);
if (!cis) {
- cis = hci_conn_add(hdev, ISO_LINK, &acl->dst, HCI_ROLE_SLAVE);
+ cis = hci_conn_add(hdev, ISO_LINK, &acl->dst, HCI_ROLE_SLAVE,
+ cis_handle);
if (!cis) {
hci_le_reject_cis(hdev, ev->cis_handle);
goto unlock;
}
- cis->handle = cis_handle;
}
cis->iso_qos.ucast.cig = ev->cig_id;
@@ -7020,6 +7039,14 @@ unlock:
hci_dev_unlock(hdev);
}
+static int hci_iso_term_big_sync(struct hci_dev *hdev, void *data)
+{
+ u8 handle = PTR_UINT(data);
+
+ return hci_le_terminate_big_sync(hdev, handle,
+ HCI_ERROR_LOCAL_HOST_TERM);
+}
+
static void hci_le_create_big_complete_evt(struct hci_dev *hdev, void *data,
struct sk_buff *skb)
{
@@ -7064,16 +7091,17 @@ static void hci_le_create_big_complete_evt(struct hci_dev *hdev, void *data,
rcu_read_lock();
}
+ rcu_read_unlock();
+
if (!ev->status && !i)
/* If no BISes have been connected for the BIG,
* terminate. This is in case all bound connections
* have been closed before the BIG creation
* has completed.
*/
- hci_le_terminate_big_sync(hdev, ev->handle,
- HCI_ERROR_LOCAL_HOST_TERM);
+ hci_cmd_sync_queue(hdev, hci_iso_term_big_sync,
+ UINT_PTR(ev->handle), NULL);
- rcu_read_unlock();
hci_dev_unlock(hdev);
}
@@ -7082,7 +7110,6 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data,
{
struct hci_evt_le_big_sync_estabilished *ev = data;
struct hci_conn *bis;
- struct hci_conn *pa_sync;
int i;
bt_dev_dbg(hdev, "status 0x%2.2x", ev->status);
@@ -7093,15 +7120,6 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data,
hci_dev_lock(hdev);
- if (!ev->status) {
- pa_sync = hci_conn_hash_lookup_pa_sync(hdev, ev->handle);
- if (pa_sync)
- /* Also mark the BIG sync established event on the
- * associated PA sync hcon
- */
- set_bit(HCI_CONN_BIG_SYNC, &pa_sync->flags);
- }
-
for (i = 0; i < ev->num_bis; i++) {
u16 handle = le16_to_cpu(ev->bis[i]);
__le32 interval;
@@ -7109,10 +7127,9 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data,
bis = hci_conn_hash_lookup_handle(hdev, handle);
if (!bis) {
bis = hci_conn_add(hdev, ISO_LINK, BDADDR_ANY,
- HCI_ROLE_SLAVE);
+ HCI_ROLE_SLAVE, handle);
if (!bis)
continue;
- bis->handle = handle;
}
if (ev->status != 0x42)
@@ -7155,15 +7172,42 @@ static void hci_le_big_info_adv_report_evt(struct hci_dev *hdev, void *data,
struct hci_evt_le_big_info_adv_report *ev = data;
int mask = hdev->link_mode;
__u8 flags = 0;
+ struct hci_conn *pa_sync;
bt_dev_dbg(hdev, "sync_handle 0x%4.4x", le16_to_cpu(ev->sync_handle));
hci_dev_lock(hdev);
mask |= hci_proto_connect_ind(hdev, BDADDR_ANY, ISO_LINK, &flags);
- if (!(mask & HCI_LM_ACCEPT))
+ if (!(mask & HCI_LM_ACCEPT)) {
hci_le_pa_term_sync(hdev, ev->sync_handle);
+ goto unlock;
+ }
+
+ if (!(flags & HCI_PROTO_DEFER))
+ goto unlock;
+
+ pa_sync = hci_conn_hash_lookup_pa_sync_handle
+ (hdev,
+ le16_to_cpu(ev->sync_handle));
+ if (pa_sync)
+ goto unlock;
+
+ /* Add connection to indicate the PA sync event */
+ pa_sync = hci_conn_add_unset(hdev, ISO_LINK, BDADDR_ANY,
+ HCI_ROLE_SLAVE);
+
+ if (!pa_sync)
+ goto unlock;
+
+ pa_sync->sync_handle = le16_to_cpu(ev->sync_handle);
+ set_bit(HCI_CONN_PA_SYNC, &pa_sync->flags);
+
+ /* Notify iso layer */
+ hci_connect_cfm(pa_sync, 0x00);
+
+unlock:
hci_dev_unlock(hdev);
}
diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h
index b9c5a9823837..0be75cf0efed 100644
--- a/net/bluetooth/hci_request.h
+++ b/net/bluetooth/hci_request.h
@@ -71,7 +71,5 @@ struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen,
void hci_req_add_le_scan_disable(struct hci_request *req, bool rpa_le_conn);
void hci_req_add_le_passive_scan(struct hci_request *req);
-void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next);
-
void hci_request_setup(struct hci_dev *hdev);
void hci_request_cancel_all(struct hci_dev *hdev);
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 5e4f718073b7..3e7cd330d731 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -488,7 +488,8 @@ static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event)
ni->type = hdev->dev_type;
ni->bus = hdev->bus;
bacpy(&ni->bdaddr, &hdev->bdaddr);
- memcpy(ni->name, hdev->name, 8);
+ memcpy_and_pad(ni->name, sizeof(ni->name), hdev->name,
+ strnlen(hdev->name, sizeof(ni->name)), '\0');
opcode = cpu_to_le16(HCI_MON_NEW_INDEX);
break;
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index 9b93653c6197..a6fc8a2a5c67 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -152,7 +152,7 @@ struct sk_buff *__hci_cmd_sync_sk(struct hci_dev *hdev, u16 opcode, u32 plen,
struct sk_buff *skb;
int err = 0;
- bt_dev_dbg(hdev, "Opcode 0x%4x", opcode);
+ bt_dev_dbg(hdev, "Opcode 0x%4.4x", opcode);
hci_req_init(&req, hdev);
@@ -248,7 +248,7 @@ int __hci_cmd_sync_status_sk(struct hci_dev *hdev, u16 opcode, u32 plen,
skb = __hci_cmd_sync_sk(hdev, opcode, plen, param, event, timeout, sk);
if (IS_ERR(skb)) {
if (!event)
- bt_dev_err(hdev, "Opcode 0x%4x failed: %ld", opcode,
+ bt_dev_err(hdev, "Opcode 0x%4.4x failed: %ld", opcode,
PTR_ERR(skb));
return PTR_ERR(skb);
}
@@ -348,8 +348,6 @@ static void le_scan_disable(struct work_struct *work)
if (!hci_dev_test_flag(hdev, HCI_LE_SCAN))
goto _return;
- cancel_delayed_work(&hdev->le_scan_restart);
-
status = hci_cmd_sync_queue(hdev, scan_disable_sync, NULL, NULL);
if (status) {
bt_dev_err(hdev, "failed to disable LE scan: %d", status);
@@ -397,76 +395,6 @@ _return:
static int hci_le_set_scan_enable_sync(struct hci_dev *hdev, u8 val,
u8 filter_dup);
-static int hci_le_scan_restart_sync(struct hci_dev *hdev)
-{
- /* If controller is not scanning we are done. */
- if (!hci_dev_test_flag(hdev, HCI_LE_SCAN))
- return 0;
-
- if (hdev->scanning_paused) {
- bt_dev_dbg(hdev, "Scanning is paused for suspend");
- return 0;
- }
-
- hci_le_set_scan_enable_sync(hdev, LE_SCAN_DISABLE, 0x00);
- return hci_le_set_scan_enable_sync(hdev, LE_SCAN_ENABLE,
- LE_SCAN_FILTER_DUP_ENABLE);
-}
-
-static int le_scan_restart_sync(struct hci_dev *hdev, void *data)
-{
- return hci_le_scan_restart_sync(hdev);
-}
-
-static void le_scan_restart(struct work_struct *work)
-{
- struct hci_dev *hdev = container_of(work, struct hci_dev,
- le_scan_restart.work);
- unsigned long timeout, duration, scan_start, now;
- int status;
-
- bt_dev_dbg(hdev, "");
-
- hci_dev_lock(hdev);
-
- status = hci_cmd_sync_queue(hdev, le_scan_restart_sync, NULL, NULL);
- if (status) {
- bt_dev_err(hdev, "failed to restart LE scan: status %d",
- status);
- goto unlock;
- }
-
- if (!test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) ||
- !hdev->discovery.scan_start)
- goto unlock;
-
- /* When the scan was started, hdev->le_scan_disable has been queued
- * after duration from scan_start. During scan restart this job
- * has been canceled, and we need to queue it again after proper
- * timeout, to make sure that scan does not run indefinitely.
- */
- duration = hdev->discovery.scan_duration;
- scan_start = hdev->discovery.scan_start;
- now = jiffies;
- if (now - scan_start <= duration) {
- int elapsed;
-
- if (now >= scan_start)
- elapsed = now - scan_start;
- else
- elapsed = ULONG_MAX - scan_start + now;
-
- timeout = duration - elapsed;
- } else {
- timeout = 0;
- }
-
- queue_delayed_work(hdev->req_workqueue,
- &hdev->le_scan_disable, timeout);
-
-unlock:
- hci_dev_unlock(hdev);
-}
static int reenable_adv_sync(struct hci_dev *hdev, void *data)
{
@@ -635,7 +563,6 @@ void hci_cmd_sync_init(struct hci_dev *hdev)
INIT_WORK(&hdev->cmd_sync_cancel_work, hci_cmd_sync_cancel_work);
INIT_WORK(&hdev->reenable_adv_work, reenable_adv);
INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable);
- INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart);
INIT_DELAYED_WORK(&hdev->adv_instance_expire, adv_timeout_expire);
}
@@ -1317,7 +1244,7 @@ int hci_start_ext_adv_sync(struct hci_dev *hdev, u8 instance)
return hci_enable_ext_advertising_sync(hdev, instance);
}
-static int hci_disable_per_advertising_sync(struct hci_dev *hdev, u8 instance)
+int hci_disable_per_advertising_sync(struct hci_dev *hdev, u8 instance)
{
struct hci_cp_le_set_per_adv_enable cp;
struct adv_info *adv = NULL;
@@ -3805,12 +3732,14 @@ static int hci_set_event_mask_sync(struct hci_dev *hdev)
if (lmp_bredr_capable(hdev)) {
events[4] |= 0x01; /* Flow Specification Complete */
- /* Don't set Disconnect Complete when suspended as that
- * would wakeup the host when disconnecting due to
- * suspend.
+ /* Don't set Disconnect Complete and mode change when
+ * suspended as that would wakeup the host when disconnecting
+ * due to suspend.
*/
- if (hdev->suspended)
+ if (hdev->suspended) {
events[0] &= 0xef;
+ events[2] &= 0xf7;
+ }
} else {
/* Use a different default for LE-only devices */
memset(events, 0, sizeof(events));
@@ -4269,12 +4198,12 @@ static int hci_le_set_host_feature_sync(struct hci_dev *hdev)
{
struct hci_cp_le_set_host_feature cp;
- if (!iso_capable(hdev))
+ if (!cis_capable(hdev))
return 0;
memset(&cp, 0, sizeof(cp));
- /* Isochronous Channels (Host Support) */
+ /* Connected Isochronous Channels (Host Support) */
cp.bit_number = 32;
cp.bit_value = 1;
@@ -4965,7 +4894,6 @@ int hci_dev_close_sync(struct hci_dev *hdev)
cancel_delayed_work(&hdev->power_off);
cancel_delayed_work(&hdev->ncmd_timer);
cancel_delayed_work(&hdev->le_scan_disable);
- cancel_delayed_work(&hdev->le_scan_restart);
hci_request_cancel_all(hdev);
@@ -5079,6 +5007,7 @@ int hci_dev_close_sync(struct hci_dev *hdev)
memset(hdev->eir, 0, sizeof(hdev->eir));
memset(hdev->dev_class, 0, sizeof(hdev->dev_class));
bacpy(&hdev->random_addr, BDADDR_ANY);
+ hci_codec_list_clear(&hdev->local_codecs);
hci_dev_put(hdev);
return err;
@@ -5182,7 +5111,6 @@ int hci_stop_discovery_sync(struct hci_dev *hdev)
if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) {
cancel_delayed_work(&hdev->le_scan_disable);
- cancel_delayed_work(&hdev->le_scan_restart);
err = hci_scan_disable_sync(hdev);
if (err)
@@ -5236,6 +5164,17 @@ static int hci_disconnect_sync(struct hci_dev *hdev, struct hci_conn *conn,
if (conn->type == AMP_LINK)
return hci_disconnect_phy_link_sync(hdev, conn->handle, reason);
+ if (test_bit(HCI_CONN_BIG_CREATED, &conn->flags)) {
+ /* This is a BIS connection, hci_conn_del will
+ * do the necessary cleanup.
+ */
+ hci_dev_lock(hdev);
+ hci_conn_failed(conn, reason);
+ hci_dev_unlock(hdev);
+
+ return 0;
+ }
+
memset(&cp, 0, sizeof(cp));
cp.handle = cpu_to_le16(conn->handle);
cp.reason = reason;
@@ -5373,6 +5312,7 @@ int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason)
{
int err = 0;
u16 handle = conn->handle;
+ bool disconnect = false;
struct hci_conn *c;
switch (conn->state) {
@@ -5387,40 +5327,16 @@ int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason)
err = hci_reject_conn_sync(hdev, conn, reason);
break;
case BT_OPEN:
- hci_dev_lock(hdev);
-
- /* Cleanup bis or pa sync connections */
- if (test_and_clear_bit(HCI_CONN_BIG_SYNC_FAILED, &conn->flags) ||
- test_and_clear_bit(HCI_CONN_PA_SYNC_FAILED, &conn->flags)) {
- hci_conn_failed(conn, reason);
- } else if (test_bit(HCI_CONN_PA_SYNC, &conn->flags) ||
- test_bit(HCI_CONN_BIG_SYNC, &conn->flags)) {
- conn->state = BT_CLOSED;
- hci_disconn_cfm(conn, reason);
- hci_conn_del(conn);
- }
-
- hci_dev_unlock(hdev);
- return 0;
case BT_BOUND:
- hci_dev_lock(hdev);
- hci_conn_failed(conn, reason);
- hci_dev_unlock(hdev);
- return 0;
+ break;
default:
- hci_dev_lock(hdev);
- conn->state = BT_CLOSED;
- hci_disconn_cfm(conn, reason);
- hci_conn_del(conn);
- hci_dev_unlock(hdev);
- return 0;
+ disconnect = true;
+ break;
}
hci_dev_lock(hdev);
- /* Check if the connection hasn't been cleanup while waiting
- * commands to complete.
- */
+ /* Check if the connection has been cleaned up concurrently */
c = hci_conn_hash_lookup_handle(hdev, handle);
if (!c || c != conn) {
err = 0;
@@ -5432,7 +5348,13 @@ int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason)
* or in case of LE it was still scanning so it can be cleanup
* safely.
*/
- hci_conn_failed(conn, reason);
+ if (disconnect) {
+ conn->state = BT_CLOSED;
+ hci_disconn_cfm(conn, reason);
+ hci_conn_del(conn);
+ } else {
+ hci_conn_failed(conn, reason);
+ }
unlock:
hci_dev_unlock(hdev);
@@ -5696,19 +5618,18 @@ static int hci_active_scan_sync(struct hci_dev *hdev, uint16_t interval)
if (err < 0)
own_addr_type = ADDR_LE_DEV_PUBLIC;
- if (hci_is_adv_monitoring(hdev)) {
+ if (hci_is_adv_monitoring(hdev) ||
+ (test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) &&
+ hdev->discovery.result_filtering)) {
/* Duplicate filter should be disabled when some advertisement
* monitor is activated, otherwise AdvMon can only receive one
* advertisement for one peer(*) during active scanning, and
* might report loss to these peers.
*
- * Note that different controllers have different meanings of
- * |duplicate|. Some of them consider packets with the same
- * address as duplicate, and others consider packets with the
- * same address and the same RSSI as duplicate. Although in the
- * latter case we don't need to disable duplicate filter, but
- * it is common to have active scanning for a short period of
- * time, the power impact should be neglectable.
+ * If controller does strict duplicate filtering and the
+ * discovery requires result filtering disables controller based
+ * filtering since that can cause reports that would match the
+ * host filter to not be reported.
*/
filter_dup = LE_SCAN_FILTER_DUP_DISABLE;
}
@@ -5788,17 +5709,6 @@ int hci_start_discovery_sync(struct hci_dev *hdev)
bt_dev_dbg(hdev, "timeout %u ms", jiffies_to_msecs(timeout));
- /* When service discovery is used and the controller has a
- * strict duplicate filter, it is important to remember the
- * start and duration of the scan. This is required for
- * restarting scanning during the discovery phase.
- */
- if (test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) &&
- hdev->discovery.result_filtering) {
- hdev->discovery.scan_start = jiffies;
- hdev->discovery.scan_duration = timeout;
- }
-
queue_delayed_work(hdev->req_workqueue, &hdev->le_scan_disable,
timeout);
return 0;
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index 15b33579007c..367e32fe30eb 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -35,7 +35,7 @@ void hci_conn_init_sysfs(struct hci_conn *conn)
{
struct hci_dev *hdev = conn->hdev;
- BT_DBG("conn %p", conn);
+ bt_dev_dbg(hdev, "conn %p", conn);
conn->dev.type = &bt_link;
conn->dev.class = &bt_class;
@@ -48,27 +48,30 @@ void hci_conn_add_sysfs(struct hci_conn *conn)
{
struct hci_dev *hdev = conn->hdev;
- BT_DBG("conn %p", conn);
+ bt_dev_dbg(hdev, "conn %p", conn);
if (device_is_registered(&conn->dev))
return;
dev_set_name(&conn->dev, "%s:%d", hdev->name, conn->handle);
- if (device_add(&conn->dev) < 0) {
+ if (device_add(&conn->dev) < 0)
bt_dev_err(hdev, "failed to register connection device");
- return;
- }
-
- hci_dev_hold(hdev);
}
void hci_conn_del_sysfs(struct hci_conn *conn)
{
struct hci_dev *hdev = conn->hdev;
- if (!device_is_registered(&conn->dev))
+ bt_dev_dbg(hdev, "conn %p", conn);
+
+ if (!device_is_registered(&conn->dev)) {
+ /* If device_add() has *not* succeeded, use *only* put_device()
+ * to drop the reference count.
+ */
+ put_device(&conn->dev);
return;
+ }
while (1) {
struct device *dev;
@@ -80,9 +83,7 @@ void hci_conn_del_sysfs(struct hci_conn *conn)
put_device(dev);
}
- device_del(&conn->dev);
-
- hci_dev_put(hdev);
+ device_unregister(&conn->dev);
}
static void bt_host_release(struct device *dev)
diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c
index 16da946f5881..04f6572d35f1 100644
--- a/net/bluetooth/iso.c
+++ b/net/bluetooth/iso.c
@@ -14,6 +14,7 @@
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>
#include <net/bluetooth/iso.h>
+#include "eir.h"
static const struct proto_ops iso_sock_ops;
@@ -47,11 +48,13 @@ static void iso_sock_kill(struct sock *sk);
#define EIR_SERVICE_DATA_LENGTH 4
#define BASE_MAX_LENGTH (HCI_MAX_PER_AD_LENGTH - EIR_SERVICE_DATA_LENGTH)
+#define EIR_BAA_SERVICE_UUID 0x1851
/* iso_pinfo flags values */
enum {
BT_SK_BIG_SYNC,
BT_SK_PA_SYNC,
+ BT_SK_PA_SYNC_TERM,
};
struct iso_pinfo {
@@ -77,8 +80,14 @@ static struct bt_iso_qos default_qos;
static bool check_ucast_qos(struct bt_iso_qos *qos);
static bool check_bcast_qos(struct bt_iso_qos *qos);
static bool iso_match_sid(struct sock *sk, void *data);
+static bool iso_match_sync_handle(struct sock *sk, void *data);
static void iso_sock_disconn(struct sock *sk);
+typedef bool (*iso_sock_match_t)(struct sock *sk, void *data);
+
+static struct sock *iso_get_sock_listen(bdaddr_t *src, bdaddr_t *dst,
+ iso_sock_match_t match, void *data);
+
/* ---- ISO timers ---- */
#define ISO_CONN_TIMEOUT (HZ * 40)
#define ISO_DISCONN_TIMEOUT (HZ * 2)
@@ -187,10 +196,21 @@ static void iso_chan_del(struct sock *sk, int err)
sock_set_flag(sk, SOCK_ZAPPED);
}
+static bool iso_match_conn_sync_handle(struct sock *sk, void *data)
+{
+ struct hci_conn *hcon = data;
+
+ if (test_bit(BT_SK_PA_SYNC, &iso_pi(sk)->flags))
+ return false;
+
+ return hcon->sync_handle == iso_pi(sk)->sync_handle;
+}
+
static void iso_conn_del(struct hci_conn *hcon, int err)
{
struct iso_conn *conn = hcon->iso_data;
struct sock *sk;
+ struct sock *parent;
if (!conn)
return;
@@ -206,6 +226,25 @@ static void iso_conn_del(struct hci_conn *hcon, int err)
if (sk) {
lock_sock(sk);
+
+ /* While a PA sync hcon is in the process of closing,
+ * mark parent socket with a flag, so that any residual
+ * BIGInfo adv reports that arrive before PA sync is
+ * terminated are not processed anymore.
+ */
+ if (test_bit(BT_SK_PA_SYNC, &iso_pi(sk)->flags)) {
+ parent = iso_get_sock_listen(&hcon->src,
+ &hcon->dst,
+ iso_match_conn_sync_handle,
+ hcon);
+
+ if (parent) {
+ set_bit(BT_SK_PA_SYNC_TERM,
+ &iso_pi(parent)->flags);
+ sock_put(parent);
+ }
+ }
+
iso_sock_clear_timer(sk);
iso_chan_del(sk, err);
release_sock(sk);
@@ -502,7 +541,7 @@ drop:
}
/* -------- Socket interface ---------- */
-static struct sock *__iso_get_sock_listen_by_addr(bdaddr_t *ba)
+static struct sock *__iso_get_sock_listen_by_addr(bdaddr_t *src, bdaddr_t *dst)
{
struct sock *sk;
@@ -510,7 +549,10 @@ static struct sock *__iso_get_sock_listen_by_addr(bdaddr_t *ba)
if (sk->sk_state != BT_LISTEN)
continue;
- if (!bacmp(&iso_pi(sk)->src, ba))
+ if (bacmp(&iso_pi(sk)->dst, dst))
+ continue;
+
+ if (!bacmp(&iso_pi(sk)->src, src))
return sk;
}
@@ -539,8 +581,6 @@ static struct sock *__iso_get_sock_listen_by_sid(bdaddr_t *ba, bdaddr_t *bc,
return NULL;
}
-typedef bool (*iso_sock_match_t)(struct sock *sk, void *data);
-
/* Find socket listening:
* source bdaddr (Unicast)
* destination bdaddr (Broadcast only)
@@ -568,19 +608,68 @@ static struct sock *iso_get_sock_listen(bdaddr_t *src, bdaddr_t *dst,
continue;
/* Exact match. */
- if (!bacmp(&iso_pi(sk)->src, src))
+ if (!bacmp(&iso_pi(sk)->src, src)) {
+ sock_hold(sk);
break;
+ }
/* Closest match */
- if (!bacmp(&iso_pi(sk)->src, BDADDR_ANY))
+ if (!bacmp(&iso_pi(sk)->src, BDADDR_ANY)) {
+ if (sk1)
+ sock_put(sk1);
+
sk1 = sk;
+ sock_hold(sk1);
+ }
}
+ if (sk && sk1)
+ sock_put(sk1);
+
read_unlock(&iso_sk_list.lock);
return sk ? sk : sk1;
}
+static struct sock *iso_get_sock_big(struct sock *match_sk, bdaddr_t *src,
+ bdaddr_t *dst, uint8_t big)
+{
+ struct sock *sk = NULL;
+
+ read_lock(&iso_sk_list.lock);
+
+ sk_for_each(sk, &iso_sk_list.head) {
+ if (match_sk == sk)
+ continue;
+
+ /* Look for sockets that have already been
+ * connected to the BIG
+ */
+ if (sk->sk_state != BT_CONNECTED &&
+ sk->sk_state != BT_CONNECT)
+ continue;
+
+ /* Match Broadcast destination */
+ if (bacmp(&iso_pi(sk)->dst, dst))
+ continue;
+
+ /* Match BIG handle */
+ if (iso_pi(sk)->qos.bcast.big != big)
+ continue;
+
+ /* Match source address */
+ if (bacmp(&iso_pi(sk)->src, src))
+ continue;
+
+ sock_hold(sk);
+ break;
+ }
+
+ read_unlock(&iso_sk_list.lock);
+
+ return sk;
+}
+
static void iso_sock_destruct(struct sock *sk)
{
BT_DBG("sk %p", sk);
@@ -633,6 +722,28 @@ static void iso_sock_kill(struct sock *sk)
static void iso_sock_disconn(struct sock *sk)
{
+ struct sock *bis_sk;
+ struct hci_conn *hcon = iso_pi(sk)->conn->hcon;
+
+ if (test_bit(HCI_CONN_BIG_CREATED, &hcon->flags)) {
+ bis_sk = iso_get_sock_big(sk, &iso_pi(sk)->src,
+ &iso_pi(sk)->dst,
+ iso_pi(sk)->qos.bcast.big);
+
+ /* If there are any other connected sockets for the
+ * same BIG, just delete the sk and leave the bis
+ * hcon active, in case later rebinding is needed.
+ */
+ if (bis_sk) {
+ hcon->state = BT_OPEN;
+ iso_pi(sk)->conn->hcon = NULL;
+ iso_sock_clear_timer(sk);
+ iso_chan_del(sk, bt_to_errno(hcon->abort_reason));
+ sock_put(bis_sk);
+ return;
+ }
+ }
+
sk->sk_state = BT_DISCONN;
iso_sock_set_timer(sk, ISO_DISCONN_TIMEOUT);
iso_conn_lock(iso_pi(sk)->conn);
@@ -786,28 +897,75 @@ static int iso_sock_bind_bc(struct socket *sock, struct sockaddr *addr,
BT_DBG("sk %p bc_sid %u bc_num_bis %u", sk, sa->iso_bc->bc_sid,
sa->iso_bc->bc_num_bis);
- if (addr_len > sizeof(*sa) + sizeof(*sa->iso_bc) ||
- sa->iso_bc->bc_num_bis < 0x01 || sa->iso_bc->bc_num_bis > 0x1f)
+ if (addr_len != sizeof(*sa) + sizeof(*sa->iso_bc))
return -EINVAL;
bacpy(&iso_pi(sk)->dst, &sa->iso_bc->bc_bdaddr);
+
+ /* Check if the address type is of LE type */
+ if (!bdaddr_type_is_le(sa->iso_bc->bc_bdaddr_type))
+ return -EINVAL;
+
iso_pi(sk)->dst_type = sa->iso_bc->bc_bdaddr_type;
iso_pi(sk)->sync_handle = -1;
+
+ if (sa->iso_bc->bc_sid > 0x0f)
+ return -EINVAL;
+
iso_pi(sk)->bc_sid = sa->iso_bc->bc_sid;
+
+ if (sa->iso_bc->bc_num_bis > ISO_MAX_NUM_BIS)
+ return -EINVAL;
+
iso_pi(sk)->bc_num_bis = sa->iso_bc->bc_num_bis;
- for (i = 0; i < iso_pi(sk)->bc_num_bis; i++) {
+ for (i = 0; i < iso_pi(sk)->bc_num_bis; i++)
if (sa->iso_bc->bc_bis[i] < 0x01 ||
sa->iso_bc->bc_bis[i] > 0x1f)
return -EINVAL;
- memcpy(iso_pi(sk)->bc_bis, sa->iso_bc->bc_bis,
- iso_pi(sk)->bc_num_bis);
- }
+ memcpy(iso_pi(sk)->bc_bis, sa->iso_bc->bc_bis,
+ iso_pi(sk)->bc_num_bis);
return 0;
}
+static int iso_sock_bind_pa_sk(struct sock *sk, struct sockaddr_iso *sa,
+ int addr_len)
+{
+ int err = 0;
+
+ if (sk->sk_type != SOCK_SEQPACKET) {
+ err = -EINVAL;
+ goto done;
+ }
+
+ if (addr_len != sizeof(*sa) + sizeof(*sa->iso_bc)) {
+ err = -EINVAL;
+ goto done;
+ }
+
+ if (sa->iso_bc->bc_num_bis > ISO_MAX_NUM_BIS) {
+ err = -EINVAL;
+ goto done;
+ }
+
+ iso_pi(sk)->bc_num_bis = sa->iso_bc->bc_num_bis;
+
+ for (int i = 0; i < iso_pi(sk)->bc_num_bis; i++)
+ if (sa->iso_bc->bc_bis[i] < 0x01 ||
+ sa->iso_bc->bc_bis[i] > 0x1f) {
+ err = -EINVAL;
+ goto done;
+ }
+
+ memcpy(iso_pi(sk)->bc_bis, sa->iso_bc->bc_bis,
+ iso_pi(sk)->bc_num_bis);
+
+done:
+ return err;
+}
+
static int iso_sock_bind(struct socket *sock, struct sockaddr *addr,
int addr_len)
{
@@ -823,6 +981,15 @@ static int iso_sock_bind(struct socket *sock, struct sockaddr *addr,
lock_sock(sk);
+ /* Allow the user to bind a PA sync socket to a number
+ * of BISes to sync to.
+ */
+ if (sk->sk_state == BT_CONNECT2 &&
+ test_bit(BT_SK_PA_SYNC, &iso_pi(sk)->flags)) {
+ err = iso_sock_bind_pa_sk(sk, sa, addr_len);
+ goto done;
+ }
+
if (sk->sk_state != BT_OPEN) {
err = -EBADFD;
goto done;
@@ -952,7 +1119,7 @@ static int iso_listen_cis(struct sock *sk)
write_lock(&iso_sk_list.lock);
- if (__iso_get_sock_listen_by_addr(&iso_pi(sk)->src))
+ if (__iso_get_sock_listen_by_addr(&iso_pi(sk)->src, &iso_pi(sk)->dst))
err = -EADDRINUSE;
write_unlock(&iso_sk_list.lock);
@@ -1199,7 +1366,6 @@ static int iso_sock_recvmsg(struct socket *sock, struct msghdr *msg,
test_bit(HCI_CONN_PA_SYNC, &pi->conn->hcon->flags)) {
iso_conn_big_sync(sk);
sk->sk_state = BT_LISTEN;
- set_bit(BT_SK_PA_SYNC, &iso_pi(sk)->flags);
} else {
iso_conn_defer_accept(pi->conn->hcon);
sk->sk_state = BT_CONFIG;
@@ -1458,6 +1624,8 @@ static int iso_sock_getsockopt(struct socket *sock, int level, int optname,
len = min_t(unsigned int, len, base_len);
if (copy_to_user(optval, base, len))
err = -EFAULT;
+ if (put_user(len, optlen))
+ err = -EFAULT;
break;
@@ -1576,6 +1744,7 @@ static void iso_conn_ready(struct iso_conn *conn)
struct sock *sk = conn->sk;
struct hci_ev_le_big_sync_estabilished *ev = NULL;
struct hci_ev_le_pa_sync_established *ev2 = NULL;
+ struct hci_evt_le_big_info_adv_report *ev3 = NULL;
struct hci_conn *hcon;
BT_DBG("conn %p", conn);
@@ -1600,14 +1769,20 @@ static void iso_conn_ready(struct iso_conn *conn)
parent = iso_get_sock_listen(&hcon->src,
&hcon->dst,
iso_match_big, ev);
- } else if (test_bit(HCI_CONN_PA_SYNC, &hcon->flags) ||
- test_bit(HCI_CONN_PA_SYNC_FAILED, &hcon->flags)) {
+ } else if (test_bit(HCI_CONN_PA_SYNC_FAILED, &hcon->flags)) {
ev2 = hci_recv_event_data(hcon->hdev,
HCI_EV_LE_PA_SYNC_ESTABLISHED);
if (ev2)
parent = iso_get_sock_listen(&hcon->src,
&hcon->dst,
iso_match_sid, ev2);
+ } else if (test_bit(HCI_CONN_PA_SYNC, &hcon->flags)) {
+ ev3 = hci_recv_event_data(hcon->hdev,
+ HCI_EVT_LE_BIG_INFO_ADV_REPORT);
+ if (ev3)
+ parent = iso_get_sock_listen(&hcon->src,
+ &hcon->dst,
+ iso_match_sync_handle, ev3);
}
if (!parent)
@@ -1647,11 +1822,13 @@ static void iso_conn_ready(struct iso_conn *conn)
hcon->sync_handle = iso_pi(parent)->sync_handle;
}
- if (ev2 && !ev2->status) {
- iso_pi(sk)->sync_handle = iso_pi(parent)->sync_handle;
+ if (ev3) {
iso_pi(sk)->qos = iso_pi(parent)->qos;
+ iso_pi(sk)->qos.bcast.encryption = ev3->encryption;
+ hcon->iso_qos = iso_pi(sk)->qos;
iso_pi(sk)->bc_num_bis = iso_pi(parent)->bc_num_bis;
memcpy(iso_pi(sk)->bc_bis, iso_pi(parent)->bc_bis, ISO_MAX_NUM_BIS);
+ set_bit(BT_SK_PA_SYNC, &iso_pi(sk)->flags);
}
bacpy(&iso_pi(sk)->dst, &hcon->dst);
@@ -1679,6 +1856,7 @@ static void iso_conn_ready(struct iso_conn *conn)
parent->sk_data_ready(parent);
release_sock(parent);
+ sock_put(parent);
}
}
@@ -1744,9 +1922,20 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags)
/* Try to get PA sync listening socket, if it exists */
sk = iso_get_sock_listen(&hdev->bdaddr, bdaddr,
iso_match_pa_sync_flag, NULL);
- if (!sk)
+
+ if (!sk) {
sk = iso_get_sock_listen(&hdev->bdaddr, bdaddr,
iso_match_sync_handle, ev2);
+
+ /* If PA Sync is in process of terminating,
+ * do not handle any more BIGInfo adv reports.
+ */
+
+ if (sk && test_bit(BT_SK_PA_SYNC_TERM,
+ &iso_pi(sk)->flags))
+ return lm;
+ }
+
if (sk) {
int err;
@@ -1763,6 +1952,7 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags)
if (err) {
bt_dev_err(hdev, "hci_le_big_create_sync: %d",
err);
+ sock_put(sk);
sk = NULL;
}
}
@@ -1771,12 +1961,16 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags)
ev3 = hci_recv_event_data(hdev, HCI_EV_LE_PER_ADV_REPORT);
if (ev3) {
+ size_t base_len = ev3->length;
+ u8 *base;
+
sk = iso_get_sock_listen(&hdev->bdaddr, bdaddr,
iso_match_sync_handle_pa_report, ev3);
-
- if (sk) {
- memcpy(iso_pi(sk)->base, ev3->data, ev3->length);
- iso_pi(sk)->base_len = ev3->length;
+ base = eir_get_service_data(ev3->data, ev3->length,
+ EIR_BAA_SERVICE_UUID, &base_len);
+ if (base && sk && base_len <= sizeof(iso_pi(sk)->base)) {
+ memcpy(iso_pi(sk)->base, base, base_len);
+ iso_pi(sk)->base_len = base_len;
}
} else {
sk = iso_get_sock_listen(&hdev->bdaddr, BDADDR_ANY, NULL, NULL);
@@ -1791,6 +1985,8 @@ done:
if (test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags))
*flags |= HCI_PROTO_DEFER;
+ sock_put(sk);
+
return lm;
}
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 17ca13e8c044..60298975d5c4 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -6492,6 +6492,14 @@ drop:
kfree_skb(skb);
}
+static inline void l2cap_sig_send_rej(struct l2cap_conn *conn, u16 ident)
+{
+ struct l2cap_cmd_rej_unk rej;
+
+ rej.reason = cpu_to_le16(L2CAP_REJ_NOT_UNDERSTOOD);
+ l2cap_send_cmd(conn, ident, L2CAP_COMMAND_REJ, sizeof(rej), &rej);
+}
+
static inline void l2cap_sig_channel(struct l2cap_conn *conn,
struct sk_buff *skb)
{
@@ -6517,23 +6525,25 @@ static inline void l2cap_sig_channel(struct l2cap_conn *conn,
if (len > skb->len || !cmd->ident) {
BT_DBG("corrupted command");
- break;
+ l2cap_sig_send_rej(conn, cmd->ident);
+ skb_pull(skb, len > skb->len ? skb->len : len);
+ continue;
}
err = l2cap_bredr_sig_cmd(conn, cmd, len, skb->data);
if (err) {
- struct l2cap_cmd_rej_unk rej;
-
BT_ERR("Wrong link type (%d)", err);
-
- rej.reason = cpu_to_le16(L2CAP_REJ_NOT_UNDERSTOOD);
- l2cap_send_cmd(conn, cmd->ident, L2CAP_COMMAND_REJ,
- sizeof(rej), &rej);
+ l2cap_sig_send_rej(conn, cmd->ident);
}
skb_pull(skb, len);
}
+ if (skb->len > 0) {
+ BT_DBG("corrupted command");
+ l2cap_sig_send_rej(conn, 0);
+ }
+
drop:
kfree_skb(skb);
}
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 3bdfc3f1e73d..e50d3d102078 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -1615,7 +1615,7 @@ static struct sk_buff *l2cap_sock_alloc_skb_cb(struct l2cap_chan *chan,
return ERR_PTR(-ENOTCONN);
}
- skb->priority = sk->sk_priority;
+ skb->priority = READ_ONCE(sk->sk_priority);
bt_cb(skb)->l2cap.chan = chan;
diff --git a/net/bluetooth/lib.c b/net/bluetooth/lib.c
index 53a796ac078c..43aa01fd07b9 100644
--- a/net/bluetooth/lib.c
+++ b/net/bluetooth/lib.c
@@ -30,6 +30,15 @@
#include <net/bluetooth/bluetooth.h>
+/**
+ * baswap() - Swaps the order of a bd address
+ * @dst: Pointer to a bdaddr_t struct that will store the swapped
+ * bd address.
+ * @src: Pointer to the bdaddr_t struct to be swapped.
+ *
+ * This function reverses the byte order of a Bluetooth device
+ * address.
+ */
void baswap(bdaddr_t *dst, const bdaddr_t *src)
{
const unsigned char *s = (const unsigned char *)src;
@@ -41,7 +50,19 @@ void baswap(bdaddr_t *dst, const bdaddr_t *src)
}
EXPORT_SYMBOL(baswap);
-/* Bluetooth error codes to Unix errno mapping */
+/**
+ * bt_to_errno() - Bluetooth error codes to standard errno
+ * @code: Bluetooth error code to be converted
+ *
+ * This function takes a Bluetooth error code as input and convets
+ * it to an equivalent Unix/standard errno value.
+ *
+ * Return:
+ *
+ * If the bt error code is known, an equivalent Unix errno value
+ * is returned.
+ * If the given bt error code is not known, ENOSYS is returned.
+ */
int bt_to_errno(__u16 code)
{
switch (code) {
@@ -135,10 +156,22 @@ int bt_to_errno(__u16 code)
}
EXPORT_SYMBOL(bt_to_errno);
-/* Unix errno to Bluetooth error codes mapping */
+/**
+ * bt_status() - Standard errno value to Bluetooth error code
+ * @err: Unix/standard errno value to be converted
+ *
+ * This function converts a standard/Unix errno value to an
+ * equivalent Bluetooth error code.
+ *
+ * Return: Bluetooth error code.
+ *
+ * If the given errno is not found, 0x1f is returned by default
+ * which indicates an unspecified error.
+ * For err >= 0, no conversion is performed, and the same value
+ * is immediately returned.
+ */
__u8 bt_status(int err)
{
- /* Don't convert if already positive value */
if (err >= 0)
return err;
@@ -206,6 +239,10 @@ __u8 bt_status(int err)
}
EXPORT_SYMBOL(bt_status);
+/**
+ * bt_info() - Log Bluetooth information message
+ * @format: Message's format string
+ */
void bt_info(const char *format, ...)
{
struct va_format vaf;
@@ -222,6 +259,10 @@ void bt_info(const char *format, ...)
}
EXPORT_SYMBOL(bt_info);
+/**
+ * bt_warn() - Log Bluetooth warning message
+ * @format: Message's format string
+ */
void bt_warn(const char *format, ...)
{
struct va_format vaf;
@@ -238,6 +279,10 @@ void bt_warn(const char *format, ...)
}
EXPORT_SYMBOL(bt_warn);
+/**
+ * bt_err() - Log Bluetooth error message
+ * @format: Message's format string
+ */
void bt_err(const char *format, ...)
{
struct va_format vaf;
@@ -267,6 +312,10 @@ bool bt_dbg_get(void)
return debug_enable;
}
+/**
+ * bt_dbg() - Log Bluetooth debugging message
+ * @format: Message's format string
+ */
void bt_dbg(const char *format, ...)
{
struct va_format vaf;
@@ -287,6 +336,13 @@ void bt_dbg(const char *format, ...)
EXPORT_SYMBOL(bt_dbg);
#endif
+/**
+ * bt_warn_ratelimited() - Log rate-limited Bluetooth warning message
+ * @format: Message's format string
+ *
+ * This functions works like bt_warn, but it uses rate limiting
+ * to prevent the message from being logged too often.
+ */
void bt_warn_ratelimited(const char *format, ...)
{
struct va_format vaf;
@@ -303,6 +359,13 @@ void bt_warn_ratelimited(const char *format, ...)
}
EXPORT_SYMBOL(bt_warn_ratelimited);
+/**
+ * bt_err_ratelimited() - Log rate-limited Bluetooth error message
+ * @format: Message's format string
+ *
+ * This functions works like bt_err, but it uses rate limiting
+ * to prevent the message from being logged too often.
+ */
void bt_err_ratelimited(const char *format, ...)
{
struct va_format vaf;
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index ba2e00646e8e..bb72ff6eb22f 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -2897,7 +2897,8 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data,
for (i = 0; i < key_count; i++) {
struct mgmt_link_key_info *key = &cp->keys[i];
- if (key->addr.type != BDADDR_BREDR || key->type > 0x08)
+ /* Considering SMP over BREDR/LE, there is no need to check addr_type */
+ if (key->type > 0x08)
return mgmt_cmd_status(sk, hdev->id,
MGMT_OP_LOAD_LINK_KEYS,
MGMT_STATUS_INVALID_PARAMS);
@@ -7130,6 +7131,7 @@ static int load_irks(struct sock *sk, struct hci_dev *hdev, void *cp_data,
for (i = 0; i < irk_count; i++) {
struct mgmt_irk_info *irk = &cp->irks[i];
+ u8 addr_type = le_addr_type(irk->addr.type);
if (hci_is_blocked_key(hdev,
HCI_BLOCKED_KEY_TYPE_IRK,
@@ -7139,8 +7141,12 @@ static int load_irks(struct sock *sk, struct hci_dev *hdev, void *cp_data,
continue;
}
+ /* When using SMP over BR/EDR, the addr type should be set to BREDR */
+ if (irk->addr.type == BDADDR_BREDR)
+ addr_type = BDADDR_BREDR;
+
hci_add_irk(hdev, &irk->addr.bdaddr,
- le_addr_type(irk->addr.type), irk->val,
+ addr_type, irk->val,
BDADDR_ANY);
}
@@ -7221,6 +7227,7 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev,
for (i = 0; i < key_count; i++) {
struct mgmt_ltk_info *key = &cp->keys[i];
u8 type, authenticated;
+ u8 addr_type = le_addr_type(key->addr.type);
if (hci_is_blocked_key(hdev,
HCI_BLOCKED_KEY_TYPE_LTK,
@@ -7255,8 +7262,12 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev,
continue;
}
+ /* When using SMP over BR/EDR, the addr type should be set to BREDR */
+ if (key->addr.type == BDADDR_BREDR)
+ addr_type = BDADDR_BREDR;
+
hci_add_ltk(hdev, &key->addr.bdaddr,
- le_addr_type(key->addr.type), type, authenticated,
+ addr_type, type, authenticated,
key->val, key->enc_size, key->ediv, key->rand);
}
@@ -9523,7 +9534,7 @@ void mgmt_new_link_key(struct hci_dev *hdev, struct link_key *key,
ev.store_hint = persistent;
bacpy(&ev.key.addr.bdaddr, &key->bdaddr);
- ev.key.addr.type = BDADDR_BREDR;
+ ev.key.addr.type = link_to_bdaddr(key->link_type, key->bdaddr_type);
ev.key.type = key->type;
memcpy(ev.key.val, key->val, HCI_LINK_KEY_SIZE);
ev.key.pin_len = key->pin_len;
@@ -9574,7 +9585,7 @@ void mgmt_new_ltk(struct hci_dev *hdev, struct smp_ltk *key, bool persistent)
ev.store_hint = persistent;
bacpy(&ev.key.addr.bdaddr, &key->bdaddr);
- ev.key.addr.type = link_to_bdaddr(LE_LINK, key->bdaddr_type);
+ ev.key.addr.type = link_to_bdaddr(key->link_type, key->bdaddr_type);
ev.key.type = mgmt_ltk_type(key);
ev.key.enc_size = key->enc_size;
ev.key.ediv = key->ediv;
@@ -9603,7 +9614,7 @@ void mgmt_new_irk(struct hci_dev *hdev, struct smp_irk *irk, bool persistent)
bacpy(&ev.rpa, &irk->rpa);
bacpy(&ev.irk.addr.bdaddr, &irk->bdaddr);
- ev.irk.addr.type = link_to_bdaddr(LE_LINK, irk->addr_type);
+ ev.irk.addr.type = link_to_bdaddr(irk->link_type, irk->addr_type);
memcpy(ev.irk.val, irk->val, sizeof(irk->val));
mgmt_event(MGMT_EV_NEW_IRK, hdev, &ev, sizeof(ev), NULL);
@@ -9632,7 +9643,7 @@ void mgmt_new_csrk(struct hci_dev *hdev, struct smp_csrk *csrk,
ev.store_hint = persistent;
bacpy(&ev.key.addr.bdaddr, &csrk->bdaddr);
- ev.key.addr.type = link_to_bdaddr(LE_LINK, csrk->bdaddr_type);
+ ev.key.addr.type = link_to_bdaddr(csrk->link_type, csrk->bdaddr_type);
ev.key.type = csrk->type;
memcpy(ev.key.val, csrk->val, sizeof(csrk->val));
@@ -10134,21 +10145,6 @@ static bool eir_has_uuids(u8 *eir, u16 eir_len, u16 uuid_count, u8 (*uuids)[16])
return false;
}
-static void restart_le_scan(struct hci_dev *hdev)
-{
- /* If controller is not scanning we are done. */
- if (!hci_dev_test_flag(hdev, HCI_LE_SCAN))
- return;
-
- if (time_after(jiffies + DISCOV_LE_RESTART_DELAY,
- hdev->discovery.scan_start +
- hdev->discovery.scan_duration))
- return;
-
- queue_delayed_work(hdev->req_workqueue, &hdev->le_scan_restart,
- DISCOV_LE_RESTART_DELAY);
-}
-
static bool is_filter_match(struct hci_dev *hdev, s8 rssi, u8 *eir,
u16 eir_len, u8 *scan_rsp, u8 scan_rsp_len)
{
@@ -10183,8 +10179,6 @@ static bool is_filter_match(struct hci_dev *hdev, s8 rssi, u8 *eir,
* scanning to ensure updated result with updated RSSI values.
*/
if (test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks)) {
- restart_le_scan(hdev);
-
/* Validate RSSI value against the RSSI threshold once more. */
if (hdev->discovery.rssi != HCI_RSSI_INVALID &&
rssi < hdev->discovery.rssi)
diff --git a/net/bluetooth/msft.c b/net/bluetooth/msft.c
index abbafa6194ca..630e3023273b 100644
--- a/net/bluetooth/msft.c
+++ b/net/bluetooth/msft.c
@@ -150,10 +150,7 @@ static bool read_supported_features(struct hci_dev *hdev,
skb = __hci_cmd_sync(hdev, hdev->msft_opcode, sizeof(cp), &cp,
HCI_CMD_TIMEOUT);
- if (IS_ERR_OR_NULL(skb)) {
- if (!skb)
- skb = ERR_PTR(-EIO);
-
+ if (IS_ERR(skb)) {
bt_dev_err(hdev, "Failed to read MSFT supported features (%ld)",
PTR_ERR(skb));
return false;
@@ -353,7 +350,7 @@ static void msft_remove_addr_filters_sync(struct hci_dev *hdev, u8 handle)
skb = __hci_cmd_sync(hdev, hdev->msft_opcode, sizeof(cp), &cp,
HCI_CMD_TIMEOUT);
- if (IS_ERR_OR_NULL(skb)) {
+ if (IS_ERR(skb)) {
kfree(address_filter);
continue;
}
@@ -442,11 +439,8 @@ static int msft_remove_monitor_sync(struct hci_dev *hdev,
skb = __hci_cmd_sync(hdev, hdev->msft_opcode, sizeof(cp), &cp,
HCI_CMD_TIMEOUT);
- if (IS_ERR_OR_NULL(skb)) {
- if (!skb)
- return -EIO;
+ if (IS_ERR(skb))
return PTR_ERR(skb);
- }
return msft_le_cancel_monitor_advertisement_cb(hdev, hdev->msft_opcode,
monitor, skb);
@@ -559,7 +553,7 @@ static int msft_add_monitor_sync(struct hci_dev *hdev,
skb = __hci_cmd_sync(hdev, hdev->msft_opcode, total_size, cp,
HCI_CMD_TIMEOUT);
- if (IS_ERR_OR_NULL(skb)) {
+ if (IS_ERR(skb)) {
err = PTR_ERR(skb);
goto out_free;
}
@@ -740,10 +734,10 @@ static int msft_cancel_address_filter_sync(struct hci_dev *hdev, void *data)
skb = __hci_cmd_sync(hdev, hdev->msft_opcode, sizeof(cp), &cp,
HCI_CMD_TIMEOUT);
- if (IS_ERR_OR_NULL(skb)) {
+ if (IS_ERR(skb)) {
bt_dev_err(hdev, "MSFT: Failed to cancel address (%pMR) filter",
&address_filter->bdaddr);
- err = -EIO;
+ err = PTR_ERR(skb);
goto done;
}
kfree_skb(skb);
@@ -893,7 +887,7 @@ static int msft_add_address_filter_sync(struct hci_dev *hdev, void *data)
skb = __hci_cmd_sync(hdev, hdev->msft_opcode, size, cp,
HCI_CMD_TIMEOUT);
- if (IS_ERR_OR_NULL(skb)) {
+ if (IS_ERR(skb)) {
bt_dev_err(hdev, "Failed to enable address %pMR filter",
&address_filter->bdaddr);
skb = NULL;
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index 94ec913dfb76..69c75c041fe1 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -1041,7 +1041,7 @@ static void rfcomm_tty_flush_buffer(struct tty_struct *tty)
tty_wakeup(tty);
}
-static void rfcomm_tty_send_xchar(struct tty_struct *tty, char ch)
+static void rfcomm_tty_send_xchar(struct tty_struct *tty, u8 ch)
{
BT_DBG("tty %p ch %c", tty, ch);
}
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index f1a9fc0012f0..1e7ea3a4b7ef 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -22,11 +22,10 @@
#include <linux/debugfs.h>
#include <linux/scatterlist.h>
-#include <linux/crypto.h>
#include <crypto/aes.h>
-#include <crypto/algapi.h>
#include <crypto/hash.h>
#include <crypto/kpp.h>
+#include <crypto/utils.h>
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>
@@ -1060,6 +1059,7 @@ static void smp_notify_keys(struct l2cap_conn *conn)
}
if (smp->remote_irk) {
+ smp->remote_irk->link_type = hcon->type;
mgmt_new_irk(hdev, smp->remote_irk, persistent);
/* Now that user space can be considered to know the
@@ -1079,24 +1079,28 @@ static void smp_notify_keys(struct l2cap_conn *conn)
}
if (smp->csrk) {
+ smp->csrk->link_type = hcon->type;
smp->csrk->bdaddr_type = hcon->dst_type;
bacpy(&smp->csrk->bdaddr, &hcon->dst);
mgmt_new_csrk(hdev, smp->csrk, persistent);
}
if (smp->responder_csrk) {
+ smp->responder_csrk->link_type = hcon->type;
smp->responder_csrk->bdaddr_type = hcon->dst_type;
bacpy(&smp->responder_csrk->bdaddr, &hcon->dst);
mgmt_new_csrk(hdev, smp->responder_csrk, persistent);
}
if (smp->ltk) {
+ smp->ltk->link_type = hcon->type;
smp->ltk->bdaddr_type = hcon->dst_type;
bacpy(&smp->ltk->bdaddr, &hcon->dst);
mgmt_new_ltk(hdev, smp->ltk, persistent);
}
if (smp->responder_ltk) {
+ smp->responder_ltk->link_type = hcon->type;
smp->responder_ltk->bdaddr_type = hcon->dst_type;
bacpy(&smp->responder_ltk->bdaddr, &hcon->dst);
mgmt_new_ltk(hdev, smp->responder_ltk, persistent);
@@ -1116,6 +1120,8 @@ static void smp_notify_keys(struct l2cap_conn *conn)
key = hci_add_link_key(hdev, smp->conn->hcon, &hcon->dst,
smp->link_key, type, 0, &persistent);
if (key) {
+ key->link_type = hcon->type;
+ key->bdaddr_type = hcon->dst_type;
mgmt_new_link_key(hdev, key, persistent);
/* Don't keep debug keys around if the relevant
diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c
index 5918d1b32e19..8906f7bdf4a9 100644
--- a/net/bpf/bpf_dummy_struct_ops.c
+++ b/net/bpf/bpf_dummy_struct_ops.c
@@ -12,6 +12,11 @@ extern struct bpf_struct_ops bpf_bpf_dummy_ops;
/* A common type for test_N with return value in bpf_dummy_ops */
typedef int (*dummy_ops_test_ret_fn)(struct bpf_dummy_ops_state *state, ...);
+static int dummy_ops_test_ret_function(struct bpf_dummy_ops_state *state, ...)
+{
+ return 0;
+}
+
struct bpf_dummy_ops_test_args {
u64 args[MAX_BPF_FUNC_ARGS];
struct bpf_dummy_ops_state state;
@@ -62,7 +67,7 @@ static int dummy_ops_copy_args(struct bpf_dummy_ops_test_args *args)
static int dummy_ops_call_op(void *image, struct bpf_dummy_ops_test_args *args)
{
- dummy_ops_test_ret_fn test = (void *)image;
+ dummy_ops_test_ret_fn test = (void *)image + cfi_get_offset();
struct bpf_dummy_ops_state *state = NULL;
/* state needs to be NULL if args[0] is 0 */
@@ -101,12 +106,11 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
goto out;
}
- image = bpf_jit_alloc_exec(PAGE_SIZE);
+ image = arch_alloc_bpf_trampoline(PAGE_SIZE);
if (!image) {
err = -ENOMEM;
goto out;
}
- set_vm_flush_reset_perms(image);
link = kzalloc(sizeof(*link), GFP_USER);
if (!link) {
@@ -120,11 +124,12 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
op_idx = prog->expected_attach_type;
err = bpf_struct_ops_prepare_trampoline(tlinks, link,
&st_ops->func_models[op_idx],
+ &dummy_ops_test_ret_function,
image, image + PAGE_SIZE);
if (err < 0)
goto out;
- set_memory_rox((long)image, 1);
+ arch_protect_bpf_trampoline(image, PAGE_SIZE);
prog_ret = dummy_ops_call_op(image, args);
err = dummy_ops_copy_args(args);
@@ -134,7 +139,7 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
err = -EFAULT;
out:
kfree(args);
- bpf_jit_free_exec(image);
+ arch_free_bpf_trampoline(image, PAGE_SIZE);
if (link)
bpf_link_put(&link->link);
kfree(tlinks);
@@ -220,6 +225,28 @@ static void bpf_dummy_unreg(void *kdata)
{
}
+static int bpf_dummy_test_1(struct bpf_dummy_ops_state *cb)
+{
+ return 0;
+}
+
+static int bpf_dummy_test_2(struct bpf_dummy_ops_state *cb, int a1, unsigned short a2,
+ char a3, unsigned long a4)
+{
+ return 0;
+}
+
+static int bpf_dummy_test_sleepable(struct bpf_dummy_ops_state *cb)
+{
+ return 0;
+}
+
+static struct bpf_dummy_ops __bpf_bpf_dummy_ops = {
+ .test_1 = bpf_dummy_test_1,
+ .test_2 = bpf_dummy_test_2,
+ .test_sleepable = bpf_dummy_test_sleepable,
+};
+
struct bpf_struct_ops bpf_bpf_dummy_ops = {
.verifier_ops = &bpf_dummy_verifier_ops,
.init = bpf_dummy_init,
@@ -228,4 +255,5 @@ struct bpf_struct_ops bpf_bpf_dummy_ops = {
.reg = bpf_dummy_reg,
.unreg = bpf_dummy_unreg,
.name = "bpf_dummy_ops",
+ .cfi_stubs = &__bpf_bpf_dummy_ops,
};
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 57a7a64b84ed..dfd919374017 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -503,9 +503,8 @@ out:
* architecture dependent calling conventions. 7+ can be supported in the
* future.
*/
-__diag_push();
-__diag_ignore_all("-Wmissing-prototypes",
- "Global functions as their definitions will be in vmlinux BTF");
+__bpf_kfunc_start_defs();
+
__bpf_kfunc int bpf_fentry_test1(int a)
{
return a + 1;
@@ -543,6 +542,7 @@ struct bpf_fentry_test_t {
int noinline bpf_fentry_test7(struct bpf_fentry_test_t *arg)
{
+ asm volatile ("": "+r"(arg));
return (long)arg;
}
@@ -600,11 +600,22 @@ __bpf_kfunc void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p)
refcount_dec(&p->cnt);
}
+__bpf_kfunc void bpf_kfunc_call_test_release_dtor(void *p)
+{
+ bpf_kfunc_call_test_release(p);
+}
+CFI_NOSEAL(bpf_kfunc_call_test_release_dtor);
+
__bpf_kfunc void bpf_kfunc_call_memb_release(struct prog_test_member *p)
{
}
-__diag_pop();
+__bpf_kfunc void bpf_kfunc_call_memb_release_dtor(void *p)
+{
+}
+CFI_NOSEAL(bpf_kfunc_call_memb_release_dtor);
+
+__bpf_kfunc_end_defs();
BTF_SET8_START(bpf_test_modify_return_ids)
BTF_ID_FLAGS(func, bpf_modify_return_test)
@@ -1671,9 +1682,9 @@ static const struct btf_kfunc_id_set bpf_prog_test_kfunc_set = {
BTF_ID_LIST(bpf_prog_test_dtor_kfunc_ids)
BTF_ID(struct, prog_test_ref_kfunc)
-BTF_ID(func, bpf_kfunc_call_test_release)
+BTF_ID(func, bpf_kfunc_call_test_release_dtor)
BTF_ID(struct, prog_test_member)
-BTF_ID(func, bpf_kfunc_call_memb_release)
+BTF_ID(func, bpf_kfunc_call_memb_release_dtor)
static int __init bpf_prog_test_run_init(void)
{
diff --git a/net/bpfilter/.gitignore b/net/bpfilter/.gitignore
deleted file mode 100644
index f34e85ee8204..000000000000
--- a/net/bpfilter/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-bpfilter_umh
diff --git a/net/bpfilter/Kconfig b/net/bpfilter/Kconfig
deleted file mode 100644
index 3d4a21462458..000000000000
--- a/net/bpfilter/Kconfig
+++ /dev/null
@@ -1,23 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-menuconfig BPFILTER
- bool "BPF based packet filtering framework (BPFILTER)"
- depends on BPF && INET
- select USERMODE_DRIVER
- help
- This builds experimental bpfilter framework that is aiming to
- provide netfilter compatible functionality via BPF
-
-if BPFILTER
-config BPFILTER_UMH
- tristate "bpfilter kernel module with user mode helper"
- depends on CC_CAN_LINK
- depends on m || CC_CAN_LINK_STATIC
- default m
- help
- This builds bpfilter kernel module with embedded user mode helper
-
- Note: To compile this as built-in, your toolchain must support
- building static binaries, since rootfs isn't mounted at the time
- when __init functions are called and do_execv won't be able to find
- the elf interpreter.
-endif
diff --git a/net/bpfilter/Makefile b/net/bpfilter/Makefile
deleted file mode 100644
index cdac82b8c53a..000000000000
--- a/net/bpfilter/Makefile
+++ /dev/null
@@ -1,20 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# Makefile for the Linux BPFILTER layer.
-#
-
-userprogs := bpfilter_umh
-bpfilter_umh-objs := main.o
-userccflags += -I $(srctree)/tools/include/ -I $(srctree)/tools/include/uapi
-
-ifeq ($(CONFIG_BPFILTER_UMH), y)
-# builtin bpfilter_umh should be linked with -static
-# since rootfs isn't mounted at the time of __init
-# function is called and do_execv won't find elf interpreter
-userldflags += -static
-endif
-
-$(obj)/bpfilter_umh_blob.o: $(obj)/bpfilter_umh
-
-obj-$(CONFIG_BPFILTER_UMH) += bpfilter.o
-bpfilter-objs += bpfilter_kern.o bpfilter_umh_blob.o
diff --git a/net/bpfilter/bpfilter_kern.c b/net/bpfilter/bpfilter_kern.c
deleted file mode 100644
index 97e129e3f31c..000000000000
--- a/net/bpfilter/bpfilter_kern.c
+++ /dev/null
@@ -1,136 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/umh.h>
-#include <linux/bpfilter.h>
-#include <linux/sched.h>
-#include <linux/sched/signal.h>
-#include <linux/fs.h>
-#include <linux/file.h>
-#include "msgfmt.h"
-
-extern char bpfilter_umh_start;
-extern char bpfilter_umh_end;
-
-static void shutdown_umh(void)
-{
- struct umd_info *info = &bpfilter_ops.info;
- struct pid *tgid = info->tgid;
-
- if (tgid) {
- kill_pid(tgid, SIGKILL, 1);
- wait_event(tgid->wait_pidfd, thread_group_exited(tgid));
- umd_cleanup_helper(info);
- }
-}
-
-static void __stop_umh(void)
-{
- if (IS_ENABLED(CONFIG_INET))
- shutdown_umh();
-}
-
-static int bpfilter_send_req(struct mbox_request *req)
-{
- struct mbox_reply reply;
- loff_t pos = 0;
- ssize_t n;
-
- if (!bpfilter_ops.info.tgid)
- return -EFAULT;
- pos = 0;
- n = kernel_write(bpfilter_ops.info.pipe_to_umh, req, sizeof(*req),
- &pos);
- if (n != sizeof(*req)) {
- pr_err("write fail %zd\n", n);
- goto stop;
- }
- pos = 0;
- n = kernel_read(bpfilter_ops.info.pipe_from_umh, &reply, sizeof(reply),
- &pos);
- if (n != sizeof(reply)) {
- pr_err("read fail %zd\n", n);
- goto stop;
- }
- return reply.status;
-stop:
- __stop_umh();
- return -EFAULT;
-}
-
-static int bpfilter_process_sockopt(struct sock *sk, int optname,
- sockptr_t optval, unsigned int optlen,
- bool is_set)
-{
- struct mbox_request req = {
- .is_set = is_set,
- .pid = current->pid,
- .cmd = optname,
- .addr = (uintptr_t)optval.user,
- .len = optlen,
- };
- if (sockptr_is_kernel(optval)) {
- pr_err("kernel access not supported\n");
- return -EFAULT;
- }
- return bpfilter_send_req(&req);
-}
-
-static int start_umh(void)
-{
- struct mbox_request req = { .pid = current->pid };
- int err;
-
- /* fork usermode process */
- err = fork_usermode_driver(&bpfilter_ops.info);
- if (err)
- return err;
- pr_info("Loaded bpfilter_umh pid %d\n", pid_nr(bpfilter_ops.info.tgid));
-
- /* health check that usermode process started correctly */
- if (bpfilter_send_req(&req) != 0) {
- shutdown_umh();
- return -EFAULT;
- }
-
- return 0;
-}
-
-static int __init load_umh(void)
-{
- int err;
-
- err = umd_load_blob(&bpfilter_ops.info,
- &bpfilter_umh_start,
- &bpfilter_umh_end - &bpfilter_umh_start);
- if (err)
- return err;
-
- mutex_lock(&bpfilter_ops.lock);
- err = start_umh();
- if (!err && IS_ENABLED(CONFIG_INET)) {
- bpfilter_ops.sockopt = &bpfilter_process_sockopt;
- bpfilter_ops.start = &start_umh;
- }
- mutex_unlock(&bpfilter_ops.lock);
- if (err)
- umd_unload_blob(&bpfilter_ops.info);
- return err;
-}
-
-static void __exit fini_umh(void)
-{
- mutex_lock(&bpfilter_ops.lock);
- if (IS_ENABLED(CONFIG_INET)) {
- shutdown_umh();
- bpfilter_ops.start = NULL;
- bpfilter_ops.sockopt = NULL;
- }
- mutex_unlock(&bpfilter_ops.lock);
-
- umd_unload_blob(&bpfilter_ops.info);
-}
-module_init(load_umh);
-module_exit(fini_umh);
-MODULE_LICENSE("GPL");
diff --git a/net/bpfilter/bpfilter_umh_blob.S b/net/bpfilter/bpfilter_umh_blob.S
deleted file mode 100644
index 40311d10d2f2..000000000000
--- a/net/bpfilter/bpfilter_umh_blob.S
+++ /dev/null
@@ -1,7 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
- .section .init.rodata, "a"
- .global bpfilter_umh_start
-bpfilter_umh_start:
- .incbin "net/bpfilter/bpfilter_umh"
- .global bpfilter_umh_end
-bpfilter_umh_end:
diff --git a/net/bpfilter/main.c b/net/bpfilter/main.c
deleted file mode 100644
index 291a92546246..000000000000
--- a/net/bpfilter/main.c
+++ /dev/null
@@ -1,64 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#define _GNU_SOURCE
-#include <sys/uio.h>
-#include <errno.h>
-#include <stdio.h>
-#include <sys/socket.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include "../../include/uapi/linux/bpf.h"
-#include <asm/unistd.h>
-#include "msgfmt.h"
-
-FILE *debug_f;
-
-static int handle_get_cmd(struct mbox_request *cmd)
-{
- switch (cmd->cmd) {
- case 0:
- return 0;
- default:
- break;
- }
- return -ENOPROTOOPT;
-}
-
-static int handle_set_cmd(struct mbox_request *cmd)
-{
- return -ENOPROTOOPT;
-}
-
-static void loop(void)
-{
- while (1) {
- struct mbox_request req;
- struct mbox_reply reply;
- int n;
-
- n = read(0, &req, sizeof(req));
- if (n != sizeof(req)) {
- fprintf(debug_f, "invalid request %d\n", n);
- return;
- }
-
- reply.status = req.is_set ?
- handle_set_cmd(&req) :
- handle_get_cmd(&req);
-
- n = write(1, &reply, sizeof(reply));
- if (n != sizeof(reply)) {
- fprintf(debug_f, "reply failed %d\n", n);
- return;
- }
- }
-}
-
-int main(void)
-{
- debug_f = fopen("/dev/kmsg", "w");
- setvbuf(debug_f, 0, _IOLBF, 0);
- fprintf(debug_f, "<5>Started bpfilter\n");
- loop();
- fclose(debug_f);
- return 0;
-}
diff --git a/net/bpfilter/msgfmt.h b/net/bpfilter/msgfmt.h
deleted file mode 100644
index 98d121c62945..000000000000
--- a/net/bpfilter/msgfmt.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _NET_BPFILTER_MSGFMT_H
-#define _NET_BPFILTER_MSGFMT_H
-
-struct mbox_request {
- __u64 addr;
- __u32 len;
- __u32 is_set;
- __u32 cmd;
- __u32 pid;
-};
-
-struct mbox_reply {
- __u32 status;
-};
-
-#endif
diff --git a/net/bridge/br.c b/net/bridge/br.c
index a6e94ceb7c9a..ac19b797dbec 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -477,3 +477,4 @@ module_exit(br_deinit)
MODULE_LICENSE("GPL");
MODULE_VERSION(BR_VERSION);
MODULE_ALIAS_RTNL_LINK("bridge");
+MODULE_DESCRIPTION("Ethernet bridge driver");
diff --git a/net/bridge/br_cfm_netlink.c b/net/bridge/br_cfm_netlink.c
index 5c4c369f8536..2faab44652e7 100644
--- a/net/bridge/br_cfm_netlink.c
+++ b/net/bridge/br_cfm_netlink.c
@@ -362,7 +362,7 @@ static int br_cc_ccm_tx_parse(struct net_bridge *br, struct nlattr *attr,
memset(&tx_info, 0, sizeof(tx_info));
- instance = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_RDI_INSTANCE]);
+ instance = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_CCM_TX_INSTANCE]);
nla_memcpy(&tx_info.dmac.addr,
tb[IFLA_BRIDGE_CFM_CC_CCM_TX_DMAC],
sizeof(tx_info.dmac.addr));
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 9a5ea06236bd..65cee0ad3c1b 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -92,7 +92,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
goto out;
}
- mdst = br_mdb_get(brmctx, skb, vid);
+ mdst = br_mdb_entry_skb_get(brmctx, skb, vid);
if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) &&
br_multicast_querier_exists(brmctx, eth_hdr(skb), mdst))
br_multicast_flood(mdst, skb, brmctx, false, true);
@@ -471,7 +471,9 @@ static const struct net_device_ops br_netdev_ops = {
.ndo_fdb_get = br_fdb_get,
.ndo_mdb_add = br_mdb_add,
.ndo_mdb_del = br_mdb_del,
+ .ndo_mdb_del_bulk = br_mdb_del_bulk,
.ndo_mdb_dump = br_mdb_dump,
+ .ndo_mdb_get = br_mdb_get,
.ndo_bridge_getlink = br_getlink,
.ndo_bridge_setlink = br_setlink,
.ndo_bridge_dellink = br_dellink,
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index e69a872bfc1d..c622de5eccd0 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -329,11 +329,18 @@ static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f,
hlist_del_init_rcu(&f->fdb_node);
rhashtable_remove_fast(&br->fdb_hash_tbl, &f->rhnode,
br_fdb_rht_params);
+ if (test_and_clear_bit(BR_FDB_DYNAMIC_LEARNED, &f->flags))
+ atomic_dec(&br->fdb_n_learned);
fdb_notify(br, f, RTM_DELNEIGH, swdev_notify);
call_rcu(&f->rcu, fdb_rcu_free);
}
-/* Delete a local entry if no other port had the same address. */
+/* Delete a local entry if no other port had the same address.
+ *
+ * This function should only be called on entries with BR_FDB_LOCAL set,
+ * so even with BR_FDB_ADDED_BY_USER cleared we never need to increase
+ * the accounting for dynamically learned entries again.
+ */
static void fdb_delete_local(struct net_bridge *br,
const struct net_bridge_port *p,
struct net_bridge_fdb_entry *f)
@@ -388,9 +395,20 @@ static struct net_bridge_fdb_entry *fdb_create(struct net_bridge *br,
__u16 vid,
unsigned long flags)
{
+ bool learned = !test_bit(BR_FDB_ADDED_BY_USER, &flags) &&
+ !test_bit(BR_FDB_LOCAL, &flags);
+ u32 max_learned = READ_ONCE(br->fdb_max_learned);
struct net_bridge_fdb_entry *fdb;
int err;
+ if (likely(learned)) {
+ int n_learned = atomic_read(&br->fdb_n_learned);
+
+ if (unlikely(max_learned && n_learned >= max_learned))
+ return NULL;
+ __set_bit(BR_FDB_DYNAMIC_LEARNED, &flags);
+ }
+
fdb = kmem_cache_alloc(br_fdb_cache, GFP_ATOMIC);
if (!fdb)
return NULL;
@@ -407,6 +425,9 @@ static struct net_bridge_fdb_entry *fdb_create(struct net_bridge *br,
return NULL;
}
+ if (likely(learned))
+ atomic_inc(&br->fdb_n_learned);
+
hlist_add_head_rcu(&fdb->fdb_node, &br->fdb_list);
return fdb;
@@ -661,14 +682,30 @@ static int __fdb_flush_validate_ifindex(const struct net_bridge *br,
return 0;
}
-int br_fdb_delete_bulk(struct ndmsg *ndm, struct nlattr *tb[],
- struct net_device *dev, u16 vid,
+static const struct nla_policy br_fdb_del_bulk_policy[NDA_MAX + 1] = {
+ [NDA_VLAN] = NLA_POLICY_RANGE(NLA_U16, 1, VLAN_N_VID - 2),
+ [NDA_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 1),
+ [NDA_NDM_STATE_MASK] = { .type = NLA_U16 },
+ [NDA_NDM_FLAGS_MASK] = { .type = NLA_U8 },
+};
+
+int br_fdb_delete_bulk(struct nlmsghdr *nlh, struct net_device *dev,
struct netlink_ext_ack *extack)
{
- u8 ndm_flags = ndm->ndm_flags & ~FDB_FLUSH_IGNORED_NDM_FLAGS;
- struct net_bridge_fdb_flush_desc desc = { .vlan_id = vid };
+ struct net_bridge_fdb_flush_desc desc = {};
+ struct ndmsg *ndm = nlmsg_data(nlh);
struct net_bridge_port *p = NULL;
+ struct nlattr *tb[NDA_MAX + 1];
struct net_bridge *br;
+ u8 ndm_flags;
+ int err;
+
+ ndm_flags = ndm->ndm_flags & ~FDB_FLUSH_IGNORED_NDM_FLAGS;
+
+ err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX,
+ br_fdb_del_bulk_policy, extack);
+ if (err)
+ return err;
if (netif_is_bridge_master(dev)) {
br = netdev_priv(dev);
@@ -681,6 +718,9 @@ int br_fdb_delete_bulk(struct ndmsg *ndm, struct nlattr *tb[],
br = p->br;
}
+ if (tb[NDA_VLAN])
+ desc.vlan_id = nla_get_u16(tb[NDA_VLAN]);
+
if (ndm_flags & ~FDB_FLUSH_ALLOWED_NDM_FLAGS) {
NL_SET_ERR_MSG(extack, "Unsupported fdb flush ndm flag bits set");
return -EINVAL;
@@ -703,7 +743,7 @@ int br_fdb_delete_bulk(struct ndmsg *ndm, struct nlattr *tb[],
desc.flags_mask |= __ndm_flags_to_fdb_flags(ndm_flags_mask);
}
if (tb[NDA_IFINDEX]) {
- int err, ifidx = nla_get_s32(tb[NDA_IFINDEX]);
+ int ifidx = nla_get_s32(tb[NDA_IFINDEX]);
err = __fdb_flush_validate_ifindex(br, ifidx, extack);
if (err)
@@ -893,8 +933,12 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
clear_bit(BR_FDB_LOCKED, &fdb->flags);
}
- if (unlikely(test_bit(BR_FDB_ADDED_BY_USER, &flags)))
+ if (unlikely(test_bit(BR_FDB_ADDED_BY_USER, &flags))) {
set_bit(BR_FDB_ADDED_BY_USER, &fdb->flags);
+ if (test_and_clear_bit(BR_FDB_DYNAMIC_LEARNED,
+ &fdb->flags))
+ atomic_dec(&br->fdb_n_learned);
+ }
if (unlikely(fdb_modified)) {
trace_br_fdb_update(br, source, addr, vid, flags);
fdb_notify(br, fdb, RTM_NEWNEIGH, true);
@@ -1056,7 +1100,8 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
if (!(flags & NLM_F_CREATE))
return -ENOENT;
- fdb = fdb_create(br, source, addr, vid, 0);
+ fdb = fdb_create(br, source, addr, vid,
+ BIT(BR_FDB_ADDED_BY_USER));
if (!fdb)
return -ENOMEM;
@@ -1069,6 +1114,10 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
WRITE_ONCE(fdb->dst, source);
modified = true;
}
+
+ set_bit(BR_FDB_ADDED_BY_USER, &fdb->flags);
+ if (test_and_clear_bit(BR_FDB_DYNAMIC_LEARNED, &fdb->flags))
+ atomic_dec(&br->fdb_n_learned);
}
if (fdb_to_nud(br, fdb) != state) {
@@ -1100,8 +1149,6 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
if (fdb_handle_notify(fdb, notify))
modified = true;
- set_bit(BR_FDB_ADDED_BY_USER, &fdb->flags);
-
fdb->used = jiffies;
if (modified) {
if (refresh)
@@ -1445,6 +1492,10 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
if (!p)
set_bit(BR_FDB_LOCAL, &fdb->flags);
+ if ((swdev_notify || !p) &&
+ test_and_clear_bit(BR_FDB_DYNAMIC_LEARNED, &fdb->flags))
+ atomic_dec(&br->fdb_n_learned);
+
if (modified)
fdb_notify(br, fdb, RTM_NEWNEIGH, swdev_notify);
}
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 9d7bc8b96b53..7431f89e897b 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -124,7 +124,7 @@ static int deliver_clone(const struct net_bridge_port *prev,
skb = skb_clone(skb, GFP_ATOMIC);
if (!skb) {
- dev->stats.tx_dropped++;
+ DEV_STATS_INC(dev, tx_dropped);
return -ENOMEM;
}
@@ -268,7 +268,7 @@ static void maybe_deliver_addr(struct net_bridge_port *p, struct sk_buff *skb,
skb = skb_copy(skb, GFP_ATOMIC);
if (!skb) {
- dev->stats.tx_dropped++;
+ DEV_STATS_INC(dev, tx_dropped);
return;
}
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index c34a0b0901b0..f21097e73482 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -175,18 +175,18 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
switch (pkt_type) {
case BR_PKT_MULTICAST:
- mdst = br_mdb_get(brmctx, skb, vid);
+ mdst = br_mdb_entry_skb_get(brmctx, skb, vid);
if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) &&
br_multicast_querier_exists(brmctx, eth_hdr(skb), mdst)) {
if ((mdst && mdst->host_joined) ||
br_multicast_is_router(brmctx, skb)) {
local_rcv = true;
- br->dev->stats.multicast++;
+ DEV_STATS_INC(br->dev, multicast);
}
mcast_hit = true;
} else {
local_rcv = true;
- br->dev->stats.multicast++;
+ DEV_STATS_INC(br->dev, multicast);
}
break;
case BR_PKT_UNICAST:
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 7305f5f8215c..bc37e47ad829 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -323,9 +323,6 @@ static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
struct net_bridge_mdb_entry *mp;
struct nlattr *nest, *nest2;
- if (!br_opt_get(br, BROPT_MULTICAST_ENABLED))
- return 0;
-
nest = nla_nest_start_noflag(skb, MDBA_MDB);
if (nest == NULL)
return -EMSGSIZE;
@@ -453,13 +450,15 @@ cancel:
return -EMSGSIZE;
}
-static size_t rtnl_mdb_nlmsg_size(struct net_bridge_port_group *pg)
+static size_t rtnl_mdb_nlmsg_pg_size(const struct net_bridge_port_group *pg)
{
- size_t nlmsg_size = NLMSG_ALIGN(sizeof(struct br_port_msg)) +
- nla_total_size(sizeof(struct br_mdb_entry)) +
- nla_total_size(sizeof(u32));
struct net_bridge_group_src *ent;
- size_t addr_size = 0;
+ size_t nlmsg_size, addr_size = 0;
+
+ /* MDBA_MDB_ENTRY_INFO */
+ nlmsg_size = nla_total_size(sizeof(struct br_mdb_entry)) +
+ /* MDBA_MDB_EATTR_TIMER */
+ nla_total_size(sizeof(u32));
if (!pg)
goto out;
@@ -507,6 +506,17 @@ out:
return nlmsg_size;
}
+static size_t rtnl_mdb_nlmsg_size(const struct net_bridge_port_group *pg)
+{
+ return NLMSG_ALIGN(sizeof(struct br_port_msg)) +
+ /* MDBA_MDB */
+ nla_total_size(0) +
+ /* MDBA_MDB_ENTRY */
+ nla_total_size(0) +
+ /* Port group entry */
+ rtnl_mdb_nlmsg_pg_size(pg);
+}
+
void br_mdb_notify(struct net_device *dev,
struct net_bridge_mdb_entry *mp,
struct net_bridge_port_group *pg,
@@ -1401,3 +1411,294 @@ int br_mdb_del(struct net_device *dev, struct nlattr *tb[],
br_mdb_config_fini(&cfg);
return err;
}
+
+struct br_mdb_flush_desc {
+ u32 port_ifindex;
+ u16 vid;
+ u8 rt_protocol;
+ u8 state;
+ u8 state_mask;
+};
+
+static const struct nla_policy br_mdbe_attrs_del_bulk_pol[MDBE_ATTR_MAX + 1] = {
+ [MDBE_ATTR_RTPROT] = NLA_POLICY_MIN(NLA_U8, RTPROT_STATIC),
+ [MDBE_ATTR_STATE_MASK] = NLA_POLICY_MASK(NLA_U8, MDB_PERMANENT),
+};
+
+static int br_mdb_flush_desc_init(struct br_mdb_flush_desc *desc,
+ struct nlattr *tb[],
+ struct netlink_ext_ack *extack)
+{
+ struct br_mdb_entry *entry = nla_data(tb[MDBA_SET_ENTRY]);
+ struct nlattr *mdbe_attrs[MDBE_ATTR_MAX + 1];
+ int err;
+
+ desc->port_ifindex = entry->ifindex;
+ desc->vid = entry->vid;
+ desc->state = entry->state;
+
+ if (!tb[MDBA_SET_ENTRY_ATTRS])
+ return 0;
+
+ err = nla_parse_nested(mdbe_attrs, MDBE_ATTR_MAX,
+ tb[MDBA_SET_ENTRY_ATTRS],
+ br_mdbe_attrs_del_bulk_pol, extack);
+ if (err)
+ return err;
+
+ if (mdbe_attrs[MDBE_ATTR_STATE_MASK])
+ desc->state_mask = nla_get_u8(mdbe_attrs[MDBE_ATTR_STATE_MASK]);
+
+ if (mdbe_attrs[MDBE_ATTR_RTPROT])
+ desc->rt_protocol = nla_get_u8(mdbe_attrs[MDBE_ATTR_RTPROT]);
+
+ return 0;
+}
+
+static void br_mdb_flush_host(struct net_bridge *br,
+ struct net_bridge_mdb_entry *mp,
+ const struct br_mdb_flush_desc *desc)
+{
+ u8 state;
+
+ if (desc->port_ifindex && desc->port_ifindex != br->dev->ifindex)
+ return;
+
+ if (desc->rt_protocol)
+ return;
+
+ state = br_group_is_l2(&mp->addr) ? MDB_PERMANENT : 0;
+ if (desc->state_mask && (state & desc->state_mask) != desc->state)
+ return;
+
+ br_multicast_host_leave(mp, true);
+ if (!mp->ports && netif_running(br->dev))
+ mod_timer(&mp->timer, jiffies);
+}
+
+static void br_mdb_flush_pgs(struct net_bridge *br,
+ struct net_bridge_mdb_entry *mp,
+ const struct br_mdb_flush_desc *desc)
+{
+ struct net_bridge_port_group __rcu **pp;
+ struct net_bridge_port_group *p;
+
+ for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL;) {
+ u8 state;
+
+ if (desc->port_ifindex &&
+ desc->port_ifindex != p->key.port->dev->ifindex) {
+ pp = &p->next;
+ continue;
+ }
+
+ if (desc->rt_protocol && desc->rt_protocol != p->rt_protocol) {
+ pp = &p->next;
+ continue;
+ }
+
+ state = p->flags & MDB_PG_FLAGS_PERMANENT ? MDB_PERMANENT : 0;
+ if (desc->state_mask &&
+ (state & desc->state_mask) != desc->state) {
+ pp = &p->next;
+ continue;
+ }
+
+ br_multicast_del_pg(mp, p, pp);
+ }
+}
+
+static void br_mdb_flush(struct net_bridge *br,
+ const struct br_mdb_flush_desc *desc)
+{
+ struct net_bridge_mdb_entry *mp;
+
+ spin_lock_bh(&br->multicast_lock);
+
+ /* Safe variant is not needed because entries are removed from the list
+ * upon group timer expiration or bridge deletion.
+ */
+ hlist_for_each_entry(mp, &br->mdb_list, mdb_node) {
+ if (desc->vid && desc->vid != mp->addr.vid)
+ continue;
+
+ br_mdb_flush_host(br, mp, desc);
+ br_mdb_flush_pgs(br, mp, desc);
+ }
+
+ spin_unlock_bh(&br->multicast_lock);
+}
+
+int br_mdb_del_bulk(struct net_device *dev, struct nlattr *tb[],
+ struct netlink_ext_ack *extack)
+{
+ struct net_bridge *br = netdev_priv(dev);
+ struct br_mdb_flush_desc desc = {};
+ int err;
+
+ err = br_mdb_flush_desc_init(&desc, tb, extack);
+ if (err)
+ return err;
+
+ br_mdb_flush(br, &desc);
+
+ return 0;
+}
+
+static const struct nla_policy br_mdbe_attrs_get_pol[MDBE_ATTR_MAX + 1] = {
+ [MDBE_ATTR_SOURCE] = NLA_POLICY_RANGE(NLA_BINARY,
+ sizeof(struct in_addr),
+ sizeof(struct in6_addr)),
+};
+
+static int br_mdb_get_parse(struct net_device *dev, struct nlattr *tb[],
+ struct br_ip *group, struct netlink_ext_ack *extack)
+{
+ struct br_mdb_entry *entry = nla_data(tb[MDBA_GET_ENTRY]);
+ struct nlattr *mdbe_attrs[MDBE_ATTR_MAX + 1];
+ int err;
+
+ if (!tb[MDBA_GET_ENTRY_ATTRS]) {
+ __mdb_entry_to_br_ip(entry, group, NULL);
+ return 0;
+ }
+
+ err = nla_parse_nested(mdbe_attrs, MDBE_ATTR_MAX,
+ tb[MDBA_GET_ENTRY_ATTRS], br_mdbe_attrs_get_pol,
+ extack);
+ if (err)
+ return err;
+
+ if (mdbe_attrs[MDBE_ATTR_SOURCE] &&
+ !is_valid_mdb_source(mdbe_attrs[MDBE_ATTR_SOURCE],
+ entry->addr.proto, extack))
+ return -EINVAL;
+
+ __mdb_entry_to_br_ip(entry, group, mdbe_attrs);
+
+ return 0;
+}
+
+static struct sk_buff *
+br_mdb_get_reply_alloc(const struct net_bridge_mdb_entry *mp)
+{
+ struct net_bridge_port_group *pg;
+ size_t nlmsg_size;
+
+ nlmsg_size = NLMSG_ALIGN(sizeof(struct br_port_msg)) +
+ /* MDBA_MDB */
+ nla_total_size(0) +
+ /* MDBA_MDB_ENTRY */
+ nla_total_size(0);
+
+ if (mp->host_joined)
+ nlmsg_size += rtnl_mdb_nlmsg_pg_size(NULL);
+
+ for (pg = mlock_dereference(mp->ports, mp->br); pg;
+ pg = mlock_dereference(pg->next, mp->br))
+ nlmsg_size += rtnl_mdb_nlmsg_pg_size(pg);
+
+ return nlmsg_new(nlmsg_size, GFP_ATOMIC);
+}
+
+static int br_mdb_get_reply_fill(struct sk_buff *skb,
+ struct net_bridge_mdb_entry *mp, u32 portid,
+ u32 seq)
+{
+ struct nlattr *mdb_nest, *mdb_entry_nest;
+ struct net_bridge_port_group *pg;
+ struct br_port_msg *bpm;
+ struct nlmsghdr *nlh;
+ int err;
+
+ nlh = nlmsg_put(skb, portid, seq, RTM_NEWMDB, sizeof(*bpm), 0);
+ if (!nlh)
+ return -EMSGSIZE;
+
+ bpm = nlmsg_data(nlh);
+ memset(bpm, 0, sizeof(*bpm));
+ bpm->family = AF_BRIDGE;
+ bpm->ifindex = mp->br->dev->ifindex;
+ mdb_nest = nla_nest_start_noflag(skb, MDBA_MDB);
+ if (!mdb_nest) {
+ err = -EMSGSIZE;
+ goto cancel;
+ }
+ mdb_entry_nest = nla_nest_start_noflag(skb, MDBA_MDB_ENTRY);
+ if (!mdb_entry_nest) {
+ err = -EMSGSIZE;
+ goto cancel;
+ }
+
+ if (mp->host_joined) {
+ err = __mdb_fill_info(skb, mp, NULL);
+ if (err)
+ goto cancel;
+ }
+
+ for (pg = mlock_dereference(mp->ports, mp->br); pg;
+ pg = mlock_dereference(pg->next, mp->br)) {
+ err = __mdb_fill_info(skb, mp, pg);
+ if (err)
+ goto cancel;
+ }
+
+ nla_nest_end(skb, mdb_entry_nest);
+ nla_nest_end(skb, mdb_nest);
+ nlmsg_end(skb, nlh);
+
+ return 0;
+
+cancel:
+ nlmsg_cancel(skb, nlh);
+ return err;
+}
+
+int br_mdb_get(struct net_device *dev, struct nlattr *tb[], u32 portid, u32 seq,
+ struct netlink_ext_ack *extack)
+{
+ struct net_bridge *br = netdev_priv(dev);
+ struct net_bridge_mdb_entry *mp;
+ struct sk_buff *skb;
+ struct br_ip group;
+ int err;
+
+ err = br_mdb_get_parse(dev, tb, &group, extack);
+ if (err)
+ return err;
+
+ /* Hold the multicast lock to ensure that the MDB entry does not change
+ * between the time the reply size is determined and when the reply is
+ * filled in.
+ */
+ spin_lock_bh(&br->multicast_lock);
+
+ mp = br_mdb_ip_get(br, &group);
+ if (!mp) {
+ NL_SET_ERR_MSG_MOD(extack, "MDB entry not found");
+ err = -ENOENT;
+ goto unlock;
+ }
+
+ skb = br_mdb_get_reply_alloc(mp);
+ if (!skb) {
+ err = -ENOMEM;
+ goto unlock;
+ }
+
+ err = br_mdb_get_reply_fill(skb, mp, portid, seq);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to fill MDB get reply");
+ goto free;
+ }
+
+ spin_unlock_bh(&br->multicast_lock);
+
+ return rtnl_unicast(skb, dev_net(dev), portid);
+
+free:
+ kfree_skb(skb);
+unlock:
+ spin_unlock_bh(&br->multicast_lock);
+ return err;
+}
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 96d1fc78dd39..2d7b73242958 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -145,8 +145,9 @@ static struct net_bridge_mdb_entry *br_mdb_ip6_get(struct net_bridge *br,
}
#endif
-struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge_mcast *brmctx,
- struct sk_buff *skb, u16 vid)
+struct net_bridge_mdb_entry *
+br_mdb_entry_skb_get(struct net_bridge_mcast *brmctx, struct sk_buff *skb,
+ u16 vid)
{
struct net_bridge *br = brmctx->br;
struct br_ip ip;
@@ -1761,6 +1762,10 @@ static void br_ip6_multicast_querier_expired(struct timer_list *t)
}
#endif
+static void br_multicast_query_delay_expired(struct timer_list *t)
+{
+}
+
static void br_multicast_select_own_querier(struct net_bridge_mcast *brmctx,
struct br_ip *ip,
struct sk_buff *skb)
@@ -3197,7 +3202,7 @@ br_multicast_update_query_timer(struct net_bridge_mcast *brmctx,
unsigned long max_delay)
{
if (!timer_pending(&query->timer))
- query->delay_time = jiffies + max_delay;
+ mod_timer(&query->delay_timer, jiffies + max_delay);
mod_timer(&query->timer, jiffies + brmctx->multicast_querier_interval);
}
@@ -4040,13 +4045,11 @@ void br_multicast_ctx_init(struct net_bridge *br,
brmctx->multicast_querier_interval = 255 * HZ;
brmctx->multicast_membership_interval = 260 * HZ;
- brmctx->ip4_other_query.delay_time = 0;
brmctx->ip4_querier.port_ifidx = 0;
seqcount_spinlock_init(&brmctx->ip4_querier.seq, &br->multicast_lock);
brmctx->multicast_igmp_version = 2;
#if IS_ENABLED(CONFIG_IPV6)
brmctx->multicast_mld_version = 1;
- brmctx->ip6_other_query.delay_time = 0;
brmctx->ip6_querier.port_ifidx = 0;
seqcount_spinlock_init(&brmctx->ip6_querier.seq, &br->multicast_lock);
#endif
@@ -4055,6 +4058,8 @@ void br_multicast_ctx_init(struct net_bridge *br,
br_ip4_multicast_local_router_expired, 0);
timer_setup(&brmctx->ip4_other_query.timer,
br_ip4_multicast_querier_expired, 0);
+ timer_setup(&brmctx->ip4_other_query.delay_timer,
+ br_multicast_query_delay_expired, 0);
timer_setup(&brmctx->ip4_own_query.timer,
br_ip4_multicast_query_expired, 0);
#if IS_ENABLED(CONFIG_IPV6)
@@ -4062,6 +4067,8 @@ void br_multicast_ctx_init(struct net_bridge *br,
br_ip6_multicast_local_router_expired, 0);
timer_setup(&brmctx->ip6_other_query.timer,
br_ip6_multicast_querier_expired, 0);
+ timer_setup(&brmctx->ip6_other_query.delay_timer,
+ br_multicast_query_delay_expired, 0);
timer_setup(&brmctx->ip6_own_query.timer,
br_ip6_multicast_query_expired, 0);
#endif
@@ -4196,10 +4203,12 @@ static void __br_multicast_stop(struct net_bridge_mcast *brmctx)
{
del_timer_sync(&brmctx->ip4_mc_router_timer);
del_timer_sync(&brmctx->ip4_other_query.timer);
+ del_timer_sync(&brmctx->ip4_other_query.delay_timer);
del_timer_sync(&brmctx->ip4_own_query.timer);
#if IS_ENABLED(CONFIG_IPV6)
del_timer_sync(&brmctx->ip6_mc_router_timer);
del_timer_sync(&brmctx->ip6_other_query.timer);
+ del_timer_sync(&brmctx->ip6_other_query.delay_timer);
del_timer_sync(&brmctx->ip6_own_query.timer);
#endif
}
@@ -4642,13 +4651,15 @@ int br_multicast_set_querier(struct net_bridge_mcast *brmctx, unsigned long val)
max_delay = brmctx->multicast_query_response_interval;
if (!timer_pending(&brmctx->ip4_other_query.timer))
- brmctx->ip4_other_query.delay_time = jiffies + max_delay;
+ mod_timer(&brmctx->ip4_other_query.delay_timer,
+ jiffies + max_delay);
br_multicast_start_querier(brmctx, &brmctx->ip4_own_query);
#if IS_ENABLED(CONFIG_IPV6)
if (!timer_pending(&brmctx->ip6_other_query.timer))
- brmctx->ip6_other_query.delay_time = jiffies + max_delay;
+ mod_timer(&brmctx->ip6_other_query.delay_timer,
+ jiffies + max_delay);
br_multicast_start_querier(brmctx, &brmctx->ip6_own_query);
#endif
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 15186247b59a..ed1720890757 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -279,8 +279,17 @@ int br_nf_pre_routing_finish_bridge(struct net *net, struct sock *sk, struct sk_
if ((READ_ONCE(neigh->nud_state) & NUD_CONNECTED) &&
READ_ONCE(neigh->hh.hh_len)) {
+ struct net_device *br_indev;
+
+ br_indev = nf_bridge_get_physindev(skb, net);
+ if (!br_indev) {
+ neigh_release(neigh);
+ goto free_skb;
+ }
+
neigh_hh_bridge(&neigh->hh, skb);
- skb->dev = nf_bridge->physindev;
+ skb->dev = br_indev;
+
ret = br_handle_frame_finish(net, sk, skb);
} else {
/* the neighbour function below overwrites the complete
@@ -294,7 +303,7 @@ int br_nf_pre_routing_finish_bridge(struct net *net, struct sock *sk, struct sk_
/* tell br_dev_xmit to continue with forwarding */
nf_bridge->bridged_dnat = 1;
/* FIXME Need to refragment */
- ret = neigh->output(neigh, skb);
+ ret = READ_ONCE(neigh->output)(neigh, skb);
}
neigh_release(neigh);
return ret;
@@ -352,12 +361,18 @@ br_nf_ipv4_daddr_was_changed(const struct sk_buff *skb,
*/
static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- struct net_device *dev = skb->dev;
+ struct net_device *dev = skb->dev, *br_indev;
struct iphdr *iph = ip_hdr(skb);
struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
struct rtable *rt;
int err;
+ br_indev = nf_bridge_get_physindev(skb, net);
+ if (!br_indev) {
+ kfree_skb(skb);
+ return 0;
+ }
+
nf_bridge->frag_max_size = IPCB(skb)->frag_max_size;
if (nf_bridge->pkt_otherhost) {
@@ -397,7 +412,7 @@ free_skb:
} else {
if (skb_dst(skb)->dev == dev) {
bridged_dnat:
- skb->dev = nf_bridge->physindev;
+ skb->dev = br_indev;
nf_bridge_update_protocol(skb);
nf_bridge_push_encap_header(skb);
br_nf_hook_thresh(NF_BR_PRE_ROUTING,
@@ -410,7 +425,7 @@ bridged_dnat:
skb->pkt_type = PACKET_HOST;
}
} else {
- rt = bridge_parent_rtable(nf_bridge->physindev);
+ rt = bridge_parent_rtable(br_indev);
if (!rt) {
kfree_skb(skb);
return 0;
@@ -419,7 +434,7 @@ bridged_dnat:
skb_dst_set_noref(skb, &rt->dst);
}
- skb->dev = nf_bridge->physindev;
+ skb->dev = br_indev;
nf_bridge_update_protocol(skb);
nf_bridge_push_encap_header(skb);
br_nf_hook_thresh(NF_BR_PRE_ROUTING, net, sk, skb, skb->dev, NULL,
@@ -456,7 +471,7 @@ struct net_device *setup_pre_routing(struct sk_buff *skb, const struct net *net)
}
nf_bridge->in_prerouting = 1;
- nf_bridge->physindev = skb->dev;
+ nf_bridge->physinif = skb->dev->ifindex;
skb->dev = brnf_get_logical_dev(skb, skb->dev, net);
if (skb->protocol == htons(ETH_P_8021Q))
@@ -486,11 +501,11 @@ static unsigned int br_nf_pre_routing(void *priv,
struct brnf_net *brnet;
if (unlikely(!pskb_may_pull(skb, len)))
- return NF_DROP;
+ return NF_DROP_REASON(skb, SKB_DROP_REASON_PKT_TOO_SMALL, 0);
p = br_port_get_rcu(state->in);
if (p == NULL)
- return NF_DROP;
+ return NF_DROP_REASON(skb, SKB_DROP_REASON_DEV_READY, 0);
br = p->br;
brnet = net_generic(state->net, brnf_net_id);
@@ -501,7 +516,7 @@ static unsigned int br_nf_pre_routing(void *priv,
return NF_ACCEPT;
if (!ipv6_mod_enabled()) {
pr_warn_once("Module ipv6 is disabled, so call_ip6tables is not supported.");
- return NF_DROP;
+ return NF_DROP_REASON(skb, SKB_DROP_REASON_IPV6DISABLED, 0);
}
nf_bridge_pull_encap_header_rcsum(skb);
@@ -518,12 +533,12 @@ static unsigned int br_nf_pre_routing(void *priv,
nf_bridge_pull_encap_header_rcsum(skb);
if (br_validate_ipv4(state->net, skb))
- return NF_DROP;
+ return NF_DROP_REASON(skb, SKB_DROP_REASON_IP_INHDR, 0);
if (!nf_bridge_alloc(skb))
- return NF_DROP;
+ return NF_DROP_REASON(skb, SKB_DROP_REASON_NOMEM, 0);
if (!setup_pre_routing(skb, state->net))
- return NF_DROP;
+ return NF_DROP_REASON(skb, SKB_DROP_REASON_DEV_READY, 0);
nf_bridge = nf_bridge_info_get(skb);
nf_bridge->ipv4_daddr = ip_hdr(skb)->daddr;
@@ -553,7 +568,11 @@ static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff
if (skb->protocol == htons(ETH_P_IPV6))
nf_bridge->frag_max_size = IP6CB(skb)->frag_max_size;
- in = nf_bridge->physindev;
+ in = nf_bridge_get_physindev(skb, net);
+ if (!in) {
+ kfree_skb(skb);
+ return 0;
+ }
if (nf_bridge->pkt_otherhost) {
skb->pkt_type = PACKET_OTHERHOST;
nf_bridge->pkt_otherhost = false;
@@ -570,18 +589,12 @@ static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff
}
-/* This is the 'purely bridged' case. For IP, we pass the packet to
- * netfilter with indev and outdev set to the bridge device,
- * but we are still able to filter on the 'real' indev/outdev
- * because of the physdev module. For ARP, indev and outdev are the
- * bridge ports. */
-static unsigned int br_nf_forward_ip(void *priv,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
+static unsigned int br_nf_forward_ip(struct sk_buff *skb,
+ const struct nf_hook_state *state,
+ u8 pf)
{
struct nf_bridge_info *nf_bridge;
struct net_device *parent;
- u_int8_t pf;
nf_bridge = nf_bridge_info_get(skb);
if (!nf_bridge)
@@ -590,24 +603,15 @@ static unsigned int br_nf_forward_ip(void *priv,
/* Need exclusive nf_bridge_info since we might have multiple
* different physoutdevs. */
if (!nf_bridge_unshare(skb))
- return NF_DROP;
+ return NF_DROP_REASON(skb, SKB_DROP_REASON_NOMEM, 0);
nf_bridge = nf_bridge_info_get(skb);
if (!nf_bridge)
- return NF_DROP;
+ return NF_DROP_REASON(skb, SKB_DROP_REASON_NOMEM, 0);
parent = bridge_parent(state->out);
if (!parent)
- return NF_DROP;
-
- if (IS_IP(skb) || is_vlan_ip(skb, state->net) ||
- is_pppoe_ip(skb, state->net))
- pf = NFPROTO_IPV4;
- else if (IS_IPV6(skb) || is_vlan_ipv6(skb, state->net) ||
- is_pppoe_ipv6(skb, state->net))
- pf = NFPROTO_IPV6;
- else
- return NF_ACCEPT;
+ return NF_DROP_REASON(skb, SKB_DROP_REASON_DEV_READY, 0);
nf_bridge_pull_encap_header(skb);
@@ -618,21 +622,20 @@ static unsigned int br_nf_forward_ip(void *priv,
if (pf == NFPROTO_IPV4) {
if (br_validate_ipv4(state->net, skb))
- return NF_DROP;
+ return NF_DROP_REASON(skb, SKB_DROP_REASON_IP_INHDR, 0);
IPCB(skb)->frag_max_size = nf_bridge->frag_max_size;
- }
-
- if (pf == NFPROTO_IPV6) {
+ skb->protocol = htons(ETH_P_IP);
+ } else if (pf == NFPROTO_IPV6) {
if (br_validate_ipv6(state->net, skb))
- return NF_DROP;
+ return NF_DROP_REASON(skb, SKB_DROP_REASON_IP_INHDR, 0);
IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size;
+ skb->protocol = htons(ETH_P_IPV6);
+ } else {
+ WARN_ON_ONCE(1);
+ return NF_DROP;
}
nf_bridge->physoutdev = skb->dev;
- if (pf == NFPROTO_IPV4)
- skb->protocol = htons(ETH_P_IP);
- else
- skb->protocol = htons(ETH_P_IPV6);
NF_HOOK(pf, NF_INET_FORWARD, state->net, NULL, skb,
brnf_get_logical_dev(skb, state->in, state->net),
@@ -641,8 +644,7 @@ static unsigned int br_nf_forward_ip(void *priv,
return NF_STOLEN;
}
-static unsigned int br_nf_forward_arp(void *priv,
- struct sk_buff *skb,
+static unsigned int br_nf_forward_arp(struct sk_buff *skb,
const struct nf_hook_state *state)
{
struct net_bridge_port *p;
@@ -659,14 +661,11 @@ static unsigned int br_nf_forward_arp(void *priv,
if (!brnet->call_arptables && !br_opt_get(br, BROPT_NF_CALL_ARPTABLES))
return NF_ACCEPT;
- if (!IS_ARP(skb)) {
- if (!is_vlan_arp(skb, state->net))
- return NF_ACCEPT;
+ if (is_vlan_arp(skb, state->net))
nf_bridge_pull_encap_header(skb);
- }
if (unlikely(!pskb_may_pull(skb, sizeof(struct arphdr))))
- return NF_DROP;
+ return NF_DROP_REASON(skb, SKB_DROP_REASON_PKT_TOO_SMALL, 0);
if (arp_hdr(skb)->ar_pln != 4) {
if (is_vlan_arp(skb, state->net))
@@ -680,6 +679,28 @@ static unsigned int br_nf_forward_arp(void *priv,
return NF_STOLEN;
}
+/* This is the 'purely bridged' case. For IP, we pass the packet to
+ * netfilter with indev and outdev set to the bridge device,
+ * but we are still able to filter on the 'real' indev/outdev
+ * because of the physdev module. For ARP, indev and outdev are the
+ * bridge ports.
+ */
+static unsigned int br_nf_forward(void *priv,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ if (IS_IP(skb) || is_vlan_ip(skb, state->net) ||
+ is_pppoe_ip(skb, state->net))
+ return br_nf_forward_ip(skb, state, NFPROTO_IPV4);
+ if (IS_IPV6(skb) || is_vlan_ipv6(skb, state->net) ||
+ is_pppoe_ipv6(skb, state->net))
+ return br_nf_forward_ip(skb, state, NFPROTO_IPV6);
+ if (IS_ARP(skb) || is_vlan_arp(skb, state->net))
+ return br_nf_forward_arp(skb, state);
+
+ return NF_ACCEPT;
+}
+
static int br_nf_push_frag_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct brnf_frag_data *data;
@@ -831,7 +852,7 @@ static unsigned int br_nf_post_routing(void *priv,
return NF_ACCEPT;
if (!realoutdev)
- return NF_DROP;
+ return NF_DROP_REASON(skb, SKB_DROP_REASON_DEV_READY, 0);
if (IS_IP(skb) || is_vlan_ip(skb, state->net) ||
is_pppoe_ip(skb, state->net))
@@ -897,6 +918,13 @@ static unsigned int ip_sabotage_in(void *priv,
static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb)
{
struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
+ struct net_device *br_indev;
+
+ br_indev = nf_bridge_get_physindev(skb, dev_net(skb->dev));
+ if (!br_indev) {
+ kfree_skb(skb);
+ return;
+ }
skb_pull(skb, ETH_HLEN);
nf_bridge->bridged_dnat = 0;
@@ -906,7 +934,7 @@ static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb)
skb_copy_to_linear_data_offset(skb, -(ETH_HLEN - ETH_ALEN),
nf_bridge->neigh_header,
ETH_HLEN - ETH_ALEN);
- skb->dev = nf_bridge->physindev;
+ skb->dev = br_indev;
nf_bridge->physoutdev = NULL;
br_handle_frame_finish(dev_net(skb->dev), NULL, skb);
@@ -937,13 +965,7 @@ static const struct nf_hook_ops br_nf_ops[] = {
.priority = NF_BR_PRI_BRNF,
},
{
- .hook = br_nf_forward_ip,
- .pf = NFPROTO_BRIDGE,
- .hooknum = NF_BR_FORWARD,
- .priority = NF_BR_PRI_BRNF - 1,
- },
- {
- .hook = br_nf_forward_arp,
+ .hook = br_nf_forward,
.pf = NFPROTO_BRIDGE,
.hooknum = NF_BR_FORWARD,
.priority = NF_BR_PRI_BRNF,
diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c
index 550039dfc31a..e0421eaa3abc 100644
--- a/net/bridge/br_netfilter_ipv6.c
+++ b/net/bridge/br_netfilter_ipv6.c
@@ -102,9 +102,15 @@ static int br_nf_pre_routing_finish_ipv6(struct net *net, struct sock *sk, struc
{
struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
struct rtable *rt;
- struct net_device *dev = skb->dev;
+ struct net_device *dev = skb->dev, *br_indev;
const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops();
+ br_indev = nf_bridge_get_physindev(skb, net);
+ if (!br_indev) {
+ kfree_skb(skb);
+ return 0;
+ }
+
nf_bridge->frag_max_size = IP6CB(skb)->frag_max_size;
if (nf_bridge->pkt_otherhost) {
@@ -122,7 +128,7 @@ static int br_nf_pre_routing_finish_ipv6(struct net *net, struct sock *sk, struc
}
if (skb_dst(skb)->dev == dev) {
- skb->dev = nf_bridge->physindev;
+ skb->dev = br_indev;
nf_bridge_update_protocol(skb);
nf_bridge_push_encap_header(skb);
br_nf_hook_thresh(NF_BR_PRE_ROUTING,
@@ -133,7 +139,7 @@ static int br_nf_pre_routing_finish_ipv6(struct net *net, struct sock *sk, struc
ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr);
skb->pkt_type = PACKET_HOST;
} else {
- rt = bridge_parent_rtable(nf_bridge->physindev);
+ rt = bridge_parent_rtable(br_indev);
if (!rt) {
kfree_skb(skb);
return 0;
@@ -142,7 +148,7 @@ static int br_nf_pre_routing_finish_ipv6(struct net *net, struct sock *sk, struc
skb_dst_set_noref(skb, &rt->dst);
}
- skb->dev = nf_bridge->physindev;
+ skb->dev = br_indev;
nf_bridge_update_protocol(skb);
nf_bridge_push_encap_header(skb);
br_nf_hook_thresh(NF_BR_PRE_ROUTING, net, sk, skb,
@@ -161,13 +167,13 @@ unsigned int br_nf_pre_routing_ipv6(void *priv,
struct nf_bridge_info *nf_bridge;
if (br_validate_ipv6(state->net, skb))
- return NF_DROP;
+ return NF_DROP_REASON(skb, SKB_DROP_REASON_IP_INHDR, 0);
nf_bridge = nf_bridge_alloc(skb);
if (!nf_bridge)
- return NF_DROP;
+ return NF_DROP_REASON(skb, SKB_DROP_REASON_NOMEM, 0);
if (!setup_pre_routing(skb, state->net))
- return NF_DROP;
+ return NF_DROP_REASON(skb, SKB_DROP_REASON_DEV_READY, 0);
nf_bridge = nf_bridge_info_get(skb);
nf_bridge->ipv6_daddr = ipv6_hdr(skb)->daddr;
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 10f0d33d8ccf..5ad4abfcb7ba 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -1229,6 +1229,8 @@ static size_t br_port_get_slave_size(const struct net_device *brdev,
}
static const struct nla_policy br_policy[IFLA_BR_MAX + 1] = {
+ [IFLA_BR_UNSPEC] = { .strict_start_type =
+ IFLA_BR_FDB_N_LEARNED },
[IFLA_BR_FORWARD_DELAY] = { .type = NLA_U32 },
[IFLA_BR_HELLO_TIME] = { .type = NLA_U32 },
[IFLA_BR_MAX_AGE] = { .type = NLA_U32 },
@@ -1265,6 +1267,8 @@ static const struct nla_policy br_policy[IFLA_BR_MAX + 1] = {
[IFLA_BR_VLAN_STATS_PER_PORT] = { .type = NLA_U8 },
[IFLA_BR_MULTI_BOOLOPT] =
NLA_POLICY_EXACT_LEN(sizeof(struct br_boolopt_multi)),
+ [IFLA_BR_FDB_N_LEARNED] = { .type = NLA_REJECT },
+ [IFLA_BR_FDB_MAX_LEARNED] = { .type = NLA_U32 },
};
static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
@@ -1539,6 +1543,12 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
return err;
}
+ if (data[IFLA_BR_FDB_MAX_LEARNED]) {
+ u32 val = nla_get_u32(data[IFLA_BR_FDB_MAX_LEARNED]);
+
+ WRITE_ONCE(br->fdb_max_learned, val);
+ }
+
return 0;
}
@@ -1593,6 +1603,8 @@ static size_t br_get_size(const struct net_device *brdev)
nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_TOPOLOGY_CHANGE_TIMER */
nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_GC_TIMER */
nla_total_size(ETH_ALEN) + /* IFLA_BR_GROUP_ADDR */
+ nla_total_size(sizeof(u32)) + /* IFLA_BR_FDB_N_LEARNED */
+ nla_total_size(sizeof(u32)) + /* IFLA_BR_FDB_MAX_LEARNED */
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
nla_total_size(sizeof(u8)) + /* IFLA_BR_MCAST_ROUTER */
nla_total_size(sizeof(u8)) + /* IFLA_BR_MCAST_SNOOPING */
@@ -1668,7 +1680,10 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev)
nla_put_u8(skb, IFLA_BR_TOPOLOGY_CHANGE_DETECTED,
br->topology_change_detected) ||
nla_put(skb, IFLA_BR_GROUP_ADDR, ETH_ALEN, br->group_addr) ||
- nla_put(skb, IFLA_BR_MULTI_BOOLOPT, sizeof(bm), &bm))
+ nla_put(skb, IFLA_BR_MULTI_BOOLOPT, sizeof(bm), &bm) ||
+ nla_put_u32(skb, IFLA_BR_FDB_N_LEARNED,
+ atomic_read(&br->fdb_n_learned)) ||
+ nla_put_u32(skb, IFLA_BR_FDB_MAX_LEARNED, br->fdb_max_learned))
return -EMSGSIZE;
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index a1f4acfa6994..86ea5e6689b5 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -78,7 +78,7 @@ struct bridge_mcast_own_query {
/* other querier */
struct bridge_mcast_other_query {
struct timer_list timer;
- unsigned long delay_time;
+ struct timer_list delay_timer;
};
/* selected querier */
@@ -186,6 +186,7 @@ enum {
* struct net_bridge_vlan - per-vlan entry
*
* @vnode: rhashtable member
+ * @tnode: rhashtable member
* @vid: VLAN id
* @flags: bridge vlan flags
* @priv_flags: private (in-kernel) bridge vlan flags
@@ -196,6 +197,7 @@ enum {
* @refcnt: if MASTER flag set, this is bumped for each port referencing it
* @brvlan: if MASTER flag unset, this points to the global per-VLAN context
* for this VLAN entry
+ * @tinfo: bridge tunnel info
* @br_mcast_ctx: if MASTER flag set, this is the global vlan multicast context
* @port_mcast_ctx: if MASTER flag unset, this is the per-port/vlan multicast
* context
@@ -274,6 +276,7 @@ enum {
BR_FDB_NOTIFY,
BR_FDB_NOTIFY_INACTIVE,
BR_FDB_LOCKED,
+ BR_FDB_DYNAMIC_LEARNED,
};
struct net_bridge_fdb_key {
@@ -555,6 +558,9 @@ struct net_bridge {
struct kobject *ifobj;
u32 auto_cnt;
+ atomic_t fdb_n_learned;
+ u32 fdb_max_learned;
+
#ifdef CONFIG_NET_SWITCHDEV
/* Counter used to make sure that hardware domains get unique
* identifiers in case a bridge spans multiple switchdev instances.
@@ -847,8 +853,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
struct net_device *dev, const unsigned char *addr, u16 vid,
struct netlink_ext_ack *extack);
-int br_fdb_delete_bulk(struct ndmsg *ndm, struct nlattr *tb[],
- struct net_device *dev, u16 vid,
+int br_fdb_delete_bulk(struct nlmsghdr *nlh, struct net_device *dev,
struct netlink_ext_ack *extack);
int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[], struct net_device *dev,
const unsigned char *addr, u16 vid, u16 nlh_flags,
@@ -952,8 +957,9 @@ int br_multicast_rcv(struct net_bridge_mcast **brmctx,
struct net_bridge_mcast_port **pmctx,
struct net_bridge_vlan *vlan,
struct sk_buff *skb, u16 vid);
-struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge_mcast *brmctx,
- struct sk_buff *skb, u16 vid);
+struct net_bridge_mdb_entry *
+br_mdb_entry_skb_get(struct net_bridge_mcast *brmctx, struct sk_buff *skb,
+ u16 vid);
int br_multicast_add_port(struct net_bridge_port *port);
void br_multicast_del_port(struct net_bridge_port *port);
void br_multicast_enable_port(struct net_bridge_port *port);
@@ -1016,8 +1022,12 @@ int br_mdb_add(struct net_device *dev, struct nlattr *tb[], u16 nlmsg_flags,
struct netlink_ext_ack *extack);
int br_mdb_del(struct net_device *dev, struct nlattr *tb[],
struct netlink_ext_ack *extack);
+int br_mdb_del_bulk(struct net_device *dev, struct nlattr *tb[],
+ struct netlink_ext_ack *extack);
int br_mdb_dump(struct net_device *dev, struct sk_buff *skb,
struct netlink_callback *cb);
+int br_mdb_get(struct net_device *dev, struct nlattr *tb[], u32 portid, u32 seq,
+ struct netlink_ext_ack *extack);
void br_multicast_host_join(const struct net_bridge_mcast *brmctx,
struct net_bridge_mdb_entry *mp, bool notify);
void br_multicast_host_leave(struct net_bridge_mdb_entry *mp, bool notify);
@@ -1149,7 +1159,7 @@ __br_multicast_querier_exists(struct net_bridge_mcast *brmctx,
own_querier_enabled = false;
}
- return time_is_before_jiffies(querier->delay_time) &&
+ return !timer_pending(&querier->delay_timer) &&
(own_querier_enabled || timer_pending(&querier->timer));
}
@@ -1342,8 +1352,9 @@ static inline int br_multicast_rcv(struct net_bridge_mcast **brmctx,
return 0;
}
-static inline struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge_mcast *brmctx,
- struct sk_buff *skb, u16 vid)
+static inline struct net_bridge_mdb_entry *
+br_mdb_entry_skb_get(struct net_bridge_mcast *brmctx, struct sk_buff *skb,
+ u16 vid)
{
return NULL;
}
@@ -1421,12 +1432,25 @@ static inline int br_mdb_del(struct net_device *dev, struct nlattr *tb[],
return -EOPNOTSUPP;
}
+static inline int br_mdb_del_bulk(struct net_device *dev, struct nlattr *tb[],
+ struct netlink_ext_ack *extack)
+{
+ return -EOPNOTSUPP;
+}
+
static inline int br_mdb_dump(struct net_device *dev, struct sk_buff *skb,
struct netlink_callback *cb)
{
return 0;
}
+static inline int br_mdb_get(struct net_device *dev, struct nlattr *tb[],
+ u32 portid, u32 seq,
+ struct netlink_ext_ack *extack)
+{
+ return -EOPNOTSUPP;
+}
+
static inline int br_mdb_hash_init(struct net_bridge *br)
{
return 0;
diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
index 8f19253024b0..741360219552 100644
--- a/net/bridge/netfilter/ebtable_broute.c
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -135,3 +135,4 @@ static void __exit ebtable_broute_fini(void)
module_init(ebtable_broute_init);
module_exit(ebtable_broute_fini);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Force packets to be routed instead of bridged");
diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
index 278f324e6752..dacd81b12e62 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -116,3 +116,4 @@ static void __exit ebtable_filter_fini(void)
module_init(ebtable_filter_init);
module_exit(ebtable_filter_fini);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ebtables legacy filter table");
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
index 9066f7f376d5..0f2a8c6118d4 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -116,3 +116,4 @@ static void __exit ebtable_nat_fini(void)
module_init(ebtable_nat_init);
module_exit(ebtable_nat_fini);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ebtables legacy stateless nat table");
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index aa23479b20b2..99d82676f780 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -2595,3 +2595,4 @@ EXPORT_SYMBOL(ebt_do_table);
module_init(ebtables_init);
module_exit(ebtables_fini);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ebtables legacy core");
diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c
index 71056ee84773..abb090f94ed2 100644
--- a/net/bridge/netfilter/nf_conntrack_bridge.c
+++ b/net/bridge/netfilter/nf_conntrack_bridge.c
@@ -37,7 +37,7 @@ static int nf_br_ip_fragment(struct net *net, struct sock *sk,
ktime_t tstamp = skb->tstamp;
struct ip_frag_state state;
struct iphdr *iph;
- int err;
+ int err = 0;
/* for offloaded checksums cleanup checksum before fragmentation */
if (skb->ip_summed == CHECKSUM_PARTIAL &&
@@ -416,3 +416,4 @@ module_exit(nf_conntrack_l3proto_bridge_fini);
MODULE_ALIAS("nf_conntrack-" __stringify(AF_BRIDGE));
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Bridge IPv4 and IPv6 connection tracking");
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index 6a0cba4fc29f..24e85c5487ef 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -27,6 +27,7 @@
#include <net/caif/cfcnfg.h>
#include <net/caif/cfserl.h>
+MODULE_DESCRIPTION("ST-Ericsson CAIF modem protocol support");
MODULE_LICENSE("GPL");
/* Used for local tracking of the CAIF net devices */
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 9c82698da4f5..039dfbd367c9 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -27,6 +27,7 @@
#include <net/caif/caif_dev.h>
#include <net/caif/cfpkt.h>
+MODULE_DESCRIPTION("ST-Ericsson CAIF modem protocol socket support (AF_CAIF)");
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(AF_CAIF);
diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c
index bf61ea4b8132..5dc05a1e3178 100644
--- a/net/caif/caif_usb.c
+++ b/net/caif/caif_usb.c
@@ -20,6 +20,7 @@
#include <net/caif/cfpkt.h>
#include <net/caif/cfcnfg.h>
+MODULE_DESCRIPTION("ST-Ericsson CAIF modem protocol USB support");
MODULE_LICENSE("GPL");
#define CFUSB_PAD_DESCR_SZ 1 /* Alignment descriptor length */
diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
index f35fc87c453a..47901bd4def1 100644
--- a/net/caif/chnl_net.c
+++ b/net/caif/chnl_net.c
@@ -31,6 +31,7 @@
/*This list is protected by the rtnl lock. */
static LIST_HEAD(chnl_net_list);
+MODULE_DESCRIPTION("ST-Ericsson CAIF modem protocol GPRS network device");
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("caif");
diff --git a/net/can/isotp.c b/net/can/isotp.c
index f02b5d3e4733..d1c6f206f429 100644
--- a/net/can/isotp.c
+++ b/net/can/isotp.c
@@ -948,21 +948,18 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
if (!so->bound || so->tx.state == ISOTP_SHUTDOWN)
return -EADDRNOTAVAIL;
-wait_free_buffer:
- /* we do not support multiple buffers - for now */
- if (wq_has_sleeper(&so->wait) && (msg->msg_flags & MSG_DONTWAIT))
- return -EAGAIN;
+ while (cmpxchg(&so->tx.state, ISOTP_IDLE, ISOTP_SENDING) != ISOTP_IDLE) {
+ /* we do not support multiple buffers - for now */
+ if (msg->msg_flags & MSG_DONTWAIT)
+ return -EAGAIN;
- /* wait for complete transmission of current pdu */
- err = wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE);
- if (err)
- goto err_event_drop;
-
- if (cmpxchg(&so->tx.state, ISOTP_IDLE, ISOTP_SENDING) != ISOTP_IDLE) {
if (so->tx.state == ISOTP_SHUTDOWN)
return -EADDRNOTAVAIL;
- goto wait_free_buffer;
+ /* wait for complete transmission of current pdu */
+ err = wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE);
+ if (err)
+ goto err_event_drop;
}
/* PDU size > default => try max_pdu_size */
diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c
index feaec4ad6d16..14c431663233 100644
--- a/net/can/j1939/socket.c
+++ b/net/can/j1939/socket.c
@@ -884,7 +884,7 @@ static struct sk_buff *j1939_sk_alloc_skb(struct net_device *ndev,
skcb = j1939_skb_to_cb(skb);
memset(skcb, 0, sizeof(*skcb));
skcb->addr = jsk->addr;
- skcb->priority = j1939_prio(sk->sk_priority);
+ skcb->priority = j1939_prio(READ_ONCE(sk->sk_priority));
if (msg->msg_name) {
struct sockaddr_can *addr = msg->msg_name;
@@ -974,6 +974,7 @@ static void __j1939_sk_errqueue(struct j1939_session *session, struct sock *sk,
struct sock_exterr_skb *serr;
struct sk_buff *skb;
char *state = "UNK";
+ u32 tsflags;
int err;
jsk = j1939_sk(sk);
@@ -981,13 +982,14 @@ static void __j1939_sk_errqueue(struct j1939_session *session, struct sock *sk,
if (!(jsk->state & J1939_SOCK_ERRQUEUE))
return;
+ tsflags = READ_ONCE(sk->sk_tsflags);
switch (type) {
case J1939_ERRQUEUE_TX_ACK:
- if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK))
+ if (!(tsflags & SOF_TIMESTAMPING_TX_ACK))
return;
break;
case J1939_ERRQUEUE_TX_SCHED:
- if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED))
+ if (!(tsflags & SOF_TIMESTAMPING_TX_SCHED))
return;
break;
case J1939_ERRQUEUE_TX_ABORT:
@@ -997,7 +999,7 @@ static void __j1939_sk_errqueue(struct j1939_session *session, struct sock *sk,
case J1939_ERRQUEUE_RX_DPO:
fallthrough;
case J1939_ERRQUEUE_RX_ABORT:
- if (!(sk->sk_tsflags & SOF_TIMESTAMPING_RX_SOFTWARE))
+ if (!(tsflags & SOF_TIMESTAMPING_RX_SOFTWARE))
return;
break;
default:
@@ -1054,7 +1056,7 @@ static void __j1939_sk_errqueue(struct j1939_session *session, struct sock *sk,
}
serr->opt_stats = true;
- if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
+ if (tsflags & SOF_TIMESTAMPING_OPT_ID)
serr->ee.ee_data = session->tskey;
netdev_dbg(session->priv->ndev, "%s: 0x%p tskey: %i, state: %s\n",
diff --git a/net/can/raw.c b/net/can/raw.c
index d50c3f3d892f..e6b822624ba2 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -493,8 +493,7 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
out_put_dev:
/* remove potential reference from dev_get_by_index() */
- if (dev)
- dev_put(dev);
+ dev_put(dev);
out:
release_sock(sk);
rtnl_unlock();
@@ -881,7 +880,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
}
skb->dev = dev;
- skb->priority = sk->sk_priority;
+ skb->priority = READ_ONCE(sk->sk_priority);
skb->mark = READ_ONCE(sk->sk_mark);
skb->tstamp = sockc.transmit_time;
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 5eb4898cccd4..3c8b78d9c4d1 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -459,8 +459,8 @@ int ceph_tcp_connect(struct ceph_connection *con)
set_sock_callbacks(sock, con);
con_sock_state_connecting(con);
- ret = sock->ops->connect(sock, (struct sockaddr *)&ss, sizeof(ss),
- O_NONBLOCK);
+ ret = kernel_connect(sock, (struct sockaddr *)&ss, sizeof(ss),
+ O_NONBLOCK);
if (ret == -EINPROGRESS) {
dout("connect %s EINPROGRESS sk_state = %u\n",
ceph_pr_addr(&con->peer_addr),
@@ -969,6 +969,62 @@ static bool ceph_msg_data_pagelist_advance(struct ceph_msg_data_cursor *cursor,
return true;
}
+static void ceph_msg_data_iter_cursor_init(struct ceph_msg_data_cursor *cursor,
+ size_t length)
+{
+ struct ceph_msg_data *data = cursor->data;
+
+ cursor->iov_iter = data->iter;
+ cursor->lastlen = 0;
+ iov_iter_truncate(&cursor->iov_iter, length);
+ cursor->resid = iov_iter_count(&cursor->iov_iter);
+}
+
+static struct page *ceph_msg_data_iter_next(struct ceph_msg_data_cursor *cursor,
+ size_t *page_offset, size_t *length)
+{
+ struct page *page;
+ ssize_t len;
+
+ if (cursor->lastlen)
+ iov_iter_revert(&cursor->iov_iter, cursor->lastlen);
+
+ len = iov_iter_get_pages2(&cursor->iov_iter, &page, PAGE_SIZE,
+ 1, page_offset);
+ BUG_ON(len < 0);
+
+ cursor->lastlen = len;
+
+ /*
+ * FIXME: The assumption is that the pages represented by the iov_iter
+ * are pinned, with the references held by the upper-level
+ * callers, or by virtue of being under writeback. Eventually,
+ * we'll get an iov_iter_get_pages2 variant that doesn't take
+ * page refs. Until then, just put the page ref.
+ */
+ VM_BUG_ON_PAGE(!PageWriteback(page) && page_count(page) < 2, page);
+ put_page(page);
+
+ *length = min_t(size_t, len, cursor->resid);
+ return page;
+}
+
+static bool ceph_msg_data_iter_advance(struct ceph_msg_data_cursor *cursor,
+ size_t bytes)
+{
+ BUG_ON(bytes > cursor->resid);
+ cursor->resid -= bytes;
+
+ if (bytes < cursor->lastlen) {
+ cursor->lastlen -= bytes;
+ } else {
+ iov_iter_advance(&cursor->iov_iter, bytes - cursor->lastlen);
+ cursor->lastlen = 0;
+ }
+
+ return cursor->resid;
+}
+
/*
* Message data is handled (sent or received) in pieces, where each
* piece resides on a single page. The network layer might not
@@ -996,6 +1052,9 @@ static void __ceph_msg_data_cursor_init(struct ceph_msg_data_cursor *cursor)
case CEPH_MSG_DATA_BVECS:
ceph_msg_data_bvecs_cursor_init(cursor, length);
break;
+ case CEPH_MSG_DATA_ITER:
+ ceph_msg_data_iter_cursor_init(cursor, length);
+ break;
case CEPH_MSG_DATA_NONE:
default:
/* BUG(); */
@@ -1013,6 +1072,7 @@ void ceph_msg_data_cursor_init(struct ceph_msg_data_cursor *cursor,
cursor->total_resid = length;
cursor->data = msg->data;
+ cursor->sr_resid = 0;
__ceph_msg_data_cursor_init(cursor);
}
@@ -1042,6 +1102,9 @@ struct page *ceph_msg_data_next(struct ceph_msg_data_cursor *cursor,
case CEPH_MSG_DATA_BVECS:
page = ceph_msg_data_bvecs_next(cursor, page_offset, length);
break;
+ case CEPH_MSG_DATA_ITER:
+ page = ceph_msg_data_iter_next(cursor, page_offset, length);
+ break;
case CEPH_MSG_DATA_NONE:
default:
page = NULL;
@@ -1080,6 +1143,9 @@ void ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor, size_t bytes)
case CEPH_MSG_DATA_BVECS:
new_piece = ceph_msg_data_bvecs_advance(cursor, bytes);
break;
+ case CEPH_MSG_DATA_ITER:
+ new_piece = ceph_msg_data_iter_advance(cursor, bytes);
+ break;
case CEPH_MSG_DATA_NONE:
default:
BUG();
@@ -1879,6 +1945,18 @@ void ceph_msg_data_add_bvecs(struct ceph_msg *msg,
}
EXPORT_SYMBOL(ceph_msg_data_add_bvecs);
+void ceph_msg_data_add_iter(struct ceph_msg *msg,
+ struct iov_iter *iter)
+{
+ struct ceph_msg_data *data;
+
+ data = ceph_msg_data_add(msg);
+ data->type = CEPH_MSG_DATA_ITER;
+ data->iter = *iter;
+
+ msg->data_length += iov_iter_count(&data->iter);
+}
+
/*
* construct a new message with given type, size
* the new msg has a ref count of 1.
diff --git a/net/ceph/messenger_v1.c b/net/ceph/messenger_v1.c
index 3d57bb48a2b4..0cb61c76b9b8 100644
--- a/net/ceph/messenger_v1.c
+++ b/net/ceph/messenger_v1.c
@@ -159,9 +159,10 @@ static size_t sizeof_footer(struct ceph_connection *con)
static void prepare_message_data(struct ceph_msg *msg, u32 data_len)
{
- /* Initialize data cursor */
+ /* Initialize data cursor if it's not a sparse read */
+ u64 len = msg->sparse_read_total ? : data_len;
- ceph_msg_data_cursor_init(&msg->cursor, msg, data_len);
+ ceph_msg_data_cursor_init(&msg->cursor, msg, len);
}
/*
@@ -960,9 +961,9 @@ static void process_ack(struct ceph_connection *con)
prepare_read_tag(con);
}
-static int read_partial_message_section(struct ceph_connection *con,
- struct kvec *section,
- unsigned int sec_len, u32 *crc)
+static int read_partial_message_chunk(struct ceph_connection *con,
+ struct kvec *section,
+ unsigned int sec_len, u32 *crc)
{
int ret, left;
@@ -978,11 +979,91 @@ static int read_partial_message_section(struct ceph_connection *con,
section->iov_len += ret;
}
if (section->iov_len == sec_len)
- *crc = crc32c(0, section->iov_base, section->iov_len);
+ *crc = crc32c(*crc, section->iov_base, section->iov_len);
return 1;
}
+static inline int read_partial_message_section(struct ceph_connection *con,
+ struct kvec *section,
+ unsigned int sec_len, u32 *crc)
+{
+ *crc = 0;
+ return read_partial_message_chunk(con, section, sec_len, crc);
+}
+
+static int read_partial_sparse_msg_extent(struct ceph_connection *con, u32 *crc)
+{
+ struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor;
+ bool do_bounce = ceph_test_opt(from_msgr(con->msgr), RXBOUNCE);
+
+ if (do_bounce && unlikely(!con->bounce_page)) {
+ con->bounce_page = alloc_page(GFP_NOIO);
+ if (!con->bounce_page) {
+ pr_err("failed to allocate bounce page\n");
+ return -ENOMEM;
+ }
+ }
+
+ while (cursor->sr_resid > 0) {
+ struct page *page, *rpage;
+ size_t off, len;
+ int ret;
+
+ page = ceph_msg_data_next(cursor, &off, &len);
+ rpage = do_bounce ? con->bounce_page : page;
+
+ /* clamp to what remains in extent */
+ len = min_t(int, len, cursor->sr_resid);
+ ret = ceph_tcp_recvpage(con->sock, rpage, (int)off, len);
+ if (ret <= 0)
+ return ret;
+ *crc = ceph_crc32c_page(*crc, rpage, off, ret);
+ ceph_msg_data_advance(cursor, (size_t)ret);
+ cursor->sr_resid -= ret;
+ if (do_bounce)
+ memcpy_page(page, off, rpage, off, ret);
+ }
+ return 1;
+}
+
+static int read_partial_sparse_msg_data(struct ceph_connection *con)
+{
+ struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor;
+ bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC);
+ u32 crc = 0;
+ int ret = 1;
+
+ if (do_datacrc)
+ crc = con->in_data_crc;
+
+ while (cursor->total_resid) {
+ if (con->v1.in_sr_kvec.iov_base)
+ ret = read_partial_message_chunk(con,
+ &con->v1.in_sr_kvec,
+ con->v1.in_sr_len,
+ &crc);
+ else if (cursor->sr_resid > 0)
+ ret = read_partial_sparse_msg_extent(con, &crc);
+ if (ret <= 0)
+ break;
+
+ memset(&con->v1.in_sr_kvec, 0, sizeof(con->v1.in_sr_kvec));
+ ret = con->ops->sparse_read(con, cursor,
+ (char **)&con->v1.in_sr_kvec.iov_base);
+ if (ret <= 0) {
+ ret = ret ? ret : 1; /* must return > 0 to indicate success */
+ break;
+ }
+ con->v1.in_sr_len = ret;
+ }
+
+ if (do_datacrc)
+ con->in_data_crc = crc;
+
+ return ret;
+}
+
static int read_partial_msg_data(struct ceph_connection *con)
{
struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor;
@@ -1173,7 +1254,9 @@ static int read_partial_message(struct ceph_connection *con)
if (!m->num_data_items)
return -EIO;
- if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE))
+ if (m->sparse_read_total)
+ ret = read_partial_sparse_msg_data(con);
+ else if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE))
ret = read_partial_msg_data_bounce(con);
else
ret = read_partial_msg_data(con);
diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c
index 1df1d29dee92..a0ca5414b333 100644
--- a/net/ceph/messenger_v2.c
+++ b/net/ceph/messenger_v2.c
@@ -8,9 +8,9 @@
#include <linux/ceph/ceph_debug.h>
#include <crypto/aead.h>
-#include <crypto/algapi.h> /* for crypto_memneq() */
#include <crypto/hash.h>
#include <crypto/sha2.h>
+#include <crypto/utils.h>
#include <linux/bvec.h>
#include <linux/crc32c.h>
#include <linux/net.h>
@@ -52,14 +52,16 @@
#define FRAME_LATE_STATUS_COMPLETE 0xe
#define FRAME_LATE_STATUS_ABORTED_MASK 0xf
-#define IN_S_HANDLE_PREAMBLE 1
-#define IN_S_HANDLE_CONTROL 2
-#define IN_S_HANDLE_CONTROL_REMAINDER 3
-#define IN_S_PREPARE_READ_DATA 4
-#define IN_S_PREPARE_READ_DATA_CONT 5
-#define IN_S_PREPARE_READ_ENC_PAGE 6
-#define IN_S_HANDLE_EPILOGUE 7
-#define IN_S_FINISH_SKIP 8
+#define IN_S_HANDLE_PREAMBLE 1
+#define IN_S_HANDLE_CONTROL 2
+#define IN_S_HANDLE_CONTROL_REMAINDER 3
+#define IN_S_PREPARE_READ_DATA 4
+#define IN_S_PREPARE_READ_DATA_CONT 5
+#define IN_S_PREPARE_READ_ENC_PAGE 6
+#define IN_S_PREPARE_SPARSE_DATA 7
+#define IN_S_PREPARE_SPARSE_DATA_CONT 8
+#define IN_S_HANDLE_EPILOGUE 9
+#define IN_S_FINISH_SKIP 10
#define OUT_S_QUEUE_DATA 1
#define OUT_S_QUEUE_DATA_CONT 2
@@ -731,8 +733,6 @@ static int setup_crypto(struct ceph_connection *con,
return ret;
}
- WARN_ON((unsigned long)session_key &
- crypto_shash_alignmask(con->v2.hmac_tfm));
ret = crypto_shash_setkey(con->v2.hmac_tfm, session_key,
session_key_len);
if (ret) {
@@ -814,8 +814,6 @@ static int hmac_sha256(struct ceph_connection *con, const struct kvec *kvecs,
goto out;
for (i = 0; i < kvec_cnt; i++) {
- WARN_ON((unsigned long)kvecs[i].iov_base &
- crypto_shash_alignmask(con->v2.hmac_tfm));
ret = crypto_shash_update(desc, kvecs[i].iov_base,
kvecs[i].iov_len);
if (ret)
@@ -967,12 +965,48 @@ static void init_sgs_cursor(struct scatterlist **sg,
}
}
+/**
+ * init_sgs_pages: set up scatterlist on an array of page pointers
+ * @sg: scatterlist to populate
+ * @pages: pointer to page array
+ * @dpos: position in the array to start (bytes)
+ * @dlen: len to add to sg (bytes)
+ * @pad: pointer to pad destination (if any)
+ *
+ * Populate the scatterlist from the page array, starting at an arbitrary
+ * byte in the array and running for a specified length.
+ */
+static void init_sgs_pages(struct scatterlist **sg, struct page **pages,
+ int dpos, int dlen, u8 *pad)
+{
+ int idx = dpos >> PAGE_SHIFT;
+ int off = offset_in_page(dpos);
+ int resid = dlen;
+
+ do {
+ int len = min(resid, (int)PAGE_SIZE - off);
+
+ sg_set_page(*sg, pages[idx], len, off);
+ *sg = sg_next(*sg);
+ off = 0;
+ ++idx;
+ resid -= len;
+ } while (resid);
+
+ if (need_padding(dlen)) {
+ sg_set_buf(*sg, pad, padding_len(dlen));
+ *sg = sg_next(*sg);
+ }
+}
+
static int setup_message_sgs(struct sg_table *sgt, struct ceph_msg *msg,
u8 *front_pad, u8 *middle_pad, u8 *data_pad,
- void *epilogue, bool add_tag)
+ void *epilogue, struct page **pages, int dpos,
+ bool add_tag)
{
struct ceph_msg_data_cursor cursor;
struct scatterlist *cur_sg;
+ int dlen = data_len(msg);
int sg_cnt;
int ret;
@@ -986,9 +1020,15 @@ static int setup_message_sgs(struct sg_table *sgt, struct ceph_msg *msg,
if (middle_len(msg))
sg_cnt += calc_sg_cnt(msg->middle->vec.iov_base,
middle_len(msg));
- if (data_len(msg)) {
- ceph_msg_data_cursor_init(&cursor, msg, data_len(msg));
- sg_cnt += calc_sg_cnt_cursor(&cursor);
+ if (dlen) {
+ if (pages) {
+ sg_cnt += calc_pages_for(dpos, dlen);
+ if (need_padding(dlen))
+ sg_cnt++;
+ } else {
+ ceph_msg_data_cursor_init(&cursor, msg, dlen);
+ sg_cnt += calc_sg_cnt_cursor(&cursor);
+ }
}
ret = sg_alloc_table(sgt, sg_cnt, GFP_NOIO);
@@ -1002,9 +1042,13 @@ static int setup_message_sgs(struct sg_table *sgt, struct ceph_msg *msg,
if (middle_len(msg))
init_sgs(&cur_sg, msg->middle->vec.iov_base, middle_len(msg),
middle_pad);
- if (data_len(msg)) {
- ceph_msg_data_cursor_init(&cursor, msg, data_len(msg));
- init_sgs_cursor(&cur_sg, &cursor, data_pad);
+ if (dlen) {
+ if (pages) {
+ init_sgs_pages(&cur_sg, pages, dpos, dlen, data_pad);
+ } else {
+ ceph_msg_data_cursor_init(&cursor, msg, dlen);
+ init_sgs_cursor(&cur_sg, &cursor, data_pad);
+ }
}
WARN_ON(!sg_is_last(cur_sg));
@@ -1039,10 +1083,53 @@ static int decrypt_control_remainder(struct ceph_connection *con)
padded_len(rem_len) + CEPH_GCM_TAG_LEN);
}
+/* Process sparse read data that lives in a buffer */
+static int process_v2_sparse_read(struct ceph_connection *con,
+ struct page **pages, int spos)
+{
+ struct ceph_msg_data_cursor *cursor = &con->v2.in_cursor;
+ int ret;
+
+ for (;;) {
+ char *buf = NULL;
+
+ ret = con->ops->sparse_read(con, cursor, &buf);
+ if (ret <= 0)
+ return ret;
+
+ dout("%s: sparse_read return %x buf %p\n", __func__, ret, buf);
+
+ do {
+ int idx = spos >> PAGE_SHIFT;
+ int soff = offset_in_page(spos);
+ struct page *spage = con->v2.in_enc_pages[idx];
+ int len = min_t(int, ret, PAGE_SIZE - soff);
+
+ if (buf) {
+ memcpy_from_page(buf, spage, soff, len);
+ buf += len;
+ } else {
+ struct bio_vec bv;
+
+ get_bvec_at(cursor, &bv);
+ len = min_t(int, len, bv.bv_len);
+ memcpy_page(bv.bv_page, bv.bv_offset,
+ spage, soff, len);
+ ceph_msg_data_advance(cursor, len);
+ }
+ spos += len;
+ ret -= len;
+ } while (ret);
+ }
+}
+
static int decrypt_tail(struct ceph_connection *con)
{
struct sg_table enc_sgt = {};
struct sg_table sgt = {};
+ struct page **pages = NULL;
+ bool sparse = !!con->in_msg->sparse_read_total;
+ int dpos = 0;
int tail_len;
int ret;
@@ -1053,9 +1140,14 @@ static int decrypt_tail(struct ceph_connection *con)
if (ret)
goto out;
+ if (sparse) {
+ dpos = padded_len(front_len(con->in_msg) + padded_len(middle_len(con->in_msg)));
+ pages = con->v2.in_enc_pages;
+ }
+
ret = setup_message_sgs(&sgt, con->in_msg, FRONT_PAD(con->v2.in_buf),
- MIDDLE_PAD(con->v2.in_buf), DATA_PAD(con->v2.in_buf),
- con->v2.in_buf, true);
+ MIDDLE_PAD(con->v2.in_buf), DATA_PAD(con->v2.in_buf),
+ con->v2.in_buf, pages, dpos, true);
if (ret)
goto out;
@@ -1065,6 +1157,12 @@ static int decrypt_tail(struct ceph_connection *con)
if (ret)
goto out;
+ if (sparse && data_len(con->in_msg)) {
+ ret = process_v2_sparse_read(con, con->v2.in_enc_pages, dpos);
+ if (ret)
+ goto out;
+ }
+
WARN_ON(!con->v2.in_enc_page_cnt);
ceph_release_page_vector(con->v2.in_enc_pages,
con->v2.in_enc_page_cnt);
@@ -1588,7 +1686,7 @@ static int prepare_message_secure(struct ceph_connection *con)
encode_epilogue_secure(con, false);
ret = setup_message_sgs(&sgt, con->out_msg, zerop, zerop, zerop,
- &con->v2.out_epil, false);
+ &con->v2.out_epil, NULL, 0, false);
if (ret)
goto out;
@@ -1825,6 +1923,123 @@ static void prepare_read_data_cont(struct ceph_connection *con)
con->v2.in_state = IN_S_HANDLE_EPILOGUE;
}
+static int prepare_sparse_read_cont(struct ceph_connection *con)
+{
+ int ret;
+ struct bio_vec bv;
+ char *buf = NULL;
+ struct ceph_msg_data_cursor *cursor = &con->v2.in_cursor;
+
+ WARN_ON(con->v2.in_state != IN_S_PREPARE_SPARSE_DATA_CONT);
+
+ if (iov_iter_is_bvec(&con->v2.in_iter)) {
+ if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) {
+ con->in_data_crc = crc32c(con->in_data_crc,
+ page_address(con->bounce_page),
+ con->v2.in_bvec.bv_len);
+ get_bvec_at(cursor, &bv);
+ memcpy_to_page(bv.bv_page, bv.bv_offset,
+ page_address(con->bounce_page),
+ con->v2.in_bvec.bv_len);
+ } else {
+ con->in_data_crc = ceph_crc32c_page(con->in_data_crc,
+ con->v2.in_bvec.bv_page,
+ con->v2.in_bvec.bv_offset,
+ con->v2.in_bvec.bv_len);
+ }
+
+ ceph_msg_data_advance(cursor, con->v2.in_bvec.bv_len);
+ cursor->sr_resid -= con->v2.in_bvec.bv_len;
+ dout("%s: advance by 0x%x sr_resid 0x%x\n", __func__,
+ con->v2.in_bvec.bv_len, cursor->sr_resid);
+ WARN_ON_ONCE(cursor->sr_resid > cursor->total_resid);
+ if (cursor->sr_resid) {
+ get_bvec_at(cursor, &bv);
+ if (bv.bv_len > cursor->sr_resid)
+ bv.bv_len = cursor->sr_resid;
+ if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) {
+ bv.bv_page = con->bounce_page;
+ bv.bv_offset = 0;
+ }
+ set_in_bvec(con, &bv);
+ con->v2.data_len_remain -= bv.bv_len;
+ return 0;
+ }
+ } else if (iov_iter_is_kvec(&con->v2.in_iter)) {
+ /* On first call, we have no kvec so don't compute crc */
+ if (con->v2.in_kvec_cnt) {
+ WARN_ON_ONCE(con->v2.in_kvec_cnt > 1);
+ con->in_data_crc = crc32c(con->in_data_crc,
+ con->v2.in_kvecs[0].iov_base,
+ con->v2.in_kvecs[0].iov_len);
+ }
+ } else {
+ return -EIO;
+ }
+
+ /* get next extent */
+ ret = con->ops->sparse_read(con, cursor, &buf);
+ if (ret <= 0) {
+ if (ret < 0)
+ return ret;
+
+ reset_in_kvecs(con);
+ add_in_kvec(con, con->v2.in_buf, CEPH_EPILOGUE_PLAIN_LEN);
+ con->v2.in_state = IN_S_HANDLE_EPILOGUE;
+ return 0;
+ }
+
+ if (buf) {
+ /* receive into buffer */
+ reset_in_kvecs(con);
+ add_in_kvec(con, buf, ret);
+ con->v2.data_len_remain -= ret;
+ return 0;
+ }
+
+ if (ret > cursor->total_resid) {
+ pr_warn("%s: ret 0x%x total_resid 0x%zx resid 0x%zx\n",
+ __func__, ret, cursor->total_resid, cursor->resid);
+ return -EIO;
+ }
+ get_bvec_at(cursor, &bv);
+ if (bv.bv_len > cursor->sr_resid)
+ bv.bv_len = cursor->sr_resid;
+ if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) {
+ if (unlikely(!con->bounce_page)) {
+ con->bounce_page = alloc_page(GFP_NOIO);
+ if (!con->bounce_page) {
+ pr_err("failed to allocate bounce page\n");
+ return -ENOMEM;
+ }
+ }
+
+ bv.bv_page = con->bounce_page;
+ bv.bv_offset = 0;
+ }
+ set_in_bvec(con, &bv);
+ con->v2.data_len_remain -= ret;
+ return ret;
+}
+
+static int prepare_sparse_read_data(struct ceph_connection *con)
+{
+ struct ceph_msg *msg = con->in_msg;
+
+ dout("%s: starting sparse read\n", __func__);
+
+ if (WARN_ON_ONCE(!con->ops->sparse_read))
+ return -EOPNOTSUPP;
+
+ if (!con_secure(con))
+ con->in_data_crc = -1;
+
+ reset_in_kvecs(con);
+ con->v2.in_state = IN_S_PREPARE_SPARSE_DATA_CONT;
+ con->v2.data_len_remain = data_len(msg);
+ return prepare_sparse_read_cont(con);
+}
+
static int prepare_read_tail_plain(struct ceph_connection *con)
{
struct ceph_msg *msg = con->in_msg;
@@ -1845,7 +2060,10 @@ static int prepare_read_tail_plain(struct ceph_connection *con)
}
if (data_len(msg)) {
- con->v2.in_state = IN_S_PREPARE_READ_DATA;
+ if (msg->sparse_read_total)
+ con->v2.in_state = IN_S_PREPARE_SPARSE_DATA;
+ else
+ con->v2.in_state = IN_S_PREPARE_READ_DATA;
} else {
add_in_kvec(con, con->v2.in_buf, CEPH_EPILOGUE_PLAIN_LEN);
con->v2.in_state = IN_S_HANDLE_EPILOGUE;
@@ -2898,6 +3116,12 @@ static int populate_in_iter(struct ceph_connection *con)
prepare_read_enc_page(con);
ret = 0;
break;
+ case IN_S_PREPARE_SPARSE_DATA:
+ ret = prepare_sparse_read_data(con);
+ break;
+ case IN_S_PREPARE_SPARSE_DATA_CONT:
+ ret = prepare_sparse_read_cont(con);
+ break;
case IN_S_HANDLE_EPILOGUE:
ret = handle_epilogue(con);
break;
@@ -3489,6 +3713,23 @@ static void revoke_at_prepare_read_enc_page(struct ceph_connection *con)
con->v2.in_state = IN_S_FINISH_SKIP;
}
+static void revoke_at_prepare_sparse_data(struct ceph_connection *con)
+{
+ int resid; /* current piece of data */
+ int remaining;
+
+ WARN_ON(con_secure(con));
+ WARN_ON(!data_len(con->in_msg));
+ WARN_ON(!iov_iter_is_bvec(&con->v2.in_iter));
+ resid = iov_iter_count(&con->v2.in_iter);
+ dout("%s con %p resid %d\n", __func__, con, resid);
+
+ remaining = CEPH_EPILOGUE_PLAIN_LEN + con->v2.data_len_remain;
+ con->v2.in_iter.count -= resid;
+ set_in_skip(con, resid + remaining);
+ con->v2.in_state = IN_S_FINISH_SKIP;
+}
+
static void revoke_at_handle_epilogue(struct ceph_connection *con)
{
int resid;
@@ -3505,6 +3746,7 @@ static void revoke_at_handle_epilogue(struct ceph_connection *con)
void ceph_con_v2_revoke_incoming(struct ceph_connection *con)
{
switch (con->v2.in_state) {
+ case IN_S_PREPARE_SPARSE_DATA:
case IN_S_PREPARE_READ_DATA:
revoke_at_prepare_read_data(con);
break;
@@ -3514,6 +3756,9 @@ void ceph_con_v2_revoke_incoming(struct ceph_connection *con)
case IN_S_PREPARE_READ_ENC_PAGE:
revoke_at_prepare_read_enc_page(con);
break;
+ case IN_S_PREPARE_SPARSE_DATA_CONT:
+ revoke_at_prepare_sparse_data(con);
+ break;
case IN_S_HANDLE_EPILOGUE:
revoke_at_handle_epilogue(con);
break;
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index faabad6603db..f263f7e91a21 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -1136,6 +1136,7 @@ static int build_initial_monmap(struct ceph_mon_client *monc)
GFP_KERNEL);
if (!monc->monmap)
return -ENOMEM;
+ monc->monmap->num_mon = num_mon;
for (i = 0; i < num_mon; i++) {
struct ceph_entity_inst *inst = &monc->monmap->mon_inst[i];
@@ -1147,7 +1148,6 @@ static int build_initial_monmap(struct ceph_mon_client *monc)
inst->name.type = CEPH_ENTITY_TYPE_MON;
inst->name.num = cpu_to_le64(i);
}
- monc->monmap->num_mon = num_mon;
return 0;
}
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 658a6f2320cf..9d078b37fe0b 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -171,6 +171,13 @@ static void ceph_osd_data_bvecs_init(struct ceph_osd_data *osd_data,
osd_data->num_bvecs = num_bvecs;
}
+static void ceph_osd_iter_init(struct ceph_osd_data *osd_data,
+ struct iov_iter *iter)
+{
+ osd_data->type = CEPH_OSD_DATA_TYPE_ITER;
+ osd_data->iter = *iter;
+}
+
static struct ceph_osd_data *
osd_req_op_raw_data_in(struct ceph_osd_request *osd_req, unsigned int which)
{
@@ -264,6 +271,22 @@ void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req,
}
EXPORT_SYMBOL(osd_req_op_extent_osd_data_bvec_pos);
+/**
+ * osd_req_op_extent_osd_iter - Set up an operation with an iterator buffer
+ * @osd_req: The request to set up
+ * @which: Index of the operation in which to set the iter
+ * @iter: The buffer iterator
+ */
+void osd_req_op_extent_osd_iter(struct ceph_osd_request *osd_req,
+ unsigned int which, struct iov_iter *iter)
+{
+ struct ceph_osd_data *osd_data;
+
+ osd_data = osd_req_op_data(osd_req, which, extent, osd_data);
+ ceph_osd_iter_init(osd_data, iter);
+}
+EXPORT_SYMBOL(osd_req_op_extent_osd_iter);
+
static void osd_req_op_cls_request_info_pagelist(
struct ceph_osd_request *osd_req,
unsigned int which, struct ceph_pagelist *pagelist)
@@ -346,6 +369,8 @@ static u64 ceph_osd_data_length(struct ceph_osd_data *osd_data)
#endif /* CONFIG_BLOCK */
case CEPH_OSD_DATA_TYPE_BVECS:
return osd_data->bvec_pos.iter.bi_size;
+ case CEPH_OSD_DATA_TYPE_ITER:
+ return iov_iter_count(&osd_data->iter);
default:
WARN(true, "unrecognized data type %d\n", (int)osd_data->type);
return 0;
@@ -376,8 +401,10 @@ static void osd_req_op_data_release(struct ceph_osd_request *osd_req,
switch (op->op) {
case CEPH_OSD_OP_READ:
+ case CEPH_OSD_OP_SPARSE_READ:
case CEPH_OSD_OP_WRITE:
case CEPH_OSD_OP_WRITEFULL:
+ kfree(op->extent.sparse_ext);
ceph_osd_data_release(&op->extent.osd_data);
break;
case CEPH_OSD_OP_CALL:
@@ -669,6 +696,7 @@ static void get_num_data_items(struct ceph_osd_request *req,
/* reply */
case CEPH_OSD_OP_STAT:
case CEPH_OSD_OP_READ:
+ case CEPH_OSD_OP_SPARSE_READ:
case CEPH_OSD_OP_LIST_WATCHERS:
*num_reply_data_items += 1;
break;
@@ -738,7 +766,7 @@ void osd_req_op_extent_init(struct ceph_osd_request *osd_req,
BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE &&
opcode != CEPH_OSD_OP_WRITEFULL && opcode != CEPH_OSD_OP_ZERO &&
- opcode != CEPH_OSD_OP_TRUNCATE);
+ opcode != CEPH_OSD_OP_TRUNCATE && opcode != CEPH_OSD_OP_SPARSE_READ);
op->extent.offset = offset;
op->extent.length = length;
@@ -951,6 +979,8 @@ static void ceph_osdc_msg_data_add(struct ceph_msg *msg,
#endif
} else if (osd_data->type == CEPH_OSD_DATA_TYPE_BVECS) {
ceph_msg_data_add_bvecs(msg, &osd_data->bvec_pos);
+ } else if (osd_data->type == CEPH_OSD_DATA_TYPE_ITER) {
+ ceph_msg_data_add_iter(msg, &osd_data->iter);
} else {
BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_NONE);
}
@@ -963,6 +993,7 @@ static u32 osd_req_encode_op(struct ceph_osd_op *dst,
case CEPH_OSD_OP_STAT:
break;
case CEPH_OSD_OP_READ:
+ case CEPH_OSD_OP_SPARSE_READ:
case CEPH_OSD_OP_WRITE:
case CEPH_OSD_OP_WRITEFULL:
case CEPH_OSD_OP_ZERO:
@@ -1017,6 +1048,10 @@ static u32 osd_req_encode_op(struct ceph_osd_op *dst,
dst->copy_from.src_fadvise_flags =
cpu_to_le32(src->copy_from.src_fadvise_flags);
break;
+ case CEPH_OSD_OP_ASSERT_VER:
+ dst->assert_ver.unused = cpu_to_le64(0);
+ dst->assert_ver.ver = cpu_to_le64(src->assert_ver.ver);
+ break;
default:
pr_err("unsupported osd opcode %s\n",
ceph_osd_op_name(src->op));
@@ -1059,7 +1094,8 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE &&
opcode != CEPH_OSD_OP_ZERO && opcode != CEPH_OSD_OP_TRUNCATE &&
- opcode != CEPH_OSD_OP_CREATE && opcode != CEPH_OSD_OP_DELETE);
+ opcode != CEPH_OSD_OP_CREATE && opcode != CEPH_OSD_OP_DELETE &&
+ opcode != CEPH_OSD_OP_SPARSE_READ);
req = ceph_osdc_alloc_request(osdc, snapc, num_ops, use_mempool,
GFP_NOFS);
@@ -1100,15 +1136,30 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
if (flags & CEPH_OSD_FLAG_WRITE)
req->r_data_offset = off;
- if (num_ops > 1)
+ if (num_ops > 1) {
+ int num_req_ops, num_rep_ops;
+
/*
- * This is a special case for ceph_writepages_start(), but it
- * also covers ceph_uninline_data(). If more multi-op request
- * use cases emerge, we will need a separate helper.
+ * If this is a multi-op write request, assume that we'll need
+ * request ops. If it's a multi-op read then assume we'll need
+ * reply ops. Anything else and call it -EINVAL.
*/
- r = __ceph_osdc_alloc_messages(req, GFP_NOFS, num_ops, 0);
- else
+ if (flags & CEPH_OSD_FLAG_WRITE) {
+ num_req_ops = num_ops;
+ num_rep_ops = 0;
+ } else if (flags & CEPH_OSD_FLAG_READ) {
+ num_req_ops = 0;
+ num_rep_ops = num_ops;
+ } else {
+ r = -EINVAL;
+ goto fail;
+ }
+
+ r = __ceph_osdc_alloc_messages(req, GFP_NOFS, num_req_ops,
+ num_rep_ops);
+ } else {
r = ceph_osdc_alloc_messages(req, GFP_NOFS);
+ }
if (r)
goto fail;
@@ -1120,6 +1171,18 @@ fail:
}
EXPORT_SYMBOL(ceph_osdc_new_request);
+int __ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op, int cnt)
+{
+ op->extent.sparse_ext_cnt = cnt;
+ op->extent.sparse_ext = kmalloc_array(cnt,
+ sizeof(*op->extent.sparse_ext),
+ GFP_NOFS);
+ if (!op->extent.sparse_ext)
+ return -ENOMEM;
+ return 0;
+}
+EXPORT_SYMBOL(__ceph_alloc_sparse_ext_map);
+
/*
* We keep osd requests in an rbtree, sorted by ->r_tid.
*/
@@ -1177,6 +1240,7 @@ static void osd_init(struct ceph_osd *osd)
{
refcount_set(&osd->o_ref, 1);
RB_CLEAR_NODE(&osd->o_node);
+ spin_lock_init(&osd->o_requests_lock);
osd->o_requests = RB_ROOT;
osd->o_linger_requests = RB_ROOT;
osd->o_backoff_mappings = RB_ROOT;
@@ -1187,6 +1251,13 @@ static void osd_init(struct ceph_osd *osd)
mutex_init(&osd->lock);
}
+static void ceph_init_sparse_read(struct ceph_sparse_read *sr)
+{
+ kfree(sr->sr_extent);
+ memset(sr, '\0', sizeof(*sr));
+ sr->sr_state = CEPH_SPARSE_READ_HDR;
+}
+
static void osd_cleanup(struct ceph_osd *osd)
{
WARN_ON(!RB_EMPTY_NODE(&osd->o_node));
@@ -1197,6 +1268,8 @@ static void osd_cleanup(struct ceph_osd *osd)
WARN_ON(!list_empty(&osd->o_osd_lru));
WARN_ON(!list_empty(&osd->o_keepalive_item));
+ ceph_init_sparse_read(&osd->o_sparse_read);
+
if (osd->o_auth.authorizer) {
WARN_ON(osd_homeless(osd));
ceph_auth_destroy_authorizer(osd->o_auth.authorizer);
@@ -1216,6 +1289,9 @@ static struct ceph_osd *create_osd(struct ceph_osd_client *osdc, int onum)
osd_init(osd);
osd->o_osdc = osdc;
osd->o_osd = onum;
+ osd->o_sparse_op_idx = -1;
+
+ ceph_init_sparse_read(&osd->o_sparse_read);
ceph_con_init(&osd->o_con, osd, &osd_con_ops, &osdc->client->msgr);
@@ -1406,7 +1482,9 @@ static void link_request(struct ceph_osd *osd, struct ceph_osd_request *req)
atomic_inc(&osd->o_osdc->num_homeless);
get_osd(osd);
+ spin_lock(&osd->o_requests_lock);
insert_request(&osd->o_requests, req);
+ spin_unlock(&osd->o_requests_lock);
req->r_osd = osd;
}
@@ -1418,7 +1496,9 @@ static void unlink_request(struct ceph_osd *osd, struct ceph_osd_request *req)
req, req->r_tid);
req->r_osd = NULL;
+ spin_lock(&osd->o_requests_lock);
erase_request(&osd->o_requests, req);
+ spin_unlock(&osd->o_requests_lock);
put_osd(osd);
if (!osd_homeless(osd))
@@ -2016,6 +2096,7 @@ static void setup_request_data(struct ceph_osd_request *req)
&op->raw_data_in);
break;
case CEPH_OSD_OP_READ:
+ case CEPH_OSD_OP_SPARSE_READ:
ceph_osdc_msg_data_add(reply_msg,
&op->extent.osd_data);
break;
@@ -2435,8 +2516,10 @@ static void finish_request(struct ceph_osd_request *req)
req->r_end_latency = ktime_get();
- if (req->r_osd)
+ if (req->r_osd) {
+ ceph_init_sparse_read(&req->r_osd->o_sparse_read);
unlink_request(req->r_osd, req);
+ }
atomic_dec(&osdc->num_requests);
/*
@@ -3795,6 +3878,7 @@ static void handle_reply(struct ceph_osd *osd, struct ceph_msg *msg)
* one (type of) reply back.
*/
WARN_ON(!(m.flags & CEPH_OSD_FLAG_ONDISK));
+ req->r_version = m.user_version;
req->r_result = m.result ?: data_len;
finish_request(req);
mutex_unlock(&osd->lock);
@@ -5348,6 +5432,24 @@ static void osd_dispatch(struct ceph_connection *con, struct ceph_msg *msg)
ceph_msg_put(msg);
}
+/* How much sparse data was requested? */
+static u64 sparse_data_requested(struct ceph_osd_request *req)
+{
+ u64 len = 0;
+
+ if (req->r_flags & CEPH_OSD_FLAG_READ) {
+ int i;
+
+ for (i = 0; i < req->r_num_ops; ++i) {
+ struct ceph_osd_req_op *op = &req->r_ops[i];
+
+ if (op->op == CEPH_OSD_OP_SPARSE_READ)
+ len += op->extent.length;
+ }
+ }
+ return len;
+}
+
/*
* Lookup and return message for incoming reply. Don't try to do
* anything about a larger than preallocated data portion of the
@@ -5364,6 +5466,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
int front_len = le32_to_cpu(hdr->front_len);
int data_len = le32_to_cpu(hdr->data_len);
u64 tid = le64_to_cpu(hdr->tid);
+ u64 srlen;
down_read(&osdc->lock);
if (!osd_registered(osd)) {
@@ -5396,7 +5499,8 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
req->r_reply = m;
}
- if (data_len > req->r_reply->data_length) {
+ srlen = sparse_data_requested(req);
+ if (!srlen && data_len > req->r_reply->data_length) {
pr_warn("%s osd%d tid %llu data %d > preallocated %zu, skipping\n",
__func__, osd->o_osd, req->r_tid, data_len,
req->r_reply->data_length);
@@ -5406,6 +5510,8 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
}
m = ceph_msg_get(req->r_reply);
+ m->sparse_read_total = srlen;
+
dout("get_reply tid %lld %p\n", tid, m);
out_unlock_session:
@@ -5638,9 +5744,217 @@ static int osd_check_message_signature(struct ceph_msg *msg)
return ceph_auth_check_message_signature(auth, msg);
}
+static void advance_cursor(struct ceph_msg_data_cursor *cursor, size_t len,
+ bool zero)
+{
+ while (len) {
+ struct page *page;
+ size_t poff, plen;
+
+ page = ceph_msg_data_next(cursor, &poff, &plen);
+ if (plen > len)
+ plen = len;
+ if (zero)
+ zero_user_segment(page, poff, poff + plen);
+ len -= plen;
+ ceph_msg_data_advance(cursor, plen);
+ }
+}
+
+static int prep_next_sparse_read(struct ceph_connection *con,
+ struct ceph_msg_data_cursor *cursor)
+{
+ struct ceph_osd *o = con->private;
+ struct ceph_sparse_read *sr = &o->o_sparse_read;
+ struct ceph_osd_request *req;
+ struct ceph_osd_req_op *op;
+
+ spin_lock(&o->o_requests_lock);
+ req = lookup_request(&o->o_requests, le64_to_cpu(con->in_msg->hdr.tid));
+ if (!req) {
+ spin_unlock(&o->o_requests_lock);
+ return -EBADR;
+ }
+
+ if (o->o_sparse_op_idx < 0) {
+ dout("%s: [%d] starting new sparse read req\n",
+ __func__, o->o_osd);
+ } else {
+ u64 end;
+
+ op = &req->r_ops[o->o_sparse_op_idx];
+
+ WARN_ON_ONCE(op->extent.sparse_ext);
+
+ /* hand back buffer we took earlier */
+ op->extent.sparse_ext = sr->sr_extent;
+ sr->sr_extent = NULL;
+ op->extent.sparse_ext_cnt = sr->sr_count;
+ sr->sr_ext_len = 0;
+ dout("%s: [%d] completed extent array len %d cursor->resid %zd\n",
+ __func__, o->o_osd, op->extent.sparse_ext_cnt, cursor->resid);
+ /* Advance to end of data for this operation */
+ end = ceph_sparse_ext_map_end(op);
+ if (end < sr->sr_req_len)
+ advance_cursor(cursor, sr->sr_req_len - end, false);
+ }
+
+ ceph_init_sparse_read(sr);
+
+ /* find next op in this request (if any) */
+ while (++o->o_sparse_op_idx < req->r_num_ops) {
+ op = &req->r_ops[o->o_sparse_op_idx];
+ if (op->op == CEPH_OSD_OP_SPARSE_READ)
+ goto found;
+ }
+
+ /* reset for next sparse read request */
+ spin_unlock(&o->o_requests_lock);
+ o->o_sparse_op_idx = -1;
+ return 0;
+found:
+ sr->sr_req_off = op->extent.offset;
+ sr->sr_req_len = op->extent.length;
+ sr->sr_pos = sr->sr_req_off;
+ dout("%s: [%d] new sparse read op at idx %d 0x%llx~0x%llx\n", __func__,
+ o->o_osd, o->o_sparse_op_idx, sr->sr_req_off, sr->sr_req_len);
+
+ /* hand off request's sparse extent map buffer */
+ sr->sr_ext_len = op->extent.sparse_ext_cnt;
+ op->extent.sparse_ext_cnt = 0;
+ sr->sr_extent = op->extent.sparse_ext;
+ op->extent.sparse_ext = NULL;
+
+ spin_unlock(&o->o_requests_lock);
+ return 1;
+}
+
+#ifdef __BIG_ENDIAN
+static inline void convert_extent_map(struct ceph_sparse_read *sr)
+{
+ int i;
+
+ for (i = 0; i < sr->sr_count; i++) {
+ struct ceph_sparse_extent *ext = &sr->sr_extent[i];
+
+ ext->off = le64_to_cpu((__force __le64)ext->off);
+ ext->len = le64_to_cpu((__force __le64)ext->len);
+ }
+}
+#else
+static inline void convert_extent_map(struct ceph_sparse_read *sr)
+{
+}
+#endif
+
+static int osd_sparse_read(struct ceph_connection *con,
+ struct ceph_msg_data_cursor *cursor,
+ char **pbuf)
+{
+ struct ceph_osd *o = con->private;
+ struct ceph_sparse_read *sr = &o->o_sparse_read;
+ u32 count = sr->sr_count;
+ u64 eoff, elen, len = 0;
+ int i, ret;
+
+ switch (sr->sr_state) {
+ case CEPH_SPARSE_READ_HDR:
+next_op:
+ ret = prep_next_sparse_read(con, cursor);
+ if (ret <= 0)
+ return ret;
+
+ /* number of extents */
+ ret = sizeof(sr->sr_count);
+ *pbuf = (char *)&sr->sr_count;
+ sr->sr_state = CEPH_SPARSE_READ_EXTENTS;
+ break;
+ case CEPH_SPARSE_READ_EXTENTS:
+ /* Convert sr_count to host-endian */
+ count = le32_to_cpu((__force __le32)sr->sr_count);
+ sr->sr_count = count;
+ dout("[%d] got %u extents\n", o->o_osd, count);
+
+ if (count > 0) {
+ if (!sr->sr_extent || count > sr->sr_ext_len) {
+ /* no extent array provided, or too short */
+ kfree(sr->sr_extent);
+ sr->sr_extent = kmalloc_array(count,
+ sizeof(*sr->sr_extent),
+ GFP_NOIO);
+ if (!sr->sr_extent) {
+ pr_err("%s: failed to allocate %u extents\n",
+ __func__, count);
+ return -ENOMEM;
+ }
+ sr->sr_ext_len = count;
+ }
+ ret = count * sizeof(*sr->sr_extent);
+ *pbuf = (char *)sr->sr_extent;
+ sr->sr_state = CEPH_SPARSE_READ_DATA_LEN;
+ break;
+ }
+ /* No extents? Read data len */
+ fallthrough;
+ case CEPH_SPARSE_READ_DATA_LEN:
+ convert_extent_map(sr);
+ ret = sizeof(sr->sr_datalen);
+ *pbuf = (char *)&sr->sr_datalen;
+ sr->sr_state = CEPH_SPARSE_READ_DATA_PRE;
+ break;
+ case CEPH_SPARSE_READ_DATA_PRE:
+ /* Convert sr_datalen to host-endian */
+ sr->sr_datalen = le32_to_cpu((__force __le32)sr->sr_datalen);
+ for (i = 0; i < count; i++)
+ len += sr->sr_extent[i].len;
+ if (sr->sr_datalen != len) {
+ pr_warn_ratelimited("data len %u != extent len %llu\n",
+ sr->sr_datalen, len);
+ return -EREMOTEIO;
+ }
+ sr->sr_state = CEPH_SPARSE_READ_DATA;
+ fallthrough;
+ case CEPH_SPARSE_READ_DATA:
+ if (sr->sr_index >= count) {
+ sr->sr_state = CEPH_SPARSE_READ_HDR;
+ goto next_op;
+ }
+
+ eoff = sr->sr_extent[sr->sr_index].off;
+ elen = sr->sr_extent[sr->sr_index].len;
+
+ dout("[%d] ext %d off 0x%llx len 0x%llx\n",
+ o->o_osd, sr->sr_index, eoff, elen);
+
+ if (elen > INT_MAX) {
+ dout("Sparse read extent length too long (0x%llx)\n",
+ elen);
+ return -EREMOTEIO;
+ }
+
+ /* zero out anything from sr_pos to start of extent */
+ if (sr->sr_pos < eoff)
+ advance_cursor(cursor, eoff - sr->sr_pos, true);
+
+ /* Set position to end of extent */
+ sr->sr_pos = eoff + elen;
+
+ /* send back the new length and nullify the ptr */
+ cursor->sr_resid = elen;
+ ret = elen;
+ *pbuf = NULL;
+
+ /* Bump the array index */
+ ++sr->sr_index;
+ break;
+ }
+ return ret;
+}
+
static const struct ceph_connection_operations osd_con_ops = {
.get = osd_get_con,
.put = osd_put_con,
+ .sparse_read = osd_sparse_read,
.alloc_msg = osd_alloc_msg,
.dispatch = osd_dispatch,
.fault = osd_fault,
diff --git a/net/compat.c b/net/compat.c
index 6564720f32b7..485db8ee9b28 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -297,7 +297,7 @@ void scm_detach_fds_compat(struct msghdr *msg, struct scm_cookie *scm)
int err = 0, i;
for (i = 0; i < fdmax; i++) {
- err = receive_fd_user(scm->fp->fp[i], cmsg_data + i, o_flags);
+ err = scm_recv_one_fd(scm->fp->fp[i], cmsg_data + i, o_flags);
if (err < 0)
break;
}
diff --git a/net/core/Makefile b/net/core/Makefile
index 731db2eaa610..821aec06abf1 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -18,7 +18,7 @@ obj-y += dev.o dev_addr_lists.o dst.o netevent.o \
obj-$(CONFIG_NETDEV_ADDR_LIST_TEST) += dev_addr_lists_test.o
obj-y += net-sysfs.o
-obj-$(CONFIG_PAGE_POOL) += page_pool.o
+obj-$(CONFIG_PAGE_POOL) += page_pool.o page_pool_user.o
obj-$(CONFIG_PROC_FS) += net-procfs.o
obj-$(CONFIG_NET_PKTGEN) += pktgen.o
obj-$(CONFIG_NETPOLL) += netpoll.o
@@ -40,3 +40,4 @@ obj-$(CONFIG_NET_SOCK_MSG) += skmsg.o
obj-$(CONFIG_BPF_SYSCALL) += sock_map.o
obj-$(CONFIG_BPF_SYSCALL) += bpf_sk_storage.o
obj-$(CONFIG_OF) += of_net.o
+obj-$(CONFIG_NET_TEST) += gso_test.o
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index cca7594be92e..6c4d90b24d46 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -275,9 +275,10 @@ BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk)
static int bpf_sk_storage_charge(struct bpf_local_storage_map *smap,
void *owner, u32 size)
{
- int optmem_max = READ_ONCE(sysctl_optmem_max);
struct sock *sk = (struct sock *)owner;
+ int optmem_max;
+ optmem_max = READ_ONCE(sock_net(sk)->core.sysctl_optmem_max);
/* same check as in sock_kmalloc() */
if (size <= optmem_max &&
atomic_read(&sk->sk_omem_alloc) + size < optmem_max) {
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 176eb5834746..a8b625abe242 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -50,7 +50,7 @@
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
-#include <linux/uio.h>
+#include <linux/iov_iter.h>
#include <linux/indirect_call_wrapper.h>
#include <net/protocol.h>
@@ -61,6 +61,7 @@
#include <net/tcp_states.h>
#include <trace/events/skb.h>
#include <net/busy_poll.h>
+#include <crypto/hash.h>
/*
* Is a socket 'connection oriented' ?
@@ -489,6 +490,24 @@ short_copy:
return 0;
}
+static size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
+ struct iov_iter *i)
+{
+#ifdef CONFIG_CRYPTO_HASH
+ struct ahash_request *hash = hashp;
+ struct scatterlist sg;
+ size_t copied;
+
+ copied = copy_to_iter(addr, bytes, i);
+ sg_init_one(&sg, addr, copied);
+ ahash_request_set_crypt(hash, &sg, NULL, copied);
+ crypto_ahash_update(hash);
+ return copied;
+#else
+ return 0;
+#endif
+}
+
/**
* skb_copy_and_hash_datagram_iter - Copy datagram to an iovec iterator
* and update a hash.
@@ -716,6 +735,60 @@ int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
}
EXPORT_SYMBOL(zerocopy_sg_from_iter);
+static __always_inline
+size_t copy_to_user_iter_csum(void __user *iter_to, size_t progress,
+ size_t len, void *from, void *priv2)
+{
+ __wsum next, *csum = priv2;
+
+ next = csum_and_copy_to_user(from + progress, iter_to, len);
+ *csum = csum_block_add(*csum, next, progress);
+ return next ? 0 : len;
+}
+
+static __always_inline
+size_t memcpy_to_iter_csum(void *iter_to, size_t progress,
+ size_t len, void *from, void *priv2)
+{
+ __wsum *csum = priv2;
+ __wsum next = csum_partial_copy_nocheck(from + progress, iter_to, len);
+
+ *csum = csum_block_add(*csum, next, progress);
+ return 0;
+}
+
+struct csum_state {
+ __wsum csum;
+ size_t off;
+};
+
+static size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate,
+ struct iov_iter *i)
+{
+ struct csum_state *csstate = _csstate;
+ __wsum sum;
+
+ if (WARN_ON_ONCE(i->data_source))
+ return 0;
+ if (unlikely(iov_iter_is_discard(i))) {
+ // can't use csum_memcpy() for that one - data is not copied
+ csstate->csum = csum_block_add(csstate->csum,
+ csum_partial(addr, bytes, 0),
+ csstate->off);
+ csstate->off += bytes;
+ return bytes;
+ }
+
+ sum = csum_shift(csstate->csum, csstate->off);
+
+ bytes = iterate_and_advance2(i, bytes, (void *)addr, &sum,
+ copy_to_user_iter_csum,
+ memcpy_to_iter_csum);
+ csstate->csum = csum_shift(sum, csstate->off);
+ csstate->off += bytes;
+ return bytes;
+}
+
/**
* skb_copy_and_csum_datagram - Copy datagram to an iovec iterator
* and update a checksum.
diff --git a/net/core/dev.c b/net/core/dev.c
index ccff2b6ef958..cb2dab0feee0 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -69,7 +69,7 @@
*/
#include <linux/uaccess.h>
-#include <linux/bitops.h>
+#include <linux/bitmap.h>
#include <linux/capability.h>
#include <linux/cpu.h>
#include <linux/types.h>
@@ -165,7 +165,6 @@ static int netif_rx_internal(struct sk_buff *skb);
static int call_netdevice_notifiers_extack(unsigned long val,
struct net_device *dev,
struct netlink_ext_ack *extack);
-static struct napi_struct *napi_by_id(unsigned int napi_id);
/*
* The @dev_base_head list is protected by @dev_base_lock and the rtnl
@@ -345,7 +344,6 @@ int netdev_name_node_alt_create(struct net_device *dev, const char *name)
static void __netdev_name_node_alt_destroy(struct netdev_name_node *name_node)
{
list_del(&name_node->list);
- netdev_name_node_del(name_node);
kfree(name_node->name);
netdev_name_node_free(name_node);
}
@@ -364,6 +362,8 @@ int netdev_name_node_alt_destroy(struct net_device *dev, const char *name)
if (name_node == dev->name_node || name_node->dev != dev)
return -EINVAL;
+ netdev_name_node_del(name_node);
+ synchronize_rcu();
__netdev_name_node_alt_destroy(name_node);
return 0;
@@ -380,6 +380,7 @@ static void netdev_name_node_alt_flush(struct net_device *dev)
/* Device list insertion */
static void list_netdevice(struct net_device *dev)
{
+ struct netdev_name_node *name_node;
struct net *net = dev_net(dev);
ASSERT_RTNL();
@@ -390,6 +391,10 @@ static void list_netdevice(struct net_device *dev)
hlist_add_head_rcu(&dev->index_hlist,
dev_index_hash(net, dev->ifindex));
write_unlock(&dev_base_lock);
+
+ netdev_for_each_altname(dev, name_node)
+ netdev_name_node_add(net, name_node);
+
/* We reserved the ifindex, this can't fail */
WARN_ON(xa_store(&net->dev_by_index, dev->ifindex, dev, GFP_KERNEL));
@@ -401,12 +406,16 @@ static void list_netdevice(struct net_device *dev)
*/
static void unlist_netdevice(struct net_device *dev, bool lock)
{
+ struct netdev_name_node *name_node;
struct net *net = dev_net(dev);
ASSERT_RTNL();
xa_erase(&net->dev_by_index, dev->ifindex);
+ netdev_for_each_altname(dev, name_node)
+ netdev_name_node_del(name_node);
+
/* Unlink dev from the device chain */
if (lock)
write_lock(&dev_base_lock);
@@ -1047,7 +1056,7 @@ EXPORT_SYMBOL(dev_valid_name);
* __dev_alloc_name - allocate a name for a device
* @net: network namespace to allocate the device name in
* @name: name format string
- * @buf: scratch buffer and result name string
+ * @res: result name string
*
* Passed a format string - eg "lt%d" it will try and find a suitable
* id. It scans list of devices to build up a free map, then chooses
@@ -1058,83 +1067,79 @@ EXPORT_SYMBOL(dev_valid_name);
* Returns the number of the unit assigned or a negative errno code.
*/
-static int __dev_alloc_name(struct net *net, const char *name, char *buf)
+static int __dev_alloc_name(struct net *net, const char *name, char *res)
{
int i = 0;
const char *p;
const int max_netdevices = 8*PAGE_SIZE;
unsigned long *inuse;
struct net_device *d;
+ char buf[IFNAMSIZ];
- if (!dev_valid_name(name))
- return -EINVAL;
-
+ /* Verify the string as this thing may have come from the user.
+ * There must be one "%d" and no other "%" characters.
+ */
p = strchr(name, '%');
- if (p) {
- /*
- * Verify the string as this thing may have come from
- * the user. There must be either one "%d" and no other "%"
- * characters.
- */
- if (p[1] != 'd' || strchr(p + 2, '%'))
- return -EINVAL;
+ if (!p || p[1] != 'd' || strchr(p + 2, '%'))
+ return -EINVAL;
- /* Use one page as a bit array of possible slots */
- inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
- if (!inuse)
- return -ENOMEM;
+ /* Use one page as a bit array of possible slots */
+ inuse = bitmap_zalloc(max_netdevices, GFP_ATOMIC);
+ if (!inuse)
+ return -ENOMEM;
- for_each_netdev(net, d) {
- struct netdev_name_node *name_node;
- list_for_each_entry(name_node, &d->name_node->list, list) {
- if (!sscanf(name_node->name, name, &i))
- continue;
- if (i < 0 || i >= max_netdevices)
- continue;
+ for_each_netdev(net, d) {
+ struct netdev_name_node *name_node;
- /* avoid cases where sscanf is not exact inverse of printf */
- snprintf(buf, IFNAMSIZ, name, i);
- if (!strncmp(buf, name_node->name, IFNAMSIZ))
- __set_bit(i, inuse);
- }
- if (!sscanf(d->name, name, &i))
+ netdev_for_each_altname(d, name_node) {
+ if (!sscanf(name_node->name, name, &i))
continue;
if (i < 0 || i >= max_netdevices)
continue;
- /* avoid cases where sscanf is not exact inverse of printf */
+ /* avoid cases where sscanf is not exact inverse of printf */
snprintf(buf, IFNAMSIZ, name, i);
- if (!strncmp(buf, d->name, IFNAMSIZ))
+ if (!strncmp(buf, name_node->name, IFNAMSIZ))
__set_bit(i, inuse);
}
+ if (!sscanf(d->name, name, &i))
+ continue;
+ if (i < 0 || i >= max_netdevices)
+ continue;
- i = find_first_zero_bit(inuse, max_netdevices);
- free_page((unsigned long) inuse);
+ /* avoid cases where sscanf is not exact inverse of printf */
+ snprintf(buf, IFNAMSIZ, name, i);
+ if (!strncmp(buf, d->name, IFNAMSIZ))
+ __set_bit(i, inuse);
}
- snprintf(buf, IFNAMSIZ, name, i);
- if (!netdev_name_in_use(net, buf))
- return i;
+ i = find_first_zero_bit(inuse, max_netdevices);
+ bitmap_free(inuse);
+ if (i == max_netdevices)
+ return -ENFILE;
- /* It is possible to run out of possible slots
- * when the name is long and there isn't enough space left
- * for the digits, or if all bits are used.
- */
- return -ENFILE;
+ /* 'res' and 'name' could overlap, use 'buf' as an intermediate buffer */
+ strscpy(buf, name, IFNAMSIZ);
+ snprintf(res, IFNAMSIZ, buf, i);
+ return i;
}
-static int dev_alloc_name_ns(struct net *net,
- struct net_device *dev,
- const char *name)
+/* Returns negative errno or allocated unit id (see __dev_alloc_name()) */
+static int dev_prep_valid_name(struct net *net, struct net_device *dev,
+ const char *want_name, char *out_name,
+ int dup_errno)
{
- char buf[IFNAMSIZ];
- int ret;
+ if (!dev_valid_name(want_name))
+ return -EINVAL;
- BUG_ON(!net);
- ret = __dev_alloc_name(net, name, buf);
- if (ret >= 0)
- strscpy(dev->name, buf, IFNAMSIZ);
- return ret;
+ if (strchr(want_name, '%'))
+ return __dev_alloc_name(net, want_name, out_name);
+
+ if (netdev_name_in_use(net, want_name))
+ return -dup_errno;
+ if (out_name != want_name)
+ strscpy(out_name, want_name, IFNAMSIZ);
+ return 0;
}
/**
@@ -1153,26 +1158,17 @@ static int dev_alloc_name_ns(struct net *net,
int dev_alloc_name(struct net_device *dev, const char *name)
{
- return dev_alloc_name_ns(dev_net(dev), dev, name);
+ return dev_prep_valid_name(dev_net(dev), dev, name, dev->name, ENFILE);
}
EXPORT_SYMBOL(dev_alloc_name);
static int dev_get_valid_name(struct net *net, struct net_device *dev,
const char *name)
{
- BUG_ON(!net);
-
- if (!dev_valid_name(name))
- return -EINVAL;
-
- if (strchr(name, '%'))
- return dev_alloc_name_ns(net, dev, name);
- else if (netdev_name_in_use(net, name))
- return -EEXIST;
- else if (dev->name != name)
- strscpy(dev->name, name, IFNAMSIZ);
+ int ret;
- return 0;
+ ret = dev_prep_valid_name(net, dev, name, dev->name, EEXIST);
+ return ret < 0 ? ret : 0;
}
/**
@@ -3292,15 +3288,19 @@ int skb_checksum_help(struct sk_buff *skb)
offset = skb_checksum_start_offset(skb);
ret = -EINVAL;
- if (WARN_ON_ONCE(offset >= skb_headlen(skb))) {
+ if (unlikely(offset >= skb_headlen(skb))) {
DO_ONCE_LITE(skb_dump, KERN_ERR, skb, false);
+ WARN_ONCE(true, "offset (%d) >= skb_headlen() (%u)\n",
+ offset, skb_headlen(skb));
goto out;
}
csum = skb_checksum(skb, offset, skb->len - offset, 0);
offset += skb->csum_offset;
- if (WARN_ON_ONCE(offset + sizeof(__sum16) > skb_headlen(skb))) {
+ if (unlikely(offset + sizeof(__sum16) > skb_headlen(skb))) {
DO_ONCE_LITE(skb_dump, KERN_ERR, skb, false);
+ WARN_ONCE(true, "offset+2 (%zu) > skb_headlen() (%u)\n",
+ offset + sizeof(__sum16), skb_headlen(skb));
goto out;
}
ret = skb_ensure_writable(skb, offset + sizeof(__sum16));
@@ -3471,6 +3471,9 @@ static netdev_features_t gso_features_check(const struct sk_buff *skb,
if (gso_segs > READ_ONCE(dev->gso_max_segs))
return features & ~NETIF_F_GSO_MASK;
+ if (unlikely(skb->len >= READ_ONCE(dev->gso_max_size)))
+ return features & ~NETIF_F_GSO_MASK;
+
if (!skb_shinfo(skb)->gso_type) {
skb_warn_bad_offload(skb);
return features & ~NETIF_F_GSO_MASK;
@@ -3753,6 +3756,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
qdisc_calculate_pkt_len(skb, q);
+ tcf_set_drop_reason(skb, SKB_DROP_REASON_QDISC_DROP);
+
if (q->flags & TCQ_F_NOLOCK) {
if (q->flags & TCQ_F_CAN_BYPASS && nolock_qdisc_is_empty(q) &&
qdisc_run_begin(q)) {
@@ -3782,7 +3787,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
no_lock_out:
if (unlikely(to_free))
kfree_skb_list_reason(to_free,
- SKB_DROP_REASON_QDISC_DROP);
+ tcf_get_drop_reason(to_free));
return rc;
}
@@ -3837,7 +3842,8 @@ no_lock_out:
}
spin_unlock(root_lock);
if (unlikely(to_free))
- kfree_skb_list_reason(to_free, SKB_DROP_REASON_QDISC_DROP);
+ kfree_skb_list_reason(to_free,
+ tcf_get_drop_reason(to_free));
if (unlikely(contended))
spin_unlock(&q->busylock);
return rc;
@@ -3910,7 +3916,8 @@ EXPORT_SYMBOL_GPL(netdev_xmit_skip_txqueue);
#endif /* CONFIG_NET_EGRESS */
#ifdef CONFIG_NET_XGRESS
-static int tc_run(struct tcx_entry *entry, struct sk_buff *skb)
+static int tc_run(struct tcx_entry *entry, struct sk_buff *skb,
+ enum skb_drop_reason *drop_reason)
{
int ret = TC_ACT_UNSPEC;
#ifdef CONFIG_NET_CLS_ACT
@@ -3922,12 +3929,14 @@ static int tc_run(struct tcx_entry *entry, struct sk_buff *skb)
tc_skb_cb(skb)->mru = 0;
tc_skb_cb(skb)->post_ct = false;
+ tcf_set_drop_reason(skb, *drop_reason);
mini_qdisc_bstats_cpu_update(miniq, skb);
ret = tcf_classify(skb, miniq->block, miniq->filter_list, &res, false);
/* Only tcf related quirks below. */
switch (ret) {
case TC_ACT_SHOT:
+ *drop_reason = tcf_get_drop_reason(skb);
mini_qdisc_qstats_cpu_drop(miniq);
break;
case TC_ACT_OK:
@@ -3977,6 +3986,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
struct net_device *orig_dev, bool *another)
{
struct bpf_mprog_entry *entry = rcu_dereference_bh(skb->dev->tcx_ingress);
+ enum skb_drop_reason drop_reason = SKB_DROP_REASON_TC_INGRESS;
int sch_ret;
if (!entry)
@@ -3994,7 +4004,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
if (sch_ret != TC_ACT_UNSPEC)
goto ingress_verdict;
}
- sch_ret = tc_run(tcx_entry(entry), skb);
+ sch_ret = tc_run(tcx_entry(entry), skb, &drop_reason);
ingress_verdict:
switch (sch_ret) {
case TC_ACT_REDIRECT:
@@ -4011,7 +4021,7 @@ ingress_verdict:
*ret = NET_RX_SUCCESS;
return NULL;
case TC_ACT_SHOT:
- kfree_skb_reason(skb, SKB_DROP_REASON_TC_INGRESS);
+ kfree_skb_reason(skb, drop_reason);
*ret = NET_RX_DROP;
return NULL;
/* used by tc_run */
@@ -4032,6 +4042,7 @@ static __always_inline struct sk_buff *
sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
{
struct bpf_mprog_entry *entry = rcu_dereference_bh(dev->tcx_egress);
+ enum skb_drop_reason drop_reason = SKB_DROP_REASON_TC_EGRESS;
int sch_ret;
if (!entry)
@@ -4045,7 +4056,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
if (sch_ret != TC_ACT_UNSPEC)
goto egress_verdict;
}
- sch_ret = tc_run(tcx_entry(entry), skb);
+ sch_ret = tc_run(tcx_entry(entry), skb, &drop_reason);
egress_verdict:
switch (sch_ret) {
case TC_ACT_REDIRECT:
@@ -4054,7 +4065,7 @@ egress_verdict:
*ret = NET_XMIT_SUCCESS;
return NULL;
case TC_ACT_SHOT:
- kfree_skb_reason(skb, SKB_DROP_REASON_TC_EGRESS);
+ kfree_skb_reason(skb, drop_reason);
*ret = NET_XMIT_DROP;
return NULL;
/* used by tc_run */
@@ -6133,7 +6144,7 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
EXPORT_SYMBOL(napi_complete_done);
/* must be called under rcu_read_lock(), as we dont take a reference */
-static struct napi_struct *napi_by_id(unsigned int napi_id)
+struct napi_struct *napi_by_id(unsigned int napi_id)
{
unsigned int hash = napi_id % HASH_SIZE(napi_hash);
struct napi_struct *napi;
@@ -6394,6 +6405,43 @@ int dev_set_threaded(struct net_device *dev, bool threaded)
}
EXPORT_SYMBOL(dev_set_threaded);
+/**
+ * netif_queue_set_napi - Associate queue with the napi
+ * @dev: device to which NAPI and queue belong
+ * @queue_index: Index of queue
+ * @type: queue type as RX or TX
+ * @napi: NAPI context, pass NULL to clear previously set NAPI
+ *
+ * Set queue with its corresponding napi context. This should be done after
+ * registering the NAPI handler for the queue-vector and the queues have been
+ * mapped to the corresponding interrupt vector.
+ */
+void netif_queue_set_napi(struct net_device *dev, unsigned int queue_index,
+ enum netdev_queue_type type, struct napi_struct *napi)
+{
+ struct netdev_rx_queue *rxq;
+ struct netdev_queue *txq;
+
+ if (WARN_ON_ONCE(napi && !napi->dev))
+ return;
+ if (dev->reg_state >= NETREG_REGISTERED)
+ ASSERT_RTNL();
+
+ switch (type) {
+ case NETDEV_QUEUE_TYPE_RX:
+ rxq = __netif_get_rx_queue(dev, queue_index);
+ rxq->napi = napi;
+ return;
+ case NETDEV_QUEUE_TYPE_TX:
+ txq = netdev_get_tx_queue(dev, queue_index);
+ txq->napi = napi;
+ return;
+ default:
+ return;
+ }
+}
+EXPORT_SYMBOL(netif_queue_set_napi);
+
void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
int (*poll)(struct napi_struct *, int), int weight)
{
@@ -6429,6 +6477,7 @@ void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
*/
if (dev->threaded && napi_kthread_create(napi))
dev->threaded = 0;
+ netif_napi_set_irq(napi, -1);
}
EXPORT_SYMBOL(netif_napi_add_weight);
@@ -6523,9 +6572,11 @@ static int __napi_poll(struct napi_struct *n, bool *repoll)
* accidentally calling ->poll() when NAPI is not scheduled.
*/
work = 0;
- if (test_bit(NAPI_STATE_SCHED, &n->state)) {
+ if (napi_is_scheduled(n)) {
work = n->poll(n, weight);
trace_napi_poll(n, work, weight);
+
+ xdp_do_check_flushed(n);
}
if (unlikely(work > weight))
@@ -9023,6 +9074,28 @@ bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b)
}
EXPORT_SYMBOL(netdev_port_same_parent_id);
+static void netdev_dpll_pin_assign(struct net_device *dev, struct dpll_pin *dpll_pin)
+{
+#if IS_ENABLED(CONFIG_DPLL)
+ rtnl_lock();
+ dev->dpll_pin = dpll_pin;
+ rtnl_unlock();
+#endif
+}
+
+void netdev_dpll_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin)
+{
+ WARN_ON(!dpll_pin);
+ netdev_dpll_pin_assign(dev, dpll_pin);
+}
+EXPORT_SYMBOL(netdev_dpll_pin_set);
+
+void netdev_dpll_pin_clear(struct net_device *dev)
+{
+ netdev_dpll_pin_assign(dev, NULL);
+}
+EXPORT_SYMBOL(netdev_dpll_pin_clear);
+
/**
* dev_change_proto_down - set carrier according to proto_down.
*
@@ -10021,6 +10094,54 @@ void netif_tx_stop_all_queues(struct net_device *dev)
}
EXPORT_SYMBOL(netif_tx_stop_all_queues);
+static int netdev_do_alloc_pcpu_stats(struct net_device *dev)
+{
+ void __percpu *v;
+
+ /* Drivers implementing ndo_get_peer_dev must support tstat
+ * accounting, so that skb_do_redirect() can bump the dev's
+ * RX stats upon network namespace switch.
+ */
+ if (dev->netdev_ops->ndo_get_peer_dev &&
+ dev->pcpu_stat_type != NETDEV_PCPU_STAT_TSTATS)
+ return -EOPNOTSUPP;
+
+ switch (dev->pcpu_stat_type) {
+ case NETDEV_PCPU_STAT_NONE:
+ return 0;
+ case NETDEV_PCPU_STAT_LSTATS:
+ v = dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats);
+ break;
+ case NETDEV_PCPU_STAT_TSTATS:
+ v = dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
+ break;
+ case NETDEV_PCPU_STAT_DSTATS:
+ v = dev->dstats = netdev_alloc_pcpu_stats(struct pcpu_dstats);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return v ? 0 : -ENOMEM;
+}
+
+static void netdev_do_free_pcpu_stats(struct net_device *dev)
+{
+ switch (dev->pcpu_stat_type) {
+ case NETDEV_PCPU_STAT_NONE:
+ return;
+ case NETDEV_PCPU_STAT_LSTATS:
+ free_percpu(dev->lstats);
+ break;
+ case NETDEV_PCPU_STAT_TSTATS:
+ free_percpu(dev->tstats);
+ break;
+ case NETDEV_PCPU_STAT_DSTATS:
+ free_percpu(dev->dstats);
+ break;
+ }
+}
+
/**
* register_netdevice() - register a network device
* @dev: device to register
@@ -10081,9 +10202,13 @@ int register_netdevice(struct net_device *dev)
goto err_uninit;
}
+ ret = netdev_do_alloc_pcpu_stats(dev);
+ if (ret)
+ goto err_uninit;
+
ret = dev_index_reserve(net, dev->ifindex);
if (ret < 0)
- goto err_uninit;
+ goto err_free_pcpu;
dev->ifindex = ret;
/* Transfer changeable features to wanted_features and enable
@@ -10189,6 +10314,8 @@ err_uninit_notify:
call_netdevice_notifiers(NETDEV_PRE_UNINIT, dev);
err_ifindex_release:
dev_index_release(net, dev->ifindex);
+err_free_pcpu:
+ netdev_do_free_pcpu_stats(dev);
err_uninit:
if (dev->netdev_ops->ndo_uninit)
dev->netdev_ops->ndo_uninit(dev);
@@ -10427,7 +10554,7 @@ void netdev_run_todo(void)
write_lock(&dev_base_lock);
dev->reg_state = NETREG_UNREGISTERED;
write_unlock(&dev_base_lock);
- linkwatch_forget_dev(dev);
+ linkwatch_sync_dev(dev);
}
while (!list_empty(&list)) {
@@ -10441,6 +10568,7 @@ void netdev_run_todo(void)
WARN_ON(rcu_access_pointer(dev->ip_ptr));
WARN_ON(rcu_access_pointer(dev->ip6_ptr));
+ netdev_do_free_pcpu_stats(dev);
if (dev->priv_destructor)
dev->priv_destructor(dev);
if (dev->needs_free_netdev)
@@ -10475,7 +10603,8 @@ void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
}
EXPORT_SYMBOL(netdev_stats_to_stats64);
-struct net_device_core_stats __percpu *netdev_core_stats_alloc(struct net_device *dev)
+static __cold struct net_device_core_stats __percpu *netdev_core_stats_alloc(
+ struct net_device *dev)
{
struct net_device_core_stats __percpu *p;
@@ -10488,7 +10617,23 @@ struct net_device_core_stats __percpu *netdev_core_stats_alloc(struct net_device
/* This READ_ONCE() pairs with the cmpxchg() above */
return READ_ONCE(dev->core_stats);
}
-EXPORT_SYMBOL(netdev_core_stats_alloc);
+
+noinline void netdev_core_stats_inc(struct net_device *dev, u32 offset)
+{
+ /* This READ_ONCE() pairs with the write in netdev_core_stats_alloc() */
+ struct net_device_core_stats __percpu *p = READ_ONCE(dev->core_stats);
+ unsigned long __percpu *field;
+
+ if (unlikely(!p)) {
+ p = netdev_core_stats_alloc(dev);
+ if (!p)
+ return;
+ }
+
+ field = (__force unsigned long __percpu *)((__force void *)p + offset);
+ this_cpu_inc(*field);
+}
+EXPORT_SYMBOL_GPL(netdev_core_stats_inc);
/**
* dev_get_stats - get network device statistics
@@ -11033,7 +11178,9 @@ EXPORT_SYMBOL(unregister_netdev);
int __dev_change_net_namespace(struct net_device *dev, struct net *net,
const char *pat, int new_ifindex)
{
+ struct netdev_name_node *name_node;
struct net *net_old = dev_net(dev);
+ char new_name[IFNAMSIZ] = {};
int err, new_nsid;
ASSERT_RTNL();
@@ -11060,10 +11207,15 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
/* We get here if we can't use the current device name */
if (!pat)
goto out;
- err = dev_get_valid_name(net, dev, pat);
+ err = dev_prep_valid_name(net, dev, pat, new_name, EEXIST);
if (err < 0)
goto out;
}
+ /* Check that none of the altnames conflicts. */
+ err = -EEXIST;
+ netdev_for_each_altname(dev, name_node)
+ if (netdev_name_in_use(net, name_node->name))
+ goto out;
/* Check that new_ifindex isn't used yet. */
if (new_ifindex) {
@@ -11127,14 +11279,19 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
dev_net_set(dev, net);
dev->ifindex = new_ifindex;
- /* Send a netdev-add uevent to the new namespace */
- kobject_uevent(&dev->dev.kobj, KOBJ_ADD);
- netdev_adjacent_add_links(dev);
+ if (new_name[0]) /* Rename the netdev to prepared name */
+ strscpy(dev->name, new_name, IFNAMSIZ);
/* Fixup kobjects */
+ dev_set_uevent_suppress(&dev->dev, 1);
err = device_rename(&dev->dev, dev->name);
+ dev_set_uevent_suppress(&dev->dev, 0);
WARN_ON(err);
+ /* Send a netdev-add uevent to the new namespace */
+ kobject_uevent(&dev->dev.kobj, KOBJ_ADD);
+ netdev_adjacent_add_links(dev);
+
/* Adapt owner in case owning user namespace of target network
* namespace is different from the original one.
*/
@@ -11394,6 +11551,7 @@ static struct pernet_operations __net_initdata netdev_net_ops = {
static void __net_exit default_device_exit_net(struct net *net)
{
+ struct netdev_name_node *name_node, *tmp;
struct net_device *dev, *aux;
/*
* Push all migratable network devices back to the
@@ -11416,6 +11574,14 @@ static void __net_exit default_device_exit_net(struct net *net)
snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
if (netdev_name_in_use(&init_net, fb_name))
snprintf(fb_name, IFNAMSIZ, "dev%%d");
+
+ netdev_for_each_altname_safe(dev, name_node, tmp)
+ if (netdev_name_in_use(&init_net, name_node->name)) {
+ netdev_name_node_del(name_node);
+ synchronize_rcu();
+ __netdev_name_node_alt_destroy(name_node);
+ }
+
err = dev_change_net_namespace(dev, &init_net, fb_name);
if (err) {
pr_emerg("%s: failed to move %s to init_net: %d\n",
@@ -11458,6 +11624,61 @@ static struct pernet_operations __net_initdata default_device_ops = {
.exit_batch = default_device_exit_batch,
};
+static void __init net_dev_struct_check(void)
+{
+ /* TX read-mostly hotpath */
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, priv_flags);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, netdev_ops);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, header_ops);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, _tx);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, real_num_tx_queues);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, gso_max_size);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, gso_ipv4_max_size);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, gso_max_segs);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, gso_partial_features);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, num_tc);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, mtu);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, needed_headroom);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, tc_to_txq);
+#ifdef CONFIG_XPS
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, xps_maps);
+#endif
+#ifdef CONFIG_NETFILTER_EGRESS
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, nf_hooks_egress);
+#endif
+#ifdef CONFIG_NET_XGRESS
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, tcx_egress);
+#endif
+ CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_tx, 160);
+
+ /* TXRX read-mostly hotpath */
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, flags);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, hard_header_len);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, features);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, ip6_ptr);
+ CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_txrx, 30);
+
+ /* RX read-mostly hotpath */
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, ptype_specific);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, ifindex);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, real_num_rx_queues);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, _rx);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_flush_timeout);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, napi_defer_hard_irqs);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_max_size);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_ipv4_max_size);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, rx_handler);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, rx_handler_data);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, nd_net);
+#ifdef CONFIG_NETPOLL
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, npinfo);
+#endif
+#ifdef CONFIG_NET_XGRESS
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, tcx_ingress);
+#endif
+ CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_rx, 104);
+}
+
/*
* Initialize the DEV module. At boot time this walks the device list and
* unhooks any devices that fail to initialise (normally hardware not
@@ -11475,6 +11696,8 @@ static int __init net_dev_init(void)
BUG_ON(!dev_boot_phase);
+ net_dev_struct_check();
+
if (dev_proc_init())
goto out;
diff --git a/net/core/dev.h b/net/core/dev.h
index e075e198092c..7480b4c84298 100644
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -30,7 +30,6 @@ int __init dev_proc_init(void);
#endif
void linkwatch_init_dev(struct net_device *dev);
-void linkwatch_forget_dev(struct net_device *dev);
void linkwatch_run_queue(void);
void dev_addr_flush(struct net_device *dev);
@@ -62,6 +61,12 @@ struct netdev_name_node {
int netdev_get_name(struct net *net, char *name, int ifindex);
int dev_change_name(struct net_device *dev, const char *newname);
+#define netdev_for_each_altname(dev, namenode) \
+ list_for_each_entry((namenode), &(dev)->name_node->list, list)
+#define netdev_for_each_altname_safe(dev, namenode, next) \
+ list_for_each_entry_safe((namenode), (next), &(dev)->name_node->list, \
+ list)
+
int netdev_name_node_alt_create(struct net_device *dev, const char *name);
int netdev_name_node_alt_destroy(struct net_device *dev, const char *name);
@@ -136,4 +141,12 @@ static inline void netif_set_gro_ipv4_max_size(struct net_device *dev,
}
int rps_cpumask_housekeeping(struct cpumask *mask);
+
+#if defined(CONFIG_DEBUG_NET) && defined(CONFIG_BPF_SYSCALL)
+void xdp_do_check_flushed(struct napi_struct *napi);
+#else
+static inline void xdp_do_check_flushed(struct napi_struct *napi) { }
+#endif
+
+struct napi_struct *napi_by_id(unsigned int napi_id);
#endif
diff --git a/net/core/dev_addr_lists_test.c b/net/core/dev_addr_lists_test.c
index 90e7e3811ae7..4dbd0dc6aea2 100644
--- a/net/core/dev_addr_lists_test.c
+++ b/net/core/dev_addr_lists_test.c
@@ -233,4 +233,5 @@ static struct kunit_suite dev_addr_test_suite = {
};
kunit_test_suite(dev_addr_test_suite);
+MODULE_DESCRIPTION("KUnit tests for struct netdev_hw_addr_list");
MODULE_LICENSE("GPL");
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index b46aedc36939..9a66cf5015f2 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -322,9 +322,9 @@ static int dev_get_hwtstamp(struct net_device *dev, struct ifreq *ifr)
* frames and not forward them), it must set IFF_SEE_ALL_HWTSTAMP_REQUESTS in
* dev->priv_flags.
*/
-static int dev_set_hwtstamp_phylib(struct net_device *dev,
- struct kernel_hwtstamp_config *cfg,
- struct netlink_ext_ack *extack)
+int dev_set_hwtstamp_phylib(struct net_device *dev,
+ struct kernel_hwtstamp_config *cfg,
+ struct netlink_ext_ack *extack)
{
const struct net_device_ops *ops = dev->netdev_ops;
bool phy_ts = phy_has_hwtstamp(dev->phydev);
@@ -363,6 +363,7 @@ static int dev_set_hwtstamp_phylib(struct net_device *dev,
return 0;
}
+EXPORT_SYMBOL_GPL(dev_set_hwtstamp_phylib);
static int dev_set_hwtstamp(struct net_device *dev, struct ifreq *ifr)
{
@@ -382,7 +383,7 @@ static int dev_set_hwtstamp(struct net_device *dev, struct ifreq *ifr)
if (err)
return err;
- err = dsa_master_hwtstamp_validate(dev, &kernel_cfg, &extack);
+ err = dsa_conduit_hwtstamp_validate(dev, &kernel_cfg, &extack);
if (err) {
if (extack._msg)
netdev_err(dev, "%s\n", extack._msg);
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index aff31cd944c2..b0f221d658be 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -183,7 +183,7 @@ out:
}
static const struct genl_multicast_group dropmon_mcgrps[] = {
- { .name = "events", },
+ { .name = "events", .flags = GENL_MCAST_CAP_SYS_ADMIN, },
};
static void send_dm_alert(struct work_struct *work)
@@ -1619,11 +1619,13 @@ static const struct genl_small_ops dropmon_ops[] = {
.cmd = NET_DM_CMD_START,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = net_dm_cmd_trace,
+ .flags = GENL_ADMIN_PERM,
},
{
.cmd = NET_DM_CMD_STOP,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = net_dm_cmd_trace,
+ .flags = GENL_ADMIN_PERM,
},
{
.cmd = NET_DM_CMD_CONFIG_GET,
diff --git a/net/core/dst.c b/net/core/dst.c
index 980e2fd2f013..6838d3212c37 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -45,7 +45,7 @@ const struct dst_metrics dst_default_metrics = {
EXPORT_SYMBOL(dst_default_metrics);
void dst_init(struct dst_entry *dst, struct dst_ops *ops,
- struct net_device *dev, int initial_ref, int initial_obsolete,
+ struct net_device *dev, int initial_obsolete,
unsigned short flags)
{
dst->dev = dev;
@@ -66,7 +66,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops,
dst->tclassid = 0;
#endif
dst->lwtstate = NULL;
- rcuref_init(&dst->__rcuref, initial_ref);
+ rcuref_init(&dst->__rcuref, 1);
INIT_LIST_HEAD(&dst->rt_uncached);
dst->__use = 0;
dst->lastuse = jiffies;
@@ -77,7 +77,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops,
EXPORT_SYMBOL(dst_init);
void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
- int initial_ref, int initial_obsolete, unsigned short flags)
+ int initial_obsolete, unsigned short flags)
{
struct dst_entry *dst;
@@ -90,7 +90,7 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
if (!dst)
return NULL;
- dst_init(dst, ops, dev, initial_ref, initial_obsolete, flags);
+ dst_init(dst, ops, dev, initial_obsolete, flags);
return dst;
}
@@ -270,7 +270,7 @@ static void __metadata_dst_init(struct metadata_dst *md_dst,
struct dst_entry *dst;
dst = &md_dst->dst;
- dst_init(dst, &dst_blackhole_ops, NULL, 1, DST_OBSOLETE_NONE,
+ dst_init(dst, &dst_blackhole_ops, NULL, DST_OBSOLETE_NONE,
DST_METADATA | DST_NOCOUNT);
memset(dst + 1, 0, sizeof(*md_dst) + optslen - sizeof(*dst));
md_dst->type = type;
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 75282222e0b4..3f933ffcefc3 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -53,7 +53,7 @@ bool fib_rule_matchall(const struct fib_rule *rule)
EXPORT_SYMBOL_GPL(fib_rule_matchall);
int fib_default_rule_add(struct fib_rules_ops *ops,
- u32 pref, u32 table, u32 flags)
+ u32 pref, u32 table)
{
struct fib_rule *r;
@@ -65,7 +65,6 @@ int fib_default_rule_add(struct fib_rules_ops *ops,
r->action = FR_ACT_TO_TBL;
r->pref = pref;
r->table = table;
- r->flags = flags;
r->proto = RTPROT_KERNEL;
r->fr_net = ops->fro_net;
r->uid_range = fib_kuid_range_unset;
@@ -594,7 +593,6 @@ static int fib_nl2rule(struct sk_buff *skb, struct nlmsghdr *nlh,
if (tb[FRA_TUN_ID])
nlrule->tun_id = nla_get_be64(tb[FRA_TUN_ID]);
- err = -EINVAL;
if (tb[FRA_L3MDEV] &&
fib_nl2rule_l3mdev(tb[FRA_L3MDEV], nlrule, extack) < 0)
goto errout_free;
diff --git a/net/core/filter.c b/net/core/filter.c
index a094694899c9..ef3e78b6a39c 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -81,6 +81,11 @@
#include <net/xdp.h>
#include <net/mptcp.h>
#include <net/netfilter/nf_conntrack_bpf.h>
+#include <net/netkit.h>
+#include <linux/un.h>
+#include <net/xdp_sock_drv.h>
+
+#include "dev.h"
static const struct bpf_func_proto *
bpf_sk_base_func_proto(enum bpf_func_id func_id);
@@ -199,7 +204,7 @@ BPF_CALL_3(bpf_skb_get_nlattr_nest, struct sk_buff *, skb, u32, a, u32, x)
return 0;
nla = (struct nlattr *) &skb->data[a];
- if (nla->nla_len > skb->len - a)
+ if (!nla_ok(nla, skb->len - a))
return 0;
nla = nla_find_nested(nla, x);
@@ -1215,8 +1220,8 @@ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
*/
static bool __sk_filter_charge(struct sock *sk, struct sk_filter *fp)
{
+ int optmem_max = READ_ONCE(sock_net(sk)->core.sysctl_optmem_max);
u32 filter_size = bpf_prog_size(fp->prog->len);
- int optmem_max = READ_ONCE(sysctl_optmem_max);
/* same check as in sock_kmalloc() */
if (filter_size <= optmem_max &&
@@ -1546,12 +1551,13 @@ EXPORT_SYMBOL_GPL(sk_attach_filter);
int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk)
{
struct bpf_prog *prog = __get_filter(fprog, sk);
- int err;
+ int err, optmem_max;
if (IS_ERR(prog))
return PTR_ERR(prog);
- if (bpf_prog_size(prog->len) > READ_ONCE(sysctl_optmem_max))
+ optmem_max = READ_ONCE(sock_net(sk)->core.sysctl_optmem_max);
+ if (bpf_prog_size(prog->len) > optmem_max)
err = -ENOMEM;
else
err = reuseport_attach_prog(sk, prog);
@@ -1590,7 +1596,7 @@ int sk_attach_bpf(u32 ufd, struct sock *sk)
int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
{
struct bpf_prog *prog;
- int err;
+ int err, optmem_max;
if (sock_flag(sk, SOCK_FILTER_LOCKED))
return -EPERM;
@@ -1618,7 +1624,8 @@ int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
}
} else {
/* BPF_PROG_TYPE_SOCKET_FILTER */
- if (bpf_prog_size(prog->len) > READ_ONCE(sysctl_optmem_max)) {
+ optmem_max = READ_ONCE(sock_net(sk)->core.sysctl_optmem_max);
+ if (bpf_prog_size(prog->len) > optmem_max) {
err = -ENOMEM;
goto err_prog_put;
}
@@ -2465,6 +2472,16 @@ static const struct bpf_func_proto bpf_clone_redirect_proto = {
DEFINE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info);
EXPORT_PER_CPU_SYMBOL_GPL(bpf_redirect_info);
+static struct net_device *skb_get_peer_dev(struct net_device *dev)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+
+ if (likely(ops->ndo_get_peer_dev))
+ return INDIRECT_CALL_1(ops->ndo_get_peer_dev,
+ netkit_peer_dev, dev);
+ return NULL;
+}
+
int skb_do_redirect(struct sk_buff *skb)
{
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
@@ -2478,17 +2495,15 @@ int skb_do_redirect(struct sk_buff *skb)
if (unlikely(!dev))
goto out_drop;
if (flags & BPF_F_PEER) {
- const struct net_device_ops *ops = dev->netdev_ops;
-
- if (unlikely(!ops->ndo_get_peer_dev ||
- !skb_at_tc_ingress(skb)))
+ if (unlikely(!skb_at_tc_ingress(skb)))
goto out_drop;
- dev = ops->ndo_get_peer_dev(dev);
+ dev = skb_get_peer_dev(dev);
if (unlikely(!dev ||
!(dev->flags & IFF_UP) ||
net_eq(net, dev_net(dev))))
goto out_drop;
skb->dev = dev;
+ dev_sw_netstats_rx_add(dev, skb->len);
return -EAGAIN;
}
return flags & BPF_F_NEIGH ?
@@ -2590,6 +2605,22 @@ BPF_CALL_2(bpf_msg_cork_bytes, struct sk_msg *, msg, u32, bytes)
return 0;
}
+static void sk_msg_reset_curr(struct sk_msg *msg)
+{
+ u32 i = msg->sg.start;
+ u32 len = 0;
+
+ do {
+ len += sk_msg_elem(msg, i)->length;
+ sk_msg_iter_var_next(i);
+ if (len >= msg->sg.size)
+ break;
+ } while (i != msg->sg.end);
+
+ msg->sg.curr = i;
+ msg->sg.copybreak = 0;
+}
+
static const struct bpf_func_proto bpf_msg_cork_bytes_proto = {
.func = bpf_msg_cork_bytes,
.gpl_only = false,
@@ -2709,6 +2740,7 @@ BPF_CALL_4(bpf_msg_pull_data, struct sk_msg *, msg, u32, start,
msg->sg.end - shift + NR_MSG_FRAG_IDS :
msg->sg.end - shift;
out:
+ sk_msg_reset_curr(msg);
msg->data = sg_virt(&msg->sg.data[first_sge]) + start - offset;
msg->data_end = msg->data + bytes;
return 0;
@@ -2845,6 +2877,7 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
msg->sg.data[new] = rsge;
}
+ sk_msg_reset_curr(msg);
sk_msg_compute_data_pointers(msg);
return 0;
}
@@ -3013,6 +3046,7 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start,
sk_mem_uncharge(msg->sk, len - pop);
msg->sg.size -= (len - pop);
+ sk_msg_reset_curr(msg);
sk_msg_compute_data_pointers(msg);
return 0;
}
@@ -4059,10 +4093,46 @@ static int bpf_xdp_frags_increase_tail(struct xdp_buff *xdp, int offset)
memset(skb_frag_address(frag) + skb_frag_size(frag), 0, offset);
skb_frag_size_add(frag, offset);
sinfo->xdp_frags_size += offset;
+ if (rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL)
+ xsk_buff_get_tail(xdp)->data_end += offset;
return 0;
}
+static void bpf_xdp_shrink_data_zc(struct xdp_buff *xdp, int shrink,
+ struct xdp_mem_info *mem_info, bool release)
+{
+ struct xdp_buff *zc_frag = xsk_buff_get_tail(xdp);
+
+ if (release) {
+ xsk_buff_del_tail(zc_frag);
+ __xdp_return(NULL, mem_info, false, zc_frag);
+ } else {
+ zc_frag->data_end -= shrink;
+ }
+}
+
+static bool bpf_xdp_shrink_data(struct xdp_buff *xdp, skb_frag_t *frag,
+ int shrink)
+{
+ struct xdp_mem_info *mem_info = &xdp->rxq->mem;
+ bool release = skb_frag_size(frag) == shrink;
+
+ if (mem_info->type == MEM_TYPE_XSK_BUFF_POOL) {
+ bpf_xdp_shrink_data_zc(xdp, shrink, mem_info, release);
+ goto out;
+ }
+
+ if (release) {
+ struct page *page = skb_frag_page(frag);
+
+ __xdp_return(page_address(page), mem_info, false, NULL);
+ }
+
+out:
+ return release;
+}
+
static int bpf_xdp_frags_shrink_tail(struct xdp_buff *xdp, int offset)
{
struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
@@ -4077,12 +4147,7 @@ static int bpf_xdp_frags_shrink_tail(struct xdp_buff *xdp, int offset)
len_free += shrink;
offset -= shrink;
-
- if (skb_frag_size(frag) == shrink) {
- struct page *page = skb_frag_page(frag);
-
- __xdp_return(page_address(page), &xdp->rxq->mem,
- false, NULL);
+ if (bpf_xdp_shrink_data(xdp, frag, shrink)) {
n_frags_free++;
} else {
skb_frag_size_sub(frag, shrink);
@@ -4207,6 +4272,20 @@ void xdp_do_flush(void)
}
EXPORT_SYMBOL_GPL(xdp_do_flush);
+#if defined(CONFIG_DEBUG_NET) && defined(CONFIG_BPF_SYSCALL)
+void xdp_do_check_flushed(struct napi_struct *napi)
+{
+ bool ret;
+
+ ret = dev_check_flush();
+ ret |= cpu_map_check_flush();
+ ret |= xsk_map_check_flush();
+
+ WARN_ONCE(ret, "Missing xdp_do_flush() invocation after NAPI by %ps\n",
+ napi->poll);
+}
+#endif
+
void bpf_clear_redirect_map(struct bpf_map *map)
{
struct bpf_redirect_info *ri;
@@ -5850,6 +5929,9 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
params->rt_metric = res.fi->fib_priority;
params->ifindex = dev->ifindex;
+ if (flags & BPF_FIB_LOOKUP_SRC)
+ params->ipv4_src = fib_result_prefsrc(net, &res);
+
/* xdp and cls_bpf programs are run in RCU-bh so
* rcu_read_lock_bh is not needed here
*/
@@ -5992,6 +6074,18 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
params->rt_metric = res.f6i->fib6_metric;
params->ifindex = dev->ifindex;
+ if (flags & BPF_FIB_LOOKUP_SRC) {
+ if (res.f6i->fib6_prefsrc.plen) {
+ *src = res.f6i->fib6_prefsrc.addr;
+ } else {
+ err = ipv6_bpf_stub->ipv6_dev_get_saddr(net, dev,
+ &fl6.daddr, 0,
+ src);
+ if (err)
+ return BPF_FIB_LKUP_RET_NO_SRC_ADDR;
+ }
+ }
+
if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH)
goto set_fwd_params;
@@ -6010,7 +6104,8 @@ set_fwd_params:
#endif
#define BPF_FIB_LOOKUP_MASK (BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT | \
- BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID)
+ BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID | \
+ BPF_FIB_LOOKUP_SRC)
BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx,
struct bpf_fib_lookup *, params, int, plen, u32, flags)
@@ -7196,7 +7291,6 @@ BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len
struct tcphdr *, th, u32, th_len)
{
#ifdef CONFIG_SYN_COOKIES
- u32 cookie;
int ret;
if (unlikely(!sk || th_len < sizeof(*th)))
@@ -7218,8 +7312,6 @@ BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len
if (tcp_synq_no_recent_overflow(sk))
return -ENOENT;
- cookie = ntohl(th->ack_seq) - 1;
-
/* Both struct iphdr and struct ipv6hdr have the version field at the
* same offset so we can cast to the shorter header (struct iphdr).
*/
@@ -7228,7 +7320,7 @@ BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len
if (sk->sk_family == AF_INET6 && ipv6_only_sock(sk))
return -EINVAL;
- ret = __cookie_v4_check((struct iphdr *)iph, th, cookie);
+ ret = __cookie_v4_check((struct iphdr *)iph, th);
break;
#if IS_BUILTIN(CONFIG_IPV6)
@@ -7239,7 +7331,7 @@ BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len
if (sk->sk_family != AF_INET6)
return -EINVAL;
- ret = __cookie_v6_check((struct ipv6hdr *)iph, th, cookie);
+ ret = __cookie_v6_check((struct ipv6hdr *)iph, th);
break;
#endif /* CONFIG_IPV6 */
@@ -7692,9 +7784,7 @@ static const struct bpf_func_proto bpf_tcp_raw_gen_syncookie_ipv6_proto = {
BPF_CALL_2(bpf_tcp_raw_check_syncookie_ipv4, struct iphdr *, iph,
struct tcphdr *, th)
{
- u32 cookie = ntohl(th->ack_seq) - 1;
-
- if (__cookie_v4_check(iph, th, cookie) > 0)
+ if (__cookie_v4_check(iph, th) > 0)
return 0;
return -EACCES;
@@ -7715,9 +7805,7 @@ BPF_CALL_2(bpf_tcp_raw_check_syncookie_ipv6, struct ipv6hdr *, iph,
struct tcphdr *, th)
{
#if IS_BUILTIN(CONFIG_IPV6)
- u32 cookie = ntohl(th->ack_seq) - 1;
-
- if (__cookie_v6_check(iph, th, cookie) > 0)
+ if (__cookie_v6_check(iph, th) > 0)
return 0;
return -EACCES;
@@ -7858,14 +7946,19 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_CGROUP_INET6_BIND:
case BPF_CGROUP_INET4_CONNECT:
case BPF_CGROUP_INET6_CONNECT:
+ case BPF_CGROUP_UNIX_CONNECT:
case BPF_CGROUP_UDP4_RECVMSG:
case BPF_CGROUP_UDP6_RECVMSG:
+ case BPF_CGROUP_UNIX_RECVMSG:
case BPF_CGROUP_UDP4_SENDMSG:
case BPF_CGROUP_UDP6_SENDMSG:
+ case BPF_CGROUP_UNIX_SENDMSG:
case BPF_CGROUP_INET4_GETPEERNAME:
case BPF_CGROUP_INET6_GETPEERNAME:
+ case BPF_CGROUP_UNIX_GETPEERNAME:
case BPF_CGROUP_INET4_GETSOCKNAME:
case BPF_CGROUP_INET6_GETSOCKNAME:
+ case BPF_CGROUP_UNIX_GETSOCKNAME:
return &bpf_sock_addr_setsockopt_proto;
default:
return NULL;
@@ -7876,14 +7969,19 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_CGROUP_INET6_BIND:
case BPF_CGROUP_INET4_CONNECT:
case BPF_CGROUP_INET6_CONNECT:
+ case BPF_CGROUP_UNIX_CONNECT:
case BPF_CGROUP_UDP4_RECVMSG:
case BPF_CGROUP_UDP6_RECVMSG:
+ case BPF_CGROUP_UNIX_RECVMSG:
case BPF_CGROUP_UDP4_SENDMSG:
case BPF_CGROUP_UDP6_SENDMSG:
+ case BPF_CGROUP_UNIX_SENDMSG:
case BPF_CGROUP_INET4_GETPEERNAME:
case BPF_CGROUP_INET6_GETPEERNAME:
+ case BPF_CGROUP_UNIX_GETPEERNAME:
case BPF_CGROUP_INET4_GETSOCKNAME:
case BPF_CGROUP_INET6_GETSOCKNAME:
+ case BPF_CGROUP_UNIX_GETSOCKNAME:
return &bpf_sock_addr_getsockopt_proto;
default:
return NULL;
@@ -8931,8 +9029,8 @@ static bool sock_addr_is_valid_access(int off, int size,
if (off % size != 0)
return false;
- /* Disallow access to IPv6 fields from IPv4 contex and vise
- * versa.
+ /* Disallow access to fields not belonging to the attach type's address
+ * family.
*/
switch (off) {
case bpf_ctx_range(struct bpf_sock_addr, user_ip4):
@@ -11724,9 +11822,7 @@ bpf_sk_base_func_proto(enum bpf_func_id func_id)
return func;
}
-__diag_push();
-__diag_ignore_all("-Wmissing-prototypes",
- "Global functions as their definitions will be in vmlinux BTF");
+__bpf_kfunc_start_defs();
__bpf_kfunc int bpf_dynptr_from_skb(struct sk_buff *skb, u64 flags,
struct bpf_dynptr_kern *ptr__uninit)
{
@@ -11752,7 +11848,28 @@ __bpf_kfunc int bpf_dynptr_from_xdp(struct xdp_buff *xdp, u64 flags,
return 0;
}
-__diag_pop();
+
+__bpf_kfunc int bpf_sock_addr_set_sun_path(struct bpf_sock_addr_kern *sa_kern,
+ const u8 *sun_path, u32 sun_path__sz)
+{
+ struct sockaddr_un *un;
+
+ if (sa_kern->sk->sk_family != AF_UNIX)
+ return -EINVAL;
+
+ /* We do not allow changing the address to unnamed or larger than the
+ * maximum allowed address size for a unix sockaddr.
+ */
+ if (sun_path__sz == 0 || sun_path__sz > UNIX_PATH_MAX)
+ return -EINVAL;
+
+ un = (struct sockaddr_un *)sa_kern->uaddr;
+ memcpy(un->sun_path, sun_path, sun_path__sz);
+ sa_kern->uaddrlen = offsetof(struct sockaddr_un, sun_path) + sun_path__sz;
+
+ return 0;
+}
+__bpf_kfunc_end_defs();
int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags,
struct bpf_dynptr_kern *ptr__uninit)
@@ -11776,6 +11893,10 @@ BTF_SET8_START(bpf_kfunc_check_set_xdp)
BTF_ID_FLAGS(func, bpf_dynptr_from_xdp)
BTF_SET8_END(bpf_kfunc_check_set_xdp)
+BTF_SET8_START(bpf_kfunc_check_set_sock_addr)
+BTF_ID_FLAGS(func, bpf_sock_addr_set_sun_path)
+BTF_SET8_END(bpf_kfunc_check_set_sock_addr)
+
static const struct btf_kfunc_id_set bpf_kfunc_set_skb = {
.owner = THIS_MODULE,
.set = &bpf_kfunc_check_set_skb,
@@ -11786,6 +11907,11 @@ static const struct btf_kfunc_id_set bpf_kfunc_set_xdp = {
.set = &bpf_kfunc_check_set_xdp,
};
+static const struct btf_kfunc_id_set bpf_kfunc_set_sock_addr = {
+ .owner = THIS_MODULE,
+ .set = &bpf_kfunc_check_set_sock_addr,
+};
+
static int __init bpf_kfunc_init(void)
{
int ret;
@@ -11800,14 +11926,13 @@ static int __init bpf_kfunc_init(void)
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_XMIT, &bpf_kfunc_set_skb);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_SEG6LOCAL, &bpf_kfunc_set_skb);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_NETFILTER, &bpf_kfunc_set_skb);
- return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_kfunc_set_xdp);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_kfunc_set_xdp);
+ return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+ &bpf_kfunc_set_sock_addr);
}
late_initcall(bpf_kfunc_init);
-/* Disables missing prototype warnings */
-__diag_push();
-__diag_ignore_all("-Wmissing-prototypes",
- "Global functions as their definitions will be in vmlinux BTF");
+__bpf_kfunc_start_defs();
/* bpf_sock_destroy: Destroy the given socket with ECONNABORTED error code.
*
@@ -11841,7 +11966,7 @@ __bpf_kfunc int bpf_sock_destroy(struct sock_common *sock)
return sk->sk_prot->diag_destroy(sk, ECONNABORTED);
}
-__diag_pop()
+__bpf_kfunc_end_defs();
BTF_SET8_START(bpf_sk_iter_kfunc_ids)
BTF_ID_FLAGS(func, bpf_sock_destroy, KF_TRUSTED_ARGS)
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 89d15ceaf9af..272f09251343 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -1446,7 +1446,7 @@ proto_again:
break;
}
- nhoff += ntohs(hdr->message_length);
+ nhoff += sizeof(struct ptp_header);
fdret = FLOW_DISSECT_RET_OUT_GOOD;
break;
}
@@ -1831,8 +1831,7 @@ u32 __skb_get_hash_symmetric(const struct sk_buff *skb)
memset(&keys, 0, sizeof(keys));
__skb_flow_dissect(NULL, skb, &flow_keys_dissector_symmetric,
- &keys, NULL, 0, 0, 0,
- FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
+ &keys, NULL, 0, 0, 0, 0);
return __flow_hash_from_keys(&keys, &hashrnd);
}
diff --git a/net/core/gso_test.c b/net/core/gso_test.c
new file mode 100644
index 000000000000..4c2e77bd12f4
--- /dev/null
+++ b/net/core/gso_test.c
@@ -0,0 +1,274 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <kunit/test.h>
+#include <linux/skbuff.h>
+
+static const char hdr[] = "abcdefgh";
+#define GSO_TEST_SIZE 1000
+
+static void __init_skb(struct sk_buff *skb)
+{
+ skb_reset_mac_header(skb);
+ memcpy(skb_mac_header(skb), hdr, sizeof(hdr));
+
+ /* skb_segment expects skb->data at start of payload */
+ skb_pull(skb, sizeof(hdr));
+ skb_reset_network_header(skb);
+ skb_reset_transport_header(skb);
+
+ /* proto is arbitrary, as long as not ETH_P_TEB or vlan */
+ skb->protocol = htons(ETH_P_ATALK);
+ skb_shinfo(skb)->gso_size = GSO_TEST_SIZE;
+}
+
+enum gso_test_nr {
+ GSO_TEST_LINEAR,
+ GSO_TEST_NO_GSO,
+ GSO_TEST_FRAGS,
+ GSO_TEST_FRAGS_PURE,
+ GSO_TEST_GSO_PARTIAL,
+ GSO_TEST_FRAG_LIST,
+ GSO_TEST_FRAG_LIST_PURE,
+ GSO_TEST_FRAG_LIST_NON_UNIFORM,
+ GSO_TEST_GSO_BY_FRAGS,
+};
+
+struct gso_test_case {
+ enum gso_test_nr id;
+ const char *name;
+
+ /* input */
+ unsigned int linear_len;
+ unsigned int nr_frags;
+ const unsigned int *frags;
+ unsigned int nr_frag_skbs;
+ const unsigned int *frag_skbs;
+
+ /* output as expected */
+ unsigned int nr_segs;
+ const unsigned int *segs;
+};
+
+static struct gso_test_case cases[] = {
+ {
+ .id = GSO_TEST_NO_GSO,
+ .name = "no_gso",
+ .linear_len = GSO_TEST_SIZE,
+ .nr_segs = 1,
+ .segs = (const unsigned int[]) { GSO_TEST_SIZE },
+ },
+ {
+ .id = GSO_TEST_LINEAR,
+ .name = "linear",
+ .linear_len = GSO_TEST_SIZE + GSO_TEST_SIZE + 1,
+ .nr_segs = 3,
+ .segs = (const unsigned int[]) { GSO_TEST_SIZE, GSO_TEST_SIZE, 1 },
+ },
+ {
+ .id = GSO_TEST_FRAGS,
+ .name = "frags",
+ .linear_len = GSO_TEST_SIZE,
+ .nr_frags = 2,
+ .frags = (const unsigned int[]) { GSO_TEST_SIZE, 1 },
+ .nr_segs = 3,
+ .segs = (const unsigned int[]) { GSO_TEST_SIZE, GSO_TEST_SIZE, 1 },
+ },
+ {
+ .id = GSO_TEST_FRAGS_PURE,
+ .name = "frags_pure",
+ .nr_frags = 3,
+ .frags = (const unsigned int[]) { GSO_TEST_SIZE, GSO_TEST_SIZE, 2 },
+ .nr_segs = 3,
+ .segs = (const unsigned int[]) { GSO_TEST_SIZE, GSO_TEST_SIZE, 2 },
+ },
+ {
+ .id = GSO_TEST_GSO_PARTIAL,
+ .name = "gso_partial",
+ .linear_len = GSO_TEST_SIZE,
+ .nr_frags = 2,
+ .frags = (const unsigned int[]) { GSO_TEST_SIZE, 3 },
+ .nr_segs = 2,
+ .segs = (const unsigned int[]) { 2 * GSO_TEST_SIZE, 3 },
+ },
+ {
+ /* commit 89319d3801d1: frag_list on mss boundaries */
+ .id = GSO_TEST_FRAG_LIST,
+ .name = "frag_list",
+ .linear_len = GSO_TEST_SIZE,
+ .nr_frag_skbs = 2,
+ .frag_skbs = (const unsigned int[]) { GSO_TEST_SIZE, GSO_TEST_SIZE },
+ .nr_segs = 3,
+ .segs = (const unsigned int[]) { GSO_TEST_SIZE, GSO_TEST_SIZE, GSO_TEST_SIZE },
+ },
+ {
+ .id = GSO_TEST_FRAG_LIST_PURE,
+ .name = "frag_list_pure",
+ .nr_frag_skbs = 2,
+ .frag_skbs = (const unsigned int[]) { GSO_TEST_SIZE, GSO_TEST_SIZE },
+ .nr_segs = 2,
+ .segs = (const unsigned int[]) { GSO_TEST_SIZE, GSO_TEST_SIZE },
+ },
+ {
+ /* commit 43170c4e0ba7: GRO of frag_list trains */
+ .id = GSO_TEST_FRAG_LIST_NON_UNIFORM,
+ .name = "frag_list_non_uniform",
+ .linear_len = GSO_TEST_SIZE,
+ .nr_frag_skbs = 4,
+ .frag_skbs = (const unsigned int[]) { GSO_TEST_SIZE, 1, GSO_TEST_SIZE, 2 },
+ .nr_segs = 4,
+ .segs = (const unsigned int[]) { GSO_TEST_SIZE, GSO_TEST_SIZE, GSO_TEST_SIZE, 3 },
+ },
+ {
+ /* commit 3953c46c3ac7 ("sk_buff: allow segmenting based on frag sizes") and
+ * commit 90017accff61 ("sctp: Add GSO support")
+ *
+ * "there will be a cover skb with protocol headers and
+ * children ones containing the actual segments"
+ */
+ .id = GSO_TEST_GSO_BY_FRAGS,
+ .name = "gso_by_frags",
+ .nr_frag_skbs = 4,
+ .frag_skbs = (const unsigned int[]) { 100, 200, 300, 400 },
+ .nr_segs = 4,
+ .segs = (const unsigned int[]) { 100, 200, 300, 400 },
+ },
+};
+
+static void gso_test_case_to_desc(struct gso_test_case *t, char *desc)
+{
+ sprintf(desc, "%s", t->name);
+}
+
+KUNIT_ARRAY_PARAM(gso_test, cases, gso_test_case_to_desc);
+
+static void gso_test_func(struct kunit *test)
+{
+ const int shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+ struct sk_buff *skb, *segs, *cur, *next, *last;
+ const struct gso_test_case *tcase;
+ netdev_features_t features;
+ struct page *page;
+ int i;
+
+ tcase = test->param_value;
+
+ page = alloc_page(GFP_KERNEL);
+ KUNIT_ASSERT_NOT_NULL(test, page);
+ skb = build_skb(page_address(page), sizeof(hdr) + tcase->linear_len + shinfo_size);
+ KUNIT_ASSERT_NOT_NULL(test, skb);
+ __skb_put(skb, sizeof(hdr) + tcase->linear_len);
+
+ __init_skb(skb);
+
+ if (tcase->nr_frags) {
+ unsigned int pg_off = 0;
+
+ page = alloc_page(GFP_KERNEL);
+ KUNIT_ASSERT_NOT_NULL(test, page);
+ page_ref_add(page, tcase->nr_frags - 1);
+
+ for (i = 0; i < tcase->nr_frags; i++) {
+ skb_fill_page_desc(skb, i, page, pg_off, tcase->frags[i]);
+ pg_off += tcase->frags[i];
+ }
+
+ KUNIT_ASSERT_LE(test, pg_off, PAGE_SIZE);
+
+ skb->data_len = pg_off;
+ skb->len += skb->data_len;
+ skb->truesize += skb->data_len;
+ }
+
+ if (tcase->frag_skbs) {
+ unsigned int total_size = 0, total_true_size = 0;
+ struct sk_buff *frag_skb, *prev = NULL;
+
+ for (i = 0; i < tcase->nr_frag_skbs; i++) {
+ unsigned int frag_size;
+
+ page = alloc_page(GFP_KERNEL);
+ KUNIT_ASSERT_NOT_NULL(test, page);
+
+ frag_size = tcase->frag_skbs[i];
+ frag_skb = build_skb(page_address(page),
+ frag_size + shinfo_size);
+ KUNIT_ASSERT_NOT_NULL(test, frag_skb);
+ __skb_put(frag_skb, frag_size);
+
+ if (prev)
+ prev->next = frag_skb;
+ else
+ skb_shinfo(skb)->frag_list = frag_skb;
+ prev = frag_skb;
+
+ total_size += frag_size;
+ total_true_size += frag_skb->truesize;
+ }
+
+ skb->len += total_size;
+ skb->data_len += total_size;
+ skb->truesize += total_true_size;
+
+ if (tcase->id == GSO_TEST_GSO_BY_FRAGS)
+ skb_shinfo(skb)->gso_size = GSO_BY_FRAGS;
+ }
+
+ features = NETIF_F_SG | NETIF_F_HW_CSUM;
+ if (tcase->id == GSO_TEST_GSO_PARTIAL)
+ features |= NETIF_F_GSO_PARTIAL;
+
+ /* TODO: this should also work with SG,
+ * rather than hit BUG_ON(i >= nfrags)
+ */
+ if (tcase->id == GSO_TEST_FRAG_LIST_NON_UNIFORM)
+ features &= ~NETIF_F_SG;
+
+ segs = skb_segment(skb, features);
+ if (IS_ERR(segs)) {
+ KUNIT_FAIL(test, "segs error %lld", PTR_ERR(segs));
+ goto free_gso_skb;
+ } else if (!segs) {
+ KUNIT_FAIL(test, "no segments");
+ goto free_gso_skb;
+ }
+
+ last = segs->prev;
+ for (cur = segs, i = 0; cur; cur = next, i++) {
+ next = cur->next;
+
+ KUNIT_ASSERT_EQ(test, cur->len, sizeof(hdr) + tcase->segs[i]);
+
+ /* segs have skb->data pointing to the mac header */
+ KUNIT_ASSERT_PTR_EQ(test, skb_mac_header(cur), cur->data);
+ KUNIT_ASSERT_PTR_EQ(test, skb_network_header(cur), cur->data + sizeof(hdr));
+
+ /* header was copied to all segs */
+ KUNIT_ASSERT_EQ(test, memcmp(skb_mac_header(cur), hdr, sizeof(hdr)), 0);
+
+ /* last seg can be found through segs->prev pointer */
+ if (!next)
+ KUNIT_ASSERT_PTR_EQ(test, cur, last);
+
+ consume_skb(cur);
+ }
+
+ KUNIT_ASSERT_EQ(test, i, tcase->nr_segs);
+
+free_gso_skb:
+ consume_skb(skb);
+}
+
+static struct kunit_case gso_test_cases[] = {
+ KUNIT_CASE_PARAM(gso_test_func, gso_test_gen_params),
+ {}
+};
+
+static struct kunit_suite gso_test_suite = {
+ .name = "net_core_gso",
+ .test_cases = gso_test_cases,
+};
+
+kunit_test_suite(gso_test_suite);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("KUnit tests for segmentation offload");
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index c469d1c4db5d..429571c258da 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -192,7 +192,10 @@ static void __linkwatch_run_queue(int urgent_only)
#define MAX_DO_DEV_PER_LOOP 100
int do_dev = MAX_DO_DEV_PER_LOOP;
- struct net_device *dev;
+ /* Use a local list here since we add non-urgent
+ * events back to the global one when called with
+ * urgent_only=1.
+ */
LIST_HEAD(wrk);
/* Give urgent case more budget */
@@ -218,6 +221,7 @@ static void __linkwatch_run_queue(int urgent_only)
list_splice_init(&lweventlist, &wrk);
while (!list_empty(&wrk) && do_dev > 0) {
+ struct net_device *dev;
dev = list_first_entry(&wrk, struct net_device, link_watch_list);
list_del_init(&dev->link_watch_list);
@@ -245,7 +249,7 @@ static void __linkwatch_run_queue(int urgent_only)
spin_unlock_irq(&lweventlist_lock);
}
-void linkwatch_forget_dev(struct net_device *dev)
+void linkwatch_sync_dev(struct net_device *dev)
{
unsigned long flags;
int clean = 0;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 6b76cd103195..552719c3bbc3 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -251,10 +251,13 @@ bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
static int neigh_forced_gc(struct neigh_table *tbl)
{
- int max_clean = atomic_read(&tbl->gc_entries) - tbl->gc_thresh2;
+ int max_clean = atomic_read(&tbl->gc_entries) -
+ READ_ONCE(tbl->gc_thresh2);
+ u64 tmax = ktime_get_ns() + NSEC_PER_MSEC;
unsigned long tref = jiffies - 5 * HZ;
struct neighbour *n, *tmp;
int shrunk = 0;
+ int loop = 0;
NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
@@ -277,11 +280,16 @@ static int neigh_forced_gc(struct neigh_table *tbl)
shrunk++;
if (shrunk >= max_clean)
break;
+ if (++loop == 16) {
+ if (ktime_get_ns() > tmax)
+ goto unlock;
+ loop = 0;
+ }
}
}
- tbl->last_flush = jiffies;
-
+ WRITE_ONCE(tbl->last_flush, jiffies);
+unlock:
write_unlock_bh(&tbl->lock);
return shrunk;
@@ -410,7 +418,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
*/
__skb_queue_purge(&n->arp_queue);
n->arp_queue_len_bytes = 0;
- n->output = neigh_blackhole;
+ WRITE_ONCE(n->output, neigh_blackhole);
if (n->nud_state & NUD_VALID)
n->nud_state = NUD_NOARP;
else
@@ -464,17 +472,17 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl,
{
struct neighbour *n = NULL;
unsigned long now = jiffies;
- int entries;
+ int entries, gc_thresh3;
if (exempt_from_gc)
goto do_alloc;
entries = atomic_inc_return(&tbl->gc_entries) - 1;
- if (entries >= tbl->gc_thresh3 ||
- (entries >= tbl->gc_thresh2 &&
- time_after(now, tbl->last_flush + 5 * HZ))) {
- if (!neigh_forced_gc(tbl) &&
- entries >= tbl->gc_thresh3) {
+ gc_thresh3 = READ_ONCE(tbl->gc_thresh3);
+ if (entries >= gc_thresh3 ||
+ (entries >= READ_ONCE(tbl->gc_thresh2) &&
+ time_after(now, READ_ONCE(tbl->last_flush) + 5 * HZ))) {
+ if (!neigh_forced_gc(tbl) && entries >= gc_thresh3) {
net_info_ratelimited("%s: neighbor table overflow!\n",
tbl->id);
NEIGH_CACHE_STAT_INC(tbl, table_fulls);
@@ -920,7 +928,7 @@ static void neigh_suspect(struct neighbour *neigh)
{
neigh_dbg(2, "neigh %p is suspected\n", neigh);
- neigh->output = neigh->ops->output;
+ WRITE_ONCE(neigh->output, neigh->ops->output);
}
/* Neighbour state is OK;
@@ -932,7 +940,7 @@ static void neigh_connect(struct neighbour *neigh)
{
neigh_dbg(2, "neigh %p is connected\n", neigh);
- neigh->output = neigh->ops->connected_output;
+ WRITE_ONCE(neigh->output, neigh->ops->connected_output);
}
static void neigh_periodic_work(struct work_struct *work)
@@ -955,13 +963,14 @@ static void neigh_periodic_work(struct work_struct *work)
if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
struct neigh_parms *p;
- tbl->last_rand = jiffies;
+
+ WRITE_ONCE(tbl->last_rand, jiffies);
list_for_each_entry(p, &tbl->parms_list, list)
p->reachable_time =
neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
}
- if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
+ if (atomic_read(&tbl->entries) < READ_ONCE(tbl->gc_thresh1))
goto out;
for (i = 0 ; i < (1 << nht->hash_shift); i++) {
@@ -988,7 +997,9 @@ static void neigh_periodic_work(struct work_struct *work)
(state == NUD_FAILED ||
!time_in_range_open(jiffies, n->used,
n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
- *np = n->next;
+ rcu_assign_pointer(*np,
+ rcu_dereference_protected(n->next,
+ lockdep_is_held(&tbl->lock)));
neigh_mark_dead(n);
write_unlock(&n->lock);
neigh_cleanup_and_release(n);
@@ -1447,7 +1458,7 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
if (n2)
n1 = n2;
}
- n1->output(n1, skb);
+ READ_ONCE(n1->output)(n1, skb);
if (n2)
neigh_release(n2);
rcu_read_unlock();
@@ -2165,15 +2176,16 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
ndtmsg->ndtm_pad2 = 0;
if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
- nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval, NDTA_PAD) ||
- nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
- nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
- nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
+ nla_put_msecs(skb, NDTA_GC_INTERVAL, READ_ONCE(tbl->gc_interval),
+ NDTA_PAD) ||
+ nla_put_u32(skb, NDTA_THRESH1, READ_ONCE(tbl->gc_thresh1)) ||
+ nla_put_u32(skb, NDTA_THRESH2, READ_ONCE(tbl->gc_thresh2)) ||
+ nla_put_u32(skb, NDTA_THRESH3, READ_ONCE(tbl->gc_thresh3)))
goto nla_put_failure;
{
unsigned long now = jiffies;
- long flush_delta = now - tbl->last_flush;
- long rand_delta = now - tbl->last_rand;
+ long flush_delta = now - READ_ONCE(tbl->last_flush);
+ long rand_delta = now - READ_ONCE(tbl->last_rand);
struct neigh_hash_table *nht;
struct ndt_config ndc = {
.ndtc_key_len = tbl->key_len,
@@ -2181,7 +2193,7 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
.ndtc_entries = atomic_read(&tbl->entries),
.ndtc_last_flush = jiffies_to_msecs(flush_delta),
.ndtc_last_rand = jiffies_to_msecs(rand_delta),
- .ndtc_proxy_qlen = tbl->proxy_queue.qlen,
+ .ndtc_proxy_qlen = READ_ONCE(tbl->proxy_queue.qlen),
};
rcu_read_lock();
@@ -2204,17 +2216,17 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
struct neigh_statistics *st;
st = per_cpu_ptr(tbl->stats, cpu);
- ndst.ndts_allocs += st->allocs;
- ndst.ndts_destroys += st->destroys;
- ndst.ndts_hash_grows += st->hash_grows;
- ndst.ndts_res_failed += st->res_failed;
- ndst.ndts_lookups += st->lookups;
- ndst.ndts_hits += st->hits;
- ndst.ndts_rcv_probes_mcast += st->rcv_probes_mcast;
- ndst.ndts_rcv_probes_ucast += st->rcv_probes_ucast;
- ndst.ndts_periodic_gc_runs += st->periodic_gc_runs;
- ndst.ndts_forced_gc_runs += st->forced_gc_runs;
- ndst.ndts_table_fulls += st->table_fulls;
+ ndst.ndts_allocs += READ_ONCE(st->allocs);
+ ndst.ndts_destroys += READ_ONCE(st->destroys);
+ ndst.ndts_hash_grows += READ_ONCE(st->hash_grows);
+ ndst.ndts_res_failed += READ_ONCE(st->res_failed);
+ ndst.ndts_lookups += READ_ONCE(st->lookups);
+ ndst.ndts_hits += READ_ONCE(st->hits);
+ ndst.ndts_rcv_probes_mcast += READ_ONCE(st->rcv_probes_mcast);
+ ndst.ndts_rcv_probes_ucast += READ_ONCE(st->rcv_probes_ucast);
+ ndst.ndts_periodic_gc_runs += READ_ONCE(st->periodic_gc_runs);
+ ndst.ndts_forced_gc_runs += READ_ONCE(st->forced_gc_runs);
+ ndst.ndts_table_fulls += READ_ONCE(st->table_fulls);
}
if (nla_put_64bit(skb, NDTA_STATS, sizeof(ndst), &ndst,
@@ -2443,16 +2455,16 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
goto errout_tbl_lock;
if (tb[NDTA_THRESH1])
- tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
+ WRITE_ONCE(tbl->gc_thresh1, nla_get_u32(tb[NDTA_THRESH1]));
if (tb[NDTA_THRESH2])
- tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
+ WRITE_ONCE(tbl->gc_thresh2, nla_get_u32(tb[NDTA_THRESH2]));
if (tb[NDTA_THRESH3])
- tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
+ WRITE_ONCE(tbl->gc_thresh3, nla_get_u32(tb[NDTA_THRESH3]));
if (tb[NDTA_GC_INTERVAL])
- tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
+ WRITE_ONCE(tbl->gc_interval, nla_get_msecs(tb[NDTA_GC_INTERVAL]));
err = 0;
@@ -3153,7 +3165,7 @@ int neigh_xmit(int index, struct net_device *dev,
rcu_read_unlock();
goto out_kfree_skb;
}
- err = neigh->output(neigh, skb);
+ err = READ_ONCE(neigh->output)(neigh, skb);
rcu_read_unlock();
}
else if (index == NEIGH_LINK_TABLE) {
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index fccaa5bac0ed..a09d507c5b03 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -193,11 +193,22 @@ static ssize_t carrier_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct net_device *netdev = to_net_dev(dev);
+ int ret = -EINVAL;
- if (netif_running(netdev))
- return sysfs_emit(buf, fmt_dec, !!netif_carrier_ok(netdev));
+ if (!rtnl_trylock())
+ return restart_syscall();
- return -EINVAL;
+ if (netif_running(netdev)) {
+ /* Synchronize carrier state with link watch,
+ * see also rtnl_getlink().
+ */
+ linkwatch_sync_dev(netdev);
+
+ ret = sysfs_emit(buf, fmt_dec, !!netif_carrier_ok(netdev));
+ }
+ rtnl_unlock();
+
+ return ret;
}
static DEVICE_ATTR_RW(carrier);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index f4183c4c1ec8..72799533426b 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -372,6 +372,10 @@ out_undo:
static int __net_init net_defaults_init_net(struct net *net)
{
net->core.sysctl_somaxconn = SOMAXCONN;
+ /* Limits per socket sk_omem_alloc usage.
+ * TCP zerocopy regular usage needs 128 KB.
+ */
+ net->core.sysctl_optmem_max = 128 * 1024;
net->core.sysctl_txrehash = SOCK_TXREHASH_ENABLED;
return 0;
@@ -1099,11 +1103,56 @@ out:
rtnl_set_sk_err(net, RTNLGRP_NSID, err);
}
+#ifdef CONFIG_NET_NS
+static void __init netns_ipv4_struct_check(void)
+{
+ /* TX readonly hotpath cache lines */
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
+ sysctl_tcp_early_retrans);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
+ sysctl_tcp_tso_win_divisor);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
+ sysctl_tcp_tso_rtt_log);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
+ sysctl_tcp_autocorking);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
+ sysctl_tcp_min_snd_mss);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
+ sysctl_tcp_notsent_lowat);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
+ sysctl_tcp_limit_output_bytes);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
+ sysctl_tcp_min_rtt_wlen);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
+ sysctl_tcp_wmem);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
+ sysctl_ip_fwd_use_pmtu);
+ CACHELINE_ASSERT_GROUP_SIZE(struct netns_ipv4, netns_ipv4_read_tx, 33);
+
+ /* TXRX readonly hotpath cache lines */
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_txrx,
+ sysctl_tcp_moderate_rcvbuf);
+ CACHELINE_ASSERT_GROUP_SIZE(struct netns_ipv4, netns_ipv4_read_txrx, 1);
+
+ /* RX readonly hotpath cache line */
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx,
+ sysctl_ip_early_demux);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx,
+ sysctl_tcp_early_demux);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx,
+ sysctl_tcp_reordering);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx,
+ sysctl_tcp_rmem);
+ CACHELINE_ASSERT_GROUP_SIZE(struct netns_ipv4, netns_ipv4_read_rx, 18);
+}
+#endif
+
void __init net_ns_init(void)
{
struct net_generic *ng;
#ifdef CONFIG_NET_NS
+ netns_ipv4_struct_check();
net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
SMP_CACHE_BYTES,
SLAB_PANIC|SLAB_ACCOUNT, NULL);
diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c
index d6a70aeaa503..d22f0919821e 100644
--- a/net/core/netclassid_cgroup.c
+++ b/net/core/netclassid_cgroup.c
@@ -88,6 +88,12 @@ static void update_classid_task(struct task_struct *p, u32 classid)
};
unsigned int fd = 0;
+ /* Only update the leader task, when many threads in this task,
+ * so it can avoid the useless traversal.
+ */
+ if (p != p->group_leader)
+ return;
+
do {
task_lock(p);
fd = iterate_fd(p->files, fd, update_classid_sock, &ctx);
diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c
index ea9231378aa6..be7f2ebd61b2 100644
--- a/net/core/netdev-genl-gen.c
+++ b/net/core/netdev-genl-gen.c
@@ -10,11 +10,64 @@
#include <uapi/linux/netdev.h>
+/* Integer value ranges */
+static const struct netlink_range_validation netdev_a_page_pool_id_range = {
+ .min = 1ULL,
+ .max = 4294967295ULL,
+};
+
+static const struct netlink_range_validation netdev_a_page_pool_ifindex_range = {
+ .min = 1ULL,
+ .max = 2147483647ULL,
+};
+
+/* Common nested types */
+const struct nla_policy netdev_page_pool_info_nl_policy[NETDEV_A_PAGE_POOL_IFINDEX + 1] = {
+ [NETDEV_A_PAGE_POOL_ID] = NLA_POLICY_FULL_RANGE(NLA_UINT, &netdev_a_page_pool_id_range),
+ [NETDEV_A_PAGE_POOL_IFINDEX] = NLA_POLICY_FULL_RANGE(NLA_U32, &netdev_a_page_pool_ifindex_range),
+};
+
/* NETDEV_CMD_DEV_GET - do */
static const struct nla_policy netdev_dev_get_nl_policy[NETDEV_A_DEV_IFINDEX + 1] = {
[NETDEV_A_DEV_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1),
};
+/* NETDEV_CMD_PAGE_POOL_GET - do */
+#ifdef CONFIG_PAGE_POOL
+static const struct nla_policy netdev_page_pool_get_nl_policy[NETDEV_A_PAGE_POOL_ID + 1] = {
+ [NETDEV_A_PAGE_POOL_ID] = NLA_POLICY_FULL_RANGE(NLA_UINT, &netdev_a_page_pool_id_range),
+};
+#endif /* CONFIG_PAGE_POOL */
+
+/* NETDEV_CMD_PAGE_POOL_STATS_GET - do */
+#ifdef CONFIG_PAGE_POOL_STATS
+static const struct nla_policy netdev_page_pool_stats_get_nl_policy[NETDEV_A_PAGE_POOL_STATS_INFO + 1] = {
+ [NETDEV_A_PAGE_POOL_STATS_INFO] = NLA_POLICY_NESTED(netdev_page_pool_info_nl_policy),
+};
+#endif /* CONFIG_PAGE_POOL_STATS */
+
+/* NETDEV_CMD_QUEUE_GET - do */
+static const struct nla_policy netdev_queue_get_do_nl_policy[NETDEV_A_QUEUE_TYPE + 1] = {
+ [NETDEV_A_QUEUE_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1),
+ [NETDEV_A_QUEUE_TYPE] = NLA_POLICY_MAX(NLA_U32, 1),
+ [NETDEV_A_QUEUE_ID] = { .type = NLA_U32, },
+};
+
+/* NETDEV_CMD_QUEUE_GET - dump */
+static const struct nla_policy netdev_queue_get_dump_nl_policy[NETDEV_A_QUEUE_IFINDEX + 1] = {
+ [NETDEV_A_QUEUE_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1),
+};
+
+/* NETDEV_CMD_NAPI_GET - do */
+static const struct nla_policy netdev_napi_get_do_nl_policy[NETDEV_A_NAPI_ID + 1] = {
+ [NETDEV_A_NAPI_ID] = { .type = NLA_U32, },
+};
+
+/* NETDEV_CMD_NAPI_GET - dump */
+static const struct nla_policy netdev_napi_get_dump_nl_policy[NETDEV_A_NAPI_IFINDEX + 1] = {
+ [NETDEV_A_NAPI_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1),
+};
+
/* Ops table for netdev */
static const struct genl_split_ops netdev_nl_ops[] = {
{
@@ -29,10 +82,67 @@ static const struct genl_split_ops netdev_nl_ops[] = {
.dumpit = netdev_nl_dev_get_dumpit,
.flags = GENL_CMD_CAP_DUMP,
},
+#ifdef CONFIG_PAGE_POOL
+ {
+ .cmd = NETDEV_CMD_PAGE_POOL_GET,
+ .doit = netdev_nl_page_pool_get_doit,
+ .policy = netdev_page_pool_get_nl_policy,
+ .maxattr = NETDEV_A_PAGE_POOL_ID,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = NETDEV_CMD_PAGE_POOL_GET,
+ .dumpit = netdev_nl_page_pool_get_dumpit,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+#endif /* CONFIG_PAGE_POOL */
+#ifdef CONFIG_PAGE_POOL_STATS
+ {
+ .cmd = NETDEV_CMD_PAGE_POOL_STATS_GET,
+ .doit = netdev_nl_page_pool_stats_get_doit,
+ .policy = netdev_page_pool_stats_get_nl_policy,
+ .maxattr = NETDEV_A_PAGE_POOL_STATS_INFO,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = NETDEV_CMD_PAGE_POOL_STATS_GET,
+ .dumpit = netdev_nl_page_pool_stats_get_dumpit,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+#endif /* CONFIG_PAGE_POOL_STATS */
+ {
+ .cmd = NETDEV_CMD_QUEUE_GET,
+ .doit = netdev_nl_queue_get_doit,
+ .policy = netdev_queue_get_do_nl_policy,
+ .maxattr = NETDEV_A_QUEUE_TYPE,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = NETDEV_CMD_QUEUE_GET,
+ .dumpit = netdev_nl_queue_get_dumpit,
+ .policy = netdev_queue_get_dump_nl_policy,
+ .maxattr = NETDEV_A_QUEUE_IFINDEX,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+ {
+ .cmd = NETDEV_CMD_NAPI_GET,
+ .doit = netdev_nl_napi_get_doit,
+ .policy = netdev_napi_get_do_nl_policy,
+ .maxattr = NETDEV_A_NAPI_ID,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = NETDEV_CMD_NAPI_GET,
+ .dumpit = netdev_nl_napi_get_dumpit,
+ .policy = netdev_napi_get_dump_nl_policy,
+ .maxattr = NETDEV_A_NAPI_IFINDEX,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
};
static const struct genl_multicast_group netdev_nl_mcgrps[] = {
[NETDEV_NLGRP_MGMT] = { "mgmt", },
+ [NETDEV_NLGRP_PAGE_POOL] = { "page-pool", },
};
struct genl_family netdev_nl_family __ro_after_init = {
diff --git a/net/core/netdev-genl-gen.h b/net/core/netdev-genl-gen.h
index 7b370c073e7d..a47f2bcbe4fa 100644
--- a/net/core/netdev-genl-gen.h
+++ b/net/core/netdev-genl-gen.h
@@ -11,11 +11,27 @@
#include <uapi/linux/netdev.h>
+/* Common nested types */
+extern const struct nla_policy netdev_page_pool_info_nl_policy[NETDEV_A_PAGE_POOL_IFINDEX + 1];
+
int netdev_nl_dev_get_doit(struct sk_buff *skb, struct genl_info *info);
int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
+int netdev_nl_page_pool_get_doit(struct sk_buff *skb, struct genl_info *info);
+int netdev_nl_page_pool_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int netdev_nl_page_pool_stats_get_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int netdev_nl_page_pool_stats_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int netdev_nl_queue_get_doit(struct sk_buff *skb, struct genl_info *info);
+int netdev_nl_queue_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int netdev_nl_napi_get_doit(struct sk_buff *skb, struct genl_info *info);
+int netdev_nl_napi_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
enum {
NETDEV_NLGRP_MGMT,
+ NETDEV_NLGRP_PAGE_POOL,
};
extern struct genl_family netdev_nl_family;
diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c
index c1aea8b756b6..fd98936da3ae 100644
--- a/net/core/netdev-genl.c
+++ b/net/core/netdev-genl.c
@@ -5,22 +5,60 @@
#include <linux/rtnetlink.h>
#include <net/net_namespace.h>
#include <net/sock.h>
+#include <net/xdp.h>
+#include <net/xdp_sock.h>
+#include <net/netdev_rx_queue.h>
+#include <net/busy_poll.h>
#include "netdev-genl-gen.h"
+#include "dev.h"
+
+struct netdev_nl_dump_ctx {
+ unsigned long ifindex;
+ unsigned int rxq_idx;
+ unsigned int txq_idx;
+ unsigned int napi_id;
+};
+
+static struct netdev_nl_dump_ctx *netdev_dump_ctx(struct netlink_callback *cb)
+{
+ NL_ASSERT_DUMP_CTX_FITS(struct netdev_nl_dump_ctx);
+
+ return (struct netdev_nl_dump_ctx *)cb->ctx;
+}
static int
netdev_nl_dev_fill(struct net_device *netdev, struct sk_buff *rsp,
const struct genl_info *info)
{
+ u64 xsk_features = 0;
+ u64 xdp_rx_meta = 0;
void *hdr;
hdr = genlmsg_iput(rsp, info);
if (!hdr)
return -EMSGSIZE;
+#define XDP_METADATA_KFUNC(_, flag, __, xmo) \
+ if (netdev->xdp_metadata_ops && netdev->xdp_metadata_ops->xmo) \
+ xdp_rx_meta |= flag;
+XDP_METADATA_KFUNC_xxx
+#undef XDP_METADATA_KFUNC
+
+ if (netdev->xsk_tx_metadata_ops) {
+ if (netdev->xsk_tx_metadata_ops->tmo_fill_timestamp)
+ xsk_features |= NETDEV_XSK_FLAGS_TX_TIMESTAMP;
+ if (netdev->xsk_tx_metadata_ops->tmo_request_checksum)
+ xsk_features |= NETDEV_XSK_FLAGS_TX_CHECKSUM;
+ }
+
if (nla_put_u32(rsp, NETDEV_A_DEV_IFINDEX, netdev->ifindex) ||
nla_put_u64_64bit(rsp, NETDEV_A_DEV_XDP_FEATURES,
- netdev->xdp_features, NETDEV_A_DEV_PAD)) {
+ netdev->xdp_features, NETDEV_A_DEV_PAD) ||
+ nla_put_u64_64bit(rsp, NETDEV_A_DEV_XDP_RX_METADATA_FEATURES,
+ xdp_rx_meta, NETDEV_A_DEV_PAD) ||
+ nla_put_u64_64bit(rsp, NETDEV_A_DEV_XSK_FEATURES,
+ xsk_features, NETDEV_A_DEV_PAD)) {
genlmsg_cancel(rsp, hdr);
return -EINVAL;
}
@@ -101,12 +139,13 @@ err_free_msg:
int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
+ struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb);
struct net *net = sock_net(skb->sk);
struct net_device *netdev;
int err = 0;
rtnl_lock();
- for_each_netdev_dump(net, netdev, cb->args[0]) {
+ for_each_netdev_dump(net, netdev, ctx->ifindex) {
err = netdev_nl_dev_fill(netdev, skb, genl_info_dump(cb));
if (err < 0)
break;
@@ -119,6 +158,317 @@ int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len;
}
+static int
+netdev_nl_napi_fill_one(struct sk_buff *rsp, struct napi_struct *napi,
+ const struct genl_info *info)
+{
+ void *hdr;
+ pid_t pid;
+
+ if (WARN_ON_ONCE(!napi->dev))
+ return -EINVAL;
+ if (!(napi->dev->flags & IFF_UP))
+ return 0;
+
+ hdr = genlmsg_iput(rsp, info);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (napi->napi_id >= MIN_NAPI_ID &&
+ nla_put_u32(rsp, NETDEV_A_NAPI_ID, napi->napi_id))
+ goto nla_put_failure;
+
+ if (nla_put_u32(rsp, NETDEV_A_NAPI_IFINDEX, napi->dev->ifindex))
+ goto nla_put_failure;
+
+ if (napi->irq >= 0 && nla_put_u32(rsp, NETDEV_A_NAPI_IRQ, napi->irq))
+ goto nla_put_failure;
+
+ if (napi->thread) {
+ pid = task_pid_nr(napi->thread);
+ if (nla_put_u32(rsp, NETDEV_A_NAPI_PID, pid))
+ goto nla_put_failure;
+ }
+
+ genlmsg_end(rsp, hdr);
+
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(rsp, hdr);
+ return -EMSGSIZE;
+}
+
+int netdev_nl_napi_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct napi_struct *napi;
+ struct sk_buff *rsp;
+ u32 napi_id;
+ int err;
+
+ if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_NAPI_ID))
+ return -EINVAL;
+
+ napi_id = nla_get_u32(info->attrs[NETDEV_A_NAPI_ID]);
+
+ rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!rsp)
+ return -ENOMEM;
+
+ rtnl_lock();
+
+ napi = napi_by_id(napi_id);
+ if (napi)
+ err = netdev_nl_napi_fill_one(rsp, napi, info);
+ else
+ err = -EINVAL;
+
+ rtnl_unlock();
+
+ if (err)
+ goto err_free_msg;
+
+ return genlmsg_reply(rsp, info);
+
+err_free_msg:
+ nlmsg_free(rsp);
+ return err;
+}
+
+static int
+netdev_nl_napi_dump_one(struct net_device *netdev, struct sk_buff *rsp,
+ const struct genl_info *info,
+ struct netdev_nl_dump_ctx *ctx)
+{
+ struct napi_struct *napi;
+ int err = 0;
+
+ if (!(netdev->flags & IFF_UP))
+ return err;
+
+ list_for_each_entry(napi, &netdev->napi_list, dev_list) {
+ if (ctx->napi_id && napi->napi_id >= ctx->napi_id)
+ continue;
+
+ err = netdev_nl_napi_fill_one(rsp, napi, info);
+ if (err)
+ return err;
+ ctx->napi_id = napi->napi_id;
+ }
+ return err;
+}
+
+int netdev_nl_napi_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb);
+ const struct genl_info *info = genl_info_dump(cb);
+ struct net *net = sock_net(skb->sk);
+ struct net_device *netdev;
+ u32 ifindex = 0;
+ int err = 0;
+
+ if (info->attrs[NETDEV_A_NAPI_IFINDEX])
+ ifindex = nla_get_u32(info->attrs[NETDEV_A_NAPI_IFINDEX]);
+
+ rtnl_lock();
+ if (ifindex) {
+ netdev = __dev_get_by_index(net, ifindex);
+ if (netdev)
+ err = netdev_nl_napi_dump_one(netdev, skb, info, ctx);
+ else
+ err = -ENODEV;
+ } else {
+ for_each_netdev_dump(net, netdev, ctx->ifindex) {
+ err = netdev_nl_napi_dump_one(netdev, skb, info, ctx);
+ if (err < 0)
+ break;
+ ctx->napi_id = 0;
+ }
+ }
+ rtnl_unlock();
+
+ if (err != -EMSGSIZE)
+ return err;
+
+ return skb->len;
+}
+
+static int
+netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev,
+ u32 q_idx, u32 q_type, const struct genl_info *info)
+{
+ struct netdev_rx_queue *rxq;
+ struct netdev_queue *txq;
+ void *hdr;
+
+ hdr = genlmsg_iput(rsp, info);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (nla_put_u32(rsp, NETDEV_A_QUEUE_ID, q_idx) ||
+ nla_put_u32(rsp, NETDEV_A_QUEUE_TYPE, q_type) ||
+ nla_put_u32(rsp, NETDEV_A_QUEUE_IFINDEX, netdev->ifindex))
+ goto nla_put_failure;
+
+ switch (q_type) {
+ case NETDEV_QUEUE_TYPE_RX:
+ rxq = __netif_get_rx_queue(netdev, q_idx);
+ if (rxq->napi && nla_put_u32(rsp, NETDEV_A_QUEUE_NAPI_ID,
+ rxq->napi->napi_id))
+ goto nla_put_failure;
+ break;
+ case NETDEV_QUEUE_TYPE_TX:
+ txq = netdev_get_tx_queue(netdev, q_idx);
+ if (txq->napi && nla_put_u32(rsp, NETDEV_A_QUEUE_NAPI_ID,
+ txq->napi->napi_id))
+ goto nla_put_failure;
+ }
+
+ genlmsg_end(rsp, hdr);
+
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(rsp, hdr);
+ return -EMSGSIZE;
+}
+
+static int netdev_nl_queue_validate(struct net_device *netdev, u32 q_id,
+ u32 q_type)
+{
+ switch (q_type) {
+ case NETDEV_QUEUE_TYPE_RX:
+ if (q_id >= netdev->real_num_rx_queues)
+ return -EINVAL;
+ return 0;
+ case NETDEV_QUEUE_TYPE_TX:
+ if (q_id >= netdev->real_num_tx_queues)
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int
+netdev_nl_queue_fill(struct sk_buff *rsp, struct net_device *netdev, u32 q_idx,
+ u32 q_type, const struct genl_info *info)
+{
+ int err = 0;
+
+ if (!(netdev->flags & IFF_UP))
+ return err;
+
+ err = netdev_nl_queue_validate(netdev, q_idx, q_type);
+ if (err)
+ return err;
+
+ return netdev_nl_queue_fill_one(rsp, netdev, q_idx, q_type, info);
+}
+
+int netdev_nl_queue_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ u32 q_id, q_type, ifindex;
+ struct net_device *netdev;
+ struct sk_buff *rsp;
+ int err;
+
+ if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_ID) ||
+ GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_TYPE) ||
+ GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_IFINDEX))
+ return -EINVAL;
+
+ q_id = nla_get_u32(info->attrs[NETDEV_A_QUEUE_ID]);
+ q_type = nla_get_u32(info->attrs[NETDEV_A_QUEUE_TYPE]);
+ ifindex = nla_get_u32(info->attrs[NETDEV_A_QUEUE_IFINDEX]);
+
+ rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!rsp)
+ return -ENOMEM;
+
+ rtnl_lock();
+
+ netdev = __dev_get_by_index(genl_info_net(info), ifindex);
+ if (netdev)
+ err = netdev_nl_queue_fill(rsp, netdev, q_id, q_type, info);
+ else
+ err = -ENODEV;
+
+ rtnl_unlock();
+
+ if (err)
+ goto err_free_msg;
+
+ return genlmsg_reply(rsp, info);
+
+err_free_msg:
+ nlmsg_free(rsp);
+ return err;
+}
+
+static int
+netdev_nl_queue_dump_one(struct net_device *netdev, struct sk_buff *rsp,
+ const struct genl_info *info,
+ struct netdev_nl_dump_ctx *ctx)
+{
+ int err = 0;
+ int i;
+
+ if (!(netdev->flags & IFF_UP))
+ return err;
+
+ for (i = ctx->rxq_idx; i < netdev->real_num_rx_queues;) {
+ err = netdev_nl_queue_fill_one(rsp, netdev, i,
+ NETDEV_QUEUE_TYPE_RX, info);
+ if (err)
+ return err;
+ ctx->rxq_idx = i++;
+ }
+ for (i = ctx->txq_idx; i < netdev->real_num_tx_queues;) {
+ err = netdev_nl_queue_fill_one(rsp, netdev, i,
+ NETDEV_QUEUE_TYPE_TX, info);
+ if (err)
+ return err;
+ ctx->txq_idx = i++;
+ }
+
+ return err;
+}
+
+int netdev_nl_queue_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb);
+ const struct genl_info *info = genl_info_dump(cb);
+ struct net *net = sock_net(skb->sk);
+ struct net_device *netdev;
+ u32 ifindex = 0;
+ int err = 0;
+
+ if (info->attrs[NETDEV_A_QUEUE_IFINDEX])
+ ifindex = nla_get_u32(info->attrs[NETDEV_A_QUEUE_IFINDEX]);
+
+ rtnl_lock();
+ if (ifindex) {
+ netdev = __dev_get_by_index(net, ifindex);
+ if (netdev)
+ err = netdev_nl_queue_dump_one(netdev, skb, info, ctx);
+ else
+ err = -ENODEV;
+ } else {
+ for_each_netdev_dump(net, netdev, ctx->ifindex) {
+ err = netdev_nl_queue_dump_one(netdev, skb, info, ctx);
+ if (err < 0)
+ break;
+ ctx->rxq_idx = 0;
+ ctx->txq_idx = 0;
+ }
+ }
+ rtnl_unlock();
+
+ if (err != -EMSGSIZE)
+ return err;
+
+ return skb->len;
+}
+
static int netdev_genl_netdevice_event(struct notifier_block *nb,
unsigned long event, void *ptr)
{
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 77cb75e63aca..4933762e5a6b 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -23,10 +23,12 @@
#include <trace/events/page_pool.h>
+#include "page_pool_priv.h"
+
#define DEFER_TIME (msecs_to_jiffies(1000))
#define DEFER_WARN_INTERVAL (60 * HZ)
-#define BIAS_MAX LONG_MAX
+#define BIAS_MAX (LONG_MAX >> 1)
#ifdef CONFIG_PAGE_POOL_STATS
/* alloc_stat_inc is intended to be used in softirq context */
@@ -69,7 +71,7 @@ static const char pp_stats[][ETH_GSTRING_LEN] = {
* is passed to this API which is filled in. The caller can then report
* those stats to the user (perhaps via ethtool, debugfs, etc.).
*/
-bool page_pool_get_stats(struct page_pool *pool,
+bool page_pool_get_stats(const struct page_pool *pool,
struct page_pool_stats *stats)
{
int cpu = 0;
@@ -173,7 +175,8 @@ static int page_pool_init(struct page_pool *pool,
{
unsigned int ring_qsize = 1024; /* Default */
- memcpy(&pool->p, params, sizeof(pool->p));
+ memcpy(&pool->p, &params->fast, sizeof(pool->p));
+ memcpy(&pool->slow, &params->slow, sizeof(pool->slow));
/* Validate only known flags were used */
if (pool->p.flags & ~(PP_FLAG_ALL))
@@ -211,9 +214,7 @@ static int page_pool_init(struct page_pool *pool,
*/
}
- if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT &&
- pool->p.flags & PP_FLAG_PAGE_FRAG)
- return -EINVAL;
+ pool->has_init_callback = !!pool->slow.init_callback;
#ifdef CONFIG_PAGE_POOL_STATS
pool->recycle_stats = alloc_percpu(struct page_pool_recycle_stats);
@@ -221,8 +222,12 @@ static int page_pool_init(struct page_pool *pool,
return -ENOMEM;
#endif
- if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0)
+ if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0) {
+#ifdef CONFIG_PAGE_POOL_STATS
+ free_percpu(pool->recycle_stats);
+#endif
return -ENOMEM;
+ }
atomic_set(&pool->pages_state_release_cnt, 0);
@@ -235,6 +240,18 @@ static int page_pool_init(struct page_pool *pool,
return 0;
}
+static void page_pool_uninit(struct page_pool *pool)
+{
+ ptr_ring_cleanup(&pool->ring, NULL);
+
+ if (pool->p.flags & PP_FLAG_DMA_MAP)
+ put_device(pool->p.dev);
+
+#ifdef CONFIG_PAGE_POOL_STATS
+ free_percpu(pool->recycle_stats);
+#endif
+}
+
/**
* page_pool_create() - create a page pool.
* @params: parameters, see struct page_pool_params
@@ -249,13 +266,21 @@ struct page_pool *page_pool_create(const struct page_pool_params *params)
return ERR_PTR(-ENOMEM);
err = page_pool_init(pool, params);
- if (err < 0) {
- pr_warn("%s() gave up with errno %d\n", __func__, err);
- kfree(pool);
- return ERR_PTR(err);
- }
+ if (err < 0)
+ goto err_free;
+
+ err = page_pool_list(pool);
+ if (err)
+ goto err_uninit;
return pool;
+
+err_uninit:
+ page_pool_uninit(pool);
+err_free:
+ pr_warn("%s() gave up with errno %d\n", __func__, err);
+ kfree(pool);
+ return ERR_PTR(err);
}
EXPORT_SYMBOL(page_pool_create);
@@ -359,12 +384,20 @@ static bool page_pool_dma_map(struct page_pool *pool, struct page *page)
if (dma_mapping_error(pool->p.dev, dma))
return false;
- page_pool_set_dma_addr(page, dma);
+ if (page_pool_set_dma_addr(page, dma))
+ goto unmap_failed;
if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
page_pool_dma_sync_for_device(pool, page, pool->p.max_len);
return true;
+
+unmap_failed:
+ WARN_ON_ONCE("unexpected DMA address, please report to netdev@");
+ dma_unmap_page_attrs(pool->p.dev, dma,
+ PAGE_SIZE << pool->p.order, pool->p.dma_dir,
+ DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
+ return false;
}
static void page_pool_set_pp_info(struct page_pool *pool,
@@ -372,8 +405,16 @@ static void page_pool_set_pp_info(struct page_pool *pool,
{
page->pp = pool;
page->pp_magic |= PP_SIGNATURE;
- if (pool->p.init_callback)
- pool->p.init_callback(page, pool->p.init_arg);
+
+ /* Ensuring all pages have been split into one fragment initially:
+ * page_pool_set_pp_info() is only called once for every page when it
+ * is allocated from the page allocator and page_pool_fragment_page()
+ * is dirtying the same cache line as the page->pp_magic above, so
+ * the overhead is negligible.
+ */
+ page_pool_fragment_page(page, 1);
+ if (pool->has_init_callback)
+ pool->slow.init_callback(page, pool->slow.init_arg);
}
static void page_pool_clear_pp_info(struct page *page)
@@ -488,7 +529,7 @@ EXPORT_SYMBOL(page_pool_alloc_pages);
*/
#define _distance(a, b) (s32)((a) - (b))
-static s32 page_pool_inflight(struct page_pool *pool)
+s32 page_pool_inflight(const struct page_pool *pool, bool strict)
{
u32 release_cnt = atomic_read(&pool->pages_state_release_cnt);
u32 hold_cnt = READ_ONCE(pool->pages_state_hold_cnt);
@@ -496,27 +537,27 @@ static s32 page_pool_inflight(struct page_pool *pool)
inflight = _distance(hold_cnt, release_cnt);
- trace_page_pool_release(pool, inflight, hold_cnt, release_cnt);
- WARN(inflight < 0, "Negative(%d) inflight packet-pages", inflight);
+ if (strict) {
+ trace_page_pool_release(pool, inflight, hold_cnt, release_cnt);
+ WARN(inflight < 0, "Negative(%d) inflight packet-pages",
+ inflight);
+ } else {
+ inflight = max(0, inflight);
+ }
return inflight;
}
-/* Disconnects a page (from a page_pool). API users can have a need
- * to disconnect a page (from a page_pool), to allow it to be used as
- * a regular page (that will eventually be returned to the normal
- * page-allocator via put_page).
- */
-static void page_pool_return_page(struct page_pool *pool, struct page *page)
+static __always_inline
+void __page_pool_release_page_dma(struct page_pool *pool, struct page *page)
{
dma_addr_t dma;
- int count;
if (!(pool->p.flags & PP_FLAG_DMA_MAP))
/* Always account for inflight pages, even if we didn't
* map them
*/
- goto skip_dma_unmap;
+ return;
dma = page_pool_get_dma_addr(page);
@@ -525,7 +566,19 @@ static void page_pool_return_page(struct page_pool *pool, struct page *page)
PAGE_SIZE << pool->p.order, pool->p.dma_dir,
DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
page_pool_set_dma_addr(page, 0);
-skip_dma_unmap:
+}
+
+/* Disconnects a page (from a page_pool). API users can have a need
+ * to disconnect a page (from a page_pool), to allow it to be used as
+ * a regular page (that will eventually be returned to the normal
+ * page-allocator via put_page).
+ */
+void page_pool_return_page(struct page_pool *pool, struct page *page)
+{
+ int count;
+
+ __page_pool_release_page_dma(pool, page);
+
page_pool_clear_pp_info(page);
/* This may be the last page returned, releasing the pool, so
@@ -631,8 +684,8 @@ __page_pool_put_page(struct page_pool *pool, struct page *page,
return NULL;
}
-void page_pool_put_defragged_page(struct page_pool *pool, struct page *page,
- unsigned int dma_sync_size, bool allow_direct)
+void page_pool_put_unrefed_page(struct page_pool *pool, struct page *page,
+ unsigned int dma_sync_size, bool allow_direct)
{
page = __page_pool_put_page(pool, page, dma_sync_size, allow_direct);
if (page && !page_pool_recycle_in_ring(pool, page)) {
@@ -641,7 +694,7 @@ void page_pool_put_defragged_page(struct page_pool *pool, struct page *page,
page_pool_return_page(pool, page);
}
}
-EXPORT_SYMBOL(page_pool_put_defragged_page);
+EXPORT_SYMBOL(page_pool_put_unrefed_page);
/**
* page_pool_put_page_bulk() - release references on multiple pages
@@ -668,7 +721,7 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data,
struct page *page = virt_to_head_page(data[i]);
/* It is not the last user for the page frag case */
- if (!page_pool_is_last_frag(pool, page))
+ if (!page_pool_is_last_ref(page))
continue;
page = __page_pool_put_page(pool, page, -1, false);
@@ -710,7 +763,7 @@ static struct page *page_pool_drain_frag(struct page_pool *pool,
long drain_count = BIAS_MAX - pool->frag_users;
/* Some user is still using the page frag */
- if (likely(page_pool_defrag_page(page, drain_count)))
+ if (likely(page_pool_unref_page(page, drain_count)))
return NULL;
if (page_ref_count(page) == 1 && !page_is_pfmemalloc(page)) {
@@ -731,7 +784,7 @@ static void page_pool_free_frag(struct page_pool *pool)
pool->frag_page = NULL;
- if (!page || page_pool_defrag_page(page, drain_count))
+ if (!page || page_pool_unref_page(page, drain_count))
return;
page_pool_return_page(pool, page);
@@ -744,8 +797,7 @@ struct page *page_pool_alloc_frag(struct page_pool *pool,
unsigned int max_size = PAGE_SIZE << pool->p.order;
struct page *page = pool->frag_page;
- if (WARN_ON(!(pool->p.flags & PP_FLAG_PAGE_FRAG) ||
- size > max_size))
+ if (WARN_ON(size > max_size))
return NULL;
size = ALIGN(size, dma_get_cache_alignment());
@@ -798,19 +850,13 @@ static void page_pool_empty_ring(struct page_pool *pool)
}
}
-static void page_pool_free(struct page_pool *pool)
+static void __page_pool_destroy(struct page_pool *pool)
{
if (pool->disconnect)
pool->disconnect(pool);
- ptr_ring_cleanup(&pool->ring, NULL);
-
- if (pool->p.flags & PP_FLAG_DMA_MAP)
- put_device(pool->p.dev);
-
-#ifdef CONFIG_PAGE_POOL_STATS
- free_percpu(pool->recycle_stats);
-#endif
+ page_pool_unlist(pool);
+ page_pool_uninit(pool);
kfree(pool);
}
@@ -847,9 +893,9 @@ static int page_pool_release(struct page_pool *pool)
int inflight;
page_pool_scrub(pool);
- inflight = page_pool_inflight(pool);
+ inflight = page_pool_inflight(pool, true);
if (!inflight)
- page_pool_free(pool);
+ __page_pool_destroy(pool);
return inflight;
}
@@ -858,18 +904,21 @@ static void page_pool_release_retry(struct work_struct *wq)
{
struct delayed_work *dwq = to_delayed_work(wq);
struct page_pool *pool = container_of(dwq, typeof(*pool), release_dw);
+ void *netdev;
int inflight;
inflight = page_pool_release(pool);
if (!inflight)
return;
- /* Periodic warning */
- if (time_after_eq(jiffies, pool->defer_warn)) {
+ /* Periodic warning for page pools the user can't see */
+ netdev = READ_ONCE(pool->slow.netdev);
+ if (time_after_eq(jiffies, pool->defer_warn) &&
+ (!netdev || netdev == NET_PTR_POISON)) {
int sec = (s32)((u32)jiffies - (u32)pool->defer_start) / HZ;
- pr_warn("%s() stalled pool shutdown %d inflight %d sec\n",
- __func__, inflight, sec);
+ pr_warn("%s() stalled pool shutdown: id %u, %d inflight %d sec\n",
+ __func__, pool->user.id, inflight, sec);
pool->defer_warn = jiffies + DEFER_WARN_INTERVAL;
}
@@ -914,6 +963,7 @@ void page_pool_destroy(struct page_pool *pool)
if (!page_pool_release(pool))
return;
+ page_pool_detached(pool);
pool->defer_start = jiffies;
pool->defer_warn = jiffies + DEFER_WARN_INTERVAL;
diff --git a/net/core/page_pool_priv.h b/net/core/page_pool_priv.h
new file mode 100644
index 000000000000..90665d40f1eb
--- /dev/null
+++ b/net/core/page_pool_priv.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __PAGE_POOL_PRIV_H
+#define __PAGE_POOL_PRIV_H
+
+s32 page_pool_inflight(const struct page_pool *pool, bool strict);
+
+int page_pool_list(struct page_pool *pool);
+void page_pool_detached(struct page_pool *pool);
+void page_pool_unlist(struct page_pool *pool);
+
+#endif
diff --git a/net/core/page_pool_user.c b/net/core/page_pool_user.c
new file mode 100644
index 000000000000..ffe5244e5597
--- /dev/null
+++ b/net/core/page_pool_user.c
@@ -0,0 +1,410 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/mutex.h>
+#include <linux/netdevice.h>
+#include <linux/xarray.h>
+#include <net/net_debug.h>
+#include <net/page_pool/types.h>
+#include <net/page_pool/helpers.h>
+#include <net/sock.h>
+
+#include "page_pool_priv.h"
+#include "netdev-genl-gen.h"
+
+static DEFINE_XARRAY_FLAGS(page_pools, XA_FLAGS_ALLOC1);
+/* Protects: page_pools, netdevice->page_pools, pool->slow.netdev, pool->user.
+ * Ordering: inside rtnl_lock
+ */
+static DEFINE_MUTEX(page_pools_lock);
+
+/* Page pools are only reachable from user space (via netlink) if they are
+ * linked to a netdev at creation time. Following page pool "visibility"
+ * states are possible:
+ * - normal
+ * - user.list: linked to real netdev, netdev: real netdev
+ * - orphaned - real netdev has disappeared
+ * - user.list: linked to lo, netdev: lo
+ * - invisible - either (a) created without netdev linking, (b) unlisted due
+ * to error, or (c) the entire namespace which owned this pool disappeared
+ * - user.list: unhashed, netdev: unknown
+ */
+
+typedef int (*pp_nl_fill_cb)(struct sk_buff *rsp, const struct page_pool *pool,
+ const struct genl_info *info);
+
+static int
+netdev_nl_page_pool_get_do(struct genl_info *info, u32 id, pp_nl_fill_cb fill)
+{
+ struct page_pool *pool;
+ struct sk_buff *rsp;
+ int err;
+
+ mutex_lock(&page_pools_lock);
+ pool = xa_load(&page_pools, id);
+ if (!pool || hlist_unhashed(&pool->user.list) ||
+ !net_eq(dev_net(pool->slow.netdev), genl_info_net(info))) {
+ err = -ENOENT;
+ goto err_unlock;
+ }
+
+ rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!rsp) {
+ err = -ENOMEM;
+ goto err_unlock;
+ }
+
+ err = fill(rsp, pool, info);
+ if (err)
+ goto err_free_msg;
+
+ mutex_unlock(&page_pools_lock);
+
+ return genlmsg_reply(rsp, info);
+
+err_free_msg:
+ nlmsg_free(rsp);
+err_unlock:
+ mutex_unlock(&page_pools_lock);
+ return err;
+}
+
+struct page_pool_dump_cb {
+ unsigned long ifindex;
+ u32 pp_id;
+};
+
+static int
+netdev_nl_page_pool_get_dump(struct sk_buff *skb, struct netlink_callback *cb,
+ pp_nl_fill_cb fill)
+{
+ struct page_pool_dump_cb *state = (void *)cb->ctx;
+ const struct genl_info *info = genl_info_dump(cb);
+ struct net *net = sock_net(skb->sk);
+ struct net_device *netdev;
+ struct page_pool *pool;
+ int err = 0;
+
+ rtnl_lock();
+ mutex_lock(&page_pools_lock);
+ for_each_netdev_dump(net, netdev, state->ifindex) {
+ hlist_for_each_entry(pool, &netdev->page_pools, user.list) {
+ if (state->pp_id && state->pp_id < pool->user.id)
+ continue;
+
+ state->pp_id = pool->user.id;
+ err = fill(skb, pool, info);
+ if (err)
+ break;
+ }
+
+ state->pp_id = 0;
+ }
+ mutex_unlock(&page_pools_lock);
+ rtnl_unlock();
+
+ if (skb->len && err == -EMSGSIZE)
+ return skb->len;
+ return err;
+}
+
+static int
+page_pool_nl_stats_fill(struct sk_buff *rsp, const struct page_pool *pool,
+ const struct genl_info *info)
+{
+#ifdef CONFIG_PAGE_POOL_STATS
+ struct page_pool_stats stats = {};
+ struct nlattr *nest;
+ void *hdr;
+
+ if (!page_pool_get_stats(pool, &stats))
+ return 0;
+
+ hdr = genlmsg_iput(rsp, info);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ nest = nla_nest_start(rsp, NETDEV_A_PAGE_POOL_STATS_INFO);
+
+ if (nla_put_uint(rsp, NETDEV_A_PAGE_POOL_ID, pool->user.id) ||
+ (pool->slow.netdev->ifindex != LOOPBACK_IFINDEX &&
+ nla_put_u32(rsp, NETDEV_A_PAGE_POOL_IFINDEX,
+ pool->slow.netdev->ifindex)))
+ goto err_cancel_nest;
+
+ nla_nest_end(rsp, nest);
+
+ if (nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_ALLOC_FAST,
+ stats.alloc_stats.fast) ||
+ nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_ALLOC_SLOW,
+ stats.alloc_stats.slow) ||
+ nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_ALLOC_SLOW_HIGH_ORDER,
+ stats.alloc_stats.slow_high_order) ||
+ nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_ALLOC_EMPTY,
+ stats.alloc_stats.empty) ||
+ nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_ALLOC_REFILL,
+ stats.alloc_stats.refill) ||
+ nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_ALLOC_WAIVE,
+ stats.alloc_stats.waive) ||
+ nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_RECYCLE_CACHED,
+ stats.recycle_stats.cached) ||
+ nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_RECYCLE_CACHE_FULL,
+ stats.recycle_stats.cache_full) ||
+ nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_RECYCLE_RING,
+ stats.recycle_stats.ring) ||
+ nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_RECYCLE_RING_FULL,
+ stats.recycle_stats.ring_full) ||
+ nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_RECYCLE_RELEASED_REFCNT,
+ stats.recycle_stats.released_refcnt))
+ goto err_cancel_msg;
+
+ genlmsg_end(rsp, hdr);
+
+ return 0;
+err_cancel_nest:
+ nla_nest_cancel(rsp, nest);
+err_cancel_msg:
+ genlmsg_cancel(rsp, hdr);
+ return -EMSGSIZE;
+#else
+ GENL_SET_ERR_MSG(info, "kernel built without CONFIG_PAGE_POOL_STATS");
+ return -EOPNOTSUPP;
+#endif
+}
+
+int netdev_nl_page_pool_stats_get_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct nlattr *tb[ARRAY_SIZE(netdev_page_pool_info_nl_policy)];
+ struct nlattr *nest;
+ int err;
+ u32 id;
+
+ if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_PAGE_POOL_STATS_INFO))
+ return -EINVAL;
+
+ nest = info->attrs[NETDEV_A_PAGE_POOL_STATS_INFO];
+ err = nla_parse_nested(tb, ARRAY_SIZE(tb) - 1, nest,
+ netdev_page_pool_info_nl_policy,
+ info->extack);
+ if (err)
+ return err;
+
+ if (NL_REQ_ATTR_CHECK(info->extack, nest, tb, NETDEV_A_PAGE_POOL_ID))
+ return -EINVAL;
+ if (tb[NETDEV_A_PAGE_POOL_IFINDEX]) {
+ NL_SET_ERR_MSG_ATTR(info->extack,
+ tb[NETDEV_A_PAGE_POOL_IFINDEX],
+ "selecting by ifindex not supported");
+ return -EINVAL;
+ }
+
+ id = nla_get_uint(tb[NETDEV_A_PAGE_POOL_ID]);
+
+ return netdev_nl_page_pool_get_do(info, id, page_pool_nl_stats_fill);
+}
+
+int netdev_nl_page_pool_stats_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ return netdev_nl_page_pool_get_dump(skb, cb, page_pool_nl_stats_fill);
+}
+
+static int
+page_pool_nl_fill(struct sk_buff *rsp, const struct page_pool *pool,
+ const struct genl_info *info)
+{
+ size_t inflight, refsz;
+ void *hdr;
+
+ hdr = genlmsg_iput(rsp, info);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (nla_put_uint(rsp, NETDEV_A_PAGE_POOL_ID, pool->user.id))
+ goto err_cancel;
+
+ if (pool->slow.netdev->ifindex != LOOPBACK_IFINDEX &&
+ nla_put_u32(rsp, NETDEV_A_PAGE_POOL_IFINDEX,
+ pool->slow.netdev->ifindex))
+ goto err_cancel;
+ if (pool->user.napi_id &&
+ nla_put_uint(rsp, NETDEV_A_PAGE_POOL_NAPI_ID, pool->user.napi_id))
+ goto err_cancel;
+
+ inflight = page_pool_inflight(pool, false);
+ refsz = PAGE_SIZE << pool->p.order;
+ if (nla_put_uint(rsp, NETDEV_A_PAGE_POOL_INFLIGHT, inflight) ||
+ nla_put_uint(rsp, NETDEV_A_PAGE_POOL_INFLIGHT_MEM,
+ inflight * refsz))
+ goto err_cancel;
+ if (pool->user.detach_time &&
+ nla_put_uint(rsp, NETDEV_A_PAGE_POOL_DETACH_TIME,
+ pool->user.detach_time))
+ goto err_cancel;
+
+ genlmsg_end(rsp, hdr);
+
+ return 0;
+err_cancel:
+ genlmsg_cancel(rsp, hdr);
+ return -EMSGSIZE;
+}
+
+static void netdev_nl_page_pool_event(const struct page_pool *pool, u32 cmd)
+{
+ struct genl_info info;
+ struct sk_buff *ntf;
+ struct net *net;
+
+ lockdep_assert_held(&page_pools_lock);
+
+ /* 'invisible' page pools don't matter */
+ if (hlist_unhashed(&pool->user.list))
+ return;
+ net = dev_net(pool->slow.netdev);
+
+ if (!genl_has_listeners(&netdev_nl_family, net, NETDEV_NLGRP_PAGE_POOL))
+ return;
+
+ genl_info_init_ntf(&info, &netdev_nl_family, cmd);
+
+ ntf = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!ntf)
+ return;
+
+ if (page_pool_nl_fill(ntf, pool, &info)) {
+ nlmsg_free(ntf);
+ return;
+ }
+
+ genlmsg_multicast_netns(&netdev_nl_family, net, ntf,
+ 0, NETDEV_NLGRP_PAGE_POOL, GFP_KERNEL);
+}
+
+int netdev_nl_page_pool_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ u32 id;
+
+ if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_PAGE_POOL_ID))
+ return -EINVAL;
+
+ id = nla_get_uint(info->attrs[NETDEV_A_PAGE_POOL_ID]);
+
+ return netdev_nl_page_pool_get_do(info, id, page_pool_nl_fill);
+}
+
+int netdev_nl_page_pool_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ return netdev_nl_page_pool_get_dump(skb, cb, page_pool_nl_fill);
+}
+
+int page_pool_list(struct page_pool *pool)
+{
+ static u32 id_alloc_next;
+ int err;
+
+ mutex_lock(&page_pools_lock);
+ err = xa_alloc_cyclic(&page_pools, &pool->user.id, pool, xa_limit_32b,
+ &id_alloc_next, GFP_KERNEL);
+ if (err < 0)
+ goto err_unlock;
+
+ INIT_HLIST_NODE(&pool->user.list);
+ if (pool->slow.netdev) {
+ hlist_add_head(&pool->user.list,
+ &pool->slow.netdev->page_pools);
+ pool->user.napi_id = pool->p.napi ? pool->p.napi->napi_id : 0;
+
+ netdev_nl_page_pool_event(pool, NETDEV_CMD_PAGE_POOL_ADD_NTF);
+ }
+
+ mutex_unlock(&page_pools_lock);
+ return 0;
+
+err_unlock:
+ mutex_unlock(&page_pools_lock);
+ return err;
+}
+
+void page_pool_detached(struct page_pool *pool)
+{
+ mutex_lock(&page_pools_lock);
+ pool->user.detach_time = ktime_get_boottime_seconds();
+ netdev_nl_page_pool_event(pool, NETDEV_CMD_PAGE_POOL_CHANGE_NTF);
+ mutex_unlock(&page_pools_lock);
+}
+
+void page_pool_unlist(struct page_pool *pool)
+{
+ mutex_lock(&page_pools_lock);
+ netdev_nl_page_pool_event(pool, NETDEV_CMD_PAGE_POOL_DEL_NTF);
+ xa_erase(&page_pools, pool->user.id);
+ if (!hlist_unhashed(&pool->user.list))
+ hlist_del(&pool->user.list);
+ mutex_unlock(&page_pools_lock);
+}
+
+static void page_pool_unreg_netdev_wipe(struct net_device *netdev)
+{
+ struct page_pool *pool;
+ struct hlist_node *n;
+
+ mutex_lock(&page_pools_lock);
+ hlist_for_each_entry_safe(pool, n, &netdev->page_pools, user.list) {
+ hlist_del_init(&pool->user.list);
+ pool->slow.netdev = NET_PTR_POISON;
+ }
+ mutex_unlock(&page_pools_lock);
+}
+
+static void page_pool_unreg_netdev(struct net_device *netdev)
+{
+ struct page_pool *pool, *last;
+ struct net_device *lo;
+
+ lo = dev_net(netdev)->loopback_dev;
+
+ mutex_lock(&page_pools_lock);
+ last = NULL;
+ hlist_for_each_entry(pool, &netdev->page_pools, user.list) {
+ pool->slow.netdev = lo;
+ netdev_nl_page_pool_event(pool,
+ NETDEV_CMD_PAGE_POOL_CHANGE_NTF);
+ last = pool;
+ }
+ if (last)
+ hlist_splice_init(&netdev->page_pools, &last->user.list,
+ &lo->page_pools);
+ mutex_unlock(&page_pools_lock);
+}
+
+static int
+page_pool_netdevice_event(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+
+ if (event != NETDEV_UNREGISTER)
+ return NOTIFY_DONE;
+
+ if (hlist_empty(&netdev->page_pools))
+ return NOTIFY_OK;
+
+ if (netdev->ifindex != LOOPBACK_IFINDEX)
+ page_pool_unreg_netdev(netdev);
+ else
+ page_pool_unreg_netdev_wipe(netdev);
+ return NOTIFY_OK;
+}
+
+static struct notifier_block page_pool_netdevice_nb = {
+ .notifier_call = page_pool_netdevice_event,
+};
+
+static int __init page_pool_user_init(void)
+{
+ return register_netdevice_notifier(&page_pool_netdevice_nb);
+}
+
+subsys_initcall(page_pool_user_init);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index f56b8d697014..ea55a758a475 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -200,6 +200,7 @@
pf(VID_RND) /* Random VLAN ID */ \
pf(SVID_RND) /* Random SVLAN ID */ \
pf(NODE) /* Node memory alloc*/ \
+ pf(SHARED) /* Shared SKB */ \
#define pf(flag) flag##_SHIFT,
enum pkt_flags {
@@ -669,19 +670,19 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
seq_puts(seq, " Flags: ");
for (i = 0; i < NR_PKT_FLAGS; i++) {
- if (i == F_FLOW_SEQ)
+ if (i == FLOW_SEQ_SHIFT)
if (!pkt_dev->cflows)
continue;
- if (pkt_dev->flags & (1 << i))
+ if (pkt_dev->flags & (1 << i)) {
seq_printf(seq, "%s ", pkt_flag_names[i]);
- else if (i == F_FLOW_SEQ)
- seq_puts(seq, "FLOW_RND ");
-
#ifdef CONFIG_XFRM
- if (i == F_IPSEC && pkt_dev->spi)
- seq_printf(seq, "spi:%u", pkt_dev->spi);
+ if (i == IPSEC_SHIFT && pkt_dev->spi)
+ seq_printf(seq, "spi:%u ", pkt_dev->spi);
#endif
+ } else if (i == FLOW_SEQ_SHIFT) {
+ seq_puts(seq, "FLOW_RND ");
+ }
}
seq_puts(seq, "\n");
@@ -1198,7 +1199,8 @@ static ssize_t pktgen_if_write(struct file *file,
((pkt_dev->xmit_mode == M_NETIF_RECEIVE) ||
!(pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING)))
return -ENOTSUPP;
- if (value > 0 && pkt_dev->n_imix_entries > 0)
+ if (value > 0 && (pkt_dev->n_imix_entries > 0 ||
+ !(pkt_dev->flags & F_SHARED)))
return -EINVAL;
i += len;
@@ -1257,6 +1259,10 @@ static ssize_t pktgen_if_write(struct file *file,
((pkt_dev->xmit_mode == M_START_XMIT) &&
(!(pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING)))))
return -ENOTSUPP;
+
+ if (value > 1 && !(pkt_dev->flags & F_SHARED))
+ return -EINVAL;
+
pkt_dev->burst = value < 1 ? 1 : value;
sprintf(pg_result, "OK: burst=%u", pkt_dev->burst);
return count;
@@ -1318,9 +1324,10 @@ static ssize_t pktgen_if_write(struct file *file,
return count;
}
if (!strcmp(name, "flag")) {
+ bool disable = false;
__u32 flag;
char f[32];
- bool disable = false;
+ char *end;
memset(f, 0, 32);
len = strn_len(&user_buffer[i], sizeof(f) - 1);
@@ -1332,28 +1339,42 @@ static ssize_t pktgen_if_write(struct file *file,
i += len;
flag = pktgen_read_flag(f, &disable);
-
if (flag) {
- if (disable)
+ if (disable) {
+ /* If "clone_skb", or "burst" parameters are
+ * configured, it means that the skb still
+ * needs to be referenced by the pktgen, so
+ * the skb must be shared.
+ */
+ if (flag == F_SHARED && (pkt_dev->clone_skb ||
+ pkt_dev->burst > 1))
+ return -EINVAL;
pkt_dev->flags &= ~flag;
- else
+ } else {
pkt_dev->flags |= flag;
- } else {
- sprintf(pg_result,
- "Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s",
- f,
- "IPSRC_RND, IPDST_RND, UDPSRC_RND, UDPDST_RND, "
- "MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, "
- "MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ, "
- "QUEUE_MAP_RND, QUEUE_MAP_CPU, UDPCSUM, "
- "NO_TIMESTAMP, "
-#ifdef CONFIG_XFRM
- "IPSEC, "
-#endif
- "NODE_ALLOC\n");
+ }
+
+ sprintf(pg_result, "OK: flags=0x%x", pkt_dev->flags);
return count;
}
- sprintf(pg_result, "OK: flags=0x%x", pkt_dev->flags);
+
+ /* Unknown flag */
+ end = pkt_dev->result + sizeof(pkt_dev->result);
+ pg_result += sprintf(pg_result,
+ "Flag -:%s:- unknown\n"
+ "Available flags, (prepend ! to un-set flag):\n", f);
+
+ for (int n = 0; n < NR_PKT_FLAGS && pg_result < end; n++) {
+ if (!IS_ENABLED(CONFIG_XFRM) && n == IPSEC_SHIFT)
+ continue;
+ pg_result += snprintf(pg_result, end - pg_result,
+ "%s, ", pkt_flag_names[n]);
+ }
+ if (!WARN_ON_ONCE(pg_result >= end)) {
+ /* Remove the comma and whitespace at the end */
+ *(pg_result - 2) = '\0';
+ }
+
return count;
}
if (!strcmp(name, "dst_min") || !strcmp(name, "dst")) {
@@ -3440,12 +3461,24 @@ static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev)
static void pktgen_xmit(struct pktgen_dev *pkt_dev)
{
- unsigned int burst = READ_ONCE(pkt_dev->burst);
+ bool skb_shared = !!(READ_ONCE(pkt_dev->flags) & F_SHARED);
struct net_device *odev = pkt_dev->odev;
struct netdev_queue *txq;
+ unsigned int burst = 1;
struct sk_buff *skb;
+ int clone_skb = 0;
int ret;
+ /* If 'skb_shared' is false, the read of possible
+ * new values (if any) for 'burst' and 'clone_skb' will be skipped to
+ * prevent some concurrent changes from slipping in. And the stabilized
+ * config will be read in during the next run of pktgen_xmit.
+ */
+ if (skb_shared) {
+ burst = READ_ONCE(pkt_dev->burst);
+ clone_skb = READ_ONCE(pkt_dev->clone_skb);
+ }
+
/* If device is offline, then don't send */
if (unlikely(!netif_running(odev) || !netif_carrier_ok(odev))) {
pktgen_stop_device(pkt_dev);
@@ -3462,7 +3495,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
/* If no skb or clone count exhausted then get new one */
if (!pkt_dev->skb || (pkt_dev->last_ok &&
- ++pkt_dev->clone_count >= pkt_dev->clone_skb)) {
+ ++pkt_dev->clone_count >= clone_skb)) {
/* build a new pkt */
kfree_skb(pkt_dev->skb);
@@ -3483,7 +3516,8 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
if (pkt_dev->xmit_mode == M_NETIF_RECEIVE) {
skb = pkt_dev->skb;
skb->protocol = eth_type_trans(skb, skb->dev);
- refcount_add(burst, &skb->users);
+ if (skb_shared)
+ refcount_add(burst, &skb->users);
local_bh_disable();
do {
ret = netif_receive_skb(skb);
@@ -3491,6 +3525,10 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
pkt_dev->errors++;
pkt_dev->sofar++;
pkt_dev->seq_num++;
+ if (unlikely(!skb_shared)) {
+ pkt_dev->skb = NULL;
+ break;
+ }
if (refcount_read(&skb->users) != burst) {
/* skb was queued by rps/rfs or taps,
* so cannot reuse this skb
@@ -3509,9 +3547,14 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
goto out; /* Skips xmit_mode M_START_XMIT */
} else if (pkt_dev->xmit_mode == M_QUEUE_XMIT) {
local_bh_disable();
- refcount_inc(&pkt_dev->skb->users);
+ if (skb_shared)
+ refcount_inc(&pkt_dev->skb->users);
ret = dev_queue_xmit(pkt_dev->skb);
+
+ if (!skb_shared && dev_xmit_complete(ret))
+ pkt_dev->skb = NULL;
+
switch (ret) {
case NET_XMIT_SUCCESS:
pkt_dev->sofar++;
@@ -3549,11 +3592,15 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
pkt_dev->last_ok = 0;
goto unlock;
}
- refcount_add(burst, &pkt_dev->skb->users);
+ if (skb_shared)
+ refcount_add(burst, &pkt_dev->skb->users);
xmit_more:
ret = netdev_start_xmit(pkt_dev->skb, odev, txq, --burst > 0);
+ if (!skb_shared && dev_xmit_complete(ret))
+ pkt_dev->skb = NULL;
+
switch (ret) {
case NETDEV_TX_OK:
pkt_dev->last_ok = 1;
@@ -3575,7 +3622,8 @@ xmit_more:
fallthrough;
case NETDEV_TX_BUSY:
/* Retry it next time */
- refcount_dec(&(pkt_dev->skb->users));
+ if (skb_shared)
+ refcount_dec(&pkt_dev->skb->users);
pkt_dev->last_ok = 0;
}
if (unlikely(burst))
@@ -3588,7 +3636,8 @@ out:
/* If pkt_dev->count is zero, then run forever */
if ((pkt_dev->count != 0) && (pkt_dev->sofar >= pkt_dev->count)) {
- pktgen_wait_for_skb(pkt_dev);
+ if (pkt_dev->skb)
+ pktgen_wait_for_skb(pkt_dev);
/* Done with this */
pktgen_stop_device(pkt_dev);
@@ -3620,10 +3669,8 @@ static int pktgen_thread_worker(void *arg)
if (unlikely(!pkt_dev && t->control == 0)) {
if (t->net->pktgen_exiting)
break;
- wait_event_interruptible_timeout(t->queue,
- t->control != 0,
- HZ/10);
- try_to_freeze();
+ wait_event_freezable_timeout(t->queue,
+ t->control != 0, HZ / 10);
continue;
}
@@ -3771,6 +3818,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
pkt_dev->svlan_id = 0xffff;
pkt_dev->burst = 1;
pkt_dev->node = NUMA_NO_NODE;
+ pkt_dev->flags = F_SHARED; /* SKB shared by default */
err = pktgen_setup_dev(t->net, pkt_dev, ifname);
if (err)
@@ -3982,8 +4030,7 @@ static void __net_exit pg_net_exit(struct net *net)
list_for_each_safe(q, n, &list) {
t = list_entry(q, struct pktgen_thread, th_list);
list_del(&t->th_list);
- kthread_stop(t->tsk);
- put_task_struct(t->tsk);
+ kthread_stop_put(t->tsk);
kfree(t);
}
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index f35c2e998406..63de5c635842 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -33,9 +33,6 @@
void reqsk_queue_alloc(struct request_sock_queue *queue)
{
- spin_lock_init(&queue->rskq_lock);
-
- spin_lock_init(&queue->fastopenq.lock);
queue->fastopenq.rskq_rst_head = NULL;
queue->fastopenq.rskq_rst_tail = NULL;
queue->fastopenq.qlen = 0;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 4a2ec33bfb51..f6f29eb03ec2 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -57,6 +57,7 @@
#if IS_ENABLED(CONFIG_IPV6)
#include <net/addrconf.h>
#endif
+#include <linux/dpll.h>
#include "dev.h"
@@ -341,8 +342,7 @@ int rtnl_unregister(int protocol, int msgtype)
return -ENOENT;
}
- link = rtnl_dereference(tab[msgindex]);
- RCU_INIT_POINTER(tab[msgindex], NULL);
+ link = rcu_replace_pointer_rtnl(tab[msgindex], NULL);
rtnl_unlock();
kfree_rcu(link, rcu);
@@ -367,18 +367,13 @@ void rtnl_unregister_all(int protocol)
BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
rtnl_lock();
- tab = rtnl_dereference(rtnl_msg_handlers[protocol]);
+ tab = rcu_replace_pointer_rtnl(rtnl_msg_handlers[protocol], NULL);
if (!tab) {
rtnl_unlock();
return;
}
- RCU_INIT_POINTER(rtnl_msg_handlers[protocol], NULL);
for (msgindex = 0; msgindex < RTM_NR_MSGTYPES; msgindex++) {
- link = rtnl_dereference(tab[msgindex]);
- if (!link)
- continue;
-
- RCU_INIT_POINTER(tab[msgindex], NULL);
+ link = rcu_replace_pointer_rtnl(tab[msgindex], NULL);
kfree_rcu(link, rcu);
}
rtnl_unlock();
@@ -1055,6 +1050,15 @@ static size_t rtnl_devlink_port_size(const struct net_device *dev)
return size;
}
+static size_t rtnl_dpll_pin_size(const struct net_device *dev)
+{
+ size_t size = nla_total_size(0); /* nest IFLA_DPLL_PIN */
+
+ size += dpll_msg_pin_handle_size(netdev_dpll_pin(dev));
+
+ return size;
+}
+
static noinline size_t if_nlmsg_size(const struct net_device *dev,
u32 ext_filter_mask)
{
@@ -1111,6 +1115,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ rtnl_prop_list_size(dev)
+ nla_total_size(MAX_ADDR_LEN) /* IFLA_PERM_ADDRESS */
+ rtnl_devlink_port_size(dev)
+ + rtnl_dpll_pin_size(dev)
+ 0;
}
@@ -1774,6 +1779,28 @@ nest_cancel:
return ret;
}
+static int rtnl_fill_dpll_pin(struct sk_buff *skb,
+ const struct net_device *dev)
+{
+ struct nlattr *dpll_pin_nest;
+ int ret;
+
+ dpll_pin_nest = nla_nest_start(skb, IFLA_DPLL_PIN);
+ if (!dpll_pin_nest)
+ return -EMSGSIZE;
+
+ ret = dpll_msg_add_pin_handle(skb, netdev_dpll_pin(dev));
+ if (ret < 0)
+ goto nest_cancel;
+
+ nla_nest_end(skb, dpll_pin_nest);
+ return 0;
+
+nest_cancel:
+ nla_nest_cancel(skb, dpll_pin_nest);
+ return ret;
+}
+
static int rtnl_fill_ifinfo(struct sk_buff *skb,
struct net_device *dev, struct net *src_net,
int type, u32 pid, u32 seq, u32 change,
@@ -1916,6 +1943,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
if (rtnl_fill_devlink_port(skb, dev))
goto nla_put_failure;
+ if (rtnl_fill_dpll_pin(skb, dev))
+ goto nla_put_failure;
+
nlmsg_end(skb, nlh);
return 0;
@@ -2869,13 +2899,6 @@ static int do_setlink(const struct sk_buff *skb,
call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
}
- if (tb[IFLA_MASTER]) {
- err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]), extack);
- if (err)
- goto errout;
- status |= DO_SETLINK_MODIFIED;
- }
-
if (ifm->ifi_flags || ifm->ifi_change) {
err = dev_change_flags(dev, rtnl_dev_combine_flags(dev, ifm),
extack);
@@ -2883,6 +2906,13 @@ static int do_setlink(const struct sk_buff *skb,
goto errout;
}
+ if (tb[IFLA_MASTER]) {
+ err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]), extack);
+ if (err)
+ goto errout;
+ status |= DO_SETLINK_MODIFIED;
+ }
+
if (tb[IFLA_CARRIER]) {
err = dev_change_carrier(dev, nla_get_u8(tb[IFLA_CARRIER]));
if (err)
@@ -3813,10 +3843,18 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
goto out;
err = -ENOBUFS;
- nskb = nlmsg_new(if_nlmsg_size(dev, ext_filter_mask), GFP_KERNEL);
+ nskb = nlmsg_new_large(if_nlmsg_size(dev, ext_filter_mask));
if (nskb == NULL)
goto out;
+ /* Synchronize the carrier state so we don't report a state
+ * that we're not actually going to honour immediately; if
+ * the driver just did a carrier off->on transition, we can
+ * only TX if link watch work has run, but without this we'd
+ * already report carrier on, even if it doesn't work yet.
+ */
+ linkwatch_sync_dev(dev);
+
err = rtnl_fill_ifinfo(nskb, dev, net,
RTM_NEWLINK, NETLINK_CB(skb).portid,
nlh->nlmsg_seq, 0, 0, ext_filter_mask,
@@ -4331,13 +4369,6 @@ int ndo_dflt_fdb_del(struct ndmsg *ndm,
}
EXPORT_SYMBOL(ndo_dflt_fdb_del);
-static const struct nla_policy fdb_del_bulk_policy[NDA_MAX + 1] = {
- [NDA_VLAN] = { .type = NLA_U16 },
- [NDA_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 1),
- [NDA_NDM_STATE_MASK] = { .type = NLA_U16 },
- [NDA_NDM_FLAGS_MASK] = { .type = NLA_U8 },
-};
-
static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
@@ -4358,8 +4389,10 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX,
NULL, extack);
} else {
- err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX,
- fdb_del_bulk_policy, extack);
+ /* For bulk delete, the drivers will parse the message with
+ * policy.
+ */
+ err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack);
}
if (err < 0)
return err;
@@ -4382,6 +4415,10 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
return -EINVAL;
}
addr = nla_data(tb[NDA_LLADDR]);
+
+ err = fdb_vid_parse(tb[NDA_VLAN], &vid, extack);
+ if (err)
+ return err;
}
if (dev->type != ARPHRD_ETHER) {
@@ -4389,10 +4426,6 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
return -EINVAL;
}
- err = fdb_vid_parse(tb[NDA_VLAN], &vid, extack);
- if (err)
- return err;
-
err = -EOPNOTSUPP;
/* Support fdb on master device the net/bridge default case */
@@ -4406,8 +4439,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
err = ops->ndo_fdb_del(ndm, tb, dev, addr, vid, extack);
} else {
if (ops->ndo_fdb_del_bulk)
- err = ops->ndo_fdb_del_bulk(ndm, tb, dev, vid,
- extack);
+ err = ops->ndo_fdb_del_bulk(nlh, dev, extack);
}
if (err)
@@ -4428,8 +4460,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
/* in case err was cleared by NTF_MASTER call */
err = -EOPNOTSUPP;
if (ops->ndo_fdb_del_bulk)
- err = ops->ndo_fdb_del_bulk(ndm, tb, dev, vid,
- extack);
+ err = ops->ndo_fdb_del_bulk(nlh, dev, extack);
}
if (!err) {
@@ -5503,13 +5534,11 @@ static unsigned int
rtnl_offload_xstats_get_size_hw_s_info_one(const struct net_device *dev,
enum netdev_offload_xstats_type type)
{
- bool enabled = netdev_offload_xstats_enabled(dev, type);
-
return nla_total_size(0) +
/* IFLA_OFFLOAD_XSTATS_HW_S_INFO_REQUEST */
nla_total_size(sizeof(u8)) +
/* IFLA_OFFLOAD_XSTATS_HW_S_INFO_USED */
- (enabled ? nla_total_size(sizeof(u8)) : 0) +
+ nla_total_size(sizeof(u8)) +
0;
}
@@ -6192,6 +6221,93 @@ out:
return skb->len;
}
+static int rtnl_validate_mdb_entry_get(const struct nlattr *attr,
+ struct netlink_ext_ack *extack)
+{
+ struct br_mdb_entry *entry = nla_data(attr);
+
+ if (nla_len(attr) != sizeof(struct br_mdb_entry)) {
+ NL_SET_ERR_MSG_ATTR(extack, attr, "Invalid attribute length");
+ return -EINVAL;
+ }
+
+ if (entry->ifindex) {
+ NL_SET_ERR_MSG(extack, "Entry ifindex cannot be specified");
+ return -EINVAL;
+ }
+
+ if (entry->state) {
+ NL_SET_ERR_MSG(extack, "Entry state cannot be specified");
+ return -EINVAL;
+ }
+
+ if (entry->flags) {
+ NL_SET_ERR_MSG(extack, "Entry flags cannot be specified");
+ return -EINVAL;
+ }
+
+ if (entry->vid >= VLAN_VID_MASK) {
+ NL_SET_ERR_MSG(extack, "Invalid entry VLAN id");
+ return -EINVAL;
+ }
+
+ if (entry->addr.proto != htons(ETH_P_IP) &&
+ entry->addr.proto != htons(ETH_P_IPV6) &&
+ entry->addr.proto != 0) {
+ NL_SET_ERR_MSG(extack, "Unknown entry protocol");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static const struct nla_policy mdba_get_policy[MDBA_GET_ENTRY_MAX + 1] = {
+ [MDBA_GET_ENTRY] = NLA_POLICY_VALIDATE_FN(NLA_BINARY,
+ rtnl_validate_mdb_entry_get,
+ sizeof(struct br_mdb_entry)),
+ [MDBA_GET_ENTRY_ATTRS] = { .type = NLA_NESTED },
+};
+
+static int rtnl_mdb_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[MDBA_GET_ENTRY_MAX + 1];
+ struct net *net = sock_net(in_skb->sk);
+ struct br_port_msg *bpm;
+ struct net_device *dev;
+ int err;
+
+ err = nlmsg_parse(nlh, sizeof(struct br_port_msg), tb,
+ MDBA_GET_ENTRY_MAX, mdba_get_policy, extack);
+ if (err)
+ return err;
+
+ bpm = nlmsg_data(nlh);
+ if (!bpm->ifindex) {
+ NL_SET_ERR_MSG(extack, "Invalid ifindex");
+ return -EINVAL;
+ }
+
+ dev = __dev_get_by_index(net, bpm->ifindex);
+ if (!dev) {
+ NL_SET_ERR_MSG(extack, "Device doesn't exist");
+ return -ENODEV;
+ }
+
+ if (NL_REQ_ATTR_CHECK(extack, NULL, tb, MDBA_GET_ENTRY)) {
+ NL_SET_ERR_MSG(extack, "Missing MDBA_GET_ENTRY attribute");
+ return -EINVAL;
+ }
+
+ if (!dev->netdev_ops->ndo_mdb_get) {
+ NL_SET_ERR_MSG(extack, "Device does not support MDB operations");
+ return -EOPNOTSUPP;
+ }
+
+ return dev->netdev_ops->ndo_mdb_get(dev, tb, NETLINK_CB(in_skb).portid,
+ nlh->nlmsg_seq, extack);
+}
+
static int rtnl_validate_mdb_entry(const struct nlattr *attr,
struct netlink_ext_ack *extack)
{
@@ -6294,17 +6410,64 @@ static int rtnl_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
return dev->netdev_ops->ndo_mdb_add(dev, tb, nlh->nlmsg_flags, extack);
}
+static int rtnl_validate_mdb_entry_del_bulk(const struct nlattr *attr,
+ struct netlink_ext_ack *extack)
+{
+ struct br_mdb_entry *entry = nla_data(attr);
+ struct br_mdb_entry zero_entry = {};
+
+ if (nla_len(attr) != sizeof(struct br_mdb_entry)) {
+ NL_SET_ERR_MSG_ATTR(extack, attr, "Invalid attribute length");
+ return -EINVAL;
+ }
+
+ if (entry->state != MDB_PERMANENT && entry->state != MDB_TEMPORARY) {
+ NL_SET_ERR_MSG(extack, "Unknown entry state");
+ return -EINVAL;
+ }
+
+ if (entry->flags) {
+ NL_SET_ERR_MSG(extack, "Entry flags cannot be set");
+ return -EINVAL;
+ }
+
+ if (entry->vid >= VLAN_N_VID - 1) {
+ NL_SET_ERR_MSG(extack, "Invalid entry VLAN id");
+ return -EINVAL;
+ }
+
+ if (memcmp(&entry->addr, &zero_entry.addr, sizeof(entry->addr))) {
+ NL_SET_ERR_MSG(extack, "Entry address cannot be set");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static const struct nla_policy mdba_del_bulk_policy[MDBA_SET_ENTRY_MAX + 1] = {
+ [MDBA_SET_ENTRY] = NLA_POLICY_VALIDATE_FN(NLA_BINARY,
+ rtnl_validate_mdb_entry_del_bulk,
+ sizeof(struct br_mdb_entry)),
+ [MDBA_SET_ENTRY_ATTRS] = { .type = NLA_NESTED },
+};
+
static int rtnl_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
+ bool del_bulk = !!(nlh->nlmsg_flags & NLM_F_BULK);
struct nlattr *tb[MDBA_SET_ENTRY_MAX + 1];
struct net *net = sock_net(skb->sk);
struct br_port_msg *bpm;
struct net_device *dev;
int err;
- err = nlmsg_parse_deprecated(nlh, sizeof(*bpm), tb,
- MDBA_SET_ENTRY_MAX, mdba_policy, extack);
+ if (!del_bulk)
+ err = nlmsg_parse_deprecated(nlh, sizeof(*bpm), tb,
+ MDBA_SET_ENTRY_MAX, mdba_policy,
+ extack);
+ else
+ err = nlmsg_parse(nlh, sizeof(*bpm), tb, MDBA_SET_ENTRY_MAX,
+ mdba_del_bulk_policy, extack);
if (err)
return err;
@@ -6325,6 +6488,14 @@ static int rtnl_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
return -EINVAL;
}
+ if (del_bulk) {
+ if (!dev->netdev_ops->ndo_mdb_del_bulk) {
+ NL_SET_ERR_MSG(extack, "Device does not support MDB bulk deletion");
+ return -EOPNOTSUPP;
+ }
+ return dev->netdev_ops->ndo_mdb_del_bulk(dev, tb, extack);
+ }
+
if (!dev->netdev_ops->ndo_mdb_del) {
NL_SET_ERR_MSG(extack, "Device does not support MDB operations");
return -EOPNOTSUPP;
@@ -6568,7 +6739,8 @@ void __init rtnetlink_init(void)
0);
rtnl_register(PF_UNSPEC, RTM_SETSTATS, rtnl_stats_set, NULL, 0);
- rtnl_register(PF_BRIDGE, RTM_GETMDB, NULL, rtnl_mdb_dump, 0);
+ rtnl_register(PF_BRIDGE, RTM_GETMDB, rtnl_mdb_get, rtnl_mdb_dump, 0);
rtnl_register(PF_BRIDGE, RTM_NEWMDB, rtnl_mdb_add, NULL, 0);
- rtnl_register(PF_BRIDGE, RTM_DELMDB, rtnl_mdb_del, NULL, 0);
+ rtnl_register(PF_BRIDGE, RTM_DELMDB, rtnl_mdb_del, NULL,
+ RTNL_FLAG_BULK_DEL_SUPPORTED);
}
diff --git a/net/core/scm.c b/net/core/scm.c
index 880027ecf516..d0e0852a24d5 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -26,6 +26,7 @@
#include <linux/nsproxy.h>
#include <linux/slab.h>
#include <linux/errqueue.h>
+#include <linux/io_uring.h>
#include <linux/uaccess.h>
@@ -103,6 +104,11 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
if (fd < 0 || !(file = fget_raw(fd)))
return -EBADF;
+ /* don't allow io_uring files */
+ if (io_is_uring_fops(file)) {
+ fput(file);
+ return -EINVAL;
+ }
*fpp++ = file;
fpl->count++;
}
@@ -319,7 +325,7 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
}
for (i = 0; i < fdmax; i++) {
- err = receive_fd_user(scm->fp->fp[i], cmsg_data + i, o_flags);
+ err = scm_recv_one_fd(scm->fp->fp[i], cmsg_data + i, o_flags);
if (err < 0)
break;
}
diff --git a/net/core/selftests.c b/net/core/selftests.c
index acb1ee97bbd3..8f801e6e3b91 100644
--- a/net/core/selftests.c
+++ b/net/core/selftests.c
@@ -397,16 +397,14 @@ EXPORT_SYMBOL_GPL(net_selftest_get_count);
void net_selftest_get_strings(u8 *data)
{
- u8 *p = data;
int i;
- for (i = 0; i < net_selftest_get_count(); i++) {
- snprintf(p, ETH_GSTRING_LEN, "%2d. %s", i + 1,
- net_selftests[i].name);
- p += ETH_GSTRING_LEN;
- }
+ for (i = 0; i < net_selftest_get_count(); i++)
+ ethtool_sprintf(&data, "%2d. %s", i + 1,
+ net_selftests[i].name);
}
EXPORT_SYMBOL_GPL(net_selftest_get_strings);
+MODULE_DESCRIPTION("Common library for generic PHY ethtool selftests");
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Oleksij Rempel <o.rempel@pengutronix.de>");
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 45707059082f..edbbef563d4d 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -62,6 +62,7 @@
#include <linux/if_vlan.h>
#include <linux/mpls.h>
#include <linux/kcov.h>
+#include <linux/iov_iter.h>
#include <net/protocol.h>
#include <net/dst.h>
@@ -336,7 +337,7 @@ static struct sk_buff *napi_skb_cache_get(void)
}
skb = nc->skb_cache[--nc->skb_count];
- kasan_unpoison_object_data(skbuff_cache, skb);
+ kasan_mempool_unpoison_object(skb, kmem_cache_size(skbuff_cache));
return skb;
}
@@ -550,7 +551,7 @@ static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node,
bool *pfmemalloc)
{
bool ret_pfmemalloc = false;
- unsigned int obj_size;
+ size_t obj_size;
void *obj;
obj_size = SKB_HEAD_ALIGN(*size);
@@ -567,7 +568,13 @@ static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node,
obj = kmem_cache_alloc_node(skb_small_head_cache, flags, node);
goto out;
}
- *size = obj_size = kmalloc_size_roundup(obj_size);
+
+ obj_size = kmalloc_size_roundup(obj_size);
+ /* The following cast might truncate high-order bits of obj_size, this
+ * is harmless because kmalloc(obj_size >= 2^32) will fail anyway.
+ */
+ *size = (unsigned int)obj_size;
+
/*
* Try a regular allocation, when that fails and we're not entitled
* to the reserves, fail.
@@ -841,6 +848,8 @@ EXPORT_SYMBOL(__napi_alloc_skb);
void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
int size, unsigned int truesize)
{
+ DEBUG_NET_WARN_ON_ONCE(size > truesize);
+
skb_fill_page_desc(skb, i, page, off, size);
skb->len += size;
skb->data_len += size;
@@ -853,6 +862,8 @@ void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size,
{
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+ DEBUG_NET_WARN_ON_ONCE(size > truesize);
+
skb_frag_size_add(frag, size);
skb->len += size;
skb->data_len += size;
@@ -879,6 +890,11 @@ static void skb_clone_fraglist(struct sk_buff *skb)
skb_get(list);
}
+static bool is_pp_page(struct page *page)
+{
+ return (page->pp_magic & ~0x3UL) == PP_SIGNATURE;
+}
+
#if IS_ENABLED(CONFIG_PAGE_POOL)
bool napi_pp_put_page(struct page *page, bool napi_safe)
{
@@ -894,7 +910,7 @@ bool napi_pp_put_page(struct page *page, bool napi_safe)
* and page_is_pfmemalloc() is checked in __page_pool_put_page()
* to avoid recycling the pfmemalloc page.
*/
- if (unlikely((page->pp_magic & ~0x3UL) != PP_SIGNATURE))
+ if (unlikely(!is_pp_page(page)))
return false;
pp = page->pp;
@@ -931,6 +947,37 @@ static bool skb_pp_recycle(struct sk_buff *skb, void *data, bool napi_safe)
return napi_pp_put_page(virt_to_page(data), napi_safe);
}
+/**
+ * skb_pp_frag_ref() - Increase fragment references of a page pool aware skb
+ * @skb: page pool aware skb
+ *
+ * Increase the fragment reference count (pp_ref_count) of a skb. This is
+ * intended to gain fragment references only for page pool aware skbs,
+ * i.e. when skb->pp_recycle is true, and not for fragments in a
+ * non-pp-recycling skb. It has a fallback to increase references on normal
+ * pages, as page pool aware skbs may also have normal page fragments.
+ */
+static int skb_pp_frag_ref(struct sk_buff *skb)
+{
+ struct skb_shared_info *shinfo;
+ struct page *head_page;
+ int i;
+
+ if (!skb->pp_recycle)
+ return -EINVAL;
+
+ shinfo = skb_shinfo(skb);
+
+ for (i = 0; i < shinfo->nr_frags; i++) {
+ head_page = compound_head(skb_frag_page(&shinfo->frags[i]));
+ if (likely(is_pp_page(head_page)))
+ page_pool_ref_page(head_page);
+ else
+ page_ref_inc(head_page);
+ }
+ return 0;
+}
+
static void skb_kfree_head(void *head, unsigned int end_offset)
{
if (end_offset == SKB_SMALL_HEAD_HEADROOM)
@@ -1298,13 +1345,15 @@ static void napi_skb_cache_put(struct sk_buff *skb)
struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
u32 i;
- kasan_poison_object_data(skbuff_cache, skb);
+ if (!kasan_mempool_poison_object(skb))
+ return;
+
nc->skb_cache[nc->skb_count++] = skb;
if (unlikely(nc->skb_count == NAPI_SKB_CACHE_SIZE)) {
for (i = NAPI_SKB_CACHE_HALF; i < NAPI_SKB_CACHE_SIZE; i++)
- kasan_unpoison_object_data(skbuff_cache,
- nc->skb_cache[i]);
+ kasan_mempool_unpoison_object(nc->skb_cache[i],
+ kmem_cache_size(skbuff_cache));
kmem_cache_free_bulk(skbuff_cache, NAPI_SKB_CACHE_HALF,
nc->skb_cache + NAPI_SKB_CACHE_HALF);
@@ -3712,10 +3761,19 @@ EXPORT_SYMBOL(skb_dequeue_tail);
void skb_queue_purge_reason(struct sk_buff_head *list,
enum skb_drop_reason reason)
{
- struct sk_buff *skb;
+ struct sk_buff_head tmp;
+ unsigned long flags;
+
+ if (skb_queue_empty_lockless(list))
+ return;
+
+ __skb_queue_head_init(&tmp);
- while ((skb = skb_dequeue(list)) != NULL)
- kfree_skb_reason(skb, reason);
+ spin_lock_irqsave(&list->lock, flags);
+ skb_queue_splice_init(list, &tmp);
+ spin_unlock_irqrestore(&list->lock, flags);
+
+ __skb_queue_purge_reason(&tmp, reason);
}
EXPORT_SYMBOL(skb_queue_purge_reason);
@@ -4248,6 +4306,7 @@ static void skb_ts_finish(struct ts_config *conf, struct ts_state *state)
unsigned int skb_find_text(struct sk_buff *skb, unsigned int from,
unsigned int to, struct ts_config *config)
{
+ unsigned int patlen = config->ops->get_pattern_len(config);
struct ts_state state;
unsigned int ret;
@@ -4259,7 +4318,7 @@ unsigned int skb_find_text(struct sk_buff *skb, unsigned int from,
skb_prepare_seq_read(skb, from, to, TS_SKB_CB(&state));
ret = textsearch_find(config, &state);
- return (ret <= to - from ? ret : UINT_MAX);
+ return (ret + patlen <= to - from ? ret : UINT_MAX);
}
EXPORT_SYMBOL(skb_find_text);
@@ -4423,21 +4482,20 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
struct sk_buff *segs = NULL;
struct sk_buff *tail = NULL;
struct sk_buff *list_skb = skb_shinfo(head_skb)->frag_list;
- skb_frag_t *frag = skb_shinfo(head_skb)->frags;
unsigned int mss = skb_shinfo(head_skb)->gso_size;
unsigned int doffset = head_skb->data - skb_mac_header(head_skb);
- struct sk_buff *frag_skb = head_skb;
unsigned int offset = doffset;
unsigned int tnl_hlen = skb_tnl_header_len(head_skb);
unsigned int partial_segs = 0;
unsigned int headroom;
unsigned int len = head_skb->len;
+ struct sk_buff *frag_skb;
+ skb_frag_t *frag;
__be16 proto;
bool csum, sg;
- int nfrags = skb_shinfo(head_skb)->nr_frags;
int err = -ENOMEM;
int i = 0;
- int pos;
+ int nfrags, pos;
if ((skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY) &&
mss != GSO_BY_FRAGS && mss != skb_headlen(head_skb)) {
@@ -4502,8 +4560,9 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
/* GSO partial only requires that we trim off any excess that
* doesn't fit into an MSS sized block, so take care of that
* now.
+ * Cap len to not accidentally hit GSO_BY_FRAGS.
*/
- partial_segs = len / mss;
+ partial_segs = min(len, GSO_BY_FRAGS - 1) / mss;
if (partial_segs > 1)
mss *= partial_segs;
else
@@ -4514,6 +4573,13 @@ normal:
headroom = skb_headroom(head_skb);
pos = skb_headlen(head_skb);
+ if (skb_orphan_frags(head_skb, GFP_ATOMIC))
+ return ERR_PTR(-ENOMEM);
+
+ nfrags = skb_shinfo(head_skb)->nr_frags;
+ frag = skb_shinfo(head_skb)->frags;
+ frag_skb = head_skb;
+
do {
struct sk_buff *nskb;
skb_frag_t *nskb_frag;
@@ -4534,6 +4600,10 @@ normal:
(skb_headlen(list_skb) == len || sg)) {
BUG_ON(skb_headlen(list_skb) > len);
+ nskb = skb_clone(list_skb, GFP_ATOMIC);
+ if (unlikely(!nskb))
+ goto err;
+
i = 0;
nfrags = skb_shinfo(list_skb)->nr_frags;
frag = skb_shinfo(list_skb)->frags;
@@ -4552,12 +4622,8 @@ normal:
frag++;
}
- nskb = skb_clone(list_skb, GFP_ATOMIC);
list_skb = list_skb->next;
- if (unlikely(!nskb))
- goto err;
-
if (unlikely(pskb_trim(nskb, len))) {
kfree_skb(nskb);
goto err;
@@ -4633,12 +4699,16 @@ normal:
skb_shinfo(nskb)->flags |= skb_shinfo(head_skb)->flags &
SKBFL_SHARED_FRAG;
- if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
- skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
+ if (skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
goto err;
while (pos < offset + len) {
if (i >= nfrags) {
+ if (skb_orphan_frags(list_skb, GFP_ATOMIC) ||
+ skb_zerocopy_clone(nskb, list_skb,
+ GFP_ATOMIC))
+ goto err;
+
i = 0;
nfrags = skb_shinfo(list_skb)->nr_frags;
frag = skb_shinfo(list_skb)->frags;
@@ -4652,10 +4722,6 @@ normal:
i--;
frag--;
}
- if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
- skb_zerocopy_clone(nskb, frag_skb,
- GFP_ATOMIC))
- goto err;
list_skb = list_skb->next;
}
@@ -4797,7 +4863,9 @@ static __always_inline unsigned int skb_ext_total_length(void)
static void skb_extensions_init(void)
{
BUILD_BUG_ON(SKB_EXT_NUM >= 8);
+#if !IS_ENABLED(CONFIG_KCOV_INSTRUMENT_ALL)
BUILD_BUG_ON(skb_ext_total_length() > 255);
+#endif
skbuff_ext_cache = kmem_cache_create("skbuff_ext_cache",
SKB_EXT_ALIGN_VALUE * skb_ext_total_length(),
@@ -5137,6 +5205,9 @@ struct sk_buff *sock_dequeue_err_skb(struct sock *sk)
bool icmp_next = false;
unsigned long flags;
+ if (skb_queue_empty_lockless(q))
+ return NULL;
+
spin_lock_irqsave(&q->lock, flags);
skb = __skb_dequeue(q);
if (skb && (skb_next = skb_peek(q))) {
@@ -5207,7 +5278,7 @@ static void __skb_complete_tx_timestamp(struct sk_buff *skb,
serr->ee.ee_info = tstype;
serr->opt_stats = opt_stats;
serr->header.h4.iif = skb->dev ? skb->dev->ifindex : 0;
- if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) {
+ if (READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) {
serr->ee.ee_data = skb_shinfo(skb)->tskey;
if (sk_is_tcp(sk))
serr->ee.ee_data -= atomic_read(&sk->sk_tskey);
@@ -5263,21 +5334,23 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
{
struct sk_buff *skb;
bool tsonly, opt_stats = false;
+ u32 tsflags;
if (!sk)
return;
- if (!hwtstamps && !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TX_SWHW) &&
+ tsflags = READ_ONCE(sk->sk_tsflags);
+ if (!hwtstamps && !(tsflags & SOF_TIMESTAMPING_OPT_TX_SWHW) &&
skb_shinfo(orig_skb)->tx_flags & SKBTX_IN_PROGRESS)
return;
- tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY;
+ tsonly = tsflags & SOF_TIMESTAMPING_OPT_TSONLY;
if (!skb_may_tx_timestamp(sk, tsonly))
return;
if (tsonly) {
#ifdef CONFIG_INET
- if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS) &&
+ if ((tsflags & SOF_TIMESTAMPING_OPT_STATS) &&
sk_is_tcp(sk)) {
skb = tcp_get_timestamping_opt_stats(sk, orig_skb,
ack_skb);
@@ -5732,17 +5805,12 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
return false;
/* In general, avoid mixing page_pool and non-page_pool allocated
- * pages within the same SKB. Additionally avoid dealing with clones
- * with page_pool pages, in case the SKB is using page_pool fragment
- * references (PP_FLAG_PAGE_FRAG). Since we only take full page
- * references for cloned SKBs at the moment that would result in
- * inconsistent reference counts.
- * In theory we could take full references if @from is cloned and
- * !@to->pp_recycle but its tricky (due to potential race with
- * the clone disappearing) and rare, so not worth dealing with.
+ * pages within the same SKB. In theory we could take full
+ * references if @from is cloned and !@to->pp_recycle but its
+ * tricky (due to potential race with the clone disappearing) and
+ * rare, so not worth dealing with.
*/
- if (to->pp_recycle != from->pp_recycle ||
- (from->pp_recycle && skb_cloned(from)))
+ if (to->pp_recycle != from->pp_recycle)
return false;
if (len <= skb_tailroom(to)) {
@@ -5799,8 +5867,10 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
/* if the skb is not cloned this does nothing
* since we set nr_frags to 0.
*/
- for (i = 0; i < from_shinfo->nr_frags; i++)
- __skb_frag_ref(&from_shinfo->frags[i]);
+ if (skb_pp_frag_ref(from)) {
+ for (i = 0; i < from_shinfo->nr_frags; i++)
+ __skb_frag_ref(&from_shinfo->frags[i]);
+ }
to->truesize += delta;
to->len += len;
@@ -5927,6 +5997,31 @@ int skb_ensure_writable(struct sk_buff *skb, unsigned int write_len)
}
EXPORT_SYMBOL(skb_ensure_writable);
+int skb_ensure_writable_head_tail(struct sk_buff *skb, struct net_device *dev)
+{
+ int needed_headroom = dev->needed_headroom;
+ int needed_tailroom = dev->needed_tailroom;
+
+ /* For tail taggers, we need to pad short frames ourselves, to ensure
+ * that the tail tag does not fail at its role of being at the end of
+ * the packet, once the conduit interface pads the frame. Account for
+ * that pad length here, and pad later.
+ */
+ if (unlikely(needed_tailroom && skb->len < ETH_ZLEN))
+ needed_tailroom += ETH_ZLEN - skb->len;
+ /* skb_headroom() returns unsigned int... */
+ needed_headroom = max_t(int, needed_headroom - skb_headroom(skb), 0);
+ needed_tailroom = max_t(int, needed_tailroom - skb_tailroom(skb), 0);
+
+ if (likely(!needed_headroom && !needed_tailroom && !skb_cloned(skb)))
+ /* No reallocation needed, yay! */
+ return 0;
+
+ return pskb_expand_head(skb, needed_headroom, needed_tailroom,
+ GFP_ATOMIC);
+}
+EXPORT_SYMBOL(skb_ensure_writable_head_tail);
+
/* remove VLAN header from packet and update csum accordingly.
* expects a non skb_vlan_tag_present skb with a vlan tag payload
*/
@@ -6917,3 +7012,42 @@ out:
return spliced ?: ret;
}
EXPORT_SYMBOL(skb_splice_from_iter);
+
+static __always_inline
+size_t memcpy_from_iter_csum(void *iter_from, size_t progress,
+ size_t len, void *to, void *priv2)
+{
+ __wsum *csum = priv2;
+ __wsum next = csum_partial_copy_nocheck(iter_from, to + progress, len);
+
+ *csum = csum_block_add(*csum, next, progress);
+ return 0;
+}
+
+static __always_inline
+size_t copy_from_user_iter_csum(void __user *iter_from, size_t progress,
+ size_t len, void *to, void *priv2)
+{
+ __wsum next, *csum = priv2;
+
+ next = csum_and_copy_from_user(iter_from, to + progress, len);
+ *csum = csum_block_add(*csum, next, progress);
+ return next ? 0 : len;
+}
+
+bool csum_and_copy_from_iter_full(void *addr, size_t bytes,
+ __wsum *csum, struct iov_iter *i)
+{
+ size_t copied;
+
+ if (WARN_ON_ONCE(!i->data_source))
+ return false;
+ copied = iterate_and_advance2(i, bytes, addr, csum,
+ copy_from_user_iter_csum,
+ memcpy_from_iter_csum);
+ if (likely(copied == bytes))
+ return true;
+ iov_iter_revert(i, copied);
+ return false;
+}
+EXPORT_SYMBOL(csum_and_copy_from_iter_full);
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index a0659fc29bcc..93ecfceac1bc 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -612,12 +612,18 @@ static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb
static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
u32 off, u32 len, bool ingress)
{
+ int err = 0;
+
if (!ingress) {
if (!sock_writeable(psock->sk))
return -EAGAIN;
return skb_send_sock(psock->sk, skb, off, len);
}
- return sk_psock_skb_ingress(psock, skb, off, len);
+ skb_get(skb);
+ err = sk_psock_skb_ingress(psock, skb, off, len);
+ if (err < 0)
+ kfree_skb(skb);
+ return err;
}
static void sk_psock_skb_state(struct sk_psock *psock,
@@ -685,9 +691,7 @@ static void sk_psock_backlog(struct work_struct *work)
} while (len);
skb = skb_dequeue(&psock->ingress_skb);
- if (!ingress) {
- kfree_skb(skb);
- }
+ kfree_skb(skb);
}
end:
mutex_unlock(&psock->work_mutex);
@@ -822,6 +826,8 @@ static void sk_psock_destroy(struct work_struct *work)
if (psock->sk_redir)
sock_put(psock->sk_redir);
+ if (psock->sk_pair)
+ sock_put(psock->sk_pair);
sock_put(psock->sk);
kfree(psock);
}
diff --git a/net/core/sock.c b/net/core/sock.c
index 666a17cab4f5..0a7f46c37f0c 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -107,6 +107,7 @@
#include <linux/interrupt.h>
#include <linux/poll.h>
#include <linux/tcp.h>
+#include <linux/udp.h>
#include <linux/init.h>
#include <linux/highmem.h>
#include <linux/user_namespace.h>
@@ -283,10 +284,6 @@ EXPORT_SYMBOL(sysctl_rmem_max);
__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
-/* Maximal space eaten by iovec or ancillary data plus some space */
-int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
-EXPORT_SYMBOL(sysctl_optmem_max);
-
int sysctl_tstamp_allow_data __read_mostly = 1;
DEFINE_STATIC_KEY_FALSE(memalloc_socks_key);
@@ -600,7 +597,7 @@ struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
INDIRECT_CALL_INET(dst->ops->check, ip6_dst_check, ipv4_dst_check,
dst, cookie) == NULL) {
sk_tx_queue_clear(sk);
- sk->sk_dst_pending_confirm = 0;
+ WRITE_ONCE(sk->sk_dst_pending_confirm, 0);
RCU_INIT_POINTER(sk->sk_dst_cache, NULL);
dst_release(dst);
return NULL;
@@ -759,18 +756,19 @@ out:
return ret;
}
-bool sk_mc_loop(struct sock *sk)
+bool sk_mc_loop(const struct sock *sk)
{
if (dev_recursion_level())
return false;
if (!sk)
return true;
- switch (sk->sk_family) {
+ /* IPV6_ADDRFORM can change sk->sk_family under us. */
+ switch (READ_ONCE(sk->sk_family)) {
case AF_INET:
return inet_test_bit(MC_LOOP, sk);
#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
- return inet6_sk(sk)->mc_loop;
+ return inet6_test_bit(MC6_LOOP, sk);
#endif
}
WARN_ON_ONCE(1);
@@ -805,9 +803,7 @@ EXPORT_SYMBOL(sock_no_linger);
void sock_set_priority(struct sock *sk, u32 priority)
{
- lock_sock(sk);
WRITE_ONCE(sk->sk_priority, priority);
- release_sock(sk);
}
EXPORT_SYMBOL(sock_set_priority);
@@ -893,7 +889,7 @@ static int sock_timestamping_bind_phc(struct sock *sk, int phc_index)
if (!match)
return -EINVAL;
- sk->sk_bind_phc = phc_index;
+ WRITE_ONCE(sk->sk_bind_phc, phc_index);
return 0;
}
@@ -936,7 +932,7 @@ int sock_set_timestamping(struct sock *sk, int optname,
return ret;
}
- sk->sk_tsflags = val;
+ WRITE_ONCE(sk->sk_tsflags, val);
sock_valbool_flag(sk, SOCK_TSTAMP_NEW, optname == SO_TIMESTAMPING_NEW);
if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
@@ -1044,7 +1040,7 @@ static int sock_reserve_memory(struct sock *sk, int bytes)
mem_cgroup_uncharge_skmem(sk->sk_memcg, pages);
return -ENOMEM;
}
- sk->sk_forward_alloc += pages << PAGE_SHIFT;
+ sk_forward_alloc_add(sk, pages << PAGE_SHIFT);
WRITE_ONCE(sk->sk_reserved_mem,
sk->sk_reserved_mem + (pages << PAGE_SHIFT));
@@ -1117,6 +1113,83 @@ int sk_setsockopt(struct sock *sk, int level, int optname,
valbool = val ? 1 : 0;
+ /* handle options which do not require locking the socket. */
+ switch (optname) {
+ case SO_PRIORITY:
+ if ((val >= 0 && val <= 6) ||
+ sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) ||
+ sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
+ sock_set_priority(sk, val);
+ return 0;
+ }
+ return -EPERM;
+ case SO_PASSSEC:
+ assign_bit(SOCK_PASSSEC, &sock->flags, valbool);
+ return 0;
+ case SO_PASSCRED:
+ assign_bit(SOCK_PASSCRED, &sock->flags, valbool);
+ return 0;
+ case SO_PASSPIDFD:
+ assign_bit(SOCK_PASSPIDFD, &sock->flags, valbool);
+ return 0;
+ case SO_TYPE:
+ case SO_PROTOCOL:
+ case SO_DOMAIN:
+ case SO_ERROR:
+ return -ENOPROTOOPT;
+#ifdef CONFIG_NET_RX_BUSY_POLL
+ case SO_BUSY_POLL:
+ if (val < 0)
+ return -EINVAL;
+ WRITE_ONCE(sk->sk_ll_usec, val);
+ return 0;
+ case SO_PREFER_BUSY_POLL:
+ if (valbool && !sockopt_capable(CAP_NET_ADMIN))
+ return -EPERM;
+ WRITE_ONCE(sk->sk_prefer_busy_poll, valbool);
+ return 0;
+ case SO_BUSY_POLL_BUDGET:
+ if (val > READ_ONCE(sk->sk_busy_poll_budget) &&
+ !sockopt_capable(CAP_NET_ADMIN))
+ return -EPERM;
+ if (val < 0 || val > U16_MAX)
+ return -EINVAL;
+ WRITE_ONCE(sk->sk_busy_poll_budget, val);
+ return 0;
+#endif
+ case SO_MAX_PACING_RATE:
+ {
+ unsigned long ulval = (val == ~0U) ? ~0UL : (unsigned int)val;
+ unsigned long pacing_rate;
+
+ if (sizeof(ulval) != sizeof(val) &&
+ optlen >= sizeof(ulval) &&
+ copy_from_sockptr(&ulval, optval, sizeof(ulval))) {
+ return -EFAULT;
+ }
+ if (ulval != ~0UL)
+ cmpxchg(&sk->sk_pacing_status,
+ SK_PACING_NONE,
+ SK_PACING_NEEDED);
+ /* Pairs with READ_ONCE() from sk_getsockopt() */
+ WRITE_ONCE(sk->sk_max_pacing_rate, ulval);
+ pacing_rate = READ_ONCE(sk->sk_pacing_rate);
+ if (ulval < pacing_rate)
+ WRITE_ONCE(sk->sk_pacing_rate, ulval);
+ return 0;
+ }
+ case SO_TXREHASH:
+ if (val < -1 || val > 1)
+ return -EINVAL;
+ if ((u8)val == SOCK_TXREHASH_DEFAULT)
+ val = READ_ONCE(sock_net(sk)->core.sysctl_txrehash);
+ /* Paired with READ_ONCE() in tcp_rtx_synack()
+ * and sk_getsockopt().
+ */
+ WRITE_ONCE(sk->sk_txrehash, (u8)val);
+ return 0;
+ }
+
sockopt_lock_sock(sk);
switch (optname) {
@@ -1132,12 +1205,6 @@ int sk_setsockopt(struct sock *sk, int level, int optname,
case SO_REUSEPORT:
sk->sk_reuseport = valbool;
break;
- case SO_TYPE:
- case SO_PROTOCOL:
- case SO_DOMAIN:
- case SO_ERROR:
- ret = -ENOPROTOOPT;
- break;
case SO_DONTROUTE:
sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool);
sk_dst_reset(sk);
@@ -1212,15 +1279,6 @@ set_sndbuf:
sk->sk_no_check_tx = valbool;
break;
- case SO_PRIORITY:
- if ((val >= 0 && val <= 6) ||
- sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) ||
- sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
- WRITE_ONCE(sk->sk_priority, val);
- else
- ret = -EPERM;
- break;
-
case SO_LINGER:
if (optlen < sizeof(ling)) {
ret = -EINVAL; /* 1003.1g */
@@ -1246,14 +1304,6 @@ set_sndbuf:
case SO_BSDCOMPAT:
break;
- case SO_PASSCRED:
- assign_bit(SOCK_PASSCRED, &sock->flags, valbool);
- break;
-
- case SO_PASSPIDFD:
- assign_bit(SOCK_PASSPIDFD, &sock->flags, valbool);
- break;
-
case SO_TIMESTAMP_OLD:
case SO_TIMESTAMP_NEW:
case SO_TIMESTAMPNS_OLD:
@@ -1359,9 +1409,6 @@ set_sndbuf:
sock_valbool_flag(sk, SOCK_FILTER_LOCKED, valbool);
break;
- case SO_PASSSEC:
- assign_bit(SOCK_PASSSEC, &sock->flags, valbool);
- break;
case SO_MARK:
if (!sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
!sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
@@ -1403,50 +1450,7 @@ set_sndbuf:
sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool);
break;
-#ifdef CONFIG_NET_RX_BUSY_POLL
- case SO_BUSY_POLL:
- if (val < 0)
- ret = -EINVAL;
- else
- WRITE_ONCE(sk->sk_ll_usec, val);
- break;
- case SO_PREFER_BUSY_POLL:
- if (valbool && !sockopt_capable(CAP_NET_ADMIN))
- ret = -EPERM;
- else
- WRITE_ONCE(sk->sk_prefer_busy_poll, valbool);
- break;
- case SO_BUSY_POLL_BUDGET:
- if (val > READ_ONCE(sk->sk_busy_poll_budget) && !sockopt_capable(CAP_NET_ADMIN)) {
- ret = -EPERM;
- } else {
- if (val < 0 || val > U16_MAX)
- ret = -EINVAL;
- else
- WRITE_ONCE(sk->sk_busy_poll_budget, val);
- }
- break;
-#endif
-
- case SO_MAX_PACING_RATE:
- {
- unsigned long ulval = (val == ~0U) ? ~0UL : (unsigned int)val;
- if (sizeof(ulval) != sizeof(val) &&
- optlen >= sizeof(ulval) &&
- copy_from_sockptr(&ulval, optval, sizeof(ulval))) {
- ret = -EFAULT;
- break;
- }
- if (ulval != ~0UL)
- cmpxchg(&sk->sk_pacing_status,
- SK_PACING_NONE,
- SK_PACING_NEEDED);
- /* Pairs with READ_ONCE() from sk_getsockopt() */
- WRITE_ONCE(sk->sk_max_pacing_rate, ulval);
- sk->sk_pacing_rate = min(sk->sk_pacing_rate, ulval);
- break;
- }
case SO_INCOMING_CPU:
reuseport_update_incoming_cpu(sk, val);
break;
@@ -1531,19 +1535,6 @@ set_sndbuf:
break;
}
- case SO_TXREHASH:
- if (val < -1 || val > 1) {
- ret = -EINVAL;
- break;
- }
- if ((u8)val == SOCK_TXREHASH_DEFAULT)
- val = READ_ONCE(sock_net(sk)->core.sysctl_txrehash);
- /* Paired with READ_ONCE() in tcp_rtx_synack()
- * and sk_getsockopt().
- */
- WRITE_ONCE(sk->sk_txrehash, (u8)val);
- break;
-
default:
ret = -ENOPROTOOPT;
break;
@@ -1717,9 +1708,16 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
break;
case SO_TIMESTAMPING_OLD:
+ case SO_TIMESTAMPING_NEW:
lv = sizeof(v.timestamping);
- v.timestamping.flags = sk->sk_tsflags;
- v.timestamping.bind_phc = sk->sk_bind_phc;
+ /* For the later-added case SO_TIMESTAMPING_NEW: Be strict about only
+ * returning the flags when they were set through the same option.
+ * Don't change the beviour for the old case SO_TIMESTAMPING_OLD.
+ */
+ if (optname == SO_TIMESTAMPING_OLD || sock_flag(sk, SOCK_TSTAMP_NEW)) {
+ v.timestamping.flags = READ_ONCE(sk->sk_tsflags);
+ v.timestamping.bind_phc = READ_ONCE(sk->sk_bind_phc);
+ }
break;
case SO_RCVTIMEO_OLD:
@@ -2009,14 +2007,6 @@ lenout:
return 0;
}
-int sock_getsockopt(struct socket *sock, int level, int optname,
- char __user *optval, int __user *optlen)
-{
- return sk_getsockopt(sock->sk, level, optname,
- USER_SOCKPTR(optval),
- USER_SOCKPTR(optlen));
-}
-
/*
* Initialize an sk_lock.
*
@@ -2665,7 +2655,7 @@ struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size,
/* small safe race: SKB_TRUESIZE may differ from final skb->truesize */
if (atomic_read(&sk->sk_omem_alloc) + SKB_TRUESIZE(size) >
- READ_ONCE(sysctl_optmem_max))
+ READ_ONCE(sock_net(sk)->core.sysctl_optmem_max))
return NULL;
skb = alloc_skb(size, priority);
@@ -2683,7 +2673,7 @@ struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size,
*/
void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
{
- int optmem_max = READ_ONCE(sysctl_optmem_max);
+ int optmem_max = READ_ONCE(sock_net(sk)->core.sysctl_optmem_max);
if ((unsigned int)size <= optmem_max &&
atomic_read(&sk->sk_omem_alloc) + size < optmem_max) {
@@ -2746,9 +2736,9 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo)
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
if (refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf))
break;
- if (sk->sk_shutdown & SEND_SHUTDOWN)
+ if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
break;
- if (sk->sk_err)
+ if (READ_ONCE(sk->sk_err))
break;
timeo = schedule_timeout(timeo);
}
@@ -2776,7 +2766,7 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
goto failure;
err = -EPIPE;
- if (sk->sk_shutdown & SEND_SHUTDOWN)
+ if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
goto failure;
if (sk_wmem_alloc_get(sk) < READ_ONCE(sk->sk_sndbuf))
@@ -2820,6 +2810,7 @@ int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg,
sockc->mark = *(u32 *)CMSG_DATA(cmsg);
break;
case SO_TIMESTAMPING_OLD:
+ case SO_TIMESTAMPING_NEW:
if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
return -EINVAL;
@@ -3000,6 +2991,11 @@ void __sk_flush_backlog(struct sock *sk)
{
spin_lock_bh(&sk->sk_lock.slock);
__release_sock(sk);
+
+ if (sk->sk_prot->release_cb)
+ INDIRECT_CALL_INET_1(sk->sk_prot->release_cb,
+ tcp_release_cb, sk);
+
spin_unlock_bh(&sk->sk_lock.slock);
}
EXPORT_SYMBOL_GPL(__sk_flush_backlog);
@@ -3036,21 +3032,29 @@ EXPORT_SYMBOL(sk_wait_data);
* @amt: pages to allocate
* @kind: allocation type
*
- * Similar to __sk_mem_schedule(), but does not update sk_forward_alloc
+ * Similar to __sk_mem_schedule(), but does not update sk_forward_alloc.
+ *
+ * Unlike the globally shared limits among the sockets under same protocol,
+ * consuming the budget of a memcg won't have direct effect on other ones.
+ * So be optimistic about memcg's tolerance, and leave the callers to decide
+ * whether or not to raise allocated through sk_under_memory_pressure() or
+ * its variants.
*/
int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
{
- bool memcg_charge = mem_cgroup_sockets_enabled && sk->sk_memcg;
+ struct mem_cgroup *memcg = mem_cgroup_sockets_enabled ? sk->sk_memcg : NULL;
struct proto *prot = sk->sk_prot;
- bool charged = true;
+ bool charged = false;
long allocated;
sk_memory_allocated_add(sk, amt);
allocated = sk_memory_allocated(sk);
- if (memcg_charge &&
- !(charged = mem_cgroup_charge_skmem(sk->sk_memcg, amt,
- gfp_memcg_charge())))
- goto suppress_allocation;
+
+ if (memcg) {
+ if (!mem_cgroup_charge_skmem(memcg, amt, gfp_memcg_charge()))
+ goto suppress_allocation;
+ charged = true;
+ }
/* Under limit. */
if (allocated <= sk_prot_mem_limits(sk, 0)) {
@@ -3066,7 +3070,14 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
if (allocated > sk_prot_mem_limits(sk, 2))
goto suppress_allocation;
- /* guarantee minimum buffer size under pressure */
+ /* Guarantee minimum buffer size under pressure (either global
+ * or memcg) to make sure features described in RFC 7323 (TCP
+ * Extensions for High Performance) work properly.
+ *
+ * This rule does NOT stand when exceeds global or memcg's hard
+ * limit, or else a DoS attack can be taken place by spawning
+ * lots of sockets whose usage are under minimum buffer size.
+ */
if (kind == SK_MEM_RECV) {
if (atomic_read(&sk->sk_rmem_alloc) < sk_get_rmem0(sk, prot))
return 1;
@@ -3085,8 +3096,17 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
if (sk_has_memory_pressure(sk)) {
u64 alloc;
- if (!sk_under_memory_pressure(sk))
+ /* The following 'average' heuristic is within the
+ * scope of global accounting, so it only makes
+ * sense for global memory pressure.
+ */
+ if (!sk_under_global_memory_pressure(sk))
return 1;
+
+ /* Try to be fair among all the sockets under global
+ * pressure by allowing the ones that below average
+ * usage to raise.
+ */
alloc = sk_sockets_allocated_read_positive(sk);
if (sk_prot_mem_limits(sk, 2) > alloc *
sk_mem_pages(sk->sk_wmem_queued +
@@ -3105,8 +3125,8 @@ suppress_allocation:
*/
if (sk->sk_wmem_queued + size >= sk->sk_sndbuf) {
/* Force charge with __GFP_NOFAIL */
- if (memcg_charge && !charged) {
- mem_cgroup_charge_skmem(sk->sk_memcg, amt,
+ if (memcg && !charged) {
+ mem_cgroup_charge_skmem(memcg, amt,
gfp_memcg_charge() | __GFP_NOFAIL);
}
return 1;
@@ -3118,8 +3138,8 @@ suppress_allocation:
sk_memory_allocated_sub(sk, amt);
- if (memcg_charge && charged)
- mem_cgroup_uncharge_skmem(sk->sk_memcg, amt);
+ if (charged)
+ mem_cgroup_uncharge_skmem(memcg, amt);
return 0;
}
@@ -3138,10 +3158,10 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind)
{
int ret, amt = sk_mem_pages(size);
- sk->sk_forward_alloc += amt << PAGE_SHIFT;
+ sk_forward_alloc_add(sk, amt << PAGE_SHIFT);
ret = __sk_mem_raise_allocated(sk, size, amt, kind);
if (!ret)
- sk->sk_forward_alloc -= amt << PAGE_SHIFT;
+ sk_forward_alloc_add(sk, -(amt << PAGE_SHIFT));
return ret;
}
EXPORT_SYMBOL(__sk_mem_schedule);
@@ -3173,7 +3193,7 @@ void __sk_mem_reduce_allocated(struct sock *sk, int amount)
void __sk_mem_reclaim(struct sock *sk, int amount)
{
amount >>= PAGE_SHIFT;
- sk->sk_forward_alloc -= amount << PAGE_SHIFT;
+ sk_forward_alloc_add(sk, -(amount << PAGE_SHIFT));
__sk_mem_reduce_allocated(sk, amount);
}
EXPORT_SYMBOL(__sk_mem_reclaim);
@@ -3518,11 +3538,9 @@ void release_sock(struct sock *sk)
if (sk->sk_backlog.tail)
__release_sock(sk);
- /* Warning : release_cb() might need to release sk ownership,
- * ie call sock_release_ownership(sk) before us.
- */
if (sk->sk_prot->release_cb)
- sk->sk_prot->release_cb(sk);
+ INDIRECT_CALL_INET_1(sk->sk_prot->release_cb,
+ tcp_release_cb, sk);
sock_release_ownership(sk);
if (waitqueue_active(&sk->sk_lock.wq))
@@ -3742,7 +3760,7 @@ void sk_get_meminfo(const struct sock *sk, u32 *mem)
mem[SK_MEMINFO_RCVBUF] = READ_ONCE(sk->sk_rcvbuf);
mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk);
mem[SK_MEMINFO_SNDBUF] = READ_ONCE(sk->sk_sndbuf);
- mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc;
+ mem[SK_MEMINFO_FWD_ALLOC] = sk_forward_alloc_get(sk);
mem[SK_MEMINFO_WMEM_QUEUED] = READ_ONCE(sk->sk_wmem_queued);
mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
mem[SK_MEMINFO_BACKLOG] = READ_ONCE(sk->sk_backlog.len);
@@ -4127,8 +4145,14 @@ bool sk_busy_loop_end(void *p, unsigned long start_time)
{
struct sock *sk = p;
- return !skb_queue_empty_lockless(&sk->sk_receive_queue) ||
- sk_busy_loop_timeout(sk, start_time);
+ if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
+ return true;
+
+ if (sk_is_udp(sk) &&
+ !skb_queue_empty_lockless(&udp_sk(sk)->reader_queue))
+ return true;
+
+ return sk_busy_loop_timeout(sk, start_time);
}
EXPORT_SYMBOL(sk_busy_loop_end);
#endif /* CONFIG_NET_RX_BUSY_POLL */
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 8f07fea39d9e..27d733c0f65e 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -18,7 +18,7 @@ struct bpf_stab {
struct bpf_map map;
struct sock **sks;
struct sk_psock_progs progs;
- raw_spinlock_t lock;
+ spinlock_t lock;
};
#define SOCK_CREATE_FLAG_MASK \
@@ -44,7 +44,7 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
return ERR_PTR(-ENOMEM);
bpf_map_init_from_attr(&stab->map, attr);
- raw_spin_lock_init(&stab->lock);
+ spin_lock_init(&stab->lock);
stab->sks = bpf_map_area_alloc((u64) stab->map.max_entries *
sizeof(struct sock *),
@@ -411,7 +411,7 @@ static int __sock_map_delete(struct bpf_stab *stab, struct sock *sk_test,
struct sock *sk;
int err = 0;
- raw_spin_lock_bh(&stab->lock);
+ spin_lock_bh(&stab->lock);
sk = *psk;
if (!sk_test || sk_test == sk)
sk = xchg(psk, NULL);
@@ -421,7 +421,7 @@ static int __sock_map_delete(struct bpf_stab *stab, struct sock *sk_test,
else
err = -EINVAL;
- raw_spin_unlock_bh(&stab->lock);
+ spin_unlock_bh(&stab->lock);
return err;
}
@@ -487,7 +487,7 @@ static int sock_map_update_common(struct bpf_map *map, u32 idx,
psock = sk_psock(sk);
WARN_ON_ONCE(!psock);
- raw_spin_lock_bh(&stab->lock);
+ spin_lock_bh(&stab->lock);
osk = stab->sks[idx];
if (osk && flags == BPF_NOEXIST) {
ret = -EEXIST;
@@ -501,10 +501,10 @@ static int sock_map_update_common(struct bpf_map *map, u32 idx,
stab->sks[idx] = sk;
if (osk)
sock_map_unref(osk, &stab->sks[idx]);
- raw_spin_unlock_bh(&stab->lock);
+ spin_unlock_bh(&stab->lock);
return 0;
out_unlock:
- raw_spin_unlock_bh(&stab->lock);
+ spin_unlock_bh(&stab->lock);
if (psock)
sk_psock_put(sk, psock);
out_free:
@@ -536,6 +536,8 @@ static bool sock_map_sk_state_allowed(const struct sock *sk)
{
if (sk_is_tcp(sk))
return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_LISTEN);
+ if (sk_is_stream_unix(sk))
+ return (1 << sk->sk_state) & TCPF_ESTABLISHED;
return true;
}
@@ -668,6 +670,8 @@ BPF_CALL_4(bpf_msg_redirect_map, struct sk_msg *, msg,
sk = __sock_map_lookup_elem(map, key);
if (unlikely(!sk || !sock_map_redirect_allowed(sk)))
return SK_DROP;
+ if (!(flags & BPF_F_INGRESS) && !sk_is_tcp(sk))
+ return SK_DROP;
msg->flags = flags;
msg->sk_redir = sk;
@@ -835,7 +839,7 @@ struct bpf_shtab_elem {
struct bpf_shtab_bucket {
struct hlist_head head;
- raw_spinlock_t lock;
+ spinlock_t lock;
};
struct bpf_shtab {
@@ -910,7 +914,7 @@ static void sock_hash_delete_from_link(struct bpf_map *map, struct sock *sk,
* is okay since it's going away only after RCU grace period.
* However, we need to check whether it's still present.
*/
- raw_spin_lock_bh(&bucket->lock);
+ spin_lock_bh(&bucket->lock);
elem_probe = sock_hash_lookup_elem_raw(&bucket->head, elem->hash,
elem->key, map->key_size);
if (elem_probe && elem_probe == elem) {
@@ -918,7 +922,7 @@ static void sock_hash_delete_from_link(struct bpf_map *map, struct sock *sk,
sock_map_unref(elem->sk, elem);
sock_hash_free_elem(htab, elem);
}
- raw_spin_unlock_bh(&bucket->lock);
+ spin_unlock_bh(&bucket->lock);
}
static long sock_hash_delete_elem(struct bpf_map *map, void *key)
@@ -932,7 +936,7 @@ static long sock_hash_delete_elem(struct bpf_map *map, void *key)
hash = sock_hash_bucket_hash(key, key_size);
bucket = sock_hash_select_bucket(htab, hash);
- raw_spin_lock_bh(&bucket->lock);
+ spin_lock_bh(&bucket->lock);
elem = sock_hash_lookup_elem_raw(&bucket->head, hash, key, key_size);
if (elem) {
hlist_del_rcu(&elem->node);
@@ -940,7 +944,7 @@ static long sock_hash_delete_elem(struct bpf_map *map, void *key)
sock_hash_free_elem(htab, elem);
ret = 0;
}
- raw_spin_unlock_bh(&bucket->lock);
+ spin_unlock_bh(&bucket->lock);
return ret;
}
@@ -1000,7 +1004,7 @@ static int sock_hash_update_common(struct bpf_map *map, void *key,
hash = sock_hash_bucket_hash(key, key_size);
bucket = sock_hash_select_bucket(htab, hash);
- raw_spin_lock_bh(&bucket->lock);
+ spin_lock_bh(&bucket->lock);
elem = sock_hash_lookup_elem_raw(&bucket->head, hash, key, key_size);
if (elem && flags == BPF_NOEXIST) {
ret = -EEXIST;
@@ -1026,10 +1030,10 @@ static int sock_hash_update_common(struct bpf_map *map, void *key,
sock_map_unref(elem->sk, elem);
sock_hash_free_elem(htab, elem);
}
- raw_spin_unlock_bh(&bucket->lock);
+ spin_unlock_bh(&bucket->lock);
return 0;
out_unlock:
- raw_spin_unlock_bh(&bucket->lock);
+ spin_unlock_bh(&bucket->lock);
sk_psock_put(sk, psock);
out_free:
sk_psock_free_link(link);
@@ -1115,7 +1119,7 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr)
for (i = 0; i < htab->buckets_num; i++) {
INIT_HLIST_HEAD(&htab->buckets[i].head);
- raw_spin_lock_init(&htab->buckets[i].lock);
+ spin_lock_init(&htab->buckets[i].lock);
}
return &htab->map;
@@ -1147,11 +1151,11 @@ static void sock_hash_free(struct bpf_map *map)
* exists, psock exists and holds a ref to socket. That
* lets us to grab a socket ref too.
*/
- raw_spin_lock_bh(&bucket->lock);
+ spin_lock_bh(&bucket->lock);
hlist_for_each_entry(elem, &bucket->head, node)
sock_hold(elem->sk);
hlist_move_list(&bucket->head, &unlink_list);
- raw_spin_unlock_bh(&bucket->lock);
+ spin_unlock_bh(&bucket->lock);
/* Process removed entries out of atomic context to
* block for socket lock before deleting the psock's
@@ -1267,6 +1271,8 @@ BPF_CALL_4(bpf_msg_redirect_hash, struct sk_msg *, msg,
sk = __sock_hash_lookup_elem(map, key);
if (unlikely(!sk || !sock_map_redirect_allowed(sk)))
return SK_DROP;
+ if (!(flags & BPF_F_INGRESS) && !sk_is_tcp(sk))
+ return SK_DROP;
msg->flags = flags;
msg->sk_redir = sk;
diff --git a/net/core/stream.c b/net/core/stream.c
index f5c4e47df165..b16dfa568a2d 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -79,7 +79,7 @@ int sk_stream_wait_connect(struct sock *sk, long *timeo_p)
remove_wait_queue(sk_sleep(sk), &wait);
sk->sk_write_pending--;
} while (!done);
- return 0;
+ return done < 0 ? done : 0;
}
EXPORT_SYMBOL(sk_stream_wait_connect);
@@ -117,7 +117,7 @@ EXPORT_SYMBOL(sk_stream_wait_close);
*/
int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
{
- int err = 0;
+ int ret, err = 0;
long vm_wait = 0;
long current_timeo = *timeo_p;
DEFINE_WAIT_FUNC(wait, woken_wake_function);
@@ -142,11 +142,13 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
sk->sk_write_pending++;
- sk_wait_event(sk, &current_timeo, READ_ONCE(sk->sk_err) ||
- (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN) ||
- (sk_stream_memory_free(sk) &&
- !vm_wait), &wait);
+ ret = sk_wait_event(sk, &current_timeo, READ_ONCE(sk->sk_err) ||
+ (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN) ||
+ (sk_stream_memory_free(sk) && !vm_wait),
+ &wait);
sk->sk_write_pending--;
+ if (ret < 0)
+ goto do_error;
if (vm_wait) {
vm_wait -= current_timeo;
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 03f1edb948d7..0f0cb1465e08 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -509,13 +509,6 @@ static struct ctl_table net_core_table[] = {
.proc_handler = proc_dointvec,
},
{
- .procname = "optmem_max",
- .data = &sysctl_optmem_max,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
.procname = "tstamp_allow_data",
.data = &sysctl_tstamp_allow_data,
.maxlen = sizeof(int),
@@ -674,6 +667,14 @@ static struct ctl_table netns_core_table[] = {
.proc_handler = proc_dointvec_minmax
},
{
+ .procname = "optmem_max",
+ .data = &init_net.core.sysctl_optmem_max,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .extra1 = SYSCTL_ZERO,
+ .proc_handler = proc_dointvec_minmax
+ },
+ {
.procname = "txrehash",
.data = &init_net.core.sysctl_txrehash,
.maxlen = sizeof(u8),
diff --git a/net/core/xdp.c b/net/core/xdp.c
index a70670fe9a2d..4869c1c2d8f3 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -696,9 +696,7 @@ struct xdp_frame *xdpf_clone(struct xdp_frame *xdpf)
return nxdpf;
}
-__diag_push();
-__diag_ignore_all("-Wmissing-prototypes",
- "Global functions as their definitions will be in vmlinux BTF");
+__bpf_kfunc_start_defs();
/**
* bpf_xdp_metadata_rx_timestamp - Read XDP frame RX timestamp.
@@ -738,10 +736,43 @@ __bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash,
return -EOPNOTSUPP;
}
-__diag_pop();
+/**
+ * bpf_xdp_metadata_rx_vlan_tag - Get XDP packet outermost VLAN tag
+ * @ctx: XDP context pointer.
+ * @vlan_proto: Destination pointer for VLAN Tag protocol identifier (TPID).
+ * @vlan_tci: Destination pointer for VLAN TCI (VID + DEI + PCP)
+ *
+ * In case of success, ``vlan_proto`` contains *Tag protocol identifier (TPID)*,
+ * usually ``ETH_P_8021Q`` or ``ETH_P_8021AD``, but some networks can use
+ * custom TPIDs. ``vlan_proto`` is stored in **network byte order (BE)**
+ * and should be used as follows:
+ * ``if (vlan_proto == bpf_htons(ETH_P_8021Q)) do_something();``
+ *
+ * ``vlan_tci`` contains the remaining 16 bits of a VLAN tag.
+ * Driver is expected to provide those in **host byte order (usually LE)**,
+ * so the bpf program should not perform byte conversion.
+ * According to 802.1Q standard, *VLAN TCI (Tag control information)*
+ * is a bit field that contains:
+ * *VLAN identifier (VID)* that can be read with ``vlan_tci & 0xfff``,
+ * *Drop eligible indicator (DEI)* - 1 bit,
+ * *Priority code point (PCP)* - 3 bits.
+ * For detailed meaning of DEI and PCP, please refer to other sources.
+ *
+ * Return:
+ * * Returns 0 on success or ``-errno`` on error.
+ * * ``-EOPNOTSUPP`` : device driver doesn't implement kfunc
+ * * ``-ENODATA`` : VLAN tag was not stripped or is not available
+ */
+__bpf_kfunc int bpf_xdp_metadata_rx_vlan_tag(const struct xdp_md *ctx,
+ __be16 *vlan_proto, u16 *vlan_tci)
+{
+ return -EOPNOTSUPP;
+}
+
+__bpf_kfunc_end_defs();
BTF_SET8_START(xdp_metadata_kfunc_ids)
-#define XDP_METADATA_KFUNC(_, name) BTF_ID_FLAGS(func, name, KF_TRUSTED_ARGS)
+#define XDP_METADATA_KFUNC(_, __, name, ___) BTF_ID_FLAGS(func, name, KF_TRUSTED_ARGS)
XDP_METADATA_KFUNC_xxx
#undef XDP_METADATA_KFUNC
BTF_SET8_END(xdp_metadata_kfunc_ids)
@@ -752,7 +783,7 @@ static const struct btf_kfunc_id_set xdp_metadata_kfunc_set = {
};
BTF_ID_LIST(xdp_metadata_kfunc_ids_unsorted)
-#define XDP_METADATA_KFUNC(name, str) BTF_ID(func, str)
+#define XDP_METADATA_KFUNC(name, _, str, __) BTF_ID(func, str)
XDP_METADATA_KFUNC_xxx
#undef XDP_METADATA_KFUNC
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 8f56e8723c73..44b033fe1ef6 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -254,13 +254,8 @@ static int dccp_v4_err(struct sk_buff *skb, u32 info)
int err;
struct net *net = dev_net(skb->dev);
- /* For the first __dccp_basic_hdr_len() check, we only need dh->dccph_x,
- * which is in byte 7 of the dccp header.
- * Our caller (icmp_socket_deliver()) already pulled 8 bytes for us.
- *
- * Later on, we want to access the sequence number fields, which are
- * beyond 8 bytes, so we have to pskb_may_pull() ourselves.
- */
+ if (!pskb_may_pull(skb, offset + sizeof(*dh)))
+ return -EINVAL;
dh = (struct dccp_hdr *)(skb->data + offset);
if (!pskb_may_pull(skb, offset + __dccp_basic_hdr_len(dh)))
return -EINVAL;
@@ -516,7 +511,7 @@ static int dccp_v4_send_response(const struct sock *sk, struct request_sock *req
err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
ireq->ir_rmt_addr,
rcu_dereference(ireq->ireq_opt),
- inet_sk(sk)->tos);
+ READ_ONCE(inet_sk(sk)->tos));
rcu_read_unlock();
err = net_xmit_eval(err);
}
@@ -634,9 +629,6 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
if (dccp_parse_options(sk, dreq, skb))
goto drop_and_free;
- if (security_inet_conn_request(sk, skb, req))
- goto drop_and_free;
-
ireq = inet_rsk(req);
sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
@@ -644,6 +636,9 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
ireq->ireq_family = AF_INET;
ireq->ir_iif = READ_ONCE(sk->sk_bound_dev_if);
+ if (security_inet_conn_request(sk, skb, req))
+ goto drop_and_free;
+
/*
* Step 3: Process LISTEN state
*
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 33f6ccf6ba77..ded07e09f813 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -83,13 +83,8 @@ static int dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
__u64 seq;
struct net *net = dev_net(skb->dev);
- /* For the first __dccp_basic_hdr_len() check, we only need dh->dccph_x,
- * which is in byte 7 of the dccp header.
- * Our caller (icmpv6_notify()) already pulled 8 bytes for us.
- *
- * Later on, we want to access the sequence number fields, which are
- * beyond 8 bytes, so we have to pskb_may_pull() ourselves.
- */
+ if (!pskb_may_pull(skb, offset + sizeof(*dh)))
+ return -EINVAL;
dh = (struct dccp_hdr *)(skb->data + offset);
if (!pskb_may_pull(skb, offset + __dccp_basic_hdr_len(dh)))
return -EINVAL;
@@ -185,7 +180,7 @@ static int dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
goto out;
}
- if (!sock_owned_by_user(sk) && np->recverr) {
+ if (!sock_owned_by_user(sk) && inet6_test_bit(RECVERR6, sk)) {
sk->sk_err = err;
sk_error_report(sk);
} else {
@@ -244,7 +239,7 @@ static int dccp_v6_send_response(const struct sock *sk, struct request_sock *req
if (!opt)
opt = rcu_dereference(np->opt);
err = ip6_xmit(sk, skb, &fl6, READ_ONCE(sk->sk_mark), opt,
- np->tclass, sk->sk_priority);
+ np->tclass, READ_ONCE(sk->sk_priority));
rcu_read_unlock();
err = net_xmit_eval(err);
}
@@ -365,15 +360,15 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
if (dccp_parse_options(sk, dreq, skb))
goto drop_and_free;
- if (security_inet_conn_request(sk, skb, req))
- goto drop_and_free;
-
ireq = inet_rsk(req);
ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
ireq->ireq_family = AF_INET6;
ireq->ir_mark = inet_request_mark(sk, skb);
+ if (security_inet_conn_request(sk, skb, req))
+ goto drop_and_free;
+
if (ipv6_opt_accepted(sk, skb, IP6CB(skb)) ||
np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
@@ -674,12 +669,12 @@ discard:
ipv6_pktoptions:
if (!((1 << sk->sk_state) & (DCCPF_CLOSED | DCCPF_LISTEN))) {
if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
- np->mcast_oif = inet6_iif(opt_skb);
+ WRITE_ONCE(np->mcast_oif, inet6_iif(opt_skb));
if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
- np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
+ WRITE_ONCE(np->mcast_hops, ipv6_hdr(opt_skb)->hop_limit);
if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
- if (np->repflow)
+ if (inet6_test_bit(REPFLOW, sk))
np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
if (ipv6_opt_accepted(sk, opt_skb,
&DCCP_SKB_CB(opt_skb)->header.h6)) {
@@ -844,7 +839,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
memset(&fl6, 0, sizeof(fl6));
- if (np->sndflow) {
+ if (inet6_test_bit(SNDFLOW, sk)) {
fl6.flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
IP6_ECN_flow_init(fl6.flowlabel);
if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) {
@@ -894,7 +889,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
u32 exthdrlen = icsk->icsk_ext_hdr_len;
struct sockaddr_in sin;
- SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
+ net_dbg_ratelimited("connect: ipv4 mapped\n");
if (ipv6_only_sock(sk))
return -ENETUNREACH;
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index b3255e87cc7e..a4cfb47b60e5 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -196,8 +196,8 @@ static void dccp_delack_timer(struct timer_list *t)
if (inet_csk_ack_scheduled(sk)) {
if (!inet_csk_in_pingpong_mode(sk)) {
/* Delayed ACK missed: inflate ATO. */
- icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1,
- icsk->icsk_rto);
+ icsk->icsk_ack.ato = min_t(u32, icsk->icsk_ack.ato << 1,
+ icsk->icsk_rto);
} else {
/* Delayed ACK missed: leave pingpong mode and
* deflate ATO.
diff --git a/net/devlink/core.c b/net/devlink/core.c
index 6cec4afb01fb..6a58342752b4 100644
--- a/net/devlink/core.c
+++ b/net/devlink/core.c
@@ -16,6 +16,222 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_trap_report);
DEFINE_XARRAY_FLAGS(devlinks, XA_FLAGS_ALLOC);
+static struct devlink *devlinks_xa_get(unsigned long index)
+{
+ struct devlink *devlink;
+
+ rcu_read_lock();
+ devlink = xa_find(&devlinks, &index, index, DEVLINK_REGISTERED);
+ if (!devlink || !devlink_try_get(devlink))
+ devlink = NULL;
+ rcu_read_unlock();
+ return devlink;
+}
+
+/* devlink_rels xarray contains 1:1 relationships between
+ * devlink object and related nested devlink instance.
+ * The xarray index is used to get the nested object from
+ * the nested-in object code.
+ */
+static DEFINE_XARRAY_FLAGS(devlink_rels, XA_FLAGS_ALLOC1);
+
+#define DEVLINK_REL_IN_USE XA_MARK_0
+
+struct devlink_rel {
+ u32 index;
+ refcount_t refcount;
+ u32 devlink_index;
+ struct {
+ u32 devlink_index;
+ u32 obj_index;
+ devlink_rel_notify_cb_t *notify_cb;
+ devlink_rel_cleanup_cb_t *cleanup_cb;
+ struct delayed_work notify_work;
+ } nested_in;
+};
+
+static void devlink_rel_free(struct devlink_rel *rel)
+{
+ xa_erase(&devlink_rels, rel->index);
+ kfree(rel);
+}
+
+static void __devlink_rel_get(struct devlink_rel *rel)
+{
+ refcount_inc(&rel->refcount);
+}
+
+static void __devlink_rel_put(struct devlink_rel *rel)
+{
+ if (refcount_dec_and_test(&rel->refcount))
+ devlink_rel_free(rel);
+}
+
+static void devlink_rel_nested_in_notify_work(struct work_struct *work)
+{
+ struct devlink_rel *rel = container_of(work, struct devlink_rel,
+ nested_in.notify_work.work);
+ struct devlink *devlink;
+
+ devlink = devlinks_xa_get(rel->nested_in.devlink_index);
+ if (!devlink)
+ goto rel_put;
+ if (!devl_trylock(devlink)) {
+ devlink_put(devlink);
+ goto reschedule_work;
+ }
+ if (!devl_is_registered(devlink)) {
+ devl_unlock(devlink);
+ devlink_put(devlink);
+ goto rel_put;
+ }
+ if (!xa_get_mark(&devlink_rels, rel->index, DEVLINK_REL_IN_USE))
+ rel->nested_in.cleanup_cb(devlink, rel->nested_in.obj_index, rel->index);
+ rel->nested_in.notify_cb(devlink, rel->nested_in.obj_index);
+ devl_unlock(devlink);
+ devlink_put(devlink);
+
+rel_put:
+ __devlink_rel_put(rel);
+ return;
+
+reschedule_work:
+ schedule_delayed_work(&rel->nested_in.notify_work, 1);
+}
+
+static void devlink_rel_nested_in_notify_work_schedule(struct devlink_rel *rel)
+{
+ __devlink_rel_get(rel);
+ schedule_delayed_work(&rel->nested_in.notify_work, 0);
+}
+
+static struct devlink_rel *devlink_rel_alloc(void)
+{
+ struct devlink_rel *rel;
+ static u32 next;
+ int err;
+
+ rel = kzalloc(sizeof(*rel), GFP_KERNEL);
+ if (!rel)
+ return ERR_PTR(-ENOMEM);
+
+ err = xa_alloc_cyclic(&devlink_rels, &rel->index, rel,
+ xa_limit_32b, &next, GFP_KERNEL);
+ if (err) {
+ kfree(rel);
+ return ERR_PTR(err);
+ }
+
+ refcount_set(&rel->refcount, 1);
+ INIT_DELAYED_WORK(&rel->nested_in.notify_work,
+ &devlink_rel_nested_in_notify_work);
+ return rel;
+}
+
+static void devlink_rel_put(struct devlink *devlink)
+{
+ struct devlink_rel *rel = devlink->rel;
+
+ if (!rel)
+ return;
+ xa_clear_mark(&devlink_rels, rel->index, DEVLINK_REL_IN_USE);
+ devlink_rel_nested_in_notify_work_schedule(rel);
+ __devlink_rel_put(rel);
+ devlink->rel = NULL;
+}
+
+void devlink_rel_nested_in_clear(u32 rel_index)
+{
+ xa_clear_mark(&devlink_rels, rel_index, DEVLINK_REL_IN_USE);
+}
+
+int devlink_rel_nested_in_add(u32 *rel_index, u32 devlink_index,
+ u32 obj_index, devlink_rel_notify_cb_t *notify_cb,
+ devlink_rel_cleanup_cb_t *cleanup_cb,
+ struct devlink *devlink)
+{
+ struct devlink_rel *rel = devlink_rel_alloc();
+
+ ASSERT_DEVLINK_NOT_REGISTERED(devlink);
+
+ if (IS_ERR(rel))
+ return PTR_ERR(rel);
+
+ rel->devlink_index = devlink->index;
+ rel->nested_in.devlink_index = devlink_index;
+ rel->nested_in.obj_index = obj_index;
+ rel->nested_in.notify_cb = notify_cb;
+ rel->nested_in.cleanup_cb = cleanup_cb;
+ *rel_index = rel->index;
+ xa_set_mark(&devlink_rels, rel->index, DEVLINK_REL_IN_USE);
+ devlink->rel = rel;
+ return 0;
+}
+
+/**
+ * devlink_rel_nested_in_notify - Notify the object this devlink
+ * instance is nested in.
+ * @devlink: devlink
+ *
+ * This is called upon network namespace change of devlink instance.
+ * In case this devlink instance is nested in another devlink object,
+ * a notification of a change of this object should be sent
+ * over netlink. The parent devlink instance lock needs to be
+ * taken during the notification preparation.
+ * However, since the devlink lock of nested instance is held here,
+ * we would end with wrong devlink instance lock ordering and
+ * deadlock. Therefore the work is utilized to avoid that.
+ */
+void devlink_rel_nested_in_notify(struct devlink *devlink)
+{
+ struct devlink_rel *rel = devlink->rel;
+
+ if (!rel)
+ return;
+ devlink_rel_nested_in_notify_work_schedule(rel);
+}
+
+static struct devlink_rel *devlink_rel_find(unsigned long rel_index)
+{
+ return xa_find(&devlink_rels, &rel_index, rel_index,
+ DEVLINK_REL_IN_USE);
+}
+
+static struct devlink *devlink_rel_devlink_get(u32 rel_index)
+{
+ struct devlink_rel *rel;
+ u32 devlink_index;
+
+ if (!rel_index)
+ return NULL;
+ xa_lock(&devlink_rels);
+ rel = devlink_rel_find(rel_index);
+ if (rel)
+ devlink_index = rel->devlink_index;
+ xa_unlock(&devlink_rels);
+ if (!rel)
+ return NULL;
+ return devlinks_xa_get(devlink_index);
+}
+
+int devlink_rel_devlink_handle_put(struct sk_buff *msg, struct devlink *devlink,
+ u32 rel_index, int attrtype,
+ bool *msg_updated)
+{
+ struct net *net = devlink_net(devlink);
+ struct devlink *rel_devlink;
+ int err;
+
+ rel_devlink = devlink_rel_devlink_get(rel_index);
+ if (!rel_devlink)
+ return 0;
+ err = devlink_nl_put_nested_handle(msg, net, rel_devlink, attrtype);
+ devlink_put(rel_devlink);
+ if (!err && msg_updated)
+ *msg_updated = true;
+ return err;
+}
+
void *devlink_priv(struct devlink *devlink)
{
return &devlink->priv;
@@ -97,6 +313,7 @@ static void devlink_release(struct work_struct *work)
mutex_destroy(&devlink->lock);
lockdep_unregister_key(&devlink->lock_key);
+ put_device(devlink->dev);
kfree(devlink);
}
@@ -142,6 +359,7 @@ int devl_register(struct devlink *devlink)
xa_set_mark(&devlinks, devlink->index, DEVLINK_REGISTERED);
devlink_notify_register(devlink);
+ devlink_rel_nested_in_notify(devlink);
return 0;
}
@@ -166,6 +384,7 @@ void devl_unregister(struct devlink *devlink)
devlink_notify_unregister(devlink);
xa_clear_mark(&devlinks, devlink->index, DEVLINK_REGISTERED);
+ devlink_rel_put(devlink);
}
EXPORT_SYMBOL_GPL(devl_unregister);
@@ -210,11 +429,12 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops,
if (ret < 0)
goto err_xa_alloc;
- devlink->dev = dev;
+ devlink->dev = get_device(dev);
devlink->ops = ops;
xa_init_flags(&devlink->ports, XA_FLAGS_ALLOC);
xa_init_flags(&devlink->params, XA_FLAGS_ALLOC);
xa_init_flags(&devlink->snapshot_ids, XA_FLAGS_ALLOC);
+ xa_init_flags(&devlink->nested_rels, XA_FLAGS_ALLOC);
write_pnet(&devlink->_net, net);
INIT_LIST_HEAD(&devlink->rate_list);
INIT_LIST_HEAD(&devlink->linecard_list);
@@ -261,6 +481,7 @@ void devlink_free(struct devlink *devlink)
WARN_ON(!list_empty(&devlink->linecard_list));
WARN_ON(!xa_empty(&devlink->ports));
+ xa_destroy(&devlink->nested_rels);
xa_destroy(&devlink->snapshot_ids);
xa_destroy(&devlink->params);
xa_destroy(&devlink->ports);
@@ -282,14 +503,14 @@ static void __net_exit devlink_pernet_pre_exit(struct net *net)
* all devlink instances from this namespace into init_net.
*/
devlinks_xa_for_each_registered_get(net, index, devlink) {
- devl_lock(devlink);
+ devl_dev_lock(devlink, true);
err = 0;
if (devl_is_registered(devlink))
err = devlink_reload(devlink, &init_net,
DEVLINK_RELOAD_ACTION_DRIVER_REINIT,
DEVLINK_RELOAD_LIMIT_UNSPEC,
&actions_performed, NULL);
- devl_unlock(devlink);
+ devl_dev_unlock(devlink, true);
devlink_put(devlink);
if (err && err != -EOPNOTSUPP)
pr_warn("Failed to reload devlink instance into init_net\n");
diff --git a/net/devlink/dev.c b/net/devlink/dev.c
index bba4ace7d22b..19dbf540748a 100644
--- a/net/devlink/dev.c
+++ b/net/devlink/dev.c
@@ -4,6 +4,7 @@
* Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
*/
+#include <linux/device.h>
#include <net/genetlink.h>
#include <net/sock.h>
#include "devl_internal.h"
@@ -138,6 +139,23 @@ nla_put_failure:
return -EMSGSIZE;
}
+static int devlink_nl_nested_fill(struct sk_buff *msg, struct devlink *devlink)
+{
+ unsigned long rel_index;
+ void *unused;
+ int err;
+
+ xa_for_each(&devlink->nested_rels, rel_index, unused) {
+ err = devlink_rel_devlink_handle_put(msg, devlink,
+ rel_index,
+ DEVLINK_ATTR_NESTED_DEVLINK,
+ NULL);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
static int devlink_nl_fill(struct sk_buff *msg, struct devlink *devlink,
enum devlink_command cmd, u32 portid,
u32 seq, int flags)
@@ -164,6 +182,10 @@ static int devlink_nl_fill(struct sk_buff *msg, struct devlink *devlink,
goto dev_stats_nest_cancel;
nla_nest_end(msg, dev_stats);
+
+ if (devlink_nl_nested_fill(msg, devlink))
+ goto nla_put_failure;
+
genlmsg_end(msg, hdr);
return 0;
@@ -180,7 +202,10 @@ static void devlink_notify(struct devlink *devlink, enum devlink_command cmd)
int err;
WARN_ON(cmd != DEVLINK_CMD_NEW && cmd != DEVLINK_CMD_DEL);
- WARN_ON(!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED));
+ WARN_ON(!devl_is_registered(devlink));
+
+ if (!devlink_nl_notify_need(devlink))
+ return;
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
@@ -192,8 +217,7 @@ static void devlink_notify(struct devlink *devlink, enum devlink_command cmd)
return;
}
- genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
- msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+ devlink_nl_notify_send(devlink, msg);
}
int devlink_nl_get_doit(struct sk_buff *skb, struct genl_info *info)
@@ -230,6 +254,34 @@ int devlink_nl_get_dumpit(struct sk_buff *msg, struct netlink_callback *cb)
return devlink_nl_dumpit(msg, cb, devlink_nl_get_dump_one);
}
+static void devlink_rel_notify_cb(struct devlink *devlink, u32 obj_index)
+{
+ devlink_notify(devlink, DEVLINK_CMD_NEW);
+}
+
+static void devlink_rel_cleanup_cb(struct devlink *devlink, u32 obj_index,
+ u32 rel_index)
+{
+ xa_erase(&devlink->nested_rels, rel_index);
+}
+
+int devl_nested_devlink_set(struct devlink *devlink,
+ struct devlink *nested_devlink)
+{
+ u32 rel_index;
+ int err;
+
+ err = devlink_rel_nested_in_add(&rel_index, devlink->index, 0,
+ devlink_rel_notify_cb,
+ devlink_rel_cleanup_cb,
+ nested_devlink);
+ if (err)
+ return err;
+ return xa_insert(&devlink->nested_rels, rel_index,
+ xa_mk_value(0), GFP_KERNEL);
+}
+EXPORT_SYMBOL_GPL(devl_nested_devlink_set);
+
void devlink_notify_register(struct devlink *devlink)
{
devlink_notify(devlink, DEVLINK_CMD_NEW);
@@ -372,6 +424,19 @@ static void devlink_reload_netns_change(struct devlink *devlink,
devlink_notify_unregister(devlink);
write_pnet(&devlink->_net, dest_net);
devlink_notify_register(devlink);
+ devlink_rel_nested_in_notify(devlink);
+}
+
+static void devlink_reload_reinit_sanity_check(struct devlink *devlink)
+{
+ WARN_ON(!list_empty(&devlink->trap_policer_list));
+ WARN_ON(!list_empty(&devlink->trap_group_list));
+ WARN_ON(!list_empty(&devlink->trap_list));
+ WARN_ON(!list_empty(&devlink->dpipe_table_list));
+ WARN_ON(!list_empty(&devlink->sb_list));
+ WARN_ON(!list_empty(&devlink->rate_list));
+ WARN_ON(!list_empty(&devlink->linecard_list));
+ WARN_ON(!xa_empty(&devlink->ports));
}
int devlink_reload(struct devlink *devlink, struct net *dest_net,
@@ -383,6 +448,13 @@ int devlink_reload(struct devlink *devlink, struct net *dest_net,
struct net *curr_net;
int err;
+ /* Make sure the reload operations are invoked with the device lock
+ * held to allow drivers to trigger functionality that expects it
+ * (e.g., PCI reset) and to close possible races between these
+ * operations and probe/remove.
+ */
+ device_lock_assert(devlink->dev);
+
memcpy(remote_reload_stats, devlink->stats.remote_reload_stats,
sizeof(remote_reload_stats));
@@ -394,8 +466,10 @@ int devlink_reload(struct devlink *devlink, struct net *dest_net,
if (dest_net && !net_eq(dest_net, curr_net))
devlink_reload_netns_change(devlink, curr_net, dest_net);
- if (action == DEVLINK_RELOAD_ACTION_DRIVER_REINIT)
+ if (action == DEVLINK_RELOAD_ACTION_DRIVER_REINIT) {
devlink_params_driverinit_load_new(devlink);
+ devlink_reload_reinit_sanity_check(devlink);
+ }
err = devlink->ops->reload_up(devlink, action, limit, actions_performed, extack);
devlink_reload_failed_set(devlink, !!err);
@@ -442,7 +516,7 @@ free_msg:
return -EMSGSIZE;
}
-int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
+int devlink_nl_reload_doit(struct sk_buff *skb, struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
enum devlink_reload_action action;
@@ -608,7 +682,7 @@ nla_put_failure:
return err;
}
-int devlink_nl_cmd_eswitch_get_doit(struct sk_buff *skb, struct genl_info *info)
+int devlink_nl_eswitch_get_doit(struct sk_buff *skb, struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
struct sk_buff *msg;
@@ -629,7 +703,7 @@ int devlink_nl_cmd_eswitch_get_doit(struct sk_buff *skb, struct genl_info *info)
return genlmsg_reply(msg, info);
}
-int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb, struct genl_info *info)
+int devlink_nl_eswitch_set_doit(struct sk_buff *skb, struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
const struct devlink_ops *ops = devlink->ops;
@@ -927,7 +1001,7 @@ static void __devlink_flash_update_notify(struct devlink *devlink,
cmd != DEVLINK_CMD_FLASH_UPDATE_END &&
cmd != DEVLINK_CMD_FLASH_UPDATE_STATUS);
- if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+ if (!devl_is_registered(devlink) || !devlink_nl_notify_need(devlink))
return;
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
@@ -938,8 +1012,7 @@ static void __devlink_flash_update_notify(struct devlink *devlink,
if (err)
goto out_free_msg;
- genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
- msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+ devlink_nl_notify_send(devlink, msg);
return;
out_free_msg:
@@ -1058,7 +1131,7 @@ static int devlink_flash_component_get(struct devlink *devlink,
return 0;
}
-int devlink_nl_cmd_flash_update(struct sk_buff *skb, struct genl_info *info)
+int devlink_nl_flash_update_doit(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr *nla_overwrite_mask, *nla_file_name;
struct devlink_flash_update_params params = {};
@@ -1301,7 +1374,7 @@ static const struct nla_policy devlink_selftest_nl_policy[DEVLINK_ATTR_SELFTEST_
[DEVLINK_ATTR_SELFTEST_ID_FLASH] = { .type = NLA_FLAG },
};
-int devlink_nl_cmd_selftests_run(struct sk_buff *skb, struct genl_info *info)
+int devlink_nl_selftests_run_doit(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr *tb[DEVLINK_ATTR_SELFTEST_ID_MAX + 1];
struct devlink *devlink = info->user_ptr[0];
diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h
index f6b5fea2e13c..c7a8e13f917c 100644
--- a/net/devlink/devl_internal.h
+++ b/net/devlink/devl_internal.h
@@ -3,6 +3,7 @@
* Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
*/
+#include <linux/device.h>
#include <linux/etherdevice.h>
#include <linux/mutex.h>
#include <linux/netdevice.h>
@@ -17,6 +18,8 @@
#include "netlink_gen.h"
+struct devlink_rel;
+
#define DEVLINK_REGISTERED XA_MARK_1
#define DEVLINK_RELOAD_STATS_ARRAY_SIZE \
@@ -55,6 +58,8 @@ struct devlink {
u8 reload_failed:1;
refcount_t refcount;
struct rcu_work rwork;
+ struct devlink_rel *rel;
+ struct xarray nested_rels;
char priv[] __aligned(NETDEV_ALIGN);
};
@@ -86,16 +91,46 @@ extern struct genl_family devlink_nl_family;
struct devlink *devlinks_xa_find_get(struct net *net, unsigned long *indexp);
+static inline bool __devl_is_registered(struct devlink *devlink)
+{
+ return xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED);
+}
+
static inline bool devl_is_registered(struct devlink *devlink)
{
devl_assert_locked(devlink);
- return xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED);
+ return __devl_is_registered(devlink);
}
-/* Netlink */
-#define DEVLINK_NL_FLAG_NEED_PORT BIT(0)
-#define DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT BIT(1)
+static inline void devl_dev_lock(struct devlink *devlink, bool dev_lock)
+{
+ if (dev_lock)
+ device_lock(devlink->dev);
+ devl_lock(devlink);
+}
+static inline void devl_dev_unlock(struct devlink *devlink, bool dev_lock)
+{
+ devl_unlock(devlink);
+ if (dev_lock)
+ device_unlock(devlink->dev);
+}
+
+typedef void devlink_rel_notify_cb_t(struct devlink *devlink, u32 obj_index);
+typedef void devlink_rel_cleanup_cb_t(struct devlink *devlink, u32 obj_index,
+ u32 rel_index);
+
+void devlink_rel_nested_in_clear(u32 rel_index);
+int devlink_rel_nested_in_add(u32 *rel_index, u32 devlink_index,
+ u32 obj_index, devlink_rel_notify_cb_t *notify_cb,
+ devlink_rel_cleanup_cb_t *cleanup_cb,
+ struct devlink *devlink);
+void devlink_rel_nested_in_notify(struct devlink *devlink);
+int devlink_rel_devlink_handle_put(struct sk_buff *msg, struct devlink *devlink,
+ u32 rel_index, int attrtype,
+ bool *msg_updated);
+
+/* Netlink */
enum devlink_multicast_groups {
DEVLINK_MCGRP_CONFIG,
};
@@ -122,7 +157,8 @@ typedef int devlink_nl_dump_one_func_t(struct sk_buff *msg,
int flags);
struct devlink *
-devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs);
+devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs,
+ bool dev_lock);
int devlink_nl_dumpit(struct sk_buff *msg, struct netlink_callback *cb,
devlink_nl_dump_one_func_t *dump_one);
@@ -145,8 +181,62 @@ devlink_nl_put_handle(struct sk_buff *msg, struct devlink *devlink)
return 0;
}
+int devlink_nl_put_nested_handle(struct sk_buff *msg, struct net *net,
+ struct devlink *devlink, int attrtype);
int devlink_nl_msg_reply_and_new(struct sk_buff **msg, struct genl_info *info);
+static inline bool devlink_nl_notify_need(struct devlink *devlink)
+{
+ return genl_has_listeners(&devlink_nl_family, devlink_net(devlink),
+ DEVLINK_MCGRP_CONFIG);
+}
+
+struct devlink_obj_desc {
+ struct rcu_head rcu;
+ const char *bus_name;
+ const char *dev_name;
+ unsigned int port_index;
+ bool port_index_valid;
+ long data[];
+};
+
+static inline void devlink_nl_obj_desc_init(struct devlink_obj_desc *desc,
+ struct devlink *devlink)
+{
+ memset(desc, 0, sizeof(*desc));
+ desc->bus_name = devlink->dev->bus->name;
+ desc->dev_name = dev_name(devlink->dev);
+}
+
+static inline void devlink_nl_obj_desc_port_set(struct devlink_obj_desc *desc,
+ struct devlink_port *devlink_port)
+{
+ desc->port_index = devlink_port->index;
+ desc->port_index_valid = true;
+}
+
+int devlink_nl_notify_filter(struct sock *dsk, struct sk_buff *skb, void *data);
+
+static inline void devlink_nl_notify_send_desc(struct devlink *devlink,
+ struct sk_buff *msg,
+ struct devlink_obj_desc *desc)
+{
+ genlmsg_multicast_netns_filtered(&devlink_nl_family,
+ devlink_net(devlink),
+ msg, 0, DEVLINK_MCGRP_CONFIG,
+ GFP_KERNEL,
+ devlink_nl_notify_filter, desc);
+}
+
+static inline void devlink_nl_notify_send(struct devlink *devlink,
+ struct sk_buff *msg)
+{
+ struct devlink_obj_desc desc;
+
+ devlink_nl_obj_desc_init(&desc, devlink);
+ devlink_nl_notify_send_desc(devlink, msg, &desc);
+}
+
/* Notify */
void devlink_notify_register(struct devlink *devlink);
void devlink_notify_unregister(struct devlink *devlink);
@@ -206,80 +296,4 @@ int devlink_rate_nodes_check(struct devlink *devlink, u16 mode,
struct netlink_ext_ack *extack);
/* Linecards */
-struct devlink_linecard {
- struct list_head list;
- struct devlink *devlink;
- unsigned int index;
- const struct devlink_linecard_ops *ops;
- void *priv;
- enum devlink_linecard_state state;
- struct mutex state_lock; /* Protects state */
- const char *type;
- struct devlink_linecard_type *types;
- unsigned int types_count;
- struct devlink *nested_devlink;
-};
-
-/* Devlink nl cmds */
-int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info);
-int devlink_nl_cmd_eswitch_get_doit(struct sk_buff *skb, struct genl_info *info);
-int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb, struct genl_info *info);
-int devlink_nl_cmd_flash_update(struct sk_buff *skb, struct genl_info *info);
-int devlink_nl_cmd_selftests_run(struct sk_buff *skb, struct genl_info *info);
-int devlink_nl_cmd_port_set_doit(struct sk_buff *skb, struct genl_info *info);
-int devlink_nl_cmd_port_split_doit(struct sk_buff *skb, struct genl_info *info);
-int devlink_nl_cmd_port_unsplit_doit(struct sk_buff *skb,
- struct genl_info *info);
-int devlink_nl_cmd_port_new_doit(struct sk_buff *skb, struct genl_info *info);
-int devlink_nl_cmd_port_del_doit(struct sk_buff *skb, struct genl_info *info);
-int devlink_nl_cmd_sb_pool_set_doit(struct sk_buff *skb, struct genl_info *info);
-int devlink_nl_cmd_sb_port_pool_set_doit(struct sk_buff *skb,
- struct genl_info *info);
-int devlink_nl_cmd_sb_tc_pool_bind_set_doit(struct sk_buff *skb,
- struct genl_info *info);
-int devlink_nl_cmd_sb_occ_snapshot_doit(struct sk_buff *skb,
- struct genl_info *info);
-int devlink_nl_cmd_sb_occ_max_clear_doit(struct sk_buff *skb,
- struct genl_info *info);
-int devlink_nl_cmd_dpipe_table_get(struct sk_buff *skb, struct genl_info *info);
-int devlink_nl_cmd_dpipe_entries_get(struct sk_buff *skb,
- struct genl_info *info);
-int devlink_nl_cmd_dpipe_headers_get(struct sk_buff *skb,
- struct genl_info *info);
-int devlink_nl_cmd_dpipe_table_counters_set(struct sk_buff *skb,
- struct genl_info *info);
-int devlink_nl_cmd_resource_set(struct sk_buff *skb, struct genl_info *info);
-int devlink_nl_cmd_resource_dump(struct sk_buff *skb, struct genl_info *info);
-int devlink_nl_cmd_param_set_doit(struct sk_buff *skb, struct genl_info *info);
-int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg,
- struct netlink_callback *cb);
-int devlink_nl_cmd_port_param_get_doit(struct sk_buff *skb,
- struct genl_info *info);
-int devlink_nl_cmd_port_param_set_doit(struct sk_buff *skb,
- struct genl_info *info);
-int devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info);
-int devlink_nl_cmd_region_del(struct sk_buff *skb, struct genl_info *info);
-int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
- struct netlink_callback *cb);
-int devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb,
- struct genl_info *info);
-int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb,
- struct genl_info *info);
-int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb,
- struct genl_info *info);
-int devlink_nl_cmd_health_reporter_dump_get_dumpit(struct sk_buff *skb,
- struct netlink_callback *cb);
-int devlink_nl_cmd_health_reporter_dump_clear_doit(struct sk_buff *skb,
- struct genl_info *info);
-int devlink_nl_cmd_health_reporter_test_doit(struct sk_buff *skb,
- struct genl_info *info);
-int devlink_nl_cmd_trap_set_doit(struct sk_buff *skb, struct genl_info *info);
-int devlink_nl_cmd_trap_group_set_doit(struct sk_buff *skb,
- struct genl_info *info);
-int devlink_nl_cmd_trap_policer_set_doit(struct sk_buff *skb,
- struct genl_info *info);
-int devlink_nl_cmd_rate_set_doit(struct sk_buff *skb, struct genl_info *info);
-int devlink_nl_cmd_rate_new_doit(struct sk_buff *skb, struct genl_info *info);
-int devlink_nl_cmd_rate_del_doit(struct sk_buff *skb, struct genl_info *info);
-int devlink_nl_cmd_linecard_set_doit(struct sk_buff *skb,
- struct genl_info *info);
+unsigned int devlink_linecard_index(struct devlink_linecard *linecard);
diff --git a/net/devlink/dpipe.c b/net/devlink/dpipe.c
index 431227c412e5..a72a9292efc5 100644
--- a/net/devlink/dpipe.c
+++ b/net/devlink/dpipe.c
@@ -289,7 +289,7 @@ err_table_put:
return err;
}
-int devlink_nl_cmd_dpipe_table_get(struct sk_buff *skb, struct genl_info *info)
+int devlink_nl_dpipe_table_get_doit(struct sk_buff *skb, struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
const char *table_name = NULL;
@@ -562,8 +562,8 @@ send_done:
return genlmsg_reply(dump_ctx.skb, info);
}
-int devlink_nl_cmd_dpipe_entries_get(struct sk_buff *skb,
- struct genl_info *info)
+int devlink_nl_dpipe_entries_get_doit(struct sk_buff *skb,
+ struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
struct devlink_dpipe_table *table;
@@ -712,8 +712,8 @@ err_table_put:
return err;
}
-int devlink_nl_cmd_dpipe_headers_get(struct sk_buff *skb,
- struct genl_info *info)
+int devlink_nl_dpipe_headers_get_doit(struct sk_buff *skb,
+ struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
@@ -746,8 +746,8 @@ static int devlink_dpipe_table_counters_set(struct devlink *devlink,
return 0;
}
-int devlink_nl_cmd_dpipe_table_counters_set(struct sk_buff *skb,
- struct genl_info *info)
+int devlink_nl_dpipe_table_counters_set_doit(struct sk_buff *skb,
+ struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
const char *table_name;
diff --git a/net/devlink/health.c b/net/devlink/health.c
index 638cad8d5c65..acb8c0e174bb 100644
--- a/net/devlink/health.c
+++ b/net/devlink/health.c
@@ -19,6 +19,7 @@ struct devlink_fmsg_item {
struct devlink_fmsg {
struct list_head item_list;
+ int err; /* first error encountered on some devlink_fmsg_XXX() call */
bool putting_binary; /* This flag forces enclosing of binary data
* in an array brackets. It forces using
* of designated API:
@@ -58,7 +59,6 @@ struct devlink_health_reporter {
struct devlink *devlink;
struct devlink_port *devlink_port;
struct devlink_fmsg *dump_fmsg;
- struct mutex dump_lock; /* lock parallel read/write from dump buffers */
u64 graceful_period;
bool auto_recover;
bool auto_dump;
@@ -125,7 +125,6 @@ __devlink_health_reporter_create(struct devlink *devlink,
reporter->graceful_period = graceful_period;
reporter->auto_recover = !!ops->recover;
reporter->auto_dump = !!ops->dump;
- mutex_init(&reporter->dump_lock);
return reporter;
}
@@ -226,7 +225,6 @@ EXPORT_SYMBOL_GPL(devlink_health_reporter_create);
static void
devlink_health_reporter_free(struct devlink_health_reporter *reporter)
{
- mutex_destroy(&reporter->dump_lock);
if (reporter->dump_fmsg)
devlink_fmsg_free(reporter->dump_fmsg);
kfree(reporter);
@@ -454,8 +452,8 @@ int devlink_nl_health_reporter_get_dumpit(struct sk_buff *skb,
devlink_nl_health_reporter_get_dump_one);
}
-int devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb,
- struct genl_info *info)
+int devlink_nl_health_reporter_set_doit(struct sk_buff *skb,
+ struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
struct devlink_health_reporter *reporter;
@@ -492,12 +490,16 @@ static void devlink_recover_notify(struct devlink_health_reporter *reporter,
enum devlink_command cmd)
{
struct devlink *devlink = reporter->devlink;
+ struct devlink_obj_desc desc;
struct sk_buff *msg;
int err;
WARN_ON(cmd != DEVLINK_CMD_HEALTH_REPORTER_RECOVER);
ASSERT_DEVLINK_REGISTERED(devlink);
+ if (!devlink_nl_notify_need(devlink))
+ return;
+
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
return;
@@ -508,8 +510,10 @@ static void devlink_recover_notify(struct devlink_health_reporter *reporter,
return;
}
- genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg,
- 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+ devlink_nl_obj_desc_init(&desc, devlink);
+ if (reporter->devlink_port)
+ devlink_nl_obj_desc_port_set(&desc, reporter->devlink_port);
+ devlink_nl_notify_send_desc(devlink, msg, &desc);
}
void
@@ -565,21 +569,18 @@ static int devlink_health_do_dump(struct devlink_health_reporter *reporter,
return 0;
reporter->dump_fmsg = devlink_fmsg_alloc();
- if (!reporter->dump_fmsg) {
- err = -ENOMEM;
- return err;
- }
+ if (!reporter->dump_fmsg)
+ return -ENOMEM;
- err = devlink_fmsg_obj_nest_start(reporter->dump_fmsg);
- if (err)
- goto dump_err;
+ devlink_fmsg_obj_nest_start(reporter->dump_fmsg);
err = reporter->ops->dump(reporter, reporter->dump_fmsg,
priv_ctx, extack);
if (err)
goto dump_err;
- err = devlink_fmsg_obj_nest_end(reporter->dump_fmsg);
+ devlink_fmsg_obj_nest_end(reporter->dump_fmsg);
+ err = reporter->dump_fmsg->err;
if (err)
goto dump_err;
@@ -625,10 +626,10 @@ int devlink_health_report(struct devlink_health_reporter *reporter,
}
if (reporter->auto_dump) {
- mutex_lock(&reporter->dump_lock);
+ devl_lock(devlink);
/* store current dump of current error, for later analysis */
devlink_health_do_dump(reporter, priv_ctx, NULL);
- mutex_unlock(&reporter->dump_lock);
+ devl_unlock(devlink);
}
if (!reporter->auto_recover)
@@ -660,8 +661,8 @@ devlink_health_reporter_state_update(struct devlink_health_reporter *reporter,
}
EXPORT_SYMBOL_GPL(devlink_health_reporter_state_update);
-int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb,
- struct genl_info *info)
+int devlink_nl_health_reporter_recover_doit(struct sk_buff *skb,
+ struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
struct devlink_health_reporter *reporter;
@@ -673,373 +674,258 @@ int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb,
return devlink_health_reporter_recover(reporter, NULL, info->extack);
}
-static int devlink_fmsg_nest_common(struct devlink_fmsg *fmsg,
- int attrtype)
+static void devlink_fmsg_err_if_binary(struct devlink_fmsg *fmsg)
+{
+ if (!fmsg->err && fmsg->putting_binary)
+ fmsg->err = -EINVAL;
+}
+
+static void devlink_fmsg_nest_common(struct devlink_fmsg *fmsg, int attrtype)
{
struct devlink_fmsg_item *item;
+ if (fmsg->err)
+ return;
+
item = kzalloc(sizeof(*item), GFP_KERNEL);
- if (!item)
- return -ENOMEM;
+ if (!item) {
+ fmsg->err = -ENOMEM;
+ return;
+ }
item->attrtype = attrtype;
list_add_tail(&item->list, &fmsg->item_list);
-
- return 0;
}
-int devlink_fmsg_obj_nest_start(struct devlink_fmsg *fmsg)
+void devlink_fmsg_obj_nest_start(struct devlink_fmsg *fmsg)
{
- if (fmsg->putting_binary)
- return -EINVAL;
-
- return devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_OBJ_NEST_START);
+ devlink_fmsg_err_if_binary(fmsg);
+ devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_OBJ_NEST_START);
}
EXPORT_SYMBOL_GPL(devlink_fmsg_obj_nest_start);
-static int devlink_fmsg_nest_end(struct devlink_fmsg *fmsg)
+static void devlink_fmsg_nest_end(struct devlink_fmsg *fmsg)
{
- if (fmsg->putting_binary)
- return -EINVAL;
-
- return devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_NEST_END);
+ devlink_fmsg_err_if_binary(fmsg);
+ devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_NEST_END);
}
-int devlink_fmsg_obj_nest_end(struct devlink_fmsg *fmsg)
+void devlink_fmsg_obj_nest_end(struct devlink_fmsg *fmsg)
{
- if (fmsg->putting_binary)
- return -EINVAL;
-
- return devlink_fmsg_nest_end(fmsg);
+ devlink_fmsg_nest_end(fmsg);
}
EXPORT_SYMBOL_GPL(devlink_fmsg_obj_nest_end);
#define DEVLINK_FMSG_MAX_SIZE (GENLMSG_DEFAULT_SIZE - GENL_HDRLEN - NLA_HDRLEN)
-static int devlink_fmsg_put_name(struct devlink_fmsg *fmsg, const char *name)
+static void devlink_fmsg_put_name(struct devlink_fmsg *fmsg, const char *name)
{
struct devlink_fmsg_item *item;
- if (fmsg->putting_binary)
- return -EINVAL;
+ devlink_fmsg_err_if_binary(fmsg);
+ if (fmsg->err)
+ return;
- if (strlen(name) + 1 > DEVLINK_FMSG_MAX_SIZE)
- return -EMSGSIZE;
+ if (strlen(name) + 1 > DEVLINK_FMSG_MAX_SIZE) {
+ fmsg->err = -EMSGSIZE;
+ return;
+ }
item = kzalloc(sizeof(*item) + strlen(name) + 1, GFP_KERNEL);
- if (!item)
- return -ENOMEM;
+ if (!item) {
+ fmsg->err = -ENOMEM;
+ return;
+ }
item->nla_type = NLA_NUL_STRING;
item->len = strlen(name) + 1;
item->attrtype = DEVLINK_ATTR_FMSG_OBJ_NAME;
memcpy(&item->value, name, item->len);
list_add_tail(&item->list, &fmsg->item_list);
-
- return 0;
}
-int devlink_fmsg_pair_nest_start(struct devlink_fmsg *fmsg, const char *name)
+void devlink_fmsg_pair_nest_start(struct devlink_fmsg *fmsg, const char *name)
{
- int err;
-
- if (fmsg->putting_binary)
- return -EINVAL;
-
- err = devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_PAIR_NEST_START);
- if (err)
- return err;
-
- err = devlink_fmsg_put_name(fmsg, name);
- if (err)
- return err;
-
- return 0;
+ devlink_fmsg_err_if_binary(fmsg);
+ devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_PAIR_NEST_START);
+ devlink_fmsg_put_name(fmsg, name);
}
EXPORT_SYMBOL_GPL(devlink_fmsg_pair_nest_start);
-int devlink_fmsg_pair_nest_end(struct devlink_fmsg *fmsg)
+void devlink_fmsg_pair_nest_end(struct devlink_fmsg *fmsg)
{
- if (fmsg->putting_binary)
- return -EINVAL;
-
- return devlink_fmsg_nest_end(fmsg);
+ devlink_fmsg_nest_end(fmsg);
}
EXPORT_SYMBOL_GPL(devlink_fmsg_pair_nest_end);
-int devlink_fmsg_arr_pair_nest_start(struct devlink_fmsg *fmsg,
- const char *name)
+void devlink_fmsg_arr_pair_nest_start(struct devlink_fmsg *fmsg,
+ const char *name)
{
- int err;
-
- if (fmsg->putting_binary)
- return -EINVAL;
-
- err = devlink_fmsg_pair_nest_start(fmsg, name);
- if (err)
- return err;
-
- err = devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_ARR_NEST_START);
- if (err)
- return err;
-
- return 0;
+ devlink_fmsg_pair_nest_start(fmsg, name);
+ devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_ARR_NEST_START);
}
EXPORT_SYMBOL_GPL(devlink_fmsg_arr_pair_nest_start);
-int devlink_fmsg_arr_pair_nest_end(struct devlink_fmsg *fmsg)
+void devlink_fmsg_arr_pair_nest_end(struct devlink_fmsg *fmsg)
{
- int err;
-
- if (fmsg->putting_binary)
- return -EINVAL;
-
- err = devlink_fmsg_nest_end(fmsg);
- if (err)
- return err;
-
- err = devlink_fmsg_nest_end(fmsg);
- if (err)
- return err;
-
- return 0;
+ devlink_fmsg_nest_end(fmsg);
+ devlink_fmsg_nest_end(fmsg);
}
EXPORT_SYMBOL_GPL(devlink_fmsg_arr_pair_nest_end);
-int devlink_fmsg_binary_pair_nest_start(struct devlink_fmsg *fmsg,
- const char *name)
+void devlink_fmsg_binary_pair_nest_start(struct devlink_fmsg *fmsg,
+ const char *name)
{
- int err;
-
- err = devlink_fmsg_arr_pair_nest_start(fmsg, name);
- if (err)
- return err;
-
+ devlink_fmsg_arr_pair_nest_start(fmsg, name);
fmsg->putting_binary = true;
- return err;
}
EXPORT_SYMBOL_GPL(devlink_fmsg_binary_pair_nest_start);
-int devlink_fmsg_binary_pair_nest_end(struct devlink_fmsg *fmsg)
+void devlink_fmsg_binary_pair_nest_end(struct devlink_fmsg *fmsg)
{
+ if (fmsg->err)
+ return;
+
if (!fmsg->putting_binary)
- return -EINVAL;
+ fmsg->err = -EINVAL;
fmsg->putting_binary = false;
- return devlink_fmsg_arr_pair_nest_end(fmsg);
+ devlink_fmsg_arr_pair_nest_end(fmsg);
}
EXPORT_SYMBOL_GPL(devlink_fmsg_binary_pair_nest_end);
-static int devlink_fmsg_put_value(struct devlink_fmsg *fmsg,
- const void *value, u16 value_len,
- u8 value_nla_type)
+static void devlink_fmsg_put_value(struct devlink_fmsg *fmsg,
+ const void *value, u16 value_len,
+ u8 value_nla_type)
{
struct devlink_fmsg_item *item;
- if (value_len > DEVLINK_FMSG_MAX_SIZE)
- return -EMSGSIZE;
+ if (fmsg->err)
+ return;
+
+ if (value_len > DEVLINK_FMSG_MAX_SIZE) {
+ fmsg->err = -EMSGSIZE;
+ return;
+ }
item = kzalloc(sizeof(*item) + value_len, GFP_KERNEL);
- if (!item)
- return -ENOMEM;
+ if (!item) {
+ fmsg->err = -ENOMEM;
+ return;
+ }
item->nla_type = value_nla_type;
item->len = value_len;
item->attrtype = DEVLINK_ATTR_FMSG_OBJ_VALUE_DATA;
memcpy(&item->value, value, item->len);
list_add_tail(&item->list, &fmsg->item_list);
-
- return 0;
}
-static int devlink_fmsg_bool_put(struct devlink_fmsg *fmsg, bool value)
+static void devlink_fmsg_bool_put(struct devlink_fmsg *fmsg, bool value)
{
- if (fmsg->putting_binary)
- return -EINVAL;
-
- return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_FLAG);
+ devlink_fmsg_err_if_binary(fmsg);
+ devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_FLAG);
}
-static int devlink_fmsg_u8_put(struct devlink_fmsg *fmsg, u8 value)
+static void devlink_fmsg_u8_put(struct devlink_fmsg *fmsg, u8 value)
{
- if (fmsg->putting_binary)
- return -EINVAL;
-
- return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U8);
+ devlink_fmsg_err_if_binary(fmsg);
+ devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U8);
}
-int devlink_fmsg_u32_put(struct devlink_fmsg *fmsg, u32 value)
+void devlink_fmsg_u32_put(struct devlink_fmsg *fmsg, u32 value)
{
- if (fmsg->putting_binary)
- return -EINVAL;
-
- return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U32);
+ devlink_fmsg_err_if_binary(fmsg);
+ devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U32);
}
EXPORT_SYMBOL_GPL(devlink_fmsg_u32_put);
-static int devlink_fmsg_u64_put(struct devlink_fmsg *fmsg, u64 value)
+static void devlink_fmsg_u64_put(struct devlink_fmsg *fmsg, u64 value)
{
- if (fmsg->putting_binary)
- return -EINVAL;
-
- return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U64);
+ devlink_fmsg_err_if_binary(fmsg);
+ devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U64);
}
-int devlink_fmsg_string_put(struct devlink_fmsg *fmsg, const char *value)
+void devlink_fmsg_string_put(struct devlink_fmsg *fmsg, const char *value)
{
- if (fmsg->putting_binary)
- return -EINVAL;
-
- return devlink_fmsg_put_value(fmsg, value, strlen(value) + 1,
- NLA_NUL_STRING);
+ devlink_fmsg_err_if_binary(fmsg);
+ devlink_fmsg_put_value(fmsg, value, strlen(value) + 1, NLA_NUL_STRING);
}
EXPORT_SYMBOL_GPL(devlink_fmsg_string_put);
-int devlink_fmsg_binary_put(struct devlink_fmsg *fmsg, const void *value,
- u16 value_len)
+void devlink_fmsg_binary_put(struct devlink_fmsg *fmsg, const void *value,
+ u16 value_len)
{
- if (!fmsg->putting_binary)
- return -EINVAL;
+ if (!fmsg->err && !fmsg->putting_binary)
+ fmsg->err = -EINVAL;
- return devlink_fmsg_put_value(fmsg, value, value_len, NLA_BINARY);
+ devlink_fmsg_put_value(fmsg, value, value_len, NLA_BINARY);
}
EXPORT_SYMBOL_GPL(devlink_fmsg_binary_put);
-int devlink_fmsg_bool_pair_put(struct devlink_fmsg *fmsg, const char *name,
- bool value)
+void devlink_fmsg_bool_pair_put(struct devlink_fmsg *fmsg, const char *name,
+ bool value)
{
- int err;
-
- err = devlink_fmsg_pair_nest_start(fmsg, name);
- if (err)
- return err;
-
- err = devlink_fmsg_bool_put(fmsg, value);
- if (err)
- return err;
-
- err = devlink_fmsg_pair_nest_end(fmsg);
- if (err)
- return err;
-
- return 0;
+ devlink_fmsg_pair_nest_start(fmsg, name);
+ devlink_fmsg_bool_put(fmsg, value);
+ devlink_fmsg_pair_nest_end(fmsg);
}
EXPORT_SYMBOL_GPL(devlink_fmsg_bool_pair_put);
-int devlink_fmsg_u8_pair_put(struct devlink_fmsg *fmsg, const char *name,
- u8 value)
+void devlink_fmsg_u8_pair_put(struct devlink_fmsg *fmsg, const char *name,
+ u8 value)
{
- int err;
-
- err = devlink_fmsg_pair_nest_start(fmsg, name);
- if (err)
- return err;
-
- err = devlink_fmsg_u8_put(fmsg, value);
- if (err)
- return err;
-
- err = devlink_fmsg_pair_nest_end(fmsg);
- if (err)
- return err;
-
- return 0;
+ devlink_fmsg_pair_nest_start(fmsg, name);
+ devlink_fmsg_u8_put(fmsg, value);
+ devlink_fmsg_pair_nest_end(fmsg);
}
EXPORT_SYMBOL_GPL(devlink_fmsg_u8_pair_put);
-int devlink_fmsg_u32_pair_put(struct devlink_fmsg *fmsg, const char *name,
- u32 value)
+void devlink_fmsg_u32_pair_put(struct devlink_fmsg *fmsg, const char *name,
+ u32 value)
{
- int err;
-
- err = devlink_fmsg_pair_nest_start(fmsg, name);
- if (err)
- return err;
-
- err = devlink_fmsg_u32_put(fmsg, value);
- if (err)
- return err;
-
- err = devlink_fmsg_pair_nest_end(fmsg);
- if (err)
- return err;
-
- return 0;
+ devlink_fmsg_pair_nest_start(fmsg, name);
+ devlink_fmsg_u32_put(fmsg, value);
+ devlink_fmsg_pair_nest_end(fmsg);
}
EXPORT_SYMBOL_GPL(devlink_fmsg_u32_pair_put);
-int devlink_fmsg_u64_pair_put(struct devlink_fmsg *fmsg, const char *name,
- u64 value)
+void devlink_fmsg_u64_pair_put(struct devlink_fmsg *fmsg, const char *name,
+ u64 value)
{
- int err;
-
- err = devlink_fmsg_pair_nest_start(fmsg, name);
- if (err)
- return err;
-
- err = devlink_fmsg_u64_put(fmsg, value);
- if (err)
- return err;
-
- err = devlink_fmsg_pair_nest_end(fmsg);
- if (err)
- return err;
-
- return 0;
+ devlink_fmsg_pair_nest_start(fmsg, name);
+ devlink_fmsg_u64_put(fmsg, value);
+ devlink_fmsg_pair_nest_end(fmsg);
}
EXPORT_SYMBOL_GPL(devlink_fmsg_u64_pair_put);
-int devlink_fmsg_string_pair_put(struct devlink_fmsg *fmsg, const char *name,
- const char *value)
+void devlink_fmsg_string_pair_put(struct devlink_fmsg *fmsg, const char *name,
+ const char *value)
{
- int err;
-
- err = devlink_fmsg_pair_nest_start(fmsg, name);
- if (err)
- return err;
-
- err = devlink_fmsg_string_put(fmsg, value);
- if (err)
- return err;
-
- err = devlink_fmsg_pair_nest_end(fmsg);
- if (err)
- return err;
-
- return 0;
+ devlink_fmsg_pair_nest_start(fmsg, name);
+ devlink_fmsg_string_put(fmsg, value);
+ devlink_fmsg_pair_nest_end(fmsg);
}
EXPORT_SYMBOL_GPL(devlink_fmsg_string_pair_put);
-int devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, const char *name,
- const void *value, u32 value_len)
+void devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, const char *name,
+ const void *value, u32 value_len)
{
u32 data_size;
- int end_err;
u32 offset;
- int err;
- err = devlink_fmsg_binary_pair_nest_start(fmsg, name);
- if (err)
- return err;
+ devlink_fmsg_binary_pair_nest_start(fmsg, name);
for (offset = 0; offset < value_len; offset += data_size) {
data_size = value_len - offset;
if (data_size > DEVLINK_FMSG_MAX_SIZE)
data_size = DEVLINK_FMSG_MAX_SIZE;
- err = devlink_fmsg_binary_put(fmsg, value + offset, data_size);
- if (err)
- break;
- /* Exit from loop with a break (instead of
- * return) to make sure putting_binary is turned off in
- * devlink_fmsg_binary_pair_nest_end
- */
- }
- end_err = devlink_fmsg_binary_pair_nest_end(fmsg);
- if (end_err)
- err = end_err;
+ devlink_fmsg_binary_put(fmsg, value + offset, data_size);
+ }
- return err;
+ devlink_fmsg_binary_pair_nest_end(fmsg);
+ fmsg->putting_binary = false;
}
EXPORT_SYMBOL_GPL(devlink_fmsg_binary_pair_put);
@@ -1149,6 +1035,9 @@ static int devlink_fmsg_snd(struct devlink_fmsg *fmsg,
void *hdr;
int err;
+ if (fmsg->err)
+ return fmsg->err;
+
while (!last) {
int tmp_index = index;
@@ -1202,6 +1091,9 @@ static int devlink_fmsg_dumpit(struct devlink_fmsg *fmsg, struct sk_buff *skb,
void *hdr;
int err;
+ if (fmsg->err)
+ return fmsg->err;
+
hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
&devlink_nl_family, NLM_F_ACK | NLM_F_MULTI, cmd);
if (!hdr) {
@@ -1222,8 +1114,8 @@ nla_put_failure:
return err;
}
-int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb,
- struct genl_info *info)
+int devlink_nl_health_reporter_diagnose_doit(struct sk_buff *skb,
+ struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
struct devlink_health_reporter *reporter;
@@ -1241,17 +1133,13 @@ int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb,
if (!fmsg)
return -ENOMEM;
- err = devlink_fmsg_obj_nest_start(fmsg);
- if (err)
- goto out;
+ devlink_fmsg_obj_nest_start(fmsg);
err = reporter->ops->diagnose(reporter, fmsg, info->extack);
if (err)
goto out;
- err = devlink_fmsg_obj_nest_end(fmsg);
- if (err)
- goto out;
+ devlink_fmsg_obj_nest_end(fmsg);
err = devlink_fmsg_snd(fmsg, info,
DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE, 0);
@@ -1262,38 +1150,45 @@ out:
}
static struct devlink_health_reporter *
-devlink_health_reporter_get_from_cb(struct netlink_callback *cb)
+devlink_health_reporter_get_from_cb_lock(struct netlink_callback *cb)
{
const struct genl_info *info = genl_info_dump(cb);
struct devlink_health_reporter *reporter;
struct nlattr **attrs = info->attrs;
struct devlink *devlink;
- devlink = devlink_get_from_attrs_lock(sock_net(cb->skb->sk), attrs);
+ devlink = devlink_get_from_attrs_lock(sock_net(cb->skb->sk), attrs,
+ false);
if (IS_ERR(devlink))
return NULL;
- devl_unlock(devlink);
reporter = devlink_health_reporter_get_from_attrs(devlink, attrs);
- devlink_put(devlink);
+ if (!reporter) {
+ devl_unlock(devlink);
+ devlink_put(devlink);
+ }
return reporter;
}
-int devlink_nl_cmd_health_reporter_dump_get_dumpit(struct sk_buff *skb,
- struct netlink_callback *cb)
+int devlink_nl_health_reporter_dump_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
{
struct devlink_nl_dump_state *state = devlink_dump_state(cb);
struct devlink_health_reporter *reporter;
+ struct devlink *devlink;
int err;
- reporter = devlink_health_reporter_get_from_cb(cb);
+ reporter = devlink_health_reporter_get_from_cb_lock(cb);
if (!reporter)
return -EINVAL;
- if (!reporter->ops->dump)
+ devlink = reporter->devlink;
+ if (!reporter->ops->dump) {
+ devl_unlock(devlink);
+ devlink_put(devlink);
return -EOPNOTSUPP;
+ }
- mutex_lock(&reporter->dump_lock);
if (!state->idx) {
err = devlink_health_do_dump(reporter, NULL, cb->extack);
if (err)
@@ -1309,12 +1204,13 @@ int devlink_nl_cmd_health_reporter_dump_get_dumpit(struct sk_buff *skb,
err = devlink_fmsg_dumpit(reporter->dump_fmsg, skb, cb,
DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET);
unlock:
- mutex_unlock(&reporter->dump_lock);
+ devl_unlock(devlink);
+ devlink_put(devlink);
return err;
}
-int devlink_nl_cmd_health_reporter_dump_clear_doit(struct sk_buff *skb,
- struct genl_info *info)
+int devlink_nl_health_reporter_dump_clear_doit(struct sk_buff *skb,
+ struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
struct devlink_health_reporter *reporter;
@@ -1326,14 +1222,12 @@ int devlink_nl_cmd_health_reporter_dump_clear_doit(struct sk_buff *skb,
if (!reporter->ops->dump)
return -EOPNOTSUPP;
- mutex_lock(&reporter->dump_lock);
devlink_health_dump_clear(reporter);
- mutex_unlock(&reporter->dump_lock);
return 0;
}
-int devlink_nl_cmd_health_reporter_test_doit(struct sk_buff *skb,
- struct genl_info *info)
+int devlink_nl_health_reporter_test_doit(struct sk_buff *skb,
+ struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
struct devlink_health_reporter *reporter;
diff --git a/net/devlink/linecard.c b/net/devlink/linecard.c
index 85c32c314b0f..67f70a621d27 100644
--- a/net/devlink/linecard.c
+++ b/net/devlink/linecard.c
@@ -6,6 +6,25 @@
#include "devl_internal.h"
+struct devlink_linecard {
+ struct list_head list;
+ struct devlink *devlink;
+ unsigned int index;
+ const struct devlink_linecard_ops *ops;
+ void *priv;
+ enum devlink_linecard_state state;
+ struct mutex state_lock; /* Protects state */
+ const char *type;
+ struct devlink_linecard_type *types;
+ unsigned int types_count;
+ u32 rel_index;
+};
+
+unsigned int devlink_linecard_index(struct devlink_linecard *linecard)
+{
+ return linecard->index;
+}
+
static struct devlink_linecard *
devlink_linecard_get_by_index(struct devlink *devlink,
unsigned int linecard_index)
@@ -46,24 +65,6 @@ devlink_linecard_get_from_info(struct devlink *devlink, struct genl_info *info)
return devlink_linecard_get_from_attrs(devlink, info->attrs);
}
-static int devlink_nl_put_nested_handle(struct sk_buff *msg, struct devlink *devlink)
-{
- struct nlattr *nested_attr;
-
- nested_attr = nla_nest_start(msg, DEVLINK_ATTR_NESTED_DEVLINK);
- if (!nested_attr)
- return -EMSGSIZE;
- if (devlink_nl_put_handle(msg, devlink))
- goto nla_put_failure;
-
- nla_nest_end(msg, nested_attr);
- return 0;
-
-nla_put_failure:
- nla_nest_cancel(msg, nested_attr);
- return -EMSGSIZE;
-}
-
struct devlink_linecard_type {
const char *type;
const void *priv;
@@ -111,8 +112,10 @@ static int devlink_nl_linecard_fill(struct sk_buff *msg,
nla_nest_end(msg, attr);
}
- if (linecard->nested_devlink &&
- devlink_nl_put_nested_handle(msg, linecard->nested_devlink))
+ if (devlink_rel_devlink_handle_put(msg, devlink,
+ linecard->rel_index,
+ DEVLINK_ATTR_NESTED_DEVLINK,
+ NULL))
goto nla_put_failure;
genlmsg_end(msg, hdr);
@@ -133,7 +136,7 @@ static void devlink_linecard_notify(struct devlink_linecard *linecard,
WARN_ON(cmd != DEVLINK_CMD_LINECARD_NEW &&
cmd != DEVLINK_CMD_LINECARD_DEL);
- if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+ if (!__devl_is_registered(devlink) || !devlink_nl_notify_need(devlink))
return;
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
@@ -147,8 +150,7 @@ static void devlink_linecard_notify(struct devlink_linecard *linecard,
return;
}
- genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
- msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+ devlink_nl_notify_send(devlink, msg);
}
void devlink_linecards_notify_register(struct devlink *devlink)
@@ -366,8 +368,7 @@ out:
return err;
}
-int devlink_nl_cmd_linecard_set_doit(struct sk_buff *skb,
- struct genl_info *info)
+int devlink_nl_linecard_set_doit(struct sk_buff *skb, struct genl_info *info)
{
struct netlink_ext_ack *extack = info->extack;
struct devlink *devlink = info->user_ptr[0];
@@ -521,7 +522,6 @@ EXPORT_SYMBOL_GPL(devlink_linecard_provision_set);
void devlink_linecard_provision_clear(struct devlink_linecard *linecard)
{
mutex_lock(&linecard->state_lock);
- WARN_ON(linecard->nested_devlink);
linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED;
linecard->type = NULL;
devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
@@ -540,7 +540,6 @@ EXPORT_SYMBOL_GPL(devlink_linecard_provision_clear);
void devlink_linecard_provision_fail(struct devlink_linecard *linecard)
{
mutex_lock(&linecard->state_lock);
- WARN_ON(linecard->nested_devlink);
linecard->state = DEVLINK_LINECARD_STATE_PROVISIONING_FAILED;
devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
mutex_unlock(&linecard->state_lock);
@@ -588,6 +587,27 @@ void devlink_linecard_deactivate(struct devlink_linecard *linecard)
}
EXPORT_SYMBOL_GPL(devlink_linecard_deactivate);
+static void devlink_linecard_rel_notify_cb(struct devlink *devlink,
+ u32 linecard_index)
+{
+ struct devlink_linecard *linecard;
+
+ linecard = devlink_linecard_get_by_index(devlink, linecard_index);
+ if (!linecard)
+ return;
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+}
+
+static void devlink_linecard_rel_cleanup_cb(struct devlink *devlink,
+ u32 linecard_index, u32 rel_index)
+{
+ struct devlink_linecard *linecard;
+
+ linecard = devlink_linecard_get_by_index(devlink, linecard_index);
+ if (linecard && linecard->rel_index == rel_index)
+ linecard->rel_index = 0;
+}
+
/**
* devlink_linecard_nested_dl_set - Attach/detach nested devlink
* instance to linecard.
@@ -595,12 +615,14 @@ EXPORT_SYMBOL_GPL(devlink_linecard_deactivate);
* @linecard: devlink linecard
* @nested_devlink: devlink instance to attach or NULL to detach
*/
-void devlink_linecard_nested_dl_set(struct devlink_linecard *linecard,
- struct devlink *nested_devlink)
+int devlink_linecard_nested_dl_set(struct devlink_linecard *linecard,
+ struct devlink *nested_devlink)
{
- mutex_lock(&linecard->state_lock);
- linecard->nested_devlink = nested_devlink;
- devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
- mutex_unlock(&linecard->state_lock);
+ return devlink_rel_nested_in_add(&linecard->rel_index,
+ linecard->devlink->index,
+ linecard->index,
+ devlink_linecard_rel_notify_cb,
+ devlink_linecard_rel_cleanup_cb,
+ nested_devlink);
}
EXPORT_SYMBOL_GPL(devlink_linecard_nested_dl_set);
diff --git a/net/devlink/netlink.c b/net/devlink/netlink.c
index fc3e7c029a3b..499885c8b9ca 100644
--- a/net/devlink/netlink.c
+++ b/net/devlink/netlink.c
@@ -9,79 +9,159 @@
#include "devl_internal.h"
+#define DEVLINK_NL_FLAG_NEED_PORT BIT(0)
+#define DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT BIT(1)
+#define DEVLINK_NL_FLAG_NEED_DEV_LOCK BIT(2)
+
static const struct genl_multicast_group devlink_nl_mcgrps[] = {
[DEVLINK_MCGRP_CONFIG] = { .name = DEVLINK_GENL_MCGRP_CONFIG_NAME },
};
-static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
- [DEVLINK_ATTR_UNSPEC] = { .strict_start_type =
- DEVLINK_ATTR_TRAP_POLICER_ID },
- [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
- [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
- [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32 },
- [DEVLINK_ATTR_PORT_TYPE] = NLA_POLICY_RANGE(NLA_U16, DEVLINK_PORT_TYPE_AUTO,
- DEVLINK_PORT_TYPE_IB),
- [DEVLINK_ATTR_PORT_SPLIT_COUNT] = { .type = NLA_U32 },
- [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32 },
- [DEVLINK_ATTR_SB_POOL_INDEX] = { .type = NLA_U16 },
- [DEVLINK_ATTR_SB_POOL_TYPE] = { .type = NLA_U8 },
- [DEVLINK_ATTR_SB_POOL_SIZE] = { .type = NLA_U32 },
- [DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE] = { .type = NLA_U8 },
- [DEVLINK_ATTR_SB_THRESHOLD] = { .type = NLA_U32 },
- [DEVLINK_ATTR_SB_TC_INDEX] = { .type = NLA_U16 },
- [DEVLINK_ATTR_ESWITCH_MODE] = NLA_POLICY_RANGE(NLA_U16, DEVLINK_ESWITCH_MODE_LEGACY,
- DEVLINK_ESWITCH_MODE_SWITCHDEV),
- [DEVLINK_ATTR_ESWITCH_INLINE_MODE] = { .type = NLA_U8 },
- [DEVLINK_ATTR_ESWITCH_ENCAP_MODE] = { .type = NLA_U8 },
- [DEVLINK_ATTR_DPIPE_TABLE_NAME] = { .type = NLA_NUL_STRING },
- [DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED] = { .type = NLA_U8 },
- [DEVLINK_ATTR_RESOURCE_ID] = { .type = NLA_U64},
- [DEVLINK_ATTR_RESOURCE_SIZE] = { .type = NLA_U64},
- [DEVLINK_ATTR_PARAM_NAME] = { .type = NLA_NUL_STRING },
- [DEVLINK_ATTR_PARAM_TYPE] = { .type = NLA_U8 },
- [DEVLINK_ATTR_PARAM_VALUE_CMODE] = { .type = NLA_U8 },
- [DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING },
- [DEVLINK_ATTR_REGION_SNAPSHOT_ID] = { .type = NLA_U32 },
- [DEVLINK_ATTR_REGION_CHUNK_ADDR] = { .type = NLA_U64 },
- [DEVLINK_ATTR_REGION_CHUNK_LEN] = { .type = NLA_U64 },
- [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING },
- [DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] = { .type = NLA_U64 },
- [DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER] = { .type = NLA_U8 },
- [DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME] = { .type = NLA_NUL_STRING },
- [DEVLINK_ATTR_FLASH_UPDATE_COMPONENT] = { .type = NLA_NUL_STRING },
- [DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK] =
- NLA_POLICY_BITFIELD32(DEVLINK_SUPPORTED_FLASH_OVERWRITE_SECTIONS),
- [DEVLINK_ATTR_TRAP_NAME] = { .type = NLA_NUL_STRING },
- [DEVLINK_ATTR_TRAP_ACTION] = { .type = NLA_U8 },
- [DEVLINK_ATTR_TRAP_GROUP_NAME] = { .type = NLA_NUL_STRING },
- [DEVLINK_ATTR_NETNS_PID] = { .type = NLA_U32 },
- [DEVLINK_ATTR_NETNS_FD] = { .type = NLA_U32 },
- [DEVLINK_ATTR_NETNS_ID] = { .type = NLA_U32 },
- [DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP] = { .type = NLA_U8 },
- [DEVLINK_ATTR_TRAP_POLICER_ID] = { .type = NLA_U32 },
- [DEVLINK_ATTR_TRAP_POLICER_RATE] = { .type = NLA_U64 },
- [DEVLINK_ATTR_TRAP_POLICER_BURST] = { .type = NLA_U64 },
- [DEVLINK_ATTR_PORT_FUNCTION] = { .type = NLA_NESTED },
- [DEVLINK_ATTR_RELOAD_ACTION] = NLA_POLICY_RANGE(NLA_U8, DEVLINK_RELOAD_ACTION_DRIVER_REINIT,
- DEVLINK_RELOAD_ACTION_MAX),
- [DEVLINK_ATTR_RELOAD_LIMITS] = NLA_POLICY_BITFIELD32(DEVLINK_RELOAD_LIMITS_VALID_MASK),
- [DEVLINK_ATTR_PORT_FLAVOUR] = { .type = NLA_U16 },
- [DEVLINK_ATTR_PORT_PCI_PF_NUMBER] = { .type = NLA_U16 },
- [DEVLINK_ATTR_PORT_PCI_SF_NUMBER] = { .type = NLA_U32 },
- [DEVLINK_ATTR_PORT_CONTROLLER_NUMBER] = { .type = NLA_U32 },
- [DEVLINK_ATTR_RATE_TYPE] = { .type = NLA_U16 },
- [DEVLINK_ATTR_RATE_TX_SHARE] = { .type = NLA_U64 },
- [DEVLINK_ATTR_RATE_TX_MAX] = { .type = NLA_U64 },
- [DEVLINK_ATTR_RATE_NODE_NAME] = { .type = NLA_NUL_STRING },
- [DEVLINK_ATTR_RATE_PARENT_NODE_NAME] = { .type = NLA_NUL_STRING },
- [DEVLINK_ATTR_LINECARD_INDEX] = { .type = NLA_U32 },
- [DEVLINK_ATTR_LINECARD_TYPE] = { .type = NLA_NUL_STRING },
- [DEVLINK_ATTR_SELFTESTS] = { .type = NLA_NESTED },
- [DEVLINK_ATTR_RATE_TX_PRIORITY] = { .type = NLA_U32 },
- [DEVLINK_ATTR_RATE_TX_WEIGHT] = { .type = NLA_U32 },
- [DEVLINK_ATTR_REGION_DIRECT] = { .type = NLA_FLAG },
+struct devlink_nl_sock_priv {
+ struct devlink_obj_desc __rcu *flt;
+ spinlock_t flt_lock; /* Protects flt. */
};
+static void devlink_nl_sock_priv_init(void *priv)
+{
+ struct devlink_nl_sock_priv *sk_priv = priv;
+
+ spin_lock_init(&sk_priv->flt_lock);
+}
+
+static void devlink_nl_sock_priv_destroy(void *priv)
+{
+ struct devlink_nl_sock_priv *sk_priv = priv;
+ struct devlink_obj_desc *flt;
+
+ flt = rcu_dereference_protected(sk_priv->flt, true);
+ kfree_rcu(flt, rcu);
+}
+
+int devlink_nl_notify_filter_set_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink_nl_sock_priv *sk_priv;
+ struct nlattr **attrs = info->attrs;
+ struct devlink_obj_desc *flt;
+ size_t data_offset = 0;
+ size_t data_size = 0;
+ char *pos;
+
+ if (attrs[DEVLINK_ATTR_BUS_NAME])
+ data_size = size_add(data_size,
+ nla_len(attrs[DEVLINK_ATTR_BUS_NAME]) + 1);
+ if (attrs[DEVLINK_ATTR_DEV_NAME])
+ data_size = size_add(data_size,
+ nla_len(attrs[DEVLINK_ATTR_DEV_NAME]) + 1);
+
+ flt = kzalloc(size_add(sizeof(*flt), data_size), GFP_KERNEL);
+ if (!flt)
+ return -ENOMEM;
+
+ pos = (char *) flt->data;
+ if (attrs[DEVLINK_ATTR_BUS_NAME]) {
+ data_offset += nla_strscpy(pos,
+ attrs[DEVLINK_ATTR_BUS_NAME],
+ data_size) + 1;
+ flt->bus_name = pos;
+ pos += data_offset;
+ }
+ if (attrs[DEVLINK_ATTR_DEV_NAME]) {
+ nla_strscpy(pos, attrs[DEVLINK_ATTR_DEV_NAME],
+ data_size - data_offset);
+ flt->dev_name = pos;
+ }
+
+ if (attrs[DEVLINK_ATTR_PORT_INDEX]) {
+ flt->port_index = nla_get_u32(attrs[DEVLINK_ATTR_PORT_INDEX]);
+ flt->port_index_valid = true;
+ }
+
+ /* Don't attach empty filter. */
+ if (!flt->bus_name && !flt->dev_name && !flt->port_index_valid) {
+ kfree(flt);
+ flt = NULL;
+ }
+
+ sk_priv = genl_sk_priv_get(&devlink_nl_family, NETLINK_CB(skb).sk);
+ if (IS_ERR(sk_priv)) {
+ kfree(flt);
+ return PTR_ERR(sk_priv);
+ }
+ spin_lock(&sk_priv->flt_lock);
+ flt = rcu_replace_pointer(sk_priv->flt, flt,
+ lockdep_is_held(&sk_priv->flt_lock));
+ spin_unlock(&sk_priv->flt_lock);
+ kfree_rcu(flt, rcu);
+ return 0;
+}
+
+static bool devlink_obj_desc_match(const struct devlink_obj_desc *desc,
+ const struct devlink_obj_desc *flt)
+{
+ if (desc->bus_name && flt->bus_name &&
+ strcmp(desc->bus_name, flt->bus_name))
+ return false;
+ if (desc->dev_name && flt->dev_name &&
+ strcmp(desc->dev_name, flt->dev_name))
+ return false;
+ if (desc->port_index_valid && flt->port_index_valid &&
+ desc->port_index != flt->port_index)
+ return false;
+ return true;
+}
+
+int devlink_nl_notify_filter(struct sock *dsk, struct sk_buff *skb, void *data)
+{
+ struct devlink_obj_desc *desc = data;
+ struct devlink_nl_sock_priv *sk_priv;
+ struct devlink_obj_desc *flt;
+ int ret = 0;
+
+ rcu_read_lock();
+ sk_priv = __genl_sk_priv_get(&devlink_nl_family, dsk);
+ if (!IS_ERR_OR_NULL(sk_priv)) {
+ flt = rcu_dereference(sk_priv->flt);
+ if (flt)
+ ret = !devlink_obj_desc_match(desc, flt);
+ }
+ rcu_read_unlock();
+ return ret;
+}
+
+int devlink_nl_put_nested_handle(struct sk_buff *msg, struct net *net,
+ struct devlink *devlink, int attrtype)
+{
+ struct nlattr *nested_attr;
+ struct net *devl_net;
+
+ nested_attr = nla_nest_start(msg, attrtype);
+ if (!nested_attr)
+ return -EMSGSIZE;
+ if (devlink_nl_put_handle(msg, devlink))
+ goto nla_put_failure;
+
+ rcu_read_lock();
+ devl_net = read_pnet_rcu(&devlink->_net);
+ if (!net_eq(net, devl_net)) {
+ int id = peernet2id_alloc(net, devl_net, GFP_ATOMIC);
+
+ rcu_read_unlock();
+ if (nla_put_s32(msg, DEVLINK_ATTR_NETNS_ID, id))
+ return -EMSGSIZE;
+ } else {
+ rcu_read_unlock();
+ }
+
+ nla_nest_end(msg, nested_attr);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(msg, nested_attr);
+ return -EMSGSIZE;
+}
+
int devlink_nl_msg_reply_and_new(struct sk_buff **msg, struct genl_info *info)
{
int err;
@@ -98,7 +178,8 @@ int devlink_nl_msg_reply_and_new(struct sk_buff **msg, struct genl_info *info)
}
struct devlink *
-devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs)
+devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs,
+ bool dev_lock)
{
struct devlink *devlink;
unsigned long index;
@@ -112,12 +193,12 @@ devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs)
devname = nla_data(attrs[DEVLINK_ATTR_DEV_NAME]);
devlinks_xa_for_each_registered_get(net, index, devlink) {
- devl_lock(devlink);
+ devl_dev_lock(devlink, dev_lock);
if (devl_is_registered(devlink) &&
strcmp(devlink->dev->bus->name, busname) == 0 &&
strcmp(dev_name(devlink->dev), devname) == 0)
return devlink;
- devl_unlock(devlink);
+ devl_dev_unlock(devlink, dev_lock);
devlink_put(devlink);
}
@@ -127,11 +208,13 @@ devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs)
static int __devlink_nl_pre_doit(struct sk_buff *skb, struct genl_info *info,
u8 flags)
{
+ bool dev_lock = flags & DEVLINK_NL_FLAG_NEED_DEV_LOCK;
struct devlink_port *devlink_port;
struct devlink *devlink;
int err;
- devlink = devlink_get_from_attrs_lock(genl_info_net(info), info->attrs);
+ devlink = devlink_get_from_attrs_lock(genl_info_net(info), info->attrs,
+ dev_lock);
if (IS_ERR(devlink))
return PTR_ERR(devlink);
@@ -151,7 +234,7 @@ static int __devlink_nl_pre_doit(struct sk_buff *skb, struct genl_info *info,
return 0;
unlock:
- devl_unlock(devlink);
+ devl_dev_unlock(devlink, dev_lock);
devlink_put(devlink);
return err;
}
@@ -159,7 +242,7 @@ unlock:
int devlink_nl_pre_doit(const struct genl_split_ops *ops,
struct sk_buff *skb, struct genl_info *info)
{
- return __devlink_nl_pre_doit(skb, info, ops->internal_flags);
+ return __devlink_nl_pre_doit(skb, info, 0);
}
int devlink_nl_pre_doit_port(const struct genl_split_ops *ops,
@@ -168,6 +251,12 @@ int devlink_nl_pre_doit_port(const struct genl_split_ops *ops,
return __devlink_nl_pre_doit(skb, info, DEVLINK_NL_FLAG_NEED_PORT);
}
+int devlink_nl_pre_doit_dev_lock(const struct genl_split_ops *ops,
+ struct sk_buff *skb, struct genl_info *info)
+{
+ return __devlink_nl_pre_doit(skb, info, DEVLINK_NL_FLAG_NEED_DEV_LOCK);
+}
+
int devlink_nl_pre_doit_port_optional(const struct genl_split_ops *ops,
struct sk_buff *skb,
struct genl_info *info)
@@ -175,16 +264,30 @@ int devlink_nl_pre_doit_port_optional(const struct genl_split_ops *ops,
return __devlink_nl_pre_doit(skb, info, DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT);
}
-void devlink_nl_post_doit(const struct genl_split_ops *ops,
- struct sk_buff *skb, struct genl_info *info)
+static void __devlink_nl_post_doit(struct sk_buff *skb, struct genl_info *info,
+ u8 flags)
{
+ bool dev_lock = flags & DEVLINK_NL_FLAG_NEED_DEV_LOCK;
struct devlink *devlink;
devlink = info->user_ptr[0];
- devl_unlock(devlink);
+ devl_dev_unlock(devlink, dev_lock);
devlink_put(devlink);
}
+void devlink_nl_post_doit(const struct genl_split_ops *ops,
+ struct sk_buff *skb, struct genl_info *info)
+{
+ __devlink_nl_post_doit(skb, info, 0);
+}
+
+void
+devlink_nl_post_doit_dev_lock(const struct genl_split_ops *ops,
+ struct sk_buff *skb, struct genl_info *info)
+{
+ __devlink_nl_post_doit(skb, info, DEVLINK_NL_FLAG_NEED_DEV_LOCK);
+}
+
static int devlink_nl_inst_single_dumpit(struct sk_buff *msg,
struct netlink_callback *cb, int flags,
devlink_nl_dump_one_func_t *dump_one,
@@ -193,7 +296,7 @@ static int devlink_nl_inst_single_dumpit(struct sk_buff *msg,
struct devlink *devlink;
int err;
- devlink = devlink_get_from_attrs_lock(sock_net(msg->sk), attrs);
+ devlink = devlink_get_from_attrs_lock(sock_net(msg->sk), attrs, false);
if (IS_ERR(devlink))
return PTR_ERR(devlink);
err = dump_one(msg, devlink, cb, flags | NLM_F_DUMP_FILTERED);
@@ -255,272 +358,18 @@ int devlink_nl_dumpit(struct sk_buff *msg, struct netlink_callback *cb,
return devlink_nl_inst_iter_dumpit(msg, cb, flags, dump_one);
}
-static const struct genl_small_ops devlink_nl_small_ops[40] = {
- {
- .cmd = DEVLINK_CMD_PORT_SET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_port_set_doit,
- .flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
- },
- {
- .cmd = DEVLINK_CMD_RATE_SET,
- .doit = devlink_nl_cmd_rate_set_doit,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_RATE_NEW,
- .doit = devlink_nl_cmd_rate_new_doit,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_RATE_DEL,
- .doit = devlink_nl_cmd_rate_del_doit,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_PORT_SPLIT,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_port_split_doit,
- .flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
- },
- {
- .cmd = DEVLINK_CMD_PORT_UNSPLIT,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_port_unsplit_doit,
- .flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
- },
- {
- .cmd = DEVLINK_CMD_PORT_NEW,
- .doit = devlink_nl_cmd_port_new_doit,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_PORT_DEL,
- .doit = devlink_nl_cmd_port_del_doit,
- .flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
- },
-
- {
- .cmd = DEVLINK_CMD_LINECARD_SET,
- .doit = devlink_nl_cmd_linecard_set_doit,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_SB_POOL_SET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_sb_pool_set_doit,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_SB_PORT_POOL_SET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_sb_port_pool_set_doit,
- .flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
- },
- {
- .cmd = DEVLINK_CMD_SB_TC_POOL_BIND_SET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_sb_tc_pool_bind_set_doit,
- .flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
- },
- {
- .cmd = DEVLINK_CMD_SB_OCC_SNAPSHOT,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_sb_occ_snapshot_doit,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_SB_OCC_MAX_CLEAR,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_sb_occ_max_clear_doit,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_ESWITCH_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_eswitch_get_doit,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_ESWITCH_SET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_eswitch_set_doit,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_DPIPE_TABLE_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_dpipe_table_get,
- /* can be retrieved by unprivileged users */
- },
- {
- .cmd = DEVLINK_CMD_DPIPE_ENTRIES_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_dpipe_entries_get,
- /* can be retrieved by unprivileged users */
- },
- {
- .cmd = DEVLINK_CMD_DPIPE_HEADERS_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_dpipe_headers_get,
- /* can be retrieved by unprivileged users */
- },
- {
- .cmd = DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_dpipe_table_counters_set,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_RESOURCE_SET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_resource_set,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_RESOURCE_DUMP,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_resource_dump,
- /* can be retrieved by unprivileged users */
- },
- {
- .cmd = DEVLINK_CMD_RELOAD,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_reload,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_PARAM_SET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_param_set_doit,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_PORT_PARAM_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_port_param_get_doit,
- .dumpit = devlink_nl_cmd_port_param_get_dumpit,
- .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
- /* can be retrieved by unprivileged users */
- },
- {
- .cmd = DEVLINK_CMD_PORT_PARAM_SET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_port_param_set_doit,
- .flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
- },
- {
- .cmd = DEVLINK_CMD_REGION_NEW,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_region_new,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_REGION_DEL,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_region_del,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_REGION_READ,
- .validate = GENL_DONT_VALIDATE_STRICT |
- GENL_DONT_VALIDATE_DUMP_STRICT,
- .dumpit = devlink_nl_cmd_region_read_dumpit,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_HEALTH_REPORTER_SET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_health_reporter_set_doit,
- .flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT,
- },
- {
- .cmd = DEVLINK_CMD_HEALTH_REPORTER_RECOVER,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_health_reporter_recover_doit,
- .flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT,
- },
- {
- .cmd = DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_health_reporter_diagnose_doit,
- .flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT,
- },
- {
- .cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET,
- .validate = GENL_DONT_VALIDATE_STRICT |
- GENL_DONT_VALIDATE_DUMP_STRICT,
- .dumpit = devlink_nl_cmd_health_reporter_dump_get_dumpit,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_health_reporter_dump_clear_doit,
- .flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT,
- },
- {
- .cmd = DEVLINK_CMD_HEALTH_REPORTER_TEST,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_health_reporter_test_doit,
- .flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT,
- },
- {
- .cmd = DEVLINK_CMD_FLASH_UPDATE,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_flash_update,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_TRAP_SET,
- .doit = devlink_nl_cmd_trap_set_doit,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_TRAP_GROUP_SET,
- .doit = devlink_nl_cmd_trap_group_set_doit,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_TRAP_POLICER_SET,
- .doit = devlink_nl_cmd_trap_policer_set_doit,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_SELFTESTS_RUN,
- .doit = devlink_nl_cmd_selftests_run,
- .flags = GENL_ADMIN_PERM,
- },
- /* -- No new ops here! Use split ops going forward! -- */
-};
-
struct genl_family devlink_nl_family __ro_after_init = {
.name = DEVLINK_GENL_NAME,
.version = DEVLINK_GENL_VERSION,
- .maxattr = DEVLINK_ATTR_MAX,
- .policy = devlink_nl_policy,
.netnsok = true,
.parallel_ops = true,
- .pre_doit = devlink_nl_pre_doit,
- .post_doit = devlink_nl_post_doit,
.module = THIS_MODULE,
- .small_ops = devlink_nl_small_ops,
- .n_small_ops = ARRAY_SIZE(devlink_nl_small_ops),
.split_ops = devlink_nl_ops,
.n_split_ops = ARRAY_SIZE(devlink_nl_ops),
.resv_start_op = DEVLINK_CMD_SELFTESTS_RUN + 1,
.mcgrps = devlink_nl_mcgrps,
.n_mcgrps = ARRAY_SIZE(devlink_nl_mcgrps),
+ .sock_priv_size = sizeof(struct devlink_nl_sock_priv),
+ .sock_priv_init = devlink_nl_sock_priv_init,
+ .sock_priv_destroy = devlink_nl_sock_priv_destroy,
};
diff --git a/net/devlink/netlink_gen.c b/net/devlink/netlink_gen.c
index 467b7a431de1..c81cf2dd154f 100644
--- a/net/devlink/netlink_gen.c
+++ b/net/devlink/netlink_gen.c
@@ -10,6 +10,18 @@
#include <uapi/linux/devlink.h>
+/* Common nested types */
+const struct nla_policy devlink_dl_port_function_nl_policy[DEVLINK_PORT_FN_ATTR_CAPS + 1] = {
+ [DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR] = { .type = NLA_BINARY, },
+ [DEVLINK_PORT_FN_ATTR_STATE] = NLA_POLICY_MAX(NLA_U8, 1),
+ [DEVLINK_PORT_FN_ATTR_OPSTATE] = NLA_POLICY_MAX(NLA_U8, 1),
+ [DEVLINK_PORT_FN_ATTR_CAPS] = NLA_POLICY_BITFIELD32(15),
+};
+
+const struct nla_policy devlink_dl_selftest_id_nl_policy[DEVLINK_ATTR_SELFTEST_ID_FLASH + 1] = {
+ [DEVLINK_ATTR_SELFTEST_ID_FLASH] = { .type = NLA_FLAG, },
+};
+
/* DEVLINK_CMD_GET - do */
static const struct nla_policy devlink_get_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = {
[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
@@ -29,6 +41,48 @@ static const struct nla_policy devlink_port_get_dump_nl_policy[DEVLINK_ATTR_DEV_
[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
};
+/* DEVLINK_CMD_PORT_SET - do */
+static const struct nla_policy devlink_port_set_nl_policy[DEVLINK_ATTR_PORT_FUNCTION + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, },
+ [DEVLINK_ATTR_PORT_TYPE] = NLA_POLICY_MAX(NLA_U16, 3),
+ [DEVLINK_ATTR_PORT_FUNCTION] = NLA_POLICY_NESTED(devlink_dl_port_function_nl_policy),
+};
+
+/* DEVLINK_CMD_PORT_NEW - do */
+static const struct nla_policy devlink_port_new_nl_policy[DEVLINK_ATTR_PORT_PCI_SF_NUMBER + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, },
+ [DEVLINK_ATTR_PORT_FLAVOUR] = NLA_POLICY_MAX(NLA_U16, 7),
+ [DEVLINK_ATTR_PORT_PCI_PF_NUMBER] = { .type = NLA_U16, },
+ [DEVLINK_ATTR_PORT_PCI_SF_NUMBER] = { .type = NLA_U32, },
+ [DEVLINK_ATTR_PORT_CONTROLLER_NUMBER] = { .type = NLA_U32, },
+};
+
+/* DEVLINK_CMD_PORT_DEL - do */
+static const struct nla_policy devlink_port_del_nl_policy[DEVLINK_ATTR_PORT_INDEX + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, },
+};
+
+/* DEVLINK_CMD_PORT_SPLIT - do */
+static const struct nla_policy devlink_port_split_nl_policy[DEVLINK_ATTR_PORT_SPLIT_COUNT + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, },
+ [DEVLINK_ATTR_PORT_SPLIT_COUNT] = { .type = NLA_U32, },
+};
+
+/* DEVLINK_CMD_PORT_UNSPLIT - do */
+static const struct nla_policy devlink_port_unsplit_nl_policy[DEVLINK_ATTR_PORT_INDEX + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, },
+};
+
/* DEVLINK_CMD_SB_GET - do */
static const struct nla_policy devlink_sb_get_do_nl_policy[DEVLINK_ATTR_SB_INDEX + 1] = {
[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
@@ -56,6 +110,16 @@ static const struct nla_policy devlink_sb_pool_get_dump_nl_policy[DEVLINK_ATTR_D
[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
};
+/* DEVLINK_CMD_SB_POOL_SET - do */
+static const struct nla_policy devlink_sb_pool_set_nl_policy[DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32, },
+ [DEVLINK_ATTR_SB_POOL_INDEX] = { .type = NLA_U16, },
+ [DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE] = NLA_POLICY_MAX(NLA_U8, 1),
+ [DEVLINK_ATTR_SB_POOL_SIZE] = { .type = NLA_U32, },
+};
+
/* DEVLINK_CMD_SB_PORT_POOL_GET - do */
static const struct nla_policy devlink_sb_port_pool_get_do_nl_policy[DEVLINK_ATTR_SB_POOL_INDEX + 1] = {
[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
@@ -71,6 +135,16 @@ static const struct nla_policy devlink_sb_port_pool_get_dump_nl_policy[DEVLINK_A
[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
};
+/* DEVLINK_CMD_SB_PORT_POOL_SET - do */
+static const struct nla_policy devlink_sb_port_pool_set_nl_policy[DEVLINK_ATTR_SB_THRESHOLD + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, },
+ [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32, },
+ [DEVLINK_ATTR_SB_POOL_INDEX] = { .type = NLA_U16, },
+ [DEVLINK_ATTR_SB_THRESHOLD] = { .type = NLA_U32, },
+};
+
/* DEVLINK_CMD_SB_TC_POOL_BIND_GET - do */
static const struct nla_policy devlink_sb_tc_pool_bind_get_do_nl_policy[DEVLINK_ATTR_SB_TC_INDEX + 1] = {
[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
@@ -87,6 +161,100 @@ static const struct nla_policy devlink_sb_tc_pool_bind_get_dump_nl_policy[DEVLIN
[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
};
+/* DEVLINK_CMD_SB_TC_POOL_BIND_SET - do */
+static const struct nla_policy devlink_sb_tc_pool_bind_set_nl_policy[DEVLINK_ATTR_SB_TC_INDEX + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, },
+ [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32, },
+ [DEVLINK_ATTR_SB_POOL_INDEX] = { .type = NLA_U16, },
+ [DEVLINK_ATTR_SB_POOL_TYPE] = NLA_POLICY_MAX(NLA_U8, 1),
+ [DEVLINK_ATTR_SB_TC_INDEX] = { .type = NLA_U16, },
+ [DEVLINK_ATTR_SB_THRESHOLD] = { .type = NLA_U32, },
+};
+
+/* DEVLINK_CMD_SB_OCC_SNAPSHOT - do */
+static const struct nla_policy devlink_sb_occ_snapshot_nl_policy[DEVLINK_ATTR_SB_INDEX + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32, },
+};
+
+/* DEVLINK_CMD_SB_OCC_MAX_CLEAR - do */
+static const struct nla_policy devlink_sb_occ_max_clear_nl_policy[DEVLINK_ATTR_SB_INDEX + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32, },
+};
+
+/* DEVLINK_CMD_ESWITCH_GET - do */
+static const struct nla_policy devlink_eswitch_get_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_ESWITCH_SET - do */
+static const struct nla_policy devlink_eswitch_set_nl_policy[DEVLINK_ATTR_ESWITCH_ENCAP_MODE + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_ESWITCH_MODE] = NLA_POLICY_MAX(NLA_U16, 1),
+ [DEVLINK_ATTR_ESWITCH_INLINE_MODE] = NLA_POLICY_MAX(NLA_U16, 3),
+ [DEVLINK_ATTR_ESWITCH_ENCAP_MODE] = NLA_POLICY_MAX(NLA_U8, 1),
+};
+
+/* DEVLINK_CMD_DPIPE_TABLE_GET - do */
+static const struct nla_policy devlink_dpipe_table_get_nl_policy[DEVLINK_ATTR_DPIPE_TABLE_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DPIPE_TABLE_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_DPIPE_ENTRIES_GET - do */
+static const struct nla_policy devlink_dpipe_entries_get_nl_policy[DEVLINK_ATTR_DPIPE_TABLE_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DPIPE_TABLE_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_DPIPE_HEADERS_GET - do */
+static const struct nla_policy devlink_dpipe_headers_get_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET - do */
+static const struct nla_policy devlink_dpipe_table_counters_set_nl_policy[DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DPIPE_TABLE_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED] = { .type = NLA_U8, },
+};
+
+/* DEVLINK_CMD_RESOURCE_SET - do */
+static const struct nla_policy devlink_resource_set_nl_policy[DEVLINK_ATTR_RESOURCE_SIZE + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_RESOURCE_ID] = { .type = NLA_U64, },
+ [DEVLINK_ATTR_RESOURCE_SIZE] = { .type = NLA_U64, },
+};
+
+/* DEVLINK_CMD_RESOURCE_DUMP - do */
+static const struct nla_policy devlink_resource_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_RELOAD - do */
+static const struct nla_policy devlink_reload_nl_policy[DEVLINK_ATTR_RELOAD_LIMITS + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_RELOAD_ACTION] = NLA_POLICY_RANGE(NLA_U8, 1, 2),
+ [DEVLINK_ATTR_RELOAD_LIMITS] = NLA_POLICY_BITFIELD32(6),
+ [DEVLINK_ATTR_NETNS_PID] = { .type = NLA_U32, },
+ [DEVLINK_ATTR_NETNS_FD] = { .type = NLA_U32, },
+ [DEVLINK_ATTR_NETNS_ID] = { .type = NLA_U32, },
+};
+
/* DEVLINK_CMD_PARAM_GET - do */
static const struct nla_policy devlink_param_get_do_nl_policy[DEVLINK_ATTR_PARAM_NAME + 1] = {
[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
@@ -100,6 +268,15 @@ static const struct nla_policy devlink_param_get_dump_nl_policy[DEVLINK_ATTR_DEV
[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
};
+/* DEVLINK_CMD_PARAM_SET - do */
+static const struct nla_policy devlink_param_set_nl_policy[DEVLINK_ATTR_PARAM_VALUE_CMODE + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_PARAM_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_PARAM_TYPE] = { .type = NLA_U8, },
+ [DEVLINK_ATTR_PARAM_VALUE_CMODE] = NLA_POLICY_MAX(NLA_U8, 2),
+};
+
/* DEVLINK_CMD_REGION_GET - do */
static const struct nla_policy devlink_region_get_do_nl_policy[DEVLINK_ATTR_REGION_NAME + 1] = {
[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
@@ -114,6 +291,50 @@ static const struct nla_policy devlink_region_get_dump_nl_policy[DEVLINK_ATTR_DE
[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
};
+/* DEVLINK_CMD_REGION_NEW - do */
+static const struct nla_policy devlink_region_new_nl_policy[DEVLINK_ATTR_REGION_SNAPSHOT_ID + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, },
+ [DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_REGION_SNAPSHOT_ID] = { .type = NLA_U32, },
+};
+
+/* DEVLINK_CMD_REGION_DEL - do */
+static const struct nla_policy devlink_region_del_nl_policy[DEVLINK_ATTR_REGION_SNAPSHOT_ID + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, },
+ [DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_REGION_SNAPSHOT_ID] = { .type = NLA_U32, },
+};
+
+/* DEVLINK_CMD_REGION_READ - dump */
+static const struct nla_policy devlink_region_read_nl_policy[DEVLINK_ATTR_REGION_DIRECT + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, },
+ [DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_REGION_SNAPSHOT_ID] = { .type = NLA_U32, },
+ [DEVLINK_ATTR_REGION_DIRECT] = { .type = NLA_FLAG, },
+ [DEVLINK_ATTR_REGION_CHUNK_ADDR] = { .type = NLA_U64, },
+ [DEVLINK_ATTR_REGION_CHUNK_LEN] = { .type = NLA_U64, },
+};
+
+/* DEVLINK_CMD_PORT_PARAM_GET - do */
+static const struct nla_policy devlink_port_param_get_nl_policy[DEVLINK_ATTR_PORT_INDEX + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, },
+};
+
+/* DEVLINK_CMD_PORT_PARAM_SET - do */
+static const struct nla_policy devlink_port_param_set_nl_policy[DEVLINK_ATTR_PORT_INDEX + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, },
+};
+
/* DEVLINK_CMD_INFO_GET - do */
static const struct nla_policy devlink_info_get_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = {
[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
@@ -135,6 +356,58 @@ static const struct nla_policy devlink_health_reporter_get_dump_nl_policy[DEVLIN
[DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, },
};
+/* DEVLINK_CMD_HEALTH_REPORTER_SET - do */
+static const struct nla_policy devlink_health_reporter_set_nl_policy[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, },
+ [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] = { .type = NLA_U64, },
+ [DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER] = { .type = NLA_U8, },
+ [DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP] = { .type = NLA_U8, },
+};
+
+/* DEVLINK_CMD_HEALTH_REPORTER_RECOVER - do */
+static const struct nla_policy devlink_health_reporter_recover_nl_policy[DEVLINK_ATTR_HEALTH_REPORTER_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, },
+ [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE - do */
+static const struct nla_policy devlink_health_reporter_diagnose_nl_policy[DEVLINK_ATTR_HEALTH_REPORTER_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, },
+ [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET - dump */
+static const struct nla_policy devlink_health_reporter_dump_get_nl_policy[DEVLINK_ATTR_HEALTH_REPORTER_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, },
+ [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR - do */
+static const struct nla_policy devlink_health_reporter_dump_clear_nl_policy[DEVLINK_ATTR_HEALTH_REPORTER_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, },
+ [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_FLASH_UPDATE - do */
+static const struct nla_policy devlink_flash_update_nl_policy[DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_FLASH_UPDATE_COMPONENT] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK] = NLA_POLICY_BITFIELD32(3),
+};
+
/* DEVLINK_CMD_TRAP_GET - do */
static const struct nla_policy devlink_trap_get_do_nl_policy[DEVLINK_ATTR_TRAP_NAME + 1] = {
[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
@@ -148,6 +421,14 @@ static const struct nla_policy devlink_trap_get_dump_nl_policy[DEVLINK_ATTR_DEV_
[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
};
+/* DEVLINK_CMD_TRAP_SET - do */
+static const struct nla_policy devlink_trap_set_nl_policy[DEVLINK_ATTR_TRAP_ACTION + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_TRAP_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_TRAP_ACTION] = NLA_POLICY_MAX(NLA_U8, 2),
+};
+
/* DEVLINK_CMD_TRAP_GROUP_GET - do */
static const struct nla_policy devlink_trap_group_get_do_nl_policy[DEVLINK_ATTR_TRAP_GROUP_NAME + 1] = {
[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
@@ -161,6 +442,15 @@ static const struct nla_policy devlink_trap_group_get_dump_nl_policy[DEVLINK_ATT
[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
};
+/* DEVLINK_CMD_TRAP_GROUP_SET - do */
+static const struct nla_policy devlink_trap_group_set_nl_policy[DEVLINK_ATTR_TRAP_POLICER_ID + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_TRAP_GROUP_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_TRAP_ACTION] = NLA_POLICY_MAX(NLA_U8, 2),
+ [DEVLINK_ATTR_TRAP_POLICER_ID] = { .type = NLA_U32, },
+};
+
/* DEVLINK_CMD_TRAP_POLICER_GET - do */
static const struct nla_policy devlink_trap_policer_get_do_nl_policy[DEVLINK_ATTR_TRAP_POLICER_ID + 1] = {
[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
@@ -174,6 +464,23 @@ static const struct nla_policy devlink_trap_policer_get_dump_nl_policy[DEVLINK_A
[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
};
+/* DEVLINK_CMD_TRAP_POLICER_SET - do */
+static const struct nla_policy devlink_trap_policer_set_nl_policy[DEVLINK_ATTR_TRAP_POLICER_BURST + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_TRAP_POLICER_ID] = { .type = NLA_U32, },
+ [DEVLINK_ATTR_TRAP_POLICER_RATE] = { .type = NLA_U64, },
+ [DEVLINK_ATTR_TRAP_POLICER_BURST] = { .type = NLA_U64, },
+};
+
+/* DEVLINK_CMD_HEALTH_REPORTER_TEST - do */
+static const struct nla_policy devlink_health_reporter_test_nl_policy[DEVLINK_ATTR_HEALTH_REPORTER_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, },
+ [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING, },
+};
+
/* DEVLINK_CMD_RATE_GET - do */
static const struct nla_policy devlink_rate_get_do_nl_policy[DEVLINK_ATTR_RATE_NODE_NAME + 1] = {
[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
@@ -188,6 +495,37 @@ static const struct nla_policy devlink_rate_get_dump_nl_policy[DEVLINK_ATTR_DEV_
[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
};
+/* DEVLINK_CMD_RATE_SET - do */
+static const struct nla_policy devlink_rate_set_nl_policy[DEVLINK_ATTR_RATE_TX_WEIGHT + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_RATE_NODE_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_RATE_TX_SHARE] = { .type = NLA_U64, },
+ [DEVLINK_ATTR_RATE_TX_MAX] = { .type = NLA_U64, },
+ [DEVLINK_ATTR_RATE_TX_PRIORITY] = { .type = NLA_U32, },
+ [DEVLINK_ATTR_RATE_TX_WEIGHT] = { .type = NLA_U32, },
+ [DEVLINK_ATTR_RATE_PARENT_NODE_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_RATE_NEW - do */
+static const struct nla_policy devlink_rate_new_nl_policy[DEVLINK_ATTR_RATE_TX_WEIGHT + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_RATE_NODE_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_RATE_TX_SHARE] = { .type = NLA_U64, },
+ [DEVLINK_ATTR_RATE_TX_MAX] = { .type = NLA_U64, },
+ [DEVLINK_ATTR_RATE_TX_PRIORITY] = { .type = NLA_U32, },
+ [DEVLINK_ATTR_RATE_TX_WEIGHT] = { .type = NLA_U32, },
+ [DEVLINK_ATTR_RATE_PARENT_NODE_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_RATE_DEL - do */
+static const struct nla_policy devlink_rate_del_nl_policy[DEVLINK_ATTR_RATE_NODE_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_RATE_NODE_NAME] = { .type = NLA_NUL_STRING, },
+};
+
/* DEVLINK_CMD_LINECARD_GET - do */
static const struct nla_policy devlink_linecard_get_do_nl_policy[DEVLINK_ATTR_LINECARD_INDEX + 1] = {
[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
@@ -201,14 +539,36 @@ static const struct nla_policy devlink_linecard_get_dump_nl_policy[DEVLINK_ATTR_
[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
};
+/* DEVLINK_CMD_LINECARD_SET - do */
+static const struct nla_policy devlink_linecard_set_nl_policy[DEVLINK_ATTR_LINECARD_TYPE + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_LINECARD_INDEX] = { .type = NLA_U32, },
+ [DEVLINK_ATTR_LINECARD_TYPE] = { .type = NLA_NUL_STRING, },
+};
+
/* DEVLINK_CMD_SELFTESTS_GET - do */
static const struct nla_policy devlink_selftests_get_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = {
[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
};
+/* DEVLINK_CMD_SELFTESTS_RUN - do */
+static const struct nla_policy devlink_selftests_run_nl_policy[DEVLINK_ATTR_SELFTESTS + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_SELFTESTS] = NLA_POLICY_NESTED(devlink_dl_selftest_id_nl_policy),
+};
+
+/* DEVLINK_CMD_NOTIFY_FILTER_SET - do */
+static const struct nla_policy devlink_notify_filter_set_nl_policy[DEVLINK_ATTR_PORT_INDEX + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, },
+};
+
/* Ops table for devlink */
-const struct genl_split_ops devlink_nl_ops[32] = {
+const struct genl_split_ops devlink_nl_ops[74] = {
{
.cmd = DEVLINK_CMD_GET,
.validate = GENL_DONT_VALIDATE_STRICT,
@@ -243,6 +603,56 @@ const struct genl_split_ops devlink_nl_ops[32] = {
.flags = GENL_CMD_CAP_DUMP,
},
{
+ .cmd = DEVLINK_CMD_PORT_SET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit_port,
+ .doit = devlink_nl_port_set_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_port_set_nl_policy,
+ .maxattr = DEVLINK_ATTR_PORT_FUNCTION,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_PORT_NEW,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_port_new_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_port_new_nl_policy,
+ .maxattr = DEVLINK_ATTR_PORT_PCI_SF_NUMBER,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_PORT_DEL,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit_port,
+ .doit = devlink_nl_port_del_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_port_del_nl_policy,
+ .maxattr = DEVLINK_ATTR_PORT_INDEX,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_PORT_SPLIT,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit_port,
+ .doit = devlink_nl_port_split_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_port_split_nl_policy,
+ .maxattr = DEVLINK_ATTR_PORT_SPLIT_COUNT,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_PORT_UNSPLIT,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit_port,
+ .doit = devlink_nl_port_unsplit_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_port_unsplit_nl_policy,
+ .maxattr = DEVLINK_ATTR_PORT_INDEX,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
.cmd = DEVLINK_CMD_SB_GET,
.validate = GENL_DONT_VALIDATE_STRICT,
.pre_doit = devlink_nl_pre_doit,
@@ -277,6 +687,16 @@ const struct genl_split_ops devlink_nl_ops[32] = {
.flags = GENL_CMD_CAP_DUMP,
},
{
+ .cmd = DEVLINK_CMD_SB_POOL_SET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_sb_pool_set_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_sb_pool_set_nl_policy,
+ .maxattr = DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
.cmd = DEVLINK_CMD_SB_PORT_POOL_GET,
.validate = GENL_DONT_VALIDATE_STRICT,
.pre_doit = devlink_nl_pre_doit_port,
@@ -294,6 +714,16 @@ const struct genl_split_ops devlink_nl_ops[32] = {
.flags = GENL_CMD_CAP_DUMP,
},
{
+ .cmd = DEVLINK_CMD_SB_PORT_POOL_SET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit_port,
+ .doit = devlink_nl_sb_port_pool_set_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_sb_port_pool_set_nl_policy,
+ .maxattr = DEVLINK_ATTR_SB_THRESHOLD,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
.cmd = DEVLINK_CMD_SB_TC_POOL_BIND_GET,
.validate = GENL_DONT_VALIDATE_STRICT,
.pre_doit = devlink_nl_pre_doit_port,
@@ -311,6 +741,126 @@ const struct genl_split_ops devlink_nl_ops[32] = {
.flags = GENL_CMD_CAP_DUMP,
},
{
+ .cmd = DEVLINK_CMD_SB_TC_POOL_BIND_SET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit_port,
+ .doit = devlink_nl_sb_tc_pool_bind_set_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_sb_tc_pool_bind_set_nl_policy,
+ .maxattr = DEVLINK_ATTR_SB_TC_INDEX,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_SB_OCC_SNAPSHOT,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_sb_occ_snapshot_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_sb_occ_snapshot_nl_policy,
+ .maxattr = DEVLINK_ATTR_SB_INDEX,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_SB_OCC_MAX_CLEAR,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_sb_occ_max_clear_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_sb_occ_max_clear_nl_policy,
+ .maxattr = DEVLINK_ATTR_SB_INDEX,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_ESWITCH_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_eswitch_get_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_eswitch_get_nl_policy,
+ .maxattr = DEVLINK_ATTR_DEV_NAME,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_ESWITCH_SET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_eswitch_set_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_eswitch_set_nl_policy,
+ .maxattr = DEVLINK_ATTR_ESWITCH_ENCAP_MODE,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_DPIPE_TABLE_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_dpipe_table_get_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_dpipe_table_get_nl_policy,
+ .maxattr = DEVLINK_ATTR_DPIPE_TABLE_NAME,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_DPIPE_ENTRIES_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_dpipe_entries_get_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_dpipe_entries_get_nl_policy,
+ .maxattr = DEVLINK_ATTR_DPIPE_TABLE_NAME,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_DPIPE_HEADERS_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_dpipe_headers_get_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_dpipe_headers_get_nl_policy,
+ .maxattr = DEVLINK_ATTR_DEV_NAME,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_dpipe_table_counters_set_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_dpipe_table_counters_set_nl_policy,
+ .maxattr = DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_RESOURCE_SET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_resource_set_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_resource_set_nl_policy,
+ .maxattr = DEVLINK_ATTR_RESOURCE_SIZE,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_RESOURCE_DUMP,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_resource_dump_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_resource_dump_nl_policy,
+ .maxattr = DEVLINK_ATTR_DEV_NAME,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_RELOAD,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit_dev_lock,
+ .doit = devlink_nl_reload_doit,
+ .post_doit = devlink_nl_post_doit_dev_lock,
+ .policy = devlink_reload_nl_policy,
+ .maxattr = DEVLINK_ATTR_RELOAD_LIMITS,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
.cmd = DEVLINK_CMD_PARAM_GET,
.validate = GENL_DONT_VALIDATE_STRICT,
.pre_doit = devlink_nl_pre_doit,
@@ -328,6 +878,16 @@ const struct genl_split_ops devlink_nl_ops[32] = {
.flags = GENL_CMD_CAP_DUMP,
},
{
+ .cmd = DEVLINK_CMD_PARAM_SET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_param_set_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_param_set_nl_policy,
+ .maxattr = DEVLINK_ATTR_PARAM_VALUE_CMODE,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
.cmd = DEVLINK_CMD_REGION_GET,
.validate = GENL_DONT_VALIDATE_STRICT,
.pre_doit = devlink_nl_pre_doit_port_optional,
@@ -345,6 +905,60 @@ const struct genl_split_ops devlink_nl_ops[32] = {
.flags = GENL_CMD_CAP_DUMP,
},
{
+ .cmd = DEVLINK_CMD_REGION_NEW,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit_port_optional,
+ .doit = devlink_nl_region_new_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_region_new_nl_policy,
+ .maxattr = DEVLINK_ATTR_REGION_SNAPSHOT_ID,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_REGION_DEL,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit_port_optional,
+ .doit = devlink_nl_region_del_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_region_del_nl_policy,
+ .maxattr = DEVLINK_ATTR_REGION_SNAPSHOT_ID,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_REGION_READ,
+ .validate = GENL_DONT_VALIDATE_DUMP_STRICT,
+ .dumpit = devlink_nl_region_read_dumpit,
+ .policy = devlink_region_read_nl_policy,
+ .maxattr = DEVLINK_ATTR_REGION_DIRECT,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DUMP,
+ },
+ {
+ .cmd = DEVLINK_CMD_PORT_PARAM_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit_port,
+ .doit = devlink_nl_port_param_get_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_port_param_get_nl_policy,
+ .maxattr = DEVLINK_ATTR_PORT_INDEX,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_PORT_PARAM_GET,
+ .validate = GENL_DONT_VALIDATE_DUMP_STRICT,
+ .dumpit = devlink_nl_port_param_get_dumpit,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+ {
+ .cmd = DEVLINK_CMD_PORT_PARAM_SET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit_port,
+ .doit = devlink_nl_port_param_set_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_port_param_set_nl_policy,
+ .maxattr = DEVLINK_ATTR_PORT_INDEX,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
.cmd = DEVLINK_CMD_INFO_GET,
.validate = GENL_DONT_VALIDATE_STRICT,
.pre_doit = devlink_nl_pre_doit,
@@ -378,6 +992,64 @@ const struct genl_split_ops devlink_nl_ops[32] = {
.flags = GENL_CMD_CAP_DUMP,
},
{
+ .cmd = DEVLINK_CMD_HEALTH_REPORTER_SET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit_port_optional,
+ .doit = devlink_nl_health_reporter_set_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_health_reporter_set_nl_policy,
+ .maxattr = DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_HEALTH_REPORTER_RECOVER,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit_port_optional,
+ .doit = devlink_nl_health_reporter_recover_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_health_reporter_recover_nl_policy,
+ .maxattr = DEVLINK_ATTR_HEALTH_REPORTER_NAME,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit_port_optional,
+ .doit = devlink_nl_health_reporter_diagnose_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_health_reporter_diagnose_nl_policy,
+ .maxattr = DEVLINK_ATTR_HEALTH_REPORTER_NAME,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET,
+ .validate = GENL_DONT_VALIDATE_DUMP_STRICT,
+ .dumpit = devlink_nl_health_reporter_dump_get_dumpit,
+ .policy = devlink_health_reporter_dump_get_nl_policy,
+ .maxattr = DEVLINK_ATTR_HEALTH_REPORTER_NAME,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DUMP,
+ },
+ {
+ .cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit_port_optional,
+ .doit = devlink_nl_health_reporter_dump_clear_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_health_reporter_dump_clear_nl_policy,
+ .maxattr = DEVLINK_ATTR_HEALTH_REPORTER_NAME,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_FLASH_UPDATE,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_flash_update_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_flash_update_nl_policy,
+ .maxattr = DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
.cmd = DEVLINK_CMD_TRAP_GET,
.validate = GENL_DONT_VALIDATE_STRICT,
.pre_doit = devlink_nl_pre_doit,
@@ -395,6 +1067,16 @@ const struct genl_split_ops devlink_nl_ops[32] = {
.flags = GENL_CMD_CAP_DUMP,
},
{
+ .cmd = DEVLINK_CMD_TRAP_SET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_trap_set_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_trap_set_nl_policy,
+ .maxattr = DEVLINK_ATTR_TRAP_ACTION,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
.cmd = DEVLINK_CMD_TRAP_GROUP_GET,
.validate = GENL_DONT_VALIDATE_STRICT,
.pre_doit = devlink_nl_pre_doit,
@@ -412,6 +1094,16 @@ const struct genl_split_ops devlink_nl_ops[32] = {
.flags = GENL_CMD_CAP_DUMP,
},
{
+ .cmd = DEVLINK_CMD_TRAP_GROUP_SET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_trap_group_set_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_trap_group_set_nl_policy,
+ .maxattr = DEVLINK_ATTR_TRAP_POLICER_ID,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
.cmd = DEVLINK_CMD_TRAP_POLICER_GET,
.validate = GENL_DONT_VALIDATE_STRICT,
.pre_doit = devlink_nl_pre_doit,
@@ -429,6 +1121,26 @@ const struct genl_split_ops devlink_nl_ops[32] = {
.flags = GENL_CMD_CAP_DUMP,
},
{
+ .cmd = DEVLINK_CMD_TRAP_POLICER_SET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_trap_policer_set_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_trap_policer_set_nl_policy,
+ .maxattr = DEVLINK_ATTR_TRAP_POLICER_BURST,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_HEALTH_REPORTER_TEST,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit_port_optional,
+ .doit = devlink_nl_health_reporter_test_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_health_reporter_test_nl_policy,
+ .maxattr = DEVLINK_ATTR_HEALTH_REPORTER_NAME,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
.cmd = DEVLINK_CMD_RATE_GET,
.validate = GENL_DONT_VALIDATE_STRICT,
.pre_doit = devlink_nl_pre_doit,
@@ -446,6 +1158,36 @@ const struct genl_split_ops devlink_nl_ops[32] = {
.flags = GENL_CMD_CAP_DUMP,
},
{
+ .cmd = DEVLINK_CMD_RATE_SET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_rate_set_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_rate_set_nl_policy,
+ .maxattr = DEVLINK_ATTR_RATE_TX_WEIGHT,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_RATE_NEW,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_rate_new_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_rate_new_nl_policy,
+ .maxattr = DEVLINK_ATTR_RATE_TX_WEIGHT,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_RATE_DEL,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_rate_del_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_rate_del_nl_policy,
+ .maxattr = DEVLINK_ATTR_RATE_NODE_NAME,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
.cmd = DEVLINK_CMD_LINECARD_GET,
.validate = GENL_DONT_VALIDATE_STRICT,
.pre_doit = devlink_nl_pre_doit,
@@ -463,6 +1205,16 @@ const struct genl_split_ops devlink_nl_ops[32] = {
.flags = GENL_CMD_CAP_DUMP,
},
{
+ .cmd = DEVLINK_CMD_LINECARD_SET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_linecard_set_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_linecard_set_nl_policy,
+ .maxattr = DEVLINK_ATTR_LINECARD_TYPE,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
.cmd = DEVLINK_CMD_SELFTESTS_GET,
.validate = GENL_DONT_VALIDATE_STRICT,
.pre_doit = devlink_nl_pre_doit,
@@ -478,4 +1230,21 @@ const struct genl_split_ops devlink_nl_ops[32] = {
.dumpit = devlink_nl_selftests_get_dumpit,
.flags = GENL_CMD_CAP_DUMP,
},
+ {
+ .cmd = DEVLINK_CMD_SELFTESTS_RUN,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_selftests_run_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_selftests_run_nl_policy,
+ .maxattr = DEVLINK_ATTR_SELFTESTS,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_NOTIFY_FILTER_SET,
+ .doit = devlink_nl_notify_filter_set_doit,
+ .policy = devlink_notify_filter_set_nl_policy,
+ .maxattr = DEVLINK_ATTR_PORT_INDEX,
+ .flags = GENL_CMD_CAP_DO,
+ },
};
diff --git a/net/devlink/netlink_gen.h b/net/devlink/netlink_gen.h
index f8bbc93e39be..8f2bd50ddf5e 100644
--- a/net/devlink/netlink_gen.h
+++ b/net/devlink/netlink_gen.h
@@ -11,44 +11,89 @@
#include <uapi/linux/devlink.h>
+/* Common nested types */
+extern const struct nla_policy devlink_dl_port_function_nl_policy[DEVLINK_PORT_FN_ATTR_CAPS + 1];
+extern const struct nla_policy devlink_dl_selftest_id_nl_policy[DEVLINK_ATTR_SELFTEST_ID_FLASH + 1];
+
/* Ops table for devlink */
-extern const struct genl_split_ops devlink_nl_ops[32];
+extern const struct genl_split_ops devlink_nl_ops[74];
int devlink_nl_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
struct genl_info *info);
int devlink_nl_pre_doit_port(const struct genl_split_ops *ops,
struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_pre_doit_dev_lock(const struct genl_split_ops *ops,
+ struct sk_buff *skb, struct genl_info *info);
int devlink_nl_pre_doit_port_optional(const struct genl_split_ops *ops,
struct sk_buff *skb,
struct genl_info *info);
void
devlink_nl_post_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
struct genl_info *info);
+void
+devlink_nl_post_doit_dev_lock(const struct genl_split_ops *ops,
+ struct sk_buff *skb, struct genl_info *info);
int devlink_nl_get_doit(struct sk_buff *skb, struct genl_info *info);
int devlink_nl_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
int devlink_nl_port_get_doit(struct sk_buff *skb, struct genl_info *info);
int devlink_nl_port_get_dumpit(struct sk_buff *skb,
struct netlink_callback *cb);
+int devlink_nl_port_set_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_port_new_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_port_del_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_port_split_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_port_unsplit_doit(struct sk_buff *skb, struct genl_info *info);
int devlink_nl_sb_get_doit(struct sk_buff *skb, struct genl_info *info);
int devlink_nl_sb_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
int devlink_nl_sb_pool_get_doit(struct sk_buff *skb, struct genl_info *info);
int devlink_nl_sb_pool_get_dumpit(struct sk_buff *skb,
struct netlink_callback *cb);
+int devlink_nl_sb_pool_set_doit(struct sk_buff *skb, struct genl_info *info);
int devlink_nl_sb_port_pool_get_doit(struct sk_buff *skb,
struct genl_info *info);
int devlink_nl_sb_port_pool_get_dumpit(struct sk_buff *skb,
struct netlink_callback *cb);
+int devlink_nl_sb_port_pool_set_doit(struct sk_buff *skb,
+ struct genl_info *info);
int devlink_nl_sb_tc_pool_bind_get_doit(struct sk_buff *skb,
struct genl_info *info);
int devlink_nl_sb_tc_pool_bind_get_dumpit(struct sk_buff *skb,
struct netlink_callback *cb);
+int devlink_nl_sb_tc_pool_bind_set_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_sb_occ_snapshot_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_sb_occ_max_clear_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_eswitch_get_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_eswitch_set_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_dpipe_table_get_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_dpipe_entries_get_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_dpipe_headers_get_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_dpipe_table_counters_set_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_resource_set_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_resource_dump_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_reload_doit(struct sk_buff *skb, struct genl_info *info);
int devlink_nl_param_get_doit(struct sk_buff *skb, struct genl_info *info);
int devlink_nl_param_get_dumpit(struct sk_buff *skb,
struct netlink_callback *cb);
+int devlink_nl_param_set_doit(struct sk_buff *skb, struct genl_info *info);
int devlink_nl_region_get_doit(struct sk_buff *skb, struct genl_info *info);
int devlink_nl_region_get_dumpit(struct sk_buff *skb,
struct netlink_callback *cb);
+int devlink_nl_region_new_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_region_del_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_region_read_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int devlink_nl_port_param_get_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_port_param_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int devlink_nl_port_param_set_doit(struct sk_buff *skb, struct genl_info *info);
int devlink_nl_info_get_doit(struct sk_buff *skb, struct genl_info *info);
int devlink_nl_info_get_dumpit(struct sk_buff *skb,
struct netlink_callback *cb);
@@ -56,24 +101,48 @@ int devlink_nl_health_reporter_get_doit(struct sk_buff *skb,
struct genl_info *info);
int devlink_nl_health_reporter_get_dumpit(struct sk_buff *skb,
struct netlink_callback *cb);
+int devlink_nl_health_reporter_set_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_health_reporter_recover_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_health_reporter_diagnose_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_health_reporter_dump_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int devlink_nl_health_reporter_dump_clear_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_flash_update_doit(struct sk_buff *skb, struct genl_info *info);
int devlink_nl_trap_get_doit(struct sk_buff *skb, struct genl_info *info);
int devlink_nl_trap_get_dumpit(struct sk_buff *skb,
struct netlink_callback *cb);
+int devlink_nl_trap_set_doit(struct sk_buff *skb, struct genl_info *info);
int devlink_nl_trap_group_get_doit(struct sk_buff *skb, struct genl_info *info);
int devlink_nl_trap_group_get_dumpit(struct sk_buff *skb,
struct netlink_callback *cb);
+int devlink_nl_trap_group_set_doit(struct sk_buff *skb, struct genl_info *info);
int devlink_nl_trap_policer_get_doit(struct sk_buff *skb,
struct genl_info *info);
int devlink_nl_trap_policer_get_dumpit(struct sk_buff *skb,
struct netlink_callback *cb);
+int devlink_nl_trap_policer_set_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_health_reporter_test_doit(struct sk_buff *skb,
+ struct genl_info *info);
int devlink_nl_rate_get_doit(struct sk_buff *skb, struct genl_info *info);
int devlink_nl_rate_get_dumpit(struct sk_buff *skb,
struct netlink_callback *cb);
+int devlink_nl_rate_set_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_rate_new_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_rate_del_doit(struct sk_buff *skb, struct genl_info *info);
int devlink_nl_linecard_get_doit(struct sk_buff *skb, struct genl_info *info);
int devlink_nl_linecard_get_dumpit(struct sk_buff *skb,
struct netlink_callback *cb);
+int devlink_nl_linecard_set_doit(struct sk_buff *skb, struct genl_info *info);
int devlink_nl_selftests_get_doit(struct sk_buff *skb, struct genl_info *info);
int devlink_nl_selftests_get_dumpit(struct sk_buff *skb,
struct netlink_callback *cb);
+int devlink_nl_selftests_run_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_notify_filter_set_doit(struct sk_buff *skb,
+ struct genl_info *info);
#endif /* _LINUX_DEVLINK_GEN_H */
diff --git a/net/devlink/param.c b/net/devlink/param.c
index 31275f9d4cb7..22bc3b500518 100644
--- a/net/devlink/param.c
+++ b/net/devlink/param.c
@@ -343,7 +343,7 @@ static void devlink_param_notify(struct devlink *devlink,
* will replay the notifications if the params are added/removed
* outside of the lifetime of the instance.
*/
- if (!devl_is_registered(devlink))
+ if (!devl_is_registered(devlink) || !devlink_nl_notify_need(devlink))
return;
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
@@ -356,8 +356,7 @@ static void devlink_param_notify(struct devlink *devlink,
return;
}
- genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
- msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+ devlink_nl_notify_send(devlink, msg);
}
static void devlink_params_notify(struct devlink *devlink,
@@ -581,7 +580,7 @@ static int __devlink_nl_cmd_param_set_doit(struct devlink *devlink,
return 0;
}
-int devlink_nl_cmd_param_set_doit(struct sk_buff *skb, struct genl_info *info)
+int devlink_nl_param_set_doit(struct sk_buff *skb, struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
@@ -589,22 +588,22 @@ int devlink_nl_cmd_param_set_doit(struct sk_buff *skb, struct genl_info *info)
info, DEVLINK_CMD_PARAM_NEW);
}
-int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg,
- struct netlink_callback *cb)
+int devlink_nl_port_param_get_dumpit(struct sk_buff *msg,
+ struct netlink_callback *cb)
{
NL_SET_ERR_MSG(cb->extack, "Port params are not supported");
return msg->len;
}
-int devlink_nl_cmd_port_param_get_doit(struct sk_buff *skb,
- struct genl_info *info)
+int devlink_nl_port_param_get_doit(struct sk_buff *skb,
+ struct genl_info *info)
{
NL_SET_ERR_MSG(info->extack, "Port params are not supported");
return -EINVAL;
}
-int devlink_nl_cmd_port_param_set_doit(struct sk_buff *skb,
- struct genl_info *info)
+int devlink_nl_port_param_set_doit(struct sk_buff *skb,
+ struct genl_info *info)
{
NL_SET_ERR_MSG(info->extack, "Port params are not supported");
return -EINVAL;
diff --git a/net/devlink/port.c b/net/devlink/port.c
index 4763b42885fb..78592912f657 100644
--- a/net/devlink/port.c
+++ b/net/devlink/port.c
@@ -428,6 +428,13 @@ devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *por
if (err)
goto out;
err = devlink_port_fn_state_fill(port, msg, extack, &msg_updated);
+ if (err)
+ goto out;
+ err = devlink_rel_devlink_handle_put(msg, port->devlink,
+ port->rel_index,
+ DEVLINK_PORT_FN_ATTR_DEVLINK,
+ &msg_updated);
+
out:
if (err || !msg_updated)
nla_nest_cancel(msg, function_attr);
@@ -483,7 +490,7 @@ static int devlink_nl_port_fill(struct sk_buff *msg,
goto nla_put_failure;
if (devlink_port->linecard &&
nla_put_u32(msg, DEVLINK_ATTR_LINECARD_INDEX,
- devlink_port->linecard->index))
+ devlink_linecard_index(devlink_port->linecard)))
goto nla_put_failure;
genlmsg_end(msg, hdr);
@@ -500,12 +507,13 @@ static void devlink_port_notify(struct devlink_port *devlink_port,
enum devlink_command cmd)
{
struct devlink *devlink = devlink_port->devlink;
+ struct devlink_obj_desc desc;
struct sk_buff *msg;
int err;
WARN_ON(cmd != DEVLINK_CMD_PORT_NEW && cmd != DEVLINK_CMD_PORT_DEL);
- if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+ if (!__devl_is_registered(devlink) || !devlink_nl_notify_need(devlink))
return;
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
@@ -518,8 +526,9 @@ static void devlink_port_notify(struct devlink_port *devlink_port,
return;
}
- genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg,
- 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+ devlink_nl_obj_desc_init(&desc, devlink);
+ devlink_nl_obj_desc_port_set(&desc, devlink_port);
+ devlink_nl_notify_send_desc(devlink, msg, &desc);
}
static void devlink_ports_notify(struct devlink *devlink,
@@ -665,7 +674,7 @@ static int devlink_port_function_validate(struct devlink_port *devlink_port,
return -EOPNOTSUPP;
}
if (tb[DEVLINK_PORT_FN_ATTR_STATE] && !ops->port_fn_state_set) {
- NL_SET_ERR_MSG_ATTR(extack, tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR],
+ NL_SET_ERR_MSG_ATTR(extack, tb[DEVLINK_PORT_FN_ATTR_STATE],
"Function does not support state setting");
return -EOPNOTSUPP;
}
@@ -765,7 +774,7 @@ static int devlink_port_function_set(struct devlink_port *port,
return err;
}
-int devlink_nl_cmd_port_set_doit(struct sk_buff *skb, struct genl_info *info)
+int devlink_nl_port_set_doit(struct sk_buff *skb, struct genl_info *info)
{
struct devlink_port *devlink_port = info->user_ptr[1];
int err;
@@ -791,7 +800,7 @@ int devlink_nl_cmd_port_set_doit(struct sk_buff *skb, struct genl_info *info)
return 0;
}
-int devlink_nl_cmd_port_split_doit(struct sk_buff *skb, struct genl_info *info)
+int devlink_nl_port_split_doit(struct sk_buff *skb, struct genl_info *info)
{
struct devlink_port *devlink_port = info->user_ptr[1];
struct devlink *devlink = info->user_ptr[0];
@@ -822,8 +831,7 @@ int devlink_nl_cmd_port_split_doit(struct sk_buff *skb, struct genl_info *info)
info->extack);
}
-int devlink_nl_cmd_port_unsplit_doit(struct sk_buff *skb,
- struct genl_info *info)
+int devlink_nl_port_unsplit_doit(struct sk_buff *skb, struct genl_info *info)
{
struct devlink_port *devlink_port = info->user_ptr[1];
struct devlink *devlink = info->user_ptr[0];
@@ -833,7 +841,7 @@ int devlink_nl_cmd_port_unsplit_doit(struct sk_buff *skb,
return devlink_port->ops->port_unsplit(devlink, devlink_port, info->extack);
}
-int devlink_nl_cmd_port_new_doit(struct sk_buff *skb, struct genl_info *info)
+int devlink_nl_port_new_doit(struct sk_buff *skb, struct genl_info *info)
{
struct netlink_ext_ack *extack = info->extack;
struct devlink_port_new_attrs new_attrs = {};
@@ -897,7 +905,7 @@ err_out_port_del:
return err;
}
-int devlink_nl_cmd_port_del_doit(struct sk_buff *skb, struct genl_info *info)
+int devlink_nl_port_del_doit(struct sk_buff *skb, struct genl_info *info)
{
struct devlink_port *devlink_port = info->user_ptr[1];
struct netlink_ext_ack *extack = info->extack;
@@ -1392,6 +1400,50 @@ void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, u32 contro
}
EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_sf_set);
+static void devlink_port_rel_notify_cb(struct devlink *devlink, u32 port_index)
+{
+ struct devlink_port *devlink_port;
+
+ devlink_port = devlink_port_get_by_index(devlink, port_index);
+ if (!devlink_port)
+ return;
+ devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
+}
+
+static void devlink_port_rel_cleanup_cb(struct devlink *devlink, u32 port_index,
+ u32 rel_index)
+{
+ struct devlink_port *devlink_port;
+
+ devlink_port = devlink_port_get_by_index(devlink, port_index);
+ if (devlink_port && devlink_port->rel_index == rel_index)
+ devlink_port->rel_index = 0;
+}
+
+/**
+ * devl_port_fn_devlink_set - Attach peer devlink
+ * instance to port function.
+ * @devlink_port: devlink port
+ * @fn_devlink: devlink instance to attach
+ */
+int devl_port_fn_devlink_set(struct devlink_port *devlink_port,
+ struct devlink *fn_devlink)
+{
+ ASSERT_DEVLINK_PORT_REGISTERED(devlink_port);
+
+ if (WARN_ON(devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PCI_SF ||
+ devlink_port->attrs.pci_sf.external))
+ return -EINVAL;
+
+ return devlink_rel_nested_in_add(&devlink_port->rel_index,
+ devlink_port->devlink->index,
+ devlink_port->index,
+ devlink_port_rel_notify_cb,
+ devlink_port_rel_cleanup_cb,
+ fn_devlink);
+}
+EXPORT_SYMBOL_GPL(devl_port_fn_devlink_set);
+
/**
* devlink_port_linecard_set - Link port with a linecard
*
@@ -1420,7 +1472,7 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port,
case DEVLINK_PORT_FLAVOUR_PHYSICAL:
if (devlink_port->linecard)
n = snprintf(name, len, "l%u",
- devlink_port->linecard->index);
+ devlink_linecard_index(devlink_port->linecard));
if (n < len)
n += snprintf(name + n, len - n, "p%u",
attrs->phys.port_number);
diff --git a/net/devlink/rate.c b/net/devlink/rate.c
index dff1593b8406..7139e67e93ae 100644
--- a/net/devlink/rate.c
+++ b/net/devlink/rate.c
@@ -146,7 +146,7 @@ static void devlink_rate_notify(struct devlink_rate *devlink_rate,
WARN_ON(cmd != DEVLINK_CMD_RATE_NEW && cmd != DEVLINK_CMD_RATE_DEL);
- if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+ if (!devl_is_registered(devlink) || !devlink_nl_notify_need(devlink))
return;
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
@@ -159,8 +159,7 @@ static void devlink_rate_notify(struct devlink_rate *devlink_rate,
return;
}
- genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg,
- 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+ devlink_nl_notify_send(devlink, msg);
}
void devlink_rates_notify_register(struct devlink *devlink)
@@ -458,7 +457,7 @@ static bool devlink_rate_set_ops_supported(const struct devlink_ops *ops,
return true;
}
-int devlink_nl_cmd_rate_set_doit(struct sk_buff *skb, struct genl_info *info)
+int devlink_nl_rate_set_doit(struct sk_buff *skb, struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
struct devlink_rate *devlink_rate;
@@ -480,7 +479,7 @@ int devlink_nl_cmd_rate_set_doit(struct sk_buff *skb, struct genl_info *info)
return err;
}
-int devlink_nl_cmd_rate_new_doit(struct sk_buff *skb, struct genl_info *info)
+int devlink_nl_rate_new_doit(struct sk_buff *skb, struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
struct devlink_rate *rate_node;
@@ -536,7 +535,7 @@ err_strdup:
return err;
}
-int devlink_nl_cmd_rate_del_doit(struct sk_buff *skb, struct genl_info *info)
+int devlink_nl_rate_del_doit(struct sk_buff *skb, struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
struct devlink_rate *rate_node;
diff --git a/net/devlink/region.c b/net/devlink/region.c
index d197cdb662db..7319127c5913 100644
--- a/net/devlink/region.c
+++ b/net/devlink/region.c
@@ -234,15 +234,15 @@ static void devlink_nl_region_notify(struct devlink_region *region,
struct sk_buff *msg;
WARN_ON(cmd != DEVLINK_CMD_REGION_NEW && cmd != DEVLINK_CMD_REGION_DEL);
- if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+
+ if (!__devl_is_registered(devlink) || !devlink_nl_notify_need(devlink))
return;
msg = devlink_nl_region_notify_build(region, snapshot, cmd, 0, 0);
if (IS_ERR(msg))
return;
- genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg,
- 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+ devlink_nl_notify_send(devlink, msg);
}
void devlink_regions_notify_register(struct devlink *devlink)
@@ -588,7 +588,7 @@ int devlink_nl_region_get_dumpit(struct sk_buff *skb,
return devlink_nl_dumpit(skb, cb, devlink_nl_region_get_dump_one);
}
-int devlink_nl_cmd_region_del(struct sk_buff *skb, struct genl_info *info)
+int devlink_nl_region_del_doit(struct sk_buff *skb, struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
struct devlink_snapshot *snapshot;
@@ -633,7 +633,7 @@ int devlink_nl_cmd_region_del(struct sk_buff *skb, struct genl_info *info)
return 0;
}
-int devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info)
+int devlink_nl_region_new_doit(struct sk_buff *skb, struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
struct devlink_snapshot *snapshot;
@@ -863,8 +863,8 @@ devlink_region_direct_fill(void *cb_priv, u8 *chunk, u32 chunk_size,
curr_offset, chunk_size, chunk);
}
-int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
- struct netlink_callback *cb)
+int devlink_nl_region_read_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
{
const struct genl_dumpit_info *info = genl_dumpit_info(cb);
struct devlink_nl_dump_state *state = devlink_dump_state(cb);
@@ -883,7 +883,8 @@ int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
start_offset = state->start_offset;
- devlink = devlink_get_from_attrs_lock(sock_net(cb->skb->sk), attrs);
+ devlink = devlink_get_from_attrs_lock(sock_net(cb->skb->sk), attrs,
+ false);
if (IS_ERR(devlink))
return PTR_ERR(devlink);
diff --git a/net/devlink/resource.c b/net/devlink/resource.c
index c8b615e4c385..594c8aeb3bfa 100644
--- a/net/devlink/resource.c
+++ b/net/devlink/resource.c
@@ -105,7 +105,7 @@ devlink_resource_validate_size(struct devlink_resource *resource, u64 size,
return err;
}
-int devlink_nl_cmd_resource_set(struct sk_buff *skb, struct genl_info *info)
+int devlink_nl_resource_set_doit(struct sk_buff *skb, struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
struct devlink_resource *resource;
@@ -285,7 +285,7 @@ err_resource_put:
return err;
}
-int devlink_nl_cmd_resource_dump(struct sk_buff *skb, struct genl_info *info)
+int devlink_nl_resource_dump_doit(struct sk_buff *skb, struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
diff --git a/net/devlink/sb.c b/net/devlink/sb.c
index bd677fff5ec8..0a76bb32502b 100644
--- a/net/devlink/sb.c
+++ b/net/devlink/sb.c
@@ -413,7 +413,7 @@ static int devlink_sb_pool_set(struct devlink *devlink, unsigned int sb_index,
return -EOPNOTSUPP;
}
-int devlink_nl_cmd_sb_pool_set_doit(struct sk_buff *skb, struct genl_info *info)
+int devlink_nl_sb_pool_set_doit(struct sk_buff *skb, struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
enum devlink_sb_threshold_type threshold_type;
@@ -621,8 +621,8 @@ static int devlink_sb_port_pool_set(struct devlink_port *devlink_port,
return -EOPNOTSUPP;
}
-int devlink_nl_cmd_sb_port_pool_set_doit(struct sk_buff *skb,
- struct genl_info *info)
+int devlink_nl_sb_port_pool_set_doit(struct sk_buff *skb,
+ struct genl_info *info)
{
struct devlink_port *devlink_port = info->user_ptr[1];
struct devlink *devlink = info->user_ptr[0];
@@ -861,8 +861,8 @@ static int devlink_sb_tc_pool_bind_set(struct devlink_port *devlink_port,
return -EOPNOTSUPP;
}
-int devlink_nl_cmd_sb_tc_pool_bind_set_doit(struct sk_buff *skb,
- struct genl_info *info)
+int devlink_nl_sb_tc_pool_bind_set_doit(struct sk_buff *skb,
+ struct genl_info *info)
{
struct devlink_port *devlink_port = info->user_ptr[1];
struct devlink *devlink = info->user_ptr[0];
@@ -900,8 +900,7 @@ int devlink_nl_cmd_sb_tc_pool_bind_set_doit(struct sk_buff *skb,
pool_index, threshold, info->extack);
}
-int devlink_nl_cmd_sb_occ_snapshot_doit(struct sk_buff *skb,
- struct genl_info *info)
+int devlink_nl_sb_occ_snapshot_doit(struct sk_buff *skb, struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
const struct devlink_ops *ops = devlink->ops;
@@ -916,8 +915,8 @@ int devlink_nl_cmd_sb_occ_snapshot_doit(struct sk_buff *skb,
return -EOPNOTSUPP;
}
-int devlink_nl_cmd_sb_occ_max_clear_doit(struct sk_buff *skb,
- struct genl_info *info)
+int devlink_nl_sb_occ_max_clear_doit(struct sk_buff *skb,
+ struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
const struct devlink_ops *ops = devlink->ops;
diff --git a/net/devlink/trap.c b/net/devlink/trap.c
index c26bf9b29bca..5d18c7424df1 100644
--- a/net/devlink/trap.c
+++ b/net/devlink/trap.c
@@ -414,7 +414,7 @@ static int devlink_trap_action_set(struct devlink *devlink,
info->extack);
}
-int devlink_nl_cmd_trap_set_doit(struct sk_buff *skb, struct genl_info *info)
+int devlink_nl_trap_set_doit(struct sk_buff *skb, struct genl_info *info)
{
struct netlink_ext_ack *extack = info->extack;
struct devlink *devlink = info->user_ptr[0];
@@ -684,8 +684,7 @@ static int devlink_trap_group_set(struct devlink *devlink,
return 0;
}
-int devlink_nl_cmd_trap_group_set_doit(struct sk_buff *skb,
- struct genl_info *info)
+int devlink_nl_trap_group_set_doit(struct sk_buff *skb, struct genl_info *info)
{
struct netlink_ext_ack *extack = info->extack;
struct devlink *devlink = info->user_ptr[0];
@@ -926,8 +925,8 @@ devlink_trap_policer_set(struct devlink *devlink,
return 0;
}
-int devlink_nl_cmd_trap_policer_set_doit(struct sk_buff *skb,
- struct genl_info *info)
+int devlink_nl_trap_policer_set_doit(struct sk_buff *skb,
+ struct genl_info *info)
{
struct devlink_trap_policer_item *policer_item;
struct netlink_ext_ack *extack = info->extack;
@@ -1174,7 +1173,8 @@ devlink_trap_group_notify(struct devlink *devlink,
WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_GROUP_NEW &&
cmd != DEVLINK_CMD_TRAP_GROUP_DEL);
- if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+
+ if (!devl_is_registered(devlink) || !devlink_nl_notify_need(devlink))
return;
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
@@ -1188,8 +1188,7 @@ devlink_trap_group_notify(struct devlink *devlink,
return;
}
- genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
- msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+ devlink_nl_notify_send(devlink, msg);
}
void devlink_trap_groups_notify_register(struct devlink *devlink)
@@ -1235,7 +1234,8 @@ static void devlink_trap_notify(struct devlink *devlink,
WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_NEW &&
cmd != DEVLINK_CMD_TRAP_DEL);
- if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+
+ if (!devl_is_registered(devlink) || !devlink_nl_notify_need(devlink))
return;
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
@@ -1248,8 +1248,7 @@ static void devlink_trap_notify(struct devlink *devlink,
return;
}
- genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
- msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+ devlink_nl_notify_send(devlink, msg);
}
void devlink_traps_notify_register(struct devlink *devlink)
@@ -1711,7 +1710,8 @@ devlink_trap_policer_notify(struct devlink *devlink,
WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_POLICER_NEW &&
cmd != DEVLINK_CMD_TRAP_POLICER_DEL);
- if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+
+ if (!devl_is_registered(devlink) || !devlink_nl_notify_need(devlink))
return;
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
@@ -1725,8 +1725,7 @@ devlink_trap_policer_notify(struct devlink *devlink,
return;
}
- genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
- msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+ devlink_nl_notify_send(devlink, msg);
}
void devlink_trap_policers_notify_register(struct devlink *devlink)
diff --git a/net/dns_resolver/Kconfig b/net/dns_resolver/Kconfig
index 155b06163409..7c2dba273e35 100644
--- a/net/dns_resolver/Kconfig
+++ b/net/dns_resolver/Kconfig
@@ -23,6 +23,6 @@ config DNS_RESOLVER
information.
To compile this as a module, choose M here: the module will be called
- dnsresolver.
+ dns_resolver.
If unsure, say N.
diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c
index 01e54b46ae0b..c42ddd85ff1f 100644
--- a/net/dns_resolver/dns_key.c
+++ b/net/dns_resolver/dns_key.c
@@ -91,7 +91,6 @@ const struct cred *dns_resolver_cache;
static int
dns_resolver_preparse(struct key_preparsed_payload *prep)
{
- const struct dns_payload_header *bin;
struct user_key_payload *upayload;
unsigned long derrno;
int ret;
@@ -102,26 +101,34 @@ dns_resolver_preparse(struct key_preparsed_payload *prep)
return -EINVAL;
if (data[0] == 0) {
+ const struct dns_server_list_v1_header *v1;
+
/* It may be a server list. */
- if (datalen <= sizeof(*bin))
+ if (datalen < sizeof(*v1))
return -EINVAL;
- bin = (const struct dns_payload_header *)data;
- kenter("[%u,%u],%u", bin->content, bin->version, datalen);
- if (bin->content != DNS_PAYLOAD_IS_SERVER_LIST) {
+ v1 = (const struct dns_server_list_v1_header *)data;
+ kenter("[%u,%u],%u", v1->hdr.content, v1->hdr.version, datalen);
+ if (v1->hdr.content != DNS_PAYLOAD_IS_SERVER_LIST) {
pr_warn_ratelimited(
"dns_resolver: Unsupported content type (%u)\n",
- bin->content);
+ v1->hdr.content);
return -EINVAL;
}
- if (bin->version != 1) {
+ if (v1->hdr.version != 1) {
pr_warn_ratelimited(
"dns_resolver: Unsupported server list version (%u)\n",
- bin->version);
+ v1->hdr.version);
return -EINVAL;
}
+ if ((v1->status != DNS_LOOKUP_GOOD &&
+ v1->status != DNS_LOOKUP_GOOD_WITH_BAD)) {
+ if (prep->expiry == TIME64_MAX)
+ prep->expiry = ktime_get_real_seconds() + 1;
+ }
+
result_len = datalen;
goto store_result;
}
@@ -314,7 +321,7 @@ static long dns_resolver_read(const struct key *key,
struct key_type key_type_dns_resolver = {
.name = "dns_resolver",
- .flags = KEY_TYPE_NET_DOMAIN,
+ .flags = KEY_TYPE_NET_DOMAIN | KEY_TYPE_INSTANT_REAP,
.preparse = dns_resolver_preparse,
.free_preparse = dns_resolver_free_preparse,
.instantiate = generic_key_instantiate,
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index 12e305824a96..8a1894a42552 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -8,16 +8,16 @@ endif
# the core
obj-$(CONFIG_NET_DSA) += dsa_core.o
dsa_core-y += \
+ conduit.o \
devlink.o \
dsa.o \
- master.o \
netlink.o \
port.o \
- slave.o \
switch.o \
tag.o \
tag_8021q.o \
- trace.o
+ trace.o \
+ user.o
# tagging formats
obj-$(CONFIG_NET_DSA_TAG_AR9331) += tag_ar9331.o
diff --git a/net/dsa/master.c b/net/dsa/conduit.c
index 6be89ab0cc01..3dfdb3cb47dc 100644
--- a/net/dsa/master.c
+++ b/net/dsa/conduit.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
- * Handling of a master device, switching frames via its switch fabric CPU port
+ * Handling of a conduit device, switching frames via its switch fabric CPU port
*
* Copyright (c) 2017 Savoir-faire Linux Inc.
* Vivien Didelot <vivien.didelot@savoirfairelinux.com>
@@ -11,12 +11,12 @@
#include <linux/netlink.h>
#include <net/dsa.h>
+#include "conduit.h"
#include "dsa.h"
-#include "master.h"
#include "port.h"
#include "tag.h"
-static int dsa_master_get_regs_len(struct net_device *dev)
+static int dsa_conduit_get_regs_len(struct net_device *dev)
{
struct dsa_port *cpu_dp = dev->dsa_ptr;
const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops;
@@ -45,8 +45,8 @@ static int dsa_master_get_regs_len(struct net_device *dev)
return ret;
}
-static void dsa_master_get_regs(struct net_device *dev,
- struct ethtool_regs *regs, void *data)
+static void dsa_conduit_get_regs(struct net_device *dev,
+ struct ethtool_regs *regs, void *data)
{
struct dsa_port *cpu_dp = dev->dsa_ptr;
const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops;
@@ -80,9 +80,9 @@ static void dsa_master_get_regs(struct net_device *dev,
}
}
-static void dsa_master_get_ethtool_stats(struct net_device *dev,
- struct ethtool_stats *stats,
- uint64_t *data)
+static void dsa_conduit_get_ethtool_stats(struct net_device *dev,
+ struct ethtool_stats *stats,
+ uint64_t *data)
{
struct dsa_port *cpu_dp = dev->dsa_ptr;
const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops;
@@ -99,9 +99,9 @@ static void dsa_master_get_ethtool_stats(struct net_device *dev,
ds->ops->get_ethtool_stats(ds, port, data + count);
}
-static void dsa_master_get_ethtool_phy_stats(struct net_device *dev,
- struct ethtool_stats *stats,
- uint64_t *data)
+static void dsa_conduit_get_ethtool_phy_stats(struct net_device *dev,
+ struct ethtool_stats *stats,
+ uint64_t *data)
{
struct dsa_port *cpu_dp = dev->dsa_ptr;
const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops;
@@ -125,7 +125,7 @@ static void dsa_master_get_ethtool_phy_stats(struct net_device *dev,
ds->ops->get_ethtool_phy_stats(ds, port, data + count);
}
-static int dsa_master_get_sset_count(struct net_device *dev, int sset)
+static int dsa_conduit_get_sset_count(struct net_device *dev, int sset)
{
struct dsa_port *cpu_dp = dev->dsa_ptr;
const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops;
@@ -147,8 +147,8 @@ static int dsa_master_get_sset_count(struct net_device *dev, int sset)
return count;
}
-static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset,
- uint8_t *data)
+static void dsa_conduit_get_strings(struct net_device *dev, uint32_t stringset,
+ uint8_t *data)
{
struct dsa_port *cpu_dp = dev->dsa_ptr;
const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops;
@@ -195,12 +195,12 @@ static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset,
}
}
-/* Deny PTP operations on master if there is at least one switch in the tree
+/* Deny PTP operations on conduit if there is at least one switch in the tree
* that is PTP capable.
*/
-int __dsa_master_hwtstamp_validate(struct net_device *dev,
- const struct kernel_hwtstamp_config *config,
- struct netlink_ext_ack *extack)
+int __dsa_conduit_hwtstamp_validate(struct net_device *dev,
+ const struct kernel_hwtstamp_config *config,
+ struct netlink_ext_ack *extack)
{
struct dsa_port *cpu_dp = dev->dsa_ptr;
struct dsa_switch *ds = cpu_dp->ds;
@@ -212,7 +212,7 @@ int __dsa_master_hwtstamp_validate(struct net_device *dev,
list_for_each_entry(dp, &dst->ports, list) {
if (dsa_port_supports_hwtstamp(dp)) {
NL_SET_ERR_MSG(extack,
- "HW timestamping not allowed on DSA master when switch supports the operation");
+ "HW timestamping not allowed on DSA conduit when switch supports the operation");
return -EBUSY;
}
}
@@ -220,7 +220,7 @@ int __dsa_master_hwtstamp_validate(struct net_device *dev,
return 0;
}
-static int dsa_master_ethtool_setup(struct net_device *dev)
+static int dsa_conduit_ethtool_setup(struct net_device *dev)
{
struct dsa_port *cpu_dp = dev->dsa_ptr;
struct dsa_switch *ds = cpu_dp->ds;
@@ -237,19 +237,19 @@ static int dsa_master_ethtool_setup(struct net_device *dev)
if (cpu_dp->orig_ethtool_ops)
memcpy(ops, cpu_dp->orig_ethtool_ops, sizeof(*ops));
- ops->get_regs_len = dsa_master_get_regs_len;
- ops->get_regs = dsa_master_get_regs;
- ops->get_sset_count = dsa_master_get_sset_count;
- ops->get_ethtool_stats = dsa_master_get_ethtool_stats;
- ops->get_strings = dsa_master_get_strings;
- ops->get_ethtool_phy_stats = dsa_master_get_ethtool_phy_stats;
+ ops->get_regs_len = dsa_conduit_get_regs_len;
+ ops->get_regs = dsa_conduit_get_regs;
+ ops->get_sset_count = dsa_conduit_get_sset_count;
+ ops->get_ethtool_stats = dsa_conduit_get_ethtool_stats;
+ ops->get_strings = dsa_conduit_get_strings;
+ ops->get_ethtool_phy_stats = dsa_conduit_get_ethtool_phy_stats;
dev->ethtool_ops = ops;
return 0;
}
-static void dsa_master_ethtool_teardown(struct net_device *dev)
+static void dsa_conduit_ethtool_teardown(struct net_device *dev)
{
struct dsa_port *cpu_dp = dev->dsa_ptr;
@@ -260,16 +260,16 @@ static void dsa_master_ethtool_teardown(struct net_device *dev)
cpu_dp->orig_ethtool_ops = NULL;
}
-/* Keep the master always promiscuous if the tagging protocol requires that
+/* Keep the conduit always promiscuous if the tagging protocol requires that
* (garbles MAC DA) or if it doesn't support unicast filtering, case in which
* it would revert to promiscuous mode as soon as we call dev_uc_add() on it
* anyway.
*/
-static void dsa_master_set_promiscuity(struct net_device *dev, int inc)
+static void dsa_conduit_set_promiscuity(struct net_device *dev, int inc)
{
const struct dsa_device_ops *ops = dev->dsa_ptr->tag_ops;
- if ((dev->priv_flags & IFF_UNICAST_FLT) && !ops->promisc_on_master)
+ if ((dev->priv_flags & IFF_UNICAST_FLT) && !ops->promisc_on_conduit)
return;
ASSERT_RTNL();
@@ -336,17 +336,17 @@ out:
}
static DEVICE_ATTR_RW(tagging);
-static struct attribute *dsa_slave_attrs[] = {
+static struct attribute *dsa_user_attrs[] = {
&dev_attr_tagging.attr,
NULL
};
static const struct attribute_group dsa_group = {
.name = "dsa",
- .attrs = dsa_slave_attrs,
+ .attrs = dsa_user_attrs,
};
-static void dsa_master_reset_mtu(struct net_device *dev)
+static void dsa_conduit_reset_mtu(struct net_device *dev)
{
int err;
@@ -356,7 +356,7 @@ static void dsa_master_reset_mtu(struct net_device *dev)
"Unable to reset MTU to exclude DSA overheads\n");
}
-int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp)
+int dsa_conduit_setup(struct net_device *dev, struct dsa_port *cpu_dp)
{
const struct dsa_device_ops *tag_ops = cpu_dp->tag_ops;
struct dsa_switch *ds = cpu_dp->ds;
@@ -365,7 +365,7 @@ int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp)
mtu = ETH_DATA_LEN + dsa_tag_protocol_overhead(tag_ops);
- /* The DSA master must use SET_NETDEV_DEV for this to work. */
+ /* The DSA conduit must use SET_NETDEV_DEV for this to work. */
if (!netif_is_lag_master(dev)) {
consumer_link = device_link_add(ds->dev, dev->dev.parent,
DL_FLAG_AUTOREMOVE_CONSUMER);
@@ -376,7 +376,7 @@ int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp)
}
/* The switch driver may not implement ->port_change_mtu(), case in
- * which dsa_slave_change_mtu() will not update the master MTU either,
+ * which dsa_user_change_mtu() will not update the conduit MTU either,
* so we need to do that here.
*/
ret = dev_set_mtu(dev, mtu);
@@ -392,9 +392,9 @@ int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp)
dev->dsa_ptr = cpu_dp;
- dsa_master_set_promiscuity(dev, 1);
+ dsa_conduit_set_promiscuity(dev, 1);
- ret = dsa_master_ethtool_setup(dev);
+ ret = dsa_conduit_ethtool_setup(dev);
if (ret)
goto out_err_reset_promisc;
@@ -405,18 +405,18 @@ int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp)
return ret;
out_err_ethtool_teardown:
- dsa_master_ethtool_teardown(dev);
+ dsa_conduit_ethtool_teardown(dev);
out_err_reset_promisc:
- dsa_master_set_promiscuity(dev, -1);
+ dsa_conduit_set_promiscuity(dev, -1);
return ret;
}
-void dsa_master_teardown(struct net_device *dev)
+void dsa_conduit_teardown(struct net_device *dev)
{
sysfs_remove_group(&dev->dev.kobj, &dsa_group);
- dsa_master_ethtool_teardown(dev);
- dsa_master_reset_mtu(dev);
- dsa_master_set_promiscuity(dev, -1);
+ dsa_conduit_ethtool_teardown(dev);
+ dsa_conduit_reset_mtu(dev);
+ dsa_conduit_set_promiscuity(dev, -1);
dev->dsa_ptr = NULL;
@@ -427,40 +427,40 @@ void dsa_master_teardown(struct net_device *dev)
wmb();
}
-int dsa_master_lag_setup(struct net_device *lag_dev, struct dsa_port *cpu_dp,
- struct netdev_lag_upper_info *uinfo,
- struct netlink_ext_ack *extack)
+int dsa_conduit_lag_setup(struct net_device *lag_dev, struct dsa_port *cpu_dp,
+ struct netdev_lag_upper_info *uinfo,
+ struct netlink_ext_ack *extack)
{
- bool master_setup = false;
+ bool conduit_setup = false;
int err;
if (!netdev_uses_dsa(lag_dev)) {
- err = dsa_master_setup(lag_dev, cpu_dp);
+ err = dsa_conduit_setup(lag_dev, cpu_dp);
if (err)
return err;
- master_setup = true;
+ conduit_setup = true;
}
err = dsa_port_lag_join(cpu_dp, lag_dev, uinfo, extack);
if (err) {
NL_SET_ERR_MSG_WEAK_MOD(extack, "CPU port failed to join LAG");
- goto out_master_teardown;
+ goto out_conduit_teardown;
}
return 0;
-out_master_teardown:
- if (master_setup)
- dsa_master_teardown(lag_dev);
+out_conduit_teardown:
+ if (conduit_setup)
+ dsa_conduit_teardown(lag_dev);
return err;
}
-/* Tear down a master if there isn't any other user port on it,
+/* Tear down a conduit if there isn't any other user port on it,
* optionally also destroying LAG information.
*/
-void dsa_master_lag_teardown(struct net_device *lag_dev,
- struct dsa_port *cpu_dp)
+void dsa_conduit_lag_teardown(struct net_device *lag_dev,
+ struct dsa_port *cpu_dp)
{
struct net_device *upper;
struct list_head *iter;
@@ -468,8 +468,8 @@ void dsa_master_lag_teardown(struct net_device *lag_dev,
dsa_port_lag_leave(cpu_dp, lag_dev);
netdev_for_each_upper_dev_rcu(lag_dev, upper, iter)
- if (dsa_slave_dev_check(upper))
+ if (dsa_user_dev_check(upper))
return;
- dsa_master_teardown(lag_dev);
+ dsa_conduit_teardown(lag_dev);
}
diff --git a/net/dsa/conduit.h b/net/dsa/conduit.h
new file mode 100644
index 000000000000..31f8834f54bb
--- /dev/null
+++ b/net/dsa/conduit.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef __DSA_CONDUIT_H
+#define __DSA_CONDUIT_H
+
+struct dsa_port;
+struct net_device;
+struct netdev_lag_upper_info;
+struct netlink_ext_ack;
+
+int dsa_conduit_setup(struct net_device *dev, struct dsa_port *cpu_dp);
+void dsa_conduit_teardown(struct net_device *dev);
+int dsa_conduit_lag_setup(struct net_device *lag_dev, struct dsa_port *cpu_dp,
+ struct netdev_lag_upper_info *uinfo,
+ struct netlink_ext_ack *extack);
+void dsa_conduit_lag_teardown(struct net_device *lag_dev,
+ struct dsa_port *cpu_dp);
+int __dsa_conduit_hwtstamp_validate(struct net_device *dev,
+ const struct kernel_hwtstamp_config *config,
+ struct netlink_ext_ack *extack);
+
+#endif
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index ccbdb98109f8..ac7be864e80d 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -20,14 +20,14 @@
#include <net/dsa_stubs.h>
#include <net/sch_generic.h>
+#include "conduit.h"
#include "devlink.h"
#include "dsa.h"
-#include "master.h"
#include "netlink.h"
#include "port.h"
-#include "slave.h"
#include "switch.h"
#include "tag.h"
+#include "user.h"
#define DSA_MAX_NUM_OFFLOADING_BRIDGES BITS_PER_LONG
@@ -365,18 +365,18 @@ static struct dsa_port *dsa_tree_find_first_cpu(struct dsa_switch_tree *dst)
return NULL;
}
-struct net_device *dsa_tree_find_first_master(struct dsa_switch_tree *dst)
+struct net_device *dsa_tree_find_first_conduit(struct dsa_switch_tree *dst)
{
struct device_node *ethernet;
- struct net_device *master;
+ struct net_device *conduit;
struct dsa_port *cpu_dp;
cpu_dp = dsa_tree_find_first_cpu(dst);
ethernet = of_parse_phandle(cpu_dp->dn, "ethernet", 0);
- master = of_find_net_device_by_node(ethernet);
+ conduit = of_find_net_device_by_node(ethernet);
of_node_put(ethernet);
- return master;
+ return conduit;
}
/* Assign the default CPU port (the first one in the tree) to all ports of the
@@ -517,7 +517,7 @@ static int dsa_port_setup(struct dsa_port *dp)
break;
case DSA_PORT_TYPE_USER:
of_get_mac_address(dp->dn, dp->mac);
- err = dsa_slave_create(dp);
+ err = dsa_user_create(dp);
break;
}
@@ -554,9 +554,9 @@ static void dsa_port_teardown(struct dsa_port *dp)
dsa_shared_port_link_unregister_of(dp);
break;
case DSA_PORT_TYPE_USER:
- if (dp->slave) {
- dsa_slave_destroy(dp->slave);
- dp->slave = NULL;
+ if (dp->user) {
+ dsa_user_destroy(dp->user);
+ dp->user = NULL;
}
break;
}
@@ -632,9 +632,9 @@ static int dsa_switch_setup(struct dsa_switch *ds)
if (ds->setup)
return 0;
- /* Initialize ds->phys_mii_mask before registering the slave MDIO bus
+ /* Initialize ds->phys_mii_mask before registering the user MDIO bus
* driver and before ops->setup() has run, since the switch drivers and
- * the slave MDIO bus driver rely on these values for probing PHY
+ * the user MDIO bus driver rely on these values for probing PHY
* devices or not
*/
ds->phys_mii_mask |= dsa_user_ports(ds);
@@ -657,21 +657,21 @@ static int dsa_switch_setup(struct dsa_switch *ds)
if (err)
goto teardown;
- if (!ds->slave_mii_bus && ds->ops->phy_read) {
- ds->slave_mii_bus = mdiobus_alloc();
- if (!ds->slave_mii_bus) {
+ if (!ds->user_mii_bus && ds->ops->phy_read) {
+ ds->user_mii_bus = mdiobus_alloc();
+ if (!ds->user_mii_bus) {
err = -ENOMEM;
goto teardown;
}
- dsa_slave_mii_bus_init(ds);
+ dsa_user_mii_bus_init(ds);
dn = of_get_child_by_name(ds->dev->of_node, "mdio");
- err = of_mdiobus_register(ds->slave_mii_bus, dn);
+ err = of_mdiobus_register(ds->user_mii_bus, dn);
of_node_put(dn);
if (err < 0)
- goto free_slave_mii_bus;
+ goto free_user_mii_bus;
}
dsa_switch_devlink_register(ds);
@@ -679,9 +679,9 @@ static int dsa_switch_setup(struct dsa_switch *ds)
ds->setup = true;
return 0;
-free_slave_mii_bus:
- if (ds->slave_mii_bus && ds->ops->phy_read)
- mdiobus_free(ds->slave_mii_bus);
+free_user_mii_bus:
+ if (ds->user_mii_bus && ds->ops->phy_read)
+ mdiobus_free(ds->user_mii_bus);
teardown:
if (ds->ops->teardown)
ds->ops->teardown(ds);
@@ -699,10 +699,10 @@ static void dsa_switch_teardown(struct dsa_switch *ds)
dsa_switch_devlink_unregister(ds);
- if (ds->slave_mii_bus && ds->ops->phy_read) {
- mdiobus_unregister(ds->slave_mii_bus);
- mdiobus_free(ds->slave_mii_bus);
- ds->slave_mii_bus = NULL;
+ if (ds->user_mii_bus && ds->ops->phy_read) {
+ mdiobus_unregister(ds->user_mii_bus);
+ mdiobus_free(ds->user_mii_bus);
+ ds->user_mii_bus = NULL;
}
dsa_switch_teardown_tag_protocol(ds);
@@ -793,7 +793,7 @@ static int dsa_tree_setup_switches(struct dsa_switch_tree *dst)
return err;
}
-static int dsa_tree_setup_master(struct dsa_switch_tree *dst)
+static int dsa_tree_setup_conduit(struct dsa_switch_tree *dst)
{
struct dsa_port *cpu_dp;
int err = 0;
@@ -801,18 +801,18 @@ static int dsa_tree_setup_master(struct dsa_switch_tree *dst)
rtnl_lock();
dsa_tree_for_each_cpu_port(cpu_dp, dst) {
- struct net_device *master = cpu_dp->master;
- bool admin_up = (master->flags & IFF_UP) &&
- !qdisc_tx_is_noop(master);
+ struct net_device *conduit = cpu_dp->conduit;
+ bool admin_up = (conduit->flags & IFF_UP) &&
+ !qdisc_tx_is_noop(conduit);
- err = dsa_master_setup(master, cpu_dp);
+ err = dsa_conduit_setup(conduit, cpu_dp);
if (err)
break;
- /* Replay master state event */
- dsa_tree_master_admin_state_change(dst, master, admin_up);
- dsa_tree_master_oper_state_change(dst, master,
- netif_oper_up(master));
+ /* Replay conduit state event */
+ dsa_tree_conduit_admin_state_change(dst, conduit, admin_up);
+ dsa_tree_conduit_oper_state_change(dst, conduit,
+ netif_oper_up(conduit));
}
rtnl_unlock();
@@ -820,22 +820,22 @@ static int dsa_tree_setup_master(struct dsa_switch_tree *dst)
return err;
}
-static void dsa_tree_teardown_master(struct dsa_switch_tree *dst)
+static void dsa_tree_teardown_conduit(struct dsa_switch_tree *dst)
{
struct dsa_port *cpu_dp;
rtnl_lock();
dsa_tree_for_each_cpu_port(cpu_dp, dst) {
- struct net_device *master = cpu_dp->master;
+ struct net_device *conduit = cpu_dp->conduit;
/* Synthesizing an "admin down" state is sufficient for
- * the switches to get a notification if the master is
+ * the switches to get a notification if the conduit is
* currently up and running.
*/
- dsa_tree_master_admin_state_change(dst, master, false);
+ dsa_tree_conduit_admin_state_change(dst, conduit, false);
- dsa_master_teardown(master);
+ dsa_conduit_teardown(conduit);
}
rtnl_unlock();
@@ -894,13 +894,13 @@ static int dsa_tree_setup(struct dsa_switch_tree *dst)
if (err)
goto teardown_switches;
- err = dsa_tree_setup_master(dst);
+ err = dsa_tree_setup_conduit(dst);
if (err)
goto teardown_ports;
err = dsa_tree_setup_lags(dst);
if (err)
- goto teardown_master;
+ goto teardown_conduit;
dst->setup = true;
@@ -908,8 +908,8 @@ static int dsa_tree_setup(struct dsa_switch_tree *dst)
return 0;
-teardown_master:
- dsa_tree_teardown_master(dst);
+teardown_conduit:
+ dsa_tree_teardown_conduit(dst);
teardown_ports:
dsa_tree_teardown_ports(dst);
teardown_switches:
@@ -929,7 +929,7 @@ static void dsa_tree_teardown(struct dsa_switch_tree *dst)
dsa_tree_teardown_lags(dst);
- dsa_tree_teardown_master(dst);
+ dsa_tree_teardown_conduit(dst);
dsa_tree_teardown_ports(dst);
@@ -978,7 +978,7 @@ out_disconnect:
return err;
}
-/* Since the dsa/tagging sysfs device attribute is per master, the assumption
+/* Since the dsa/tagging sysfs device attribute is per conduit, the assumption
* is that all DSA switches within a tree share the same tagger, otherwise
* they would have formed disjoint trees (different "dsa,member" values).
*/
@@ -999,10 +999,10 @@ int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst,
* restriction, there needs to be another mutex which serializes this.
*/
dsa_tree_for_each_user_port(dp, dst) {
- if (dsa_port_to_master(dp)->flags & IFF_UP)
+ if (dsa_port_to_conduit(dp)->flags & IFF_UP)
goto out_unlock;
- if (dp->slave->flags & IFF_UP)
+ if (dp->user->flags & IFF_UP)
goto out_unlock;
}
@@ -1028,62 +1028,62 @@ out_unlock:
return err;
}
-static void dsa_tree_master_state_change(struct dsa_switch_tree *dst,
- struct net_device *master)
+static void dsa_tree_conduit_state_change(struct dsa_switch_tree *dst,
+ struct net_device *conduit)
{
- struct dsa_notifier_master_state_info info;
- struct dsa_port *cpu_dp = master->dsa_ptr;
+ struct dsa_notifier_conduit_state_info info;
+ struct dsa_port *cpu_dp = conduit->dsa_ptr;
- info.master = master;
- info.operational = dsa_port_master_is_operational(cpu_dp);
+ info.conduit = conduit;
+ info.operational = dsa_port_conduit_is_operational(cpu_dp);
- dsa_tree_notify(dst, DSA_NOTIFIER_MASTER_STATE_CHANGE, &info);
+ dsa_tree_notify(dst, DSA_NOTIFIER_CONDUIT_STATE_CHANGE, &info);
}
-void dsa_tree_master_admin_state_change(struct dsa_switch_tree *dst,
- struct net_device *master,
- bool up)
+void dsa_tree_conduit_admin_state_change(struct dsa_switch_tree *dst,
+ struct net_device *conduit,
+ bool up)
{
- struct dsa_port *cpu_dp = master->dsa_ptr;
+ struct dsa_port *cpu_dp = conduit->dsa_ptr;
bool notify = false;
- /* Don't keep track of admin state on LAG DSA masters,
- * but rather just of physical DSA masters
+ /* Don't keep track of admin state on LAG DSA conduits,
+ * but rather just of physical DSA conduits
*/
- if (netif_is_lag_master(master))
+ if (netif_is_lag_master(conduit))
return;
- if ((dsa_port_master_is_operational(cpu_dp)) !=
- (up && cpu_dp->master_oper_up))
+ if ((dsa_port_conduit_is_operational(cpu_dp)) !=
+ (up && cpu_dp->conduit_oper_up))
notify = true;
- cpu_dp->master_admin_up = up;
+ cpu_dp->conduit_admin_up = up;
if (notify)
- dsa_tree_master_state_change(dst, master);
+ dsa_tree_conduit_state_change(dst, conduit);
}
-void dsa_tree_master_oper_state_change(struct dsa_switch_tree *dst,
- struct net_device *master,
- bool up)
+void dsa_tree_conduit_oper_state_change(struct dsa_switch_tree *dst,
+ struct net_device *conduit,
+ bool up)
{
- struct dsa_port *cpu_dp = master->dsa_ptr;
+ struct dsa_port *cpu_dp = conduit->dsa_ptr;
bool notify = false;
- /* Don't keep track of oper state on LAG DSA masters,
- * but rather just of physical DSA masters
+ /* Don't keep track of oper state on LAG DSA conduits,
+ * but rather just of physical DSA conduits
*/
- if (netif_is_lag_master(master))
+ if (netif_is_lag_master(conduit))
return;
- if ((dsa_port_master_is_operational(cpu_dp)) !=
- (cpu_dp->master_admin_up && up))
+ if ((dsa_port_conduit_is_operational(cpu_dp)) !=
+ (cpu_dp->conduit_admin_up && up))
notify = true;
- cpu_dp->master_oper_up = up;
+ cpu_dp->conduit_oper_up = up;
if (notify)
- dsa_tree_master_state_change(dst, master);
+ dsa_tree_conduit_state_change(dst, conduit);
}
static struct dsa_port *dsa_port_touch(struct dsa_switch *ds, int index)
@@ -1129,7 +1129,7 @@ static int dsa_port_parse_dsa(struct dsa_port *dp)
}
static enum dsa_tag_protocol dsa_get_tag_protocol(struct dsa_port *dp,
- struct net_device *master)
+ struct net_device *conduit)
{
enum dsa_tag_protocol tag_protocol = DSA_TAG_PROTO_NONE;
struct dsa_switch *mds, *ds = dp->ds;
@@ -1140,21 +1140,21 @@ static enum dsa_tag_protocol dsa_get_tag_protocol(struct dsa_port *dp,
* happens the switch driver may want to know if its tagging protocol
* is going to work in such a configuration.
*/
- if (dsa_slave_dev_check(master)) {
- mdp = dsa_slave_to_port(master);
+ if (dsa_user_dev_check(conduit)) {
+ mdp = dsa_user_to_port(conduit);
mds = mdp->ds;
mdp_upstream = dsa_upstream_port(mds, mdp->index);
tag_protocol = mds->ops->get_tag_protocol(mds, mdp_upstream,
DSA_TAG_PROTO_NONE);
}
- /* If the master device is not itself a DSA slave in a disjoint DSA
+ /* If the conduit device is not itself a DSA user in a disjoint DSA
* tree, then return immediately.
*/
return ds->ops->get_tag_protocol(ds, dp->index, tag_protocol);
}
-static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master,
+static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *conduit,
const char *user_protocol)
{
const struct dsa_device_ops *tag_ops = NULL;
@@ -1163,7 +1163,7 @@ static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master,
enum dsa_tag_protocol default_proto;
/* Find out which protocol the switch would prefer. */
- default_proto = dsa_get_tag_protocol(dp, master);
+ default_proto = dsa_get_tag_protocol(dp, conduit);
if (dst->default_proto) {
if (dst->default_proto != default_proto) {
dev_err(ds->dev,
@@ -1218,7 +1218,7 @@ static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master,
dst->tag_ops = tag_ops;
}
- dp->master = master;
+ dp->conduit = conduit;
dp->type = DSA_PORT_TYPE_CPU;
dsa_port_set_tag_protocol(dp, dst->tag_ops);
dp->dst = dst;
@@ -1248,16 +1248,16 @@ static int dsa_port_parse_of(struct dsa_port *dp, struct device_node *dn)
dp->dn = dn;
if (ethernet) {
- struct net_device *master;
+ struct net_device *conduit;
const char *user_protocol;
- master = of_find_net_device_by_node(ethernet);
+ conduit = of_find_net_device_by_node(ethernet);
of_node_put(ethernet);
- if (!master)
+ if (!conduit)
return -EPROBE_DEFER;
user_protocol = of_get_property(dn, "dsa-tag-protocol", NULL);
- return dsa_port_parse_cpu(dp, master, user_protocol);
+ return dsa_port_parse_cpu(dp, conduit, user_protocol);
}
if (link)
@@ -1412,15 +1412,15 @@ static int dsa_port_parse(struct dsa_port *dp, const char *name,
struct device *dev)
{
if (!strcmp(name, "cpu")) {
- struct net_device *master;
+ struct net_device *conduit;
- master = dsa_dev_to_net_device(dev);
- if (!master)
+ conduit = dsa_dev_to_net_device(dev);
+ if (!conduit)
return -EPROBE_DEFER;
- dev_put(master);
+ dev_put(conduit);
- return dsa_port_parse_cpu(dp, master, NULL);
+ return dsa_port_parse_cpu(dp, conduit, NULL);
}
if (!strcmp(name, "dsa"))
@@ -1566,14 +1566,14 @@ void dsa_unregister_switch(struct dsa_switch *ds)
}
EXPORT_SYMBOL_GPL(dsa_unregister_switch);
-/* If the DSA master chooses to unregister its net_device on .shutdown, DSA is
+/* If the DSA conduit chooses to unregister its net_device on .shutdown, DSA is
* blocking that operation from completion, due to the dev_hold taken inside
- * netdev_upper_dev_link. Unlink the DSA slave interfaces from being uppers of
- * the DSA master, so that the system can reboot successfully.
+ * netdev_upper_dev_link. Unlink the DSA user interfaces from being uppers of
+ * the DSA conduit, so that the system can reboot successfully.
*/
void dsa_switch_shutdown(struct dsa_switch *ds)
{
- struct net_device *master, *slave_dev;
+ struct net_device *conduit, *user_dev;
struct dsa_port *dp;
mutex_lock(&dsa2_mutex);
@@ -1584,17 +1584,17 @@ void dsa_switch_shutdown(struct dsa_switch *ds)
rtnl_lock();
dsa_switch_for_each_user_port(dp, ds) {
- master = dsa_port_to_master(dp);
- slave_dev = dp->slave;
+ conduit = dsa_port_to_conduit(dp);
+ user_dev = dp->user;
- netdev_upper_dev_unlink(master, slave_dev);
+ netdev_upper_dev_unlink(conduit, user_dev);
}
- /* Disconnect from further netdevice notifiers on the master,
+ /* Disconnect from further netdevice notifiers on the conduit,
* since netdev_uses_dsa() will now return false.
*/
dsa_switch_for_each_cpu_port(dp, ds)
- dp->master->dsa_ptr = NULL;
+ dp->conduit->dsa_ptr = NULL;
rtnl_unlock();
out:
@@ -1605,7 +1605,7 @@ EXPORT_SYMBOL_GPL(dsa_switch_shutdown);
#ifdef CONFIG_PM_SLEEP
static bool dsa_port_is_initialized(const struct dsa_port *dp)
{
- return dp->type == DSA_PORT_TYPE_USER && dp->slave;
+ return dp->type == DSA_PORT_TYPE_USER && dp->user;
}
int dsa_switch_suspend(struct dsa_switch *ds)
@@ -1613,12 +1613,12 @@ int dsa_switch_suspend(struct dsa_switch *ds)
struct dsa_port *dp;
int ret = 0;
- /* Suspend slave network devices */
+ /* Suspend user network devices */
dsa_switch_for_each_port(dp, ds) {
if (!dsa_port_is_initialized(dp))
continue;
- ret = dsa_slave_suspend(dp->slave);
+ ret = dsa_user_suspend(dp->user);
if (ret)
return ret;
}
@@ -1641,12 +1641,12 @@ int dsa_switch_resume(struct dsa_switch *ds)
if (ret)
return ret;
- /* Resume slave network devices */
+ /* Resume user network devices */
dsa_switch_for_each_port(dp, ds) {
if (!dsa_port_is_initialized(dp))
continue;
- ret = dsa_slave_resume(dp->slave);
+ ret = dsa_user_resume(dp->user);
if (ret)
return ret;
}
@@ -1658,10 +1658,10 @@ EXPORT_SYMBOL_GPL(dsa_switch_resume);
struct dsa_port *dsa_port_from_netdev(struct net_device *netdev)
{
- if (!netdev || !dsa_slave_dev_check(netdev))
+ if (!netdev || !dsa_user_dev_check(netdev))
return ERR_PTR(-ENODEV);
- return dsa_slave_to_port(netdev);
+ return dsa_user_to_port(netdev);
}
EXPORT_SYMBOL_GPL(dsa_port_from_netdev);
@@ -1726,7 +1726,7 @@ bool dsa_mdb_present_in_other_db(struct dsa_switch *ds, int port,
EXPORT_SYMBOL_GPL(dsa_mdb_present_in_other_db);
static const struct dsa_stubs __dsa_stubs = {
- .master_hwtstamp_validate = __dsa_master_hwtstamp_validate,
+ .conduit_hwtstamp_validate = __dsa_conduit_hwtstamp_validate,
};
static void dsa_register_stubs(void)
@@ -1748,7 +1748,7 @@ static int __init dsa_init_module(void)
if (!dsa_owq)
return -ENOMEM;
- rc = dsa_slave_register_notifier();
+ rc = dsa_user_register_notifier();
if (rc)
goto register_notifier_fail;
@@ -1763,7 +1763,7 @@ static int __init dsa_init_module(void)
return 0;
netlink_register_fail:
- dsa_slave_unregister_notifier();
+ dsa_user_unregister_notifier();
dev_remove_pack(&dsa_pack_type);
register_notifier_fail:
destroy_workqueue(dsa_owq);
@@ -1778,7 +1778,7 @@ static void __exit dsa_cleanup_module(void)
rtnl_link_unregister(&dsa_link_ops);
- dsa_slave_unregister_notifier();
+ dsa_user_unregister_notifier();
dev_remove_pack(&dsa_pack_type);
destroy_workqueue(dsa_owq);
}
diff --git a/net/dsa/dsa.h b/net/dsa/dsa.h
index b7e17ae1094d..3cc7823e9ef3 100644
--- a/net/dsa/dsa.h
+++ b/net/dsa/dsa.h
@@ -21,16 +21,16 @@ void dsa_lag_map(struct dsa_switch_tree *dst, struct dsa_lag *lag);
void dsa_lag_unmap(struct dsa_switch_tree *dst, struct dsa_lag *lag);
struct dsa_lag *dsa_tree_lag_find(struct dsa_switch_tree *dst,
const struct net_device *lag_dev);
-struct net_device *dsa_tree_find_first_master(struct dsa_switch_tree *dst);
+struct net_device *dsa_tree_find_first_conduit(struct dsa_switch_tree *dst);
int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst,
const struct dsa_device_ops *tag_ops,
const struct dsa_device_ops *old_tag_ops);
-void dsa_tree_master_admin_state_change(struct dsa_switch_tree *dst,
- struct net_device *master,
+void dsa_tree_conduit_admin_state_change(struct dsa_switch_tree *dst,
+ struct net_device *conduit,
+ bool up);
+void dsa_tree_conduit_oper_state_change(struct dsa_switch_tree *dst,
+ struct net_device *conduit,
bool up);
-void dsa_tree_master_oper_state_change(struct dsa_switch_tree *dst,
- struct net_device *master,
- bool up);
unsigned int dsa_bridge_num_get(const struct net_device *bridge_dev, int max);
void dsa_bridge_num_put(const struct net_device *bridge_dev,
unsigned int bridge_num);
diff --git a/net/dsa/master.h b/net/dsa/master.h
deleted file mode 100644
index 76e39d3ec909..000000000000
--- a/net/dsa/master.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-
-#ifndef __DSA_MASTER_H
-#define __DSA_MASTER_H
-
-struct dsa_port;
-struct net_device;
-struct netdev_lag_upper_info;
-struct netlink_ext_ack;
-
-int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp);
-void dsa_master_teardown(struct net_device *dev);
-int dsa_master_lag_setup(struct net_device *lag_dev, struct dsa_port *cpu_dp,
- struct netdev_lag_upper_info *uinfo,
- struct netlink_ext_ack *extack);
-void dsa_master_lag_teardown(struct net_device *lag_dev,
- struct dsa_port *cpu_dp);
-int __dsa_master_hwtstamp_validate(struct net_device *dev,
- const struct kernel_hwtstamp_config *config,
- struct netlink_ext_ack *extack);
-
-#endif
diff --git a/net/dsa/netlink.c b/net/dsa/netlink.c
index bd4bbaf851de..1332e56349e5 100644
--- a/net/dsa/netlink.c
+++ b/net/dsa/netlink.c
@@ -5,10 +5,10 @@
#include <net/rtnetlink.h>
#include "netlink.h"
-#include "slave.h"
+#include "user.h"
static const struct nla_policy dsa_policy[IFLA_DSA_MAX + 1] = {
- [IFLA_DSA_MASTER] = { .type = NLA_U32 },
+ [IFLA_DSA_CONDUIT] = { .type = NLA_U32 },
};
static int dsa_changelink(struct net_device *dev, struct nlattr *tb[],
@@ -20,15 +20,15 @@ static int dsa_changelink(struct net_device *dev, struct nlattr *tb[],
if (!data)
return 0;
- if (data[IFLA_DSA_MASTER]) {
- u32 ifindex = nla_get_u32(data[IFLA_DSA_MASTER]);
- struct net_device *master;
+ if (data[IFLA_DSA_CONDUIT]) {
+ u32 ifindex = nla_get_u32(data[IFLA_DSA_CONDUIT]);
+ struct net_device *conduit;
- master = __dev_get_by_index(dev_net(dev), ifindex);
- if (!master)
+ conduit = __dev_get_by_index(dev_net(dev), ifindex);
+ if (!conduit)
return -EINVAL;
- err = dsa_slave_change_master(dev, master, extack);
+ err = dsa_user_change_conduit(dev, conduit, extack);
if (err)
return err;
}
@@ -38,15 +38,15 @@ static int dsa_changelink(struct net_device *dev, struct nlattr *tb[],
static size_t dsa_get_size(const struct net_device *dev)
{
- return nla_total_size(sizeof(u32)) + /* IFLA_DSA_MASTER */
+ return nla_total_size(sizeof(u32)) + /* IFLA_DSA_CONDUIT */
0;
}
static int dsa_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
- struct net_device *master = dsa_slave_to_master(dev);
+ struct net_device *conduit = dsa_user_to_conduit(dev);
- if (nla_put_u32(skb, IFLA_DSA_MASTER, master->ifindex))
+ if (nla_put_u32(skb, IFLA_DSA_CONDUIT, conduit->ifindex))
return -EMSGSIZE;
return 0;
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 37ab238e8304..c42dac87671b 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -14,9 +14,9 @@
#include "dsa.h"
#include "port.h"
-#include "slave.h"
#include "switch.h"
#include "tag_8021q.h"
+#include "user.h"
/**
* dsa_port_notify - Notify the switching fabric of changes to a port
@@ -289,7 +289,7 @@ static void dsa_port_reset_vlan_filtering(struct dsa_port *dp,
}
/* If the bridge was vlan_filtering, the bridge core doesn't trigger an
- * event for changing vlan_filtering setting upon slave ports leaving
+ * event for changing vlan_filtering setting upon user ports leaving
* it. That is a good thing, because that lets us handle it and also
* handle the case where the switch's vlan_filtering setting is global
* (not per port). When that happens, the correct moment to trigger the
@@ -489,7 +489,7 @@ int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br,
.dp = dp,
.extack = extack,
};
- struct net_device *dev = dp->slave;
+ struct net_device *dev = dp->user;
struct net_device *brport_dev;
int err;
@@ -514,8 +514,8 @@ int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br,
dp->bridge->tx_fwd_offload = info.tx_fwd_offload;
err = switchdev_bridge_port_offload(brport_dev, dev, dp,
- &dsa_slave_switchdev_notifier,
- &dsa_slave_switchdev_blocking_notifier,
+ &dsa_user_switchdev_notifier,
+ &dsa_user_switchdev_blocking_notifier,
dp->bridge->tx_fwd_offload, extack);
if (err)
goto out_rollback_unbridge;
@@ -528,8 +528,8 @@ int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br,
out_rollback_unoffload:
switchdev_bridge_port_unoffload(brport_dev, dp,
- &dsa_slave_switchdev_notifier,
- &dsa_slave_switchdev_blocking_notifier);
+ &dsa_user_switchdev_notifier,
+ &dsa_user_switchdev_blocking_notifier);
dsa_flush_workqueue();
out_rollback_unbridge:
dsa_broadcast(DSA_NOTIFIER_BRIDGE_LEAVE, &info);
@@ -547,8 +547,8 @@ void dsa_port_pre_bridge_leave(struct dsa_port *dp, struct net_device *br)
return;
switchdev_bridge_port_unoffload(brport_dev, dp,
- &dsa_slave_switchdev_notifier,
- &dsa_slave_switchdev_blocking_notifier);
+ &dsa_user_switchdev_notifier,
+ &dsa_user_switchdev_blocking_notifier);
dsa_flush_workqueue();
}
@@ -741,10 +741,10 @@ static bool dsa_port_can_apply_vlan_filtering(struct dsa_port *dp,
*/
if (vlan_filtering && dsa_port_is_user(dp)) {
struct net_device *br = dsa_port_bridge_dev_get(dp);
- struct net_device *upper_dev, *slave = dp->slave;
+ struct net_device *upper_dev, *user = dp->user;
struct list_head *iter;
- netdev_for_each_upper_dev_rcu(slave, upper_dev, iter) {
+ netdev_for_each_upper_dev_rcu(user, upper_dev, iter) {
struct bridge_vlan_info br_info;
u16 vid;
@@ -803,9 +803,9 @@ int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
if (!ds->ops->port_vlan_filtering)
return -EOPNOTSUPP;
- /* We are called from dsa_slave_switchdev_blocking_event(),
+ /* We are called from dsa_user_switchdev_blocking_event(),
* which is not under rcu_read_lock(), unlike
- * dsa_slave_switchdev_event().
+ * dsa_user_switchdev_event().
*/
rcu_read_lock();
apply = dsa_port_can_apply_vlan_filtering(dp, vlan_filtering, extack);
@@ -827,24 +827,24 @@ int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
ds->vlan_filtering = vlan_filtering;
dsa_switch_for_each_user_port(other_dp, ds) {
- struct net_device *slave = other_dp->slave;
+ struct net_device *user = other_dp->user;
/* We might be called in the unbind path, so not
- * all slave devices might still be registered.
+ * all user devices might still be registered.
*/
- if (!slave)
+ if (!user)
continue;
- err = dsa_slave_manage_vlan_filtering(slave,
- vlan_filtering);
+ err = dsa_user_manage_vlan_filtering(user,
+ vlan_filtering);
if (err)
goto restore;
}
} else {
dp->vlan_filtering = vlan_filtering;
- err = dsa_slave_manage_vlan_filtering(dp->slave,
- vlan_filtering);
+ err = dsa_user_manage_vlan_filtering(dp->user,
+ vlan_filtering);
if (err)
goto restore;
}
@@ -863,7 +863,7 @@ restore:
}
/* This enforces legacy behavior for switch drivers which assume they can't
- * receive VLAN configuration when enslaved to a bridge with vlan_filtering=0
+ * receive VLAN configuration when joining a bridge with vlan_filtering=0
*/
bool dsa_port_skip_vlan_configuration(struct dsa_port *dp)
{
@@ -1047,7 +1047,7 @@ int dsa_port_standalone_host_fdb_add(struct dsa_port *dp,
int dsa_port_bridge_host_fdb_add(struct dsa_port *dp,
const unsigned char *addr, u16 vid)
{
- struct net_device *master = dsa_port_to_master(dp);
+ struct net_device *conduit = dsa_port_to_conduit(dp);
struct dsa_db db = {
.type = DSA_DB_BRIDGE,
.bridge = *dp->bridge,
@@ -1057,12 +1057,12 @@ int dsa_port_bridge_host_fdb_add(struct dsa_port *dp,
if (!dp->ds->fdb_isolation)
db.bridge.num = 0;
- /* Avoid a call to __dev_set_promiscuity() on the master, which
+ /* Avoid a call to __dev_set_promiscuity() on the conduit, which
* requires rtnl_lock(), since we can't guarantee that is held here,
* and we can't take it either.
*/
- if (master->priv_flags & IFF_UNICAST_FLT) {
- err = dev_uc_add(master, addr);
+ if (conduit->priv_flags & IFF_UNICAST_FLT) {
+ err = dev_uc_add(conduit, addr);
if (err)
return err;
}
@@ -1098,7 +1098,7 @@ int dsa_port_standalone_host_fdb_del(struct dsa_port *dp,
int dsa_port_bridge_host_fdb_del(struct dsa_port *dp,
const unsigned char *addr, u16 vid)
{
- struct net_device *master = dsa_port_to_master(dp);
+ struct net_device *conduit = dsa_port_to_conduit(dp);
struct dsa_db db = {
.type = DSA_DB_BRIDGE,
.bridge = *dp->bridge,
@@ -1108,8 +1108,8 @@ int dsa_port_bridge_host_fdb_del(struct dsa_port *dp,
if (!dp->ds->fdb_isolation)
db.bridge.num = 0;
- if (master->priv_flags & IFF_UNICAST_FLT) {
- err = dev_uc_del(master, addr);
+ if (conduit->priv_flags & IFF_UNICAST_FLT) {
+ err = dev_uc_del(conduit, addr);
if (err)
return err;
}
@@ -1229,7 +1229,7 @@ int dsa_port_standalone_host_mdb_add(const struct dsa_port *dp,
int dsa_port_bridge_host_mdb_add(const struct dsa_port *dp,
const struct switchdev_obj_port_mdb *mdb)
{
- struct net_device *master = dsa_port_to_master(dp);
+ struct net_device *conduit = dsa_port_to_conduit(dp);
struct dsa_db db = {
.type = DSA_DB_BRIDGE,
.bridge = *dp->bridge,
@@ -1239,7 +1239,7 @@ int dsa_port_bridge_host_mdb_add(const struct dsa_port *dp,
if (!dp->ds->fdb_isolation)
db.bridge.num = 0;
- err = dev_mc_add(master, mdb->addr);
+ err = dev_mc_add(conduit, mdb->addr);
if (err)
return err;
@@ -1273,7 +1273,7 @@ int dsa_port_standalone_host_mdb_del(const struct dsa_port *dp,
int dsa_port_bridge_host_mdb_del(const struct dsa_port *dp,
const struct switchdev_obj_port_mdb *mdb)
{
- struct net_device *master = dsa_port_to_master(dp);
+ struct net_device *conduit = dsa_port_to_conduit(dp);
struct dsa_db db = {
.type = DSA_DB_BRIDGE,
.bridge = *dp->bridge,
@@ -1283,7 +1283,7 @@ int dsa_port_bridge_host_mdb_del(const struct dsa_port *dp,
if (!dp->ds->fdb_isolation)
db.bridge.num = 0;
- err = dev_mc_del(master, mdb->addr);
+ err = dev_mc_del(conduit, mdb->addr);
if (err)
return err;
@@ -1318,7 +1318,7 @@ int dsa_port_host_vlan_add(struct dsa_port *dp,
const struct switchdev_obj_port_vlan *vlan,
struct netlink_ext_ack *extack)
{
- struct net_device *master = dsa_port_to_master(dp);
+ struct net_device *conduit = dsa_port_to_conduit(dp);
struct dsa_notifier_vlan_info info = {
.dp = dp,
.vlan = vlan,
@@ -1330,7 +1330,7 @@ int dsa_port_host_vlan_add(struct dsa_port *dp,
if (err && err != -EOPNOTSUPP)
return err;
- vlan_vid_add(master, htons(ETH_P_8021Q), vlan->vid);
+ vlan_vid_add(conduit, htons(ETH_P_8021Q), vlan->vid);
return err;
}
@@ -1338,7 +1338,7 @@ int dsa_port_host_vlan_add(struct dsa_port *dp,
int dsa_port_host_vlan_del(struct dsa_port *dp,
const struct switchdev_obj_port_vlan *vlan)
{
- struct net_device *master = dsa_port_to_master(dp);
+ struct net_device *conduit = dsa_port_to_conduit(dp);
struct dsa_notifier_vlan_info info = {
.dp = dp,
.vlan = vlan,
@@ -1349,7 +1349,7 @@ int dsa_port_host_vlan_del(struct dsa_port *dp,
if (err && err != -EOPNOTSUPP)
return err;
- vlan_vid_del(master, htons(ETH_P_8021Q), vlan->vid);
+ vlan_vid_del(conduit, htons(ETH_P_8021Q), vlan->vid);
return err;
}
@@ -1398,24 +1398,24 @@ int dsa_port_mrp_del_ring_role(const struct dsa_port *dp,
return ds->ops->port_mrp_del_ring_role(ds, dp->index, mrp);
}
-static int dsa_port_assign_master(struct dsa_port *dp,
- struct net_device *master,
- struct netlink_ext_ack *extack,
- bool fail_on_err)
+static int dsa_port_assign_conduit(struct dsa_port *dp,
+ struct net_device *conduit,
+ struct netlink_ext_ack *extack,
+ bool fail_on_err)
{
struct dsa_switch *ds = dp->ds;
int port = dp->index, err;
- err = ds->ops->port_change_master(ds, port, master, extack);
+ err = ds->ops->port_change_conduit(ds, port, conduit, extack);
if (err && !fail_on_err)
- dev_err(ds->dev, "port %d failed to assign master %s: %pe\n",
- port, master->name, ERR_PTR(err));
+ dev_err(ds->dev, "port %d failed to assign conduit %s: %pe\n",
+ port, conduit->name, ERR_PTR(err));
if (err && fail_on_err)
return err;
- dp->cpu_dp = master->dsa_ptr;
- dp->cpu_port_in_lag = netif_is_lag_master(master);
+ dp->cpu_dp = conduit->dsa_ptr;
+ dp->cpu_port_in_lag = netif_is_lag_master(conduit);
return 0;
}
@@ -1428,12 +1428,12 @@ static int dsa_port_assign_master(struct dsa_port *dp,
* the old CPU port before changing it, and restore it on errors during the
* bringup of the new one.
*/
-int dsa_port_change_master(struct dsa_port *dp, struct net_device *master,
- struct netlink_ext_ack *extack)
+int dsa_port_change_conduit(struct dsa_port *dp, struct net_device *conduit,
+ struct netlink_ext_ack *extack)
{
struct net_device *bridge_dev = dsa_port_bridge_dev_get(dp);
- struct net_device *old_master = dsa_port_to_master(dp);
- struct net_device *dev = dp->slave;
+ struct net_device *old_conduit = dsa_port_to_conduit(dp);
+ struct net_device *dev = dp->user;
struct dsa_switch *ds = dp->ds;
bool vlan_filtering;
int err, tmp;
@@ -1454,7 +1454,7 @@ int dsa_port_change_master(struct dsa_port *dp, struct net_device *master,
*/
vlan_filtering = dsa_port_is_vlan_filtering(dp);
if (vlan_filtering) {
- err = dsa_slave_manage_vlan_filtering(dev, false);
+ err = dsa_user_manage_vlan_filtering(dev, false);
if (err) {
NL_SET_ERR_MSG_MOD(extack,
"Failed to remove standalone VLANs");
@@ -1465,16 +1465,16 @@ int dsa_port_change_master(struct dsa_port *dp, struct net_device *master,
/* Standalone addresses, and addresses of upper interfaces like
* VLAN, LAG, HSR need to be migrated.
*/
- dsa_slave_unsync_ha(dev);
+ dsa_user_unsync_ha(dev);
- err = dsa_port_assign_master(dp, master, extack, true);
+ err = dsa_port_assign_conduit(dp, conduit, extack, true);
if (err)
goto rewind_old_addrs;
- dsa_slave_sync_ha(dev);
+ dsa_user_sync_ha(dev);
if (vlan_filtering) {
- err = dsa_slave_manage_vlan_filtering(dev, true);
+ err = dsa_user_manage_vlan_filtering(dev, true);
if (err) {
NL_SET_ERR_MSG_MOD(extack,
"Failed to restore standalone VLANs");
@@ -1495,19 +1495,19 @@ int dsa_port_change_master(struct dsa_port *dp, struct net_device *master,
rewind_new_vlan:
if (vlan_filtering)
- dsa_slave_manage_vlan_filtering(dev, false);
+ dsa_user_manage_vlan_filtering(dev, false);
rewind_new_addrs:
- dsa_slave_unsync_ha(dev);
+ dsa_user_unsync_ha(dev);
- dsa_port_assign_master(dp, old_master, NULL, false);
+ dsa_port_assign_conduit(dp, old_conduit, NULL, false);
/* Restore the objects on the old CPU port */
rewind_old_addrs:
- dsa_slave_sync_ha(dev);
+ dsa_user_sync_ha(dev);
if (vlan_filtering) {
- tmp = dsa_slave_manage_vlan_filtering(dev, true);
+ tmp = dsa_user_manage_vlan_filtering(dev, true);
if (tmp) {
dev_err(ds->dev,
"port %d failed to restore standalone VLANs: %pe\n",
@@ -1554,20 +1554,6 @@ static struct phy_device *dsa_port_get_phy_device(struct dsa_port *dp)
return phydev;
}
-static void dsa_port_phylink_validate(struct phylink_config *config,
- unsigned long *supported,
- struct phylink_link_state *state)
-{
- /* Skip call for drivers which don't yet set mac_capabilities,
- * since validating in that case would mean their PHY will advertise
- * nothing. In turn, skipping validation makes them advertise
- * everything that the PHY supports, so those drivers should be
- * converted ASAP.
- */
- if (config->mac_capabilities)
- phylink_generic_validate(config, supported, state);
-}
-
static struct phylink_pcs *
dsa_port_phylink_mac_select_pcs(struct phylink_config *config,
phy_interface_t interface)
@@ -1634,7 +1620,7 @@ static void dsa_port_phylink_mac_link_down(struct phylink_config *config,
struct dsa_switch *ds = dp->ds;
if (dsa_port_is_user(dp))
- phydev = dp->slave->phydev;
+ phydev = dp->user->phydev;
if (!ds->ops->phylink_mac_link_down) {
if (ds->ops->adjust_link && phydev)
@@ -1666,7 +1652,6 @@ static void dsa_port_phylink_mac_link_up(struct phylink_config *config,
}
static const struct phylink_mac_ops dsa_port_phylink_mac_ops = {
- .validate = dsa_port_phylink_validate,
.mac_select_pcs = dsa_port_phylink_mac_select_pcs,
.mac_prepare = dsa_port_phylink_mac_prepare,
.mac_config = dsa_port_phylink_mac_config,
@@ -1823,7 +1808,7 @@ err_phy_connect:
* their type.
*
* User ports with no phy-handle or fixed-link are expected to connect to an
- * internal PHY located on the ds->slave_mii_bus at an MDIO address equal to
+ * internal PHY located on the ds->user_mii_bus at an MDIO address equal to
* the port number. This description is still actively supported.
*
* Shared (CPU and DSA) ports with no phy-handle or fixed-link are expected to
@@ -1844,7 +1829,7 @@ err_phy_connect:
* a fixed-link, a phy-handle, or a managed = "in-band-status" property.
* It becomes the responsibility of the driver to ensure that these ports
* operate at the maximum speed (whatever this means) and will interoperate
- * with the DSA master or other cascade port, since phylink methods will not be
+ * with the DSA conduit or other cascade port, since phylink methods will not be
* invoked for them.
*
* If you are considering expanding this table for newly introduced switches,
@@ -2024,7 +2009,8 @@ void dsa_shared_port_link_unregister_of(struct dsa_port *dp)
dsa_shared_port_setup_phy_of(dp, false);
}
-int dsa_port_hsr_join(struct dsa_port *dp, struct net_device *hsr)
+int dsa_port_hsr_join(struct dsa_port *dp, struct net_device *hsr,
+ struct netlink_ext_ack *extack)
{
struct dsa_switch *ds = dp->ds;
int err;
@@ -2034,7 +2020,7 @@ int dsa_port_hsr_join(struct dsa_port *dp, struct net_device *hsr)
dp->hsr_dev = hsr;
- err = ds->ops->port_hsr_join(ds, dp->index, hsr);
+ err = ds->ops->port_hsr_join(ds, dp->index, hsr, extack);
if (err)
dp->hsr_dev = NULL;
diff --git a/net/dsa/port.h b/net/dsa/port.h
index dc812512fd0e..6bc3291573c0 100644
--- a/net/dsa/port.h
+++ b/net/dsa/port.h
@@ -103,12 +103,13 @@ int dsa_port_phylink_create(struct dsa_port *dp);
void dsa_port_phylink_destroy(struct dsa_port *dp);
int dsa_shared_port_link_register_of(struct dsa_port *dp);
void dsa_shared_port_link_unregister_of(struct dsa_port *dp);
-int dsa_port_hsr_join(struct dsa_port *dp, struct net_device *hsr);
+int dsa_port_hsr_join(struct dsa_port *dp, struct net_device *hsr,
+ struct netlink_ext_ack *extack);
void dsa_port_hsr_leave(struct dsa_port *dp, struct net_device *hsr);
int dsa_port_tag_8021q_vlan_add(struct dsa_port *dp, u16 vid, bool broadcast);
void dsa_port_tag_8021q_vlan_del(struct dsa_port *dp, u16 vid, bool broadcast);
void dsa_port_set_host_flood(struct dsa_port *dp, bool uc, bool mc);
-int dsa_port_change_master(struct dsa_port *dp, struct net_device *master,
- struct netlink_ext_ack *extack);
+int dsa_port_change_conduit(struct dsa_port *dp, struct net_device *conduit,
+ struct netlink_ext_ack *extack);
#endif
diff --git a/net/dsa/slave.h b/net/dsa/slave.h
deleted file mode 100644
index d0abe609e00d..000000000000
--- a/net/dsa/slave.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-
-#ifndef __DSA_SLAVE_H
-#define __DSA_SLAVE_H
-
-#include <linux/if_bridge.h>
-#include <linux/if_vlan.h>
-#include <linux/list.h>
-#include <linux/netpoll.h>
-#include <linux/types.h>
-#include <net/dsa.h>
-#include <net/gro_cells.h>
-
-struct net_device;
-struct netlink_ext_ack;
-
-extern struct notifier_block dsa_slave_switchdev_notifier;
-extern struct notifier_block dsa_slave_switchdev_blocking_notifier;
-
-struct dsa_slave_priv {
- /* Copy of CPU port xmit for faster access in slave transmit hot path */
- struct sk_buff * (*xmit)(struct sk_buff *skb,
- struct net_device *dev);
-
- struct gro_cells gcells;
-
- /* DSA port data, such as switch, port index, etc. */
- struct dsa_port *dp;
-
-#ifdef CONFIG_NET_POLL_CONTROLLER
- struct netpoll *netpoll;
-#endif
-
- /* TC context */
- struct list_head mall_tc_list;
-};
-
-void dsa_slave_mii_bus_init(struct dsa_switch *ds);
-int dsa_slave_create(struct dsa_port *dp);
-void dsa_slave_destroy(struct net_device *slave_dev);
-int dsa_slave_suspend(struct net_device *slave_dev);
-int dsa_slave_resume(struct net_device *slave_dev);
-int dsa_slave_register_notifier(void);
-void dsa_slave_unregister_notifier(void);
-void dsa_slave_sync_ha(struct net_device *dev);
-void dsa_slave_unsync_ha(struct net_device *dev);
-void dsa_slave_setup_tagger(struct net_device *slave);
-int dsa_slave_change_mtu(struct net_device *dev, int new_mtu);
-int dsa_slave_change_master(struct net_device *dev, struct net_device *master,
- struct netlink_ext_ack *extack);
-int dsa_slave_manage_vlan_filtering(struct net_device *dev,
- bool vlan_filtering);
-
-static inline struct dsa_port *dsa_slave_to_port(const struct net_device *dev)
-{
- struct dsa_slave_priv *p = netdev_priv(dev);
-
- return p->dp;
-}
-
-static inline struct net_device *
-dsa_slave_to_master(const struct net_device *dev)
-{
- struct dsa_port *dp = dsa_slave_to_port(dev);
-
- return dsa_port_to_master(dp);
-}
-
-#endif
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index 1a42f9317334..3d2feeea897b 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -15,10 +15,10 @@
#include "dsa.h"
#include "netlink.h"
#include "port.h"
-#include "slave.h"
#include "switch.h"
#include "tag_8021q.h"
#include "trace.h"
+#include "user.h"
static unsigned int dsa_switch_fastest_ageing_time(struct dsa_switch *ds,
unsigned int ageing_time)
@@ -894,12 +894,12 @@ static int dsa_switch_change_tag_proto(struct dsa_switch *ds,
* bits that depend on the tagger, such as the MTU.
*/
dsa_switch_for_each_user_port(dp, ds) {
- struct net_device *slave = dp->slave;
+ struct net_device *user = dp->user;
- dsa_slave_setup_tagger(slave);
+ dsa_user_setup_tagger(user);
/* rtnl_mutex is held in dsa_tree_change_tag_proto */
- dsa_slave_change_mtu(slave, slave->mtu);
+ dsa_user_change_mtu(user, user->mtu);
}
return 0;
@@ -960,13 +960,13 @@ dsa_switch_disconnect_tag_proto(struct dsa_switch *ds,
}
static int
-dsa_switch_master_state_change(struct dsa_switch *ds,
- struct dsa_notifier_master_state_info *info)
+dsa_switch_conduit_state_change(struct dsa_switch *ds,
+ struct dsa_notifier_conduit_state_info *info)
{
- if (!ds->ops->master_state_change)
+ if (!ds->ops->conduit_state_change)
return 0;
- ds->ops->master_state_change(ds, info->master, info->operational);
+ ds->ops->conduit_state_change(ds, info->conduit, info->operational);
return 0;
}
@@ -1056,8 +1056,8 @@ static int dsa_switch_event(struct notifier_block *nb,
case DSA_NOTIFIER_TAG_8021Q_VLAN_DEL:
err = dsa_switch_tag_8021q_vlan_del(ds, info);
break;
- case DSA_NOTIFIER_MASTER_STATE_CHANGE:
- err = dsa_switch_master_state_change(ds, info);
+ case DSA_NOTIFIER_CONDUIT_STATE_CHANGE:
+ err = dsa_switch_conduit_state_change(ds, info);
break;
default:
err = -EOPNOTSUPP;
diff --git a/net/dsa/switch.h b/net/dsa/switch.h
index ea034677da15..be0a2749cd97 100644
--- a/net/dsa/switch.h
+++ b/net/dsa/switch.h
@@ -34,7 +34,7 @@ enum {
DSA_NOTIFIER_TAG_PROTO_DISCONNECT,
DSA_NOTIFIER_TAG_8021Q_VLAN_ADD,
DSA_NOTIFIER_TAG_8021Q_VLAN_DEL,
- DSA_NOTIFIER_MASTER_STATE_CHANGE,
+ DSA_NOTIFIER_CONDUIT_STATE_CHANGE,
};
/* DSA_NOTIFIER_AGEING_TIME */
@@ -105,9 +105,9 @@ struct dsa_notifier_tag_8021q_vlan_info {
u16 vid;
};
-/* DSA_NOTIFIER_MASTER_STATE_CHANGE */
-struct dsa_notifier_master_state_info {
- const struct net_device *master;
+/* DSA_NOTIFIER_CONDUIT_STATE_CHANGE */
+struct dsa_notifier_conduit_state_info {
+ const struct net_device *conduit;
bool operational;
};
diff --git a/net/dsa/tag.c b/net/dsa/tag.c
index 5105a5ff58fa..6e402d49afd3 100644
--- a/net/dsa/tag.c
+++ b/net/dsa/tag.c
@@ -13,8 +13,8 @@
#include <net/dsa.h>
#include <net/dst_metadata.h>
-#include "slave.h"
#include "tag.h"
+#include "user.h"
static LIST_HEAD(dsa_tag_drivers_list);
static DEFINE_MUTEX(dsa_tag_drivers_lock);
@@ -27,7 +27,7 @@ static DEFINE_MUTEX(dsa_tag_drivers_lock);
* switch, the DSA driver owning the interface to which the packet is
* delivered is never notified unless we do so here.
*/
-static bool dsa_skb_defer_rx_timestamp(struct dsa_slave_priv *p,
+static bool dsa_skb_defer_rx_timestamp(struct dsa_user_priv *p,
struct sk_buff *skb)
{
struct dsa_switch *ds = p->dp->ds;
@@ -57,7 +57,7 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
struct metadata_dst *md_dst = skb_metadata_dst(skb);
struct dsa_port *cpu_dp = dev->dsa_ptr;
struct sk_buff *nskb = NULL;
- struct dsa_slave_priv *p;
+ struct dsa_user_priv *p;
if (unlikely(!cpu_dp)) {
kfree_skb(skb);
@@ -75,7 +75,7 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
if (!skb_has_extensions(skb))
skb->slow_gro = 0;
- skb->dev = dsa_master_find_slave(dev, 0, port);
+ skb->dev = dsa_conduit_find_user(dev, 0, port);
if (likely(skb->dev)) {
dsa_default_offload_fwd_mark(skb);
nskb = skb;
@@ -94,7 +94,7 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
skb->pkt_type = PACKET_HOST;
skb->protocol = eth_type_trans(skb, skb->dev);
- if (unlikely(!dsa_slave_dev_check(skb->dev))) {
+ if (unlikely(!dsa_user_dev_check(skb->dev))) {
/* Packet is to be injected directly on an upper
* device, e.g. a team/bond, so skip all DSA-port
* specific actions.
diff --git a/net/dsa/tag.h b/net/dsa/tag.h
index 32d12f4a9d73..f6b9c73718df 100644
--- a/net/dsa/tag.h
+++ b/net/dsa/tag.h
@@ -9,7 +9,7 @@
#include <net/dsa.h>
#include "port.h"
-#include "slave.h"
+#include "user.h"
struct dsa_tag_driver {
const struct dsa_device_ops *ops;
@@ -29,7 +29,7 @@ static inline int dsa_tag_protocol_overhead(const struct dsa_device_ops *ops)
return ops->needed_headroom + ops->needed_tailroom;
}
-static inline struct net_device *dsa_master_find_slave(struct net_device *dev,
+static inline struct net_device *dsa_conduit_find_user(struct net_device *dev,
int device, int port)
{
struct dsa_port *cpu_dp = dev->dsa_ptr;
@@ -39,7 +39,7 @@ static inline struct net_device *dsa_master_find_slave(struct net_device *dev,
list_for_each_entry(dp, &dst->ports, list)
if (dp->ds->index == device && dp->index == port &&
dp->type == DSA_PORT_TYPE_USER)
- return dp->slave;
+ return dp->user;
return NULL;
}
@@ -49,7 +49,7 @@ static inline struct net_device *dsa_master_find_slave(struct net_device *dev,
*/
static inline struct sk_buff *dsa_untag_bridge_pvid(struct sk_buff *skb)
{
- struct dsa_port *dp = dsa_slave_to_port(skb->dev);
+ struct dsa_port *dp = dsa_user_to_port(skb->dev);
struct net_device *br = dsa_port_bridge_dev_get(dp);
struct net_device *dev = skb->dev;
struct net_device *upper_dev;
@@ -107,12 +107,12 @@ static inline struct sk_buff *dsa_untag_bridge_pvid(struct sk_buff *skb)
* to support termination through the bridge.
*/
static inline struct net_device *
-dsa_find_designated_bridge_port_by_vid(struct net_device *master, u16 vid)
+dsa_find_designated_bridge_port_by_vid(struct net_device *conduit, u16 vid)
{
- struct dsa_port *cpu_dp = master->dsa_ptr;
+ struct dsa_port *cpu_dp = conduit->dsa_ptr;
struct dsa_switch_tree *dst = cpu_dp->dst;
struct bridge_vlan_info vinfo;
- struct net_device *slave;
+ struct net_device *user;
struct dsa_port *dp;
int err;
@@ -134,13 +134,13 @@ dsa_find_designated_bridge_port_by_vid(struct net_device *master, u16 vid)
if (dp->cpu_dp != cpu_dp)
continue;
- slave = dp->slave;
+ user = dp->user;
- err = br_vlan_get_info_rcu(slave, vid, &vinfo);
+ err = br_vlan_get_info_rcu(user, vid, &vinfo);
if (err)
continue;
- return slave;
+ return user;
}
return NULL;
@@ -155,7 +155,7 @@ dsa_find_designated_bridge_port_by_vid(struct net_device *master, u16 vid)
*/
static inline void dsa_default_offload_fwd_mark(struct sk_buff *skb)
{
- struct dsa_port *dp = dsa_slave_to_port(skb->dev);
+ struct dsa_port *dp = dsa_user_to_port(skb->dev);
skb->offload_fwd_mark = !!(dp->bridge);
}
@@ -215,9 +215,9 @@ static inline void dsa_alloc_etype_header(struct sk_buff *skb, int len)
memmove(skb->data, skb->data + len, 2 * ETH_ALEN);
}
-/* On RX, eth_type_trans() on the DSA master pulls ETH_HLEN bytes starting from
+/* On RX, eth_type_trans() on the DSA conduit pulls ETH_HLEN bytes starting from
* skb_mac_header(skb), which leaves skb->data pointing at the first byte after
- * what the DSA master perceives as the EtherType (the beginning of the L3
+ * what the DSA conduit perceives as the EtherType (the beginning of the L3
* protocol). Since DSA EtherType header taggers treat the EtherType as part of
* the DSA tag itself, and the EtherType is 2 bytes in length, the DSA header
* is located 2 bytes behind skb->data. Note that EtherType in this context
diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c
index cbdfc392f7e0..71b26ae6db39 100644
--- a/net/dsa/tag_8021q.c
+++ b/net/dsa/tag_8021q.c
@@ -73,7 +73,7 @@ struct dsa_tag_8021q_vlan {
struct dsa_8021q_context {
struct dsa_switch *ds;
struct list_head vlans;
- /* EtherType of RX VID, used for filtering on master interface */
+ /* EtherType of RX VID, used for filtering on conduit interface */
__be16 proto;
};
@@ -338,7 +338,7 @@ static int dsa_tag_8021q_port_setup(struct dsa_switch *ds, int port)
struct dsa_8021q_context *ctx = ds->tag_8021q_ctx;
struct dsa_port *dp = dsa_to_port(ds, port);
u16 vid = dsa_tag_8021q_standalone_vid(dp);
- struct net_device *master;
+ struct net_device *conduit;
int err;
/* The CPU port is implicitly configured by
@@ -347,7 +347,7 @@ static int dsa_tag_8021q_port_setup(struct dsa_switch *ds, int port)
if (!dsa_port_is_user(dp))
return 0;
- master = dsa_port_to_master(dp);
+ conduit = dsa_port_to_conduit(dp);
err = dsa_port_tag_8021q_vlan_add(dp, vid, false);
if (err) {
@@ -357,8 +357,8 @@ static int dsa_tag_8021q_port_setup(struct dsa_switch *ds, int port)
return err;
}
- /* Add the VLAN to the master's RX filter. */
- vlan_vid_add(master, ctx->proto, vid);
+ /* Add the VLAN to the conduit's RX filter. */
+ vlan_vid_add(conduit, ctx->proto, vid);
return err;
}
@@ -368,7 +368,7 @@ static void dsa_tag_8021q_port_teardown(struct dsa_switch *ds, int port)
struct dsa_8021q_context *ctx = ds->tag_8021q_ctx;
struct dsa_port *dp = dsa_to_port(ds, port);
u16 vid = dsa_tag_8021q_standalone_vid(dp);
- struct net_device *master;
+ struct net_device *conduit;
/* The CPU port is implicitly configured by
* configuring the front-panel ports
@@ -376,11 +376,11 @@ static void dsa_tag_8021q_port_teardown(struct dsa_switch *ds, int port)
if (!dsa_port_is_user(dp))
return;
- master = dsa_port_to_master(dp);
+ conduit = dsa_port_to_conduit(dp);
dsa_port_tag_8021q_vlan_del(dp, vid, false);
- vlan_vid_del(master, ctx->proto, vid);
+ vlan_vid_del(conduit, ctx->proto, vid);
}
static int dsa_tag_8021q_setup(struct dsa_switch *ds)
@@ -468,10 +468,10 @@ struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev,
}
EXPORT_SYMBOL_GPL(dsa_8021q_xmit);
-struct net_device *dsa_tag_8021q_find_port_by_vbid(struct net_device *master,
+struct net_device *dsa_tag_8021q_find_port_by_vbid(struct net_device *conduit,
int vbid)
{
- struct dsa_port *cpu_dp = master->dsa_ptr;
+ struct dsa_port *cpu_dp = conduit->dsa_ptr;
struct dsa_switch_tree *dst = cpu_dp->dst;
struct dsa_port *dp;
@@ -490,7 +490,7 @@ struct net_device *dsa_tag_8021q_find_port_by_vbid(struct net_device *master,
continue;
if (dsa_port_bridge_num_get(dp) == vbid)
- return dp->slave;
+ return dp->user;
}
return NULL;
diff --git a/net/dsa/tag_8021q.h b/net/dsa/tag_8021q.h
index b75cbaa028ef..41f7167ac520 100644
--- a/net/dsa/tag_8021q.h
+++ b/net/dsa/tag_8021q.h
@@ -16,7 +16,7 @@ struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev,
void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id,
int *vbid);
-struct net_device *dsa_tag_8021q_find_port_by_vbid(struct net_device *master,
+struct net_device *dsa_tag_8021q_find_port_by_vbid(struct net_device *conduit,
int vbid);
int dsa_switch_tag_8021q_vlan_add(struct dsa_switch *ds,
diff --git a/net/dsa/tag_ar9331.c b/net/dsa/tag_ar9331.c
index 7f3b7d730b85..cbb588ca73aa 100644
--- a/net/dsa/tag_ar9331.c
+++ b/net/dsa/tag_ar9331.c
@@ -29,7 +29,7 @@
static struct sk_buff *ar9331_tag_xmit(struct sk_buff *skb,
struct net_device *dev)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
__le16 *phdr;
u16 hdr;
@@ -74,7 +74,7 @@ static struct sk_buff *ar9331_tag_rcv(struct sk_buff *skb,
/* Get source port information */
port = FIELD_GET(AR9331_HDR_PORT_NUM_MASK, hdr);
- skb->dev = dsa_master_find_slave(ndev, 0, port);
+ skb->dev = dsa_conduit_find_user(ndev, 0, port);
if (!skb->dev)
return NULL;
@@ -89,6 +89,7 @@ static const struct dsa_device_ops ar9331_netdev_ops = {
.needed_headroom = AR9331_HDR_LEN,
};
+MODULE_DESCRIPTION("DSA tag driver for Atheros AR9331 SoC with built-in switch");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_AR9331, AR9331_NAME);
module_dsa_tag_driver(ar9331_netdev_ops);
diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c
index cacdafb41200..8c3c068728e5 100644
--- a/net/dsa/tag_brcm.c
+++ b/net/dsa/tag_brcm.c
@@ -85,7 +85,7 @@ static struct sk_buff *brcm_tag_xmit_ll(struct sk_buff *skb,
struct net_device *dev,
unsigned int offset)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
u16 queue = skb_get_queue_mapping(skb);
u8 *brcm_tag;
@@ -96,7 +96,7 @@ static struct sk_buff *brcm_tag_xmit_ll(struct sk_buff *skb,
* (including FCS and tag) because the length verification is done after
* the Broadcom tag is stripped off the ingress packet.
*
- * Let dsa_slave_xmit() free the SKB
+ * Let dsa_user_xmit() free the SKB
*/
if (__skb_put_padto(skb, ETH_ZLEN + BRCM_TAG_LEN, false))
return NULL;
@@ -119,7 +119,7 @@ static struct sk_buff *brcm_tag_xmit_ll(struct sk_buff *skb,
brcm_tag[2] = BRCM_IG_DSTMAP2_MASK;
brcm_tag[3] = (1 << dp->index) & BRCM_IG_DSTMAP1_MASK;
- /* Now tell the master network device about the desired output queue
+ /* Now tell the conduit network device about the desired output queue
* as well
*/
skb_set_queue_mapping(skb, BRCM_TAG_SET_PORT_QUEUE(dp->index, queue));
@@ -164,7 +164,7 @@ static struct sk_buff *brcm_tag_rcv_ll(struct sk_buff *skb,
/* Locate which port this is coming from */
source_port = brcm_tag[3] & BRCM_EG_PID_MASK;
- skb->dev = dsa_master_find_slave(dev, 0, source_port);
+ skb->dev = dsa_conduit_find_user(dev, 0, source_port);
if (!skb->dev)
return NULL;
@@ -216,7 +216,7 @@ MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_BRCM, BRCM_NAME);
static struct sk_buff *brcm_leg_tag_xmit(struct sk_buff *skb,
struct net_device *dev)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
u8 *brcm_tag;
/* The Ethernet switch we are interfaced with needs packets to be at
@@ -226,7 +226,7 @@ static struct sk_buff *brcm_leg_tag_xmit(struct sk_buff *skb,
* (including FCS and tag) because the length verification is done after
* the Broadcom tag is stripped off the ingress packet.
*
- * Let dsa_slave_xmit() free the SKB
+ * Let dsa_user_xmit() free the SKB
*/
if (__skb_put_padto(skb, ETH_ZLEN + BRCM_LEG_TAG_LEN, false))
return NULL;
@@ -264,7 +264,7 @@ static struct sk_buff *brcm_leg_tag_rcv(struct sk_buff *skb,
source_port = brcm_tag[5] & BRCM_LEG_PORT_ID;
- skb->dev = dsa_master_find_slave(dev, 0, source_port);
+ skb->dev = dsa_conduit_find_user(dev, 0, source_port);
if (!skb->dev)
return NULL;
@@ -335,4 +335,5 @@ static struct dsa_tag_driver *dsa_tag_driver_array[] = {
module_dsa_tag_drivers(dsa_tag_driver_array);
+MODULE_DESCRIPTION("DSA tag driver for Broadcom switches using in-frame headers");
MODULE_LICENSE("GPL");
diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c
index 1fd7fa26db64..2a2c4fb61a65 100644
--- a/net/dsa/tag_dsa.c
+++ b/net/dsa/tag_dsa.c
@@ -129,7 +129,7 @@ enum dsa_code {
static struct sk_buff *dsa_xmit_ll(struct sk_buff *skb, struct net_device *dev,
u8 extra)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct net_device *br_dev;
u8 tag_dev, tag_port;
enum dsa_cmd cmd;
@@ -267,14 +267,14 @@ static struct sk_buff *dsa_rcv_ll(struct sk_buff *skb, struct net_device *dev,
lag = dsa_lag_by_id(cpu_dp->dst, source_port + 1);
skb->dev = lag ? lag->dev : NULL;
} else {
- skb->dev = dsa_master_find_slave(dev, source_device,
+ skb->dev = dsa_conduit_find_user(dev, source_device,
source_port);
}
if (!skb->dev)
return NULL;
- /* When using LAG offload, skb->dev is not a DSA slave interface,
+ /* When using LAG offload, skb->dev is not a DSA user interface,
* so we cannot call dsa_default_offload_fwd_mark and we need to
* special-case it.
*/
@@ -406,4 +406,5 @@ static struct dsa_tag_driver *dsa_tag_drivers[] = {
module_dsa_tag_drivers(dsa_tag_drivers);
+MODULE_DESCRIPTION("DSA tag driver for Marvell switches using DSA headers");
MODULE_LICENSE("GPL");
diff --git a/net/dsa/tag_gswip.c b/net/dsa/tag_gswip.c
index e279cd9057b0..51a1f46a567f 100644
--- a/net/dsa/tag_gswip.c
+++ b/net/dsa/tag_gswip.c
@@ -61,7 +61,7 @@
static struct sk_buff *gswip_tag_xmit(struct sk_buff *skb,
struct net_device *dev)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
u8 *gswip_tag;
skb_push(skb, GSWIP_TX_HEADER_LEN);
@@ -89,7 +89,7 @@ static struct sk_buff *gswip_tag_rcv(struct sk_buff *skb,
/* Get source port information */
port = (gswip_tag[7] & GSWIP_RX_SPPID_MASK) >> GSWIP_RX_SPPID_SHIFT;
- skb->dev = dsa_master_find_slave(dev, 0, port);
+ skb->dev = dsa_conduit_find_user(dev, 0, port);
if (!skb->dev)
return NULL;
@@ -107,6 +107,7 @@ static const struct dsa_device_ops gswip_netdev_ops = {
.needed_headroom = GSWIP_RX_HEADER_LEN,
};
+MODULE_DESCRIPTION("DSA tag driver for Lantiq / Intel GSWIP switches");
MODULE_LICENSE("GPL");
MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_GSWIP, GSWIP_NAME);
diff --git a/net/dsa/tag_hellcreek.c b/net/dsa/tag_hellcreek.c
index 03a1fb9c87a9..663b25785d95 100644
--- a/net/dsa/tag_hellcreek.c
+++ b/net/dsa/tag_hellcreek.c
@@ -20,7 +20,7 @@
static struct sk_buff *hellcreek_xmit(struct sk_buff *skb,
struct net_device *dev)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
u8 *tag;
/* Calculate checksums (if required) before adding the trailer tag to
@@ -45,7 +45,7 @@ static struct sk_buff *hellcreek_rcv(struct sk_buff *skb,
u8 *tag = skb_tail_pointer(skb) - HELLCREEK_TAG_LEN;
unsigned int port = tag[0] & 0x03;
- skb->dev = dsa_master_find_slave(dev, 0, port);
+ skb->dev = dsa_conduit_find_user(dev, 0, port);
if (!skb->dev) {
netdev_warn_once(dev, "Failed to get source port: %d\n", port);
return NULL;
@@ -67,6 +67,7 @@ static const struct dsa_device_ops hellcreek_netdev_ops = {
.needed_tailroom = HELLCREEK_TAG_LEN,
};
+MODULE_DESCRIPTION("DSA tag driver for Hirschmann Hellcreek TSN switches");
MODULE_LICENSE("Dual MIT/GPL");
MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_HELLCREEK, HELLCREEK_NAME);
diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c
index ea100bd25939..ee7b272ab715 100644
--- a/net/dsa/tag_ksz.c
+++ b/net/dsa/tag_ksz.c
@@ -87,7 +87,7 @@ static struct sk_buff *ksz_common_rcv(struct sk_buff *skb,
struct net_device *dev,
unsigned int port, unsigned int len)
{
- skb->dev = dsa_master_find_slave(dev, 0, port);
+ skb->dev = dsa_conduit_find_user(dev, 0, port);
if (!skb->dev)
return NULL;
@@ -119,7 +119,7 @@ static struct sk_buff *ksz_common_rcv(struct sk_buff *skb,
static struct sk_buff *ksz8795_xmit(struct sk_buff *skb, struct net_device *dev)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct ethhdr *hdr;
u8 *tag;
@@ -256,7 +256,7 @@ static struct sk_buff *ksz_defer_xmit(struct dsa_port *dp, struct sk_buff *skb)
return NULL;
kthread_init_work(&xmit_work->work, xmit_work_fn);
- /* Increase refcount so the kfree_skb in dsa_slave_xmit
+ /* Increase refcount so the kfree_skb in dsa_user_xmit
* won't really free the packet.
*/
xmit_work->dp = dp;
@@ -272,7 +272,7 @@ static struct sk_buff *ksz9477_xmit(struct sk_buff *skb,
{
u16 queue_mapping = skb_get_queue_mapping(skb);
u8 prio = netdev_txq_to_tc(dev, queue_mapping);
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct ethhdr *hdr;
__be16 *tag;
u16 val;
@@ -293,6 +293,14 @@ static struct sk_buff *ksz9477_xmit(struct sk_buff *skb,
if (is_link_local_ether_addr(hdr->h_dest))
val |= KSZ9477_TAIL_TAG_OVERRIDE;
+ if (dev->features & NETIF_F_HW_HSR_DUP) {
+ struct net_device *hsr_dev = dp->hsr_dev;
+ struct dsa_port *other_dp;
+
+ dsa_hsr_foreach_port(other_dp, dp->ds, hsr_dev)
+ val |= BIT(other_dp->index);
+ }
+
*tag = cpu_to_be16(val);
return ksz_defer_xmit(dp, skb);
@@ -336,7 +344,7 @@ static struct sk_buff *ksz9893_xmit(struct sk_buff *skb,
{
u16 queue_mapping = skb_get_queue_mapping(skb);
u8 prio = netdev_txq_to_tc(dev, queue_mapping);
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct ethhdr *hdr;
u8 *tag;
@@ -402,7 +410,7 @@ static struct sk_buff *lan937x_xmit(struct sk_buff *skb,
{
u16 queue_mapping = skb_get_queue_mapping(skb);
u8 prio = netdev_txq_to_tc(dev, queue_mapping);
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
const struct ethhdr *hdr = eth_hdr(skb);
__be16 *tag;
u16 val;
@@ -451,4 +459,5 @@ static struct dsa_tag_driver *dsa_tag_driver_array[] = {
module_dsa_tag_drivers(dsa_tag_driver_array);
+MODULE_DESCRIPTION("DSA tag driver for Microchip 8795/937x/9477/9893 families of switches");
MODULE_LICENSE("GPL");
diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c
index c25f5536706b..258e5d7dc5ef 100644
--- a/net/dsa/tag_lan9303.c
+++ b/net/dsa/tag_lan9303.c
@@ -56,7 +56,7 @@ static int lan9303_xmit_use_arl(struct dsa_port *dp, u8 *dest_addr)
static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
__be16 *lan9303_tag;
u16 tag;
@@ -99,7 +99,7 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev)
source_port = lan9303_tag1 & 0x3;
- skb->dev = dsa_master_find_slave(dev, 0, source_port);
+ skb->dev = dsa_conduit_find_user(dev, 0, source_port);
if (!skb->dev) {
dev_warn_ratelimited(&dev->dev, "Dropping packet due to invalid source port\n");
return NULL;
@@ -119,6 +119,7 @@ static const struct dsa_device_ops lan9303_netdev_ops = {
.needed_headroom = LAN9303_TAG_LEN,
};
+MODULE_DESCRIPTION("DSA tag driver for SMSC/Microchip LAN9303 family of switches");
MODULE_LICENSE("GPL");
MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_LAN9303, LAN9303_NAME);
diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c
index 40af80452747..b670e3c53e91 100644
--- a/net/dsa/tag_mtk.c
+++ b/net/dsa/tag_mtk.c
@@ -23,7 +23,7 @@
static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
struct net_device *dev)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
u8 xmit_tpid;
u8 *mtk_tag;
@@ -85,7 +85,7 @@ static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev)
/* Get source port information */
port = (hdr & MTK_HDR_RECV_SOURCE_PORT_MASK);
- skb->dev = dsa_master_find_slave(dev, 0, port);
+ skb->dev = dsa_conduit_find_user(dev, 0, port);
if (!skb->dev)
return NULL;
@@ -102,6 +102,7 @@ static const struct dsa_device_ops mtk_netdev_ops = {
.needed_headroom = MTK_HDR_LEN,
};
+MODULE_DESCRIPTION("DSA tag driver for Mediatek switches");
MODULE_LICENSE("GPL");
MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_MTK, MTK_NAME);
diff --git a/net/dsa/tag_none.c b/net/dsa/tag_none.c
index d2fd179c4227..e9c9670a9c44 100644
--- a/net/dsa/tag_none.c
+++ b/net/dsa/tag_none.c
@@ -12,8 +12,8 @@
#define NONE_NAME "none"
-static struct sk_buff *dsa_slave_notag_xmit(struct sk_buff *skb,
- struct net_device *dev)
+static struct sk_buff *dsa_user_notag_xmit(struct sk_buff *skb,
+ struct net_device *dev)
{
/* Just return the original SKB */
return skb;
@@ -22,9 +22,10 @@ static struct sk_buff *dsa_slave_notag_xmit(struct sk_buff *skb,
static const struct dsa_device_ops none_ops = {
.name = NONE_NAME,
.proto = DSA_TAG_PROTO_NONE,
- .xmit = dsa_slave_notag_xmit,
+ .xmit = dsa_user_notag_xmit,
};
module_dsa_tag_driver(none_ops);
MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_NONE, NONE_NAME);
+MODULE_DESCRIPTION("DSA no-op tag driver");
MODULE_LICENSE("GPL");
diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c
index 20bf7074d5a6..e0e4300bfbd3 100644
--- a/net/dsa/tag_ocelot.c
+++ b/net/dsa/tag_ocelot.c
@@ -45,7 +45,7 @@ static void ocelot_xmit_get_vlan_info(struct sk_buff *skb, struct dsa_port *dp,
static void ocelot_xmit_common(struct sk_buff *skb, struct net_device *netdev,
__be32 ifh_prefix, void **ifh)
{
- struct dsa_port *dp = dsa_slave_to_port(netdev);
+ struct dsa_port *dp = dsa_user_to_port(netdev);
struct dsa_switch *ds = dp->ds;
u64 vlan_tci, tag_type;
void *injection;
@@ -79,7 +79,7 @@ static void ocelot_xmit_common(struct sk_buff *skb, struct net_device *netdev,
static struct sk_buff *ocelot_xmit(struct sk_buff *skb,
struct net_device *netdev)
{
- struct dsa_port *dp = dsa_slave_to_port(netdev);
+ struct dsa_port *dp = dsa_user_to_port(netdev);
void *injection;
ocelot_xmit_common(skb, netdev, cpu_to_be32(0x8880000a), &injection);
@@ -91,7 +91,7 @@ static struct sk_buff *ocelot_xmit(struct sk_buff *skb,
static struct sk_buff *seville_xmit(struct sk_buff *skb,
struct net_device *netdev)
{
- struct dsa_port *dp = dsa_slave_to_port(netdev);
+ struct dsa_port *dp = dsa_user_to_port(netdev);
void *injection;
ocelot_xmit_common(skb, netdev, cpu_to_be32(0x88800005), &injection);
@@ -111,12 +111,12 @@ static struct sk_buff *ocelot_rcv(struct sk_buff *skb,
u16 vlan_tpid;
u64 rew_val;
- /* Revert skb->data by the amount consumed by the DSA master,
+ /* Revert skb->data by the amount consumed by the DSA conduit,
* so it points to the beginning of the frame.
*/
skb_push(skb, ETH_HLEN);
/* We don't care about the short prefix, it is just for easy entrance
- * into the DSA master's RX filter. Discard it now by moving it into
+ * into the DSA conduit's RX filter. Discard it now by moving it into
* the headroom.
*/
skb_pull(skb, OCELOT_SHORT_PREFIX_LEN);
@@ -141,12 +141,12 @@ static struct sk_buff *ocelot_rcv(struct sk_buff *skb,
ocelot_xfh_get_vlan_tci(extraction, &vlan_tci);
ocelot_xfh_get_rew_val(extraction, &rew_val);
- skb->dev = dsa_master_find_slave(netdev, 0, src_port);
+ skb->dev = dsa_conduit_find_user(netdev, 0, src_port);
if (!skb->dev)
/* The switch will reflect back some frames sent through
- * sockets opened on the bare DSA master. These will come back
+ * sockets opened on the bare DSA conduit. These will come back
* with src_port equal to the index of the CPU port, for which
- * there is no slave registered. So don't print any error
+ * there is no user registered. So don't print any error
* message here (ignore and drop those frames).
*/
return NULL;
@@ -170,7 +170,7 @@ static struct sk_buff *ocelot_rcv(struct sk_buff *skb,
* equal to the pvid of the ingress port and should not be used for
* processing.
*/
- dp = dsa_slave_to_port(skb->dev);
+ dp = dsa_user_to_port(skb->dev);
vlan_tpid = tag_type ? ETH_P_8021AD : ETH_P_8021Q;
if (dsa_port_is_vlan_filtering(dp) &&
@@ -192,7 +192,7 @@ static const struct dsa_device_ops ocelot_netdev_ops = {
.xmit = ocelot_xmit,
.rcv = ocelot_rcv,
.needed_headroom = OCELOT_TOTAL_TAG_LEN,
- .promisc_on_master = true,
+ .promisc_on_conduit = true,
};
DSA_TAG_DRIVER(ocelot_netdev_ops);
@@ -204,7 +204,7 @@ static const struct dsa_device_ops seville_netdev_ops = {
.xmit = seville_xmit,
.rcv = ocelot_rcv,
.needed_headroom = OCELOT_TOTAL_TAG_LEN,
- .promisc_on_master = true,
+ .promisc_on_conduit = true,
};
DSA_TAG_DRIVER(seville_netdev_ops);
@@ -217,4 +217,5 @@ static struct dsa_tag_driver *ocelot_tag_driver_array[] = {
module_dsa_tag_drivers(ocelot_tag_driver_array);
+MODULE_DESCRIPTION("DSA tag driver for Ocelot family of switches, using NPI port");
MODULE_LICENSE("GPL v2");
diff --git a/net/dsa/tag_ocelot_8021q.c b/net/dsa/tag_ocelot_8021q.c
index 1f0b8c20eba5..b059381310fe 100644
--- a/net/dsa/tag_ocelot_8021q.c
+++ b/net/dsa/tag_ocelot_8021q.c
@@ -37,8 +37,8 @@ static struct sk_buff *ocelot_defer_xmit(struct dsa_port *dp,
return NULL;
/* PTP over IP packets need UDP checksumming. We may have inherited
- * NETIF_F_HW_CSUM from the DSA master, but these packets are not sent
- * through the DSA master, so calculate the checksum here.
+ * NETIF_F_HW_CSUM from the DSA conduit, but these packets are not sent
+ * through the DSA conduit, so calculate the checksum here.
*/
if (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_help(skb))
return NULL;
@@ -49,7 +49,7 @@ static struct sk_buff *ocelot_defer_xmit(struct dsa_port *dp,
/* Calls felix_port_deferred_xmit in felix.c */
kthread_init_work(&xmit_work->work, xmit_work_fn);
- /* Increase refcount so the kfree_skb in dsa_slave_xmit
+ /* Increase refcount so the kfree_skb in dsa_user_xmit
* won't really free the packet.
*/
xmit_work->dp = dp;
@@ -63,7 +63,7 @@ static struct sk_buff *ocelot_defer_xmit(struct dsa_port *dp,
static struct sk_buff *ocelot_xmit(struct sk_buff *skb,
struct net_device *netdev)
{
- struct dsa_port *dp = dsa_slave_to_port(netdev);
+ struct dsa_port *dp = dsa_user_to_port(netdev);
u16 queue_mapping = skb_get_queue_mapping(skb);
u8 pcp = netdev_txq_to_tc(netdev, queue_mapping);
u16 tx_vid = dsa_tag_8021q_standalone_vid(dp);
@@ -83,7 +83,7 @@ static struct sk_buff *ocelot_rcv(struct sk_buff *skb,
dsa_8021q_rcv(skb, &src_port, &switch_id, NULL);
- skb->dev = dsa_master_find_slave(netdev, switch_id, src_port);
+ skb->dev = dsa_conduit_find_user(netdev, switch_id, src_port);
if (!skb->dev)
return NULL;
@@ -130,9 +130,10 @@ static const struct dsa_device_ops ocelot_8021q_netdev_ops = {
.connect = ocelot_connect,
.disconnect = ocelot_disconnect,
.needed_headroom = VLAN_HLEN,
- .promisc_on_master = true,
+ .promisc_on_conduit = true,
};
+MODULE_DESCRIPTION("DSA tag driver for Ocelot family of switches, using VLAN");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_OCELOT_8021Q, OCELOT_8021Q_NAME);
diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c
index e5ff7c34e577..0cf61286b426 100644
--- a/net/dsa/tag_qca.c
+++ b/net/dsa/tag_qca.c
@@ -14,7 +14,7 @@
static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
__be16 *phdr;
u16 hdr;
@@ -78,7 +78,7 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev)
/* Get source port information */
port = FIELD_GET(QCA_HDR_RECV_SOURCE_PORT, hdr);
- skb->dev = dsa_master_find_slave(dev, 0, port);
+ skb->dev = dsa_conduit_find_user(dev, 0, port);
if (!skb->dev)
return NULL;
@@ -116,9 +116,10 @@ static const struct dsa_device_ops qca_netdev_ops = {
.xmit = qca_tag_xmit,
.rcv = qca_tag_rcv,
.needed_headroom = QCA_HDR_LEN,
- .promisc_on_master = true,
+ .promisc_on_conduit = true,
};
+MODULE_DESCRIPTION("DSA tag driver for Qualcomm Atheros QCA8K switches");
MODULE_LICENSE("GPL");
MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_QCA, QCA_NAME);
diff --git a/net/dsa/tag_rtl4_a.c b/net/dsa/tag_rtl4_a.c
index c327314b95e3..feaefa0e179b 100644
--- a/net/dsa/tag_rtl4_a.c
+++ b/net/dsa/tag_rtl4_a.c
@@ -23,7 +23,6 @@
#define RTL4_A_NAME "rtl4a"
#define RTL4_A_HDR_LEN 4
-#define RTL4_A_ETHERTYPE 0x8899
#define RTL4_A_PROTOCOL_SHIFT 12
/*
* 0x1 = Realtek Remote Control protocol (RRCP)
@@ -36,7 +35,7 @@
static struct sk_buff *rtl4a_tag_xmit(struct sk_buff *skb,
struct net_device *dev)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
__be16 *p;
u8 *tag;
u16 out;
@@ -54,7 +53,7 @@ static struct sk_buff *rtl4a_tag_xmit(struct sk_buff *skb,
/* Set Ethertype */
p = (__be16 *)tag;
- *p = htons(RTL4_A_ETHERTYPE);
+ *p = htons(ETH_P_REALTEK);
out = (RTL4_A_PROTOCOL_RTL8366RB << RTL4_A_PROTOCOL_SHIFT);
/* The lower bits indicate the port number */
@@ -82,7 +81,7 @@ static struct sk_buff *rtl4a_tag_rcv(struct sk_buff *skb,
tag = dsa_etype_header_pos_rx(skb);
p = (__be16 *)tag;
etype = ntohs(*p);
- if (etype != RTL4_A_ETHERTYPE) {
+ if (etype != ETH_P_REALTEK) {
/* Not custom, just pass through */
netdev_dbg(dev, "non-realtek ethertype 0x%04x\n", etype);
return skb;
@@ -97,9 +96,9 @@ static struct sk_buff *rtl4a_tag_rcv(struct sk_buff *skb,
}
port = protport & 0xff;
- skb->dev = dsa_master_find_slave(dev, 0, port);
+ skb->dev = dsa_conduit_find_user(dev, 0, port);
if (!skb->dev) {
- netdev_dbg(dev, "could not find slave for port %d\n", port);
+ netdev_dbg(dev, "could not find user for port %d\n", port);
return NULL;
}
@@ -122,5 +121,6 @@ static const struct dsa_device_ops rtl4a_netdev_ops = {
};
module_dsa_tag_driver(rtl4a_netdev_ops);
+MODULE_DESCRIPTION("DSA tag driver for Realtek 4 byte protocol A tags");
MODULE_LICENSE("GPL");
MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_RTL4_A, RTL4_A_NAME);
diff --git a/net/dsa/tag_rtl8_4.c b/net/dsa/tag_rtl8_4.c
index 4f67834fd121..15c2bae2b429 100644
--- a/net/dsa/tag_rtl8_4.c
+++ b/net/dsa/tag_rtl8_4.c
@@ -103,7 +103,7 @@
static void rtl8_4_write_tag(struct sk_buff *skb, struct net_device *dev,
void *tag)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
__be16 tag16[RTL8_4_TAG_LEN / 2];
/* Set Realtek EtherType */
@@ -180,10 +180,10 @@ static int rtl8_4_read_tag(struct sk_buff *skb, struct net_device *dev,
/* Parse TX (switch->CPU) */
port = FIELD_GET(RTL8_4_TX, ntohs(tag16[3]));
- skb->dev = dsa_master_find_slave(dev, 0, port);
+ skb->dev = dsa_conduit_find_user(dev, 0, port);
if (!skb->dev) {
dev_warn_ratelimited(&dev->dev,
- "could not find slave for port %d\n",
+ "could not find user for port %d\n",
port);
return -ENOENT;
}
@@ -258,4 +258,5 @@ static struct dsa_tag_driver *dsa_tag_drivers[] = {
};
module_dsa_tag_drivers(dsa_tag_drivers);
+MODULE_DESCRIPTION("DSA tag driver for Realtek 8 byte protocol 4 tags");
MODULE_LICENSE("GPL");
diff --git a/net/dsa/tag_rzn1_a5psw.c b/net/dsa/tag_rzn1_a5psw.c
index 437a6820ac42..69d51221b1e5 100644
--- a/net/dsa/tag_rzn1_a5psw.c
+++ b/net/dsa/tag_rzn1_a5psw.c
@@ -39,7 +39,7 @@ struct a5psw_tag {
static struct sk_buff *a5psw_tag_xmit(struct sk_buff *skb, struct net_device *dev)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct a5psw_tag *ptag;
u32 data2_val;
@@ -90,7 +90,7 @@ static struct sk_buff *a5psw_tag_rcv(struct sk_buff *skb,
port = FIELD_GET(A5PSW_CTRL_DATA_PORT, ntohs(tag->ctrl_data));
- skb->dev = dsa_master_find_slave(dev, 0, port);
+ skb->dev = dsa_conduit_find_user(dev, 0, port);
if (!skb->dev)
return NULL;
@@ -110,6 +110,7 @@ static const struct dsa_device_ops a5psw_netdev_ops = {
.needed_headroom = A5PSW_TAG_LEN,
};
+MODULE_DESCRIPTION("DSA tag driver for Renesas RZ/N1 A5PSW switch");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_A5PSW, A5PSW_NAME);
module_dsa_tag_driver(a5psw_netdev_ops);
diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c
index ade3eeb2f3e6..2717e9d7b612 100644
--- a/net/dsa/tag_sja1105.c
+++ b/net/dsa/tag_sja1105.c
@@ -157,7 +157,7 @@ static struct sk_buff *sja1105_defer_xmit(struct dsa_port *dp,
return NULL;
kthread_init_work(&xmit_work->work, xmit_work_fn);
- /* Increase refcount so the kfree_skb in dsa_slave_xmit
+ /* Increase refcount so the kfree_skb in dsa_user_xmit
* won't really free the packet.
*/
xmit_work->dp = dp;
@@ -210,7 +210,7 @@ static u16 sja1105_xmit_tpid(struct dsa_port *dp)
static struct sk_buff *sja1105_imprecise_xmit(struct sk_buff *skb,
struct net_device *netdev)
{
- struct dsa_port *dp = dsa_slave_to_port(netdev);
+ struct dsa_port *dp = dsa_user_to_port(netdev);
unsigned int bridge_num = dsa_port_bridge_num_get(dp);
struct net_device *br = dsa_port_bridge_dev_get(dp);
u16 tx_vid;
@@ -235,7 +235,7 @@ static struct sk_buff *sja1105_imprecise_xmit(struct sk_buff *skb,
/* Transform untagged control packets into pvid-tagged control packets so that
* all packets sent by this tagger are VLAN-tagged and we can configure the
- * switch to drop untagged packets coming from the DSA master.
+ * switch to drop untagged packets coming from the DSA conduit.
*/
static struct sk_buff *sja1105_pvid_tag_control_pkt(struct dsa_port *dp,
struct sk_buff *skb, u8 pcp)
@@ -266,7 +266,7 @@ static struct sk_buff *sja1105_pvid_tag_control_pkt(struct dsa_port *dp,
static struct sk_buff *sja1105_xmit(struct sk_buff *skb,
struct net_device *netdev)
{
- struct dsa_port *dp = dsa_slave_to_port(netdev);
+ struct dsa_port *dp = dsa_user_to_port(netdev);
u16 queue_mapping = skb_get_queue_mapping(skb);
u8 pcp = netdev_txq_to_tc(netdev, queue_mapping);
u16 tx_vid = dsa_tag_8021q_standalone_vid(dp);
@@ -294,7 +294,7 @@ static struct sk_buff *sja1110_xmit(struct sk_buff *skb,
struct net_device *netdev)
{
struct sk_buff *clone = SJA1105_SKB_CB(skb)->clone;
- struct dsa_port *dp = dsa_slave_to_port(netdev);
+ struct dsa_port *dp = dsa_user_to_port(netdev);
u16 queue_mapping = skb_get_queue_mapping(skb);
u8 pcp = netdev_txq_to_tc(netdev, queue_mapping);
u16 tx_vid = dsa_tag_8021q_standalone_vid(dp);
@@ -383,7 +383,7 @@ static struct sk_buff
* Buffer it until we get its meta frame.
*/
if (is_link_local) {
- struct dsa_port *dp = dsa_slave_to_port(skb->dev);
+ struct dsa_port *dp = dsa_user_to_port(skb->dev);
struct sja1105_tagger_private *priv;
struct dsa_switch *ds = dp->ds;
@@ -396,7 +396,7 @@ static struct sk_buff
if (priv->stampable_skb) {
dev_err_ratelimited(ds->dev,
"Expected meta frame, is %12llx "
- "in the DSA master multicast filter?\n",
+ "in the DSA conduit multicast filter?\n",
SJA1105_META_DMAC);
kfree_skb(priv->stampable_skb);
}
@@ -417,7 +417,7 @@ static struct sk_buff
* frame, which serves no further purpose).
*/
} else if (is_meta) {
- struct dsa_port *dp = dsa_slave_to_port(skb->dev);
+ struct dsa_port *dp = dsa_user_to_port(skb->dev);
struct sja1105_tagger_private *priv;
struct dsa_switch *ds = dp->ds;
struct sk_buff *stampable_skb;
@@ -550,7 +550,7 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb,
}
if (source_port != -1 && switch_id != -1)
- skb->dev = dsa_master_find_slave(netdev, switch_id, source_port);
+ skb->dev = dsa_conduit_find_user(netdev, switch_id, source_port);
else if (vbid >= 1)
skb->dev = dsa_tag_8021q_find_port_by_vbid(netdev, vbid);
else
@@ -573,16 +573,16 @@ static struct sk_buff *sja1110_rcv_meta(struct sk_buff *skb, u16 rx_header)
int switch_id = SJA1110_RX_HEADER_SWITCH_ID(rx_header);
int n_ts = SJA1110_RX_HEADER_N_TS(rx_header);
struct sja1105_tagger_data *tagger_data;
- struct net_device *master = skb->dev;
+ struct net_device *conduit = skb->dev;
struct dsa_port *cpu_dp;
struct dsa_switch *ds;
int i;
- cpu_dp = master->dsa_ptr;
+ cpu_dp = conduit->dsa_ptr;
ds = dsa_switch_find(cpu_dp->dst->index, switch_id);
if (!ds) {
net_err_ratelimited("%s: cannot find switch id %d\n",
- master->name, switch_id);
+ conduit->name, switch_id);
return NULL;
}
@@ -649,7 +649,7 @@ static struct sk_buff *sja1110_rcv_inband_control_extension(struct sk_buff *skb,
/* skb->len counts from skb->data, while start_of_padding
* counts from the destination MAC address. Right now skb->data
- * is still as set by the DSA master, so to trim away the
+ * is still as set by the DSA conduit, so to trim away the
* padding and trailer we need to account for the fact that
* skb->data points to skb_mac_header(skb) + ETH_HLEN.
*/
@@ -698,7 +698,7 @@ static struct sk_buff *sja1110_rcv(struct sk_buff *skb,
else if (source_port == -1 || switch_id == -1)
skb->dev = dsa_find_designated_bridge_port_by_vid(netdev, vid);
else
- skb->dev = dsa_master_find_slave(netdev, switch_id, source_port);
+ skb->dev = dsa_conduit_find_user(netdev, switch_id, source_port);
if (!skb->dev) {
netdev_warn(netdev, "Couldn't decode source port\n");
return NULL;
@@ -778,7 +778,7 @@ static const struct dsa_device_ops sja1105_netdev_ops = {
.disconnect = sja1105_disconnect,
.needed_headroom = VLAN_HLEN,
.flow_dissect = sja1105_flow_dissect,
- .promisc_on_master = true,
+ .promisc_on_conduit = true,
};
DSA_TAG_DRIVER(sja1105_netdev_ops);
@@ -806,4 +806,5 @@ static struct dsa_tag_driver *sja1105_tag_driver_array[] = {
module_dsa_tag_drivers(sja1105_tag_driver_array);
+MODULE_DESCRIPTION("DSA tag driver for NXP SJA1105 switches");
MODULE_LICENSE("GPL v2");
diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c
index 7361b9106382..22742a53d6f4 100644
--- a/net/dsa/tag_trailer.c
+++ b/net/dsa/tag_trailer.c
@@ -14,7 +14,7 @@
static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
u8 *trailer;
trailer = skb_put(skb, 4);
@@ -41,7 +41,7 @@ static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev)
source_port = trailer[1] & 7;
- skb->dev = dsa_master_find_slave(dev, 0, source_port);
+ skb->dev = dsa_conduit_find_user(dev, 0, source_port);
if (!skb->dev)
return NULL;
@@ -59,6 +59,7 @@ static const struct dsa_device_ops trailer_netdev_ops = {
.needed_tailroom = 4,
};
+MODULE_DESCRIPTION("DSA tag driver for switches using a trailer tag");
MODULE_LICENSE("GPL");
MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_TRAILER, TRAILER_NAME);
diff --git a/net/dsa/tag_xrs700x.c b/net/dsa/tag_xrs700x.c
index af19969f9bc4..68d4633ddd5e 100644
--- a/net/dsa/tag_xrs700x.c
+++ b/net/dsa/tag_xrs700x.c
@@ -13,7 +13,7 @@
static struct sk_buff *xrs700x_xmit(struct sk_buff *skb, struct net_device *dev)
{
- struct dsa_port *partner, *dp = dsa_slave_to_port(dev);
+ struct dsa_port *partner, *dp = dsa_user_to_port(dev);
u8 *trailer;
trailer = skb_put(skb, 1);
@@ -39,7 +39,7 @@ static struct sk_buff *xrs700x_rcv(struct sk_buff *skb, struct net_device *dev)
if (source_port < 0)
return NULL;
- skb->dev = dsa_master_find_slave(dev, 0, source_port);
+ skb->dev = dsa_conduit_find_user(dev, 0, source_port);
if (!skb->dev)
return NULL;
@@ -60,6 +60,7 @@ static const struct dsa_device_ops xrs700x_netdev_ops = {
.needed_tailroom = 1,
};
+MODULE_DESCRIPTION("DSA tag driver for XRS700x switches");
MODULE_LICENSE("GPL");
MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_XRS700X, XRS700X_NAME);
diff --git a/net/dsa/slave.c b/net/dsa/user.c
index 48db91b33390..b15e71cc342c 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/user.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
- * net/dsa/slave.c - Slave device handling
+ * net/dsa/user.c - user device handling
* Copyright (c) 2008-2009 Marvell Semiconductor
*/
@@ -23,13 +23,13 @@
#include <linux/netpoll.h>
#include <linux/string.h>
+#include "conduit.h"
#include "dsa.h"
-#include "port.h"
-#include "master.h"
#include "netlink.h"
-#include "slave.h"
+#include "port.h"
#include "switch.h"
#include "tag.h"
+#include "user.h"
struct dsa_switchdev_event_work {
struct net_device *dev;
@@ -79,13 +79,13 @@ static bool dsa_switch_supports_mc_filtering(struct dsa_switch *ds)
!ds->needs_standalone_vlan_filtering;
}
-static void dsa_slave_standalone_event_work(struct work_struct *work)
+static void dsa_user_standalone_event_work(struct work_struct *work)
{
struct dsa_standalone_event_work *standalone_work =
container_of(work, struct dsa_standalone_event_work, work);
const unsigned char *addr = standalone_work->addr;
struct net_device *dev = standalone_work->dev;
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct switchdev_obj_port_mdb mdb;
struct dsa_switch *ds = dp->ds;
u16 vid = standalone_work->vid;
@@ -140,10 +140,10 @@ static void dsa_slave_standalone_event_work(struct work_struct *work)
kfree(standalone_work);
}
-static int dsa_slave_schedule_standalone_work(struct net_device *dev,
- enum dsa_standalone_event event,
- const unsigned char *addr,
- u16 vid)
+static int dsa_user_schedule_standalone_work(struct net_device *dev,
+ enum dsa_standalone_event event,
+ const unsigned char *addr,
+ u16 vid)
{
struct dsa_standalone_event_work *standalone_work;
@@ -151,7 +151,7 @@ static int dsa_slave_schedule_standalone_work(struct net_device *dev,
if (!standalone_work)
return -ENOMEM;
- INIT_WORK(&standalone_work->work, dsa_slave_standalone_event_work);
+ INIT_WORK(&standalone_work->work, dsa_user_standalone_event_work);
standalone_work->event = event;
standalone_work->dev = dev;
@@ -163,18 +163,18 @@ static int dsa_slave_schedule_standalone_work(struct net_device *dev,
return 0;
}
-static int dsa_slave_host_vlan_rx_filtering(void *arg, int vid)
+static int dsa_user_host_vlan_rx_filtering(void *arg, int vid)
{
struct dsa_host_vlan_rx_filtering_ctx *ctx = arg;
- return dsa_slave_schedule_standalone_work(ctx->dev, ctx->event,
+ return dsa_user_schedule_standalone_work(ctx->dev, ctx->event,
ctx->addr, vid);
}
-static int dsa_slave_vlan_for_each(struct net_device *dev,
- int (*cb)(void *arg, int vid), void *arg)
+static int dsa_user_vlan_for_each(struct net_device *dev,
+ int (*cb)(void *arg, int vid), void *arg)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct dsa_vlan *v;
int err;
@@ -193,99 +193,99 @@ static int dsa_slave_vlan_for_each(struct net_device *dev,
return 0;
}
-static int dsa_slave_sync_uc(struct net_device *dev,
- const unsigned char *addr)
+static int dsa_user_sync_uc(struct net_device *dev,
+ const unsigned char *addr)
{
- struct net_device *master = dsa_slave_to_master(dev);
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct net_device *conduit = dsa_user_to_conduit(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct dsa_host_vlan_rx_filtering_ctx ctx = {
.dev = dev,
.addr = addr,
.event = DSA_UC_ADD,
};
- dev_uc_add(master, addr);
+ dev_uc_add(conduit, addr);
if (!dsa_switch_supports_uc_filtering(dp->ds))
return 0;
- return dsa_slave_vlan_for_each(dev, dsa_slave_host_vlan_rx_filtering,
+ return dsa_user_vlan_for_each(dev, dsa_user_host_vlan_rx_filtering,
&ctx);
}
-static int dsa_slave_unsync_uc(struct net_device *dev,
- const unsigned char *addr)
+static int dsa_user_unsync_uc(struct net_device *dev,
+ const unsigned char *addr)
{
- struct net_device *master = dsa_slave_to_master(dev);
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct net_device *conduit = dsa_user_to_conduit(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct dsa_host_vlan_rx_filtering_ctx ctx = {
.dev = dev,
.addr = addr,
.event = DSA_UC_DEL,
};
- dev_uc_del(master, addr);
+ dev_uc_del(conduit, addr);
if (!dsa_switch_supports_uc_filtering(dp->ds))
return 0;
- return dsa_slave_vlan_for_each(dev, dsa_slave_host_vlan_rx_filtering,
+ return dsa_user_vlan_for_each(dev, dsa_user_host_vlan_rx_filtering,
&ctx);
}
-static int dsa_slave_sync_mc(struct net_device *dev,
- const unsigned char *addr)
+static int dsa_user_sync_mc(struct net_device *dev,
+ const unsigned char *addr)
{
- struct net_device *master = dsa_slave_to_master(dev);
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct net_device *conduit = dsa_user_to_conduit(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct dsa_host_vlan_rx_filtering_ctx ctx = {
.dev = dev,
.addr = addr,
.event = DSA_MC_ADD,
};
- dev_mc_add(master, addr);
+ dev_mc_add(conduit, addr);
if (!dsa_switch_supports_mc_filtering(dp->ds))
return 0;
- return dsa_slave_vlan_for_each(dev, dsa_slave_host_vlan_rx_filtering,
+ return dsa_user_vlan_for_each(dev, dsa_user_host_vlan_rx_filtering,
&ctx);
}
-static int dsa_slave_unsync_mc(struct net_device *dev,
- const unsigned char *addr)
+static int dsa_user_unsync_mc(struct net_device *dev,
+ const unsigned char *addr)
{
- struct net_device *master = dsa_slave_to_master(dev);
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct net_device *conduit = dsa_user_to_conduit(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct dsa_host_vlan_rx_filtering_ctx ctx = {
.dev = dev,
.addr = addr,
.event = DSA_MC_DEL,
};
- dev_mc_del(master, addr);
+ dev_mc_del(conduit, addr);
if (!dsa_switch_supports_mc_filtering(dp->ds))
return 0;
- return dsa_slave_vlan_for_each(dev, dsa_slave_host_vlan_rx_filtering,
+ return dsa_user_vlan_for_each(dev, dsa_user_host_vlan_rx_filtering,
&ctx);
}
-void dsa_slave_sync_ha(struct net_device *dev)
+void dsa_user_sync_ha(struct net_device *dev)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct dsa_switch *ds = dp->ds;
struct netdev_hw_addr *ha;
netif_addr_lock_bh(dev);
netdev_for_each_synced_mc_addr(ha, dev)
- dsa_slave_sync_mc(dev, ha->addr);
+ dsa_user_sync_mc(dev, ha->addr);
netdev_for_each_synced_uc_addr(ha, dev)
- dsa_slave_sync_uc(dev, ha->addr);
+ dsa_user_sync_uc(dev, ha->addr);
netif_addr_unlock_bh(dev);
@@ -294,19 +294,19 @@ void dsa_slave_sync_ha(struct net_device *dev)
dsa_flush_workqueue();
}
-void dsa_slave_unsync_ha(struct net_device *dev)
+void dsa_user_unsync_ha(struct net_device *dev)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct dsa_switch *ds = dp->ds;
struct netdev_hw_addr *ha;
netif_addr_lock_bh(dev);
netdev_for_each_synced_uc_addr(ha, dev)
- dsa_slave_unsync_uc(dev, ha->addr);
+ dsa_user_unsync_uc(dev, ha->addr);
netdev_for_each_synced_mc_addr(ha, dev)
- dsa_slave_unsync_mc(dev, ha->addr);
+ dsa_user_unsync_mc(dev, ha->addr);
netif_addr_unlock_bh(dev);
@@ -315,8 +315,8 @@ void dsa_slave_unsync_ha(struct net_device *dev)
dsa_flush_workqueue();
}
-/* slave mii_bus handling ***************************************************/
-static int dsa_slave_phy_read(struct mii_bus *bus, int addr, int reg)
+/* user mii_bus handling ***************************************************/
+static int dsa_user_phy_read(struct mii_bus *bus, int addr, int reg)
{
struct dsa_switch *ds = bus->priv;
@@ -326,7 +326,7 @@ static int dsa_slave_phy_read(struct mii_bus *bus, int addr, int reg)
return 0xffff;
}
-static int dsa_slave_phy_write(struct mii_bus *bus, int addr, int reg, u16 val)
+static int dsa_user_phy_write(struct mii_bus *bus, int addr, int reg, u16 val)
{
struct dsa_switch *ds = bus->priv;
@@ -336,35 +336,35 @@ static int dsa_slave_phy_write(struct mii_bus *bus, int addr, int reg, u16 val)
return 0;
}
-void dsa_slave_mii_bus_init(struct dsa_switch *ds)
+void dsa_user_mii_bus_init(struct dsa_switch *ds)
{
- ds->slave_mii_bus->priv = (void *)ds;
- ds->slave_mii_bus->name = "dsa slave smi";
- ds->slave_mii_bus->read = dsa_slave_phy_read;
- ds->slave_mii_bus->write = dsa_slave_phy_write;
- snprintf(ds->slave_mii_bus->id, MII_BUS_ID_SIZE, "dsa-%d.%d",
+ ds->user_mii_bus->priv = (void *)ds;
+ ds->user_mii_bus->name = "dsa user smi";
+ ds->user_mii_bus->read = dsa_user_phy_read;
+ ds->user_mii_bus->write = dsa_user_phy_write;
+ snprintf(ds->user_mii_bus->id, MII_BUS_ID_SIZE, "dsa-%d.%d",
ds->dst->index, ds->index);
- ds->slave_mii_bus->parent = ds->dev;
- ds->slave_mii_bus->phy_mask = ~ds->phys_mii_mask;
+ ds->user_mii_bus->parent = ds->dev;
+ ds->user_mii_bus->phy_mask = ~ds->phys_mii_mask;
}
-/* slave device handling ****************************************************/
-static int dsa_slave_get_iflink(const struct net_device *dev)
+/* user device handling ****************************************************/
+static int dsa_user_get_iflink(const struct net_device *dev)
{
- return dsa_slave_to_master(dev)->ifindex;
+ return dsa_user_to_conduit(dev)->ifindex;
}
-static int dsa_slave_open(struct net_device *dev)
+static int dsa_user_open(struct net_device *dev)
{
- struct net_device *master = dsa_slave_to_master(dev);
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct net_device *conduit = dsa_user_to_conduit(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct dsa_switch *ds = dp->ds;
int err;
- err = dev_open(master, NULL);
+ err = dev_open(conduit, NULL);
if (err < 0) {
- netdev_err(dev, "failed to open master %s\n", master->name);
+ netdev_err(dev, "failed to open conduit %s\n", conduit->name);
goto out;
}
@@ -374,8 +374,8 @@ static int dsa_slave_open(struct net_device *dev)
goto out;
}
- if (!ether_addr_equal(dev->dev_addr, master->dev_addr)) {
- err = dev_uc_add(master, dev->dev_addr);
+ if (!ether_addr_equal(dev->dev_addr, conduit->dev_addr)) {
+ err = dev_uc_add(conduit, dev->dev_addr);
if (err < 0)
goto del_host_addr;
}
@@ -387,8 +387,8 @@ static int dsa_slave_open(struct net_device *dev)
return 0;
del_unicast:
- if (!ether_addr_equal(dev->dev_addr, master->dev_addr))
- dev_uc_del(master, dev->dev_addr);
+ if (!ether_addr_equal(dev->dev_addr, conduit->dev_addr))
+ dev_uc_del(conduit, dev->dev_addr);
del_host_addr:
if (dsa_switch_supports_uc_filtering(ds))
dsa_port_standalone_host_fdb_del(dp, dev->dev_addr, 0);
@@ -396,16 +396,16 @@ out:
return err;
}
-static int dsa_slave_close(struct net_device *dev)
+static int dsa_user_close(struct net_device *dev)
{
- struct net_device *master = dsa_slave_to_master(dev);
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct net_device *conduit = dsa_user_to_conduit(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct dsa_switch *ds = dp->ds;
dsa_port_disable_rt(dp);
- if (!ether_addr_equal(dev->dev_addr, master->dev_addr))
- dev_uc_del(master, dev->dev_addr);
+ if (!ether_addr_equal(dev->dev_addr, conduit->dev_addr))
+ dev_uc_del(conduit, dev->dev_addr);
if (dsa_switch_supports_uc_filtering(ds))
dsa_port_standalone_host_fdb_del(dp, dev->dev_addr, 0);
@@ -413,43 +413,43 @@ static int dsa_slave_close(struct net_device *dev)
return 0;
}
-static void dsa_slave_manage_host_flood(struct net_device *dev)
+static void dsa_user_manage_host_flood(struct net_device *dev)
{
bool mc = dev->flags & (IFF_PROMISC | IFF_ALLMULTI);
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
bool uc = dev->flags & IFF_PROMISC;
dsa_port_set_host_flood(dp, uc, mc);
}
-static void dsa_slave_change_rx_flags(struct net_device *dev, int change)
+static void dsa_user_change_rx_flags(struct net_device *dev, int change)
{
- struct net_device *master = dsa_slave_to_master(dev);
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct net_device *conduit = dsa_user_to_conduit(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct dsa_switch *ds = dp->ds;
if (change & IFF_ALLMULTI)
- dev_set_allmulti(master,
+ dev_set_allmulti(conduit,
dev->flags & IFF_ALLMULTI ? 1 : -1);
if (change & IFF_PROMISC)
- dev_set_promiscuity(master,
+ dev_set_promiscuity(conduit,
dev->flags & IFF_PROMISC ? 1 : -1);
if (dsa_switch_supports_uc_filtering(ds) &&
dsa_switch_supports_mc_filtering(ds))
- dsa_slave_manage_host_flood(dev);
+ dsa_user_manage_host_flood(dev);
}
-static void dsa_slave_set_rx_mode(struct net_device *dev)
+static void dsa_user_set_rx_mode(struct net_device *dev)
{
- __dev_mc_sync(dev, dsa_slave_sync_mc, dsa_slave_unsync_mc);
- __dev_uc_sync(dev, dsa_slave_sync_uc, dsa_slave_unsync_uc);
+ __dev_mc_sync(dev, dsa_user_sync_mc, dsa_user_unsync_mc);
+ __dev_uc_sync(dev, dsa_user_sync_uc, dsa_user_unsync_uc);
}
-static int dsa_slave_set_mac_address(struct net_device *dev, void *a)
+static int dsa_user_set_mac_address(struct net_device *dev, void *a)
{
- struct net_device *master = dsa_slave_to_master(dev);
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct net_device *conduit = dsa_user_to_conduit(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct dsa_switch *ds = dp->ds;
struct sockaddr *addr = a;
int err;
@@ -457,8 +457,15 @@ static int dsa_slave_set_mac_address(struct net_device *dev, void *a)
if (!is_valid_ether_addr(addr->sa_data))
return -EADDRNOTAVAIL;
+ if (ds->ops->port_set_mac_address) {
+ err = ds->ops->port_set_mac_address(ds, dp->index,
+ addr->sa_data);
+ if (err)
+ return err;
+ }
+
/* If the port is down, the address isn't synced yet to hardware or
- * to the DSA master, so there is nothing to change.
+ * to the DSA conduit, so there is nothing to change.
*/
if (!(dev->flags & IFF_UP))
goto out_change_dev_addr;
@@ -469,14 +476,14 @@ static int dsa_slave_set_mac_address(struct net_device *dev, void *a)
return err;
}
- if (!ether_addr_equal(addr->sa_data, master->dev_addr)) {
- err = dev_uc_add(master, addr->sa_data);
+ if (!ether_addr_equal(addr->sa_data, conduit->dev_addr)) {
+ err = dev_uc_add(conduit, addr->sa_data);
if (err < 0)
goto del_unicast;
}
- if (!ether_addr_equal(dev->dev_addr, master->dev_addr))
- dev_uc_del(master, dev->dev_addr);
+ if (!ether_addr_equal(dev->dev_addr, conduit->dev_addr))
+ dev_uc_del(conduit, dev->dev_addr);
if (dsa_switch_supports_uc_filtering(ds))
dsa_port_standalone_host_fdb_del(dp, dev->dev_addr, 0);
@@ -493,7 +500,7 @@ del_unicast:
return err;
}
-struct dsa_slave_dump_ctx {
+struct dsa_user_dump_ctx {
struct net_device *dev;
struct sk_buff *skb;
struct netlink_callback *cb;
@@ -501,10 +508,10 @@ struct dsa_slave_dump_ctx {
};
static int
-dsa_slave_port_fdb_do_dump(const unsigned char *addr, u16 vid,
- bool is_static, void *data)
+dsa_user_port_fdb_do_dump(const unsigned char *addr, u16 vid,
+ bool is_static, void *data)
{
- struct dsa_slave_dump_ctx *dump = data;
+ struct dsa_user_dump_ctx *dump = data;
u32 portid = NETLINK_CB(dump->cb->skb).portid;
u32 seq = dump->cb->nlh->nlmsg_seq;
struct nlmsghdr *nlh;
@@ -545,12 +552,12 @@ nla_put_failure:
}
static int
-dsa_slave_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
- struct net_device *dev, struct net_device *filter_dev,
- int *idx)
+dsa_user_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
+ struct net_device *dev, struct net_device *filter_dev,
+ int *idx)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
- struct dsa_slave_dump_ctx dump = {
+ struct dsa_port *dp = dsa_user_to_port(dev);
+ struct dsa_user_dump_ctx dump = {
.dev = dev,
.skb = skb,
.cb = cb,
@@ -558,15 +565,15 @@ dsa_slave_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
};
int err;
- err = dsa_port_fdb_dump(dp, dsa_slave_port_fdb_do_dump, &dump);
+ err = dsa_port_fdb_dump(dp, dsa_user_port_fdb_do_dump, &dump);
*idx = dump.idx;
return err;
}
-static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+static int dsa_user_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_user_priv *p = netdev_priv(dev);
struct dsa_switch *ds = p->dp->ds;
int port = p->dp->index;
@@ -585,11 +592,11 @@ static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
return phylink_mii_ioctl(p->dp->pl, ifr, cmd);
}
-static int dsa_slave_port_attr_set(struct net_device *dev, const void *ctx,
- const struct switchdev_attr *attr,
- struct netlink_ext_ack *extack)
+static int dsa_user_port_attr_set(struct net_device *dev, const void *ctx,
+ const struct switchdev_attr *attr,
+ struct netlink_ext_ack *extack)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
int ret;
if (ctx && ctx != dp)
@@ -656,13 +663,13 @@ static int dsa_slave_port_attr_set(struct net_device *dev, const void *ctx,
/* Must be called under rcu_read_lock() */
static int
-dsa_slave_vlan_check_for_8021q_uppers(struct net_device *slave,
- const struct switchdev_obj_port_vlan *vlan)
+dsa_user_vlan_check_for_8021q_uppers(struct net_device *user,
+ const struct switchdev_obj_port_vlan *vlan)
{
struct net_device *upper_dev;
struct list_head *iter;
- netdev_for_each_upper_dev_rcu(slave, upper_dev, iter) {
+ netdev_for_each_upper_dev_rcu(user, upper_dev, iter) {
u16 vid;
if (!is_vlan_dev(upper_dev))
@@ -676,11 +683,11 @@ dsa_slave_vlan_check_for_8021q_uppers(struct net_device *slave,
return 0;
}
-static int dsa_slave_vlan_add(struct net_device *dev,
- const struct switchdev_obj *obj,
- struct netlink_ext_ack *extack)
+static int dsa_user_vlan_add(struct net_device *dev,
+ const struct switchdev_obj *obj,
+ struct netlink_ext_ack *extack)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct switchdev_obj_port_vlan *vlan;
int err;
@@ -696,7 +703,7 @@ static int dsa_slave_vlan_add(struct net_device *dev,
*/
if (br_vlan_enabled(dsa_port_bridge_dev_get(dp))) {
rcu_read_lock();
- err = dsa_slave_vlan_check_for_8021q_uppers(dev, vlan);
+ err = dsa_user_vlan_check_for_8021q_uppers(dev, vlan);
rcu_read_unlock();
if (err) {
NL_SET_ERR_MSG_MOD(extack,
@@ -711,11 +718,11 @@ static int dsa_slave_vlan_add(struct net_device *dev,
/* Offload a VLAN installed on the bridge or on a foreign interface by
* installing it as a VLAN towards the CPU port.
*/
-static int dsa_slave_host_vlan_add(struct net_device *dev,
- const struct switchdev_obj *obj,
- struct netlink_ext_ack *extack)
+static int dsa_user_host_vlan_add(struct net_device *dev,
+ const struct switchdev_obj *obj,
+ struct netlink_ext_ack *extack)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct switchdev_obj_port_vlan vlan;
/* Do nothing if this is a software bridge */
@@ -737,11 +744,11 @@ static int dsa_slave_host_vlan_add(struct net_device *dev,
return dsa_port_host_vlan_add(dp, &vlan, extack);
}
-static int dsa_slave_port_obj_add(struct net_device *dev, const void *ctx,
- const struct switchdev_obj *obj,
- struct netlink_ext_ack *extack)
+static int dsa_user_port_obj_add(struct net_device *dev, const void *ctx,
+ const struct switchdev_obj *obj,
+ struct netlink_ext_ack *extack)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
int err;
if (ctx && ctx != dp)
@@ -762,9 +769,9 @@ static int dsa_slave_port_obj_add(struct net_device *dev, const void *ctx,
break;
case SWITCHDEV_OBJ_ID_PORT_VLAN:
if (dsa_port_offloads_bridge_port(dp, obj->orig_dev))
- err = dsa_slave_vlan_add(dev, obj, extack);
+ err = dsa_user_vlan_add(dev, obj, extack);
else
- err = dsa_slave_host_vlan_add(dev, obj, extack);
+ err = dsa_user_host_vlan_add(dev, obj, extack);
break;
case SWITCHDEV_OBJ_ID_MRP:
if (!dsa_port_offloads_bridge_dev(dp, obj->orig_dev))
@@ -787,10 +794,10 @@ static int dsa_slave_port_obj_add(struct net_device *dev, const void *ctx,
return err;
}
-static int dsa_slave_vlan_del(struct net_device *dev,
- const struct switchdev_obj *obj)
+static int dsa_user_vlan_del(struct net_device *dev,
+ const struct switchdev_obj *obj)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct switchdev_obj_port_vlan *vlan;
if (dsa_port_skip_vlan_configuration(dp))
@@ -801,10 +808,10 @@ static int dsa_slave_vlan_del(struct net_device *dev,
return dsa_port_vlan_del(dp, vlan);
}
-static int dsa_slave_host_vlan_del(struct net_device *dev,
- const struct switchdev_obj *obj)
+static int dsa_user_host_vlan_del(struct net_device *dev,
+ const struct switchdev_obj *obj)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct switchdev_obj_port_vlan *vlan;
/* Do nothing if this is a software bridge */
@@ -819,10 +826,10 @@ static int dsa_slave_host_vlan_del(struct net_device *dev,
return dsa_port_host_vlan_del(dp, vlan);
}
-static int dsa_slave_port_obj_del(struct net_device *dev, const void *ctx,
- const struct switchdev_obj *obj)
+static int dsa_user_port_obj_del(struct net_device *dev, const void *ctx,
+ const struct switchdev_obj *obj)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
int err;
if (ctx && ctx != dp)
@@ -843,9 +850,9 @@ static int dsa_slave_port_obj_del(struct net_device *dev, const void *ctx,
break;
case SWITCHDEV_OBJ_ID_PORT_VLAN:
if (dsa_port_offloads_bridge_port(dp, obj->orig_dev))
- err = dsa_slave_vlan_del(dev, obj);
+ err = dsa_user_vlan_del(dev, obj);
else
- err = dsa_slave_host_vlan_del(dev, obj);
+ err = dsa_user_host_vlan_del(dev, obj);
break;
case SWITCHDEV_OBJ_ID_MRP:
if (!dsa_port_offloads_bridge_dev(dp, obj->orig_dev))
@@ -868,11 +875,11 @@ static int dsa_slave_port_obj_del(struct net_device *dev, const void *ctx,
return err;
}
-static inline netdev_tx_t dsa_slave_netpoll_send_skb(struct net_device *dev,
- struct sk_buff *skb)
+static inline netdev_tx_t dsa_user_netpoll_send_skb(struct net_device *dev,
+ struct sk_buff *skb)
{
#ifdef CONFIG_NET_POLL_CONTROLLER
- struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_user_priv *p = netdev_priv(dev);
return netpoll_send_skb(p->netpoll, skb);
#else
@@ -881,7 +888,7 @@ static inline netdev_tx_t dsa_slave_netpoll_send_skb(struct net_device *dev,
#endif
}
-static void dsa_skb_tx_timestamp(struct dsa_slave_priv *p,
+static void dsa_skb_tx_timestamp(struct dsa_user_priv *p,
struct sk_buff *skb)
{
struct dsa_switch *ds = p->dp->ds;
@@ -901,45 +908,21 @@ netdev_tx_t dsa_enqueue_skb(struct sk_buff *skb, struct net_device *dev)
* tag to be successfully transmitted
*/
if (unlikely(netpoll_tx_running(dev)))
- return dsa_slave_netpoll_send_skb(dev, skb);
+ return dsa_user_netpoll_send_skb(dev, skb);
/* Queue the SKB for transmission on the parent interface, but
* do not modify its EtherType
*/
- skb->dev = dsa_slave_to_master(dev);
+ skb->dev = dsa_user_to_conduit(dev);
dev_queue_xmit(skb);
return NETDEV_TX_OK;
}
EXPORT_SYMBOL_GPL(dsa_enqueue_skb);
-static int dsa_realloc_skb(struct sk_buff *skb, struct net_device *dev)
-{
- int needed_headroom = dev->needed_headroom;
- int needed_tailroom = dev->needed_tailroom;
-
- /* For tail taggers, we need to pad short frames ourselves, to ensure
- * that the tail tag does not fail at its role of being at the end of
- * the packet, once the master interface pads the frame. Account for
- * that pad length here, and pad later.
- */
- if (unlikely(needed_tailroom && skb->len < ETH_ZLEN))
- needed_tailroom += ETH_ZLEN - skb->len;
- /* skb_headroom() returns unsigned int... */
- needed_headroom = max_t(int, needed_headroom - skb_headroom(skb), 0);
- needed_tailroom = max_t(int, needed_tailroom - skb_tailroom(skb), 0);
-
- if (likely(!needed_headroom && !needed_tailroom && !skb_cloned(skb)))
- /* No reallocation needed, yay! */
- return 0;
-
- return pskb_expand_head(skb, needed_headroom, needed_tailroom,
- GFP_ATOMIC);
-}
-
-static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t dsa_user_xmit(struct sk_buff *skb, struct net_device *dev)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_user_priv *p = netdev_priv(dev);
struct sk_buff *nskb;
dev_sw_netstats_tx_add(dev, 1, skb->len);
@@ -949,13 +932,14 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
/* Handle tx timestamp if any */
dsa_skb_tx_timestamp(p, skb);
- if (dsa_realloc_skb(skb, dev)) {
+ if (skb_ensure_writable_head_tail(skb, dev)) {
dev_kfree_skb_any(skb);
return NETDEV_TX_OK;
}
/* needed_tailroom should still be 'warm' in the cache line from
- * dsa_realloc_skb(), which has also ensured that padding is safe.
+ * skb_ensure_writable_head_tail(), which has also ensured that
+ * padding is safe.
*/
if (dev->needed_tailroom)
eth_skb_pad(skb);
@@ -974,17 +958,17 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
/* ethtool operations *******************************************************/
-static void dsa_slave_get_drvinfo(struct net_device *dev,
- struct ethtool_drvinfo *drvinfo)
+static void dsa_user_get_drvinfo(struct net_device *dev,
+ struct ethtool_drvinfo *drvinfo)
{
strscpy(drvinfo->driver, "dsa", sizeof(drvinfo->driver));
strscpy(drvinfo->fw_version, "N/A", sizeof(drvinfo->fw_version));
strscpy(drvinfo->bus_info, "platform", sizeof(drvinfo->bus_info));
}
-static int dsa_slave_get_regs_len(struct net_device *dev)
+static int dsa_user_get_regs_len(struct net_device *dev)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct dsa_switch *ds = dp->ds;
if (ds->ops->get_regs_len)
@@ -994,25 +978,25 @@ static int dsa_slave_get_regs_len(struct net_device *dev)
}
static void
-dsa_slave_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *_p)
+dsa_user_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *_p)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct dsa_switch *ds = dp->ds;
if (ds->ops->get_regs)
ds->ops->get_regs(ds, dp->index, regs, _p);
}
-static int dsa_slave_nway_reset(struct net_device *dev)
+static int dsa_user_nway_reset(struct net_device *dev)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
return phylink_ethtool_nway_reset(dp->pl);
}
-static int dsa_slave_get_eeprom_len(struct net_device *dev)
+static int dsa_user_get_eeprom_len(struct net_device *dev)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct dsa_switch *ds = dp->ds;
if (ds->cd && ds->cd->eeprom_len)
@@ -1024,10 +1008,10 @@ static int dsa_slave_get_eeprom_len(struct net_device *dev)
return 0;
}
-static int dsa_slave_get_eeprom(struct net_device *dev,
- struct ethtool_eeprom *eeprom, u8 *data)
+static int dsa_user_get_eeprom(struct net_device *dev,
+ struct ethtool_eeprom *eeprom, u8 *data)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct dsa_switch *ds = dp->ds;
if (ds->ops->get_eeprom)
@@ -1036,10 +1020,10 @@ static int dsa_slave_get_eeprom(struct net_device *dev,
return -EOPNOTSUPP;
}
-static int dsa_slave_set_eeprom(struct net_device *dev,
- struct ethtool_eeprom *eeprom, u8 *data)
+static int dsa_user_set_eeprom(struct net_device *dev,
+ struct ethtool_eeprom *eeprom, u8 *data)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct dsa_switch *ds = dp->ds;
if (ds->ops->set_eeprom)
@@ -1048,10 +1032,10 @@ static int dsa_slave_set_eeprom(struct net_device *dev,
return -EOPNOTSUPP;
}
-static void dsa_slave_get_strings(struct net_device *dev,
- uint32_t stringset, uint8_t *data)
+static void dsa_user_get_strings(struct net_device *dev,
+ uint32_t stringset, uint8_t *data)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct dsa_switch *ds = dp->ds;
if (stringset == ETH_SS_STATS) {
@@ -1070,11 +1054,11 @@ static void dsa_slave_get_strings(struct net_device *dev,
}
-static void dsa_slave_get_ethtool_stats(struct net_device *dev,
- struct ethtool_stats *stats,
- uint64_t *data)
+static void dsa_user_get_ethtool_stats(struct net_device *dev,
+ struct ethtool_stats *stats,
+ uint64_t *data)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct dsa_switch *ds = dp->ds;
struct pcpu_sw_netstats *s;
unsigned int start;
@@ -1100,9 +1084,9 @@ static void dsa_slave_get_ethtool_stats(struct net_device *dev,
ds->ops->get_ethtool_stats(ds, dp->index, data + 4);
}
-static int dsa_slave_get_sset_count(struct net_device *dev, int sset)
+static int dsa_user_get_sset_count(struct net_device *dev, int sset)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct dsa_switch *ds = dp->ds;
if (sset == ETH_SS_STATS) {
@@ -1122,20 +1106,20 @@ static int dsa_slave_get_sset_count(struct net_device *dev, int sset)
return -EOPNOTSUPP;
}
-static void dsa_slave_get_eth_phy_stats(struct net_device *dev,
- struct ethtool_eth_phy_stats *phy_stats)
+static void dsa_user_get_eth_phy_stats(struct net_device *dev,
+ struct ethtool_eth_phy_stats *phy_stats)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct dsa_switch *ds = dp->ds;
if (ds->ops->get_eth_phy_stats)
ds->ops->get_eth_phy_stats(ds, dp->index, phy_stats);
}
-static void dsa_slave_get_eth_mac_stats(struct net_device *dev,
- struct ethtool_eth_mac_stats *mac_stats)
+static void dsa_user_get_eth_mac_stats(struct net_device *dev,
+ struct ethtool_eth_mac_stats *mac_stats)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct dsa_switch *ds = dp->ds;
if (ds->ops->get_eth_mac_stats)
@@ -1143,10 +1127,10 @@ static void dsa_slave_get_eth_mac_stats(struct net_device *dev,
}
static void
-dsa_slave_get_eth_ctrl_stats(struct net_device *dev,
- struct ethtool_eth_ctrl_stats *ctrl_stats)
+dsa_user_get_eth_ctrl_stats(struct net_device *dev,
+ struct ethtool_eth_ctrl_stats *ctrl_stats)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct dsa_switch *ds = dp->ds;
if (ds->ops->get_eth_ctrl_stats)
@@ -1154,21 +1138,21 @@ dsa_slave_get_eth_ctrl_stats(struct net_device *dev,
}
static void
-dsa_slave_get_rmon_stats(struct net_device *dev,
- struct ethtool_rmon_stats *rmon_stats,
- const struct ethtool_rmon_hist_range **ranges)
+dsa_user_get_rmon_stats(struct net_device *dev,
+ struct ethtool_rmon_stats *rmon_stats,
+ const struct ethtool_rmon_hist_range **ranges)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct dsa_switch *ds = dp->ds;
if (ds->ops->get_rmon_stats)
ds->ops->get_rmon_stats(ds, dp->index, rmon_stats, ranges);
}
-static void dsa_slave_net_selftest(struct net_device *ndev,
- struct ethtool_test *etest, u64 *buf)
+static void dsa_user_net_selftest(struct net_device *ndev,
+ struct ethtool_test *etest, u64 *buf)
{
- struct dsa_port *dp = dsa_slave_to_port(ndev);
+ struct dsa_port *dp = dsa_user_to_port(ndev);
struct dsa_switch *ds = dp->ds;
if (ds->ops->self_test) {
@@ -1179,10 +1163,10 @@ static void dsa_slave_net_selftest(struct net_device *ndev,
net_selftest(ndev, etest, buf);
}
-static int dsa_slave_get_mm(struct net_device *dev,
- struct ethtool_mm_state *state)
+static int dsa_user_get_mm(struct net_device *dev,
+ struct ethtool_mm_state *state)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct dsa_switch *ds = dp->ds;
if (!ds->ops->get_mm)
@@ -1191,10 +1175,10 @@ static int dsa_slave_get_mm(struct net_device *dev,
return ds->ops->get_mm(ds, dp->index, state);
}
-static int dsa_slave_set_mm(struct net_device *dev, struct ethtool_mm_cfg *cfg,
- struct netlink_ext_ack *extack)
+static int dsa_user_set_mm(struct net_device *dev, struct ethtool_mm_cfg *cfg,
+ struct netlink_ext_ack *extack)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct dsa_switch *ds = dp->ds;
if (!ds->ops->set_mm)
@@ -1203,19 +1187,19 @@ static int dsa_slave_set_mm(struct net_device *dev, struct ethtool_mm_cfg *cfg,
return ds->ops->set_mm(ds, dp->index, cfg, extack);
}
-static void dsa_slave_get_mm_stats(struct net_device *dev,
- struct ethtool_mm_stats *stats)
+static void dsa_user_get_mm_stats(struct net_device *dev,
+ struct ethtool_mm_stats *stats)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct dsa_switch *ds = dp->ds;
if (ds->ops->get_mm_stats)
ds->ops->get_mm_stats(ds, dp->index, stats);
}
-static void dsa_slave_get_wol(struct net_device *dev, struct ethtool_wolinfo *w)
+static void dsa_user_get_wol(struct net_device *dev, struct ethtool_wolinfo *w)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct dsa_switch *ds = dp->ds;
phylink_ethtool_get_wol(dp->pl, w);
@@ -1224,9 +1208,9 @@ static void dsa_slave_get_wol(struct net_device *dev, struct ethtool_wolinfo *w)
ds->ops->get_wol(ds, dp->index, w);
}
-static int dsa_slave_set_wol(struct net_device *dev, struct ethtool_wolinfo *w)
+static int dsa_user_set_wol(struct net_device *dev, struct ethtool_wolinfo *w)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct dsa_switch *ds = dp->ds;
int ret = -EOPNOTSUPP;
@@ -1238,9 +1222,9 @@ static int dsa_slave_set_wol(struct net_device *dev, struct ethtool_wolinfo *w)
return ret;
}
-static int dsa_slave_set_eee(struct net_device *dev, struct ethtool_eee *e)
+static int dsa_user_set_eee(struct net_device *dev, struct ethtool_eee *e)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct dsa_switch *ds = dp->ds;
int ret;
@@ -1258,9 +1242,9 @@ static int dsa_slave_set_eee(struct net_device *dev, struct ethtool_eee *e)
return phylink_ethtool_set_eee(dp->pl, e);
}
-static int dsa_slave_get_eee(struct net_device *dev, struct ethtool_eee *e)
+static int dsa_user_get_eee(struct net_device *dev, struct ethtool_eee *e)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct dsa_switch *ds = dp->ds;
int ret;
@@ -1278,54 +1262,54 @@ static int dsa_slave_get_eee(struct net_device *dev, struct ethtool_eee *e)
return phylink_ethtool_get_eee(dp->pl, e);
}
-static int dsa_slave_get_link_ksettings(struct net_device *dev,
- struct ethtool_link_ksettings *cmd)
+static int dsa_user_get_link_ksettings(struct net_device *dev,
+ struct ethtool_link_ksettings *cmd)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
return phylink_ethtool_ksettings_get(dp->pl, cmd);
}
-static int dsa_slave_set_link_ksettings(struct net_device *dev,
- const struct ethtool_link_ksettings *cmd)
+static int dsa_user_set_link_ksettings(struct net_device *dev,
+ const struct ethtool_link_ksettings *cmd)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
return phylink_ethtool_ksettings_set(dp->pl, cmd);
}
-static void dsa_slave_get_pause_stats(struct net_device *dev,
- struct ethtool_pause_stats *pause_stats)
+static void dsa_user_get_pause_stats(struct net_device *dev,
+ struct ethtool_pause_stats *pause_stats)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct dsa_switch *ds = dp->ds;
if (ds->ops->get_pause_stats)
ds->ops->get_pause_stats(ds, dp->index, pause_stats);
}
-static void dsa_slave_get_pauseparam(struct net_device *dev,
- struct ethtool_pauseparam *pause)
+static void dsa_user_get_pauseparam(struct net_device *dev,
+ struct ethtool_pauseparam *pause)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
phylink_ethtool_get_pauseparam(dp->pl, pause);
}
-static int dsa_slave_set_pauseparam(struct net_device *dev,
- struct ethtool_pauseparam *pause)
+static int dsa_user_set_pauseparam(struct net_device *dev,
+ struct ethtool_pauseparam *pause)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
return phylink_ethtool_set_pauseparam(dp->pl, pause);
}
#ifdef CONFIG_NET_POLL_CONTROLLER
-static int dsa_slave_netpoll_setup(struct net_device *dev,
- struct netpoll_info *ni)
+static int dsa_user_netpoll_setup(struct net_device *dev,
+ struct netpoll_info *ni)
{
- struct net_device *master = dsa_slave_to_master(dev);
- struct dsa_slave_priv *p = netdev_priv(dev);
+ struct net_device *conduit = dsa_user_to_conduit(dev);
+ struct dsa_user_priv *p = netdev_priv(dev);
struct netpoll *netpoll;
int err = 0;
@@ -1333,7 +1317,7 @@ static int dsa_slave_netpoll_setup(struct net_device *dev,
if (!netpoll)
return -ENOMEM;
- err = __netpoll_setup(netpoll, master);
+ err = __netpoll_setup(netpoll, conduit);
if (err) {
kfree(netpoll);
goto out;
@@ -1344,9 +1328,9 @@ out:
return err;
}
-static void dsa_slave_netpoll_cleanup(struct net_device *dev)
+static void dsa_user_netpoll_cleanup(struct net_device *dev)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_user_priv *p = netdev_priv(dev);
struct netpoll *netpoll = p->netpoll;
if (!netpoll)
@@ -1357,15 +1341,15 @@ static void dsa_slave_netpoll_cleanup(struct net_device *dev)
__netpoll_free(netpoll);
}
-static void dsa_slave_poll_controller(struct net_device *dev)
+static void dsa_user_poll_controller(struct net_device *dev)
{
}
#endif
static struct dsa_mall_tc_entry *
-dsa_slave_mall_tc_entry_find(struct net_device *dev, unsigned long cookie)
+dsa_user_mall_tc_entry_find(struct net_device *dev, unsigned long cookie)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_user_priv *p = netdev_priv(dev);
struct dsa_mall_tc_entry *mall_tc_entry;
list_for_each_entry(mall_tc_entry, &p->mall_tc_list, list)
@@ -1376,13 +1360,13 @@ dsa_slave_mall_tc_entry_find(struct net_device *dev, unsigned long cookie)
}
static int
-dsa_slave_add_cls_matchall_mirred(struct net_device *dev,
- struct tc_cls_matchall_offload *cls,
- bool ingress)
+dsa_user_add_cls_matchall_mirred(struct net_device *dev,
+ struct tc_cls_matchall_offload *cls,
+ bool ingress)
{
struct netlink_ext_ack *extack = cls->common.extack;
- struct dsa_port *dp = dsa_slave_to_port(dev);
- struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
+ struct dsa_user_priv *p = netdev_priv(dev);
struct dsa_mall_mirror_tc_entry *mirror;
struct dsa_mall_tc_entry *mall_tc_entry;
struct dsa_switch *ds = dp->ds;
@@ -1402,7 +1386,7 @@ dsa_slave_add_cls_matchall_mirred(struct net_device *dev,
if (!act->dev)
return -EINVAL;
- if (!dsa_slave_dev_check(act->dev))
+ if (!dsa_user_dev_check(act->dev))
return -EOPNOTSUPP;
mall_tc_entry = kzalloc(sizeof(*mall_tc_entry), GFP_KERNEL);
@@ -1413,7 +1397,7 @@ dsa_slave_add_cls_matchall_mirred(struct net_device *dev,
mall_tc_entry->type = DSA_PORT_MALL_MIRROR;
mirror = &mall_tc_entry->mirror;
- to_dp = dsa_slave_to_port(act->dev);
+ to_dp = dsa_user_to_port(act->dev);
mirror->to_local_port = to_dp->index;
mirror->ingress = ingress;
@@ -1430,13 +1414,13 @@ dsa_slave_add_cls_matchall_mirred(struct net_device *dev,
}
static int
-dsa_slave_add_cls_matchall_police(struct net_device *dev,
- struct tc_cls_matchall_offload *cls,
- bool ingress)
+dsa_user_add_cls_matchall_police(struct net_device *dev,
+ struct tc_cls_matchall_offload *cls,
+ bool ingress)
{
struct netlink_ext_ack *extack = cls->common.extack;
- struct dsa_port *dp = dsa_slave_to_port(dev);
- struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
+ struct dsa_user_priv *p = netdev_priv(dev);
struct dsa_mall_policer_tc_entry *policer;
struct dsa_mall_tc_entry *mall_tc_entry;
struct dsa_switch *ds = dp->ds;
@@ -1490,31 +1474,31 @@ dsa_slave_add_cls_matchall_police(struct net_device *dev,
return err;
}
-static int dsa_slave_add_cls_matchall(struct net_device *dev,
- struct tc_cls_matchall_offload *cls,
- bool ingress)
+static int dsa_user_add_cls_matchall(struct net_device *dev,
+ struct tc_cls_matchall_offload *cls,
+ bool ingress)
{
int err = -EOPNOTSUPP;
if (cls->common.protocol == htons(ETH_P_ALL) &&
flow_offload_has_one_action(&cls->rule->action) &&
cls->rule->action.entries[0].id == FLOW_ACTION_MIRRED)
- err = dsa_slave_add_cls_matchall_mirred(dev, cls, ingress);
+ err = dsa_user_add_cls_matchall_mirred(dev, cls, ingress);
else if (flow_offload_has_one_action(&cls->rule->action) &&
cls->rule->action.entries[0].id == FLOW_ACTION_POLICE)
- err = dsa_slave_add_cls_matchall_police(dev, cls, ingress);
+ err = dsa_user_add_cls_matchall_police(dev, cls, ingress);
return err;
}
-static void dsa_slave_del_cls_matchall(struct net_device *dev,
- struct tc_cls_matchall_offload *cls)
+static void dsa_user_del_cls_matchall(struct net_device *dev,
+ struct tc_cls_matchall_offload *cls)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct dsa_mall_tc_entry *mall_tc_entry;
struct dsa_switch *ds = dp->ds;
- mall_tc_entry = dsa_slave_mall_tc_entry_find(dev, cls->cookie);
+ mall_tc_entry = dsa_user_mall_tc_entry_find(dev, cls->cookie);
if (!mall_tc_entry)
return;
@@ -1537,29 +1521,29 @@ static void dsa_slave_del_cls_matchall(struct net_device *dev,
kfree(mall_tc_entry);
}
-static int dsa_slave_setup_tc_cls_matchall(struct net_device *dev,
- struct tc_cls_matchall_offload *cls,
- bool ingress)
+static int dsa_user_setup_tc_cls_matchall(struct net_device *dev,
+ struct tc_cls_matchall_offload *cls,
+ bool ingress)
{
if (cls->common.chain_index)
return -EOPNOTSUPP;
switch (cls->command) {
case TC_CLSMATCHALL_REPLACE:
- return dsa_slave_add_cls_matchall(dev, cls, ingress);
+ return dsa_user_add_cls_matchall(dev, cls, ingress);
case TC_CLSMATCHALL_DESTROY:
- dsa_slave_del_cls_matchall(dev, cls);
+ dsa_user_del_cls_matchall(dev, cls);
return 0;
default:
return -EOPNOTSUPP;
}
}
-static int dsa_slave_add_cls_flower(struct net_device *dev,
- struct flow_cls_offload *cls,
- bool ingress)
+static int dsa_user_add_cls_flower(struct net_device *dev,
+ struct flow_cls_offload *cls,
+ bool ingress)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct dsa_switch *ds = dp->ds;
int port = dp->index;
@@ -1569,11 +1553,11 @@ static int dsa_slave_add_cls_flower(struct net_device *dev,
return ds->ops->cls_flower_add(ds, port, cls, ingress);
}
-static int dsa_slave_del_cls_flower(struct net_device *dev,
- struct flow_cls_offload *cls,
- bool ingress)
+static int dsa_user_del_cls_flower(struct net_device *dev,
+ struct flow_cls_offload *cls,
+ bool ingress)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct dsa_switch *ds = dp->ds;
int port = dp->index;
@@ -1583,11 +1567,11 @@ static int dsa_slave_del_cls_flower(struct net_device *dev,
return ds->ops->cls_flower_del(ds, port, cls, ingress);
}
-static int dsa_slave_stats_cls_flower(struct net_device *dev,
- struct flow_cls_offload *cls,
- bool ingress)
+static int dsa_user_stats_cls_flower(struct net_device *dev,
+ struct flow_cls_offload *cls,
+ bool ingress)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct dsa_switch *ds = dp->ds;
int port = dp->index;
@@ -1597,24 +1581,24 @@ static int dsa_slave_stats_cls_flower(struct net_device *dev,
return ds->ops->cls_flower_stats(ds, port, cls, ingress);
}
-static int dsa_slave_setup_tc_cls_flower(struct net_device *dev,
- struct flow_cls_offload *cls,
- bool ingress)
+static int dsa_user_setup_tc_cls_flower(struct net_device *dev,
+ struct flow_cls_offload *cls,
+ bool ingress)
{
switch (cls->command) {
case FLOW_CLS_REPLACE:
- return dsa_slave_add_cls_flower(dev, cls, ingress);
+ return dsa_user_add_cls_flower(dev, cls, ingress);
case FLOW_CLS_DESTROY:
- return dsa_slave_del_cls_flower(dev, cls, ingress);
+ return dsa_user_del_cls_flower(dev, cls, ingress);
case FLOW_CLS_STATS:
- return dsa_slave_stats_cls_flower(dev, cls, ingress);
+ return dsa_user_stats_cls_flower(dev, cls, ingress);
default:
return -EOPNOTSUPP;
}
}
-static int dsa_slave_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
- void *cb_priv, bool ingress)
+static int dsa_user_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+ void *cb_priv, bool ingress)
{
struct net_device *dev = cb_priv;
@@ -1623,46 +1607,46 @@ static int dsa_slave_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
switch (type) {
case TC_SETUP_CLSMATCHALL:
- return dsa_slave_setup_tc_cls_matchall(dev, type_data, ingress);
+ return dsa_user_setup_tc_cls_matchall(dev, type_data, ingress);
case TC_SETUP_CLSFLOWER:
- return dsa_slave_setup_tc_cls_flower(dev, type_data, ingress);
+ return dsa_user_setup_tc_cls_flower(dev, type_data, ingress);
default:
return -EOPNOTSUPP;
}
}
-static int dsa_slave_setup_tc_block_cb_ig(enum tc_setup_type type,
- void *type_data, void *cb_priv)
+static int dsa_user_setup_tc_block_cb_ig(enum tc_setup_type type,
+ void *type_data, void *cb_priv)
{
- return dsa_slave_setup_tc_block_cb(type, type_data, cb_priv, true);
+ return dsa_user_setup_tc_block_cb(type, type_data, cb_priv, true);
}
-static int dsa_slave_setup_tc_block_cb_eg(enum tc_setup_type type,
- void *type_data, void *cb_priv)
+static int dsa_user_setup_tc_block_cb_eg(enum tc_setup_type type,
+ void *type_data, void *cb_priv)
{
- return dsa_slave_setup_tc_block_cb(type, type_data, cb_priv, false);
+ return dsa_user_setup_tc_block_cb(type, type_data, cb_priv, false);
}
-static LIST_HEAD(dsa_slave_block_cb_list);
+static LIST_HEAD(dsa_user_block_cb_list);
-static int dsa_slave_setup_tc_block(struct net_device *dev,
- struct flow_block_offload *f)
+static int dsa_user_setup_tc_block(struct net_device *dev,
+ struct flow_block_offload *f)
{
struct flow_block_cb *block_cb;
flow_setup_cb_t *cb;
if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
- cb = dsa_slave_setup_tc_block_cb_ig;
+ cb = dsa_user_setup_tc_block_cb_ig;
else if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS)
- cb = dsa_slave_setup_tc_block_cb_eg;
+ cb = dsa_user_setup_tc_block_cb_eg;
else
return -EOPNOTSUPP;
- f->driver_block_list = &dsa_slave_block_cb_list;
+ f->driver_block_list = &dsa_user_block_cb_list;
switch (f->command) {
case FLOW_BLOCK_BIND:
- if (flow_block_cb_is_busy(cb, dev, &dsa_slave_block_cb_list))
+ if (flow_block_cb_is_busy(cb, dev, &dsa_user_block_cb_list))
return -EBUSY;
block_cb = flow_block_cb_alloc(cb, dev, dev, NULL);
@@ -1670,7 +1654,7 @@ static int dsa_slave_setup_tc_block(struct net_device *dev,
return PTR_ERR(block_cb);
flow_block_cb_add(block_cb, f);
- list_add_tail(&block_cb->driver_list, &dsa_slave_block_cb_list);
+ list_add_tail(&block_cb->driver_list, &dsa_user_block_cb_list);
return 0;
case FLOW_BLOCK_UNBIND:
block_cb = flow_block_cb_lookup(f->block, cb, dev);
@@ -1685,28 +1669,28 @@ static int dsa_slave_setup_tc_block(struct net_device *dev,
}
}
-static int dsa_slave_setup_ft_block(struct dsa_switch *ds, int port,
- void *type_data)
+static int dsa_user_setup_ft_block(struct dsa_switch *ds, int port,
+ void *type_data)
{
- struct net_device *master = dsa_port_to_master(dsa_to_port(ds, port));
+ struct net_device *conduit = dsa_port_to_conduit(dsa_to_port(ds, port));
- if (!master->netdev_ops->ndo_setup_tc)
+ if (!conduit->netdev_ops->ndo_setup_tc)
return -EOPNOTSUPP;
- return master->netdev_ops->ndo_setup_tc(master, TC_SETUP_FT, type_data);
+ return conduit->netdev_ops->ndo_setup_tc(conduit, TC_SETUP_FT, type_data);
}
-static int dsa_slave_setup_tc(struct net_device *dev, enum tc_setup_type type,
- void *type_data)
+static int dsa_user_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ void *type_data)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct dsa_switch *ds = dp->ds;
switch (type) {
case TC_SETUP_BLOCK:
- return dsa_slave_setup_tc_block(dev, type_data);
+ return dsa_user_setup_tc_block(dev, type_data);
case TC_SETUP_FT:
- return dsa_slave_setup_ft_block(ds, dp->index, type_data);
+ return dsa_user_setup_ft_block(ds, dp->index, type_data);
default:
break;
}
@@ -1717,10 +1701,10 @@ static int dsa_slave_setup_tc(struct net_device *dev, enum tc_setup_type type,
return ds->ops->port_setup_tc(ds, dp->index, type, type_data);
}
-static int dsa_slave_get_rxnfc(struct net_device *dev,
- struct ethtool_rxnfc *nfc, u32 *rule_locs)
+static int dsa_user_get_rxnfc(struct net_device *dev,
+ struct ethtool_rxnfc *nfc, u32 *rule_locs)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct dsa_switch *ds = dp->ds;
if (!ds->ops->get_rxnfc)
@@ -1729,10 +1713,10 @@ static int dsa_slave_get_rxnfc(struct net_device *dev,
return ds->ops->get_rxnfc(ds, dp->index, nfc, rule_locs);
}
-static int dsa_slave_set_rxnfc(struct net_device *dev,
- struct ethtool_rxnfc *nfc)
+static int dsa_user_set_rxnfc(struct net_device *dev,
+ struct ethtool_rxnfc *nfc)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct dsa_switch *ds = dp->ds;
if (!ds->ops->set_rxnfc)
@@ -1741,10 +1725,10 @@ static int dsa_slave_set_rxnfc(struct net_device *dev,
return ds->ops->set_rxnfc(ds, dp->index, nfc);
}
-static int dsa_slave_get_ts_info(struct net_device *dev,
- struct ethtool_ts_info *ts)
+static int dsa_user_get_ts_info(struct net_device *dev,
+ struct ethtool_ts_info *ts)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_user_priv *p = netdev_priv(dev);
struct dsa_switch *ds = p->dp->ds;
if (!ds->ops->get_ts_info)
@@ -1753,10 +1737,10 @@ static int dsa_slave_get_ts_info(struct net_device *dev,
return ds->ops->get_ts_info(ds, p->dp->index, ts);
}
-static int dsa_slave_vlan_rx_add_vid(struct net_device *dev, __be16 proto,
- u16 vid)
+static int dsa_user_vlan_rx_add_vid(struct net_device *dev, __be16 proto,
+ u16 vid)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct switchdev_obj_port_vlan vlan = {
.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
.vid = vid,
@@ -1803,15 +1787,15 @@ static int dsa_slave_vlan_rx_add_vid(struct net_device *dev, __be16 proto,
if (dsa_switch_supports_mc_filtering(ds)) {
netdev_for_each_synced_mc_addr(ha, dev) {
- dsa_slave_schedule_standalone_work(dev, DSA_MC_ADD,
- ha->addr, vid);
+ dsa_user_schedule_standalone_work(dev, DSA_MC_ADD,
+ ha->addr, vid);
}
}
if (dsa_switch_supports_uc_filtering(ds)) {
netdev_for_each_synced_uc_addr(ha, dev) {
- dsa_slave_schedule_standalone_work(dev, DSA_UC_ADD,
- ha->addr, vid);
+ dsa_user_schedule_standalone_work(dev, DSA_UC_ADD,
+ ha->addr, vid);
}
}
@@ -1828,10 +1812,10 @@ rollback:
return ret;
}
-static int dsa_slave_vlan_rx_kill_vid(struct net_device *dev, __be16 proto,
- u16 vid)
+static int dsa_user_vlan_rx_kill_vid(struct net_device *dev, __be16 proto,
+ u16 vid)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct switchdev_obj_port_vlan vlan = {
.vid = vid,
/* This API only allows programming tagged, non-PVID VIDs */
@@ -1867,15 +1851,15 @@ static int dsa_slave_vlan_rx_kill_vid(struct net_device *dev, __be16 proto,
if (dsa_switch_supports_mc_filtering(ds)) {
netdev_for_each_synced_mc_addr(ha, dev) {
- dsa_slave_schedule_standalone_work(dev, DSA_MC_DEL,
- ha->addr, vid);
+ dsa_user_schedule_standalone_work(dev, DSA_MC_DEL,
+ ha->addr, vid);
}
}
if (dsa_switch_supports_uc_filtering(ds)) {
netdev_for_each_synced_uc_addr(ha, dev) {
- dsa_slave_schedule_standalone_work(dev, DSA_UC_DEL,
- ha->addr, vid);
+ dsa_user_schedule_standalone_work(dev, DSA_UC_DEL,
+ ha->addr, vid);
}
}
@@ -1886,18 +1870,18 @@ static int dsa_slave_vlan_rx_kill_vid(struct net_device *dev, __be16 proto,
return 0;
}
-static int dsa_slave_restore_vlan(struct net_device *vdev, int vid, void *arg)
+static int dsa_user_restore_vlan(struct net_device *vdev, int vid, void *arg)
{
__be16 proto = vdev ? vlan_dev_vlan_proto(vdev) : htons(ETH_P_8021Q);
- return dsa_slave_vlan_rx_add_vid(arg, proto, vid);
+ return dsa_user_vlan_rx_add_vid(arg, proto, vid);
}
-static int dsa_slave_clear_vlan(struct net_device *vdev, int vid, void *arg)
+static int dsa_user_clear_vlan(struct net_device *vdev, int vid, void *arg)
{
__be16 proto = vdev ? vlan_dev_vlan_proto(vdev) : htons(ETH_P_8021Q);
- return dsa_slave_vlan_rx_kill_vid(arg, proto, vid);
+ return dsa_user_vlan_rx_kill_vid(arg, proto, vid);
}
/* Keep the VLAN RX filtering list in sync with the hardware only if VLAN
@@ -1931,26 +1915,26 @@ static int dsa_slave_clear_vlan(struct net_device *vdev, int vid, void *arg)
* - the bridge VLANs
* - the 8021q upper VLANs
*/
-int dsa_slave_manage_vlan_filtering(struct net_device *slave,
- bool vlan_filtering)
+int dsa_user_manage_vlan_filtering(struct net_device *user,
+ bool vlan_filtering)
{
int err;
if (vlan_filtering) {
- slave->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+ user->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
- err = vlan_for_each(slave, dsa_slave_restore_vlan, slave);
+ err = vlan_for_each(user, dsa_user_restore_vlan, user);
if (err) {
- vlan_for_each(slave, dsa_slave_clear_vlan, slave);
- slave->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER;
+ vlan_for_each(user, dsa_user_clear_vlan, user);
+ user->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER;
return err;
}
} else {
- err = vlan_for_each(slave, dsa_slave_clear_vlan, slave);
+ err = vlan_for_each(user, dsa_user_clear_vlan, user);
if (err)
return err;
- slave->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER;
+ user->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER;
}
return 0;
@@ -2021,7 +2005,7 @@ static void dsa_bridge_mtu_normalization(struct dsa_port *dp)
list_for_each_entry(dst, &dsa_tree_list, list) {
list_for_each_entry(other_dp, &dst->ports, list) {
struct dsa_hw_port *hw_port;
- struct net_device *slave;
+ struct net_device *user;
if (other_dp->type != DSA_PORT_TYPE_USER)
continue;
@@ -2032,17 +2016,17 @@ static void dsa_bridge_mtu_normalization(struct dsa_port *dp)
if (!other_dp->ds->mtu_enforcement_ingress)
continue;
- slave = other_dp->slave;
+ user = other_dp->user;
- if (min_mtu > slave->mtu)
- min_mtu = slave->mtu;
+ if (min_mtu > user->mtu)
+ min_mtu = user->mtu;
hw_port = kzalloc(sizeof(*hw_port), GFP_KERNEL);
if (!hw_port)
goto out;
- hw_port->dev = slave;
- hw_port->old_mtu = slave->mtu;
+ hw_port->dev = user;
+ hw_port->old_mtu = user->mtu;
list_add(&hw_port->list, &hw_port_list);
}
@@ -2052,7 +2036,7 @@ static void dsa_bridge_mtu_normalization(struct dsa_port *dp)
* interface's MTU first, regardless of whether the intention of the
* user was to raise or lower it.
*/
- err = dsa_hw_port_list_set_mtu(&hw_port_list, dp->slave->mtu);
+ err = dsa_hw_port_list_set_mtu(&hw_port_list, dp->user->mtu);
if (!err)
goto out;
@@ -2066,16 +2050,16 @@ out:
dsa_hw_port_list_free(&hw_port_list);
}
-int dsa_slave_change_mtu(struct net_device *dev, int new_mtu)
+int dsa_user_change_mtu(struct net_device *dev, int new_mtu)
{
- struct net_device *master = dsa_slave_to_master(dev);
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct net_device *conduit = dsa_user_to_conduit(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct dsa_port *cpu_dp = dp->cpu_dp;
struct dsa_switch *ds = dp->ds;
struct dsa_port *other_dp;
int largest_mtu = 0;
- int new_master_mtu;
- int old_master_mtu;
+ int new_conduit_mtu;
+ int old_conduit_mtu;
int mtu_limit;
int overhead;
int cpu_mtu;
@@ -2085,44 +2069,44 @@ int dsa_slave_change_mtu(struct net_device *dev, int new_mtu)
return -EOPNOTSUPP;
dsa_tree_for_each_user_port(other_dp, ds->dst) {
- int slave_mtu;
+ int user_mtu;
- /* During probe, this function will be called for each slave
+ /* During probe, this function will be called for each user
* device, while not all of them have been allocated. That's
* ok, it doesn't change what the maximum is, so ignore it.
*/
- if (!other_dp->slave)
+ if (!other_dp->user)
continue;
/* Pretend that we already applied the setting, which we
* actually haven't (still haven't done all integrity checks)
*/
if (dp == other_dp)
- slave_mtu = new_mtu;
+ user_mtu = new_mtu;
else
- slave_mtu = other_dp->slave->mtu;
+ user_mtu = other_dp->user->mtu;
- if (largest_mtu < slave_mtu)
- largest_mtu = slave_mtu;
+ if (largest_mtu < user_mtu)
+ largest_mtu = user_mtu;
}
overhead = dsa_tag_protocol_overhead(cpu_dp->tag_ops);
- mtu_limit = min_t(int, master->max_mtu, dev->max_mtu + overhead);
- old_master_mtu = master->mtu;
- new_master_mtu = largest_mtu + overhead;
- if (new_master_mtu > mtu_limit)
+ mtu_limit = min_t(int, conduit->max_mtu, dev->max_mtu + overhead);
+ old_conduit_mtu = conduit->mtu;
+ new_conduit_mtu = largest_mtu + overhead;
+ if (new_conduit_mtu > mtu_limit)
return -ERANGE;
- /* If the master MTU isn't over limit, there's no need to check the CPU
+ /* If the conduit MTU isn't over limit, there's no need to check the CPU
* MTU, since that surely isn't either.
*/
cpu_mtu = largest_mtu;
/* Start applying stuff */
- if (new_master_mtu != old_master_mtu) {
- err = dev_set_mtu(master, new_master_mtu);
+ if (new_conduit_mtu != old_conduit_mtu) {
+ err = dev_set_mtu(conduit, new_conduit_mtu);
if (err < 0)
- goto out_master_failed;
+ goto out_conduit_failed;
/* We only need to propagate the MTU of the CPU port to
* upstream switches, so emit a notifier which updates them.
@@ -2143,19 +2127,19 @@ int dsa_slave_change_mtu(struct net_device *dev, int new_mtu)
return 0;
out_port_failed:
- if (new_master_mtu != old_master_mtu)
- dsa_port_mtu_change(cpu_dp, old_master_mtu - overhead);
+ if (new_conduit_mtu != old_conduit_mtu)
+ dsa_port_mtu_change(cpu_dp, old_conduit_mtu - overhead);
out_cpu_failed:
- if (new_master_mtu != old_master_mtu)
- dev_set_mtu(master, old_master_mtu);
-out_master_failed:
+ if (new_conduit_mtu != old_conduit_mtu)
+ dev_set_mtu(conduit, old_conduit_mtu);
+out_conduit_failed:
return err;
}
static int __maybe_unused
-dsa_slave_dcbnl_set_default_prio(struct net_device *dev, struct dcb_app *app)
+dsa_user_dcbnl_set_default_prio(struct net_device *dev, struct dcb_app *app)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct dsa_switch *ds = dp->ds;
unsigned long mask, new_prio;
int err, port = dp->index;
@@ -2180,9 +2164,9 @@ dsa_slave_dcbnl_set_default_prio(struct net_device *dev, struct dcb_app *app)
}
static int __maybe_unused
-dsa_slave_dcbnl_add_dscp_prio(struct net_device *dev, struct dcb_app *app)
+dsa_user_dcbnl_add_dscp_prio(struct net_device *dev, struct dcb_app *app)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct dsa_switch *ds = dp->ds;
unsigned long mask, new_prio;
int err, port = dp->index;
@@ -2213,29 +2197,29 @@ dsa_slave_dcbnl_add_dscp_prio(struct net_device *dev, struct dcb_app *app)
return 0;
}
-static int __maybe_unused dsa_slave_dcbnl_ieee_setapp(struct net_device *dev,
- struct dcb_app *app)
+static int __maybe_unused dsa_user_dcbnl_ieee_setapp(struct net_device *dev,
+ struct dcb_app *app)
{
switch (app->selector) {
case IEEE_8021QAZ_APP_SEL_ETHERTYPE:
switch (app->protocol) {
case 0:
- return dsa_slave_dcbnl_set_default_prio(dev, app);
+ return dsa_user_dcbnl_set_default_prio(dev, app);
default:
return -EOPNOTSUPP;
}
break;
case IEEE_8021QAZ_APP_SEL_DSCP:
- return dsa_slave_dcbnl_add_dscp_prio(dev, app);
+ return dsa_user_dcbnl_add_dscp_prio(dev, app);
default:
return -EOPNOTSUPP;
}
}
static int __maybe_unused
-dsa_slave_dcbnl_del_default_prio(struct net_device *dev, struct dcb_app *app)
+dsa_user_dcbnl_del_default_prio(struct net_device *dev, struct dcb_app *app)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct dsa_switch *ds = dp->ds;
unsigned long mask, new_prio;
int err, port = dp->index;
@@ -2260,9 +2244,9 @@ dsa_slave_dcbnl_del_default_prio(struct net_device *dev, struct dcb_app *app)
}
static int __maybe_unused
-dsa_slave_dcbnl_del_dscp_prio(struct net_device *dev, struct dcb_app *app)
+dsa_user_dcbnl_del_dscp_prio(struct net_device *dev, struct dcb_app *app)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct dsa_switch *ds = dp->ds;
int err, port = dp->index;
u8 dscp = app->protocol;
@@ -2283,20 +2267,20 @@ dsa_slave_dcbnl_del_dscp_prio(struct net_device *dev, struct dcb_app *app)
return 0;
}
-static int __maybe_unused dsa_slave_dcbnl_ieee_delapp(struct net_device *dev,
- struct dcb_app *app)
+static int __maybe_unused dsa_user_dcbnl_ieee_delapp(struct net_device *dev,
+ struct dcb_app *app)
{
switch (app->selector) {
case IEEE_8021QAZ_APP_SEL_ETHERTYPE:
switch (app->protocol) {
case 0:
- return dsa_slave_dcbnl_del_default_prio(dev, app);
+ return dsa_user_dcbnl_del_default_prio(dev, app);
default:
return -EOPNOTSUPP;
}
break;
case IEEE_8021QAZ_APP_SEL_DSCP:
- return dsa_slave_dcbnl_del_dscp_prio(dev, app);
+ return dsa_user_dcbnl_del_dscp_prio(dev, app);
default:
return -EOPNOTSUPP;
}
@@ -2305,9 +2289,9 @@ static int __maybe_unused dsa_slave_dcbnl_ieee_delapp(struct net_device *dev,
/* Pre-populate the DCB application priority table with the priorities
* configured during switch setup, which we read from hardware here.
*/
-static int dsa_slave_dcbnl_init(struct net_device *dev)
+static int dsa_user_dcbnl_init(struct net_device *dev)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct dsa_switch *ds = dp->ds;
int port = dp->index;
int err;
@@ -2355,49 +2339,49 @@ static int dsa_slave_dcbnl_init(struct net_device *dev)
return 0;
}
-static const struct ethtool_ops dsa_slave_ethtool_ops = {
- .get_drvinfo = dsa_slave_get_drvinfo,
- .get_regs_len = dsa_slave_get_regs_len,
- .get_regs = dsa_slave_get_regs,
- .nway_reset = dsa_slave_nway_reset,
+static const struct ethtool_ops dsa_user_ethtool_ops = {
+ .get_drvinfo = dsa_user_get_drvinfo,
+ .get_regs_len = dsa_user_get_regs_len,
+ .get_regs = dsa_user_get_regs,
+ .nway_reset = dsa_user_nway_reset,
.get_link = ethtool_op_get_link,
- .get_eeprom_len = dsa_slave_get_eeprom_len,
- .get_eeprom = dsa_slave_get_eeprom,
- .set_eeprom = dsa_slave_set_eeprom,
- .get_strings = dsa_slave_get_strings,
- .get_ethtool_stats = dsa_slave_get_ethtool_stats,
- .get_sset_count = dsa_slave_get_sset_count,
- .get_eth_phy_stats = dsa_slave_get_eth_phy_stats,
- .get_eth_mac_stats = dsa_slave_get_eth_mac_stats,
- .get_eth_ctrl_stats = dsa_slave_get_eth_ctrl_stats,
- .get_rmon_stats = dsa_slave_get_rmon_stats,
- .set_wol = dsa_slave_set_wol,
- .get_wol = dsa_slave_get_wol,
- .set_eee = dsa_slave_set_eee,
- .get_eee = dsa_slave_get_eee,
- .get_link_ksettings = dsa_slave_get_link_ksettings,
- .set_link_ksettings = dsa_slave_set_link_ksettings,
- .get_pause_stats = dsa_slave_get_pause_stats,
- .get_pauseparam = dsa_slave_get_pauseparam,
- .set_pauseparam = dsa_slave_set_pauseparam,
- .get_rxnfc = dsa_slave_get_rxnfc,
- .set_rxnfc = dsa_slave_set_rxnfc,
- .get_ts_info = dsa_slave_get_ts_info,
- .self_test = dsa_slave_net_selftest,
- .get_mm = dsa_slave_get_mm,
- .set_mm = dsa_slave_set_mm,
- .get_mm_stats = dsa_slave_get_mm_stats,
+ .get_eeprom_len = dsa_user_get_eeprom_len,
+ .get_eeprom = dsa_user_get_eeprom,
+ .set_eeprom = dsa_user_set_eeprom,
+ .get_strings = dsa_user_get_strings,
+ .get_ethtool_stats = dsa_user_get_ethtool_stats,
+ .get_sset_count = dsa_user_get_sset_count,
+ .get_eth_phy_stats = dsa_user_get_eth_phy_stats,
+ .get_eth_mac_stats = dsa_user_get_eth_mac_stats,
+ .get_eth_ctrl_stats = dsa_user_get_eth_ctrl_stats,
+ .get_rmon_stats = dsa_user_get_rmon_stats,
+ .set_wol = dsa_user_set_wol,
+ .get_wol = dsa_user_get_wol,
+ .set_eee = dsa_user_set_eee,
+ .get_eee = dsa_user_get_eee,
+ .get_link_ksettings = dsa_user_get_link_ksettings,
+ .set_link_ksettings = dsa_user_set_link_ksettings,
+ .get_pause_stats = dsa_user_get_pause_stats,
+ .get_pauseparam = dsa_user_get_pauseparam,
+ .set_pauseparam = dsa_user_set_pauseparam,
+ .get_rxnfc = dsa_user_get_rxnfc,
+ .set_rxnfc = dsa_user_set_rxnfc,
+ .get_ts_info = dsa_user_get_ts_info,
+ .self_test = dsa_user_net_selftest,
+ .get_mm = dsa_user_get_mm,
+ .set_mm = dsa_user_set_mm,
+ .get_mm_stats = dsa_user_get_mm_stats,
};
-static const struct dcbnl_rtnl_ops __maybe_unused dsa_slave_dcbnl_ops = {
- .ieee_setapp = dsa_slave_dcbnl_ieee_setapp,
- .ieee_delapp = dsa_slave_dcbnl_ieee_delapp,
+static const struct dcbnl_rtnl_ops __maybe_unused dsa_user_dcbnl_ops = {
+ .ieee_setapp = dsa_user_dcbnl_ieee_setapp,
+ .ieee_delapp = dsa_user_dcbnl_ieee_delapp,
};
-static void dsa_slave_get_stats64(struct net_device *dev,
- struct rtnl_link_stats64 *s)
+static void dsa_user_get_stats64(struct net_device *dev,
+ struct rtnl_link_stats64 *s)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct dsa_switch *ds = dp->ds;
if (ds->ops->get_stats64)
@@ -2406,43 +2390,43 @@ static void dsa_slave_get_stats64(struct net_device *dev,
dev_get_tstats64(dev, s);
}
-static int dsa_slave_fill_forward_path(struct net_device_path_ctx *ctx,
- struct net_device_path *path)
+static int dsa_user_fill_forward_path(struct net_device_path_ctx *ctx,
+ struct net_device_path *path)
{
- struct dsa_port *dp = dsa_slave_to_port(ctx->dev);
- struct net_device *master = dsa_port_to_master(dp);
+ struct dsa_port *dp = dsa_user_to_port(ctx->dev);
+ struct net_device *conduit = dsa_port_to_conduit(dp);
struct dsa_port *cpu_dp = dp->cpu_dp;
path->dev = ctx->dev;
path->type = DEV_PATH_DSA;
path->dsa.proto = cpu_dp->tag_ops->proto;
path->dsa.port = dp->index;
- ctx->dev = master;
+ ctx->dev = conduit;
return 0;
}
-static const struct net_device_ops dsa_slave_netdev_ops = {
- .ndo_open = dsa_slave_open,
- .ndo_stop = dsa_slave_close,
- .ndo_start_xmit = dsa_slave_xmit,
- .ndo_change_rx_flags = dsa_slave_change_rx_flags,
- .ndo_set_rx_mode = dsa_slave_set_rx_mode,
- .ndo_set_mac_address = dsa_slave_set_mac_address,
- .ndo_fdb_dump = dsa_slave_fdb_dump,
- .ndo_eth_ioctl = dsa_slave_ioctl,
- .ndo_get_iflink = dsa_slave_get_iflink,
+static const struct net_device_ops dsa_user_netdev_ops = {
+ .ndo_open = dsa_user_open,
+ .ndo_stop = dsa_user_close,
+ .ndo_start_xmit = dsa_user_xmit,
+ .ndo_change_rx_flags = dsa_user_change_rx_flags,
+ .ndo_set_rx_mode = dsa_user_set_rx_mode,
+ .ndo_set_mac_address = dsa_user_set_mac_address,
+ .ndo_fdb_dump = dsa_user_fdb_dump,
+ .ndo_eth_ioctl = dsa_user_ioctl,
+ .ndo_get_iflink = dsa_user_get_iflink,
#ifdef CONFIG_NET_POLL_CONTROLLER
- .ndo_netpoll_setup = dsa_slave_netpoll_setup,
- .ndo_netpoll_cleanup = dsa_slave_netpoll_cleanup,
- .ndo_poll_controller = dsa_slave_poll_controller,
+ .ndo_netpoll_setup = dsa_user_netpoll_setup,
+ .ndo_netpoll_cleanup = dsa_user_netpoll_cleanup,
+ .ndo_poll_controller = dsa_user_poll_controller,
#endif
- .ndo_setup_tc = dsa_slave_setup_tc,
- .ndo_get_stats64 = dsa_slave_get_stats64,
- .ndo_vlan_rx_add_vid = dsa_slave_vlan_rx_add_vid,
- .ndo_vlan_rx_kill_vid = dsa_slave_vlan_rx_kill_vid,
- .ndo_change_mtu = dsa_slave_change_mtu,
- .ndo_fill_forward_path = dsa_slave_fill_forward_path,
+ .ndo_setup_tc = dsa_user_setup_tc,
+ .ndo_get_stats64 = dsa_user_get_stats64,
+ .ndo_vlan_rx_add_vid = dsa_user_vlan_rx_add_vid,
+ .ndo_vlan_rx_kill_vid = dsa_user_vlan_rx_kill_vid,
+ .ndo_change_mtu = dsa_user_change_mtu,
+ .ndo_fill_forward_path = dsa_user_fill_forward_path,
};
static struct device_type dsa_type = {
@@ -2458,8 +2442,8 @@ void dsa_port_phylink_mac_change(struct dsa_switch *ds, int port, bool up)
}
EXPORT_SYMBOL_GPL(dsa_port_phylink_mac_change);
-static void dsa_slave_phylink_fixed_state(struct phylink_config *config,
- struct phylink_link_state *state)
+static void dsa_user_phylink_fixed_state(struct phylink_config *config,
+ struct phylink_link_state *state)
{
struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
struct dsa_switch *ds = dp->ds;
@@ -2470,33 +2454,33 @@ static void dsa_slave_phylink_fixed_state(struct phylink_config *config,
ds->ops->phylink_fixed_state(ds, dp->index, state);
}
-/* slave device setup *******************************************************/
-static int dsa_slave_phy_connect(struct net_device *slave_dev, int addr,
- u32 flags)
+/* user device setup *******************************************************/
+static int dsa_user_phy_connect(struct net_device *user_dev, int addr,
+ u32 flags)
{
- struct dsa_port *dp = dsa_slave_to_port(slave_dev);
+ struct dsa_port *dp = dsa_user_to_port(user_dev);
struct dsa_switch *ds = dp->ds;
- slave_dev->phydev = mdiobus_get_phy(ds->slave_mii_bus, addr);
- if (!slave_dev->phydev) {
- netdev_err(slave_dev, "no phy at %d\n", addr);
+ user_dev->phydev = mdiobus_get_phy(ds->user_mii_bus, addr);
+ if (!user_dev->phydev) {
+ netdev_err(user_dev, "no phy at %d\n", addr);
return -ENODEV;
}
- slave_dev->phydev->dev_flags |= flags;
+ user_dev->phydev->dev_flags |= flags;
- return phylink_connect_phy(dp->pl, slave_dev->phydev);
+ return phylink_connect_phy(dp->pl, user_dev->phydev);
}
-static int dsa_slave_phy_setup(struct net_device *slave_dev)
+static int dsa_user_phy_setup(struct net_device *user_dev)
{
- struct dsa_port *dp = dsa_slave_to_port(slave_dev);
+ struct dsa_port *dp = dsa_user_to_port(user_dev);
struct device_node *port_dn = dp->dn;
struct dsa_switch *ds = dp->ds;
u32 phy_flags = 0;
int ret;
- dp->pl_config.dev = &slave_dev->dev;
+ dp->pl_config.dev = &user_dev->dev;
dp->pl_config.type = PHYLINK_NETDEV;
/* The get_fixed_state callback takes precedence over polling the
@@ -2504,7 +2488,7 @@ static int dsa_slave_phy_setup(struct net_device *slave_dev)
* this if the switch provides such a callback.
*/
if (ds->ops->phylink_fixed_state) {
- dp->pl_config.get_fixed_state = dsa_slave_phylink_fixed_state;
+ dp->pl_config.get_fixed_state = dsa_user_phylink_fixed_state;
dp->pl_config.poll_fixed_state = true;
}
@@ -2516,14 +2500,14 @@ static int dsa_slave_phy_setup(struct net_device *slave_dev)
phy_flags = ds->ops->get_phy_flags(ds, dp->index);
ret = phylink_of_phy_connect(dp->pl, port_dn, phy_flags);
- if (ret == -ENODEV && ds->slave_mii_bus) {
+ if (ret == -ENODEV && ds->user_mii_bus) {
/* We could not connect to a designated PHY or SFP, so try to
* use the switch internal MDIO bus instead
*/
- ret = dsa_slave_phy_connect(slave_dev, dp->index, phy_flags);
+ ret = dsa_user_phy_connect(user_dev, dp->index, phy_flags);
}
if (ret) {
- netdev_err(slave_dev, "failed to connect to PHY: %pe\n",
+ netdev_err(user_dev, "failed to connect to PHY: %pe\n",
ERR_PTR(ret));
dsa_port_phylink_destroy(dp);
}
@@ -2531,42 +2515,42 @@ static int dsa_slave_phy_setup(struct net_device *slave_dev)
return ret;
}
-void dsa_slave_setup_tagger(struct net_device *slave)
+void dsa_user_setup_tagger(struct net_device *user)
{
- struct dsa_port *dp = dsa_slave_to_port(slave);
- struct net_device *master = dsa_port_to_master(dp);
- struct dsa_slave_priv *p = netdev_priv(slave);
+ struct dsa_port *dp = dsa_user_to_port(user);
+ struct net_device *conduit = dsa_port_to_conduit(dp);
+ struct dsa_user_priv *p = netdev_priv(user);
const struct dsa_port *cpu_dp = dp->cpu_dp;
const struct dsa_switch *ds = dp->ds;
- slave->needed_headroom = cpu_dp->tag_ops->needed_headroom;
- slave->needed_tailroom = cpu_dp->tag_ops->needed_tailroom;
- /* Try to save one extra realloc later in the TX path (in the master)
- * by also inheriting the master's needed headroom and tailroom.
+ user->needed_headroom = cpu_dp->tag_ops->needed_headroom;
+ user->needed_tailroom = cpu_dp->tag_ops->needed_tailroom;
+ /* Try to save one extra realloc later in the TX path (in the conduit)
+ * by also inheriting the conduit's needed headroom and tailroom.
* The 8021q driver also does this.
*/
- slave->needed_headroom += master->needed_headroom;
- slave->needed_tailroom += master->needed_tailroom;
+ user->needed_headroom += conduit->needed_headroom;
+ user->needed_tailroom += conduit->needed_tailroom;
p->xmit = cpu_dp->tag_ops->xmit;
- slave->features = master->vlan_features | NETIF_F_HW_TC;
- slave->hw_features |= NETIF_F_HW_TC;
- slave->features |= NETIF_F_LLTX;
- if (slave->needed_tailroom)
- slave->features &= ~(NETIF_F_SG | NETIF_F_FRAGLIST);
+ user->features = conduit->vlan_features | NETIF_F_HW_TC;
+ user->hw_features |= NETIF_F_HW_TC;
+ user->features |= NETIF_F_LLTX;
+ if (user->needed_tailroom)
+ user->features &= ~(NETIF_F_SG | NETIF_F_FRAGLIST);
if (ds->needs_standalone_vlan_filtering)
- slave->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+ user->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
}
-int dsa_slave_suspend(struct net_device *slave_dev)
+int dsa_user_suspend(struct net_device *user_dev)
{
- struct dsa_port *dp = dsa_slave_to_port(slave_dev);
+ struct dsa_port *dp = dsa_user_to_port(user_dev);
- if (!netif_running(slave_dev))
+ if (!netif_running(user_dev))
return 0;
- netif_device_detach(slave_dev);
+ netif_device_detach(user_dev);
rtnl_lock();
phylink_stop(dp->pl);
@@ -2575,14 +2559,14 @@ int dsa_slave_suspend(struct net_device *slave_dev)
return 0;
}
-int dsa_slave_resume(struct net_device *slave_dev)
+int dsa_user_resume(struct net_device *user_dev)
{
- struct dsa_port *dp = dsa_slave_to_port(slave_dev);
+ struct dsa_port *dp = dsa_user_to_port(user_dev);
- if (!netif_running(slave_dev))
+ if (!netif_running(user_dev))
return 0;
- netif_device_attach(slave_dev);
+ netif_device_attach(user_dev);
rtnl_lock();
phylink_start(dp->pl);
@@ -2591,12 +2575,12 @@ int dsa_slave_resume(struct net_device *slave_dev)
return 0;
}
-int dsa_slave_create(struct dsa_port *port)
+int dsa_user_create(struct dsa_port *port)
{
- struct net_device *master = dsa_port_to_master(port);
+ struct net_device *conduit = dsa_port_to_conduit(port);
struct dsa_switch *ds = port->ds;
- struct net_device *slave_dev;
- struct dsa_slave_priv *p;
+ struct net_device *user_dev;
+ struct dsa_user_priv *p;
const char *name;
int assign_type;
int ret;
@@ -2612,55 +2596,55 @@ int dsa_slave_create(struct dsa_port *port)
assign_type = NET_NAME_ENUM;
}
- slave_dev = alloc_netdev_mqs(sizeof(struct dsa_slave_priv), name,
- assign_type, ether_setup,
- ds->num_tx_queues, 1);
- if (slave_dev == NULL)
+ user_dev = alloc_netdev_mqs(sizeof(struct dsa_user_priv), name,
+ assign_type, ether_setup,
+ ds->num_tx_queues, 1);
+ if (user_dev == NULL)
return -ENOMEM;
- slave_dev->rtnl_link_ops = &dsa_link_ops;
- slave_dev->ethtool_ops = &dsa_slave_ethtool_ops;
+ user_dev->rtnl_link_ops = &dsa_link_ops;
+ user_dev->ethtool_ops = &dsa_user_ethtool_ops;
#if IS_ENABLED(CONFIG_DCB)
- slave_dev->dcbnl_ops = &dsa_slave_dcbnl_ops;
+ user_dev->dcbnl_ops = &dsa_user_dcbnl_ops;
#endif
if (!is_zero_ether_addr(port->mac))
- eth_hw_addr_set(slave_dev, port->mac);
+ eth_hw_addr_set(user_dev, port->mac);
else
- eth_hw_addr_inherit(slave_dev, master);
- slave_dev->priv_flags |= IFF_NO_QUEUE;
+ eth_hw_addr_inherit(user_dev, conduit);
+ user_dev->priv_flags |= IFF_NO_QUEUE;
if (dsa_switch_supports_uc_filtering(ds))
- slave_dev->priv_flags |= IFF_UNICAST_FLT;
- slave_dev->netdev_ops = &dsa_slave_netdev_ops;
+ user_dev->priv_flags |= IFF_UNICAST_FLT;
+ user_dev->netdev_ops = &dsa_user_netdev_ops;
if (ds->ops->port_max_mtu)
- slave_dev->max_mtu = ds->ops->port_max_mtu(ds, port->index);
- SET_NETDEV_DEVTYPE(slave_dev, &dsa_type);
-
- SET_NETDEV_DEV(slave_dev, port->ds->dev);
- SET_NETDEV_DEVLINK_PORT(slave_dev, &port->devlink_port);
- slave_dev->dev.of_node = port->dn;
- slave_dev->vlan_features = master->vlan_features;
-
- p = netdev_priv(slave_dev);
- slave_dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
- if (!slave_dev->tstats) {
- free_netdev(slave_dev);
+ user_dev->max_mtu = ds->ops->port_max_mtu(ds, port->index);
+ SET_NETDEV_DEVTYPE(user_dev, &dsa_type);
+
+ SET_NETDEV_DEV(user_dev, port->ds->dev);
+ SET_NETDEV_DEVLINK_PORT(user_dev, &port->devlink_port);
+ user_dev->dev.of_node = port->dn;
+ user_dev->vlan_features = conduit->vlan_features;
+
+ p = netdev_priv(user_dev);
+ user_dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
+ if (!user_dev->tstats) {
+ free_netdev(user_dev);
return -ENOMEM;
}
- ret = gro_cells_init(&p->gcells, slave_dev);
+ ret = gro_cells_init(&p->gcells, user_dev);
if (ret)
goto out_free;
p->dp = port;
INIT_LIST_HEAD(&p->mall_tc_list);
- port->slave = slave_dev;
- dsa_slave_setup_tagger(slave_dev);
+ port->user = user_dev;
+ dsa_user_setup_tagger(user_dev);
- netif_carrier_off(slave_dev);
+ netif_carrier_off(user_dev);
- ret = dsa_slave_phy_setup(slave_dev);
+ ret = dsa_user_phy_setup(user_dev);
if (ret) {
- netdev_err(slave_dev,
+ netdev_err(user_dev,
"error %d setting up PHY for tree %d, switch %d, port %d\n",
ret, ds->dst->index, ds->index, port->index);
goto out_gcells;
@@ -2668,23 +2652,23 @@ int dsa_slave_create(struct dsa_port *port)
rtnl_lock();
- ret = dsa_slave_change_mtu(slave_dev, ETH_DATA_LEN);
+ ret = dsa_user_change_mtu(user_dev, ETH_DATA_LEN);
if (ret && ret != -EOPNOTSUPP)
dev_warn(ds->dev, "nonfatal error %d setting MTU to %d on port %d\n",
ret, ETH_DATA_LEN, port->index);
- ret = register_netdevice(slave_dev);
+ ret = register_netdevice(user_dev);
if (ret) {
- netdev_err(master, "error %d registering interface %s\n",
- ret, slave_dev->name);
+ netdev_err(conduit, "error %d registering interface %s\n",
+ ret, user_dev->name);
rtnl_unlock();
goto out_phy;
}
if (IS_ENABLED(CONFIG_DCB)) {
- ret = dsa_slave_dcbnl_init(slave_dev);
+ ret = dsa_user_dcbnl_init(user_dev);
if (ret) {
- netdev_err(slave_dev,
+ netdev_err(user_dev,
"failed to initialize DCB: %pe\n",
ERR_PTR(ret));
rtnl_unlock();
@@ -2692,7 +2676,7 @@ int dsa_slave_create(struct dsa_port *port)
}
}
- ret = netdev_upper_dev_link(master, slave_dev, NULL);
+ ret = netdev_upper_dev_link(conduit, user_dev, NULL);
rtnl_unlock();
@@ -2702,7 +2686,7 @@ int dsa_slave_create(struct dsa_port *port)
return 0;
out_unregister:
- unregister_netdev(slave_dev);
+ unregister_netdev(user_dev);
out_phy:
rtnl_lock();
phylink_disconnect_phy(p->dp->pl);
@@ -2711,124 +2695,125 @@ out_phy:
out_gcells:
gro_cells_destroy(&p->gcells);
out_free:
- free_percpu(slave_dev->tstats);
- free_netdev(slave_dev);
- port->slave = NULL;
+ free_percpu(user_dev->tstats);
+ free_netdev(user_dev);
+ port->user = NULL;
return ret;
}
-void dsa_slave_destroy(struct net_device *slave_dev)
+void dsa_user_destroy(struct net_device *user_dev)
{
- struct net_device *master = dsa_slave_to_master(slave_dev);
- struct dsa_port *dp = dsa_slave_to_port(slave_dev);
- struct dsa_slave_priv *p = netdev_priv(slave_dev);
+ struct net_device *conduit = dsa_user_to_conduit(user_dev);
+ struct dsa_port *dp = dsa_user_to_port(user_dev);
+ struct dsa_user_priv *p = netdev_priv(user_dev);
- netif_carrier_off(slave_dev);
+ netif_carrier_off(user_dev);
rtnl_lock();
- netdev_upper_dev_unlink(master, slave_dev);
- unregister_netdevice(slave_dev);
+ netdev_upper_dev_unlink(conduit, user_dev);
+ unregister_netdevice(user_dev);
phylink_disconnect_phy(dp->pl);
rtnl_unlock();
dsa_port_phylink_destroy(dp);
gro_cells_destroy(&p->gcells);
- free_percpu(slave_dev->tstats);
- free_netdev(slave_dev);
+ free_percpu(user_dev->tstats);
+ free_netdev(user_dev);
}
-int dsa_slave_change_master(struct net_device *dev, struct net_device *master,
+int dsa_user_change_conduit(struct net_device *dev, struct net_device *conduit,
struct netlink_ext_ack *extack)
{
- struct net_device *old_master = dsa_slave_to_master(dev);
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct net_device *old_conduit = dsa_user_to_conduit(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct dsa_switch *ds = dp->ds;
struct net_device *upper;
struct list_head *iter;
int err;
- if (master == old_master)
+ if (conduit == old_conduit)
return 0;
- if (!ds->ops->port_change_master) {
+ if (!ds->ops->port_change_conduit) {
NL_SET_ERR_MSG_MOD(extack,
- "Driver does not support changing DSA master");
+ "Driver does not support changing DSA conduit");
return -EOPNOTSUPP;
}
- if (!netdev_uses_dsa(master)) {
+ if (!netdev_uses_dsa(conduit)) {
NL_SET_ERR_MSG_MOD(extack,
- "Interface not eligible as DSA master");
+ "Interface not eligible as DSA conduit");
return -EOPNOTSUPP;
}
- netdev_for_each_upper_dev_rcu(master, upper, iter) {
- if (dsa_slave_dev_check(upper))
+ netdev_for_each_upper_dev_rcu(conduit, upper, iter) {
+ if (dsa_user_dev_check(upper))
continue;
if (netif_is_bridge_master(upper))
continue;
- NL_SET_ERR_MSG_MOD(extack, "Cannot join master with unknown uppers");
+ NL_SET_ERR_MSG_MOD(extack, "Cannot join conduit with unknown uppers");
return -EOPNOTSUPP;
}
- /* Since we allow live-changing the DSA master, plus we auto-open the
- * DSA master when the user port opens => we need to ensure that the
- * new DSA master is open too.
+ /* Since we allow live-changing the DSA conduit, plus we auto-open the
+ * DSA conduit when the user port opens => we need to ensure that the
+ * new DSA conduit is open too.
*/
if (dev->flags & IFF_UP) {
- err = dev_open(master, extack);
+ err = dev_open(conduit, extack);
if (err)
return err;
}
- netdev_upper_dev_unlink(old_master, dev);
+ netdev_upper_dev_unlink(old_conduit, dev);
- err = netdev_upper_dev_link(master, dev, extack);
+ err = netdev_upper_dev_link(conduit, dev, extack);
if (err)
- goto out_revert_old_master_unlink;
+ goto out_revert_old_conduit_unlink;
- err = dsa_port_change_master(dp, master, extack);
+ err = dsa_port_change_conduit(dp, conduit, extack);
if (err)
- goto out_revert_master_link;
+ goto out_revert_conduit_link;
/* Update the MTU of the new CPU port through cross-chip notifiers */
- err = dsa_slave_change_mtu(dev, dev->mtu);
+ err = dsa_user_change_mtu(dev, dev->mtu);
if (err && err != -EOPNOTSUPP) {
netdev_warn(dev,
- "nonfatal error updating MTU with new master: %pe\n",
+ "nonfatal error updating MTU with new conduit: %pe\n",
ERR_PTR(err));
}
/* If the port doesn't have its own MAC address and relies on the DSA
- * master's one, inherit it again from the new DSA master.
+ * conduit's one, inherit it again from the new DSA conduit.
*/
if (is_zero_ether_addr(dp->mac))
- eth_hw_addr_inherit(dev, master);
+ eth_hw_addr_inherit(dev, conduit);
return 0;
-out_revert_master_link:
- netdev_upper_dev_unlink(master, dev);
-out_revert_old_master_unlink:
- netdev_upper_dev_link(old_master, dev, NULL);
+out_revert_conduit_link:
+ netdev_upper_dev_unlink(conduit, dev);
+out_revert_old_conduit_unlink:
+ netdev_upper_dev_link(old_conduit, dev, NULL);
return err;
}
-bool dsa_slave_dev_check(const struct net_device *dev)
+bool dsa_user_dev_check(const struct net_device *dev)
{
- return dev->netdev_ops == &dsa_slave_netdev_ops;
+ return dev->netdev_ops == &dsa_user_netdev_ops;
}
-EXPORT_SYMBOL_GPL(dsa_slave_dev_check);
+EXPORT_SYMBOL_GPL(dsa_user_dev_check);
-static int dsa_slave_changeupper(struct net_device *dev,
- struct netdev_notifier_changeupper_info *info)
+static int dsa_user_changeupper(struct net_device *dev,
+ struct netdev_notifier_changeupper_info *info)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
struct netlink_ext_ack *extack;
int err = NOTIFY_DONE;
+ struct dsa_port *dp;
- if (!dsa_slave_dev_check(dev))
+ if (!dsa_user_dev_check(dev))
return err;
+ dp = dsa_user_to_port(dev);
extack = netdev_notifier_info_to_extack(&info->info);
if (netif_is_bridge_master(info->upper_dev)) {
@@ -2862,7 +2847,7 @@ static int dsa_slave_changeupper(struct net_device *dev,
}
} else if (is_hsr_master(info->upper_dev)) {
if (info->linking) {
- err = dsa_port_hsr_join(dp, info->upper_dev);
+ err = dsa_port_hsr_join(dp, info->upper_dev, extack);
if (err == -EOPNOTSUPP) {
NL_SET_ERR_MSG_WEAK_MOD(extack,
"Offloading not supported");
@@ -2878,28 +2863,30 @@ static int dsa_slave_changeupper(struct net_device *dev,
return err;
}
-static int dsa_slave_prechangeupper(struct net_device *dev,
- struct netdev_notifier_changeupper_info *info)
+static int dsa_user_prechangeupper(struct net_device *dev,
+ struct netdev_notifier_changeupper_info *info)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp;
- if (!dsa_slave_dev_check(dev))
+ if (!dsa_user_dev_check(dev))
return NOTIFY_DONE;
+ dp = dsa_user_to_port(dev);
+
if (netif_is_bridge_master(info->upper_dev) && !info->linking)
dsa_port_pre_bridge_leave(dp, info->upper_dev);
else if (netif_is_lag_master(info->upper_dev) && !info->linking)
dsa_port_pre_lag_leave(dp, info->upper_dev);
- /* dsa_port_pre_hsr_leave is not yet necessary since hsr cannot be
- * meaningfully enslaved to a bridge yet
+ /* dsa_port_pre_hsr_leave is not yet necessary since hsr devices cannot
+ * meaningfully placed under a bridge yet
*/
return NOTIFY_DONE;
}
static int
-dsa_slave_lag_changeupper(struct net_device *dev,
- struct netdev_notifier_changeupper_info *info)
+dsa_user_lag_changeupper(struct net_device *dev,
+ struct netdev_notifier_changeupper_info *info)
{
struct net_device *lower;
struct list_head *iter;
@@ -2910,15 +2897,15 @@ dsa_slave_lag_changeupper(struct net_device *dev,
return err;
netdev_for_each_lower_dev(dev, lower, iter) {
- if (!dsa_slave_dev_check(lower))
+ if (!dsa_user_dev_check(lower))
continue;
- dp = dsa_slave_to_port(lower);
+ dp = dsa_user_to_port(lower);
if (!dp->lag)
/* Software LAG */
continue;
- err = dsa_slave_changeupper(lower, info);
+ err = dsa_user_changeupper(lower, info);
if (notifier_to_errno(err))
break;
}
@@ -2926,12 +2913,12 @@ dsa_slave_lag_changeupper(struct net_device *dev,
return err;
}
-/* Same as dsa_slave_lag_changeupper() except that it calls
- * dsa_slave_prechangeupper()
+/* Same as dsa_user_lag_changeupper() except that it calls
+ * dsa_user_prechangeupper()
*/
static int
-dsa_slave_lag_prechangeupper(struct net_device *dev,
- struct netdev_notifier_changeupper_info *info)
+dsa_user_lag_prechangeupper(struct net_device *dev,
+ struct netdev_notifier_changeupper_info *info)
{
struct net_device *lower;
struct list_head *iter;
@@ -2942,15 +2929,15 @@ dsa_slave_lag_prechangeupper(struct net_device *dev,
return err;
netdev_for_each_lower_dev(dev, lower, iter) {
- if (!dsa_slave_dev_check(lower))
+ if (!dsa_user_dev_check(lower))
continue;
- dp = dsa_slave_to_port(lower);
+ dp = dsa_user_to_port(lower);
if (!dp->lag)
/* Software LAG */
continue;
- err = dsa_slave_prechangeupper(lower, info);
+ err = dsa_user_prechangeupper(lower, info);
if (notifier_to_errno(err))
break;
}
@@ -2963,7 +2950,7 @@ dsa_prevent_bridging_8021q_upper(struct net_device *dev,
struct netdev_notifier_changeupper_info *info)
{
struct netlink_ext_ack *ext_ack;
- struct net_device *slave, *br;
+ struct net_device *user, *br;
struct dsa_port *dp;
ext_ack = netdev_notifier_info_to_extack(&info->info);
@@ -2971,11 +2958,11 @@ dsa_prevent_bridging_8021q_upper(struct net_device *dev,
if (!is_vlan_dev(dev))
return NOTIFY_DONE;
- slave = vlan_dev_real_dev(dev);
- if (!dsa_slave_dev_check(slave))
+ user = vlan_dev_real_dev(dev);
+ if (!dsa_user_dev_check(user))
return NOTIFY_DONE;
- dp = dsa_slave_to_port(slave);
+ dp = dsa_user_to_port(user);
br = dsa_port_bridge_dev_get(dp);
if (!br)
return NOTIFY_DONE;
@@ -2984,7 +2971,7 @@ dsa_prevent_bridging_8021q_upper(struct net_device *dev,
if (br_vlan_enabled(br) &&
netif_is_bridge_master(info->upper_dev) && info->linking) {
NL_SET_ERR_MSG_MOD(ext_ack,
- "Cannot enslave VLAN device into VLAN aware bridge");
+ "Cannot make VLAN device join VLAN-aware bridge");
return notifier_from_errno(-EINVAL);
}
@@ -2992,10 +2979,10 @@ dsa_prevent_bridging_8021q_upper(struct net_device *dev,
}
static int
-dsa_slave_check_8021q_upper(struct net_device *dev,
- struct netdev_notifier_changeupper_info *info)
+dsa_user_check_8021q_upper(struct net_device *dev,
+ struct netdev_notifier_changeupper_info *info)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
struct net_device *br = dsa_port_bridge_dev_get(dp);
struct bridge_vlan_info br_info;
struct netlink_ext_ack *extack;
@@ -3023,17 +3010,17 @@ dsa_slave_check_8021q_upper(struct net_device *dev,
}
static int
-dsa_slave_prechangeupper_sanity_check(struct net_device *dev,
- struct netdev_notifier_changeupper_info *info)
+dsa_user_prechangeupper_sanity_check(struct net_device *dev,
+ struct netdev_notifier_changeupper_info *info)
{
struct dsa_switch *ds;
struct dsa_port *dp;
int err;
- if (!dsa_slave_dev_check(dev))
+ if (!dsa_user_dev_check(dev))
return dsa_prevent_bridging_8021q_upper(dev, info);
- dp = dsa_slave_to_port(dev);
+ dp = dsa_user_to_port(dev);
ds = dp->ds;
if (ds->ops->port_prechangeupper) {
@@ -3043,17 +3030,17 @@ dsa_slave_prechangeupper_sanity_check(struct net_device *dev,
}
if (is_vlan_dev(info->upper_dev))
- return dsa_slave_check_8021q_upper(dev, info);
+ return dsa_user_check_8021q_upper(dev, info);
return NOTIFY_DONE;
}
-/* To be eligible as a DSA master, a LAG must have all lower interfaces be
- * eligible DSA masters. Additionally, all LAG slaves must be DSA masters of
+/* To be eligible as a DSA conduit, a LAG must have all lower interfaces be
+ * eligible DSA conduits. Additionally, all LAG slaves must be DSA conduits of
* switches in the same switch tree.
*/
-static int dsa_lag_master_validate(struct net_device *lag_dev,
- struct netlink_ext_ack *extack)
+static int dsa_lag_conduit_validate(struct net_device *lag_dev,
+ struct netlink_ext_ack *extack)
{
struct net_device *lower1, *lower2;
struct list_head *iter1, *iter2;
@@ -3063,7 +3050,7 @@ static int dsa_lag_master_validate(struct net_device *lag_dev,
if (!netdev_uses_dsa(lower1) ||
!netdev_uses_dsa(lower2)) {
NL_SET_ERR_MSG_MOD(extack,
- "All LAG ports must be eligible as DSA masters");
+ "All LAG ports must be eligible as DSA conduits");
return notifier_from_errno(-EINVAL);
}
@@ -3073,7 +3060,7 @@ static int dsa_lag_master_validate(struct net_device *lag_dev,
if (!dsa_port_tree_same(lower1->dsa_ptr,
lower2->dsa_ptr)) {
NL_SET_ERR_MSG_MOD(extack,
- "LAG contains DSA masters of disjoint switch trees");
+ "LAG contains DSA conduits of disjoint switch trees");
return notifier_from_errno(-EINVAL);
}
}
@@ -3083,41 +3070,41 @@ static int dsa_lag_master_validate(struct net_device *lag_dev,
}
static int
-dsa_master_prechangeupper_sanity_check(struct net_device *master,
- struct netdev_notifier_changeupper_info *info)
+dsa_conduit_prechangeupper_sanity_check(struct net_device *conduit,
+ struct netdev_notifier_changeupper_info *info)
{
struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(&info->info);
- if (!netdev_uses_dsa(master))
+ if (!netdev_uses_dsa(conduit))
return NOTIFY_DONE;
if (!info->linking)
return NOTIFY_DONE;
/* Allow DSA switch uppers */
- if (dsa_slave_dev_check(info->upper_dev))
+ if (dsa_user_dev_check(info->upper_dev))
return NOTIFY_DONE;
- /* Allow bridge uppers of DSA masters, subject to further
+ /* Allow bridge uppers of DSA conduits, subject to further
* restrictions in dsa_bridge_prechangelower_sanity_check()
*/
if (netif_is_bridge_master(info->upper_dev))
return NOTIFY_DONE;
/* Allow LAG uppers, subject to further restrictions in
- * dsa_lag_master_prechangelower_sanity_check()
+ * dsa_lag_conduit_prechangelower_sanity_check()
*/
if (netif_is_lag_master(info->upper_dev))
- return dsa_lag_master_validate(info->upper_dev, extack);
+ return dsa_lag_conduit_validate(info->upper_dev, extack);
NL_SET_ERR_MSG_MOD(extack,
- "DSA master cannot join unknown upper interfaces");
+ "DSA conduit cannot join unknown upper interfaces");
return notifier_from_errno(-EBUSY);
}
static int
-dsa_lag_master_prechangelower_sanity_check(struct net_device *dev,
- struct netdev_notifier_changeupper_info *info)
+dsa_lag_conduit_prechangelower_sanity_check(struct net_device *dev,
+ struct netdev_notifier_changeupper_info *info)
{
struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(&info->info);
struct net_device *lag_dev = info->upper_dev;
@@ -3132,14 +3119,14 @@ dsa_lag_master_prechangelower_sanity_check(struct net_device *dev,
if (!netdev_uses_dsa(dev)) {
NL_SET_ERR_MSG(extack,
- "Only DSA masters can join a LAG DSA master");
+ "Only DSA conduits can join a LAG DSA conduit");
return notifier_from_errno(-EINVAL);
}
netdev_for_each_lower_dev(lag_dev, lower, iter) {
if (!dsa_port_tree_same(dev->dsa_ptr, lower->dsa_ptr)) {
NL_SET_ERR_MSG(extack,
- "Interface is DSA master for a different switch tree than this LAG");
+ "Interface is DSA conduit for a different switch tree than this LAG");
return notifier_from_errno(-EINVAL);
}
@@ -3149,13 +3136,13 @@ dsa_lag_master_prechangelower_sanity_check(struct net_device *dev,
return NOTIFY_DONE;
}
-/* Don't allow bridging of DSA masters, since the bridge layer rx_handler
+/* Don't allow bridging of DSA conduits, since the bridge layer rx_handler
* prevents the DSA fake ethertype handler to be invoked, so we don't get the
* chance to strip off and parse the DSA switch tag protocol header (the bridge
* layer just returns RX_HANDLER_CONSUMED, stopping RX processing for these
* frames).
* The only case where that would not be an issue is when bridging can already
- * be offloaded, such as when the DSA master is itself a DSA or plain switchdev
+ * be offloaded, such as when the DSA conduit is itself a DSA or plain switchdev
* port, and is bridged only with other ports from the same hardware device.
*/
static int
@@ -3181,7 +3168,7 @@ dsa_bridge_prechangelower_sanity_check(struct net_device *new_lower,
if (!netdev_port_same_parent_id(lower, new_lower)) {
NL_SET_ERR_MSG(extack,
- "Cannot do software bridging with a DSA master");
+ "Cannot do software bridging with a DSA conduit");
return notifier_from_errno(-EINVAL);
}
}
@@ -3189,45 +3176,45 @@ dsa_bridge_prechangelower_sanity_check(struct net_device *new_lower,
return NOTIFY_DONE;
}
-static void dsa_tree_migrate_ports_from_lag_master(struct dsa_switch_tree *dst,
- struct net_device *lag_dev)
+static void dsa_tree_migrate_ports_from_lag_conduit(struct dsa_switch_tree *dst,
+ struct net_device *lag_dev)
{
- struct net_device *new_master = dsa_tree_find_first_master(dst);
+ struct net_device *new_conduit = dsa_tree_find_first_conduit(dst);
struct dsa_port *dp;
int err;
dsa_tree_for_each_user_port(dp, dst) {
- if (dsa_port_to_master(dp) != lag_dev)
+ if (dsa_port_to_conduit(dp) != lag_dev)
continue;
- err = dsa_slave_change_master(dp->slave, new_master, NULL);
+ err = dsa_user_change_conduit(dp->user, new_conduit, NULL);
if (err) {
- netdev_err(dp->slave,
- "failed to restore master to %s: %pe\n",
- new_master->name, ERR_PTR(err));
+ netdev_err(dp->user,
+ "failed to restore conduit to %s: %pe\n",
+ new_conduit->name, ERR_PTR(err));
}
}
}
-static int dsa_master_lag_join(struct net_device *master,
- struct net_device *lag_dev,
- struct netdev_lag_upper_info *uinfo,
- struct netlink_ext_ack *extack)
+static int dsa_conduit_lag_join(struct net_device *conduit,
+ struct net_device *lag_dev,
+ struct netdev_lag_upper_info *uinfo,
+ struct netlink_ext_ack *extack)
{
- struct dsa_port *cpu_dp = master->dsa_ptr;
+ struct dsa_port *cpu_dp = conduit->dsa_ptr;
struct dsa_switch_tree *dst = cpu_dp->dst;
struct dsa_port *dp;
int err;
- err = dsa_master_lag_setup(lag_dev, cpu_dp, uinfo, extack);
+ err = dsa_conduit_lag_setup(lag_dev, cpu_dp, uinfo, extack);
if (err)
return err;
dsa_tree_for_each_user_port(dp, dst) {
- if (dsa_port_to_master(dp) != master)
+ if (dsa_port_to_conduit(dp) != conduit)
continue;
- err = dsa_slave_change_master(dp->slave, lag_dev, extack);
+ err = dsa_user_change_conduit(dp->user, lag_dev, extack);
if (err)
goto restore;
}
@@ -3236,24 +3223,24 @@ static int dsa_master_lag_join(struct net_device *master,
restore:
dsa_tree_for_each_user_port_continue_reverse(dp, dst) {
- if (dsa_port_to_master(dp) != lag_dev)
+ if (dsa_port_to_conduit(dp) != lag_dev)
continue;
- err = dsa_slave_change_master(dp->slave, master, NULL);
+ err = dsa_user_change_conduit(dp->user, conduit, NULL);
if (err) {
- netdev_err(dp->slave,
- "failed to restore master to %s: %pe\n",
- master->name, ERR_PTR(err));
+ netdev_err(dp->user,
+ "failed to restore conduit to %s: %pe\n",
+ conduit->name, ERR_PTR(err));
}
}
- dsa_master_lag_teardown(lag_dev, master->dsa_ptr);
+ dsa_conduit_lag_teardown(lag_dev, conduit->dsa_ptr);
return err;
}
-static void dsa_master_lag_leave(struct net_device *master,
- struct net_device *lag_dev)
+static void dsa_conduit_lag_leave(struct net_device *conduit,
+ struct net_device *lag_dev)
{
struct dsa_port *dp, *cpu_dp = lag_dev->dsa_ptr;
struct dsa_switch_tree *dst = cpu_dp->dst;
@@ -3270,10 +3257,10 @@ static void dsa_master_lag_leave(struct net_device *master,
if (new_cpu_dp) {
/* Update the CPU port of the user ports still under the LAG
- * so that dsa_port_to_master() continues to work properly
+ * so that dsa_port_to_conduit() continues to work properly
*/
dsa_tree_for_each_user_port(dp, dst)
- if (dsa_port_to_master(dp) == lag_dev)
+ if (dsa_port_to_conduit(dp) == lag_dev)
dp->cpu_dp = new_cpu_dp;
/* Update the index of the virtual CPU port to match the lowest
@@ -3282,20 +3269,20 @@ static void dsa_master_lag_leave(struct net_device *master,
lag_dev->dsa_ptr = new_cpu_dp;
wmb();
} else {
- /* If the LAG DSA master has no ports left, migrate back all
+ /* If the LAG DSA conduit has no ports left, migrate back all
* user ports to the first physical CPU port
*/
- dsa_tree_migrate_ports_from_lag_master(dst, lag_dev);
+ dsa_tree_migrate_ports_from_lag_conduit(dst, lag_dev);
}
- /* This DSA master has left its LAG in any case, so let
+ /* This DSA conduit has left its LAG in any case, so let
* the CPU port leave the hardware LAG as well
*/
- dsa_master_lag_teardown(lag_dev, master->dsa_ptr);
+ dsa_conduit_lag_teardown(lag_dev, conduit->dsa_ptr);
}
-static int dsa_master_changeupper(struct net_device *dev,
- struct netdev_notifier_changeupper_info *info)
+static int dsa_conduit_changeupper(struct net_device *dev,
+ struct netdev_notifier_changeupper_info *info)
{
struct netlink_ext_ack *extack;
int err = NOTIFY_DONE;
@@ -3307,11 +3294,11 @@ static int dsa_master_changeupper(struct net_device *dev,
if (netif_is_lag_master(info->upper_dev)) {
if (info->linking) {
- err = dsa_master_lag_join(dev, info->upper_dev,
- info->upper_info, extack);
+ err = dsa_conduit_lag_join(dev, info->upper_dev,
+ info->upper_info, extack);
err = notifier_from_errno(err);
} else {
- dsa_master_lag_leave(dev, info->upper_dev);
+ dsa_conduit_lag_leave(dev, info->upper_dev);
err = NOTIFY_OK;
}
}
@@ -3319,8 +3306,8 @@ static int dsa_master_changeupper(struct net_device *dev,
return err;
}
-static int dsa_slave_netdevice_event(struct notifier_block *nb,
- unsigned long event, void *ptr)
+static int dsa_user_netdevice_event(struct notifier_block *nb,
+ unsigned long event, void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
@@ -3329,15 +3316,15 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb,
struct netdev_notifier_changeupper_info *info = ptr;
int err;
- err = dsa_slave_prechangeupper_sanity_check(dev, info);
+ err = dsa_user_prechangeupper_sanity_check(dev, info);
if (notifier_to_errno(err))
return err;
- err = dsa_master_prechangeupper_sanity_check(dev, info);
+ err = dsa_conduit_prechangeupper_sanity_check(dev, info);
if (notifier_to_errno(err))
return err;
- err = dsa_lag_master_prechangelower_sanity_check(dev, info);
+ err = dsa_lag_conduit_prechangelower_sanity_check(dev, info);
if (notifier_to_errno(err))
return err;
@@ -3345,11 +3332,11 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb,
if (notifier_to_errno(err))
return err;
- err = dsa_slave_prechangeupper(dev, ptr);
+ err = dsa_user_prechangeupper(dev, ptr);
if (notifier_to_errno(err))
return err;
- err = dsa_slave_lag_prechangeupper(dev, ptr);
+ err = dsa_user_lag_prechangeupper(dev, ptr);
if (notifier_to_errno(err))
return err;
@@ -3358,15 +3345,15 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb,
case NETDEV_CHANGEUPPER: {
int err;
- err = dsa_slave_changeupper(dev, ptr);
+ err = dsa_user_changeupper(dev, ptr);
if (notifier_to_errno(err))
return err;
- err = dsa_slave_lag_changeupper(dev, ptr);
+ err = dsa_user_lag_changeupper(dev, ptr);
if (notifier_to_errno(err))
return err;
- err = dsa_master_changeupper(dev, ptr);
+ err = dsa_conduit_changeupper(dev, ptr);
if (notifier_to_errno(err))
return err;
@@ -3377,13 +3364,13 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb,
struct dsa_port *dp;
int err = 0;
- if (dsa_slave_dev_check(dev)) {
- dp = dsa_slave_to_port(dev);
+ if (dsa_user_dev_check(dev)) {
+ dp = dsa_user_to_port(dev);
err = dsa_port_lag_change(dp, info->lower_state_info);
}
- /* Mirror LAG port events on DSA masters that are in
+ /* Mirror LAG port events on DSA conduits that are in
* a LAG towards their respective switch CPU ports
*/
if (netdev_uses_dsa(dev)) {
@@ -3396,28 +3383,28 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb,
}
case NETDEV_CHANGE:
case NETDEV_UP: {
- /* Track state of master port.
- * DSA driver may require the master port (and indirectly
+ /* Track state of conduit port.
+ * DSA driver may require the conduit port (and indirectly
* the tagger) to be available for some special operation.
*/
if (netdev_uses_dsa(dev)) {
struct dsa_port *cpu_dp = dev->dsa_ptr;
struct dsa_switch_tree *dst = cpu_dp->ds->dst;
- /* Track when the master port is UP */
- dsa_tree_master_oper_state_change(dst, dev,
- netif_oper_up(dev));
+ /* Track when the conduit port is UP */
+ dsa_tree_conduit_oper_state_change(dst, dev,
+ netif_oper_up(dev));
- /* Track when the master port is ready and can accept
+ /* Track when the conduit port is ready and can accept
* packet.
* NETDEV_UP event is not enough to flag a port as ready.
* We also have to wait for linkwatch_do_dev to dev_activate
* and emit a NETDEV_CHANGE event.
- * We check if a master port is ready by checking if the dev
+ * We check if a conduit port is ready by checking if the dev
* have a qdisc assigned and is not noop.
*/
- dsa_tree_master_admin_state_change(dst, dev,
- !qdisc_tx_is_noop(dev));
+ dsa_tree_conduit_admin_state_change(dst, dev,
+ !qdisc_tx_is_noop(dev));
return NOTIFY_OK;
}
@@ -3435,7 +3422,7 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb,
cpu_dp = dev->dsa_ptr;
dst = cpu_dp->ds->dst;
- dsa_tree_master_admin_state_change(dst, dev, false);
+ dsa_tree_conduit_admin_state_change(dst, dev, false);
list_for_each_entry(dp, &dst->ports, list) {
if (!dsa_port_is_user(dp))
@@ -3444,7 +3431,7 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb,
if (dp->cpu_dp != cpu_dp)
continue;
- list_add(&dp->slave->close_list, &close_list);
+ list_add(&dp->user->close_list, &close_list);
}
dev_close_many(&close_list, true);
@@ -3470,7 +3457,7 @@ dsa_fdb_offload_notify(struct dsa_switchdev_event_work *switchdev_work)
switchdev_work->orig_dev, &info.info, NULL);
}
-static void dsa_slave_switchdev_event_work(struct work_struct *work)
+static void dsa_user_switchdev_event_work(struct work_struct *work)
{
struct dsa_switchdev_event_work *switchdev_work =
container_of(work, struct dsa_switchdev_event_work, work);
@@ -3481,7 +3468,7 @@ static void dsa_slave_switchdev_event_work(struct work_struct *work)
struct dsa_port *dp;
int err;
- dp = dsa_slave_to_port(dev);
+ dp = dsa_user_to_port(dev);
ds = dp->ds;
switch (switchdev_work->event) {
@@ -3523,7 +3510,7 @@ static void dsa_slave_switchdev_event_work(struct work_struct *work)
static bool dsa_foreign_dev_check(const struct net_device *dev,
const struct net_device *foreign_dev)
{
- const struct dsa_port *dp = dsa_slave_to_port(dev);
+ const struct dsa_port *dp = dsa_user_to_port(dev);
struct dsa_switch_tree *dst = dp->ds->dst;
if (netif_is_bridge_master(foreign_dev))
@@ -3536,13 +3523,13 @@ static bool dsa_foreign_dev_check(const struct net_device *dev,
return true;
}
-static int dsa_slave_fdb_event(struct net_device *dev,
- struct net_device *orig_dev,
- unsigned long event, const void *ctx,
- const struct switchdev_notifier_fdb_info *fdb_info)
+static int dsa_user_fdb_event(struct net_device *dev,
+ struct net_device *orig_dev,
+ unsigned long event, const void *ctx,
+ const struct switchdev_notifier_fdb_info *fdb_info)
{
struct dsa_switchdev_event_work *switchdev_work;
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_port *dp = dsa_user_to_port(dev);
bool host_addr = fdb_info->is_local;
struct dsa_switch *ds = dp->ds;
@@ -3591,7 +3578,7 @@ static int dsa_slave_fdb_event(struct net_device *dev,
orig_dev->name, fdb_info->addr, fdb_info->vid,
host_addr ? " as host address" : "");
- INIT_WORK(&switchdev_work->work, dsa_slave_switchdev_event_work);
+ INIT_WORK(&switchdev_work->work, dsa_user_switchdev_event_work);
switchdev_work->event = event;
switchdev_work->dev = dev;
switchdev_work->orig_dev = orig_dev;
@@ -3606,8 +3593,8 @@ static int dsa_slave_fdb_event(struct net_device *dev,
}
/* Called under rcu_read_lock() */
-static int dsa_slave_switchdev_event(struct notifier_block *unused,
- unsigned long event, void *ptr)
+static int dsa_user_switchdev_event(struct notifier_block *unused,
+ unsigned long event, void *ptr)
{
struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
int err;
@@ -3615,15 +3602,15 @@ static int dsa_slave_switchdev_event(struct notifier_block *unused,
switch (event) {
case SWITCHDEV_PORT_ATTR_SET:
err = switchdev_handle_port_attr_set(dev, ptr,
- dsa_slave_dev_check,
- dsa_slave_port_attr_set);
+ dsa_user_dev_check,
+ dsa_user_port_attr_set);
return notifier_from_errno(err);
case SWITCHDEV_FDB_ADD_TO_DEVICE:
case SWITCHDEV_FDB_DEL_TO_DEVICE:
err = switchdev_handle_fdb_event_to_device(dev, event, ptr,
- dsa_slave_dev_check,
+ dsa_user_dev_check,
dsa_foreign_dev_check,
- dsa_slave_fdb_event);
+ dsa_user_fdb_event);
return notifier_from_errno(err);
default:
return NOTIFY_DONE;
@@ -3632,8 +3619,8 @@ static int dsa_slave_switchdev_event(struct notifier_block *unused,
return NOTIFY_OK;
}
-static int dsa_slave_switchdev_blocking_event(struct notifier_block *unused,
- unsigned long event, void *ptr)
+static int dsa_user_switchdev_blocking_event(struct notifier_block *unused,
+ unsigned long event, void *ptr)
{
struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
int err;
@@ -3641,52 +3628,52 @@ static int dsa_slave_switchdev_blocking_event(struct notifier_block *unused,
switch (event) {
case SWITCHDEV_PORT_OBJ_ADD:
err = switchdev_handle_port_obj_add_foreign(dev, ptr,
- dsa_slave_dev_check,
+ dsa_user_dev_check,
dsa_foreign_dev_check,
- dsa_slave_port_obj_add);
+ dsa_user_port_obj_add);
return notifier_from_errno(err);
case SWITCHDEV_PORT_OBJ_DEL:
err = switchdev_handle_port_obj_del_foreign(dev, ptr,
- dsa_slave_dev_check,
+ dsa_user_dev_check,
dsa_foreign_dev_check,
- dsa_slave_port_obj_del);
+ dsa_user_port_obj_del);
return notifier_from_errno(err);
case SWITCHDEV_PORT_ATTR_SET:
err = switchdev_handle_port_attr_set(dev, ptr,
- dsa_slave_dev_check,
- dsa_slave_port_attr_set);
+ dsa_user_dev_check,
+ dsa_user_port_attr_set);
return notifier_from_errno(err);
}
return NOTIFY_DONE;
}
-static struct notifier_block dsa_slave_nb __read_mostly = {
- .notifier_call = dsa_slave_netdevice_event,
+static struct notifier_block dsa_user_nb __read_mostly = {
+ .notifier_call = dsa_user_netdevice_event,
};
-struct notifier_block dsa_slave_switchdev_notifier = {
- .notifier_call = dsa_slave_switchdev_event,
+struct notifier_block dsa_user_switchdev_notifier = {
+ .notifier_call = dsa_user_switchdev_event,
};
-struct notifier_block dsa_slave_switchdev_blocking_notifier = {
- .notifier_call = dsa_slave_switchdev_blocking_event,
+struct notifier_block dsa_user_switchdev_blocking_notifier = {
+ .notifier_call = dsa_user_switchdev_blocking_event,
};
-int dsa_slave_register_notifier(void)
+int dsa_user_register_notifier(void)
{
struct notifier_block *nb;
int err;
- err = register_netdevice_notifier(&dsa_slave_nb);
+ err = register_netdevice_notifier(&dsa_user_nb);
if (err)
return err;
- err = register_switchdev_notifier(&dsa_slave_switchdev_notifier);
+ err = register_switchdev_notifier(&dsa_user_switchdev_notifier);
if (err)
goto err_switchdev_nb;
- nb = &dsa_slave_switchdev_blocking_notifier;
+ nb = &dsa_user_switchdev_blocking_notifier;
err = register_switchdev_blocking_notifier(nb);
if (err)
goto err_switchdev_blocking_nb;
@@ -3694,27 +3681,27 @@ int dsa_slave_register_notifier(void)
return 0;
err_switchdev_blocking_nb:
- unregister_switchdev_notifier(&dsa_slave_switchdev_notifier);
+ unregister_switchdev_notifier(&dsa_user_switchdev_notifier);
err_switchdev_nb:
- unregister_netdevice_notifier(&dsa_slave_nb);
+ unregister_netdevice_notifier(&dsa_user_nb);
return err;
}
-void dsa_slave_unregister_notifier(void)
+void dsa_user_unregister_notifier(void)
{
struct notifier_block *nb;
int err;
- nb = &dsa_slave_switchdev_blocking_notifier;
+ nb = &dsa_user_switchdev_blocking_notifier;
err = unregister_switchdev_blocking_notifier(nb);
if (err)
pr_err("DSA: failed to unregister switchdev blocking notifier (%d)\n", err);
- err = unregister_switchdev_notifier(&dsa_slave_switchdev_notifier);
+ err = unregister_switchdev_notifier(&dsa_user_switchdev_notifier);
if (err)
pr_err("DSA: failed to unregister switchdev notifier (%d)\n", err);
- err = unregister_netdevice_notifier(&dsa_slave_nb);
+ err = unregister_netdevice_notifier(&dsa_user_nb);
if (err)
- pr_err("DSA: failed to unregister slave notifier (%d)\n", err);
+ pr_err("DSA: failed to unregister user notifier (%d)\n", err);
}
diff --git a/net/dsa/user.h b/net/dsa/user.h
new file mode 100644
index 000000000000..996069130bea
--- /dev/null
+++ b/net/dsa/user.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef __DSA_USER_H
+#define __DSA_USER_H
+
+#include <linux/if_bridge.h>
+#include <linux/if_vlan.h>
+#include <linux/list.h>
+#include <linux/netpoll.h>
+#include <linux/types.h>
+#include <net/dsa.h>
+#include <net/gro_cells.h>
+
+struct net_device;
+struct netlink_ext_ack;
+
+extern struct notifier_block dsa_user_switchdev_notifier;
+extern struct notifier_block dsa_user_switchdev_blocking_notifier;
+
+struct dsa_user_priv {
+ /* Copy of CPU port xmit for faster access in user transmit hot path */
+ struct sk_buff * (*xmit)(struct sk_buff *skb,
+ struct net_device *dev);
+
+ struct gro_cells gcells;
+
+ /* DSA port data, such as switch, port index, etc. */
+ struct dsa_port *dp;
+
+#ifdef CONFIG_NET_POLL_CONTROLLER
+ struct netpoll *netpoll;
+#endif
+
+ /* TC context */
+ struct list_head mall_tc_list;
+};
+
+void dsa_user_mii_bus_init(struct dsa_switch *ds);
+int dsa_user_create(struct dsa_port *dp);
+void dsa_user_destroy(struct net_device *user_dev);
+int dsa_user_suspend(struct net_device *user_dev);
+int dsa_user_resume(struct net_device *user_dev);
+int dsa_user_register_notifier(void);
+void dsa_user_unregister_notifier(void);
+void dsa_user_sync_ha(struct net_device *dev);
+void dsa_user_unsync_ha(struct net_device *dev);
+void dsa_user_setup_tagger(struct net_device *user);
+int dsa_user_change_mtu(struct net_device *dev, int new_mtu);
+int dsa_user_change_conduit(struct net_device *dev, struct net_device *conduit,
+ struct netlink_ext_ack *extack);
+int dsa_user_manage_vlan_filtering(struct net_device *dev,
+ bool vlan_filtering);
+
+static inline struct dsa_port *dsa_user_to_port(const struct net_device *dev)
+{
+ struct dsa_user_priv *p = netdev_priv(dev);
+
+ return p->dp;
+}
+
+static inline struct net_device *
+dsa_user_to_conduit(const struct net_device *dev)
+{
+ struct dsa_port *dp = dsa_user_to_port(dev);
+
+ return dsa_port_to_conduit(dp);
+}
+
+#endif
diff --git a/net/ethtool/common.c b/net/ethtool/common.c
index f5598c5f50de..6b2a360dcdf0 100644
--- a/net/ethtool/common.c
+++ b/net/ethtool/common.c
@@ -589,8 +589,8 @@ err_free_info:
int ethtool_get_max_rxfh_channel(struct net_device *dev, u32 *max)
{
+ struct ethtool_rxfh_param rxfh = {};
u32 dev_size, current_max = 0;
- u32 *indir;
int ret;
if (!dev->ethtool_ops->get_rxfh_indir_size ||
@@ -600,21 +600,21 @@ int ethtool_get_max_rxfh_channel(struct net_device *dev, u32 *max)
if (dev_size == 0)
return -EOPNOTSUPP;
- indir = kcalloc(dev_size, sizeof(indir[0]), GFP_USER);
- if (!indir)
+ rxfh.indir = kcalloc(dev_size, sizeof(rxfh.indir[0]), GFP_USER);
+ if (!rxfh.indir)
return -ENOMEM;
- ret = dev->ethtool_ops->get_rxfh(dev, indir, NULL, NULL);
+ ret = dev->ethtool_ops->get_rxfh(dev, &rxfh);
if (ret)
goto out;
while (dev_size--)
- current_max = max(current_max, indir[dev_size]);
+ current_max = max(current_max, rxfh.indir[dev_size]);
*max = current_max;
out:
- kfree(indir);
+ kfree(rxfh.indir);
return ret;
}
@@ -661,6 +661,12 @@ int ethtool_get_phc_vclocks(struct net_device *dev, int **vclock_index)
}
EXPORT_SYMBOL(ethtool_get_phc_vclocks);
+int ethtool_get_ts_info_by_layer(struct net_device *dev, struct ethtool_ts_info *info)
+{
+ return __ethtool_get_ts_info(dev, info);
+}
+EXPORT_SYMBOL(ethtool_get_ts_info_by_layer);
+
const struct ethtool_phy_ops *ethtool_phy_ops;
void ethtool_set_ethtool_phy_ops(const struct ethtool_phy_ops *ops)
@@ -685,3 +691,24 @@ ethtool_params_from_link_mode(struct ethtool_link_ksettings *link_ksettings,
link_ksettings->base.duplex = link_info->duplex;
}
EXPORT_SYMBOL_GPL(ethtool_params_from_link_mode);
+
+/**
+ * ethtool_forced_speed_maps_init
+ * @maps: Pointer to an array of Ethtool forced speed map
+ * @size: Array size
+ *
+ * Initialize an array of Ethtool forced speed map to Ethtool link modes. This
+ * should be called during driver module init.
+ */
+void
+ethtool_forced_speed_maps_init(struct ethtool_forced_speed_map *maps, u32 size)
+{
+ for (u32 i = 0; i < size; i++) {
+ struct ethtool_forced_speed_map *map = &maps[i];
+
+ linkmode_set_bit_array(map->cap_arr, map->arr_size, map->caps);
+ map->cap_arr = NULL;
+ map->arr_size = 0;
+ }
+}
+EXPORT_SYMBOL_GPL(ethtool_forced_speed_maps_init);
diff --git a/net/ethtool/features.c b/net/ethtool/features.c
index a79af8c25a07..b6cb101d7f19 100644
--- a/net/ethtool/features.c
+++ b/net/ethtool/features.c
@@ -234,17 +234,20 @@ int ethnl_set_features(struct sk_buff *skb, struct genl_info *info)
dev = req_info.dev;
rtnl_lock();
+ ret = ethnl_ops_begin(dev);
+ if (ret < 0)
+ goto out_rtnl;
ethnl_features_to_bitmap(old_active, dev->features);
ethnl_features_to_bitmap(old_wanted, dev->wanted_features);
ret = ethnl_parse_bitset(req_wanted, req_mask, NETDEV_FEATURE_COUNT,
tb[ETHTOOL_A_FEATURES_WANTED],
netdev_features_strings, info->extack);
if (ret < 0)
- goto out_rtnl;
+ goto out_ops;
if (ethnl_bitmap_to_features(req_mask) & ~NETIF_F_ETHTOOL_BITS) {
GENL_SET_ERR_MSG(info, "attempt to change non-ethtool features");
ret = -EINVAL;
- goto out_rtnl;
+ goto out_ops;
}
/* set req_wanted bits not in req_mask from old_wanted */
@@ -281,6 +284,8 @@ int ethnl_set_features(struct sk_buff *skb, struct genl_info *info)
if (mod)
netdev_features_change(dev);
+out_ops:
+ ethnl_ops_complete(dev);
out_rtnl:
rtnl_unlock();
ethnl_parse_header_dev_put(&req_info);
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index 0b0ce4f81c01..7519b0818b91 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -58,6 +58,9 @@ static struct devlink *netdev_to_devlink_get(struct net_device *dev)
u32 ethtool_op_get_link(struct net_device *dev)
{
+ /* Synchronize carrier state with link watch, see also rtnl_getlink() */
+ linkwatch_sync_dev(dev);
+
return netif_carrier_ok(dev) ? 1 : 0;
}
EXPORT_SYMBOL(ethtool_op_get_link);
@@ -969,18 +972,38 @@ static int ethtool_rxnfc_copy_to_user(void __user *useraddr,
static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev,
u32 cmd, void __user *useraddr)
{
+ const struct ethtool_ops *ops = dev->ethtool_ops;
struct ethtool_rxnfc info;
size_t info_size = sizeof(info);
int rc;
- if (!dev->ethtool_ops->set_rxnfc)
+ if (!ops->set_rxnfc)
return -EOPNOTSUPP;
rc = ethtool_rxnfc_copy_struct(cmd, &info, &info_size, useraddr);
if (rc)
return rc;
- rc = dev->ethtool_ops->set_rxnfc(dev, &info);
+ if (ops->get_rxfh) {
+ struct ethtool_rxfh_param rxfh = {};
+
+ rc = ops->get_rxfh(dev, &rxfh);
+ if (rc)
+ return rc;
+
+ /* Sanity check: if symmetric-xor is set, then:
+ * 1 - no other fields besides IP src/dst and/or L4 src/dst
+ * 2 - If src is set, dst must also be set
+ */
+ if ((rxfh.input_xfrm & RXH_XFRM_SYM_XOR) &&
+ ((info.data & ~(RXH_IP_SRC | RXH_IP_DST |
+ RXH_L4_B_0_1 | RXH_L4_B_2_3)) ||
+ (!!(info.data & RXH_IP_SRC) ^ !!(info.data & RXH_IP_DST)) ||
+ (!!(info.data & RXH_L4_B_0_1) ^ !!(info.data & RXH_L4_B_2_3))))
+ return -EINVAL;
+ }
+
+ rc = ops->set_rxnfc(dev, &info);
if (rc)
return rc;
@@ -1058,15 +1081,15 @@ EXPORT_SYMBOL(netdev_rss_key_fill);
static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev,
void __user *useraddr)
{
- u32 user_size, dev_size;
- u32 *indir;
+ struct ethtool_rxfh_param rxfh = {};
+ u32 user_size;
int ret;
if (!dev->ethtool_ops->get_rxfh_indir_size ||
!dev->ethtool_ops->get_rxfh)
return -EOPNOTSUPP;
- dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev);
- if (dev_size == 0)
+ rxfh.indir_size = dev->ethtool_ops->get_rxfh_indir_size(dev);
+ if (rxfh.indir_size == 0)
return -EOPNOTSUPP;
if (copy_from_user(&user_size,
@@ -1075,41 +1098,41 @@ static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev,
return -EFAULT;
if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh_indir, size),
- &dev_size, sizeof(dev_size)))
+ &rxfh.indir_size, sizeof(rxfh.indir_size)))
return -EFAULT;
/* If the user buffer size is 0, this is just a query for the
* device table size. Otherwise, if it's smaller than the
* device table size it's an error.
*/
- if (user_size < dev_size)
+ if (user_size < rxfh.indir_size)
return user_size == 0 ? 0 : -EINVAL;
- indir = kcalloc(dev_size, sizeof(indir[0]), GFP_USER);
- if (!indir)
+ rxfh.indir = kcalloc(rxfh.indir_size, sizeof(rxfh.indir[0]), GFP_USER);
+ if (!rxfh.indir)
return -ENOMEM;
- ret = dev->ethtool_ops->get_rxfh(dev, indir, NULL, NULL);
+ ret = dev->ethtool_ops->get_rxfh(dev, &rxfh);
if (ret)
goto out;
-
if (copy_to_user(useraddr +
offsetof(struct ethtool_rxfh_indir, ring_index[0]),
- indir, dev_size * sizeof(indir[0])))
+ rxfh.indir, rxfh.indir_size * sizeof(*rxfh.indir)))
ret = -EFAULT;
out:
- kfree(indir);
+ kfree(rxfh.indir);
return ret;
}
static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
void __user *useraddr)
{
- struct ethtool_rxnfc rx_rings;
- u32 user_size, dev_size, i;
- u32 *indir;
const struct ethtool_ops *ops = dev->ethtool_ops;
+ struct ethtool_rxfh_param rxfh_dev = {};
+ struct netlink_ext_ack *extack = NULL;
+ struct ethtool_rxnfc rx_rings;
+ u32 user_size, i;
int ret;
u32 ringidx_offset = offsetof(struct ethtool_rxfh_indir, ring_index[0]);
@@ -1117,8 +1140,8 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
!ops->get_rxnfc)
return -EOPNOTSUPP;
- dev_size = ops->get_rxfh_indir_size(dev);
- if (dev_size == 0)
+ rxfh_dev.indir_size = ops->get_rxfh_indir_size(dev);
+ if (rxfh_dev.indir_size == 0)
return -EOPNOTSUPP;
if (copy_from_user(&user_size,
@@ -1126,11 +1149,12 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
sizeof(user_size)))
return -EFAULT;
- if (user_size != 0 && user_size != dev_size)
+ if (user_size != 0 && user_size != rxfh_dev.indir_size)
return -EINVAL;
- indir = kcalloc(dev_size, sizeof(indir[0]), GFP_USER);
- if (!indir)
+ rxfh_dev.indir = kcalloc(rxfh_dev.indir_size,
+ sizeof(rxfh_dev.indir[0]), GFP_USER);
+ if (!rxfh_dev.indir)
return -ENOMEM;
rx_rings.cmd = ETHTOOL_GRXRINGS;
@@ -1139,18 +1163,21 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
goto out;
if (user_size == 0) {
- for (i = 0; i < dev_size; i++)
+ u32 *indir = rxfh_dev.indir;
+
+ for (i = 0; i < rxfh_dev.indir_size; i++)
indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data);
} else {
- ret = ethtool_copy_validate_indir(indir,
+ ret = ethtool_copy_validate_indir(rxfh_dev.indir,
useraddr + ringidx_offset,
&rx_rings,
- dev_size);
+ rxfh_dev.indir_size);
if (ret)
goto out;
}
- ret = ops->set_rxfh(dev, indir, NULL, ETH_RSS_HASH_NO_CHANGE);
+ rxfh_dev.hfunc = ETH_RSS_HASH_NO_CHANGE;
+ ret = ops->set_rxfh(dev, &rxfh_dev, extack);
if (ret)
goto out;
@@ -1161,32 +1188,29 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
dev->priv_flags |= IFF_RXFH_CONFIGURED;
out:
- kfree(indir);
+ kfree(rxfh_dev.indir);
return ret;
}
static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev,
void __user *useraddr)
{
- int ret;
const struct ethtool_ops *ops = dev->ethtool_ops;
+ struct ethtool_rxfh_param rxfh_dev = {};
u32 user_indir_size, user_key_size;
- u32 dev_indir_size = 0, dev_key_size = 0;
struct ethtool_rxfh rxfh;
- u32 total_size;
u32 indir_bytes;
- u32 *indir = NULL;
- u8 dev_hfunc = 0;
- u8 *hkey = NULL;
u8 *rss_config;
+ u32 total_size;
+ int ret;
if (!ops->get_rxfh)
return -EOPNOTSUPP;
if (ops->get_rxfh_indir_size)
- dev_indir_size = ops->get_rxfh_indir_size(dev);
+ rxfh_dev.indir_size = ops->get_rxfh_indir_size(dev);
if (ops->get_rxfh_key_size)
- dev_key_size = ops->get_rxfh_key_size(dev);
+ rxfh_dev.key_size = ops->get_rxfh_key_size(dev);
if (copy_from_user(&rxfh, useraddr, sizeof(rxfh)))
return -EFAULT;
@@ -1194,44 +1218,46 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev,
user_key_size = rxfh.key_size;
/* Check that reserved fields are 0 for now */
- if (rxfh.rsvd8[0] || rxfh.rsvd8[1] || rxfh.rsvd8[2] || rxfh.rsvd32)
+ if (rxfh.rsvd8[0] || rxfh.rsvd8[1] || rxfh.rsvd32)
return -EINVAL;
/* Most drivers don't handle rss_context, check it's 0 as well */
- if (rxfh.rss_context && !ops->get_rxfh_context)
+ if (rxfh.rss_context && !ops->cap_rss_ctx_supported)
return -EOPNOTSUPP;
- rxfh.indir_size = dev_indir_size;
- rxfh.key_size = dev_key_size;
+ rxfh.indir_size = rxfh_dev.indir_size;
+ rxfh.key_size = rxfh_dev.key_size;
if (copy_to_user(useraddr, &rxfh, sizeof(rxfh)))
return -EFAULT;
- if ((user_indir_size && (user_indir_size != dev_indir_size)) ||
- (user_key_size && (user_key_size != dev_key_size)))
+ if ((user_indir_size && user_indir_size != rxfh_dev.indir_size) ||
+ (user_key_size && user_key_size != rxfh_dev.key_size))
return -EINVAL;
- indir_bytes = user_indir_size * sizeof(indir[0]);
+ indir_bytes = user_indir_size * sizeof(rxfh_dev.indir[0]);
total_size = indir_bytes + user_key_size;
rss_config = kzalloc(total_size, GFP_USER);
if (!rss_config)
return -ENOMEM;
if (user_indir_size)
- indir = (u32 *)rss_config;
+ rxfh_dev.indir = (u32 *)rss_config;
if (user_key_size)
- hkey = rss_config + indir_bytes;
+ rxfh_dev.key = rss_config + indir_bytes;
- if (rxfh.rss_context)
- ret = dev->ethtool_ops->get_rxfh_context(dev, indir, hkey,
- &dev_hfunc,
- rxfh.rss_context);
- else
- ret = dev->ethtool_ops->get_rxfh(dev, indir, hkey, &dev_hfunc);
+ rxfh_dev.rss_context = rxfh.rss_context;
+
+ ret = dev->ethtool_ops->get_rxfh(dev, &rxfh_dev);
if (ret)
goto out;
if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh, hfunc),
- &dev_hfunc, sizeof(rxfh.hfunc))) {
+ &rxfh_dev.hfunc, sizeof(rxfh.hfunc))) {
+ ret = -EFAULT;
+ } else if (copy_to_user(useraddr +
+ offsetof(struct ethtool_rxfh, input_xfrm),
+ &rxfh_dev.input_xfrm,
+ sizeof(rxfh.input_xfrm))) {
ret = -EFAULT;
} else if (copy_to_user(useraddr +
offsetof(struct ethtool_rxfh, rss_config[0]),
@@ -1247,16 +1273,16 @@ out:
static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
void __user *useraddr)
{
- int ret;
+ u32 rss_cfg_offset = offsetof(struct ethtool_rxfh, rss_config[0]);
const struct ethtool_ops *ops = dev->ethtool_ops;
+ u32 dev_indir_size = 0, dev_key_size = 0, i;
+ struct ethtool_rxfh_param rxfh_dev = {};
+ struct netlink_ext_ack *extack = NULL;
struct ethtool_rxnfc rx_rings;
struct ethtool_rxfh rxfh;
- u32 dev_indir_size = 0, dev_key_size = 0, i;
- u32 *indir = NULL, indir_bytes = 0;
- u8 *hkey = NULL;
+ u32 indir_bytes = 0;
u8 *rss_config;
- u32 rss_cfg_offset = offsetof(struct ethtool_rxfh, rss_config[0]);
- bool delete = false;
+ int ret;
if (!ops->get_rxnfc || !ops->set_rxfh)
return -EOPNOTSUPP;
@@ -1270,25 +1296,34 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
return -EFAULT;
/* Check that reserved fields are 0 for now */
- if (rxfh.rsvd8[0] || rxfh.rsvd8[1] || rxfh.rsvd8[2] || rxfh.rsvd32)
+ if (rxfh.rsvd8[0] || rxfh.rsvd8[1] || rxfh.rsvd32)
return -EINVAL;
/* Most drivers don't handle rss_context, check it's 0 as well */
- if (rxfh.rss_context && !ops->set_rxfh_context)
+ if (rxfh.rss_context && !ops->cap_rss_ctx_supported)
+ return -EOPNOTSUPP;
+ /* Check input data transformation capabilities */
+ if (rxfh.input_xfrm && rxfh.input_xfrm != RXH_XFRM_SYM_XOR &&
+ rxfh.input_xfrm != RXH_XFRM_NO_CHANGE)
+ return -EINVAL;
+ if ((rxfh.input_xfrm & RXH_XFRM_SYM_XOR) &&
+ !ops->cap_rss_sym_xor_supported)
return -EOPNOTSUPP;
/* If either indir, hash key or function is valid, proceed further.
- * Must request at least one change: indir size, hash key or function.
+ * Must request at least one change: indir size, hash key, function
+ * or input transformation.
*/
if ((rxfh.indir_size &&
rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE &&
rxfh.indir_size != dev_indir_size) ||
(rxfh.key_size && (rxfh.key_size != dev_key_size)) ||
(rxfh.indir_size == ETH_RXFH_INDIR_NO_CHANGE &&
- rxfh.key_size == 0 && rxfh.hfunc == ETH_RSS_HASH_NO_CHANGE))
+ rxfh.key_size == 0 && rxfh.hfunc == ETH_RSS_HASH_NO_CHANGE &&
+ rxfh.input_xfrm == RXH_XFRM_NO_CHANGE))
return -EINVAL;
if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE)
- indir_bytes = dev_indir_size * sizeof(indir[0]);
+ indir_bytes = dev_indir_size * sizeof(rxfh_dev.indir[0]);
rss_config = kzalloc(indir_bytes + rxfh.key_size, GFP_USER);
if (!rss_config)
@@ -1305,8 +1340,9 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
*/
if (rxfh.indir_size &&
rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE) {
- indir = (u32 *)rss_config;
- ret = ethtool_copy_validate_indir(indir,
+ rxfh_dev.indir = (u32 *)rss_config;
+ rxfh_dev.indir_size = dev_indir_size;
+ ret = ethtool_copy_validate_indir(rxfh_dev.indir,
useraddr + rss_cfg_offset,
&rx_rings,
rxfh.indir_size);
@@ -1314,17 +1350,22 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
goto out;
} else if (rxfh.indir_size == 0) {
if (rxfh.rss_context == 0) {
- indir = (u32 *)rss_config;
+ u32 *indir;
+
+ rxfh_dev.indir = (u32 *)rss_config;
+ rxfh_dev.indir_size = dev_indir_size;
+ indir = rxfh_dev.indir;
for (i = 0; i < dev_indir_size; i++)
indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data);
} else {
- delete = true;
+ rxfh_dev.rss_delete = true;
}
}
if (rxfh.key_size) {
- hkey = rss_config + indir_bytes;
- if (copy_from_user(hkey,
+ rxfh_dev.key_size = dev_key_size;
+ rxfh_dev.key = rss_config + indir_bytes;
+ if (copy_from_user(rxfh_dev.key,
useraddr + rss_cfg_offset + indir_bytes,
rxfh.key_size)) {
ret = -EFAULT;
@@ -1332,19 +1373,19 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
}
}
- if (rxfh.rss_context)
- ret = ops->set_rxfh_context(dev, indir, hkey, rxfh.hfunc,
- &rxfh.rss_context, delete);
- else
- ret = ops->set_rxfh(dev, indir, hkey, rxfh.hfunc);
+ rxfh_dev.hfunc = rxfh.hfunc;
+ rxfh_dev.rss_context = rxfh.rss_context;
+ rxfh_dev.input_xfrm = rxfh.input_xfrm;
+
+ ret = ops->set_rxfh(dev, &rxfh_dev, extack);
if (ret)
goto out;
if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh, rss_context),
- &rxfh.rss_context, sizeof(rxfh.rss_context)))
+ &rxfh_dev.rss_context, sizeof(rxfh_dev.rss_context)))
ret = -EFAULT;
- if (!rxfh.rss_context) {
+ if (!rxfh_dev.rss_context) {
/* indicate whether rxfh was set to default */
if (rxfh.indir_size == 0)
dev->priv_flags &= ~IFF_RXFH_CONFIGURED;
@@ -1991,6 +2032,13 @@ __printf(2, 3) void ethtool_sprintf(u8 **data, const char *fmt, ...)
}
EXPORT_SYMBOL(ethtool_sprintf);
+void ethtool_puts(u8 **data, const char *str)
+{
+ strscpy(*data, str, ETH_GSTRING_LEN);
+ *data += ETH_GSTRING_LEN;
+}
+EXPORT_SYMBOL(ethtool_puts);
+
static int ethtool_phys_id(struct net_device *dev, void __user *useraddr)
{
struct ethtool_value id;
diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
index 3bbd5afb7b31..fe3553f60bf3 100644
--- a/net/ethtool/netlink.c
+++ b/net/ethtool/netlink.c
@@ -505,6 +505,7 @@ static int ethnl_default_dumpit(struct sk_buff *skb,
ret = skb->len;
break;
}
+ ret = 0;
}
rtnl_unlock();
diff --git a/net/ethtool/plca.c b/net/ethtool/plca.c
index b238a1afe9ae..b1e2e3b5027f 100644
--- a/net/ethtool/plca.c
+++ b/net/ethtool/plca.c
@@ -21,16 +21,6 @@ struct plca_reply_data {
#define PLCA_REPDATA(__reply_base) \
container_of(__reply_base, struct plca_reply_data, base)
-static void plca_update_sint(int *dst, const struct nlattr *attr,
- bool *mod)
-{
- if (!attr)
- return;
-
- *dst = nla_get_u32(attr);
- *mod = true;
-}
-
// PLCA get configuration message ------------------------------------------- //
const struct nla_policy ethnl_plca_get_cfg_policy[] = {
@@ -38,6 +28,29 @@ const struct nla_policy ethnl_plca_get_cfg_policy[] = {
NLA_POLICY_NESTED(ethnl_header_policy),
};
+static void plca_update_sint(int *dst, struct nlattr **tb, u32 attrid,
+ bool *mod)
+{
+ const struct nlattr *attr = tb[attrid];
+
+ if (!attr ||
+ WARN_ON_ONCE(attrid >= ARRAY_SIZE(ethnl_plca_set_cfg_policy)))
+ return;
+
+ switch (ethnl_plca_set_cfg_policy[attrid].type) {
+ case NLA_U8:
+ *dst = nla_get_u8(attr);
+ break;
+ case NLA_U32:
+ *dst = nla_get_u32(attr);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ }
+
+ *mod = true;
+}
+
static int plca_get_cfg_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
const struct genl_info *info)
@@ -144,13 +157,13 @@ ethnl_set_plca(struct ethnl_req_info *req_info, struct genl_info *info)
return -EOPNOTSUPP;
memset(&plca_cfg, 0xff, sizeof(plca_cfg));
- plca_update_sint(&plca_cfg.enabled, tb[ETHTOOL_A_PLCA_ENABLED], &mod);
- plca_update_sint(&plca_cfg.node_id, tb[ETHTOOL_A_PLCA_NODE_ID], &mod);
- plca_update_sint(&plca_cfg.node_cnt, tb[ETHTOOL_A_PLCA_NODE_CNT], &mod);
- plca_update_sint(&plca_cfg.to_tmr, tb[ETHTOOL_A_PLCA_TO_TMR], &mod);
- plca_update_sint(&plca_cfg.burst_cnt, tb[ETHTOOL_A_PLCA_BURST_CNT],
+ plca_update_sint(&plca_cfg.enabled, tb, ETHTOOL_A_PLCA_ENABLED, &mod);
+ plca_update_sint(&plca_cfg.node_id, tb, ETHTOOL_A_PLCA_NODE_ID, &mod);
+ plca_update_sint(&plca_cfg.node_cnt, tb, ETHTOOL_A_PLCA_NODE_CNT, &mod);
+ plca_update_sint(&plca_cfg.to_tmr, tb, ETHTOOL_A_PLCA_TO_TMR, &mod);
+ plca_update_sint(&plca_cfg.burst_cnt, tb, ETHTOOL_A_PLCA_BURST_CNT,
&mod);
- plca_update_sint(&plca_cfg.burst_tmr, tb[ETHTOOL_A_PLCA_BURST_TMR],
+ plca_update_sint(&plca_cfg.burst_tmr, tb, ETHTOOL_A_PLCA_BURST_TMR,
&mod);
if (!mod)
return 0;
diff --git a/net/ethtool/rings.c b/net/ethtool/rings.c
index fb09f774ea01..b7865a14fdf8 100644
--- a/net/ethtool/rings.c
+++ b/net/ethtool/rings.c
@@ -124,6 +124,8 @@ const struct nla_policy ethnl_rings_set_policy[] = {
[ETHTOOL_A_RINGS_RX_JUMBO] = { .type = NLA_U32 },
[ETHTOOL_A_RINGS_TX] = { .type = NLA_U32 },
[ETHTOOL_A_RINGS_RX_BUF_LEN] = NLA_POLICY_MIN(NLA_U32, 1),
+ [ETHTOOL_A_RINGS_TCP_DATA_SPLIT] =
+ NLA_POLICY_MAX(NLA_U8, ETHTOOL_TCP_DATA_SPLIT_ENABLED),
[ETHTOOL_A_RINGS_CQE_SIZE] = NLA_POLICY_MIN(NLA_U32, 1),
[ETHTOOL_A_RINGS_TX_PUSH] = NLA_POLICY_MAX(NLA_U8, 1),
[ETHTOOL_A_RINGS_RX_PUSH] = NLA_POLICY_MAX(NLA_U8, 1),
@@ -145,6 +147,14 @@ ethnl_set_rings_validate(struct ethnl_req_info *req_info,
return -EOPNOTSUPP;
}
+ if (tb[ETHTOOL_A_RINGS_TCP_DATA_SPLIT] &&
+ !(ops->supported_ring_params & ETHTOOL_RING_USE_TCP_DATA_SPLIT)) {
+ NL_SET_ERR_MSG_ATTR(info->extack,
+ tb[ETHTOOL_A_RINGS_TCP_DATA_SPLIT],
+ "setting TCP data split is not supported");
+ return -EOPNOTSUPP;
+ }
+
if (tb[ETHTOOL_A_RINGS_CQE_SIZE] &&
!(ops->supported_ring_params & ETHTOOL_RING_USE_CQE_SIZE)) {
NL_SET_ERR_MSG_ATTR(info->extack,
@@ -202,6 +212,8 @@ ethnl_set_rings(struct ethnl_req_info *req_info, struct genl_info *info)
ethnl_update_u32(&ringparam.tx_pending, tb[ETHTOOL_A_RINGS_TX], &mod);
ethnl_update_u32(&kernel_ringparam.rx_buf_len,
tb[ETHTOOL_A_RINGS_RX_BUF_LEN], &mod);
+ ethnl_update_u8(&kernel_ringparam.tcp_data_split,
+ tb[ETHTOOL_A_RINGS_TCP_DATA_SPLIT], &mod);
ethnl_update_u32(&kernel_ringparam.cqe_size,
tb[ETHTOOL_A_RINGS_CQE_SIZE], &mod);
ethnl_update_u8(&kernel_ringparam.tx_push,
diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c
index 5764202e6cb6..71679137eff2 100644
--- a/net/ethtool/rss.c
+++ b/net/ethtool/rss.c
@@ -13,6 +13,7 @@ struct rss_reply_data {
u32 indir_size;
u32 hkey_size;
u32 hfunc;
+ u32 input_xfrm;
u32 *indir_table;
u8 *hkey;
};
@@ -48,9 +49,9 @@ rss_prepare_data(const struct ethnl_req_info *req_base,
struct rss_reply_data *data = RSS_REPDATA(reply_base);
struct rss_req_info *request = RSS_REQINFO(req_base);
struct net_device *dev = reply_base->dev;
+ struct ethtool_rxfh_param rxfh = {};
const struct ethtool_ops *ops;
u32 total_size, indir_bytes;
- u8 dev_hfunc = 0;
u8 *rss_config;
int ret;
@@ -59,7 +60,7 @@ rss_prepare_data(const struct ethnl_req_info *req_base,
return -EOPNOTSUPP;
/* Some drivers don't handle rss_context */
- if (request->rss_context && !ops->get_rxfh_context)
+ if (request->rss_context && !ops->cap_rss_ctx_supported)
return -EOPNOTSUPP;
ret = ethnl_ops_begin(dev);
@@ -83,21 +84,21 @@ rss_prepare_data(const struct ethnl_req_info *req_base,
if (data->indir_size)
data->indir_table = (u32 *)rss_config;
-
if (data->hkey_size)
data->hkey = rss_config + indir_bytes;
- if (request->rss_context)
- ret = ops->get_rxfh_context(dev, data->indir_table, data->hkey,
- &dev_hfunc, request->rss_context);
- else
- ret = ops->get_rxfh(dev, data->indir_table, data->hkey,
- &dev_hfunc);
+ rxfh.indir_size = data->indir_size;
+ rxfh.indir = data->indir_table;
+ rxfh.key_size = data->hkey_size;
+ rxfh.key = data->hkey;
+ rxfh.rss_context = request->rss_context;
+ ret = ops->get_rxfh(dev, &rxfh);
if (ret)
goto out_ops;
- data->hfunc = dev_hfunc;
+ data->hfunc = rxfh.hfunc;
+ data->input_xfrm = rxfh.input_xfrm;
out_ops:
ethnl_ops_complete(dev);
return ret;
@@ -111,6 +112,7 @@ rss_reply_size(const struct ethnl_req_info *req_base,
int len;
len = nla_total_size(sizeof(u32)) + /* _RSS_HFUNC */
+ nla_total_size(sizeof(u32)) + /* _RSS_INPUT_XFRM */
nla_total_size(sizeof(u32) * data->indir_size) + /* _RSS_INDIR */
nla_total_size(data->hkey_size); /* _RSS_HKEY */
@@ -125,6 +127,8 @@ rss_fill_reply(struct sk_buff *skb, const struct ethnl_req_info *req_base,
if ((data->hfunc &&
nla_put_u32(skb, ETHTOOL_A_RSS_HFUNC, data->hfunc)) ||
+ (data->input_xfrm &&
+ nla_put_u32(skb, ETHTOOL_A_RSS_INPUT_XFRM, data->input_xfrm)) ||
(data->indir_size &&
nla_put(skb, ETHTOOL_A_RSS_INDIR,
sizeof(u32) * data->indir_size, data->indir_table)) ||
diff --git a/net/handshake/genl.c b/net/handshake/genl.c
index 233be5cbfec9..f55d14d7b726 100644
--- a/net/handshake/genl.c
+++ b/net/handshake/genl.c
@@ -18,7 +18,7 @@ static const struct nla_policy handshake_accept_nl_policy[HANDSHAKE_A_ACCEPT_HAN
/* HANDSHAKE_CMD_DONE - do */
static const struct nla_policy handshake_done_nl_policy[HANDSHAKE_A_DONE_REMOTE_AUTH + 1] = {
[HANDSHAKE_A_DONE_STATUS] = { .type = NLA_U32, },
- [HANDSHAKE_A_DONE_SOCKFD] = { .type = NLA_U32, },
+ [HANDSHAKE_A_DONE_SOCKFD] = { .type = NLA_S32, },
[HANDSHAKE_A_DONE_REMOTE_AUTH] = { .type = NLA_U32, },
};
diff --git a/net/handshake/handshake-test.c b/net/handshake/handshake-test.c
index 6d37bab35c8f..16ed7bfd29e4 100644
--- a/net/handshake/handshake-test.c
+++ b/net/handshake/handshake-test.c
@@ -235,7 +235,7 @@ static void handshake_req_submit_test4(struct kunit *test)
KUNIT_EXPECT_PTR_EQ(test, req, result);
handshake_req_cancel(sock->sk);
- sock_release(sock);
+ fput(filp);
}
static void handshake_req_submit_test5(struct kunit *test)
@@ -272,7 +272,7 @@ static void handshake_req_submit_test5(struct kunit *test)
/* Assert */
KUNIT_EXPECT_EQ(test, err, -EAGAIN);
- sock_release(sock);
+ fput(filp);
hn->hn_pending = saved;
}
@@ -306,7 +306,7 @@ static void handshake_req_submit_test6(struct kunit *test)
KUNIT_EXPECT_EQ(test, err, -EBUSY);
handshake_req_cancel(sock->sk);
- sock_release(sock);
+ fput(filp);
}
static void handshake_req_cancel_test1(struct kunit *test)
@@ -340,7 +340,7 @@ static void handshake_req_cancel_test1(struct kunit *test)
/* Assert */
KUNIT_EXPECT_TRUE(test, result);
- sock_release(sock);
+ fput(filp);
}
static void handshake_req_cancel_test2(struct kunit *test)
@@ -382,7 +382,7 @@ static void handshake_req_cancel_test2(struct kunit *test)
/* Assert */
KUNIT_EXPECT_TRUE(test, result);
- sock_release(sock);
+ fput(filp);
}
static void handshake_req_cancel_test3(struct kunit *test)
@@ -427,7 +427,7 @@ static void handshake_req_cancel_test3(struct kunit *test)
/* Assert */
KUNIT_EXPECT_FALSE(test, result);
- sock_release(sock);
+ fput(filp);
}
static struct handshake_req *handshake_req_destroy_test;
@@ -471,7 +471,7 @@ static void handshake_req_destroy_test1(struct kunit *test)
handshake_req_cancel(sock->sk);
/* Act */
- sock_release(sock);
+ fput(filp);
/* Assert */
KUNIT_EXPECT_PTR_EQ(test, handshake_req_destroy_test, req);
diff --git a/net/handshake/netlink.c b/net/handshake/netlink.c
index 1086653e1fad..89637e732866 100644
--- a/net/handshake/netlink.c
+++ b/net/handshake/netlink.c
@@ -87,29 +87,6 @@ struct nlmsghdr *handshake_genl_put(struct sk_buff *msg,
}
EXPORT_SYMBOL(handshake_genl_put);
-/*
- * dup() a kernel socket for use as a user space file descriptor
- * in the current process. The kernel socket must have an
- * instatiated struct file.
- *
- * Implicit argument: "current()"
- */
-static int handshake_dup(struct socket *sock)
-{
- struct file *file;
- int newfd;
-
- file = get_file(sock->file);
- newfd = get_unused_fd_flags(O_CLOEXEC);
- if (newfd < 0) {
- fput(file);
- return newfd;
- }
-
- fd_install(newfd, file);
- return newfd;
-}
-
int handshake_nl_accept_doit(struct sk_buff *skb, struct genl_info *info)
{
struct net *net = sock_net(skb->sk);
@@ -133,17 +110,20 @@ int handshake_nl_accept_doit(struct sk_buff *skb, struct genl_info *info)
goto out_status;
sock = req->hr_sk->sk_socket;
- fd = handshake_dup(sock);
+ fd = get_unused_fd_flags(O_CLOEXEC);
if (fd < 0) {
err = fd;
goto out_complete;
}
+
err = req->hr_proto->hp_accept(req, info, fd);
if (err) {
- fput(sock->file);
+ put_unused_fd(fd);
goto out_complete;
}
+ fd_install(fd, get_file(sock->file));
+
trace_handshake_cmd_accept(net, req, req->hr_sk, fd);
return 0;
@@ -157,26 +137,24 @@ out_status:
int handshake_nl_done_doit(struct sk_buff *skb, struct genl_info *info)
{
struct net *net = sock_net(skb->sk);
- struct handshake_req *req = NULL;
- struct socket *sock = NULL;
+ struct handshake_req *req;
+ struct socket *sock;
int fd, status, err;
if (GENL_REQ_ATTR_CHECK(info, HANDSHAKE_A_DONE_SOCKFD))
return -EINVAL;
- fd = nla_get_u32(info->attrs[HANDSHAKE_A_DONE_SOCKFD]);
+ fd = nla_get_s32(info->attrs[HANDSHAKE_A_DONE_SOCKFD]);
- err = 0;
sock = sockfd_lookup(fd, &err);
- if (err) {
- err = -EBADF;
- goto out_status;
- }
+ if (!sock)
+ return err;
req = handshake_req_hash_lookup(sock->sk);
if (!req) {
err = -EBUSY;
+ trace_handshake_cmd_done_err(net, req, sock->sk, err);
fput(sock->file);
- goto out_status;
+ return err;
}
trace_handshake_cmd_done(net, req, sock->sk, fd);
@@ -188,10 +166,6 @@ int handshake_nl_done_doit(struct sk_buff *skb, struct genl_info *info)
handshake_complete(req, status, info);
fput(sock->file);
return 0;
-
-out_status:
- trace_handshake_cmd_done_err(net, req, sock->sk, err);
- return err;
}
static unsigned int handshake_net_id;
diff --git a/net/handshake/tlshd.c b/net/handshake/tlshd.c
index bbfb4095ddd6..d697f68c598c 100644
--- a/net/handshake/tlshd.c
+++ b/net/handshake/tlshd.c
@@ -173,9 +173,9 @@ static int tls_handshake_put_certificate(struct sk_buff *msg,
if (!entry_attr)
return -EMSGSIZE;
- if (nla_put_u32(msg, HANDSHAKE_A_X509_CERT,
+ if (nla_put_s32(msg, HANDSHAKE_A_X509_CERT,
treq->th_certificate) ||
- nla_put_u32(msg, HANDSHAKE_A_X509_PRIVKEY,
+ nla_put_s32(msg, HANDSHAKE_A_X509_PRIVKEY,
treq->th_privkey)) {
nla_nest_cancel(msg, entry_attr);
return -EMSGSIZE;
@@ -214,7 +214,7 @@ static int tls_handshake_accept(struct handshake_req *req,
goto out_cancel;
ret = -EMSGSIZE;
- ret = nla_put_u32(msg, HANDSHAKE_A_ACCEPT_SOCKFD, fd);
+ ret = nla_put_s32(msg, HANDSHAKE_A_ACCEPT_SOCKFD, fd);
if (ret < 0)
goto out_cancel;
ret = nla_put_u32(msg, HANDSHAKE_A_ACCEPT_MESSAGE_TYPE, treq->th_type);
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index 306f942c3b28..9d71b66183da 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -173,7 +173,24 @@ static int hsr_dev_open(struct net_device *dev)
static int hsr_dev_close(struct net_device *dev)
{
- /* Nothing to do here. */
+ struct hsr_port *port;
+ struct hsr_priv *hsr;
+
+ hsr = netdev_priv(dev);
+ hsr_for_each_port(hsr, port) {
+ if (port->type == HSR_PT_MASTER)
+ continue;
+ switch (port->type) {
+ case HSR_PT_SLAVE_A:
+ case HSR_PT_SLAVE_B:
+ dev_uc_unsync(port->dev, dev);
+ dev_mc_unsync(port->dev, dev);
+ break;
+ default:
+ break;
+ }
+ }
+
return 0;
}
@@ -291,7 +308,7 @@ static void send_hsr_supervision_frame(struct hsr_port *master,
skb = hsr_init_skb(master);
if (!skb) {
- WARN_ONCE(1, "HSR: Could not send supervision frame\n");
+ netdev_warn_once(master->dev, "HSR: Could not send supervision frame\n");
return;
}
@@ -338,7 +355,7 @@ static void send_prp_supervision_frame(struct hsr_port *master,
skb = hsr_init_skb(master);
if (!skb) {
- WARN_ONCE(1, "PRP: Could not send supervision frame\n");
+ netdev_warn_once(master->dev, "PRP: Could not send supervision frame\n");
return;
}
@@ -404,12 +421,60 @@ void hsr_del_ports(struct hsr_priv *hsr)
hsr_del_port(port);
}
+static void hsr_set_rx_mode(struct net_device *dev)
+{
+ struct hsr_port *port;
+ struct hsr_priv *hsr;
+
+ hsr = netdev_priv(dev);
+
+ hsr_for_each_port(hsr, port) {
+ if (port->type == HSR_PT_MASTER)
+ continue;
+ switch (port->type) {
+ case HSR_PT_SLAVE_A:
+ case HSR_PT_SLAVE_B:
+ dev_mc_sync_multiple(port->dev, dev);
+ dev_uc_sync_multiple(port->dev, dev);
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+static void hsr_change_rx_flags(struct net_device *dev, int change)
+{
+ struct hsr_port *port;
+ struct hsr_priv *hsr;
+
+ hsr = netdev_priv(dev);
+
+ hsr_for_each_port(hsr, port) {
+ if (port->type == HSR_PT_MASTER)
+ continue;
+ switch (port->type) {
+ case HSR_PT_SLAVE_A:
+ case HSR_PT_SLAVE_B:
+ if (change & IFF_ALLMULTI)
+ dev_set_allmulti(port->dev,
+ dev->flags &
+ IFF_ALLMULTI ? 1 : -1);
+ break;
+ default:
+ break;
+ }
+ }
+}
+
static const struct net_device_ops hsr_device_ops = {
.ndo_change_mtu = hsr_dev_change_mtu,
.ndo_open = hsr_dev_open,
.ndo_stop = hsr_dev_close,
.ndo_start_xmit = hsr_dev_xmit,
+ .ndo_change_rx_flags = hsr_change_rx_flags,
.ndo_fix_features = hsr_fix_features,
+ .ndo_set_rx_mode = hsr_set_rx_mode,
};
static struct device_type hsr_type = {
diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c
index 629daacc9607..80cdc6f6b34c 100644
--- a/net/hsr/hsr_forward.c
+++ b/net/hsr/hsr_forward.c
@@ -342,9 +342,7 @@ struct sk_buff *prp_create_tagged_frame(struct hsr_frame_info *frame,
skb = skb_copy_expand(frame->skb_std, 0,
skb_tailroom(frame->skb_std) + HSR_HLEN,
GFP_ATOMIC);
- prp_fill_rct(skb, frame, port);
-
- return skb;
+ return prp_fill_rct(skb, frame, port);
}
static void hsr_deliver_master(struct sk_buff *skb, struct net_device *dev,
@@ -594,6 +592,7 @@ static int fill_frame_info(struct hsr_frame_info *frame,
proto = vlan_hdr->vlanhdr.h_vlan_encapsulated_proto;
/* FIXME: */
netdev_warn_once(skb->dev, "VLAN not yet supported");
+ return -EINVAL;
}
frame->is_from_san = false;
diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
index b77f1189d19d..6d14d935ee82 100644
--- a/net/hsr/hsr_framereg.c
+++ b/net/hsr/hsr_framereg.c
@@ -288,13 +288,13 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame)
/* And leave the HSR tag. */
if (ethhdr->h_proto == htons(ETH_P_HSR)) {
- pull_size = sizeof(struct ethhdr);
+ pull_size = sizeof(struct hsr_tag);
skb_pull(skb, pull_size);
total_pull_size += pull_size;
}
/* And leave the HSR sup tag. */
- pull_size = sizeof(struct hsr_tag);
+ pull_size = sizeof(struct hsr_sup_tag);
skb_pull(skb, pull_size);
total_pull_size += pull_size;
diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c
index b099c3150150..cb83c8feb746 100644
--- a/net/hsr/hsr_main.c
+++ b/net/hsr/hsr_main.c
@@ -167,4 +167,5 @@ static void __exit hsr_exit(void)
module_init(hsr_init);
module_exit(hsr_exit);
+MODULE_DESCRIPTION("High-availability Seamless Redundancy (HSR) driver");
MODULE_LICENSE("GPL");
diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h
index 6851e33df7d1..18e01791ad79 100644
--- a/net/hsr/hsr_main.h
+++ b/net/hsr/hsr_main.h
@@ -83,7 +83,7 @@ struct hsr_vlan_ethhdr {
struct hsr_sup_tlv {
u8 HSR_TLV_type;
u8 HSR_TLV_length;
-};
+} __packed;
/* HSR/PRP Supervision Frame data types.
* Field names as defined in the IEC:2010 standard for HSR.
diff --git a/net/ieee802154/Makefile b/net/ieee802154/Makefile
index f05b7bdae2aa..7bce67673e83 100644
--- a/net/ieee802154/Makefile
+++ b/net/ieee802154/Makefile
@@ -4,7 +4,7 @@ obj-$(CONFIG_IEEE802154_SOCKET) += ieee802154_socket.o
obj-y += 6lowpan/
ieee802154-y := netlink.o nl-mac.o nl-phy.o nl_policy.o core.o \
- header_ops.o sysfs.o nl802154.o trace.o
+ header_ops.o sysfs.o nl802154.o trace.o pan.o
ieee802154_socket-y := socket.o
CFLAGS_trace.o := -I$(src)
diff --git a/net/ieee802154/core.c b/net/ieee802154/core.c
index 57546e07e06a..60e8fff1347e 100644
--- a/net/ieee802154/core.c
+++ b/net/ieee802154/core.c
@@ -198,6 +198,25 @@ void wpan_phy_free(struct wpan_phy *phy)
}
EXPORT_SYMBOL(wpan_phy_free);
+static void cfg802154_free_peer_structures(struct wpan_dev *wpan_dev)
+{
+ struct ieee802154_pan_device *child, *tmp;
+
+ mutex_lock(&wpan_dev->association_lock);
+
+ kfree(wpan_dev->parent);
+ wpan_dev->parent = NULL;
+
+ list_for_each_entry_safe(child, tmp, &wpan_dev->children, node) {
+ list_del(&child->node);
+ kfree(child);
+ }
+
+ wpan_dev->nchildren = 0;
+
+ mutex_unlock(&wpan_dev->association_lock);
+}
+
int cfg802154_switch_netns(struct cfg802154_registered_device *rdev,
struct net *net)
{
@@ -276,6 +295,9 @@ static int cfg802154_netdev_notifier_call(struct notifier_block *nb,
wpan_dev->identifier = ++rdev->wpan_dev_id;
list_add_rcu(&wpan_dev->list, &rdev->wpan_dev_list);
rdev->devlist_generation++;
+ mutex_init(&wpan_dev->association_lock);
+ INIT_LIST_HEAD(&wpan_dev->children);
+ wpan_dev->max_associations = SZ_16K;
wpan_dev->netdev = dev;
break;
@@ -291,6 +313,8 @@ static int cfg802154_netdev_notifier_call(struct notifier_block *nb,
rdev->opencount++;
break;
case NETDEV_UNREGISTER:
+ cfg802154_free_peer_structures(wpan_dev);
+
/* It is possible to get NETDEV_UNREGISTER
* multiple times. To detect that, check
* that the interface is still on the list
diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c
index 1a265a421308..7eb37de3add2 100644
--- a/net/ieee802154/nl802154.c
+++ b/net/ieee802154/nl802154.c
@@ -234,6 +234,8 @@ static const struct nla_policy nl802154_policy[NL802154_ATTR_MAX+1] = {
NL802154_SCAN_DONE_REASON_ABORTED),
[NL802154_ATTR_BEACON_INTERVAL] =
NLA_POLICY_MAX(NLA_U8, IEEE802154_ACTIVE_SCAN_DURATION),
+ [NL802154_ATTR_MAX_ASSOCIATIONS] = { .type = NLA_U32 },
+ [NL802154_ATTR_PEER] = { .type = NLA_NESTED },
#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
[NL802154_ATTR_SEC_ENABLED] = { .type = NLA_U8, },
@@ -248,7 +250,6 @@ static const struct nla_policy nl802154_policy[NL802154_ATTR_MAX+1] = {
#endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */
};
-#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
static int
nl802154_prepare_wpan_dev_dump(struct sk_buff *skb,
struct netlink_callback *cb,
@@ -307,7 +308,6 @@ nl802154_finish_wpan_dev_dump(struct cfg802154_registered_device *rdev)
{
rtnl_unlock();
}
-#endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */
/* message building helper */
static inline void *nl802154hdr_put(struct sk_buff *skb, u32 portid, u32 seq,
@@ -1087,15 +1087,14 @@ static int nl802154_set_pan_id(struct sk_buff *skb, struct genl_info *info)
pan_id = nla_get_le16(info->attrs[NL802154_ATTR_PAN_ID]);
- /* TODO
- * I am not sure about to check here on broadcast pan_id.
- * Broadcast is a valid setting, comment from 802.15.4:
- * If this value is 0xffff, the device is not associated.
- *
- * This could useful to simple deassociate an device.
+ /* Only allow changing the PAN ID when the device has no more
+ * associations ongoing to avoid confusing peers.
*/
- if (pan_id == cpu_to_le16(IEEE802154_PAN_ID_BROADCAST))
+ if (cfg802154_device_is_associated(wpan_dev)) {
+ NL_SET_ERR_MSG(info->extack,
+ "Existing associations, changing PAN ID forbidden");
return -EINVAL;
+ }
return rdev_set_pan_id(rdev, wpan_dev, pan_id);
}
@@ -1123,20 +1122,17 @@ static int nl802154_set_short_addr(struct sk_buff *skb, struct genl_info *info)
short_addr = nla_get_le16(info->attrs[NL802154_ATTR_SHORT_ADDR]);
- /* TODO
- * I am not sure about to check here on broadcast short_addr.
- * Broadcast is a valid setting, comment from 802.15.4:
- * A value of 0xfffe indicates that the device has
- * associated but has not been allocated an address. A
- * value of 0xffff indicates that the device does not
- * have a short address.
- *
- * I think we should allow to set these settings but
- * don't allow to allow socket communication with it.
+ /* The short address only has a meaning when part of a PAN, after a
+ * proper association procedure. However, we want to still offer the
+ * possibility to create static networks so changing the short address
+ * is only allowed when not already associated to other devices with
+ * the official handshake.
*/
- if (short_addr == cpu_to_le16(IEEE802154_ADDR_SHORT_UNSPEC) ||
- short_addr == cpu_to_le16(IEEE802154_ADDR_SHORT_BROADCAST))
+ if (cfg802154_device_is_associated(wpan_dev)) {
+ NL_SET_ERR_MSG(info->extack,
+ "Existing associations, changing short address forbidden");
return -EINVAL;
+ }
return rdev_set_short_addr(rdev, wpan_dev, short_addr);
}
@@ -1638,6 +1634,189 @@ nl802154_stop_beacons(struct sk_buff *skb, struct genl_info *info)
return rdev_stop_beacons(rdev, wpan_dev);
}
+static int nl802154_associate(struct sk_buff *skb, struct genl_info *info)
+{
+ struct cfg802154_registered_device *rdev = info->user_ptr[0];
+ struct net_device *dev = info->user_ptr[1];
+ struct wpan_dev *wpan_dev;
+ struct wpan_phy *wpan_phy;
+ struct ieee802154_addr coord;
+ int err;
+
+ wpan_dev = dev->ieee802154_ptr;
+ wpan_phy = &rdev->wpan_phy;
+
+ if (wpan_phy->flags & WPAN_PHY_FLAG_DATAGRAMS_ONLY) {
+ NL_SET_ERR_MSG(info->extack, "PHY only supports datagrams");
+ return -EOPNOTSUPP;
+ }
+
+ if (!info->attrs[NL802154_ATTR_PAN_ID] ||
+ !info->attrs[NL802154_ATTR_EXTENDED_ADDR])
+ return -EINVAL;
+
+ coord.pan_id = nla_get_le16(info->attrs[NL802154_ATTR_PAN_ID]);
+ coord.mode = IEEE802154_ADDR_LONG;
+ coord.extended_addr = nla_get_le64(info->attrs[NL802154_ATTR_EXTENDED_ADDR]);
+
+ mutex_lock(&wpan_dev->association_lock);
+ err = rdev_associate(rdev, wpan_dev, &coord);
+ mutex_unlock(&wpan_dev->association_lock);
+ if (err)
+ pr_err("Association with PAN ID 0x%x failed (%d)\n",
+ le16_to_cpu(coord.pan_id), err);
+
+ return err;
+}
+
+static int nl802154_disassociate(struct sk_buff *skb, struct genl_info *info)
+{
+ struct cfg802154_registered_device *rdev = info->user_ptr[0];
+ struct net_device *dev = info->user_ptr[1];
+ struct wpan_dev *wpan_dev = dev->ieee802154_ptr;
+ struct wpan_phy *wpan_phy = &rdev->wpan_phy;
+ struct ieee802154_addr target;
+
+ if (wpan_phy->flags & WPAN_PHY_FLAG_DATAGRAMS_ONLY) {
+ NL_SET_ERR_MSG(info->extack, "PHY only supports datagrams");
+ return -EOPNOTSUPP;
+ }
+
+ target.pan_id = wpan_dev->pan_id;
+
+ if (info->attrs[NL802154_ATTR_EXTENDED_ADDR]) {
+ target.mode = IEEE802154_ADDR_LONG;
+ target.extended_addr = nla_get_le64(info->attrs[NL802154_ATTR_EXTENDED_ADDR]);
+ } else if (info->attrs[NL802154_ATTR_SHORT_ADDR]) {
+ target.mode = IEEE802154_ADDR_SHORT;
+ target.short_addr = nla_get_le16(info->attrs[NL802154_ATTR_SHORT_ADDR]);
+ } else {
+ NL_SET_ERR_MSG(info->extack, "Device address is missing");
+ return -EINVAL;
+ }
+
+ mutex_lock(&wpan_dev->association_lock);
+ rdev_disassociate(rdev, wpan_dev, &target);
+ mutex_unlock(&wpan_dev->association_lock);
+
+ return 0;
+}
+
+static int nl802154_set_max_associations(struct sk_buff *skb, struct genl_info *info)
+{
+ struct net_device *dev = info->user_ptr[1];
+ struct wpan_dev *wpan_dev = dev->ieee802154_ptr;
+ unsigned int max_assoc;
+
+ if (!info->attrs[NL802154_ATTR_MAX_ASSOCIATIONS]) {
+ NL_SET_ERR_MSG(info->extack, "No maximum number of association given");
+ return -EINVAL;
+ }
+
+ max_assoc = nla_get_u32(info->attrs[NL802154_ATTR_MAX_ASSOCIATIONS]);
+
+ mutex_lock(&wpan_dev->association_lock);
+ cfg802154_set_max_associations(wpan_dev, max_assoc);
+ mutex_unlock(&wpan_dev->association_lock);
+
+ return 0;
+}
+
+static int nl802154_send_peer_info(struct sk_buff *msg,
+ struct netlink_callback *cb,
+ u32 seq, int flags,
+ struct cfg802154_registered_device *rdev,
+ struct wpan_dev *wpan_dev,
+ struct ieee802154_pan_device *peer,
+ enum nl802154_peer_type type)
+{
+ struct nlattr *nla;
+ void *hdr;
+
+ ASSERT_RTNL();
+
+ hdr = nl802154hdr_put(msg, NETLINK_CB(cb->skb).portid, seq, flags,
+ NL802154_CMD_LIST_ASSOCIATIONS);
+ if (!hdr)
+ return -ENOBUFS;
+
+ genl_dump_check_consistent(cb, hdr);
+
+ nla = nla_nest_start_noflag(msg, NL802154_ATTR_PEER);
+ if (!nla)
+ goto nla_put_failure;
+
+ if (nla_put_u8(msg, NL802154_DEV_ADDR_ATTR_PEER_TYPE, type))
+ goto nla_put_failure;
+
+ if (nla_put_u8(msg, NL802154_DEV_ADDR_ATTR_MODE, peer->mode))
+ goto nla_put_failure;
+
+ if (nla_put(msg, NL802154_DEV_ADDR_ATTR_SHORT,
+ IEEE802154_SHORT_ADDR_LEN, &peer->short_addr))
+ goto nla_put_failure;
+
+ if (nla_put(msg, NL802154_DEV_ADDR_ATTR_EXTENDED,
+ IEEE802154_EXTENDED_ADDR_LEN, &peer->extended_addr))
+ goto nla_put_failure;
+
+ nla_nest_end(msg, nla);
+
+ genlmsg_end(msg, hdr);
+
+ return 0;
+
+ nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+static int nl802154_list_associations(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ struct cfg802154_registered_device *rdev;
+ struct ieee802154_pan_device *child;
+ struct wpan_dev *wpan_dev;
+ int err;
+
+ err = nl802154_prepare_wpan_dev_dump(skb, cb, &rdev, &wpan_dev);
+ if (err)
+ return err;
+
+ mutex_lock(&wpan_dev->association_lock);
+
+ if (cb->args[2])
+ goto out;
+
+ if (wpan_dev->parent) {
+ err = nl802154_send_peer_info(skb, cb, cb->nlh->nlmsg_seq,
+ NLM_F_MULTI, rdev, wpan_dev,
+ wpan_dev->parent,
+ NL802154_PEER_TYPE_PARENT);
+ if (err < 0)
+ goto out_err;
+ }
+
+ list_for_each_entry(child, &wpan_dev->children, node) {
+ err = nl802154_send_peer_info(skb, cb, cb->nlh->nlmsg_seq,
+ NLM_F_MULTI, rdev, wpan_dev,
+ child,
+ NL802154_PEER_TYPE_CHILD);
+ if (err < 0)
+ goto out_err;
+ }
+
+ cb->args[2] = 1;
+out:
+ err = skb->len;
+out_err:
+ mutex_unlock(&wpan_dev->association_lock);
+
+ nl802154_finish_wpan_dev_dump(rdev);
+
+ return err;
+}
+
#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
static const struct nla_policy nl802154_dev_addr_policy[NL802154_DEV_ADDR_ATTR_MAX + 1] = {
[NL802154_DEV_ADDR_ATTR_PAN_ID] = { .type = NLA_U16 },
@@ -2759,6 +2938,34 @@ static const struct genl_ops nl802154_ops[] = {
NL802154_FLAG_CHECK_NETDEV_UP |
NL802154_FLAG_NEED_RTNL,
},
+ {
+ .cmd = NL802154_CMD_ASSOCIATE,
+ .doit = nl802154_associate,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = NL802154_FLAG_NEED_NETDEV |
+ NL802154_FLAG_CHECK_NETDEV_UP |
+ NL802154_FLAG_NEED_RTNL,
+ },
+ {
+ .cmd = NL802154_CMD_DISASSOCIATE,
+ .doit = nl802154_disassociate,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = NL802154_FLAG_NEED_NETDEV |
+ NL802154_FLAG_CHECK_NETDEV_UP |
+ NL802154_FLAG_NEED_RTNL,
+ },
+ {
+ .cmd = NL802154_CMD_SET_MAX_ASSOCIATIONS,
+ .doit = nl802154_set_max_associations,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = NL802154_FLAG_NEED_NETDEV |
+ NL802154_FLAG_NEED_RTNL,
+ },
+ {
+ .cmd = NL802154_CMD_LIST_ASSOCIATIONS,
+ .dumpit = nl802154_list_associations,
+ /* can be retrieved by unprivileged users */
+ },
#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
{
.cmd = NL802154_CMD_SET_SEC_PARAMS,
diff --git a/net/ieee802154/pan.c b/net/ieee802154/pan.c
new file mode 100644
index 000000000000..249df7364b3e
--- /dev/null
+++ b/net/ieee802154/pan.c
@@ -0,0 +1,109 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * IEEE 802.15.4 PAN management
+ *
+ * Copyright (C) 2023 Qorvo US, Inc
+ * Authors:
+ * - David Girault <david.girault@qorvo.com>
+ * - Miquel Raynal <miquel.raynal@bootlin.com>
+ */
+
+#include <linux/kernel.h>
+#include <net/cfg802154.h>
+#include <net/af_ieee802154.h>
+
+/* Checks whether a device address matches one from the PAN list.
+ * This helper is meant to be used only during PAN management, when we expect
+ * extended addresses to be used.
+ */
+static bool cfg802154_pan_device_is_matching(struct ieee802154_pan_device *pan_dev,
+ struct ieee802154_addr *ext_dev)
+{
+ if (!pan_dev || !ext_dev)
+ return false;
+
+ if (ext_dev->mode == IEEE802154_ADDR_SHORT)
+ return false;
+
+ return pan_dev->extended_addr == ext_dev->extended_addr;
+}
+
+bool cfg802154_device_is_associated(struct wpan_dev *wpan_dev)
+{
+ bool is_assoc;
+
+ mutex_lock(&wpan_dev->association_lock);
+ is_assoc = !list_empty(&wpan_dev->children) || wpan_dev->parent;
+ mutex_unlock(&wpan_dev->association_lock);
+
+ return is_assoc;
+}
+
+bool cfg802154_device_is_parent(struct wpan_dev *wpan_dev,
+ struct ieee802154_addr *target)
+{
+ lockdep_assert_held(&wpan_dev->association_lock);
+
+ return cfg802154_pan_device_is_matching(wpan_dev->parent, target);
+}
+EXPORT_SYMBOL_GPL(cfg802154_device_is_parent);
+
+struct ieee802154_pan_device *
+cfg802154_device_is_child(struct wpan_dev *wpan_dev,
+ struct ieee802154_addr *target)
+{
+ struct ieee802154_pan_device *child;
+
+ lockdep_assert_held(&wpan_dev->association_lock);
+
+ list_for_each_entry(child, &wpan_dev->children, node)
+ if (cfg802154_pan_device_is_matching(child, target))
+ return child;
+
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(cfg802154_device_is_child);
+
+__le16 cfg802154_get_free_short_addr(struct wpan_dev *wpan_dev)
+{
+ struct ieee802154_pan_device *child;
+ __le16 addr;
+
+ lockdep_assert_held(&wpan_dev->association_lock);
+
+ do {
+ get_random_bytes(&addr, 2);
+ if (addr == cpu_to_le16(IEEE802154_ADDR_SHORT_BROADCAST) ||
+ addr == cpu_to_le16(IEEE802154_ADDR_SHORT_UNSPEC))
+ continue;
+
+ if (wpan_dev->short_addr == addr)
+ continue;
+
+ if (wpan_dev->parent && wpan_dev->parent->short_addr == addr)
+ continue;
+
+ list_for_each_entry(child, &wpan_dev->children, node)
+ if (child->short_addr == addr)
+ continue;
+
+ break;
+ } while (1);
+
+ return addr;
+}
+EXPORT_SYMBOL_GPL(cfg802154_get_free_short_addr);
+
+unsigned int cfg802154_set_max_associations(struct wpan_dev *wpan_dev,
+ unsigned int max)
+{
+ unsigned int old_max;
+
+ lockdep_assert_held(&wpan_dev->association_lock);
+
+ old_max = wpan_dev->max_associations;
+ wpan_dev->max_associations = max;
+
+ return old_max;
+}
+EXPORT_SYMBOL_GPL(cfg802154_set_max_associations);
diff --git a/net/ieee802154/rdev-ops.h b/net/ieee802154/rdev-ops.h
index 5eaae15c610e..64071ef6f57b 100644
--- a/net/ieee802154/rdev-ops.h
+++ b/net/ieee802154/rdev-ops.h
@@ -265,6 +265,36 @@ static inline int rdev_stop_beacons(struct cfg802154_registered_device *rdev,
return ret;
}
+static inline int rdev_associate(struct cfg802154_registered_device *rdev,
+ struct wpan_dev *wpan_dev,
+ struct ieee802154_addr *coord)
+{
+ int ret;
+
+ if (!rdev->ops->associate)
+ return -EOPNOTSUPP;
+
+ trace_802154_rdev_associate(&rdev->wpan_phy, wpan_dev, coord);
+ ret = rdev->ops->associate(&rdev->wpan_phy, wpan_dev, coord);
+ trace_802154_rdev_return_int(&rdev->wpan_phy, ret);
+ return ret;
+}
+
+static inline int rdev_disassociate(struct cfg802154_registered_device *rdev,
+ struct wpan_dev *wpan_dev,
+ struct ieee802154_addr *target)
+{
+ int ret;
+
+ if (!rdev->ops->disassociate)
+ return -EOPNOTSUPP;
+
+ trace_802154_rdev_disassociate(&rdev->wpan_phy, wpan_dev, target);
+ ret = rdev->ops->disassociate(&rdev->wpan_phy, wpan_dev, target);
+ trace_802154_rdev_return_int(&rdev->wpan_phy, ret);
+ return ret;
+}
+
#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
/* TODO this is already a nl802154, so move into ieee802154 */
static inline void
diff --git a/net/ieee802154/trace.h b/net/ieee802154/trace.h
index c16db0b326fa..62aa6465253a 100644
--- a/net/ieee802154/trace.h
+++ b/net/ieee802154/trace.h
@@ -356,6 +356,44 @@ DEFINE_EVENT(802154_wdev_template, 802154_rdev_stop_beacons,
TP_ARGS(wpan_phy, wpan_dev)
);
+TRACE_EVENT(802154_rdev_associate,
+ TP_PROTO(struct wpan_phy *wpan_phy,
+ struct wpan_dev *wpan_dev,
+ struct ieee802154_addr *coord),
+ TP_ARGS(wpan_phy, wpan_dev, coord),
+ TP_STRUCT__entry(
+ WPAN_PHY_ENTRY
+ WPAN_DEV_ENTRY
+ __field(__le64, addr)
+ ),
+ TP_fast_assign(
+ WPAN_PHY_ASSIGN;
+ WPAN_DEV_ASSIGN;
+ __entry->addr = coord->extended_addr;
+ ),
+ TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT ", associating with: 0x%llx",
+ WPAN_PHY_PR_ARG, WPAN_DEV_PR_ARG, __entry->addr)
+);
+
+TRACE_EVENT(802154_rdev_disassociate,
+ TP_PROTO(struct wpan_phy *wpan_phy,
+ struct wpan_dev *wpan_dev,
+ struct ieee802154_addr *target),
+ TP_ARGS(wpan_phy, wpan_dev, target),
+ TP_STRUCT__entry(
+ WPAN_PHY_ENTRY
+ WPAN_DEV_ENTRY
+ __field(__le64, addr)
+ ),
+ TP_fast_assign(
+ WPAN_PHY_ASSIGN;
+ WPAN_DEV_ASSIGN;
+ __entry->addr = target->extended_addr;
+ ),
+ TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT ", disassociating with: 0x%llx",
+ WPAN_PHY_PR_ARG, WPAN_DEV_PR_ARG, __entry->addr)
+);
+
TRACE_EVENT(802154_rdev_return_int,
TP_PROTO(struct wpan_phy *wpan_phy, int ret),
TP_ARGS(wpan_phy, ret),
diff --git a/net/ife/ife.c b/net/ife/ife.c
index 13bbf8cb6a39..be05b690b9ef 100644
--- a/net/ife/ife.c
+++ b/net/ife/ife.c
@@ -82,6 +82,7 @@ void *ife_decode(struct sk_buff *skb, u16 *metalen)
if (unlikely(!pskb_may_pull(skb, total_pull)))
return NULL;
+ ifehdr = (struct ifeheadr *)(skb->data + skb->dev->hard_header_len);
skb_set_mac_header(skb, total_pull);
__skb_pull(skb, total_pull);
*metalen = ifehdrln - IFE_METAHDRLEN;
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 2dfb12230f08..8e94ed7c56a0 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -741,10 +741,27 @@ config DEFAULT_TCP_CONG
default "bbr" if DEFAULT_BBR
default "cubic"
+config TCP_SIGPOOL
+ tristate
+
+config TCP_AO
+ bool "TCP: Authentication Option (RFC5925)"
+ select CRYPTO
+ select TCP_SIGPOOL
+ depends on 64BIT && IPV6 != m # seq-number extension needs WRITE_ONCE(u64)
+ help
+ TCP-AO specifies the use of stronger Message Authentication Codes (MACs),
+ protects against replays for long-lived TCP connections, and
+ provides more details on the association of security with TCP
+ connections than TCP MD5 (See RFC5925)
+
+ If unsure, say N.
+
config TCP_MD5SIG
bool "TCP: MD5 Signature Option support (RFC2385)"
select CRYPTO
select CRYPTO_MD5
+ select TCP_SIGPOOL
help
RFC2385 specifies a method of giving MD5 protection to TCP sessions.
Its main (only?) use is to protect BGP sessions between core routers
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index b18ba8ef93ad..ec36d2ec059e 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -16,8 +16,6 @@ obj-y := route.o inetpeer.o protocol.o \
inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o \
metrics.o netlink.o nexthop.o udp_tunnel_stub.o
-obj-$(CONFIG_BPFILTER) += bpfilter/
-
obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o
obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o
obj-$(CONFIG_PROC_FS) += proc.o
@@ -62,12 +60,14 @@ obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o
obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
+obj-$(CONFIG_TCP_SIGPOOL) += tcp_sigpool.o
obj-$(CONFIG_NET_SOCK_MSG) += tcp_bpf.o
obj-$(CONFIG_BPF_SYSCALL) += udp_bpf.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o xfrm4_protocol.o
+obj-$(CONFIG_TCP_AO) += tcp_ao.o
ifeq ($(CONFIG_BPF_JIT),y)
obj-$(CONFIG_BPF_SYSCALL) += bpf_tcp_ca.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 3d2e30e20473..a5a820ee2026 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -330,6 +330,9 @@ lookup_protocol:
if (INET_PROTOSW_REUSE & answer_flags)
sk->sk_reuse = SK_CAN_REUSE;
+ if (INET_PROTOSW_ICSK & answer_flags)
+ inet_init_csk_locks(sk);
+
inet = inet_sk(sk);
inet_assign_bit(IS_ICSK, sk, INET_PROTOSW_ICSK & answer_flags);
@@ -452,7 +455,7 @@ int inet_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len)
/* BPF prog is run before any checks are done so that if the prog
* changes context in a wrong way it will be caught.
*/
- err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr,
+ err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, &addr_len,
CGROUP_INET4_BIND, &flags);
if (err)
return err;
@@ -597,7 +600,6 @@ static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias)
add_wait_queue(sk_sleep(sk), &wait);
sk->sk_write_pending += writebias;
- sk->sk_wait_pending++;
/* Basic assumption: if someone sets sk->sk_err, he _must_
* change state of the socket from TCP_SYN_*.
@@ -613,7 +615,6 @@ static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias)
}
remove_wait_queue(sk_sleep(sk), &wait);
sk->sk_write_pending -= writebias;
- sk->sk_wait_pending--;
return timeo;
}
@@ -642,6 +643,7 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
return -EINVAL;
if (uaddr->sa_family == AF_UNSPEC) {
+ sk->sk_disconnects++;
err = sk->sk_prot->disconnect(sk, flags);
sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;
goto out;
@@ -696,6 +698,7 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
int writebias = (sk->sk_protocol == IPPROTO_TCP) &&
tcp_sk(sk)->fastopen_req &&
tcp_sk(sk)->fastopen_req->data ? 1 : 0;
+ int dis = sk->sk_disconnects;
/* Error code is set above */
if (!timeo || !inet_wait_for_connect(sk, timeo, writebias))
@@ -704,6 +707,11 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
err = sock_intr_errno(timeo);
if (signal_pending(current))
goto out;
+
+ if (dis != sk->sk_disconnects) {
+ err = -EPIPE;
+ goto out;
+ }
}
/* Connection was closed by RST, timeout, ICMP error
@@ -725,6 +733,7 @@ out:
sock_error:
err = sock_error(sk) ? : -ECONNABORTED;
sock->state = SS_UNCONNECTED;
+ sk->sk_disconnects++;
if (sk->sk_prot->disconnect(sk, flags))
sock->state = SS_DISCONNECTING;
goto out;
@@ -788,6 +797,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr,
struct sock *sk = sock->sk;
struct inet_sock *inet = inet_sk(sk);
DECLARE_SOCKADDR(struct sockaddr_in *, sin, uaddr);
+ int sin_addr_len = sizeof(*sin);
sin->sin_family = AF_INET;
lock_sock(sk);
@@ -800,7 +810,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr,
}
sin->sin_port = inet->inet_dport;
sin->sin_addr.s_addr = inet->inet_daddr;
- BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin,
+ BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, &sin_addr_len,
CGROUP_INET4_GETPEERNAME);
} else {
__be32 addr = inet->inet_rcv_saddr;
@@ -808,12 +818,12 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr,
addr = inet->inet_saddr;
sin->sin_port = inet->inet_sport;
sin->sin_addr.s_addr = addr;
- BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin,
+ BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, &sin_addr_len,
CGROUP_INET4_GETSOCKNAME);
}
release_sock(sk);
memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
- return sizeof(*sin);
+ return sin_addr_len;
}
EXPORT_SYMBOL(inet_getname);
@@ -1618,14 +1628,17 @@ EXPORT_SYMBOL(inet_current_timestamp);
int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
{
- if (sk->sk_family == AF_INET)
+ unsigned int family = READ_ONCE(sk->sk_family);
+
+ if (family == AF_INET)
return ip_recv_error(sk, msg, len, addr_len);
#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == AF_INET6)
+ if (family == AF_INET6)
return pingv6_ops.ipv6_recv_error(sk, msg, len, addr_len);
#endif
return -EINVAL;
}
+EXPORT_SYMBOL(inet_recv_error);
int inet_gro_complete(struct sk_buff *skb, int nhoff)
{
@@ -1840,9 +1853,7 @@ static __net_init int inet_init_net(struct net *net)
/*
* Set defaults for local port range
*/
- seqlock_init(&net->ipv4.ip_local_ports.lock);
- net->ipv4.ip_local_ports.range[0] = 32768;
- net->ipv4.ip_local_ports.range[1] = 60999;
+ net->ipv4.ip_local_ports.range = 60999u << 16 | 32768u;
seqlock_init(&net->ipv4.ping_group_range.lock);
/*
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 015c0f4ec5ba..a2e6e1fdf82b 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -1,8 +1,8 @@
// SPDX-License-Identifier: GPL-2.0-only
#define pr_fmt(fmt) "IPsec: " fmt
-#include <crypto/algapi.h>
#include <crypto/hash.h>
+#include <crypto/utils.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/slab.h>
@@ -27,9 +27,7 @@ static void *ah_alloc_tmp(struct crypto_ahash *ahash, int nfrags,
{
unsigned int len;
- len = size + crypto_ahash_digestsize(ahash) +
- (crypto_ahash_alignmask(ahash) &
- ~(crypto_tfm_ctx_alignment() - 1));
+ len = size + crypto_ahash_digestsize(ahash);
len = ALIGN(len, crypto_tfm_ctx_alignment());
@@ -46,10 +44,9 @@ static inline u8 *ah_tmp_auth(void *tmp, unsigned int offset)
return tmp + offset;
}
-static inline u8 *ah_tmp_icv(struct crypto_ahash *ahash, void *tmp,
- unsigned int offset)
+static inline u8 *ah_tmp_icv(void *tmp, unsigned int offset)
{
- return PTR_ALIGN((u8 *)tmp + offset, crypto_ahash_alignmask(ahash) + 1);
+ return tmp + offset;
}
static inline struct ahash_request *ah_tmp_req(struct crypto_ahash *ahash,
@@ -129,7 +126,7 @@ static void ah_output_done(void *data, int err)
int ihl = ip_hdrlen(skb);
iph = AH_SKB_CB(skb)->tmp;
- icv = ah_tmp_icv(ahp->ahash, iph, ihl);
+ icv = ah_tmp_icv(iph, ihl);
memcpy(ah->auth_data, icv, ahp->icv_trunc_len);
top_iph->tos = iph->tos;
@@ -182,7 +179,7 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb)
if (!iph)
goto out;
seqhi = (__be32 *)((char *)iph + ihl);
- icv = ah_tmp_icv(ahash, seqhi, seqhi_len);
+ icv = ah_tmp_icv(seqhi, seqhi_len);
req = ah_tmp_req(ahash, icv);
sg = ah_req_sg(ahash, req);
seqhisg = sg + nfrags;
@@ -279,7 +276,7 @@ static void ah_input_done(void *data, int err)
work_iph = AH_SKB_CB(skb)->tmp;
auth_data = ah_tmp_auth(work_iph, ihl);
- icv = ah_tmp_icv(ahp->ahash, auth_data, ahp->icv_trunc_len);
+ icv = ah_tmp_icv(auth_data, ahp->icv_trunc_len);
err = crypto_memneq(icv, auth_data, ahp->icv_trunc_len) ? -EBADMSG : 0;
if (err)
@@ -374,7 +371,7 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
seqhi = (__be32 *)((char *)work_iph + ihl);
auth_data = ah_tmp_auth(seqhi, seqhi_len);
- icv = ah_tmp_icv(ahash, auth_data, ahp->icv_trunc_len);
+ icv = ah_tmp_icv(auth_data, ahp->icv_trunc_len);
req = ah_tmp_req(ahash, icv);
sg = ah_req_sg(ahash, req);
seqhisg = sg + nfrags;
diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index 39dcccf0f174..ae8b15e6896f 100644
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -271,6 +271,74 @@ static int bpf_tcp_ca_validate(void *kdata)
return tcp_validate_congestion_control(kdata);
}
+static u32 bpf_tcp_ca_ssthresh(struct sock *sk)
+{
+ return 0;
+}
+
+static void bpf_tcp_ca_cong_avoid(struct sock *sk, u32 ack, u32 acked)
+{
+}
+
+static void bpf_tcp_ca_set_state(struct sock *sk, u8 new_state)
+{
+}
+
+static void bpf_tcp_ca_cwnd_event(struct sock *sk, enum tcp_ca_event ev)
+{
+}
+
+static void bpf_tcp_ca_in_ack_event(struct sock *sk, u32 flags)
+{
+}
+
+static void bpf_tcp_ca_pkts_acked(struct sock *sk, const struct ack_sample *sample)
+{
+}
+
+static u32 bpf_tcp_ca_min_tso_segs(struct sock *sk)
+{
+ return 0;
+}
+
+static void bpf_tcp_ca_cong_control(struct sock *sk, const struct rate_sample *rs)
+{
+}
+
+static u32 bpf_tcp_ca_undo_cwnd(struct sock *sk)
+{
+ return 0;
+}
+
+static u32 bpf_tcp_ca_sndbuf_expand(struct sock *sk)
+{
+ return 0;
+}
+
+static void __bpf_tcp_ca_init(struct sock *sk)
+{
+}
+
+static void __bpf_tcp_ca_release(struct sock *sk)
+{
+}
+
+static struct tcp_congestion_ops __bpf_ops_tcp_congestion_ops = {
+ .ssthresh = bpf_tcp_ca_ssthresh,
+ .cong_avoid = bpf_tcp_ca_cong_avoid,
+ .set_state = bpf_tcp_ca_set_state,
+ .cwnd_event = bpf_tcp_ca_cwnd_event,
+ .in_ack_event = bpf_tcp_ca_in_ack_event,
+ .pkts_acked = bpf_tcp_ca_pkts_acked,
+ .min_tso_segs = bpf_tcp_ca_min_tso_segs,
+ .cong_control = bpf_tcp_ca_cong_control,
+ .undo_cwnd = bpf_tcp_ca_undo_cwnd,
+ .sndbuf_expand = bpf_tcp_ca_sndbuf_expand,
+
+ .init = __bpf_tcp_ca_init,
+ .release = __bpf_tcp_ca_release,
+};
+
struct bpf_struct_ops bpf_tcp_congestion_ops = {
.verifier_ops = &bpf_tcp_ca_verifier_ops,
.reg = bpf_tcp_ca_reg,
@@ -281,6 +349,7 @@ struct bpf_struct_ops bpf_tcp_congestion_ops = {
.init = bpf_tcp_ca_init,
.validate = bpf_tcp_ca_validate,
.name = "tcp_congestion_ops",
+ .cfi_stubs = &__bpf_ops_tcp_congestion_ops,
};
static int __init bpf_tcp_ca_kfunc_init(void)
diff --git a/net/ipv4/bpfilter/Makefile b/net/ipv4/bpfilter/Makefile
deleted file mode 100644
index 00af5305e05a..000000000000
--- a/net/ipv4/bpfilter/Makefile
+++ /dev/null
@@ -1,2 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_BPFILTER) += sockopt.o
diff --git a/net/ipv4/bpfilter/sockopt.c b/net/ipv4/bpfilter/sockopt.c
deleted file mode 100644
index 193bcc2acccc..000000000000
--- a/net/ipv4/bpfilter/sockopt.c
+++ /dev/null
@@ -1,71 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/uaccess.h>
-#include <linux/bpfilter.h>
-#include <uapi/linux/bpf.h>
-#include <linux/wait.h>
-#include <linux/kmod.h>
-#include <linux/fs.h>
-#include <linux/file.h>
-
-struct bpfilter_umh_ops bpfilter_ops;
-EXPORT_SYMBOL_GPL(bpfilter_ops);
-
-static int bpfilter_mbox_request(struct sock *sk, int optname, sockptr_t optval,
- unsigned int optlen, bool is_set)
-{
- int err;
- mutex_lock(&bpfilter_ops.lock);
- if (!bpfilter_ops.sockopt) {
- mutex_unlock(&bpfilter_ops.lock);
- request_module("bpfilter");
- mutex_lock(&bpfilter_ops.lock);
-
- if (!bpfilter_ops.sockopt) {
- err = -ENOPROTOOPT;
- goto out;
- }
- }
- if (bpfilter_ops.info.tgid &&
- thread_group_exited(bpfilter_ops.info.tgid))
- umd_cleanup_helper(&bpfilter_ops.info);
-
- if (!bpfilter_ops.info.tgid) {
- err = bpfilter_ops.start();
- if (err)
- goto out;
- }
- err = bpfilter_ops.sockopt(sk, optname, optval, optlen, is_set);
-out:
- mutex_unlock(&bpfilter_ops.lock);
- return err;
-}
-
-int bpfilter_ip_set_sockopt(struct sock *sk, int optname, sockptr_t optval,
- unsigned int optlen)
-{
- return bpfilter_mbox_request(sk, optname, optval, optlen, true);
-}
-
-int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval,
- int __user *optlen)
-{
- int len;
-
- if (get_user(len, optlen))
- return -EFAULT;
-
- return bpfilter_mbox_request(sk, optname, USER_SOCKPTR(optval), len,
- false);
-}
-
-static int __init bpfilter_sockopt_init(void)
-{
- mutex_init(&bpfilter_ops.lock);
- bpfilter_ops.info.tgid = NULL;
- bpfilter_ops.info.driver_name = "bpfilter_umh";
-
- return 0;
-}
-device_initcall(bpfilter_sockopt_init);
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index cb5dbee9e018..2cc50cbfc2a3 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -39,11 +39,11 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
saddr = inet->inet_saddr;
if (ipv4_is_multicast(usin->sin_addr.s_addr)) {
if (!oif || netif_index_is_l3_master(sock_net(sk), oif))
- oif = inet->mc_index;
+ oif = READ_ONCE(inet->mc_index);
if (!saddr)
- saddr = inet->mc_addr;
+ saddr = READ_ONCE(inet->mc_addr);
} else if (!oif) {
- oif = inet->uc_index;
+ oif = READ_ONCE(inet->uc_index);
}
fl4 = &inet->cork.fl.u.ip4;
rt = ip_route_connect(fl4, usin->sin_addr.s_addr, saddr, oif,
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 9cf64ee47dd2..ca0ff15dc8fa 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -355,14 +355,14 @@ static void __inet_del_ifa(struct in_device *in_dev,
{
struct in_ifaddr *promote = NULL;
struct in_ifaddr *ifa, *ifa1;
- struct in_ifaddr *last_prim;
+ struct in_ifaddr __rcu **last_prim;
struct in_ifaddr *prev_prom = NULL;
int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
ASSERT_RTNL();
ifa1 = rtnl_dereference(*ifap);
- last_prim = rtnl_dereference(in_dev->ifa_list);
+ last_prim = ifap;
if (in_dev->dead)
goto no_promotions;
@@ -376,7 +376,7 @@ static void __inet_del_ifa(struct in_device *in_dev,
while ((ifa = rtnl_dereference(*ifap1)) != NULL) {
if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
ifa1->ifa_scope <= ifa->ifa_scope)
- last_prim = ifa;
+ last_prim = &ifa->ifa_next;
if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
ifa1->ifa_mask != ifa->ifa_mask ||
@@ -440,9 +440,9 @@ no_promotions:
rcu_assign_pointer(prev_prom->ifa_next, next_sec);
- last_sec = rtnl_dereference(last_prim->ifa_next);
+ last_sec = rtnl_dereference(*last_prim);
rcu_assign_pointer(promote->ifa_next, last_sec);
- rcu_assign_pointer(last_prim->ifa_next, promote);
+ rcu_assign_pointer(*last_prim, promote);
}
promote->ifa_flags &= ~IFA_F_SECONDARY;
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 2be2d4922557..4ccfc104f13a 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -732,7 +732,9 @@ static inline int esp_remove_trailer(struct sk_buff *skb)
skb->csum = csum_block_sub(skb->csum, csumdiff,
skb->len - trimlen);
}
- pskb_trim(skb, skb->len - trimlen);
+ ret = pskb_trim(skb, skb->len - trimlen);
+ if (unlikely(ret))
+ return ret;
ret = nexthdr[1];
@@ -784,7 +786,7 @@ int esp_input_done2(struct sk_buff *skb, int err)
/*
* 1) if the NAT-T peer's IP or port changed then
- * advertize the change to the keying daemon.
+ * advertise the change to the keying daemon.
* This is an inbound SA, so just compare
* SRC ports.
*/
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index 10e96ed6c9e3..b3271957ad9a 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -33,6 +33,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
int offset = skb_gro_offset(skb);
struct xfrm_offload *xo;
struct xfrm_state *x;
+ int encap_type = 0;
__be32 seq;
__be32 spi;
@@ -70,6 +71,9 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
xo->flags |= XFRM_GRO;
+ if (NAPI_GRO_CB(skb)->proto == IPPROTO_UDP)
+ encap_type = UDP_ENCAP_ESPINUDP;
+
XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL;
XFRM_SPI_SKB_CB(skb)->family = AF_INET;
XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
@@ -77,7 +81,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
/* We don't need to handle errors from xfrm_input, it does all
* the error handling and frees the resources on error. */
- xfrm_input(skb, IPPROTO_ESP, spi, -2);
+ xfrm_input(skb, IPPROTO_ESP, spi, encap_type);
return ERR_PTR(-EINPROGRESS);
out_reset:
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 513f475c6a53..5bdd1c016009 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -395,13 +395,13 @@ static int fib_default_rules_init(struct fib_rules_ops *ops)
{
int err;
- err = fib_default_rule_add(ops, 0, RT_TABLE_LOCAL, 0);
+ err = fib_default_rule_add(ops, 0, RT_TABLE_LOCAL);
if (err < 0)
return err;
- err = fib_default_rule_add(ops, 0x7FFE, RT_TABLE_MAIN, 0);
+ err = fib_default_rule_add(ops, 0x7FFE, RT_TABLE_MAIN);
if (err < 0)
return err;
- err = fib_default_rule_add(ops, 0x7FFF, RT_TABLE_DEFAULT, 0);
+ err = fib_default_rule_add(ops, 0x7FFF, RT_TABLE_DEFAULT);
if (err < 0)
return err;
return 0;
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 65ba18a91865..5eb1b8d302bb 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -278,7 +278,8 @@ void fib_release_info(struct fib_info *fi)
hlist_del(&nexthop_nh->nh_hash);
} endfor_nexthops(fi)
}
- fi->fib_dead = 1;
+ /* Paired with READ_ONCE() from fib_table_lookup() */
+ WRITE_ONCE(fi->fib_dead, 1);
fib_info_put(fi);
}
spin_unlock_bh(&fib_info_lock);
@@ -1324,15 +1325,18 @@ __be32 fib_info_update_nhc_saddr(struct net *net, struct fib_nh_common *nhc,
unsigned char scope)
{
struct fib_nh *nh;
+ __be32 saddr;
if (nhc->nhc_family != AF_INET)
return inet_select_addr(nhc->nhc_dev, 0, scope);
nh = container_of(nhc, struct fib_nh, nh_common);
- nh->nh_saddr = inet_select_addr(nh->fib_nh_dev, nh->fib_nh_gw4, scope);
- nh->nh_saddr_genid = atomic_read(&net->ipv4.dev_addr_genid);
+ saddr = inet_select_addr(nh->fib_nh_dev, nh->fib_nh_gw4, scope);
+
+ WRITE_ONCE(nh->nh_saddr, saddr);
+ WRITE_ONCE(nh->nh_saddr_genid, atomic_read(&net->ipv4.dev_addr_genid));
- return nh->nh_saddr;
+ return saddr;
}
__be32 fib_result_prefsrc(struct net *net, struct fib_result *res)
@@ -1346,8 +1350,9 @@ __be32 fib_result_prefsrc(struct net *net, struct fib_result *res)
struct fib_nh *nh;
nh = container_of(nhc, struct fib_nh, nh_common);
- if (nh->nh_saddr_genid == atomic_read(&net->ipv4.dev_addr_genid))
- return nh->nh_saddr;
+ if (READ_ONCE(nh->nh_saddr_genid) ==
+ atomic_read(&net->ipv4.dev_addr_genid))
+ return READ_ONCE(nh->nh_saddr);
}
return fib_info_update_nhc_saddr(net, nhc, res->fi->fib_scope);
@@ -1581,6 +1586,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
link_it:
ofi = fib_find_info(fi);
if (ofi) {
+ /* fib_table_lookup() should not see @fi yet. */
fi->fib_dead = 1;
free_fib_info(fi);
refcount_inc(&ofi->fib_treeref);
@@ -1619,6 +1625,7 @@ err_inval:
failure:
if (fi) {
+ /* fib_table_lookup() should not see @fi yet. */
fi->fib_dead = 1;
free_fib_info(fi);
}
@@ -1884,6 +1891,7 @@ int fib_sync_down_addr(struct net_device *dev, __be32 local)
continue;
if (fi->fib_prefsrc == local) {
fi->fib_flags |= RTNH_F_DEAD;
+ fi->pfsrc_removed = true;
ret++;
}
}
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 74d403dbd2b4..3ff35f811765 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -52,6 +52,7 @@
#include <linux/if_arp.h>
#include <linux/proc_fs.h>
#include <linux/rcupdate.h>
+#include <linux/rcupdate_wait.h>
#include <linux/skbuff.h>
#include <linux/netlink.h>
#include <linux/init.h>
@@ -1582,7 +1583,8 @@ found:
if (fa->fa_dscp &&
inet_dscp_to_dsfield(fa->fa_dscp) != flp->flowi4_tos)
continue;
- if (fi->fib_dead)
+ /* Paired with WRITE_ONCE() in fib_release_info() */
+ if (READ_ONCE(fi->fib_dead))
continue;
if (fa->fa_info->fib_scope < flp->flowi4_scope)
continue;
@@ -2026,6 +2028,7 @@ void fib_table_flush_external(struct fib_table *tb)
int fib_table_flush(struct net *net, struct fib_table *tb, bool flush_all)
{
struct trie *t = (struct trie *)tb->tb_data;
+ struct nl_info info = { .nl_net = net };
struct key_vector *pn = t->kv;
unsigned long cindex = 1;
struct hlist_node *tmp;
@@ -2088,6 +2091,9 @@ int fib_table_flush(struct net *net, struct fib_table *tb, bool flush_all)
fib_notify_alias_delete(net, n->key, &n->leaf, fa,
NULL);
+ if (fi->pfsrc_removed)
+ rtmsg_fib(RTM_DELROUTE, htonl(n->key), fa,
+ KEYLENGTH - fa->fa_slen, tb->tb_id, &info, 0);
hlist_del_rcu(&fa->fa_list);
fib_release_info(fa->fa_info);
alias_free_mem_rcu(fa);
diff --git a/net/ipv4/fou_bpf.c b/net/ipv4/fou_bpf.c
index 3760a14b6b57..4da03bf45c9b 100644
--- a/net/ipv4/fou_bpf.c
+++ b/net/ipv4/fou_bpf.c
@@ -22,9 +22,7 @@ enum bpf_fou_encap_type {
FOU_BPF_ENCAP_GUE,
};
-__diag_push();
-__diag_ignore_all("-Wmissing-prototypes",
- "Global functions as their definitions will be in BTF");
+__bpf_kfunc_start_defs();
/* bpf_skb_set_fou_encap - Set FOU encap parameters
*
@@ -100,7 +98,7 @@ __bpf_kfunc int bpf_skb_get_fou_encap(struct __sk_buff *skb_ctx,
return 0;
}
-__diag_pop()
+__bpf_kfunc_end_defs();
BTF_SET8_START(fou_kfunc_set)
BTF_ID_FLAGS(func, bpf_skb_set_fou_encap)
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index b8607763d113..e63a3bf99617 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -517,7 +517,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
} else
return rt;
- err = xfrm_decode_session_reverse(skb_in, flowi4_to_flowi(&fl4_dec), AF_INET);
+ err = xfrm_decode_session_reverse(net, skb_in, flowi4_to_flowi(&fl4_dec), AF_INET);
if (err)
goto relookup_failed;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 0c9e768e5628..efeeca2b1328 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -216,8 +216,10 @@ static void igmp_start_timer(struct ip_mc_list *im, int max_delay)
int tv = get_random_u32_below(max_delay);
im->tm_running = 1;
- if (!mod_timer(&im->timer, jiffies+tv+2))
- refcount_inc(&im->refcnt);
+ if (refcount_inc_not_zero(&im->refcnt)) {
+ if (mod_timer(&im->timer, jiffies + tv + 2))
+ ip_ma_put(im);
+ }
}
static void igmp_gq_start_timer(struct in_device *in_dev)
@@ -353,8 +355,9 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu)
struct flowi4 fl4;
int hlen = LL_RESERVED_SPACE(dev);
int tlen = dev->needed_tailroom;
- unsigned int size = mtu;
+ unsigned int size;
+ size = min(mtu, IP_MAX_MTU);
while (1) {
skb = alloc_skb(size + hlen + tlen,
GFP_ATOMIC | __GFP_NOWARN);
@@ -2943,8 +2946,6 @@ static struct ip_sf_list *igmp_mcf_get_next(struct seq_file *seq, struct ip_sf_l
continue;
state->im = rcu_dereference(state->idev->mc_list);
}
- if (!state->im)
- break;
spin_lock_bh(&state->im->lock);
psf = state->im->sources;
}
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index aeebe8816689..459af1f89739 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -117,37 +117,39 @@ bool inet_rcv_saddr_any(const struct sock *sk)
return !sk->sk_rcv_saddr;
}
-void inet_get_local_port_range(const struct net *net, int *low, int *high)
-{
- unsigned int seq;
-
- do {
- seq = read_seqbegin(&net->ipv4.ip_local_ports.lock);
-
- *low = net->ipv4.ip_local_ports.range[0];
- *high = net->ipv4.ip_local_ports.range[1];
- } while (read_seqretry(&net->ipv4.ip_local_ports.lock, seq));
-}
-EXPORT_SYMBOL(inet_get_local_port_range);
-
-void inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high)
+/**
+ * inet_sk_get_local_port_range - fetch ephemeral ports range
+ * @sk: socket
+ * @low: pointer to low port
+ * @high: pointer to high port
+ *
+ * Fetch netns port range (/proc/sys/net/ipv4/ip_local_port_range)
+ * Range can be overridden if socket got IP_LOCAL_PORT_RANGE option.
+ * Returns true if IP_LOCAL_PORT_RANGE was set on this socket.
+ */
+bool inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high)
{
- const struct inet_sock *inet = inet_sk(sk);
- const struct net *net = sock_net(sk);
int lo, hi, sk_lo, sk_hi;
+ bool local_range = false;
+ u32 sk_range;
- inet_get_local_port_range(net, &lo, &hi);
+ inet_get_local_port_range(sock_net(sk), &lo, &hi);
- sk_lo = inet->local_port_range.lo;
- sk_hi = inet->local_port_range.hi;
+ sk_range = READ_ONCE(inet_sk(sk)->local_port_range);
+ if (unlikely(sk_range)) {
+ sk_lo = sk_range & 0xffff;
+ sk_hi = sk_range >> 16;
- if (unlikely(lo <= sk_lo && sk_lo <= hi))
- lo = sk_lo;
- if (unlikely(lo <= sk_hi && sk_hi <= hi))
- hi = sk_hi;
+ if (lo <= sk_lo && sk_lo <= hi)
+ lo = sk_lo;
+ if (lo <= sk_hi && sk_hi <= hi)
+ hi = sk_hi;
+ local_range = true;
+ }
*low = lo;
*high = hi;
+ return local_range;
}
EXPORT_SYMBOL(inet_sk_get_local_port_range);
@@ -157,8 +159,11 @@ static bool inet_use_bhash2_on_bind(const struct sock *sk)
if (sk->sk_family == AF_INET6) {
int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
- return addr_type != IPV6_ADDR_ANY &&
- addr_type != IPV6_ADDR_MAPPED;
+ if (addr_type == IPV6_ADDR_ANY)
+ return false;
+
+ if (addr_type != IPV6_ADDR_MAPPED)
+ return true;
}
#endif
return sk->sk_rcv_saddr != htonl(INADDR_ANY);
@@ -211,18 +216,9 @@ static bool inet_bhash2_conflict(const struct sock *sk,
bool relax, bool reuseport_cb_ok,
bool reuseport_ok)
{
- struct inet_timewait_sock *tw2;
struct sock *sk2;
- sk_for_each_bound_bhash2(sk2, &tb2->owners) {
- if (__inet_bhash2_conflict(sk, sk2, sk_uid, relax,
- reuseport_cb_ok, reuseport_ok))
- return true;
- }
-
- twsk_for_each_bound_bhash2(tw2, &tb2->deathrow) {
- sk2 = (struct sock *)tw2;
-
+ sk_for_each_bound(sk2, &tb2->owners) {
if (__inet_bhash2_conflict(sk, sk2, sk_uid, relax,
reuseport_cb_ok, reuseport_ok))
return true;
@@ -231,15 +227,20 @@ static bool inet_bhash2_conflict(const struct sock *sk,
return false;
}
+#define sk_for_each_bound_bhash(__sk, __tb2, __tb) \
+ hlist_for_each_entry(__tb2, &(__tb)->bhash2, bhash_node) \
+ sk_for_each_bound(sk2, &(__tb2)->owners)
+
/* This should be called only when the tb and tb2 hashbuckets' locks are held */
static int inet_csk_bind_conflict(const struct sock *sk,
const struct inet_bind_bucket *tb,
const struct inet_bind2_bucket *tb2, /* may be null */
bool relax, bool reuseport_ok)
{
- bool reuseport_cb_ok;
- struct sock_reuseport *reuseport_cb;
kuid_t uid = sock_i_uid((struct sock *)sk);
+ struct sock_reuseport *reuseport_cb;
+ bool reuseport_cb_ok;
+ struct sock *sk2;
rcu_read_lock();
reuseport_cb = rcu_dereference(sk->sk_reuseport_cb);
@@ -247,32 +248,29 @@ static int inet_csk_bind_conflict(const struct sock *sk,
reuseport_cb_ok = !reuseport_cb || READ_ONCE(reuseport_cb->num_closed_socks);
rcu_read_unlock();
- /*
- * Unlike other sk lookup places we do not check
+ /* Conflicts with an existing IPV6_ADDR_ANY (if ipv6) or INADDR_ANY (if
+ * ipv4) should have been checked already. We need to do these two
+ * checks separately because their spinlocks have to be acquired/released
+ * independently of each other, to prevent possible deadlocks
+ */
+ if (inet_use_bhash2_on_bind(sk))
+ return tb2 && inet_bhash2_conflict(sk, tb2, uid, relax,
+ reuseport_cb_ok, reuseport_ok);
+
+ /* Unlike other sk lookup places we do not check
* for sk_net here, since _all_ the socks listed
* in tb->owners and tb2->owners list belong
* to the same net - the one this bucket belongs to.
*/
+ sk_for_each_bound_bhash(sk2, tb2, tb) {
+ if (!inet_bind_conflict(sk, sk2, uid, relax, reuseport_cb_ok, reuseport_ok))
+ continue;
- if (!inet_use_bhash2_on_bind(sk)) {
- struct sock *sk2;
-
- sk_for_each_bound(sk2, &tb->owners)
- if (inet_bind_conflict(sk, sk2, uid, relax,
- reuseport_cb_ok, reuseport_ok) &&
- inet_rcv_saddr_equal(sk, sk2, true))
- return true;
-
- return false;
+ if (inet_rcv_saddr_equal(sk, sk2, true))
+ return true;
}
- /* Conflicts with an existing IPV6_ADDR_ANY (if ipv6) or INADDR_ANY (if
- * ipv4) should have been checked already. We need to do these two
- * checks separately because their spinlocks have to be acquired/released
- * independently of each other, to prevent possible deadlocks
- */
- return tb2 && inet_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok,
- reuseport_ok);
+ return false;
}
/* Determine if there is a bind conflict with an existing IPV6_ADDR_ANY (if ipv6) or
@@ -455,7 +453,7 @@ void inet_csk_update_fastreuse(struct inet_bind_bucket *tb,
kuid_t uid = sock_i_uid(sk);
bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN;
- if (hlist_empty(&tb->owners)) {
+ if (hlist_empty(&tb->bhash2)) {
tb->fastreuse = reuse;
if (sk->sk_reuseport) {
tb->fastreuseport = FASTREUSEPORT_ANY;
@@ -547,7 +545,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
}
if (!found_port) {
- if (!hlist_empty(&tb->owners)) {
+ if (!hlist_empty(&tb->bhash2)) {
if (sk->sk_reuse == SK_FORCE_REUSE ||
(tb->fastreuse > 0 && reuse) ||
sk_reuseport_match(tb, sk))
@@ -567,7 +565,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
if (!tb2) {
tb2 = inet_bind2_bucket_create(hinfo->bind2_bucket_cachep,
- net, head2, port, l3mdev, sk);
+ net, head2, tb, sk);
if (!tb2)
goto fail_unlock;
bhash2_created = true;
@@ -589,11 +587,10 @@ success:
fail_unlock:
if (ret) {
+ if (bhash2_created)
+ inet_bind2_bucket_destroy(hinfo->bind2_bucket_cachep, tb2);
if (bhash_created)
inet_bind_bucket_destroy(hinfo->bind_bucket_cachep, tb);
- if (bhash2_created)
- inet_bind2_bucket_destroy(hinfo->bind2_bucket_cachep,
- tb2);
}
if (head2_lock_acquired)
spin_unlock(&head2->lock);
@@ -730,6 +727,10 @@ out:
}
if (req)
reqsk_put(req);
+
+ if (newsk)
+ inet_init_csk_locks(newsk);
+
return newsk;
out_err:
newsk = NULL;
@@ -1145,7 +1146,6 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
if (newsk) {
struct inet_connection_sock *newicsk = inet_csk(newsk);
- newsk->sk_wait_pending = 0;
inet_sk_set_state(newsk, TCP_SYN_RECV);
newicsk->icsk_bind_hash = NULL;
newicsk->icsk_bind2_hash = NULL;
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index e13a84433413..8e6b6aa0579e 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -134,7 +134,7 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
* hence this needs to be included regardless of socket family.
*/
if (ext & (1 << (INET_DIAG_TOS - 1)))
- if (nla_put_u8(skb, INET_DIAG_TOS, inet->tos) < 0)
+ if (nla_put_u8(skb, INET_DIAG_TOS, READ_ONCE(inet->tos)) < 0)
goto errout;
#if IS_ENABLED(CONFIG_IPV6)
@@ -165,7 +165,7 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
* For cgroup2 classid is always zero.
*/
if (!classid)
- classid = sk->sk_priority;
+ classid = READ_ONCE(sk->sk_priority);
if (nla_put_u32(skb, INET_DIAG_CLASS_ID, classid))
goto errout;
@@ -1077,10 +1077,94 @@ skip_listen_ht:
s_i = num = s_num = 0;
}
+/* Process a maximum of SKARR_SZ sockets at a time when walking hash buckets
+ * with bh disabled.
+ */
+#define SKARR_SZ 16
+
+ /* Dump bound but inactive (not listening, connecting, etc.) sockets */
+ if (cb->args[0] == 1) {
+ if (!(idiag_states & TCPF_BOUND_INACTIVE))
+ goto skip_bind_ht;
+
+ for (i = s_i; i < hashinfo->bhash_size; i++) {
+ struct inet_bind_hashbucket *ibb;
+ struct inet_bind2_bucket *tb2;
+ struct sock *sk_arr[SKARR_SZ];
+ int num_arr[SKARR_SZ];
+ int idx, accum, res;
+
+resume_bind_walk:
+ num = 0;
+ accum = 0;
+ ibb = &hashinfo->bhash2[i];
+
+ spin_lock_bh(&ibb->lock);
+ inet_bind_bucket_for_each(tb2, &ibb->chain) {
+ if (!net_eq(ib2_net(tb2), net))
+ continue;
+
+ sk_for_each_bound(sk, &tb2->owners) {
+ struct inet_sock *inet = inet_sk(sk);
+
+ if (num < s_num)
+ goto next_bind;
+
+ if (sk->sk_state != TCP_CLOSE ||
+ !inet->inet_num)
+ goto next_bind;
+
+ if (r->sdiag_family != AF_UNSPEC &&
+ r->sdiag_family != sk->sk_family)
+ goto next_bind;
+
+ if (!inet_diag_bc_sk(bc, sk))
+ goto next_bind;
+
+ sock_hold(sk);
+ num_arr[accum] = num;
+ sk_arr[accum] = sk;
+ if (++accum == SKARR_SZ)
+ goto pause_bind_walk;
+next_bind:
+ num++;
+ }
+ }
+pause_bind_walk:
+ spin_unlock_bh(&ibb->lock);
+
+ res = 0;
+ for (idx = 0; idx < accum; idx++) {
+ if (res >= 0) {
+ res = inet_sk_diag_fill(sk_arr[idx],
+ NULL, skb, cb,
+ r, NLM_F_MULTI,
+ net_admin);
+ if (res < 0)
+ num = num_arr[idx];
+ }
+ sock_put(sk_arr[idx]);
+ }
+ if (res < 0)
+ goto done;
+
+ cond_resched();
+
+ if (accum == SKARR_SZ) {
+ s_num = num + 1;
+ goto resume_bind_walk;
+ }
+
+ s_num = 0;
+ }
+skip_bind_ht:
+ cb->args[0] = 2;
+ s_i = num = s_num = 0;
+ }
+
if (!(idiag_states & ~TCPF_LISTEN))
goto out;
-#define SKARR_SZ 16
for (i = s_i; i <= hashinfo->ehash_mask; i++) {
struct inet_ehash_bucket *head = &hashinfo->ehash[i];
spinlock_t *lock = inet_ehash_lockp(hashinfo, i);
@@ -1481,5 +1565,6 @@ static void __exit inet_diag_exit(void)
module_init(inet_diag_init);
module_exit(inet_diag_exit);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("INET/INET6: socket monitoring via SOCK_DIAG");
MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2 /* AF_INET */);
MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 10 /* AF_INET6 */);
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 7876b7d703cb..93e9193df544 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -76,7 +76,7 @@ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep,
tb->port = snum;
tb->fastreuse = 0;
tb->fastreuseport = 0;
- INIT_HLIST_HEAD(&tb->owners);
+ INIT_HLIST_HEAD(&tb->bhash2);
hlist_add_head(&tb->node, &head->chain);
}
return tb;
@@ -87,7 +87,7 @@ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep,
*/
void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket *tb)
{
- if (hlist_empty(&tb->owners)) {
+ if (hlist_empty(&tb->bhash2)) {
__hlist_del(&tb->node);
kmem_cache_free(cachep, tb);
}
@@ -100,47 +100,52 @@ bool inet_bind_bucket_match(const struct inet_bind_bucket *tb, const struct net
tb->l3mdev == l3mdev;
}
-static void inet_bind2_bucket_init(struct inet_bind2_bucket *tb,
+static void inet_bind2_bucket_init(struct inet_bind2_bucket *tb2,
struct net *net,
struct inet_bind_hashbucket *head,
- unsigned short port, int l3mdev,
+ struct inet_bind_bucket *tb,
const struct sock *sk)
{
- write_pnet(&tb->ib_net, net);
- tb->l3mdev = l3mdev;
- tb->port = port;
+ write_pnet(&tb2->ib_net, net);
+ tb2->l3mdev = tb->l3mdev;
+ tb2->port = tb->port;
#if IS_ENABLED(CONFIG_IPV6)
- tb->family = sk->sk_family;
- if (sk->sk_family == AF_INET6)
- tb->v6_rcv_saddr = sk->sk_v6_rcv_saddr;
- else
+ BUILD_BUG_ON(USHRT_MAX < (IPV6_ADDR_ANY | IPV6_ADDR_MAPPED));
+ if (sk->sk_family == AF_INET6) {
+ tb2->addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
+ tb2->v6_rcv_saddr = sk->sk_v6_rcv_saddr;
+ } else {
+ tb2->addr_type = IPV6_ADDR_MAPPED;
+ ipv6_addr_set_v4mapped(sk->sk_rcv_saddr, &tb2->v6_rcv_saddr);
+ }
+#else
+ tb2->rcv_saddr = sk->sk_rcv_saddr;
#endif
- tb->rcv_saddr = sk->sk_rcv_saddr;
- INIT_HLIST_HEAD(&tb->owners);
- INIT_HLIST_HEAD(&tb->deathrow);
- hlist_add_head(&tb->node, &head->chain);
+ INIT_HLIST_HEAD(&tb2->owners);
+ hlist_add_head(&tb2->node, &head->chain);
+ hlist_add_head(&tb2->bhash_node, &tb->bhash2);
}
struct inet_bind2_bucket *inet_bind2_bucket_create(struct kmem_cache *cachep,
struct net *net,
struct inet_bind_hashbucket *head,
- unsigned short port,
- int l3mdev,
+ struct inet_bind_bucket *tb,
const struct sock *sk)
{
- struct inet_bind2_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC);
+ struct inet_bind2_bucket *tb2 = kmem_cache_alloc(cachep, GFP_ATOMIC);
- if (tb)
- inet_bind2_bucket_init(tb, net, head, port, l3mdev, sk);
+ if (tb2)
+ inet_bind2_bucket_init(tb2, net, head, tb, sk);
- return tb;
+ return tb2;
}
/* Caller must hold hashbucket lock for this tb with local BH disabled */
void inet_bind2_bucket_destroy(struct kmem_cache *cachep, struct inet_bind2_bucket *tb)
{
- if (hlist_empty(&tb->owners) && hlist_empty(&tb->deathrow)) {
+ if (hlist_empty(&tb->owners)) {
__hlist_del(&tb->node);
+ __hlist_del(&tb->bhash_node);
kmem_cache_free(cachep, tb);
}
}
@@ -149,12 +154,11 @@ static bool inet_bind2_bucket_addr_match(const struct inet_bind2_bucket *tb2,
const struct sock *sk)
{
#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family != tb2->family)
- return false;
-
if (sk->sk_family == AF_INET6)
- return ipv6_addr_equal(&tb2->v6_rcv_saddr,
- &sk->sk_v6_rcv_saddr);
+ return ipv6_addr_equal(&tb2->v6_rcv_saddr, &sk->sk_v6_rcv_saddr);
+
+ if (tb2->addr_type != IPV6_ADDR_MAPPED)
+ return false;
#endif
return tb2->rcv_saddr == sk->sk_rcv_saddr;
}
@@ -163,10 +167,9 @@ void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
struct inet_bind2_bucket *tb2, unsigned short port)
{
inet_sk(sk)->inet_num = port;
- sk_add_bind_node(sk, &tb->owners);
inet_csk(sk)->icsk_bind_hash = tb;
- sk_add_bind2_node(sk, &tb2->owners);
inet_csk(sk)->icsk_bind2_hash = tb2;
+ sk_add_bind_node(sk, &tb2->owners);
}
/*
@@ -186,21 +189,20 @@ static void __inet_put_port(struct sock *sk)
spin_lock(&head->lock);
tb = inet_csk(sk)->icsk_bind_hash;
- __sk_del_bind_node(sk);
inet_csk(sk)->icsk_bind_hash = NULL;
inet_sk(sk)->inet_num = 0;
- inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
spin_lock(&head2->lock);
if (inet_csk(sk)->icsk_bind2_hash) {
struct inet_bind2_bucket *tb2 = inet_csk(sk)->icsk_bind2_hash;
- __sk_del_bind2_node(sk);
+ __sk_del_bind_node(sk);
inet_csk(sk)->icsk_bind2_hash = NULL;
inet_bind2_bucket_destroy(hashinfo->bind2_bucket_cachep, tb2);
}
spin_unlock(&head2->lock);
+ inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
spin_unlock(&head->lock);
}
@@ -269,8 +271,7 @@ bhash2_find:
tb2 = inet_bind2_bucket_find(head2, net, port, l3mdev, child);
if (!tb2) {
tb2 = inet_bind2_bucket_create(table->bind2_bucket_cachep,
- net, head2, port,
- l3mdev, child);
+ net, head2, tb, child);
if (!tb2)
goto error;
}
@@ -745,12 +746,12 @@ int __inet_hash(struct sock *sk, struct sock *osk)
if (err)
goto unlock;
}
+ sock_set_flag(sk, SOCK_RCU_FREE);
if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport &&
sk->sk_family == AF_INET6)
__sk_nulls_add_node_tail_rcu(sk, &ilb2->nulls_head);
else
__sk_nulls_add_node_rcu(sk, &ilb2->nulls_head);
- sock_set_flag(sk, SOCK_RCU_FREE);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
unlock:
spin_unlock(&ilb2->lock);
@@ -815,41 +816,32 @@ static bool inet_bind2_bucket_match(const struct inet_bind2_bucket *tb,
const struct net *net, unsigned short port,
int l3mdev, const struct sock *sk)
{
-#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family != tb->family)
+ if (!net_eq(ib2_net(tb), net) || tb->port != port ||
+ tb->l3mdev != l3mdev)
return false;
- if (sk->sk_family == AF_INET6)
- return net_eq(ib2_net(tb), net) && tb->port == port &&
- tb->l3mdev == l3mdev &&
- ipv6_addr_equal(&tb->v6_rcv_saddr, &sk->sk_v6_rcv_saddr);
- else
-#endif
- return net_eq(ib2_net(tb), net) && tb->port == port &&
- tb->l3mdev == l3mdev && tb->rcv_saddr == sk->sk_rcv_saddr;
+ return inet_bind2_bucket_addr_match(tb, sk);
}
bool inet_bind2_bucket_match_addr_any(const struct inet_bind2_bucket *tb, const struct net *net,
unsigned short port, int l3mdev, const struct sock *sk)
{
+ if (!net_eq(ib2_net(tb), net) || tb->port != port ||
+ tb->l3mdev != l3mdev)
+ return false;
+
#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family != tb->family) {
- if (sk->sk_family == AF_INET)
- return net_eq(ib2_net(tb), net) && tb->port == port &&
- tb->l3mdev == l3mdev &&
- ipv6_addr_any(&tb->v6_rcv_saddr);
+ if (tb->addr_type == IPV6_ADDR_ANY)
+ return true;
+ if (tb->addr_type != IPV6_ADDR_MAPPED)
return false;
- }
- if (sk->sk_family == AF_INET6)
- return net_eq(ib2_net(tb), net) && tb->port == port &&
- tb->l3mdev == l3mdev &&
- ipv6_addr_any(&tb->v6_rcv_saddr);
- else
+ if (sk->sk_family == AF_INET6 &&
+ !ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
+ return false;
#endif
- return net_eq(ib2_net(tb), net) && tb->port == port &&
- tb->l3mdev == l3mdev && tb->rcv_saddr == 0;
+ return tb->rcv_saddr == 0;
}
/* The socket's bhash2 hashbucket spinlock must be held when this is called */
@@ -944,7 +936,7 @@ static int __inet_bhash2_update_saddr(struct sock *sk, void *saddr, int family,
spin_lock_bh(&head->lock);
spin_lock(&head2->lock);
- __sk_del_bind2_node(sk);
+ __sk_del_bind_node(sk);
inet_bind2_bucket_destroy(hinfo->bind2_bucket_cachep, inet_csk(sk)->icsk_bind2_hash);
spin_unlock(&head2->lock);
@@ -959,10 +951,10 @@ static int __inet_bhash2_update_saddr(struct sock *sk, void *saddr, int family,
tb2 = inet_bind2_bucket_find(head2, net, port, l3mdev, sk);
if (!tb2) {
tb2 = new_tb2;
- inet_bind2_bucket_init(tb2, net, head2, port, l3mdev, sk);
+ inet_bind2_bucket_init(tb2, net, head2, inet_csk(sk)->icsk_bind_hash, sk);
}
- sk_add_bind2_node(sk, &tb2->owners);
inet_csk(sk)->icsk_bind2_hash = tb2;
+ sk_add_bind_node(sk, &tb2->owners);
spin_unlock(&head2->lock);
spin_unlock_bh(&head->lock);
@@ -1014,7 +1006,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
bool tb_created = false;
u32 remaining, offset;
int ret, i, low, high;
- int l3mdev;
+ bool local_ports;
+ int step, l3mdev;
u32 index;
if (port) {
@@ -1026,10 +1019,12 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
l3mdev = inet_sk_bound_l3mdev(sk);
- inet_sk_get_local_port_range(sk, &low, &high);
+ local_ports = inet_sk_get_local_port_range(sk, &low, &high);
+ step = local_ports ? 1 : 2;
+
high++; /* [32768, 60999] -> [32768, 61000[ */
remaining = high - low;
- if (likely(remaining > 1))
+ if (!local_ports && remaining > 1)
remaining &= ~1U;
get_random_sleepable_once(table_perturb,
@@ -1042,10 +1037,11 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
/* In first pass we try ports of @low parity.
* inet_csk_get_port() does the opposite choice.
*/
- offset &= ~1U;
+ if (!local_ports)
+ offset &= ~1U;
other_parity_scan:
port = low + offset;
- for (i = 0; i < remaining; i += 2, port += 2) {
+ for (i = 0; i < remaining; i += step, port += step) {
if (unlikely(port >= high))
port -= remaining;
if (inet_is_local_reserved_port(net, port))
@@ -1062,7 +1058,7 @@ other_parity_scan:
if (tb->fastreuse >= 0 ||
tb->fastreuseport >= 0)
goto next_port;
- WARN_ON(hlist_empty(&tb->owners));
+ WARN_ON(hlist_empty(&tb->bhash2));
if (!check_established(death_row, sk,
port, &tw))
goto ok;
@@ -1085,10 +1081,11 @@ next_port:
cond_resched();
}
- offset++;
- if ((offset & 1) && remaining > 1)
- goto other_parity_scan;
-
+ if (!local_ports) {
+ offset++;
+ if ((offset & 1) && remaining > 1)
+ goto other_parity_scan;
+ }
return -EADDRNOTAVAIL;
ok:
@@ -1101,7 +1098,7 @@ ok:
tb2 = inet_bind2_bucket_find(head2, net, port, l3mdev, sk);
if (!tb2) {
tb2 = inet_bind2_bucket_create(hinfo->bind2_bucket_cachep, net,
- head2, port, l3mdev, sk);
+ head2, tb, sk);
if (!tb2)
goto error;
}
@@ -1111,8 +1108,8 @@ ok:
* on low contention the randomness is maximal and on high contention
* it may be inexistent.
*/
- i = max_t(int, i, get_random_u32_below(8) * 2);
- WRITE_ONCE(table_perturb[index], READ_ONCE(table_perturb[index]) + i + 2);
+ i = max_t(int, i, get_random_u32_below(8) * step);
+ WRITE_ONCE(table_perturb[index], READ_ONCE(table_perturb[index]) + i + step);
/* Head lock still held and bh's disabled */
inet_bind_hash(sk, tb, tb2, port);
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index dd37a5bf6881..5befa4de5b24 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -35,13 +35,11 @@ void inet_twsk_bind_unhash(struct inet_timewait_sock *tw,
if (!tb)
return;
- __hlist_del(&tw->tw_bind_node);
+ __sk_del_bind_node((struct sock *)tw);
tw->tw_tb = NULL;
- inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
-
- __hlist_del(&tw->tw_bind2_node);
tw->tw_tb2 = NULL;
inet_bind2_bucket_destroy(hashinfo->bind2_bucket_cachep, tb2);
+ inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
__sock_put((struct sock *)tw);
}
@@ -94,18 +92,6 @@ static void inet_twsk_add_node_rcu(struct inet_timewait_sock *tw,
hlist_nulls_add_head_rcu(&tw->tw_node, list);
}
-static void inet_twsk_add_bind_node(struct inet_timewait_sock *tw,
- struct hlist_head *list)
-{
- hlist_add_head(&tw->tw_bind_node, list);
-}
-
-static void inet_twsk_add_bind2_node(struct inet_timewait_sock *tw,
- struct hlist_head *list)
-{
- hlist_add_head(&tw->tw_bind2_node, list);
-}
-
/*
* Enter the time wait state. This is called with locally disabled BH.
* Essentially we whip up a timewait bucket, copy the relevant info into it
@@ -133,11 +119,10 @@ void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
tw->tw_tb = icsk->icsk_bind_hash;
WARN_ON(!icsk->icsk_bind_hash);
- inet_twsk_add_bind_node(tw, &tw->tw_tb->owners);
tw->tw_tb2 = icsk->icsk_bind2_hash;
WARN_ON(!icsk->icsk_bind2_hash);
- inet_twsk_add_bind2_node(tw, &tw->tw_tb2->deathrow);
+ sk_add_bind_node((struct sock *)tw, &tw->tw_tb2->owners);
spin_unlock(&bhead2->lock);
spin_unlock(&bhead->lock);
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index e18931a6d153..8b65f12583eb 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -66,9 +66,6 @@ static int ip_forward_finish(struct net *net, struct sock *sk, struct sk_buff *s
{
struct ip_options *opt = &(IPCB(skb)->opt);
- __IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS);
- __IP_ADD_STATS(net, IPSTATS_MIB_OUTOCTETS, skb->len);
-
#ifdef CONFIG_NET_SWITCHDEV
if (skb->offload_l3_fwd_mark) {
consume_skb(skb);
@@ -131,6 +128,8 @@ int ip_forward(struct sk_buff *skb)
if (opt->is_strictroute && rt->rt_uses_gateway)
goto sr_failed;
+ __IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS);
+
IPCB(skb)->flags |= IPSKB_FORWARDED;
mtu = ip_dst_mtu_maybe_forward(&rt->dst, true);
if (ip_exceeds_mtu(skb, mtu)) {
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 22a26d1d29a0..5169c3c72cff 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -635,15 +635,18 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
}
if (dev->header_ops) {
+ int pull_len = tunnel->hlen + sizeof(struct iphdr);
+
if (skb_cow_head(skb, 0))
goto free_skb;
tnl_params = (const struct iphdr *)skb->data;
- /* Pull skb since ip_tunnel_xmit() needs skb->data pointing
- * to gre header.
- */
- skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
+ if (!pskb_network_may_pull(skb, pull_len))
+ goto free_skb;
+
+ /* ip_tunnel_xmit() needs skb->data pointing to gre header. */
+ skb_pull(skb, pull_len);
skb_reset_mac_header(skb);
if (skb->ip_summed == CHECKSUM_PARTIAL &&
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index fe9ead9ee863..5e9c8156656a 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -584,7 +584,8 @@ static void ip_sublist_rcv_finish(struct list_head *head)
static struct sk_buff *ip_extract_route_hint(const struct net *net,
struct sk_buff *skb, int rt_type)
{
- if (fib4_has_custom_rules(net) || rt_type == RTN_BROADCAST)
+ if (fib4_has_custom_rules(net) || rt_type == RTN_BROADCAST ||
+ IPCB(skb)->flags & IPSKB_MULTIPATH)
return NULL;
return skb;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 43ba4b77b248..41537d18eecf 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -101,6 +101,8 @@ int __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct iphdr *iph = ip_hdr(skb);
+ IP_INC_STATS(net, IPSTATS_MIB_OUTREQUESTS);
+
iph_set_totlen(iph, skb->len);
ip_send_check(iph);
@@ -207,6 +209,9 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s
} else if (rt->rt_type == RTN_BROADCAST)
IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTBCAST, skb->len);
+ /* OUTOCTETS should be counted after fragment */
+ IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len);
+
if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
skb = skb_expand_head(skb, hh_len);
if (!skb)
@@ -366,8 +371,6 @@ int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb)
/*
* If the indicated interface is up and running, send the packet.
*/
- IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len);
-
skb->dev = dev;
skb->protocol = htons(ETH_P_IP);
@@ -424,8 +427,6 @@ int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
- IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len);
-
skb->dev = dev;
skb->protocol = htons(ETH_P_IP);
@@ -545,7 +546,7 @@ EXPORT_SYMBOL(__ip_queue_xmit);
int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl)
{
- return __ip_queue_xmit(sk, skb, fl, inet_sk(sk)->tos);
+ return __ip_queue_xmit(sk, skb, fl, READ_ONCE(inet_sk(sk)->tos));
}
EXPORT_SYMBOL(ip_queue_xmit);
@@ -982,7 +983,7 @@ static int __ip_append_data(struct sock *sk,
paged = !!cork->gso_size;
if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
- sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
+ READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID)
tskey = atomic_inc_return(&sk->sk_tskey) - 1;
hh_len = LL_RESERVED_SPACE(rt->dst.dev);
@@ -1286,6 +1287,12 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
if (unlikely(!rt))
return -EFAULT;
+ cork->fragsize = ip_sk_use_pmtu(sk) ?
+ dst_mtu(&rt->dst) : READ_ONCE(rt->dst.dev->mtu);
+
+ if (!inetdev_valid_mtu(cork->fragsize))
+ return -ENETUNREACH;
+
/*
* setup for corking.
*/
@@ -1302,12 +1309,6 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
cork->addr = ipc->addr;
}
- cork->fragsize = ip_sk_use_pmtu(sk) ?
- dst_mtu(&rt->dst) : READ_ONCE(rt->dst.dev->mtu);
-
- if (!inetdev_valid_mtu(cork->fragsize))
- return -ENETUNREACH;
-
cork->gso_size = ipc->gso_size;
cork->dst = &rt->dst;
@@ -1388,8 +1389,8 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
struct ip_options *opt = NULL;
struct rtable *rt = (struct rtable *)cork->dst;
struct iphdr *iph;
+ u8 pmtudisc, ttl;
__be16 df = 0;
- __u8 ttl;
skb = __skb_dequeue(queue);
if (!skb)
@@ -1419,8 +1420,9 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
/* DF bit is set when we want to see DF on outgoing frames.
* If ignore_df is set too, we still allow to fragment this frame
* locally. */
- if (inet->pmtudisc == IP_PMTUDISC_DO ||
- inet->pmtudisc == IP_PMTUDISC_PROBE ||
+ pmtudisc = READ_ONCE(inet->pmtudisc);
+ if (pmtudisc == IP_PMTUDISC_DO ||
+ pmtudisc == IP_PMTUDISC_PROBE ||
(skb->len <= dst_mtu(&rt->dst) &&
ip_dont_fragment(sk, &rt->dst)))
df = htons(IP_DF);
@@ -1431,14 +1433,14 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
if (cork->ttl != 0)
ttl = cork->ttl;
else if (rt->rt_type == RTN_MULTICAST)
- ttl = inet->mc_ttl;
+ ttl = READ_ONCE(inet->mc_ttl);
else
ttl = ip_select_ttl(inet, &rt->dst);
iph = ip_hdr(skb);
iph->version = 4;
iph->ihl = 5;
- iph->tos = (cork->tos != -1) ? cork->tos : inet->tos;
+ iph->tos = (cork->tos != -1) ? cork->tos : READ_ONCE(inet->tos);
iph->frag_off = df;
iph->ttl = ttl;
iph->protocol = sk->sk_protocol;
@@ -1450,7 +1452,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
ip_options_build(skb, opt, cork->addr, rt);
}
- skb->priority = (cork->tos != -1) ? cork->priority: sk->sk_priority;
+ skb->priority = (cork->tos != -1) ? cork->priority: READ_ONCE(sk->sk_priority);
skb->mark = cork->mark;
skb->tstamp = cork->transmit_time;
/*
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index d1c73660b844..21d2ffa919e9 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -47,8 +47,6 @@
#include <linux/errqueue.h>
#include <linux/uaccess.h>
-#include <linux/bpfilter.h>
-
/*
* SOL_IP control messages.
*/
@@ -511,7 +509,7 @@ static bool ipv4_datagram_support_cmsg(const struct sock *sk,
* or without payload (SOF_TIMESTAMPING_OPT_TSONLY).
*/
info = PKTINFO_SKB_CB(skb);
- if (!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG) ||
+ if (!(READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_CMSG) ||
!info->ipi_ifindex)
return false;
@@ -587,12 +585,14 @@ out:
void __ip_sock_set_tos(struct sock *sk, int val)
{
+ u8 old_tos = inet_sk(sk)->tos;
+
if (sk->sk_type == SOCK_STREAM) {
val &= ~INET_ECN_MASK;
- val |= inet_sk(sk)->tos & INET_ECN_MASK;
+ val |= old_tos & INET_ECN_MASK;
}
- if (inet_sk(sk)->tos != val) {
- inet_sk(sk)->tos = val;
+ if (old_tos != val) {
+ WRITE_ONCE(inet_sk(sk)->tos, val);
WRITE_ONCE(sk->sk_priority, rt_tos2priority(val));
sk_dst_reset(sk);
}
@@ -600,9 +600,9 @@ void __ip_sock_set_tos(struct sock *sk, int val)
void ip_sock_set_tos(struct sock *sk, int val)
{
- lock_sock(sk);
+ sockopt_lock_sock(sk);
__ip_sock_set_tos(sk, val);
- release_sock(sk);
+ sockopt_release_sock(sk);
}
EXPORT_SYMBOL(ip_sock_set_tos);
@@ -622,9 +622,7 @@ int ip_sock_set_mtu_discover(struct sock *sk, int val)
{
if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_OMIT)
return -EINVAL;
- lock_sock(sk);
- inet_sk(sk)->pmtudisc = val;
- release_sock(sk);
+ WRITE_ONCE(inet_sk(sk)->pmtudisc, val);
return 0;
}
EXPORT_SYMBOL(ip_sock_set_mtu_discover);
@@ -775,7 +773,7 @@ static int ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen)
if (optlen < GROUP_FILTER_SIZE(0))
return -EINVAL;
- if (optlen > READ_ONCE(sysctl_optmem_max))
+ if (optlen > READ_ONCE(sock_net(sk)->core.sysctl_optmem_max))
return -ENOBUFS;
gsf = memdup_sockptr(optval, optlen);
@@ -811,7 +809,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
if (optlen < size0)
return -EINVAL;
- if (optlen > READ_ONCE(sysctl_optmem_max) - 4)
+ if (optlen > READ_ONCE(sock_net(sk)->core.sysctl_optmem_max) - 4)
return -ENOBUFS;
p = kmalloc(optlen + 4, GFP_KERNEL);
@@ -1039,6 +1037,35 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
WRITE_ONCE(inet->min_ttl, val);
return 0;
+ case IP_MULTICAST_TTL:
+ if (sk->sk_type == SOCK_STREAM)
+ return -EINVAL;
+ if (optlen < 1)
+ return -EINVAL;
+ if (val == -1)
+ val = 1;
+ if (val < 0 || val > 255)
+ return -EINVAL;
+ WRITE_ONCE(inet->mc_ttl, val);
+ return 0;
+ case IP_MTU_DISCOVER:
+ return ip_sock_set_mtu_discover(sk, val);
+ case IP_TOS: /* This sets both TOS and Precedence */
+ ip_sock_set_tos(sk, val);
+ return 0;
+ case IP_LOCAL_PORT_RANGE:
+ {
+ u16 lo = val;
+ u16 hi = val >> 16;
+
+ if (optlen != sizeof(u32))
+ return -EINVAL;
+ if (lo != 0 && hi != 0 && lo > hi)
+ return -EINVAL;
+
+ WRITE_ONCE(inet->local_port_range, val);
+ return 0;
+ }
}
err = 0;
@@ -1093,25 +1120,6 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
}
}
break;
- case IP_TOS: /* This sets both TOS and Precedence */
- __ip_sock_set_tos(sk, val);
- break;
- case IP_MTU_DISCOVER:
- if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_OMIT)
- goto e_inval;
- inet->pmtudisc = val;
- break;
- case IP_MULTICAST_TTL:
- if (sk->sk_type == SOCK_STREAM)
- goto e_inval;
- if (optlen < 1)
- goto e_inval;
- if (val == -1)
- val = 1;
- if (val < 0 || val > 255)
- goto e_inval;
- inet->mc_ttl = val;
- break;
case IP_UNICAST_IF:
{
struct net_device *dev = NULL;
@@ -1123,7 +1131,7 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
ifindex = (__force int)ntohl((__force __be32)val);
if (ifindex == 0) {
- inet->uc_index = 0;
+ WRITE_ONCE(inet->uc_index, 0);
err = 0;
break;
}
@@ -1140,7 +1148,7 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
if (sk->sk_bound_dev_if && midx != sk->sk_bound_dev_if)
break;
- inet->uc_index = ifindex;
+ WRITE_ONCE(inet->uc_index, ifindex);
err = 0;
break;
}
@@ -1178,8 +1186,8 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
if (!mreq.imr_ifindex) {
if (mreq.imr_address.s_addr == htonl(INADDR_ANY)) {
- inet->mc_index = 0;
- inet->mc_addr = 0;
+ WRITE_ONCE(inet->mc_index, 0);
+ WRITE_ONCE(inet->mc_addr, 0);
err = 0;
break;
}
@@ -1204,8 +1212,8 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
midx != sk->sk_bound_dev_if)
break;
- inet->mc_index = mreq.imr_ifindex;
- inet->mc_addr = mreq.imr_address.s_addr;
+ WRITE_ONCE(inet->mc_index, mreq.imr_ifindex);
+ WRITE_ONCE(inet->mc_addr, mreq.imr_address.s_addr);
err = 0;
break;
}
@@ -1244,7 +1252,7 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
if (optlen < IP_MSFILTER_SIZE(0))
goto e_inval;
- if (optlen > READ_ONCE(sysctl_optmem_max)) {
+ if (optlen > READ_ONCE(net->core.sysctl_optmem_max)) {
err = -ENOBUFS;
break;
}
@@ -1335,20 +1343,6 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
err = xfrm_user_policy(sk, optname, optval, optlen);
break;
- case IP_LOCAL_PORT_RANGE:
- {
- const __u16 lo = val;
- const __u16 hi = val >> 16;
-
- if (optlen != sizeof(__u32))
- goto e_inval;
- if (lo != 0 && hi != 0 && lo > hi)
- goto e_inval;
-
- inet->local_port_range.lo = lo;
- inet->local_port_range.hi = hi;
- break;
- }
default:
err = -ENOPROTOOPT;
break;
@@ -1369,12 +1363,13 @@ e_inval:
* ipv4_pktinfo_prepare - transfer some info from rtable to skb
* @sk: socket
* @skb: buffer
+ * @drop_dst: if true, drops skb dst
*
* To support IP_CMSG_PKTINFO option, we store rt_iif and specific
* destination in skb->cb[] before dst drop.
* This way, receiver doesn't make cache line misses to read rtable.
*/
-void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb)
+void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb, bool drop_dst)
{
struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb);
bool prepare = inet_test_bit(PKTINFO, sk) ||
@@ -1403,7 +1398,8 @@ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb)
pktinfo->ipi_ifindex = 0;
pktinfo->ipi_spec_dst.s_addr = 0;
}
- skb_dst_drop(skb);
+ if (drop_dst)
+ skb_dst_drop(skb);
}
int ip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
@@ -1415,11 +1411,6 @@ int ip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
return -ENOPROTOOPT;
err = do_ip_setsockopt(sk, level, optname, optval, optlen);
-#if IS_ENABLED(CONFIG_BPFILTER_UMH)
- if (optname >= BPFILTER_IPT_SO_SET_REPLACE &&
- optname < BPFILTER_IPT_SET_MAX)
- err = bpfilter_ip_set_sockopt(sk, optname, optval, optlen);
-#endif
#ifdef CONFIG_NETFILTER
/* we need to exclude all possible ENOPROTOOPTs except default case */
if (err == -ENOPROTOOPT && optname != IP_HDRINCL &&
@@ -1592,27 +1583,29 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname,
case IP_MINTTL:
val = READ_ONCE(inet->min_ttl);
goto copyval;
- }
-
- if (needs_rtnl)
- rtnl_lock();
- sockopt_lock_sock(sk);
-
- switch (optname) {
+ case IP_MULTICAST_TTL:
+ val = READ_ONCE(inet->mc_ttl);
+ goto copyval;
+ case IP_MTU_DISCOVER:
+ val = READ_ONCE(inet->pmtudisc);
+ goto copyval;
+ case IP_TOS:
+ val = READ_ONCE(inet->tos);
+ goto copyval;
case IP_OPTIONS:
{
unsigned char optbuf[sizeof(struct ip_options)+40];
struct ip_options *opt = (struct ip_options *)optbuf;
struct ip_options_rcu *inet_opt;
- inet_opt = rcu_dereference_protected(inet->inet_opt,
- lockdep_sock_is_held(sk));
+ rcu_read_lock();
+ inet_opt = rcu_dereference(inet->inet_opt);
opt->optlen = 0;
if (inet_opt)
memcpy(optbuf, &inet_opt->opt,
sizeof(struct ip_options) +
inet_opt->opt.optlen);
- sockopt_release_sock(sk);
+ rcu_read_unlock();
if (opt->optlen == 0) {
len = 0;
@@ -1628,12 +1621,6 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname,
return -EFAULT;
return 0;
}
- case IP_TOS:
- val = inet->tos;
- break;
- case IP_MTU_DISCOVER:
- val = inet->pmtudisc;
- break;
case IP_MTU:
{
struct dst_entry *dst;
@@ -1643,24 +1630,55 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname,
val = dst_mtu(dst);
dst_release(dst);
}
- if (!val) {
- sockopt_release_sock(sk);
+ if (!val)
return -ENOTCONN;
+ goto copyval;
+ }
+ case IP_PKTOPTIONS:
+ {
+ struct msghdr msg;
+
+ if (sk->sk_type != SOCK_STREAM)
+ return -ENOPROTOOPT;
+
+ if (optval.is_kernel) {
+ msg.msg_control_is_user = false;
+ msg.msg_control = optval.kernel;
+ } else {
+ msg.msg_control_is_user = true;
+ msg.msg_control_user = optval.user;
}
- break;
+ msg.msg_controllen = len;
+ msg.msg_flags = in_compat_syscall() ? MSG_CMSG_COMPAT : 0;
+
+ if (inet_test_bit(PKTINFO, sk)) {
+ struct in_pktinfo info;
+
+ info.ipi_addr.s_addr = READ_ONCE(inet->inet_rcv_saddr);
+ info.ipi_spec_dst.s_addr = READ_ONCE(inet->inet_rcv_saddr);
+ info.ipi_ifindex = READ_ONCE(inet->mc_index);
+ put_cmsg(&msg, SOL_IP, IP_PKTINFO, sizeof(info), &info);
+ }
+ if (inet_test_bit(TTL, sk)) {
+ int hlim = READ_ONCE(inet->mc_ttl);
+
+ put_cmsg(&msg, SOL_IP, IP_TTL, sizeof(hlim), &hlim);
+ }
+ if (inet_test_bit(TOS, sk)) {
+ int tos = READ_ONCE(inet->rcv_tos);
+ put_cmsg(&msg, SOL_IP, IP_TOS, sizeof(tos), &tos);
+ }
+ len -= msg.msg_controllen;
+ return copy_to_sockptr(optlen, &len, sizeof(int));
}
- case IP_MULTICAST_TTL:
- val = inet->mc_ttl;
- break;
case IP_UNICAST_IF:
- val = (__force int)htonl((__u32) inet->uc_index);
- break;
+ val = (__force int)htonl((__u32) READ_ONCE(inet->uc_index));
+ goto copyval;
case IP_MULTICAST_IF:
{
struct in_addr addr;
len = min_t(unsigned int, len, sizeof(struct in_addr));
- addr.s_addr = inet->mc_addr;
- sockopt_release_sock(sk);
+ addr.s_addr = READ_ONCE(inet->mc_addr);
if (copy_to_sockptr(optlen, &len, sizeof(int)))
return -EFAULT;
@@ -1668,6 +1686,16 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname,
return -EFAULT;
return 0;
}
+ case IP_LOCAL_PORT_RANGE:
+ val = READ_ONCE(inet->local_port_range);
+ goto copyval;
+ }
+
+ if (needs_rtnl)
+ rtnl_lock();
+ sockopt_lock_sock(sk);
+
+ switch (optname) {
case IP_MSFILTER:
{
struct ip_msfilter msf;
@@ -1690,47 +1718,6 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname,
else
err = ip_get_mcast_msfilter(sk, optval, optlen, len);
goto out;
- case IP_PKTOPTIONS:
- {
- struct msghdr msg;
-
- sockopt_release_sock(sk);
-
- if (sk->sk_type != SOCK_STREAM)
- return -ENOPROTOOPT;
-
- if (optval.is_kernel) {
- msg.msg_control_is_user = false;
- msg.msg_control = optval.kernel;
- } else {
- msg.msg_control_is_user = true;
- msg.msg_control_user = optval.user;
- }
- msg.msg_controllen = len;
- msg.msg_flags = in_compat_syscall() ? MSG_CMSG_COMPAT : 0;
-
- if (inet_test_bit(PKTINFO, sk)) {
- struct in_pktinfo info;
-
- info.ipi_addr.s_addr = inet->inet_rcv_saddr;
- info.ipi_spec_dst.s_addr = inet->inet_rcv_saddr;
- info.ipi_ifindex = inet->mc_index;
- put_cmsg(&msg, SOL_IP, IP_PKTINFO, sizeof(info), &info);
- }
- if (inet_test_bit(TTL, sk)) {
- int hlim = inet->mc_ttl;
- put_cmsg(&msg, SOL_IP, IP_TTL, sizeof(hlim), &hlim);
- }
- if (inet_test_bit(TOS, sk)) {
- int tos = inet->rcv_tos;
- put_cmsg(&msg, SOL_IP, IP_TOS, sizeof(tos), &tos);
- }
- len -= msg.msg_controllen;
- return copy_to_sockptr(optlen, &len, sizeof(int));
- }
- case IP_LOCAL_PORT_RANGE:
- val = inet->local_port_range.hi << 16 | inet->local_port_range.lo;
- break;
case IP_PROTOCOL:
val = inet_sk(sk)->inet_num;
break;
@@ -1771,11 +1758,6 @@ int ip_getsockopt(struct sock *sk, int level,
err = do_ip_getsockopt(sk, level, optname,
USER_SOCKPTR(optval), USER_SOCKPTR(optlen));
-#if IS_ENABLED(CONFIG_BPFILTER_UMH)
- if (optname >= BPFILTER_IPT_SO_GET_INFO &&
- optname < BPFILTER_IPT_GET_MAX)
- err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen);
-#endif
#ifdef CONFIG_NETFILTER
/* we need to exclude all possible ENOPROTOOPTs except default case */
if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS &&
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 586b1b3e35b8..80ccd6661aa3 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -332,7 +332,7 @@ static int iptunnel_pmtud_build_icmpv6(struct sk_buff *skb, int mtu)
};
skb_reset_network_header(skb);
- csum = csum_partial(icmp6h, len, 0);
+ csum = skb_checksum(skb, skb_transport_offset(skb), len, 0);
icmp6h->icmp6_cksum = csum_ipv6_magic(&nip6h->saddr, &nip6h->daddr, len,
IPPROTO_ICMPV6, csum);
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index d1e7d0ceb7ed..9ab9b3ebe0cd 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -288,11 +288,11 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
switch (skb->protocol) {
case htons(ETH_P_IP):
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
- xfrm_decode_session(skb, &fl, AF_INET);
+ xfrm_decode_session(dev_net(dev), skb, &fl, AF_INET);
break;
case htons(ETH_P_IPV6):
memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
- xfrm_decode_session(skb, &fl, AF_INET6);
+ xfrm_decode_session(dev_net(dev), skb, &fl, AF_INET6);
break;
default:
goto tx_err;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 3f0c6d602fb7..362229836510 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -253,7 +253,7 @@ static int __net_init ipmr_rules_init(struct net *net)
goto err1;
}
- err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT, 0);
+ err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT);
if (err < 0)
goto err2;
@@ -1025,6 +1025,10 @@ static int ipmr_cache_report(const struct mr_table *mrt,
struct sk_buff *skb;
int ret;
+ mroute_sk = rcu_dereference(mrt->mroute_sk);
+ if (!mroute_sk)
+ return -EINVAL;
+
if (assert == IGMPMSG_WHOLEPKT || assert == IGMPMSG_WRVIFWHOLE)
skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
else
@@ -1069,7 +1073,8 @@ static int ipmr_cache_report(const struct mr_table *mrt,
msg = (struct igmpmsg *)skb_network_header(skb);
msg->im_vif = vifi;
msg->im_vif_hi = vifi >> 8;
- skb_dst_set(skb, dst_clone(skb_dst(pkt)));
+ ipv4_pktinfo_prepare(mroute_sk, pkt, false);
+ memcpy(skb->cb, pkt->cb, sizeof(skb->cb));
/* Add our header */
igmp = skb_put(skb, sizeof(struct igmphdr));
igmp->type = assert;
@@ -1079,12 +1084,6 @@ static int ipmr_cache_report(const struct mr_table *mrt,
skb->transport_header = skb->network_header;
}
- mroute_sk = rcu_dereference(mrt->mroute_sk);
- if (!mroute_sk) {
- kfree_skb(skb);
- return -EINVAL;
- }
-
igmpmsg_netlink_event(mrt, skb);
/* Deliver to mrouted */
@@ -1804,7 +1803,6 @@ static inline int ipmr_forward_finish(struct net *net, struct sock *sk,
struct ip_options *opt = &(IPCB(skb)->opt);
IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS);
- IP_ADD_STATS(net, IPSTATS_MIB_OUTOCTETS, skb->len);
if (unlikely(opt->optlen))
ip_forward_options(skb);
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index bd135165482a..591a2737808e 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -62,7 +62,7 @@ int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, un
#ifdef CONFIG_XFRM
if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
- xfrm_decode_session(skb, flowi4_to_flowi(&fl4), AF_INET) == 0) {
+ xfrm_decode_session(net, skb, flowi4_to_flowi(&fl4), AF_INET) == 0) {
struct dst_entry *dst = skb_dst(skb);
skb_dst_set(skb, NULL);
dst = xfrm_lookup(net, dst, flowi4_to_flowi(&fl4), sk, 0);
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 3abb430af9e6..385d945d8ebe 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -36,12 +36,12 @@ static const struct xt_table packet_mangler = {
static unsigned int
ipt_mangle_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state)
{
- unsigned int ret;
+ unsigned int ret, verdict;
const struct iphdr *iph;
- u_int8_t tos;
__be32 saddr, daddr;
- u_int32_t mark;
+ u32 mark;
int err;
+ u8 tos;
/* Save things which could affect route */
mark = skb->mark;
@@ -51,8 +51,9 @@ ipt_mangle_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *stat
tos = iph->tos;
ret = ipt_do_table(priv, skb, state);
+ verdict = ret & NF_VERDICT_MASK;
/* Reroute for ANY change. */
- if (ret != NF_DROP && ret != NF_STOLEN) {
+ if (verdict != NF_DROP && verdict != NF_STOLEN) {
iph = ip_hdr(skb);
if (iph->saddr != saddr ||
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index 56f6ecc43451..4d42d0756fd7 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -170,3 +170,4 @@ module_init(iptable_nat_init);
module_exit(iptable_nat_exit);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("iptables legacy nat table");
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index ca5e5b21587c..0e7f53964d0a 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -108,3 +108,4 @@ static void __exit iptable_raw_fini(void)
module_init(iptable_raw_init);
module_exit(iptable_raw_fini);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("iptables legacy raw table");
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index 265b39bc435b..482e733c3375 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -186,3 +186,4 @@ module_init(nf_defrag_init);
module_exit(nf_defrag_fini);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("IPv4 defragmentation support");
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.asn1 b/net/ipv4/netfilter/nf_nat_snmp_basic.asn1
index 24b73268f362..dc2cc5794160 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.asn1
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.asn1
@@ -1,3 +1,11 @@
+-- SPDX-License-Identifier: BSD-3-Clause
+--
+-- Copyright (C) 1990, 2002 IETF Trust and the persons identified as authors
+-- of the code
+--
+-- https://www.rfc-editor.org/rfc/rfc1157#section-4
+-- https://www.rfc-editor.org/rfc/rfc3416#section-3
+
Message ::=
SEQUENCE {
version
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index f33aeab9424f..04504b2b51df 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -239,7 +239,6 @@ static int nf_reject_fill_skb_dst(struct sk_buff *skb_in)
void nf_send_reset(struct net *net, struct sock *sk, struct sk_buff *oldskb,
int hook)
{
- struct net_device *br_indev __maybe_unused;
struct sk_buff *nskb;
struct iphdr *niph;
const struct tcphdr *oth;
@@ -289,9 +288,13 @@ void nf_send_reset(struct net *net, struct sock *sk, struct sk_buff *oldskb,
* build the eth header using the original destination's MAC as the
* source, and send the RST packet directly.
*/
- br_indev = nf_bridge_get_physindev(oldskb);
- if (br_indev) {
+ if (nf_bridge_info_exists(oldskb)) {
struct ethhdr *oeth = eth_hdr(oldskb);
+ struct net_device *br_indev;
+
+ br_indev = nf_bridge_get_physindev(oldskb, net);
+ if (!br_indev)
+ goto free_nskb;
nskb->dev = br_indev;
niph->tot_len = htons(nskb->len);
@@ -336,3 +339,4 @@ void nf_send_unreach(struct sk_buff *skb_in, int code, int hook)
EXPORT_SYMBOL_GPL(nf_send_unreach);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("IPv4 packet rejection core");
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 75e0aee35eb7..823306487a82 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -301,7 +301,7 @@ static int ping_pre_connect(struct sock *sk, struct sockaddr *uaddr,
if (addr_len < sizeof(struct sockaddr_in))
return -EINVAL;
- return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr);
+ return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, &addr_len);
}
/* Checks the bind address and possibly modifies sk->sk_bound_dev_if. */
@@ -551,7 +551,7 @@ void ping_err(struct sk_buff *skb, int offset, u32 info)
case ICMP_DEST_UNREACH:
if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */
ipv4_sk_update_pmtu(skb, sk, info);
- if (inet_sock->pmtudisc != IP_PMTUDISC_DONT) {
+ if (READ_ONCE(inet_sock->pmtudisc) != IP_PMTUDISC_DONT) {
err = EMSGSIZE;
harderr = 1;
break;
@@ -581,7 +581,7 @@ void ping_err(struct sk_buff *skb, int offset, u32 info)
* 4.1.3.3.
*/
if ((family == AF_INET && !inet_test_bit(RECVERR, sk)) ||
- (family == AF_INET6 && !inet6_sk(sk)->recverr)) {
+ (family == AF_INET6 && !inet6_test_bit(RECVERR6, sk))) {
if (!harderr || sk->sk_state != TCP_ESTABLISHED)
goto out;
} else {
@@ -773,11 +773,11 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (ipv4_is_multicast(daddr)) {
if (!ipc.oif || netif_index_is_l3_master(sock_net(sk), ipc.oif))
- ipc.oif = inet->mc_index;
+ ipc.oif = READ_ONCE(inet->mc_index);
if (!saddr)
- saddr = inet->mc_addr;
+ saddr = READ_ONCE(inet->mc_addr);
} else if (!ipc.oif)
- ipc.oif = inet->uc_index;
+ ipc.oif = READ_ONCE(inet->uc_index);
flowi4_init_output(&fl4, ipc.oif, ipc.sockc.mark, tos, scope,
sk->sk_protocol, inet_sk_flowi_flags(sk), faddr,
@@ -899,7 +899,6 @@ int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
#if IS_ENABLED(CONFIG_IPV6)
} else if (family == AF_INET6) {
- struct ipv6_pinfo *np = inet6_sk(sk);
struct ipv6hdr *ip6 = ipv6_hdr(skb);
DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
@@ -908,7 +907,7 @@ int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
sin6->sin6_port = 0;
sin6->sin6_addr = ip6->saddr;
sin6->sin6_flowinfo = 0;
- if (np->sndflow)
+ if (inet6_test_bit(SNDFLOW, sk))
sin6->sin6_flowinfo = ip6_flowinfo(ip6);
sin6->sin6_scope_id =
ipv6_iface_scope_id(&sin6->sin6_addr,
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index eaf1d3113b62..5f4654ebff48 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -83,7 +83,7 @@ static const struct snmp_mib snmp4_ipstats_list[] = {
SNMP_MIB_ITEM("InUnknownProtos", IPSTATS_MIB_INUNKNOWNPROTOS),
SNMP_MIB_ITEM("InDiscards", IPSTATS_MIB_INDISCARDS),
SNMP_MIB_ITEM("InDelivers", IPSTATS_MIB_INDELIVERS),
- SNMP_MIB_ITEM("OutRequests", IPSTATS_MIB_OUTPKTS),
+ SNMP_MIB_ITEM("OutRequests", IPSTATS_MIB_OUTREQUESTS),
SNMP_MIB_ITEM("OutDiscards", IPSTATS_MIB_OUTDISCARDS),
SNMP_MIB_ITEM("OutNoRoutes", IPSTATS_MIB_OUTNOROUTES),
SNMP_MIB_ITEM("ReasmTimeout", IPSTATS_MIB_REASMTIMEOUT),
@@ -93,6 +93,7 @@ static const struct snmp_mib snmp4_ipstats_list[] = {
SNMP_MIB_ITEM("FragOKs", IPSTATS_MIB_FRAGOKS),
SNMP_MIB_ITEM("FragFails", IPSTATS_MIB_FRAGFAILS),
SNMP_MIB_ITEM("FragCreates", IPSTATS_MIB_FRAGCREATES),
+ SNMP_MIB_ITEM("OutTransmits", IPSTATS_MIB_OUTPKTS),
SNMP_MIB_SENTINEL
};
@@ -298,6 +299,11 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TCPMigrateReqSuccess", LINUX_MIB_TCPMIGRATEREQSUCCESS),
SNMP_MIB_ITEM("TCPMigrateReqFailure", LINUX_MIB_TCPMIGRATEREQFAILURE),
SNMP_MIB_ITEM("TCPPLBRehash", LINUX_MIB_TCPPLBREHASH),
+ SNMP_MIB_ITEM("TCPAORequired", LINUX_MIB_TCPAOREQUIRED),
+ SNMP_MIB_ITEM("TCPAOBad", LINUX_MIB_TCPAOBAD),
+ SNMP_MIB_ITEM("TCPAOKeyNotFound", LINUX_MIB_TCPAOKEYNOTFOUND),
+ SNMP_MIB_ITEM("TCPAOGood", LINUX_MIB_TCPAOGOOD),
+ SNMP_MIB_ITEM("TCPAODroppedIcmps", LINUX_MIB_TCPAODROPPEDICMPS),
SNMP_MIB_SENTINEL
};
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 4b5db5d1edc2..aea89326c697 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -239,7 +239,7 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info)
if (code > NR_ICMP_UNREACH)
break;
if (code == ICMP_FRAG_NEEDED) {
- harderr = inet->pmtudisc != IP_PMTUDISC_DONT;
+ harderr = READ_ONCE(inet->pmtudisc) != IP_PMTUDISC_DONT;
err = EMSGSIZE;
} else {
err = icmp_err_convert[code].errno;
@@ -292,7 +292,7 @@ static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb)
/* Charge it to the socket. */
- ipv4_pktinfo_prepare(sk, skb);
+ ipv4_pktinfo_prepare(sk, skb, true);
if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) {
kfree_skb_reason(skb, reason);
return NET_RX_DROP;
@@ -482,7 +482,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
int free = 0;
__be32 daddr;
__be32 saddr;
- int err;
+ int uc_index, err;
struct ip_options_data opt_copy;
struct raw_frag_vec rfv;
int hdrincl;
@@ -576,24 +576,25 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
tos = get_rttos(&ipc, inet);
scope = ip_sendmsg_scope(inet, &ipc, msg);
+ uc_index = READ_ONCE(inet->uc_index);
if (ipv4_is_multicast(daddr)) {
if (!ipc.oif || netif_index_is_l3_master(sock_net(sk), ipc.oif))
- ipc.oif = inet->mc_index;
+ ipc.oif = READ_ONCE(inet->mc_index);
if (!saddr)
- saddr = inet->mc_addr;
+ saddr = READ_ONCE(inet->mc_addr);
} else if (!ipc.oif) {
- ipc.oif = inet->uc_index;
- } else if (ipv4_is_lbcast(daddr) && inet->uc_index) {
+ ipc.oif = uc_index;
+ } else if (ipv4_is_lbcast(daddr) && uc_index) {
/* oif is set, packet is to local broadcast
* and uc_index is set. oif is most likely set
* by sk_bound_dev_if. If uc_index != oif check if the
* oif is an L3 master and uc_index is an L3 slave.
* If so, we want to allow the send using the uc_index.
*/
- if (ipc.oif != inet->uc_index &&
+ if (ipc.oif != uc_index &&
ipc.oif == l3mdev_master_ifindex_by_index(sock_net(sk),
- inet->uc_index)) {
- ipc.oif = inet->uc_index;
+ uc_index)) {
+ ipc.oif = uc_index;
}
}
diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c
index 63a40e4b678f..fe2140c8375c 100644
--- a/net/ipv4/raw_diag.c
+++ b/net/ipv4/raw_diag.c
@@ -257,5 +257,6 @@ static void __exit raw_diag_exit(void)
module_init(raw_diag_init);
module_exit(raw_diag_exit);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("RAW socket monitoring via SOCK_DIAG");
MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-255 /* AF_INET - IPPROTO_RAW */);
MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 10-255 /* AF_INET6 - IPPROTO_RAW */);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index d8c99bdc6170..16615d107cf0 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -780,7 +780,7 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow
goto reject_redirect;
}
- n = __ipv4_neigh_lookup(rt->dst.dev, new_gw);
+ n = __ipv4_neigh_lookup(rt->dst.dev, (__force u32)new_gw);
if (!n)
n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev);
if (!IS_ERR(n)) {
@@ -1213,6 +1213,7 @@ EXPORT_INDIRECT_CALLABLE(ipv4_dst_check);
static void ipv4_send_dest_unreach(struct sk_buff *skb)
{
+ struct net_device *dev;
struct ip_options opt;
int res;
@@ -1230,7 +1231,8 @@ static void ipv4_send_dest_unreach(struct sk_buff *skb)
opt.optlen = ip_hdr(skb)->ihl * 4 - sizeof(struct iphdr);
rcu_read_lock();
- res = __ip_options_compile(dev_net(skb->dev), &opt, skb, NULL);
+ dev = skb->dev ? skb->dev : skb_rtable(skb)->dst.dev;
+ res = __ip_options_compile(dev_net(dev), &opt, skb, NULL);
rcu_read_unlock();
if (res)
@@ -1630,7 +1632,7 @@ struct rtable *rt_dst_alloc(struct net_device *dev,
{
struct rtable *rt;
- rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
+ rt = dst_alloc(&ipv4_dst_ops, dev, DST_OBSOLETE_FORCE_CHK,
(noxfrm ? DST_NOXFRM : 0));
if (rt) {
@@ -1658,7 +1660,7 @@ struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt)
{
struct rtable *new_rt;
- new_rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
+ new_rt = dst_alloc(&ipv4_dst_ops, dev, DST_OBSOLETE_FORCE_CHK,
rt->dst.flags);
if (new_rt) {
@@ -2144,6 +2146,7 @@ static int ip_mkroute_input(struct sk_buff *skb,
int h = fib_multipath_hash(res->fi->fib_net, NULL, skb, hkeys);
fib_select_multipath(res, h);
+ IPCB(skb)->flags |= IPSKB_MULTIPATH;
}
#endif
@@ -2831,7 +2834,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
struct rtable *ort = (struct rtable *) dst_orig;
struct rtable *rt;
- rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_DEAD, 0);
+ rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, DST_OBSOLETE_DEAD, 0);
if (rt) {
struct dst_entry *new = &rt->dst;
@@ -2882,54 +2885,6 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
}
EXPORT_SYMBOL_GPL(ip_route_output_flow);
-struct rtable *ip_route_output_tunnel(struct sk_buff *skb,
- struct net_device *dev,
- struct net *net, __be32 *saddr,
- const struct ip_tunnel_info *info,
- u8 protocol, bool use_cache)
-{
-#ifdef CONFIG_DST_CACHE
- struct dst_cache *dst_cache;
-#endif
- struct rtable *rt = NULL;
- struct flowi4 fl4;
- __u8 tos;
-
-#ifdef CONFIG_DST_CACHE
- dst_cache = (struct dst_cache *)&info->dst_cache;
- if (use_cache) {
- rt = dst_cache_get_ip4(dst_cache, saddr);
- if (rt)
- return rt;
- }
-#endif
- memset(&fl4, 0, sizeof(fl4));
- fl4.flowi4_mark = skb->mark;
- fl4.flowi4_proto = protocol;
- fl4.daddr = info->key.u.ipv4.dst;
- fl4.saddr = info->key.u.ipv4.src;
- tos = info->key.tos;
- fl4.flowi4_tos = RT_TOS(tos);
-
- rt = ip_route_output_key(net, &fl4);
- if (IS_ERR(rt)) {
- netdev_dbg(dev, "no route to %pI4\n", &fl4.daddr);
- return ERR_PTR(-ENETUNREACH);
- }
- if (rt->dst.dev == dev) { /* is this necessary? */
- netdev_dbg(dev, "circular route to %pI4\n", &fl4.daddr);
- ip_rt_put(rt);
- return ERR_PTR(-ELOOP);
- }
-#ifdef CONFIG_DST_CACHE
- if (use_cache)
- dst_cache_set_ip4(dst_cache, &rt->dst, fl4.saddr);
-#endif
- *saddr = fl4.saddr;
- return rt;
-}
-EXPORT_SYMBOL_GPL(ip_route_output_tunnel);
-
/* called with rcu_read_lock held */
static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
struct rtable *rt, u32 table_id, struct flowi4 *fl4,
@@ -3414,6 +3369,8 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
fa->fa_type == fri.type) {
fri.offload = READ_ONCE(fa->offload);
fri.trap = READ_ONCE(fa->trap);
+ fri.offload_failed =
+ READ_ONCE(fa->offload_failed);
break;
}
}
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index dc478a0574cb..61f1c96cfe63 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -41,7 +41,6 @@ static siphash_aligned_key_t syncookie_secret[2];
* requested/supported by the syn/synack exchange.
*/
#define TSBITS 6
-#define TSMASK (((__u32)1 << TSBITS) - 1)
static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport,
u32 count, int c)
@@ -52,6 +51,14 @@ static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport,
count, &syncookie_secret[c]);
}
+/* Convert one nsec 64bit timestamp to ts (ms or usec resolution) */
+static u64 tcp_ns_to_ts(bool usec_ts, u64 val)
+{
+ if (usec_ts)
+ return div_u64(val, NSEC_PER_USEC);
+
+ return div_u64(val, NSEC_PER_MSEC);
+}
/*
* when syncookies are in effect and tcp timestamps are enabled we encode
@@ -62,27 +69,24 @@ static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport,
*/
u64 cookie_init_timestamp(struct request_sock *req, u64 now)
{
- struct inet_request_sock *ireq;
- u32 ts, ts_now = tcp_ns_to_ts(now);
+ const struct inet_request_sock *ireq = inet_rsk(req);
+ u64 ts, ts_now = tcp_ns_to_ts(false, now);
u32 options = 0;
- ireq = inet_rsk(req);
-
options = ireq->wscale_ok ? ireq->snd_wscale : TS_OPT_WSCALE_MASK;
if (ireq->sack_ok)
options |= TS_OPT_SACK;
if (ireq->ecn_ok)
options |= TS_OPT_ECN;
- ts = ts_now & ~TSMASK;
+ ts = (ts_now >> TSBITS) << TSBITS;
ts |= options;
- if (ts > ts_now) {
- ts >>= TSBITS;
- ts--;
- ts <<= TSBITS;
- ts |= options;
- }
- return (u64)ts * (NSEC_PER_SEC / TCP_TS_HZ);
+ if (ts > ts_now)
+ ts -= (1UL << TSBITS);
+
+ if (tcp_rsk(req)->req_usec_ts)
+ return ts * NSEC_PER_USEC;
+ return ts * NSEC_PER_MSEC;
}
@@ -185,12 +189,14 @@ __u32 cookie_v4_init_sequence(const struct sk_buff *skb, __u16 *mssp)
* Check if a ack sequence number is a valid syncookie.
* Return the decoded mss if it is, or 0 if not.
*/
-int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th,
- u32 cookie)
+int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th)
{
+ __u32 cookie = ntohl(th->ack_seq) - 1;
__u32 seq = ntohl(th->seq) - 1;
- __u32 mssind = check_tcp_syn_cookie(cookie, iph->saddr, iph->daddr,
- th->source, th->dest, seq);
+ __u32 mssind;
+
+ mssind = check_tcp_syn_cookie(cookie, iph->saddr, iph->daddr,
+ th->source, th->dest, seq);
return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0;
}
@@ -198,7 +204,7 @@ EXPORT_SYMBOL_GPL(__cookie_v4_check);
struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
- struct dst_entry *dst, u32 tsoff)
+ struct dst_entry *dst)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct sock *child;
@@ -208,7 +214,6 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb,
NULL, &own_req);
if (child) {
refcount_set(&req->rsk_refcnt, 1);
- tcp_sk(child)->tsoffset = tsoff;
sock_rps_save_rxhash(child, skb);
if (rsk_drop_req(req)) {
@@ -265,26 +270,46 @@ bool cookie_timestamp_decode(const struct net *net,
}
EXPORT_SYMBOL(cookie_timestamp_decode);
-bool cookie_ecn_ok(const struct tcp_options_received *tcp_opt,
- const struct net *net, const struct dst_entry *dst)
+static int cookie_tcp_reqsk_init(struct sock *sk, struct sk_buff *skb,
+ struct request_sock *req)
{
- bool ecn_ok = tcp_opt->rcv_tsecr & TS_OPT_ECN;
+ struct inet_request_sock *ireq = inet_rsk(req);
+ struct tcp_request_sock *treq = tcp_rsk(req);
+ const struct tcphdr *th = tcp_hdr(skb);
- if (!ecn_ok)
- return false;
+ req->num_retrans = 0;
- if (READ_ONCE(net->ipv4.sysctl_tcp_ecn))
- return true;
+ ireq->ir_num = ntohs(th->dest);
+ ireq->ir_rmt_port = th->source;
+ ireq->ir_iif = inet_request_bound_dev_if(sk, skb);
+ ireq->ir_mark = inet_request_mark(sk, skb);
+
+ if (IS_ENABLED(CONFIG_SMC))
+ ireq->smc_ok = 0;
+
+ treq->snt_synack = 0;
+ treq->tfo_listener = false;
+ treq->txhash = net_tx_rndhash();
+ treq->rcv_isn = ntohl(th->seq) - 1;
+ treq->snt_isn = ntohl(th->ack_seq) - 1;
+ treq->syn_tos = TCP_SKB_CB(skb)->ip_dsfield;
+ treq->req_usec_ts = false;
+
+#if IS_ENABLED(CONFIG_MPTCP)
+ treq->is_mptcp = sk_is_mptcp(sk);
+ if (treq->is_mptcp)
+ return mptcp_subflow_init_cookie_req(req, sk, skb);
+#endif
- return dst_feature(dst, RTAX_FEATURE_ECN);
+ return 0;
}
-EXPORT_SYMBOL(cookie_ecn_ok);
struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops,
- const struct tcp_request_sock_ops *af_ops,
- struct sock *sk,
- struct sk_buff *skb)
+ struct sock *sk, struct sk_buff *skb,
+ struct tcp_options_received *tcp_opt,
+ int mss, u32 tsoff)
{
+ struct inet_request_sock *ireq;
struct tcp_request_sock *treq;
struct request_sock *req;
@@ -296,120 +321,109 @@ struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops,
if (!req)
return NULL;
+ if (cookie_tcp_reqsk_init(sk, skb, req)) {
+ reqsk_free(req);
+ return NULL;
+ }
+
+ ireq = inet_rsk(req);
treq = tcp_rsk(req);
- /* treq->af_specific might be used to perform TCP_MD5 lookup */
- treq->af_specific = af_ops;
+ req->mss = mss;
+ req->ts_recent = tcp_opt->saw_tstamp ? tcp_opt->rcv_tsval : 0;
- treq->syn_tos = TCP_SKB_CB(skb)->ip_dsfield;
-#if IS_ENABLED(CONFIG_MPTCP)
- treq->is_mptcp = sk_is_mptcp(sk);
- if (treq->is_mptcp) {
- int err = mptcp_subflow_init_cookie_req(req, sk, skb);
+ ireq->snd_wscale = tcp_opt->snd_wscale;
+ ireq->tstamp_ok = tcp_opt->saw_tstamp;
+ ireq->sack_ok = tcp_opt->sack_ok;
+ ireq->wscale_ok = tcp_opt->wscale_ok;
+ ireq->ecn_ok = !!(tcp_opt->rcv_tsecr & TS_OPT_ECN);
- if (err) {
- reqsk_free(req);
- return NULL;
- }
- }
-#endif
+ treq->ts_off = tsoff;
return req;
}
EXPORT_SYMBOL_GPL(cookie_tcp_reqsk_alloc);
-/* On input, sk is a listener.
- * Output is listener if incoming packet would not create a child
- * NULL if memory could not be allocated.
- */
-struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
+static struct request_sock *cookie_tcp_check(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
{
- struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt;
struct tcp_options_received tcp_opt;
- struct inet_request_sock *ireq;
- struct tcp_request_sock *treq;
- struct tcp_sock *tp = tcp_sk(sk);
- const struct tcphdr *th = tcp_hdr(skb);
- __u32 cookie = ntohl(th->ack_seq) - 1;
- struct sock *ret = sk;
- struct request_sock *req;
- int full_space, mss;
- struct rtable *rt;
- __u8 rcv_wscale;
- struct flowi4 fl4;
u32 tsoff = 0;
-
- if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) ||
- !th->ack || th->rst)
- goto out;
+ int mss;
if (tcp_synq_no_recent_overflow(sk))
goto out;
- mss = __cookie_v4_check(ip_hdr(skb), th, cookie);
- if (mss == 0) {
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED);
+ mss = __cookie_v4_check(ip_hdr(skb), tcp_hdr(skb));
+ if (!mss) {
+ __NET_INC_STATS(net, LINUX_MIB_SYNCOOKIESFAILED);
goto out;
}
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV);
+ __NET_INC_STATS(net, LINUX_MIB_SYNCOOKIESRECV);
/* check for timestamp cookie support */
memset(&tcp_opt, 0, sizeof(tcp_opt));
- tcp_parse_options(sock_net(sk), skb, &tcp_opt, 0, NULL);
+ tcp_parse_options(net, skb, &tcp_opt, 0, NULL);
if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) {
- tsoff = secure_tcp_ts_off(sock_net(sk),
+ tsoff = secure_tcp_ts_off(net,
ip_hdr(skb)->daddr,
ip_hdr(skb)->saddr);
tcp_opt.rcv_tsecr -= tsoff;
}
- if (!cookie_timestamp_decode(sock_net(sk), &tcp_opt))
+ if (!cookie_timestamp_decode(net, &tcp_opt))
goto out;
- ret = NULL;
- req = cookie_tcp_reqsk_alloc(&tcp_request_sock_ops,
- &tcp_request_sock_ipv4_ops, sk, skb);
- if (!req)
+ return cookie_tcp_reqsk_alloc(&tcp_request_sock_ops, sk, skb,
+ &tcp_opt, mss, tsoff);
+out:
+ return ERR_PTR(-EINVAL);
+}
+
+/* On input, sk is a listener.
+ * Output is listener if incoming packet would not create a child
+ * NULL if memory could not be allocated.
+ */
+struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
+{
+ struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt;
+ const struct tcphdr *th = tcp_hdr(skb);
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct inet_request_sock *ireq;
+ struct net *net = sock_net(sk);
+ struct request_sock *req;
+ struct sock *ret = sk;
+ struct flowi4 fl4;
+ struct rtable *rt;
+ __u8 rcv_wscale;
+ int full_space;
+
+ if (!READ_ONCE(net->ipv4.sysctl_tcp_syncookies) ||
+ !th->ack || th->rst)
+ goto out;
+
+ req = cookie_tcp_check(net, sk, skb);
+ if (IS_ERR(req))
goto out;
+ if (!req)
+ goto out_drop;
ireq = inet_rsk(req);
- treq = tcp_rsk(req);
- treq->rcv_isn = ntohl(th->seq) - 1;
- treq->snt_isn = cookie;
- treq->ts_off = 0;
- treq->txhash = net_tx_rndhash();
- req->mss = mss;
- ireq->ir_num = ntohs(th->dest);
- ireq->ir_rmt_port = th->source;
+
sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
- ireq->ir_mark = inet_request_mark(sk, skb);
- ireq->snd_wscale = tcp_opt.snd_wscale;
- ireq->sack_ok = tcp_opt.sack_ok;
- ireq->wscale_ok = tcp_opt.wscale_ok;
- ireq->tstamp_ok = tcp_opt.saw_tstamp;
- req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
- treq->snt_synack = 0;
- treq->tfo_listener = false;
-
- if (IS_ENABLED(CONFIG_SMC))
- ireq->smc_ok = 0;
-
- ireq->ir_iif = inet_request_bound_dev_if(sk, skb);
/* We throwed the options of the initial SYN away, so we hope
* the ACK carries the same options again (see RFC1122 4.2.3.8)
*/
- RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(sock_net(sk), skb));
+ RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(net, skb));
- if (security_inet_conn_request(sk, skb, req)) {
- reqsk_free(req);
- goto out;
- }
+ if (security_inet_conn_request(sk, skb, req))
+ goto out_free;
- req->num_retrans = 0;
+ tcp_ao_syncookie(sk, skb, req, AF_INET);
/*
* We need to lookup the route here to get at the correct
@@ -423,11 +437,9 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
opt->srr ? opt->faddr : ireq->ir_rmt_addr,
ireq->ir_loc_addr, th->source, th->dest, sk->sk_uid);
security_req_classify_flow(req, flowi4_to_flowi_common(&fl4));
- rt = ip_route_output_key(sock_net(sk), &fl4);
- if (IS_ERR(rt)) {
- reqsk_free(req);
- goto out;
- }
+ rt = ip_route_output_key(net, &fl4);
+ if (IS_ERR(rt))
+ goto out_free;
/* Try to redo what tcp_v4_send_synack did. */
req->rsk_window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW);
@@ -443,13 +455,18 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
dst_metric(&rt->dst, RTAX_INITRWND));
ireq->rcv_wscale = rcv_wscale;
- ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, sock_net(sk), &rt->dst);
+ ireq->ecn_ok &= cookie_ecn_ok(net, &rt->dst);
- ret = tcp_get_cookie_sock(sk, skb, req, &rt->dst, tsoff);
+ ret = tcp_get_cookie_sock(sk, skb, req, &rt->dst);
/* ip_queue_xmit() depends on our flow being setup
* Normal sockets get it right from inet_csk_route_child_sock()
*/
if (ret)
inet_sk(ret)->cork.fl.u.ip4 = fl4;
-out: return ret;
+out:
+ return ret;
+out_free:
+ reqsk_free(req);
+out_drop:
+ return NULL;
}
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 6ac890b4073f..7e4f16a7dcc1 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -50,26 +50,22 @@ static int tcp_plb_max_cong_thresh = 256;
static int sysctl_tcp_low_latency __read_mostly;
/* Update system visible IP port range */
-static void set_local_port_range(struct net *net, int range[2])
+static void set_local_port_range(struct net *net, unsigned int low, unsigned int high)
{
- bool same_parity = !((range[0] ^ range[1]) & 1);
+ bool same_parity = !((low ^ high) & 1);
- write_seqlock_bh(&net->ipv4.ip_local_ports.lock);
if (same_parity && !net->ipv4.ip_local_ports.warned) {
net->ipv4.ip_local_ports.warned = true;
pr_err_ratelimited("ip_local_port_range: prefer different parity for start/end values.\n");
}
- net->ipv4.ip_local_ports.range[0] = range[0];
- net->ipv4.ip_local_ports.range[1] = range[1];
- write_sequnlock_bh(&net->ipv4.ip_local_ports.lock);
+ WRITE_ONCE(net->ipv4.ip_local_ports.range, high << 16 | low);
}
/* Validate changes from /proc interface. */
static int ipv4_local_port_range(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
- struct net *net =
- container_of(table->data, struct net, ipv4.ip_local_ports.range);
+ struct net *net = table->data;
int ret;
int range[2];
struct ctl_table tmp = {
@@ -93,7 +89,7 @@ static int ipv4_local_port_range(struct ctl_table *table, int write,
(range[0] < READ_ONCE(net->ipv4.sysctl_ip_prot_sock)))
ret = -EINVAL;
else
- set_local_port_range(net, range);
+ set_local_port_range(net, range[0], range[1]);
}
return ret;
@@ -733,8 +729,8 @@ static struct ctl_table ipv4_net_table[] = {
},
{
.procname = "ip_local_port_range",
- .maxlen = sizeof(init_net.ipv4.ip_local_ports.range),
- .data = &init_net.ipv4.ip_local_ports.range,
+ .maxlen = 0,
+ .data = &init_net,
.mode = 0644,
.proc_handler = ipv4_local_port_range,
},
@@ -1367,6 +1363,15 @@ static struct ctl_table ipv4_net_table[] = {
.extra1 = SYSCTL_ZERO,
},
{
+ .procname = "tcp_backlog_ack_defer",
+ .data = &init_net.ipv4.sysctl_tcp_backlog_ack_defer,
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ {
.procname = "tcp_reflect_tos",
.data = &init_net.ipv4.sysctl_tcp_reflect_tos,
.maxlen = sizeof(u8),
@@ -1489,6 +1494,14 @@ static struct ctl_table ipv4_net_table[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
+ {
+ .procname = "tcp_pingpong_thresh",
+ .data = &init_net.ipv4.sysctl_tcp_pingpong_thresh,
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ONE,
+ },
{ }
};
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index b1559481898d..7e2481b9eae1 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -722,6 +722,7 @@ void tcp_push(struct sock *sk, int flags, int mss_now,
if (!test_bit(TSQ_THROTTLED, &sk->sk_tsq_flags)) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAUTOCORKING);
set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags);
+ smp_mb__after_atomic();
}
/* It is possible TX completion already happened
* before we set TSQ_THROTTLED.
@@ -831,7 +832,9 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
*/
if (!skb_queue_empty(&sk->sk_receive_queue))
break;
- sk_wait_data(sk, &timeo, NULL);
+ ret = sk_wait_data(sk, &timeo, NULL);
+ if (ret < 0)
+ break;
if (signal_pending(current)) {
ret = sock_intr_errno(timeo);
break;
@@ -925,10 +928,11 @@ int tcp_send_mss(struct sock *sk, int *size_goal, int flags)
return mss_now;
}
-/* In some cases, both sendmsg() could have added an skb to the write queue,
- * but failed adding payload on it. We need to remove it to consume less
+/* In some cases, sendmsg() could have added an skb to the write queue,
+ * but failed adding payload on it. We need to remove it to consume less
* memory, but more importantly be able to generate EPOLLOUT for Edge Trigger
- * epoll() users.
+ * epoll() users. Another reason is that tcp_write_xmit() does not like
+ * finding an empty skb in the write queue.
*/
void tcp_remove_empty_skb(struct sock *sk)
{
@@ -1287,6 +1291,7 @@ new_segment:
wait_for_space:
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+ tcp_remove_empty_skb(sk);
if (copied)
tcp_push(sk, flags & ~MSG_MORE, mss_now,
TCP_NAGLE_PUSH, size_goal);
@@ -1621,16 +1626,13 @@ EXPORT_SYMBOL(tcp_read_sock);
int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
{
- struct tcp_sock *tp = tcp_sk(sk);
- u32 seq = tp->copied_seq;
struct sk_buff *skb;
int copied = 0;
- u32 offset;
if (sk->sk_state == TCP_LISTEN)
return -ENOTCONN;
- while ((skb = tcp_recv_skb(sk, seq, &offset)) != NULL) {
+ while ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) {
u8 tcp_flags;
int used;
@@ -1643,13 +1645,10 @@ int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
copied = used;
break;
}
- seq += used;
copied += used;
- if (tcp_flags & TCPHDR_FIN) {
- ++seq;
+ if (tcp_flags & TCPHDR_FIN)
break;
- }
}
return copied;
}
@@ -1787,7 +1786,17 @@ static skb_frag_t *skb_advance_to_frag(struct sk_buff *skb, u32 offset_skb,
static bool can_map_frag(const skb_frag_t *frag)
{
- return skb_frag_size(frag) == PAGE_SIZE && !skb_frag_off(frag);
+ struct page *page;
+
+ if (skb_frag_size(frag) != PAGE_SIZE || skb_frag_off(frag))
+ return false;
+
+ page = skb_frag_page(frag);
+
+ if (PageCompound(page) || page->mapping)
+ return false;
+
+ return true;
}
static int find_next_mappable_frag(const skb_frag_t *frag,
@@ -1851,7 +1860,6 @@ static int receive_fallback_to_copy(struct sock *sk,
{
unsigned long copy_address = (unsigned long)zc->copybuf_address;
struct msghdr msg = {};
- struct iovec iov;
int err;
zc->length = 0;
@@ -1860,8 +1868,8 @@ static int receive_fallback_to_copy(struct sock *sk,
if (copy_address != zc->copybuf_address)
return -EINVAL;
- err = import_single_range(ITER_DEST, (void __user *)copy_address,
- inq, &iov, &msg.msg_iter);
+ err = import_ubuf(ITER_DEST, (void __user *)copy_address, inq,
+ &msg.msg_iter);
if (err)
return err;
@@ -1888,14 +1896,13 @@ static int tcp_copy_straggler_data(struct tcp_zerocopy_receive *zc,
{
unsigned long copy_address = (unsigned long)zc->copybuf_address;
struct msghdr msg = {};
- struct iovec iov;
int err;
if (copy_address != zc->copybuf_address)
return -EINVAL;
- err = import_single_range(ITER_DEST, (void __user *)copy_address,
- copylen, &iov, &msg.msg_iter);
+ err = import_ubuf(ITER_DEST, (void __user *)copy_address, copylen,
+ &msg.msg_iter);
if (err)
return err;
err = skb_copy_datagram_msg(skb, *offset, &msg, copylen);
@@ -2256,14 +2263,14 @@ void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk,
}
}
- if (sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE)
+ if (READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_SOFTWARE)
has_timestamping = true;
else
tss->ts[0] = (struct timespec64) {0};
}
if (tss->ts[2].tv_sec || tss->ts[2].tv_nsec) {
- if (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE)
+ if (READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_RAW_HARDWARE)
has_timestamping = true;
else
tss->ts[2] = (struct timespec64) {0};
@@ -2448,7 +2455,11 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len,
__sk_flush_backlog(sk);
} else {
tcp_cleanup_rbuf(sk, copied);
- sk_wait_data(sk, &timeo, last);
+ err = sk_wait_data(sk, &timeo, last);
+ if (err < 0) {
+ err = copied ? : err;
+ goto out;
+ }
}
if ((flags & MSG_PEEK) &&
@@ -2603,6 +2614,7 @@ void tcp_set_state(struct sock *sk, int state)
BUILD_BUG_ON((int)BPF_TCP_LISTEN != (int)TCP_LISTEN);
BUILD_BUG_ON((int)BPF_TCP_CLOSING != (int)TCP_CLOSING);
BUILD_BUG_ON((int)BPF_TCP_NEW_SYN_RECV != (int)TCP_NEW_SYN_RECV);
+ BUILD_BUG_ON((int)BPF_TCP_BOUND_INACTIVE != (int)TCP_BOUND_INACTIVE);
BUILD_BUG_ON((int)BPF_TCP_MAX_STATES != (int)TCP_MAX_STATES);
/* bpf uapi header bpf.h defines an anonymous enum with values
@@ -2972,12 +2984,6 @@ int tcp_disconnect(struct sock *sk, int flags)
int old_state = sk->sk_state;
u32 seq;
- /* Deny disconnect if other threads are blocked in sk_wait_event()
- * or inet_wait_for_connect().
- */
- if (sk->sk_wait_pending)
- return -EBUSY;
-
if (old_state != TCP_CLOSE)
tcp_set_state(sk, TCP_CLOSE);
@@ -3372,9 +3378,25 @@ int tcp_set_window_clamp(struct sock *sk, int val)
return -EINVAL;
tp->window_clamp = 0;
} else {
- tp->window_clamp = val < SOCK_MIN_RCVBUF / 2 ?
- SOCK_MIN_RCVBUF / 2 : val;
- tp->rcv_ssthresh = min(tp->rcv_wnd, tp->window_clamp);
+ u32 new_rcv_ssthresh, old_window_clamp = tp->window_clamp;
+ u32 new_window_clamp = val < SOCK_MIN_RCVBUF / 2 ?
+ SOCK_MIN_RCVBUF / 2 : val;
+
+ if (new_window_clamp == old_window_clamp)
+ return 0;
+
+ tp->window_clamp = new_window_clamp;
+ if (new_window_clamp < old_window_clamp) {
+ /* need to apply the reserved mem provisioning only
+ * when shrinking the window clamp
+ */
+ __tcp_adjust_rcv_ssthresh(sk, tp->window_clamp);
+
+ } else {
+ new_rcv_ssthresh = min(tp->rcv_wnd, tp->window_clamp);
+ tp->rcv_ssthresh = max(new_rcv_ssthresh,
+ tp->rcv_ssthresh);
+ }
}
return 0;
}
@@ -3597,6 +3619,35 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
__tcp_sock_set_quickack(sk, val);
break;
+ case TCP_AO_REPAIR:
+ if (!tcp_can_repair_sock(sk)) {
+ err = -EPERM;
+ break;
+ }
+ err = tcp_ao_set_repair(sk, optval, optlen);
+ break;
+#ifdef CONFIG_TCP_AO
+ case TCP_AO_ADD_KEY:
+ case TCP_AO_DEL_KEY:
+ case TCP_AO_INFO: {
+ /* If this is the first TCP-AO setsockopt() on the socket,
+ * sk_state has to be LISTEN or CLOSE. Allow TCP_REPAIR
+ * in any state.
+ */
+ if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
+ goto ao_parse;
+ if (rcu_dereference_protected(tcp_sk(sk)->ao_info,
+ lockdep_sock_is_held(sk)))
+ goto ao_parse;
+ if (tp->repair)
+ goto ao_parse;
+ err = -EISCONN;
+ break;
+ao_parse:
+ err = tp->af_specific->ao_parse(sk, optname, optval, optlen);
+ break;
+ }
+#endif
#ifdef CONFIG_TCP_MD5SIG
case TCP_MD5SIG:
case TCP_MD5SIG_EXT:
@@ -3635,10 +3686,16 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
tp->fastopen_no_cookie = val;
break;
case TCP_TIMESTAMP:
- if (!tp->repair)
+ if (!tp->repair) {
err = -EPERM;
- else
- WRITE_ONCE(tp->tsoffset, val - tcp_time_stamp_raw());
+ break;
+ }
+ /* val is an opaque field,
+ * and low order bit contains usec_ts enable bit.
+ * Its a best effort, and we do not care if user makes an error.
+ */
+ tp->tcp_usec_ts = val & 1;
+ WRITE_ONCE(tp->tsoffset, val - tcp_clock_ts(tp->tcp_usec_ts));
break;
case TCP_REPAIR_WINDOW:
err = tcp_repair_set_window(tp, optval, optlen);
@@ -3760,9 +3817,12 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
info->tcpi_options |= TCPI_OPT_ECN_SEEN;
if (tp->syn_data_acked)
info->tcpi_options |= TCPI_OPT_SYN_DATA;
+ if (tp->tcp_usec_ts)
+ info->tcpi_options |= TCPI_OPT_USEC_TS;
info->tcpi_rto = jiffies_to_usecs(icsk->icsk_rto);
- info->tcpi_ato = jiffies_to_usecs(icsk->icsk_ack.ato);
+ info->tcpi_ato = jiffies_to_usecs(min_t(u32, icsk->icsk_ack.ato,
+ tcp_delack_max(sk)));
info->tcpi_snd_mss = tp->mss_cache;
info->tcpi_rcv_mss = icsk->icsk_ack.rcv_mss;
@@ -3818,6 +3878,13 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
info->tcpi_rcv_wnd = tp->rcv_wnd;
info->tcpi_rehash = tp->plb_rehash + tp->timeout_rehash;
info->tcpi_fastopen_client_fail = tp->fastopen_client_fail;
+
+ info->tcpi_total_rto = tp->total_rto;
+ info->tcpi_total_rto_recoveries = tp->total_rto_recoveries;
+ info->tcpi_total_rto_time = tp->total_rto_time;
+ if (tp->rto_stamp)
+ info->tcpi_total_rto_time += tcp_clock_ms() - tp->rto_stamp;
+
unlock_sock_fast(sk, slow);
}
EXPORT_SYMBOL_GPL(tcp_get_info);
@@ -4141,7 +4208,11 @@ int do_tcp_getsockopt(struct sock *sk, int level,
break;
case TCP_TIMESTAMP:
- val = tcp_time_stamp_raw() + READ_ONCE(tp->tsoffset);
+ val = tcp_clock_ts(tp->tcp_usec_ts) + READ_ONCE(tp->tsoffset);
+ if (tp->tcp_usec_ts)
+ val |= 1;
+ else
+ val &= ~1;
break;
case TCP_NOTSENT_LOWAT:
val = READ_ONCE(tp->notsent_lowat);
@@ -4251,6 +4322,23 @@ zerocopy_rcv_out:
return err;
}
#endif
+ case TCP_AO_REPAIR:
+ if (!tcp_can_repair_sock(sk))
+ return -EPERM;
+ return tcp_ao_get_repair(sk, optval, optlen);
+ case TCP_AO_GET_KEYS:
+ case TCP_AO_INFO: {
+ int err;
+
+ sockopt_lock_sock(sk);
+ if (optname == TCP_AO_GET_KEYS)
+ err = tcp_ao_get_mkts(sk, optval, optlen);
+ else
+ err = tcp_ao_get_sock_info(sk, optval, optlen);
+ sockopt_release_sock(sk);
+
+ return err;
+ }
default:
return -ENOPROTOOPT;
}
@@ -4289,141 +4377,52 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
EXPORT_SYMBOL(tcp_getsockopt);
#ifdef CONFIG_TCP_MD5SIG
-static DEFINE_PER_CPU(struct tcp_md5sig_pool, tcp_md5sig_pool);
-static DEFINE_MUTEX(tcp_md5sig_mutex);
-static bool tcp_md5sig_pool_populated = false;
+int tcp_md5_sigpool_id = -1;
+EXPORT_SYMBOL_GPL(tcp_md5_sigpool_id);
-static void __tcp_alloc_md5sig_pool(void)
+int tcp_md5_alloc_sigpool(void)
{
- struct crypto_ahash *hash;
- int cpu;
-
- hash = crypto_alloc_ahash("md5", 0, CRYPTO_ALG_ASYNC);
- if (IS_ERR(hash))
- return;
-
- for_each_possible_cpu(cpu) {
- void *scratch = per_cpu(tcp_md5sig_pool, cpu).scratch;
- struct ahash_request *req;
-
- if (!scratch) {
- scratch = kmalloc_node(sizeof(union tcp_md5sum_block) +
- sizeof(struct tcphdr),
- GFP_KERNEL,
- cpu_to_node(cpu));
- if (!scratch)
- return;
- per_cpu(tcp_md5sig_pool, cpu).scratch = scratch;
- }
- if (per_cpu(tcp_md5sig_pool, cpu).md5_req)
- continue;
-
- req = ahash_request_alloc(hash, GFP_KERNEL);
- if (!req)
- return;
-
- ahash_request_set_callback(req, 0, NULL, NULL);
-
- per_cpu(tcp_md5sig_pool, cpu).md5_req = req;
- }
- /* before setting tcp_md5sig_pool_populated, we must commit all writes
- * to memory. See smp_rmb() in tcp_get_md5sig_pool()
- */
- smp_wmb();
- /* Paired with READ_ONCE() from tcp_alloc_md5sig_pool()
- * and tcp_get_md5sig_pool().
- */
- WRITE_ONCE(tcp_md5sig_pool_populated, true);
-}
-
-bool tcp_alloc_md5sig_pool(void)
-{
- /* Paired with WRITE_ONCE() from __tcp_alloc_md5sig_pool() */
- if (unlikely(!READ_ONCE(tcp_md5sig_pool_populated))) {
- mutex_lock(&tcp_md5sig_mutex);
-
- if (!tcp_md5sig_pool_populated)
- __tcp_alloc_md5sig_pool();
+ size_t scratch_size;
+ int ret;
- mutex_unlock(&tcp_md5sig_mutex);
+ scratch_size = sizeof(union tcp_md5sum_block) + sizeof(struct tcphdr);
+ ret = tcp_sigpool_alloc_ahash("md5", scratch_size);
+ if (ret >= 0) {
+ /* As long as any md5 sigpool was allocated, the return
+ * id would stay the same. Re-write the id only for the case
+ * when previously all MD5 keys were deleted and this call
+ * allocates the first MD5 key, which may return a different
+ * sigpool id than was used previously.
+ */
+ WRITE_ONCE(tcp_md5_sigpool_id, ret); /* Avoids the compiler potentially being smart here */
+ return 0;
}
- /* Paired with WRITE_ONCE() from __tcp_alloc_md5sig_pool() */
- return READ_ONCE(tcp_md5sig_pool_populated);
+ return ret;
}
-EXPORT_SYMBOL(tcp_alloc_md5sig_pool);
-
-/**
- * tcp_get_md5sig_pool - get md5sig_pool for this user
- *
- * We use percpu structure, so if we succeed, we exit with preemption
- * and BH disabled, to make sure another thread or softirq handling
- * wont try to get same context.
- */
-struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
+void tcp_md5_release_sigpool(void)
{
- local_bh_disable();
-
- /* Paired with WRITE_ONCE() from __tcp_alloc_md5sig_pool() */
- if (READ_ONCE(tcp_md5sig_pool_populated)) {
- /* coupled with smp_wmb() in __tcp_alloc_md5sig_pool() */
- smp_rmb();
- return this_cpu_ptr(&tcp_md5sig_pool);
- }
- local_bh_enable();
- return NULL;
+ tcp_sigpool_release(READ_ONCE(tcp_md5_sigpool_id));
}
-EXPORT_SYMBOL(tcp_get_md5sig_pool);
-int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
- const struct sk_buff *skb, unsigned int header_len)
+void tcp_md5_add_sigpool(void)
{
- struct scatterlist sg;
- const struct tcphdr *tp = tcp_hdr(skb);
- struct ahash_request *req = hp->md5_req;
- unsigned int i;
- const unsigned int head_data_len = skb_headlen(skb) > header_len ?
- skb_headlen(skb) - header_len : 0;
- const struct skb_shared_info *shi = skb_shinfo(skb);
- struct sk_buff *frag_iter;
-
- sg_init_table(&sg, 1);
-
- sg_set_buf(&sg, ((u8 *) tp) + header_len, head_data_len);
- ahash_request_set_crypt(req, &sg, NULL, head_data_len);
- if (crypto_ahash_update(req))
- return 1;
-
- for (i = 0; i < shi->nr_frags; ++i) {
- const skb_frag_t *f = &shi->frags[i];
- unsigned int offset = skb_frag_off(f);
- struct page *page = skb_frag_page(f) + (offset >> PAGE_SHIFT);
-
- sg_set_page(&sg, page, skb_frag_size(f),
- offset_in_page(offset));
- ahash_request_set_crypt(req, &sg, NULL, skb_frag_size(f));
- if (crypto_ahash_update(req))
- return 1;
- }
-
- skb_walk_frags(skb, frag_iter)
- if (tcp_md5_hash_skb_data(hp, frag_iter, 0))
- return 1;
-
- return 0;
+ tcp_sigpool_get(READ_ONCE(tcp_md5_sigpool_id));
}
-EXPORT_SYMBOL(tcp_md5_hash_skb_data);
-int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, const struct tcp_md5sig_key *key)
+int tcp_md5_hash_key(struct tcp_sigpool *hp,
+ const struct tcp_md5sig_key *key)
{
u8 keylen = READ_ONCE(key->keylen); /* paired with WRITE_ONCE() in tcp_md5_do_add */
struct scatterlist sg;
sg_init_one(&sg, key->key, keylen);
- ahash_request_set_crypt(hp->md5_req, &sg, NULL, keylen);
+ ahash_request_set_crypt(hp->req, &sg, NULL, keylen);
- /* We use data_race() because tcp_md5_do_add() might change key->key under us */
- return data_race(crypto_ahash_update(hp->md5_req));
+ /* We use data_race() because tcp_md5_do_add() might change
+ * key->key under us
+ */
+ return data_race(crypto_ahash_update(hp->req));
}
EXPORT_SYMBOL(tcp_md5_hash_key);
@@ -4431,42 +4430,24 @@ EXPORT_SYMBOL(tcp_md5_hash_key);
enum skb_drop_reason
tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb,
const void *saddr, const void *daddr,
- int family, int dif, int sdif)
+ int family, int l3index, const __u8 *hash_location)
{
- /*
- * This gets called for each TCP segment that arrives
- * so we want to be efficient.
+ /* This gets called for each TCP segment that has TCP-MD5 option.
* We have 3 drop cases:
* o No MD5 hash and one expected.
* o MD5 hash and we're not expecting one.
* o MD5 hash and its wrong.
*/
- const __u8 *hash_location = NULL;
- struct tcp_md5sig_key *hash_expected;
- const struct tcphdr *th = tcp_hdr(skb);
const struct tcp_sock *tp = tcp_sk(sk);
- int genhash, l3index;
+ struct tcp_md5sig_key *key;
u8 newhash[16];
+ int genhash;
- /* sdif set, means packet ingressed via a device
- * in an L3 domain and dif is set to the l3mdev
- */
- l3index = sdif ? dif : 0;
-
- hash_expected = tcp_md5_do_lookup(sk, l3index, saddr, family);
- hash_location = tcp_parse_md5sig_option(th);
-
- /* We've parsed the options - do we have a hash? */
- if (!hash_expected && !hash_location)
- return SKB_NOT_DROPPED_YET;
+ key = tcp_md5_do_lookup(sk, l3index, saddr, family);
- if (hash_expected && !hash_location) {
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
- return SKB_DROP_REASON_TCP_MD5NOTFOUND;
- }
-
- if (!hash_expected && hash_location) {
+ if (!key && hash_location) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
+ tcp_hash_fail("Unexpected MD5 Hash found", family, skb, "");
return SKB_DROP_REASON_TCP_MD5UNEXPECTED;
}
@@ -4475,27 +4456,26 @@ tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb,
* IPv4-mapped case.
*/
if (family == AF_INET)
- genhash = tcp_v4_md5_hash_skb(newhash,
- hash_expected,
- NULL, skb);
+ genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb);
else
- genhash = tp->af_specific->calc_md5_hash(newhash,
- hash_expected,
+ genhash = tp->af_specific->calc_md5_hash(newhash, key,
NULL, skb);
-
if (genhash || memcmp(hash_location, newhash, 16) != 0) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
if (family == AF_INET) {
- net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s L3 index %d\n",
- saddr, ntohs(th->source),
- daddr, ntohs(th->dest),
- genhash ? " tcp_v4_calc_md5_hash failed"
- : "", l3index);
+ tcp_hash_fail("MD5 Hash failed", AF_INET, skb, "%s L3 index %d",
+ genhash ? "tcp_v4_calc_md5_hash failed"
+ : "", l3index);
} else {
- net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n",
- genhash ? "failed" : "mismatch",
- saddr, ntohs(th->source),
- daddr, ntohs(th->dest), l3index);
+ if (genhash) {
+ tcp_hash_fail("MD5 Hash failed",
+ AF_INET6, skb, "L3 index %d",
+ l3index);
+ } else {
+ tcp_hash_fail("MD5 Hash mismatch",
+ AF_INET6, skb, "L3 index %d",
+ l3index);
+ }
}
return SKB_DROP_REASON_TCP_MD5FAILURE;
}
@@ -4615,6 +4595,97 @@ static void __init tcp_init_mem(void)
sysctl_tcp_mem[2] = sysctl_tcp_mem[0] * 2; /* 9.37 % */
}
+static void __init tcp_struct_check(void)
+{
+ /* TX read-mostly hotpath cache lines */
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, max_window);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, rcv_ssthresh);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, reordering);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, notsent_lowat);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, gso_segs);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, lost_skb_hint);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, retransmit_skb_hint);
+ CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_tx, 40);
+
+ /* TXRX read-mostly hotpath cache lines */
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, tsoffset);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, snd_wnd);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, mss_cache);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, snd_cwnd);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, prr_out);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, lost_out);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, sacked_out);
+ CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_txrx, 31);
+
+ /* RX read-mostly hotpath cache lines */
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, copied_seq);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, rcv_tstamp);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, snd_wl1);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, tlp_high_seq);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, rttvar_us);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, retrans_out);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, advmss);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, urg_data);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, lost);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, rtt_min);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, out_of_order_queue);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, snd_ssthresh);
+ CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_rx, 69);
+
+ /* TX read-write hotpath cache lines */
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, segs_out);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, data_segs_out);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, bytes_sent);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, snd_sml);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, chrono_start);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, chrono_stat);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, write_seq);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, pushed_seq);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, lsndtime);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, mdev_us);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, tcp_wstamp_ns);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, tcp_clock_cache);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, tcp_mstamp);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, rtt_seq);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, tsorted_sent_queue);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, highest_sack);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, ecn_flags);
+ CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_tx, 113);
+
+ /* TXRX read-write hotpath cache lines */
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, pred_flags);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rcv_nxt);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, snd_nxt);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, snd_una);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, window_clamp);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, srtt_us);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, packets_out);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, snd_up);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, delivered);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, delivered_ce);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, app_limited);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rcv_wnd);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rx_opt);
+ CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_txrx, 76);
+
+ /* RX read-write hotpath cache lines */
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, bytes_received);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, segs_in);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, data_segs_in);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rcv_wup);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, max_packets_out);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, cwnd_usage_seq);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rate_delivered);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rate_interval_us);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rcv_rtt_last_tsecr);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, first_tx_mstamp);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, delivered_mstamp);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, bytes_acked);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rcv_rtt_est);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rcvq_space);
+ CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_rx, 99);
+}
+
void __init tcp_init(void)
{
int max_rshare, max_wshare, cnt;
@@ -4625,6 +4696,8 @@ void __init tcp_init(void)
BUILD_BUG_ON(sizeof(struct tcp_skb_cb) >
sizeof_field(struct sk_buff, cb));
+ tcp_struct_check();
+
percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL);
timer_setup(&tcp_orphan_timer, tcp_orphan_update, TIMER_DEFERRABLE);
diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c
new file mode 100644
index 000000000000..87db432c6bb4
--- /dev/null
+++ b/net/ipv4/tcp_ao.c
@@ -0,0 +1,2408 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * INET An implementation of the TCP Authentication Option (TCP-AO).
+ * See RFC5925.
+ *
+ * Authors: Dmitry Safonov <dima@arista.com>
+ * Francesco Ruggeri <fruggeri@arista.com>
+ * Salam Noureddine <noureddine@arista.com>
+ */
+#define pr_fmt(fmt) "TCP: " fmt
+
+#include <crypto/hash.h>
+#include <linux/inetdevice.h>
+#include <linux/tcp.h>
+
+#include <net/tcp.h>
+#include <net/ipv6.h>
+#include <net/icmp.h>
+
+DEFINE_STATIC_KEY_DEFERRED_FALSE(tcp_ao_needed, HZ);
+
+int tcp_ao_calc_traffic_key(struct tcp_ao_key *mkt, u8 *key, void *ctx,
+ unsigned int len, struct tcp_sigpool *hp)
+{
+ struct scatterlist sg;
+ int ret;
+
+ if (crypto_ahash_setkey(crypto_ahash_reqtfm(hp->req),
+ mkt->key, mkt->keylen))
+ goto clear_hash;
+
+ ret = crypto_ahash_init(hp->req);
+ if (ret)
+ goto clear_hash;
+
+ sg_init_one(&sg, ctx, len);
+ ahash_request_set_crypt(hp->req, &sg, key, len);
+ crypto_ahash_update(hp->req);
+
+ ret = crypto_ahash_final(hp->req);
+ if (ret)
+ goto clear_hash;
+
+ return 0;
+clear_hash:
+ memset(key, 0, tcp_ao_digest_size(mkt));
+ return 1;
+}
+
+bool tcp_ao_ignore_icmp(const struct sock *sk, int family, int type, int code)
+{
+ bool ignore_icmp = false;
+ struct tcp_ao_info *ao;
+
+ if (!static_branch_unlikely(&tcp_ao_needed.key))
+ return false;
+
+ /* RFC5925, 7.8:
+ * >> A TCP-AO implementation MUST default to ignore incoming ICMPv4
+ * messages of Type 3 (destination unreachable), Codes 2-4 (protocol
+ * unreachable, port unreachable, and fragmentation needed -- ’hard
+ * errors’), and ICMPv6 Type 1 (destination unreachable), Code 1
+ * (administratively prohibited) and Code 4 (port unreachable) intended
+ * for connections in synchronized states (ESTABLISHED, FIN-WAIT-1, FIN-
+ * WAIT-2, CLOSE-WAIT, CLOSING, LAST-ACK, TIME-WAIT) that match MKTs.
+ */
+ if (family == AF_INET) {
+ if (type != ICMP_DEST_UNREACH)
+ return false;
+ if (code < ICMP_PROT_UNREACH || code > ICMP_FRAG_NEEDED)
+ return false;
+ } else {
+ if (type != ICMPV6_DEST_UNREACH)
+ return false;
+ if (code != ICMPV6_ADM_PROHIBITED && code != ICMPV6_PORT_UNREACH)
+ return false;
+ }
+
+ rcu_read_lock();
+ switch (sk->sk_state) {
+ case TCP_TIME_WAIT:
+ ao = rcu_dereference(tcp_twsk(sk)->ao_info);
+ break;
+ case TCP_SYN_SENT:
+ case TCP_SYN_RECV:
+ case TCP_LISTEN:
+ case TCP_NEW_SYN_RECV:
+ /* RFC5925 specifies to ignore ICMPs *only* on connections
+ * in synchronized states.
+ */
+ rcu_read_unlock();
+ return false;
+ default:
+ ao = rcu_dereference(tcp_sk(sk)->ao_info);
+ }
+
+ if (ao && !ao->accept_icmps) {
+ ignore_icmp = true;
+ __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAODROPPEDICMPS);
+ atomic64_inc(&ao->counters.dropped_icmp);
+ }
+ rcu_read_unlock();
+
+ return ignore_icmp;
+}
+
+/* Optimized version of tcp_ao_do_lookup(): only for sockets for which
+ * it's known that the keys in ao_info are matching peer's
+ * family/address/VRF/etc.
+ */
+struct tcp_ao_key *tcp_ao_established_key(struct tcp_ao_info *ao,
+ int sndid, int rcvid)
+{
+ struct tcp_ao_key *key;
+
+ hlist_for_each_entry_rcu(key, &ao->head, node) {
+ if ((sndid >= 0 && key->sndid != sndid) ||
+ (rcvid >= 0 && key->rcvid != rcvid))
+ continue;
+ return key;
+ }
+
+ return NULL;
+}
+
+static int ipv4_prefix_cmp(const struct in_addr *addr1,
+ const struct in_addr *addr2,
+ unsigned int prefixlen)
+{
+ __be32 mask = inet_make_mask(prefixlen);
+ __be32 a1 = addr1->s_addr & mask;
+ __be32 a2 = addr2->s_addr & mask;
+
+ if (a1 == a2)
+ return 0;
+ return memcmp(&a1, &a2, sizeof(a1));
+}
+
+static int __tcp_ao_key_cmp(const struct tcp_ao_key *key, int l3index,
+ const union tcp_ao_addr *addr, u8 prefixlen,
+ int family, int sndid, int rcvid)
+{
+ if (sndid >= 0 && key->sndid != sndid)
+ return (key->sndid > sndid) ? 1 : -1;
+ if (rcvid >= 0 && key->rcvid != rcvid)
+ return (key->rcvid > rcvid) ? 1 : -1;
+ if (l3index >= 0 && (key->keyflags & TCP_AO_KEYF_IFINDEX)) {
+ if (key->l3index != l3index)
+ return (key->l3index > l3index) ? 1 : -1;
+ }
+
+ if (family == AF_UNSPEC)
+ return 0;
+ if (key->family != family)
+ return (key->family > family) ? 1 : -1;
+
+ if (family == AF_INET) {
+ if (ntohl(key->addr.a4.s_addr) == INADDR_ANY)
+ return 0;
+ if (ntohl(addr->a4.s_addr) == INADDR_ANY)
+ return 0;
+ return ipv4_prefix_cmp(&key->addr.a4, &addr->a4, prefixlen);
+#if IS_ENABLED(CONFIG_IPV6)
+ } else {
+ if (ipv6_addr_any(&key->addr.a6) || ipv6_addr_any(&addr->a6))
+ return 0;
+ if (ipv6_prefix_equal(&key->addr.a6, &addr->a6, prefixlen))
+ return 0;
+ return memcmp(&key->addr.a6, &addr->a6, sizeof(addr->a6));
+#endif
+ }
+ return -1;
+}
+
+static int tcp_ao_key_cmp(const struct tcp_ao_key *key, int l3index,
+ const union tcp_ao_addr *addr, u8 prefixlen,
+ int family, int sndid, int rcvid)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ if (family == AF_INET6 && ipv6_addr_v4mapped(&addr->a6)) {
+ __be32 addr4 = addr->a6.s6_addr32[3];
+
+ return __tcp_ao_key_cmp(key, l3index,
+ (union tcp_ao_addr *)&addr4,
+ prefixlen, AF_INET, sndid, rcvid);
+ }
+#endif
+ return __tcp_ao_key_cmp(key, l3index, addr,
+ prefixlen, family, sndid, rcvid);
+}
+
+static struct tcp_ao_key *__tcp_ao_do_lookup(const struct sock *sk, int l3index,
+ const union tcp_ao_addr *addr, int family, u8 prefix,
+ int sndid, int rcvid)
+{
+ struct tcp_ao_key *key;
+ struct tcp_ao_info *ao;
+
+ if (!static_branch_unlikely(&tcp_ao_needed.key))
+ return NULL;
+
+ ao = rcu_dereference_check(tcp_sk(sk)->ao_info,
+ lockdep_sock_is_held(sk));
+ if (!ao)
+ return NULL;
+
+ hlist_for_each_entry_rcu(key, &ao->head, node) {
+ u8 prefixlen = min(prefix, key->prefixlen);
+
+ if (!tcp_ao_key_cmp(key, l3index, addr, prefixlen,
+ family, sndid, rcvid))
+ return key;
+ }
+ return NULL;
+}
+
+struct tcp_ao_key *tcp_ao_do_lookup(const struct sock *sk, int l3index,
+ const union tcp_ao_addr *addr,
+ int family, int sndid, int rcvid)
+{
+ return __tcp_ao_do_lookup(sk, l3index, addr, family, U8_MAX, sndid, rcvid);
+}
+
+static struct tcp_ao_info *tcp_ao_alloc_info(gfp_t flags)
+{
+ struct tcp_ao_info *ao;
+
+ ao = kzalloc(sizeof(*ao), flags);
+ if (!ao)
+ return NULL;
+ INIT_HLIST_HEAD(&ao->head);
+ refcount_set(&ao->refcnt, 1);
+
+ return ao;
+}
+
+static void tcp_ao_link_mkt(struct tcp_ao_info *ao, struct tcp_ao_key *mkt)
+{
+ hlist_add_head_rcu(&mkt->node, &ao->head);
+}
+
+static struct tcp_ao_key *tcp_ao_copy_key(struct sock *sk,
+ struct tcp_ao_key *key)
+{
+ struct tcp_ao_key *new_key;
+
+ new_key = sock_kmalloc(sk, tcp_ao_sizeof_key(key),
+ GFP_ATOMIC);
+ if (!new_key)
+ return NULL;
+
+ *new_key = *key;
+ INIT_HLIST_NODE(&new_key->node);
+ tcp_sigpool_get(new_key->tcp_sigpool_id);
+ atomic64_set(&new_key->pkt_good, 0);
+ atomic64_set(&new_key->pkt_bad, 0);
+
+ return new_key;
+}
+
+static void tcp_ao_key_free_rcu(struct rcu_head *head)
+{
+ struct tcp_ao_key *key = container_of(head, struct tcp_ao_key, rcu);
+
+ tcp_sigpool_release(key->tcp_sigpool_id);
+ kfree_sensitive(key);
+}
+
+void tcp_ao_destroy_sock(struct sock *sk, bool twsk)
+{
+ struct tcp_ao_info *ao;
+ struct tcp_ao_key *key;
+ struct hlist_node *n;
+
+ if (twsk) {
+ ao = rcu_dereference_protected(tcp_twsk(sk)->ao_info, 1);
+ tcp_twsk(sk)->ao_info = NULL;
+ } else {
+ ao = rcu_dereference_protected(tcp_sk(sk)->ao_info, 1);
+ tcp_sk(sk)->ao_info = NULL;
+ }
+
+ if (!ao || !refcount_dec_and_test(&ao->refcnt))
+ return;
+
+ hlist_for_each_entry_safe(key, n, &ao->head, node) {
+ hlist_del_rcu(&key->node);
+ if (!twsk)
+ atomic_sub(tcp_ao_sizeof_key(key), &sk->sk_omem_alloc);
+ call_rcu(&key->rcu, tcp_ao_key_free_rcu);
+ }
+
+ kfree_rcu(ao, rcu);
+ static_branch_slow_dec_deferred(&tcp_ao_needed);
+}
+
+void tcp_ao_time_wait(struct tcp_timewait_sock *tcptw, struct tcp_sock *tp)
+{
+ struct tcp_ao_info *ao_info = rcu_dereference_protected(tp->ao_info, 1);
+
+ if (ao_info) {
+ struct tcp_ao_key *key;
+ struct hlist_node *n;
+ int omem = 0;
+
+ hlist_for_each_entry_safe(key, n, &ao_info->head, node) {
+ omem += tcp_ao_sizeof_key(key);
+ }
+
+ refcount_inc(&ao_info->refcnt);
+ atomic_sub(omem, &(((struct sock *)tp)->sk_omem_alloc));
+ rcu_assign_pointer(tcptw->ao_info, ao_info);
+ } else {
+ tcptw->ao_info = NULL;
+ }
+}
+
+/* 4 tuple and ISNs are expected in NBO */
+static int tcp_v4_ao_calc_key(struct tcp_ao_key *mkt, u8 *key,
+ __be32 saddr, __be32 daddr,
+ __be16 sport, __be16 dport,
+ __be32 sisn, __be32 disn)
+{
+ /* See RFC5926 3.1.1 */
+ struct kdf_input_block {
+ u8 counter;
+ u8 label[6];
+ struct tcp4_ao_context ctx;
+ __be16 outlen;
+ } __packed * tmp;
+ struct tcp_sigpool hp;
+ int err;
+
+ err = tcp_sigpool_start(mkt->tcp_sigpool_id, &hp);
+ if (err)
+ return err;
+
+ tmp = hp.scratch;
+ tmp->counter = 1;
+ memcpy(tmp->label, "TCP-AO", 6);
+ tmp->ctx.saddr = saddr;
+ tmp->ctx.daddr = daddr;
+ tmp->ctx.sport = sport;
+ tmp->ctx.dport = dport;
+ tmp->ctx.sisn = sisn;
+ tmp->ctx.disn = disn;
+ tmp->outlen = htons(tcp_ao_digest_size(mkt) * 8); /* in bits */
+
+ err = tcp_ao_calc_traffic_key(mkt, key, tmp, sizeof(*tmp), &hp);
+ tcp_sigpool_end(&hp);
+
+ return err;
+}
+
+int tcp_v4_ao_calc_key_sk(struct tcp_ao_key *mkt, u8 *key,
+ const struct sock *sk,
+ __be32 sisn, __be32 disn, bool send)
+{
+ if (send)
+ return tcp_v4_ao_calc_key(mkt, key, sk->sk_rcv_saddr,
+ sk->sk_daddr, htons(sk->sk_num),
+ sk->sk_dport, sisn, disn);
+ else
+ return tcp_v4_ao_calc_key(mkt, key, sk->sk_daddr,
+ sk->sk_rcv_saddr, sk->sk_dport,
+ htons(sk->sk_num), disn, sisn);
+}
+
+static int tcp_ao_calc_key_sk(struct tcp_ao_key *mkt, u8 *key,
+ const struct sock *sk,
+ __be32 sisn, __be32 disn, bool send)
+{
+ if (mkt->family == AF_INET)
+ return tcp_v4_ao_calc_key_sk(mkt, key, sk, sisn, disn, send);
+#if IS_ENABLED(CONFIG_IPV6)
+ else if (mkt->family == AF_INET6)
+ return tcp_v6_ao_calc_key_sk(mkt, key, sk, sisn, disn, send);
+#endif
+ else
+ return -EOPNOTSUPP;
+}
+
+int tcp_v4_ao_calc_key_rsk(struct tcp_ao_key *mkt, u8 *key,
+ struct request_sock *req)
+{
+ struct inet_request_sock *ireq = inet_rsk(req);
+
+ return tcp_v4_ao_calc_key(mkt, key,
+ ireq->ir_loc_addr, ireq->ir_rmt_addr,
+ htons(ireq->ir_num), ireq->ir_rmt_port,
+ htonl(tcp_rsk(req)->snt_isn),
+ htonl(tcp_rsk(req)->rcv_isn));
+}
+
+static int tcp_v4_ao_calc_key_skb(struct tcp_ao_key *mkt, u8 *key,
+ const struct sk_buff *skb,
+ __be32 sisn, __be32 disn)
+{
+ const struct iphdr *iph = ip_hdr(skb);
+ const struct tcphdr *th = tcp_hdr(skb);
+
+ return tcp_v4_ao_calc_key(mkt, key, iph->saddr, iph->daddr,
+ th->source, th->dest, sisn, disn);
+}
+
+static int tcp_ao_calc_key_skb(struct tcp_ao_key *mkt, u8 *key,
+ const struct sk_buff *skb,
+ __be32 sisn, __be32 disn, int family)
+{
+ if (family == AF_INET)
+ return tcp_v4_ao_calc_key_skb(mkt, key, skb, sisn, disn);
+#if IS_ENABLED(CONFIG_IPV6)
+ else if (family == AF_INET6)
+ return tcp_v6_ao_calc_key_skb(mkt, key, skb, sisn, disn);
+#endif
+ return -EAFNOSUPPORT;
+}
+
+static int tcp_v4_ao_hash_pseudoheader(struct tcp_sigpool *hp,
+ __be32 daddr, __be32 saddr,
+ int nbytes)
+{
+ struct tcp4_pseudohdr *bp;
+ struct scatterlist sg;
+
+ bp = hp->scratch;
+ bp->saddr = saddr;
+ bp->daddr = daddr;
+ bp->pad = 0;
+ bp->protocol = IPPROTO_TCP;
+ bp->len = cpu_to_be16(nbytes);
+
+ sg_init_one(&sg, bp, sizeof(*bp));
+ ahash_request_set_crypt(hp->req, &sg, NULL, sizeof(*bp));
+ return crypto_ahash_update(hp->req);
+}
+
+static int tcp_ao_hash_pseudoheader(unsigned short int family,
+ const struct sock *sk,
+ const struct sk_buff *skb,
+ struct tcp_sigpool *hp, int nbytes)
+{
+ const struct tcphdr *th = tcp_hdr(skb);
+
+ /* TODO: Can we rely on checksum being zero to mean outbound pkt? */
+ if (!th->check) {
+ if (family == AF_INET)
+ return tcp_v4_ao_hash_pseudoheader(hp, sk->sk_daddr,
+ sk->sk_rcv_saddr, skb->len);
+#if IS_ENABLED(CONFIG_IPV6)
+ else if (family == AF_INET6)
+ return tcp_v6_ao_hash_pseudoheader(hp, &sk->sk_v6_daddr,
+ &sk->sk_v6_rcv_saddr, skb->len);
+#endif
+ else
+ return -EAFNOSUPPORT;
+ }
+
+ if (family == AF_INET) {
+ const struct iphdr *iph = ip_hdr(skb);
+
+ return tcp_v4_ao_hash_pseudoheader(hp, iph->daddr,
+ iph->saddr, skb->len);
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (family == AF_INET6) {
+ const struct ipv6hdr *iph = ipv6_hdr(skb);
+
+ return tcp_v6_ao_hash_pseudoheader(hp, &iph->daddr,
+ &iph->saddr, skb->len);
+#endif
+ }
+ return -EAFNOSUPPORT;
+}
+
+u32 tcp_ao_compute_sne(u32 next_sne, u32 next_seq, u32 seq)
+{
+ u32 sne = next_sne;
+
+ if (before(seq, next_seq)) {
+ if (seq > next_seq)
+ sne--;
+ } else {
+ if (seq < next_seq)
+ sne++;
+ }
+
+ return sne;
+}
+
+/* tcp_ao_hash_sne(struct tcp_sigpool *hp)
+ * @hp - used for hashing
+ * @sne - sne value
+ */
+static int tcp_ao_hash_sne(struct tcp_sigpool *hp, u32 sne)
+{
+ struct scatterlist sg;
+ __be32 *bp;
+
+ bp = (__be32 *)hp->scratch;
+ *bp = htonl(sne);
+
+ sg_init_one(&sg, bp, sizeof(*bp));
+ ahash_request_set_crypt(hp->req, &sg, NULL, sizeof(*bp));
+ return crypto_ahash_update(hp->req);
+}
+
+static int tcp_ao_hash_header(struct tcp_sigpool *hp,
+ const struct tcphdr *th,
+ bool exclude_options, u8 *hash,
+ int hash_offset, int hash_len)
+{
+ int err, len = th->doff << 2;
+ struct scatterlist sg;
+ u8 *hdr = hp->scratch;
+
+ /* We are not allowed to change tcphdr, make a local copy */
+ if (exclude_options) {
+ len = sizeof(*th) + sizeof(struct tcp_ao_hdr) + hash_len;
+ memcpy(hdr, th, sizeof(*th));
+ memcpy(hdr + sizeof(*th),
+ (u8 *)th + hash_offset - sizeof(struct tcp_ao_hdr),
+ sizeof(struct tcp_ao_hdr));
+ memset(hdr + sizeof(*th) + sizeof(struct tcp_ao_hdr),
+ 0, hash_len);
+ ((struct tcphdr *)hdr)->check = 0;
+ } else {
+ len = th->doff << 2;
+ memcpy(hdr, th, len);
+ /* zero out tcp-ao hash */
+ ((struct tcphdr *)hdr)->check = 0;
+ memset(hdr + hash_offset, 0, hash_len);
+ }
+
+ sg_init_one(&sg, hdr, len);
+ ahash_request_set_crypt(hp->req, &sg, NULL, len);
+ err = crypto_ahash_update(hp->req);
+ WARN_ON_ONCE(err != 0);
+ return err;
+}
+
+int tcp_ao_hash_hdr(unsigned short int family, char *ao_hash,
+ struct tcp_ao_key *key, const u8 *tkey,
+ const union tcp_ao_addr *daddr,
+ const union tcp_ao_addr *saddr,
+ const struct tcphdr *th, u32 sne)
+{
+ int tkey_len = tcp_ao_digest_size(key);
+ int hash_offset = ao_hash - (char *)th;
+ struct tcp_sigpool hp;
+ void *hash_buf = NULL;
+
+ hash_buf = kmalloc(tkey_len, GFP_ATOMIC);
+ if (!hash_buf)
+ goto clear_hash_noput;
+
+ if (tcp_sigpool_start(key->tcp_sigpool_id, &hp))
+ goto clear_hash_noput;
+
+ if (crypto_ahash_setkey(crypto_ahash_reqtfm(hp.req), tkey, tkey_len))
+ goto clear_hash;
+
+ if (crypto_ahash_init(hp.req))
+ goto clear_hash;
+
+ if (tcp_ao_hash_sne(&hp, sne))
+ goto clear_hash;
+ if (family == AF_INET) {
+ if (tcp_v4_ao_hash_pseudoheader(&hp, daddr->a4.s_addr,
+ saddr->a4.s_addr, th->doff * 4))
+ goto clear_hash;
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (family == AF_INET6) {
+ if (tcp_v6_ao_hash_pseudoheader(&hp, &daddr->a6,
+ &saddr->a6, th->doff * 4))
+ goto clear_hash;
+#endif
+ } else {
+ WARN_ON_ONCE(1);
+ goto clear_hash;
+ }
+ if (tcp_ao_hash_header(&hp, th,
+ !!(key->keyflags & TCP_AO_KEYF_EXCLUDE_OPT),
+ ao_hash, hash_offset, tcp_ao_maclen(key)))
+ goto clear_hash;
+ ahash_request_set_crypt(hp.req, NULL, hash_buf, 0);
+ if (crypto_ahash_final(hp.req))
+ goto clear_hash;
+
+ memcpy(ao_hash, hash_buf, tcp_ao_maclen(key));
+ tcp_sigpool_end(&hp);
+ kfree(hash_buf);
+ return 0;
+
+clear_hash:
+ tcp_sigpool_end(&hp);
+clear_hash_noput:
+ memset(ao_hash, 0, tcp_ao_maclen(key));
+ kfree(hash_buf);
+ return 1;
+}
+
+int tcp_ao_hash_skb(unsigned short int family,
+ char *ao_hash, struct tcp_ao_key *key,
+ const struct sock *sk, const struct sk_buff *skb,
+ const u8 *tkey, int hash_offset, u32 sne)
+{
+ const struct tcphdr *th = tcp_hdr(skb);
+ int tkey_len = tcp_ao_digest_size(key);
+ struct tcp_sigpool hp;
+ void *hash_buf = NULL;
+
+ hash_buf = kmalloc(tkey_len, GFP_ATOMIC);
+ if (!hash_buf)
+ goto clear_hash_noput;
+
+ if (tcp_sigpool_start(key->tcp_sigpool_id, &hp))
+ goto clear_hash_noput;
+
+ if (crypto_ahash_setkey(crypto_ahash_reqtfm(hp.req), tkey, tkey_len))
+ goto clear_hash;
+
+ /* For now use sha1 by default. Depends on alg in tcp_ao_key */
+ if (crypto_ahash_init(hp.req))
+ goto clear_hash;
+
+ if (tcp_ao_hash_sne(&hp, sne))
+ goto clear_hash;
+ if (tcp_ao_hash_pseudoheader(family, sk, skb, &hp, skb->len))
+ goto clear_hash;
+ if (tcp_ao_hash_header(&hp, th,
+ !!(key->keyflags & TCP_AO_KEYF_EXCLUDE_OPT),
+ ao_hash, hash_offset, tcp_ao_maclen(key)))
+ goto clear_hash;
+ if (tcp_sigpool_hash_skb_data(&hp, skb, th->doff << 2))
+ goto clear_hash;
+ ahash_request_set_crypt(hp.req, NULL, hash_buf, 0);
+ if (crypto_ahash_final(hp.req))
+ goto clear_hash;
+
+ memcpy(ao_hash, hash_buf, tcp_ao_maclen(key));
+ tcp_sigpool_end(&hp);
+ kfree(hash_buf);
+ return 0;
+
+clear_hash:
+ tcp_sigpool_end(&hp);
+clear_hash_noput:
+ memset(ao_hash, 0, tcp_ao_maclen(key));
+ kfree(hash_buf);
+ return 1;
+}
+
+int tcp_v4_ao_hash_skb(char *ao_hash, struct tcp_ao_key *key,
+ const struct sock *sk, const struct sk_buff *skb,
+ const u8 *tkey, int hash_offset, u32 sne)
+{
+ return tcp_ao_hash_skb(AF_INET, ao_hash, key, sk, skb,
+ tkey, hash_offset, sne);
+}
+
+int tcp_v4_ao_synack_hash(char *ao_hash, struct tcp_ao_key *ao_key,
+ struct request_sock *req, const struct sk_buff *skb,
+ int hash_offset, u32 sne)
+{
+ void *hash_buf = NULL;
+ int err;
+
+ hash_buf = kmalloc(tcp_ao_digest_size(ao_key), GFP_ATOMIC);
+ if (!hash_buf)
+ return -ENOMEM;
+
+ err = tcp_v4_ao_calc_key_rsk(ao_key, hash_buf, req);
+ if (err)
+ goto out;
+
+ err = tcp_ao_hash_skb(AF_INET, ao_hash, ao_key, req_to_sk(req), skb,
+ hash_buf, hash_offset, sne);
+out:
+ kfree(hash_buf);
+ return err;
+}
+
+struct tcp_ao_key *tcp_v4_ao_lookup_rsk(const struct sock *sk,
+ struct request_sock *req,
+ int sndid, int rcvid)
+{
+ struct inet_request_sock *ireq = inet_rsk(req);
+ union tcp_ao_addr *addr = (union tcp_ao_addr *)&ireq->ir_rmt_addr;
+ int l3index;
+
+ l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
+ return tcp_ao_do_lookup(sk, l3index, addr, AF_INET, sndid, rcvid);
+}
+
+struct tcp_ao_key *tcp_v4_ao_lookup(const struct sock *sk, struct sock *addr_sk,
+ int sndid, int rcvid)
+{
+ int l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
+ addr_sk->sk_bound_dev_if);
+ union tcp_ao_addr *addr = (union tcp_ao_addr *)&addr_sk->sk_daddr;
+
+ return tcp_ao_do_lookup(sk, l3index, addr, AF_INET, sndid, rcvid);
+}
+
+int tcp_ao_prepare_reset(const struct sock *sk, struct sk_buff *skb,
+ const struct tcp_ao_hdr *aoh, int l3index, u32 seq,
+ struct tcp_ao_key **key, char **traffic_key,
+ bool *allocated_traffic_key, u8 *keyid, u32 *sne)
+{
+ const struct tcphdr *th = tcp_hdr(skb);
+ struct tcp_ao_info *ao_info;
+
+ *allocated_traffic_key = false;
+ /* If there's no socket - than initial sisn/disn are unknown.
+ * Drop the segment. RFC5925 (7.7) advises to require graceful
+ * restart [RFC4724]. Alternatively, the RFC5925 advises to
+ * save/restore traffic keys before/after reboot.
+ * Linux TCP-AO support provides TCP_AO_ADD_KEY and TCP_AO_REPAIR
+ * options to restore a socket post-reboot.
+ */
+ if (!sk)
+ return -ENOTCONN;
+
+ if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV)) {
+ unsigned int family = READ_ONCE(sk->sk_family);
+ union tcp_ao_addr *addr;
+ __be32 disn, sisn;
+
+ if (sk->sk_state == TCP_NEW_SYN_RECV) {
+ struct request_sock *req = inet_reqsk(sk);
+
+ sisn = htonl(tcp_rsk(req)->rcv_isn);
+ disn = htonl(tcp_rsk(req)->snt_isn);
+ *sne = tcp_ao_compute_sne(0, tcp_rsk(req)->snt_isn, seq);
+ } else {
+ sisn = th->seq;
+ disn = 0;
+ }
+ if (IS_ENABLED(CONFIG_IPV6) && family == AF_INET6)
+ addr = (union tcp_md5_addr *)&ipv6_hdr(skb)->saddr;
+ else
+ addr = (union tcp_md5_addr *)&ip_hdr(skb)->saddr;
+#if IS_ENABLED(CONFIG_IPV6)
+ if (family == AF_INET6 && ipv6_addr_v4mapped(&sk->sk_v6_daddr))
+ family = AF_INET;
+#endif
+
+ sk = sk_const_to_full_sk(sk);
+ ao_info = rcu_dereference(tcp_sk(sk)->ao_info);
+ if (!ao_info)
+ return -ENOENT;
+ *key = tcp_ao_do_lookup(sk, l3index, addr, family,
+ -1, aoh->rnext_keyid);
+ if (!*key)
+ return -ENOENT;
+ *traffic_key = kmalloc(tcp_ao_digest_size(*key), GFP_ATOMIC);
+ if (!*traffic_key)
+ return -ENOMEM;
+ *allocated_traffic_key = true;
+ if (tcp_ao_calc_key_skb(*key, *traffic_key, skb,
+ sisn, disn, family))
+ return -1;
+ *keyid = (*key)->rcvid;
+ } else {
+ struct tcp_ao_key *rnext_key;
+ u32 snd_basis;
+
+ if (sk->sk_state == TCP_TIME_WAIT) {
+ ao_info = rcu_dereference(tcp_twsk(sk)->ao_info);
+ snd_basis = tcp_twsk(sk)->tw_snd_nxt;
+ } else {
+ ao_info = rcu_dereference(tcp_sk(sk)->ao_info);
+ snd_basis = tcp_sk(sk)->snd_una;
+ }
+ if (!ao_info)
+ return -ENOENT;
+
+ *key = tcp_ao_established_key(ao_info, aoh->rnext_keyid, -1);
+ if (!*key)
+ return -ENOENT;
+ *traffic_key = snd_other_key(*key);
+ rnext_key = READ_ONCE(ao_info->rnext_key);
+ *keyid = rnext_key->rcvid;
+ *sne = tcp_ao_compute_sne(READ_ONCE(ao_info->snd_sne),
+ snd_basis, seq);
+ }
+ return 0;
+}
+
+int tcp_ao_transmit_skb(struct sock *sk, struct sk_buff *skb,
+ struct tcp_ao_key *key, struct tcphdr *th,
+ __u8 *hash_location)
+{
+ struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct tcp_ao_info *ao;
+ void *tkey_buf = NULL;
+ u8 *traffic_key;
+ u32 sne;
+
+ ao = rcu_dereference_protected(tcp_sk(sk)->ao_info,
+ lockdep_sock_is_held(sk));
+ traffic_key = snd_other_key(key);
+ if (unlikely(tcb->tcp_flags & TCPHDR_SYN)) {
+ __be32 disn;
+
+ if (!(tcb->tcp_flags & TCPHDR_ACK)) {
+ disn = 0;
+ tkey_buf = kmalloc(tcp_ao_digest_size(key), GFP_ATOMIC);
+ if (!tkey_buf)
+ return -ENOMEM;
+ traffic_key = tkey_buf;
+ } else {
+ disn = ao->risn;
+ }
+ tp->af_specific->ao_calc_key_sk(key, traffic_key,
+ sk, ao->lisn, disn, true);
+ }
+ sne = tcp_ao_compute_sne(READ_ONCE(ao->snd_sne), READ_ONCE(tp->snd_una),
+ ntohl(th->seq));
+ tp->af_specific->calc_ao_hash(hash_location, key, sk, skb, traffic_key,
+ hash_location - (u8 *)th, sne);
+ kfree(tkey_buf);
+ return 0;
+}
+
+static struct tcp_ao_key *tcp_ao_inbound_lookup(unsigned short int family,
+ const struct sock *sk, const struct sk_buff *skb,
+ int sndid, int rcvid, int l3index)
+{
+ if (family == AF_INET) {
+ const struct iphdr *iph = ip_hdr(skb);
+
+ return tcp_ao_do_lookup(sk, l3index,
+ (union tcp_ao_addr *)&iph->saddr,
+ AF_INET, sndid, rcvid);
+ } else {
+ const struct ipv6hdr *iph = ipv6_hdr(skb);
+
+ return tcp_ao_do_lookup(sk, l3index,
+ (union tcp_ao_addr *)&iph->saddr,
+ AF_INET6, sndid, rcvid);
+ }
+}
+
+void tcp_ao_syncookie(struct sock *sk, const struct sk_buff *skb,
+ struct request_sock *req, unsigned short int family)
+{
+ struct tcp_request_sock *treq = tcp_rsk(req);
+ const struct tcphdr *th = tcp_hdr(skb);
+ const struct tcp_ao_hdr *aoh;
+ struct tcp_ao_key *key;
+ int l3index;
+
+ /* treq->af_specific is used to perform TCP_AO lookup
+ * in tcp_create_openreq_child().
+ */
+#if IS_ENABLED(CONFIG_IPV6)
+ if (family == AF_INET6)
+ treq->af_specific = &tcp_request_sock_ipv6_ops;
+ else
+#endif
+ treq->af_specific = &tcp_request_sock_ipv4_ops;
+
+ treq->used_tcp_ao = false;
+
+ if (tcp_parse_auth_options(th, NULL, &aoh) || !aoh)
+ return;
+
+ l3index = l3mdev_master_ifindex_by_index(sock_net(sk), inet_rsk(req)->ir_iif);
+ key = tcp_ao_inbound_lookup(family, sk, skb, -1, aoh->keyid, l3index);
+ if (!key)
+ /* Key not found, continue without TCP-AO */
+ return;
+
+ treq->ao_rcv_next = aoh->keyid;
+ treq->ao_keyid = aoh->rnext_keyid;
+ treq->used_tcp_ao = true;
+}
+
+static enum skb_drop_reason
+tcp_ao_verify_hash(const struct sock *sk, const struct sk_buff *skb,
+ unsigned short int family, struct tcp_ao_info *info,
+ const struct tcp_ao_hdr *aoh, struct tcp_ao_key *key,
+ u8 *traffic_key, u8 *phash, u32 sne, int l3index)
+{
+ u8 maclen = aoh->length - sizeof(struct tcp_ao_hdr);
+ const struct tcphdr *th = tcp_hdr(skb);
+ void *hash_buf = NULL;
+
+ if (maclen != tcp_ao_maclen(key)) {
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOBAD);
+ atomic64_inc(&info->counters.pkt_bad);
+ atomic64_inc(&key->pkt_bad);
+ tcp_hash_fail("AO hash wrong length", family, skb,
+ "%u != %d L3index: %d", maclen,
+ tcp_ao_maclen(key), l3index);
+ return SKB_DROP_REASON_TCP_AOFAILURE;
+ }
+
+ hash_buf = kmalloc(tcp_ao_digest_size(key), GFP_ATOMIC);
+ if (!hash_buf)
+ return SKB_DROP_REASON_NOT_SPECIFIED;
+
+ /* XXX: make it per-AF callback? */
+ tcp_ao_hash_skb(family, hash_buf, key, sk, skb, traffic_key,
+ (phash - (u8 *)th), sne);
+ if (memcmp(phash, hash_buf, maclen)) {
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOBAD);
+ atomic64_inc(&info->counters.pkt_bad);
+ atomic64_inc(&key->pkt_bad);
+ tcp_hash_fail("AO hash mismatch", family, skb,
+ "L3index: %d", l3index);
+ kfree(hash_buf);
+ return SKB_DROP_REASON_TCP_AOFAILURE;
+ }
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOGOOD);
+ atomic64_inc(&info->counters.pkt_good);
+ atomic64_inc(&key->pkt_good);
+ kfree(hash_buf);
+ return SKB_NOT_DROPPED_YET;
+}
+
+enum skb_drop_reason
+tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb,
+ unsigned short int family, const struct request_sock *req,
+ int l3index, const struct tcp_ao_hdr *aoh)
+{
+ const struct tcphdr *th = tcp_hdr(skb);
+ u8 *phash = (u8 *)(aoh + 1); /* hash goes just after the header */
+ struct tcp_ao_info *info;
+ enum skb_drop_reason ret;
+ struct tcp_ao_key *key;
+ __be32 sisn, disn;
+ u8 *traffic_key;
+ u32 sne = 0;
+
+ info = rcu_dereference(tcp_sk(sk)->ao_info);
+ if (!info) {
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOKEYNOTFOUND);
+ tcp_hash_fail("AO key not found", family, skb,
+ "keyid: %u L3index: %d", aoh->keyid, l3index);
+ return SKB_DROP_REASON_TCP_AOUNEXPECTED;
+ }
+
+ if (unlikely(th->syn)) {
+ sisn = th->seq;
+ disn = 0;
+ }
+
+ /* Fast-path */
+ if (likely((1 << sk->sk_state) & TCP_AO_ESTABLISHED)) {
+ enum skb_drop_reason err;
+ struct tcp_ao_key *current_key;
+
+ /* Check if this socket's rnext_key matches the keyid in the
+ * packet. If not we lookup the key based on the keyid
+ * matching the rcvid in the mkt.
+ */
+ key = READ_ONCE(info->rnext_key);
+ if (key->rcvid != aoh->keyid) {
+ key = tcp_ao_established_key(info, -1, aoh->keyid);
+ if (!key)
+ goto key_not_found;
+ }
+
+ /* Delayed retransmitted SYN */
+ if (unlikely(th->syn && !th->ack))
+ goto verify_hash;
+
+ sne = tcp_ao_compute_sne(info->rcv_sne, tcp_sk(sk)->rcv_nxt,
+ ntohl(th->seq));
+ /* Established socket, traffic key are cached */
+ traffic_key = rcv_other_key(key);
+ err = tcp_ao_verify_hash(sk, skb, family, info, aoh, key,
+ traffic_key, phash, sne, l3index);
+ if (err)
+ return err;
+ current_key = READ_ONCE(info->current_key);
+ /* Key rotation: the peer asks us to use new key (RNext) */
+ if (unlikely(aoh->rnext_keyid != current_key->sndid)) {
+ /* If the key is not found we do nothing. */
+ key = tcp_ao_established_key(info, aoh->rnext_keyid, -1);
+ if (key)
+ /* pairs with tcp_ao_del_cmd */
+ WRITE_ONCE(info->current_key, key);
+ }
+ return SKB_NOT_DROPPED_YET;
+ }
+
+ /* Lookup key based on peer address and keyid.
+ * current_key and rnext_key must not be used on tcp listen
+ * sockets as otherwise:
+ * - request sockets would race on those key pointers
+ * - tcp_ao_del_cmd() allows async key removal
+ */
+ key = tcp_ao_inbound_lookup(family, sk, skb, -1, aoh->keyid, l3index);
+ if (!key)
+ goto key_not_found;
+
+ if (th->syn && !th->ack)
+ goto verify_hash;
+
+ if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV)) {
+ /* Make the initial syn the likely case here */
+ if (unlikely(req)) {
+ sne = tcp_ao_compute_sne(0, tcp_rsk(req)->rcv_isn,
+ ntohl(th->seq));
+ sisn = htonl(tcp_rsk(req)->rcv_isn);
+ disn = htonl(tcp_rsk(req)->snt_isn);
+ } else if (unlikely(th->ack && !th->syn)) {
+ /* Possible syncookie packet */
+ sisn = htonl(ntohl(th->seq) - 1);
+ disn = htonl(ntohl(th->ack_seq) - 1);
+ sne = tcp_ao_compute_sne(0, ntohl(sisn),
+ ntohl(th->seq));
+ } else if (unlikely(!th->syn)) {
+ /* no way to figure out initial sisn/disn - drop */
+ return SKB_DROP_REASON_TCP_FLAGS;
+ }
+ } else if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
+ disn = info->lisn;
+ if (th->syn || th->rst)
+ sisn = th->seq;
+ else
+ sisn = info->risn;
+ } else {
+ WARN_ONCE(1, "TCP-AO: Unexpected sk_state %d", sk->sk_state);
+ return SKB_DROP_REASON_TCP_AOFAILURE;
+ }
+verify_hash:
+ traffic_key = kmalloc(tcp_ao_digest_size(key), GFP_ATOMIC);
+ if (!traffic_key)
+ return SKB_DROP_REASON_NOT_SPECIFIED;
+ tcp_ao_calc_key_skb(key, traffic_key, skb, sisn, disn, family);
+ ret = tcp_ao_verify_hash(sk, skb, family, info, aoh, key,
+ traffic_key, phash, sne, l3index);
+ kfree(traffic_key);
+ return ret;
+
+key_not_found:
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOKEYNOTFOUND);
+ atomic64_inc(&info->counters.key_not_found);
+ tcp_hash_fail("Requested by the peer AO key id not found",
+ family, skb, "L3index: %d", l3index);
+ return SKB_DROP_REASON_TCP_AOKEYNOTFOUND;
+}
+
+static int tcp_ao_cache_traffic_keys(const struct sock *sk,
+ struct tcp_ao_info *ao,
+ struct tcp_ao_key *ao_key)
+{
+ u8 *traffic_key = snd_other_key(ao_key);
+ int ret;
+
+ ret = tcp_ao_calc_key_sk(ao_key, traffic_key, sk,
+ ao->lisn, ao->risn, true);
+ if (ret)
+ return ret;
+
+ traffic_key = rcv_other_key(ao_key);
+ ret = tcp_ao_calc_key_sk(ao_key, traffic_key, sk,
+ ao->lisn, ao->risn, false);
+ return ret;
+}
+
+void tcp_ao_connect_init(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct tcp_ao_info *ao_info;
+ union tcp_ao_addr *addr;
+ struct tcp_ao_key *key;
+ int family, l3index;
+
+ ao_info = rcu_dereference_protected(tp->ao_info,
+ lockdep_sock_is_held(sk));
+ if (!ao_info)
+ return;
+
+ /* Remove all keys that don't match the peer */
+ family = sk->sk_family;
+ if (family == AF_INET)
+ addr = (union tcp_ao_addr *)&sk->sk_daddr;
+#if IS_ENABLED(CONFIG_IPV6)
+ else if (family == AF_INET6)
+ addr = (union tcp_ao_addr *)&sk->sk_v6_daddr;
+#endif
+ else
+ return;
+ l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
+ sk->sk_bound_dev_if);
+
+ hlist_for_each_entry_rcu(key, &ao_info->head, node) {
+ if (!tcp_ao_key_cmp(key, l3index, addr, key->prefixlen, family, -1, -1))
+ continue;
+
+ if (key == ao_info->current_key)
+ ao_info->current_key = NULL;
+ if (key == ao_info->rnext_key)
+ ao_info->rnext_key = NULL;
+ hlist_del_rcu(&key->node);
+ atomic_sub(tcp_ao_sizeof_key(key), &sk->sk_omem_alloc);
+ call_rcu(&key->rcu, tcp_ao_key_free_rcu);
+ }
+
+ key = tp->af_specific->ao_lookup(sk, sk, -1, -1);
+ if (key) {
+ /* if current_key or rnext_key were not provided,
+ * use the first key matching the peer
+ */
+ if (!ao_info->current_key)
+ ao_info->current_key = key;
+ if (!ao_info->rnext_key)
+ ao_info->rnext_key = key;
+ tp->tcp_header_len += tcp_ao_len_aligned(key);
+
+ ao_info->lisn = htonl(tp->write_seq);
+ ao_info->snd_sne = 0;
+ } else {
+ /* Can't happen: tcp_connect() verifies that there's
+ * at least one tcp-ao key that matches the remote peer.
+ */
+ WARN_ON_ONCE(1);
+ rcu_assign_pointer(tp->ao_info, NULL);
+ kfree(ao_info);
+ }
+}
+
+void tcp_ao_established(struct sock *sk)
+{
+ struct tcp_ao_info *ao;
+ struct tcp_ao_key *key;
+
+ ao = rcu_dereference_protected(tcp_sk(sk)->ao_info,
+ lockdep_sock_is_held(sk));
+ if (!ao)
+ return;
+
+ hlist_for_each_entry_rcu(key, &ao->head, node)
+ tcp_ao_cache_traffic_keys(sk, ao, key);
+}
+
+void tcp_ao_finish_connect(struct sock *sk, struct sk_buff *skb)
+{
+ struct tcp_ao_info *ao;
+ struct tcp_ao_key *key;
+
+ ao = rcu_dereference_protected(tcp_sk(sk)->ao_info,
+ lockdep_sock_is_held(sk));
+ if (!ao)
+ return;
+
+ WRITE_ONCE(ao->risn, tcp_hdr(skb)->seq);
+ ao->rcv_sne = 0;
+
+ hlist_for_each_entry_rcu(key, &ao->head, node)
+ tcp_ao_cache_traffic_keys(sk, ao, key);
+}
+
+int tcp_ao_copy_all_matching(const struct sock *sk, struct sock *newsk,
+ struct request_sock *req, struct sk_buff *skb,
+ int family)
+{
+ struct tcp_ao_key *key, *new_key, *first_key;
+ struct tcp_ao_info *new_ao, *ao;
+ struct hlist_node *key_head;
+ int l3index, ret = -ENOMEM;
+ union tcp_ao_addr *addr;
+ bool match = false;
+
+ ao = rcu_dereference(tcp_sk(sk)->ao_info);
+ if (!ao)
+ return 0;
+
+ /* New socket without TCP-AO on it */
+ if (!tcp_rsk_used_ao(req))
+ return 0;
+
+ new_ao = tcp_ao_alloc_info(GFP_ATOMIC);
+ if (!new_ao)
+ return -ENOMEM;
+ new_ao->lisn = htonl(tcp_rsk(req)->snt_isn);
+ new_ao->risn = htonl(tcp_rsk(req)->rcv_isn);
+ new_ao->ao_required = ao->ao_required;
+ new_ao->accept_icmps = ao->accept_icmps;
+
+ if (family == AF_INET) {
+ addr = (union tcp_ao_addr *)&newsk->sk_daddr;
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (family == AF_INET6) {
+ addr = (union tcp_ao_addr *)&newsk->sk_v6_daddr;
+#endif
+ } else {
+ ret = -EAFNOSUPPORT;
+ goto free_ao;
+ }
+ l3index = l3mdev_master_ifindex_by_index(sock_net(newsk),
+ newsk->sk_bound_dev_if);
+
+ hlist_for_each_entry_rcu(key, &ao->head, node) {
+ if (tcp_ao_key_cmp(key, l3index, addr, key->prefixlen, family, -1, -1))
+ continue;
+
+ new_key = tcp_ao_copy_key(newsk, key);
+ if (!new_key)
+ goto free_and_exit;
+
+ tcp_ao_cache_traffic_keys(newsk, new_ao, new_key);
+ tcp_ao_link_mkt(new_ao, new_key);
+ match = true;
+ }
+
+ if (!match) {
+ /* RFC5925 (7.4.1) specifies that the TCP-AO status
+ * of a connection is determined on the initial SYN.
+ * At this point the connection was TCP-AO enabled, so
+ * it can't switch to being unsigned if peer's key
+ * disappears on the listening socket.
+ */
+ ret = -EKEYREJECTED;
+ goto free_and_exit;
+ }
+
+ if (!static_key_fast_inc_not_disabled(&tcp_ao_needed.key.key)) {
+ ret = -EUSERS;
+ goto free_and_exit;
+ }
+
+ key_head = rcu_dereference(hlist_first_rcu(&new_ao->head));
+ first_key = hlist_entry_safe(key_head, struct tcp_ao_key, node);
+
+ key = tcp_ao_established_key(new_ao, tcp_rsk(req)->ao_keyid, -1);
+ if (key)
+ new_ao->current_key = key;
+ else
+ new_ao->current_key = first_key;
+
+ /* set rnext_key */
+ key = tcp_ao_established_key(new_ao, -1, tcp_rsk(req)->ao_rcv_next);
+ if (key)
+ new_ao->rnext_key = key;
+ else
+ new_ao->rnext_key = first_key;
+
+ sk_gso_disable(newsk);
+ rcu_assign_pointer(tcp_sk(newsk)->ao_info, new_ao);
+
+ return 0;
+
+free_and_exit:
+ hlist_for_each_entry_safe(key, key_head, &new_ao->head, node) {
+ hlist_del(&key->node);
+ tcp_sigpool_release(key->tcp_sigpool_id);
+ atomic_sub(tcp_ao_sizeof_key(key), &newsk->sk_omem_alloc);
+ kfree_sensitive(key);
+ }
+free_ao:
+ kfree(new_ao);
+ return ret;
+}
+
+static bool tcp_ao_can_set_current_rnext(struct sock *sk)
+{
+ /* There aren't current/rnext keys on TCP_LISTEN sockets */
+ if (sk->sk_state == TCP_LISTEN)
+ return false;
+ return true;
+}
+
+static int tcp_ao_verify_ipv4(struct sock *sk, struct tcp_ao_add *cmd,
+ union tcp_ao_addr **addr)
+{
+ struct sockaddr_in *sin = (struct sockaddr_in *)&cmd->addr;
+ struct inet_sock *inet = inet_sk(sk);
+
+ if (sin->sin_family != AF_INET)
+ return -EINVAL;
+
+ /* Currently matching is not performed on port (or port ranges) */
+ if (sin->sin_port != 0)
+ return -EINVAL;
+
+ /* Check prefix and trailing 0's in addr */
+ if (cmd->prefix != 0) {
+ __be32 mask;
+
+ if (ntohl(sin->sin_addr.s_addr) == INADDR_ANY)
+ return -EINVAL;
+ if (cmd->prefix > 32)
+ return -EINVAL;
+
+ mask = inet_make_mask(cmd->prefix);
+ if (sin->sin_addr.s_addr & ~mask)
+ return -EINVAL;
+
+ /* Check that MKT address is consistent with socket */
+ if (ntohl(inet->inet_daddr) != INADDR_ANY &&
+ (inet->inet_daddr & mask) != sin->sin_addr.s_addr)
+ return -EINVAL;
+ } else {
+ if (ntohl(sin->sin_addr.s_addr) != INADDR_ANY)
+ return -EINVAL;
+ }
+
+ *addr = (union tcp_ao_addr *)&sin->sin_addr;
+ return 0;
+}
+
+static int tcp_ao_parse_crypto(struct tcp_ao_add *cmd, struct tcp_ao_key *key)
+{
+ unsigned int syn_tcp_option_space;
+ bool is_kdf_aes_128_cmac = false;
+ struct crypto_ahash *tfm;
+ struct tcp_sigpool hp;
+ void *tmp_key = NULL;
+ int err;
+
+ /* RFC5926, 3.1.1.2. KDF_AES_128_CMAC */
+ if (!strcmp("cmac(aes128)", cmd->alg_name)) {
+ strscpy(cmd->alg_name, "cmac(aes)", sizeof(cmd->alg_name));
+ is_kdf_aes_128_cmac = (cmd->keylen != 16);
+ tmp_key = kmalloc(cmd->keylen, GFP_KERNEL);
+ if (!tmp_key)
+ return -ENOMEM;
+ }
+
+ key->maclen = cmd->maclen ?: 12; /* 12 is the default in RFC5925 */
+
+ /* Check: maclen + tcp-ao header <= (MAX_TCP_OPTION_SPACE - mss
+ * - tstamp (including sackperm)
+ * - wscale),
+ * see tcp_syn_options(), tcp_synack_options(), commit 33ad798c924b.
+ *
+ * In order to allow D-SACK with TCP-AO, the header size should be:
+ * (MAX_TCP_OPTION_SPACE - TCPOLEN_TSTAMP_ALIGNED
+ * - TCPOLEN_SACK_BASE_ALIGNED
+ * - 2 * TCPOLEN_SACK_PERBLOCK) = 8 (maclen = 4),
+ * see tcp_established_options().
+ *
+ * RFC5925, 2.2:
+ * Typical MACs are 96-128 bits (12-16 bytes), but any length
+ * that fits in the header of the segment being authenticated
+ * is allowed.
+ *
+ * RFC5925, 7.6:
+ * TCP-AO continues to consume 16 bytes in non-SYN segments,
+ * leaving a total of 24 bytes for other options, of which
+ * the timestamp consumes 10. This leaves 14 bytes, of which 10
+ * are used for a single SACK block. When two SACK blocks are used,
+ * such as to handle D-SACK, a smaller TCP-AO MAC would be required
+ * to make room for the additional SACK block (i.e., to leave 18
+ * bytes for the D-SACK variant of the SACK option) [RFC2883].
+ * Note that D-SACK is not supportable in TCP MD5 in the presence
+ * of timestamps, because TCP MD5’s MAC length is fixed and too
+ * large to leave sufficient option space.
+ */
+ syn_tcp_option_space = MAX_TCP_OPTION_SPACE;
+ syn_tcp_option_space -= TCPOLEN_MSS_ALIGNED;
+ syn_tcp_option_space -= TCPOLEN_TSTAMP_ALIGNED;
+ syn_tcp_option_space -= TCPOLEN_WSCALE_ALIGNED;
+ if (tcp_ao_len_aligned(key) > syn_tcp_option_space) {
+ err = -EMSGSIZE;
+ goto err_kfree;
+ }
+
+ key->keylen = cmd->keylen;
+ memcpy(key->key, cmd->key, cmd->keylen);
+
+ err = tcp_sigpool_start(key->tcp_sigpool_id, &hp);
+ if (err)
+ goto err_kfree;
+
+ tfm = crypto_ahash_reqtfm(hp.req);
+ if (is_kdf_aes_128_cmac) {
+ void *scratch = hp.scratch;
+ struct scatterlist sg;
+
+ memcpy(tmp_key, cmd->key, cmd->keylen);
+ sg_init_one(&sg, tmp_key, cmd->keylen);
+
+ /* Using zero-key of 16 bytes as described in RFC5926 */
+ memset(scratch, 0, 16);
+ err = crypto_ahash_setkey(tfm, scratch, 16);
+ if (err)
+ goto err_pool_end;
+
+ err = crypto_ahash_init(hp.req);
+ if (err)
+ goto err_pool_end;
+
+ ahash_request_set_crypt(hp.req, &sg, key->key, cmd->keylen);
+ err = crypto_ahash_update(hp.req);
+ if (err)
+ goto err_pool_end;
+
+ err |= crypto_ahash_final(hp.req);
+ if (err)
+ goto err_pool_end;
+ key->keylen = 16;
+ }
+
+ err = crypto_ahash_setkey(tfm, key->key, key->keylen);
+ if (err)
+ goto err_pool_end;
+
+ tcp_sigpool_end(&hp);
+ kfree_sensitive(tmp_key);
+
+ if (tcp_ao_maclen(key) > key->digest_size)
+ return -EINVAL;
+
+ return 0;
+
+err_pool_end:
+ tcp_sigpool_end(&hp);
+err_kfree:
+ kfree_sensitive(tmp_key);
+ return err;
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static int tcp_ao_verify_ipv6(struct sock *sk, struct tcp_ao_add *cmd,
+ union tcp_ao_addr **paddr,
+ unsigned short int *family)
+{
+ struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd->addr;
+ struct in6_addr *addr = &sin6->sin6_addr;
+ u8 prefix = cmd->prefix;
+
+ if (sin6->sin6_family != AF_INET6)
+ return -EINVAL;
+
+ /* Currently matching is not performed on port (or port ranges) */
+ if (sin6->sin6_port != 0)
+ return -EINVAL;
+
+ /* Check prefix and trailing 0's in addr */
+ if (cmd->prefix != 0 && ipv6_addr_v4mapped(addr)) {
+ __be32 addr4 = addr->s6_addr32[3];
+ __be32 mask;
+
+ if (prefix > 32 || ntohl(addr4) == INADDR_ANY)
+ return -EINVAL;
+
+ mask = inet_make_mask(prefix);
+ if (addr4 & ~mask)
+ return -EINVAL;
+
+ /* Check that MKT address is consistent with socket */
+ if (!ipv6_addr_any(&sk->sk_v6_daddr)) {
+ __be32 daddr4 = sk->sk_v6_daddr.s6_addr32[3];
+
+ if (!ipv6_addr_v4mapped(&sk->sk_v6_daddr))
+ return -EINVAL;
+ if ((daddr4 & mask) != addr4)
+ return -EINVAL;
+ }
+
+ *paddr = (union tcp_ao_addr *)&addr->s6_addr32[3];
+ *family = AF_INET;
+ return 0;
+ } else if (cmd->prefix != 0) {
+ struct in6_addr pfx;
+
+ if (ipv6_addr_any(addr) || prefix > 128)
+ return -EINVAL;
+
+ ipv6_addr_prefix(&pfx, addr, prefix);
+ if (ipv6_addr_cmp(&pfx, addr))
+ return -EINVAL;
+
+ /* Check that MKT address is consistent with socket */
+ if (!ipv6_addr_any(&sk->sk_v6_daddr) &&
+ !ipv6_prefix_equal(&sk->sk_v6_daddr, addr, prefix))
+
+ return -EINVAL;
+ } else {
+ if (!ipv6_addr_any(addr))
+ return -EINVAL;
+ }
+
+ *paddr = (union tcp_ao_addr *)addr;
+ return 0;
+}
+#else
+static int tcp_ao_verify_ipv6(struct sock *sk, struct tcp_ao_add *cmd,
+ union tcp_ao_addr **paddr,
+ unsigned short int *family)
+{
+ return -EOPNOTSUPP;
+}
+#endif
+
+static struct tcp_ao_info *setsockopt_ao_info(struct sock *sk)
+{
+ if (sk_fullsock(sk)) {
+ return rcu_dereference_protected(tcp_sk(sk)->ao_info,
+ lockdep_sock_is_held(sk));
+ } else if (sk->sk_state == TCP_TIME_WAIT) {
+ return rcu_dereference_protected(tcp_twsk(sk)->ao_info,
+ lockdep_sock_is_held(sk));
+ }
+ return ERR_PTR(-ESOCKTNOSUPPORT);
+}
+
+static struct tcp_ao_info *getsockopt_ao_info(struct sock *sk)
+{
+ if (sk_fullsock(sk))
+ return rcu_dereference(tcp_sk(sk)->ao_info);
+ else if (sk->sk_state == TCP_TIME_WAIT)
+ return rcu_dereference(tcp_twsk(sk)->ao_info);
+
+ return ERR_PTR(-ESOCKTNOSUPPORT);
+}
+
+#define TCP_AO_KEYF_ALL (TCP_AO_KEYF_IFINDEX | TCP_AO_KEYF_EXCLUDE_OPT)
+#define TCP_AO_GET_KEYF_VALID (TCP_AO_KEYF_IFINDEX)
+
+static struct tcp_ao_key *tcp_ao_key_alloc(struct sock *sk,
+ struct tcp_ao_add *cmd)
+{
+ const char *algo = cmd->alg_name;
+ unsigned int digest_size;
+ struct crypto_ahash *tfm;
+ struct tcp_ao_key *key;
+ struct tcp_sigpool hp;
+ int err, pool_id;
+ size_t size;
+
+ /* Force null-termination of alg_name */
+ cmd->alg_name[ARRAY_SIZE(cmd->alg_name) - 1] = '\0';
+
+ /* RFC5926, 3.1.1.2. KDF_AES_128_CMAC */
+ if (!strcmp("cmac(aes128)", algo))
+ algo = "cmac(aes)";
+
+ /* Full TCP header (th->doff << 2) should fit into scratch area,
+ * see tcp_ao_hash_header().
+ */
+ pool_id = tcp_sigpool_alloc_ahash(algo, 60);
+ if (pool_id < 0)
+ return ERR_PTR(pool_id);
+
+ err = tcp_sigpool_start(pool_id, &hp);
+ if (err)
+ goto err_free_pool;
+
+ tfm = crypto_ahash_reqtfm(hp.req);
+ digest_size = crypto_ahash_digestsize(tfm);
+ tcp_sigpool_end(&hp);
+
+ size = sizeof(struct tcp_ao_key) + (digest_size << 1);
+ key = sock_kmalloc(sk, size, GFP_KERNEL);
+ if (!key) {
+ err = -ENOMEM;
+ goto err_free_pool;
+ }
+
+ key->tcp_sigpool_id = pool_id;
+ key->digest_size = digest_size;
+ return key;
+
+err_free_pool:
+ tcp_sigpool_release(pool_id);
+ return ERR_PTR(err);
+}
+
+static int tcp_ao_add_cmd(struct sock *sk, unsigned short int family,
+ sockptr_t optval, int optlen)
+{
+ struct tcp_ao_info *ao_info;
+ union tcp_ao_addr *addr;
+ struct tcp_ao_key *key;
+ struct tcp_ao_add cmd;
+ int ret, l3index = 0;
+ bool first = false;
+
+ if (optlen < sizeof(cmd))
+ return -EINVAL;
+
+ ret = copy_struct_from_sockptr(&cmd, sizeof(cmd), optval, optlen);
+ if (ret)
+ return ret;
+
+ if (cmd.keylen > TCP_AO_MAXKEYLEN)
+ return -EINVAL;
+
+ if (cmd.reserved != 0 || cmd.reserved2 != 0)
+ return -EINVAL;
+
+ if (family == AF_INET)
+ ret = tcp_ao_verify_ipv4(sk, &cmd, &addr);
+ else
+ ret = tcp_ao_verify_ipv6(sk, &cmd, &addr, &family);
+ if (ret)
+ return ret;
+
+ if (cmd.keyflags & ~TCP_AO_KEYF_ALL)
+ return -EINVAL;
+
+ if (cmd.set_current || cmd.set_rnext) {
+ if (!tcp_ao_can_set_current_rnext(sk))
+ return -EINVAL;
+ }
+
+ if (cmd.ifindex && !(cmd.keyflags & TCP_AO_KEYF_IFINDEX))
+ return -EINVAL;
+
+ /* For cmd.tcp_ifindex = 0 the key will apply to the default VRF */
+ if (cmd.keyflags & TCP_AO_KEYF_IFINDEX && cmd.ifindex) {
+ int bound_dev_if = READ_ONCE(sk->sk_bound_dev_if);
+ struct net_device *dev;
+
+ rcu_read_lock();
+ dev = dev_get_by_index_rcu(sock_net(sk), cmd.ifindex);
+ if (dev && netif_is_l3_master(dev))
+ l3index = dev->ifindex;
+ rcu_read_unlock();
+
+ if (!dev || !l3index)
+ return -EINVAL;
+
+ if (!bound_dev_if || bound_dev_if != cmd.ifindex) {
+ /* tcp_ao_established_key() doesn't expect having
+ * non peer-matching key on an established TCP-AO
+ * connection.
+ */
+ if (!((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)))
+ return -EINVAL;
+ }
+
+ /* It's still possible to bind after adding keys or even
+ * re-bind to a different dev (with CAP_NET_RAW).
+ * So, no reason to return error here, rather try to be
+ * nice and warn the user.
+ */
+ if (bound_dev_if && bound_dev_if != cmd.ifindex)
+ net_warn_ratelimited("AO key ifindex %d != sk bound ifindex %d\n",
+ cmd.ifindex, bound_dev_if);
+ }
+
+ /* Don't allow keys for peers that have a matching TCP-MD5 key */
+ if (cmd.keyflags & TCP_AO_KEYF_IFINDEX) {
+ /* Non-_exact version of tcp_md5_do_lookup() will
+ * as well match keys that aren't bound to a specific VRF
+ * (that will make them match AO key with
+ * sysctl_tcp_l3dev_accept = 1
+ */
+ if (tcp_md5_do_lookup(sk, l3index, addr, family))
+ return -EKEYREJECTED;
+ } else {
+ if (tcp_md5_do_lookup_any_l3index(sk, addr, family))
+ return -EKEYREJECTED;
+ }
+
+ ao_info = setsockopt_ao_info(sk);
+ if (IS_ERR(ao_info))
+ return PTR_ERR(ao_info);
+
+ if (!ao_info) {
+ ao_info = tcp_ao_alloc_info(GFP_KERNEL);
+ if (!ao_info)
+ return -ENOMEM;
+ first = true;
+ } else {
+ /* Check that neither RecvID nor SendID match any
+ * existing key for the peer, RFC5925 3.1:
+ * > The IDs of MKTs MUST NOT overlap where their
+ * > TCP connection identifiers overlap.
+ */
+ if (__tcp_ao_do_lookup(sk, l3index, addr, family, cmd.prefix, -1, cmd.rcvid))
+ return -EEXIST;
+ if (__tcp_ao_do_lookup(sk, l3index, addr, family,
+ cmd.prefix, cmd.sndid, -1))
+ return -EEXIST;
+ }
+
+ key = tcp_ao_key_alloc(sk, &cmd);
+ if (IS_ERR(key)) {
+ ret = PTR_ERR(key);
+ goto err_free_ao;
+ }
+
+ INIT_HLIST_NODE(&key->node);
+ memcpy(&key->addr, addr, (family == AF_INET) ? sizeof(struct in_addr) :
+ sizeof(struct in6_addr));
+ key->prefixlen = cmd.prefix;
+ key->family = family;
+ key->keyflags = cmd.keyflags;
+ key->sndid = cmd.sndid;
+ key->rcvid = cmd.rcvid;
+ key->l3index = l3index;
+ atomic64_set(&key->pkt_good, 0);
+ atomic64_set(&key->pkt_bad, 0);
+
+ ret = tcp_ao_parse_crypto(&cmd, key);
+ if (ret < 0)
+ goto err_free_sock;
+
+ if (!((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))) {
+ tcp_ao_cache_traffic_keys(sk, ao_info, key);
+ if (first) {
+ ao_info->current_key = key;
+ ao_info->rnext_key = key;
+ }
+ }
+
+ tcp_ao_link_mkt(ao_info, key);
+ if (first) {
+ if (!static_branch_inc(&tcp_ao_needed.key)) {
+ ret = -EUSERS;
+ goto err_free_sock;
+ }
+ sk_gso_disable(sk);
+ rcu_assign_pointer(tcp_sk(sk)->ao_info, ao_info);
+ }
+
+ if (cmd.set_current)
+ WRITE_ONCE(ao_info->current_key, key);
+ if (cmd.set_rnext)
+ WRITE_ONCE(ao_info->rnext_key, key);
+ return 0;
+
+err_free_sock:
+ atomic_sub(tcp_ao_sizeof_key(key), &sk->sk_omem_alloc);
+ tcp_sigpool_release(key->tcp_sigpool_id);
+ kfree_sensitive(key);
+err_free_ao:
+ if (first)
+ kfree(ao_info);
+ return ret;
+}
+
+static int tcp_ao_delete_key(struct sock *sk, struct tcp_ao_info *ao_info,
+ bool del_async, struct tcp_ao_key *key,
+ struct tcp_ao_key *new_current,
+ struct tcp_ao_key *new_rnext)
+{
+ int err;
+
+ hlist_del_rcu(&key->node);
+
+ /* Support for async delete on listening sockets: as they don't
+ * need current_key/rnext_key maintaining, we don't need to check
+ * them and we can just free all resources in RCU fashion.
+ */
+ if (del_async) {
+ atomic_sub(tcp_ao_sizeof_key(key), &sk->sk_omem_alloc);
+ call_rcu(&key->rcu, tcp_ao_key_free_rcu);
+ return 0;
+ }
+
+ /* At this moment another CPU could have looked this key up
+ * while it was unlinked from the list. Wait for RCU grace period,
+ * after which the key is off-list and can't be looked up again;
+ * the rx path [just before RCU came] might have used it and set it
+ * as current_key (very unlikely).
+ * Free the key with next RCU grace period (in case it was
+ * current_key before tcp_ao_current_rnext() might have
+ * changed it in forced-delete).
+ */
+ synchronize_rcu();
+ if (new_current)
+ WRITE_ONCE(ao_info->current_key, new_current);
+ if (new_rnext)
+ WRITE_ONCE(ao_info->rnext_key, new_rnext);
+
+ if (unlikely(READ_ONCE(ao_info->current_key) == key ||
+ READ_ONCE(ao_info->rnext_key) == key)) {
+ err = -EBUSY;
+ goto add_key;
+ }
+
+ atomic_sub(tcp_ao_sizeof_key(key), &sk->sk_omem_alloc);
+ call_rcu(&key->rcu, tcp_ao_key_free_rcu);
+
+ return 0;
+add_key:
+ hlist_add_head_rcu(&key->node, &ao_info->head);
+ return err;
+}
+
+#define TCP_AO_DEL_KEYF_ALL (TCP_AO_KEYF_IFINDEX)
+static int tcp_ao_del_cmd(struct sock *sk, unsigned short int family,
+ sockptr_t optval, int optlen)
+{
+ struct tcp_ao_key *key, *new_current = NULL, *new_rnext = NULL;
+ int err, addr_len, l3index = 0;
+ struct tcp_ao_info *ao_info;
+ union tcp_ao_addr *addr;
+ struct tcp_ao_del cmd;
+ __u8 prefix;
+ u16 port;
+
+ if (optlen < sizeof(cmd))
+ return -EINVAL;
+
+ err = copy_struct_from_sockptr(&cmd, sizeof(cmd), optval, optlen);
+ if (err)
+ return err;
+
+ if (cmd.reserved != 0 || cmd.reserved2 != 0)
+ return -EINVAL;
+
+ if (cmd.set_current || cmd.set_rnext) {
+ if (!tcp_ao_can_set_current_rnext(sk))
+ return -EINVAL;
+ }
+
+ if (cmd.keyflags & ~TCP_AO_DEL_KEYF_ALL)
+ return -EINVAL;
+
+ /* No sanity check for TCP_AO_KEYF_IFINDEX as if a VRF
+ * was destroyed, there still should be a way to delete keys,
+ * that were bound to that l3intf. So, fail late at lookup stage
+ * if there is no key for that ifindex.
+ */
+ if (cmd.ifindex && !(cmd.keyflags & TCP_AO_KEYF_IFINDEX))
+ return -EINVAL;
+
+ ao_info = setsockopt_ao_info(sk);
+ if (IS_ERR(ao_info))
+ return PTR_ERR(ao_info);
+ if (!ao_info)
+ return -ENOENT;
+
+ /* For sockets in TCP_CLOSED it's possible set keys that aren't
+ * matching the future peer (address/VRF/etc),
+ * tcp_ao_connect_init() will choose a correct matching MKT
+ * if there's any.
+ */
+ if (cmd.set_current) {
+ new_current = tcp_ao_established_key(ao_info, cmd.current_key, -1);
+ if (!new_current)
+ return -ENOENT;
+ }
+ if (cmd.set_rnext) {
+ new_rnext = tcp_ao_established_key(ao_info, -1, cmd.rnext);
+ if (!new_rnext)
+ return -ENOENT;
+ }
+ if (cmd.del_async && sk->sk_state != TCP_LISTEN)
+ return -EINVAL;
+
+ if (family == AF_INET) {
+ struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.addr;
+
+ addr = (union tcp_ao_addr *)&sin->sin_addr;
+ addr_len = sizeof(struct in_addr);
+ port = ntohs(sin->sin_port);
+ } else {
+ struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.addr;
+ struct in6_addr *addr6 = &sin6->sin6_addr;
+
+ if (ipv6_addr_v4mapped(addr6)) {
+ addr = (union tcp_ao_addr *)&addr6->s6_addr32[3];
+ addr_len = sizeof(struct in_addr);
+ family = AF_INET;
+ } else {
+ addr = (union tcp_ao_addr *)addr6;
+ addr_len = sizeof(struct in6_addr);
+ }
+ port = ntohs(sin6->sin6_port);
+ }
+ prefix = cmd.prefix;
+
+ /* Currently matching is not performed on port (or port ranges) */
+ if (port != 0)
+ return -EINVAL;
+
+ /* We could choose random present key here for current/rnext
+ * but that's less predictable. Let's be strict and don't
+ * allow removing a key that's in use. RFC5925 doesn't
+ * specify how-to coordinate key removal, but says:
+ * "It is presumed that an MKT affecting a particular
+ * connection cannot be destroyed during an active connection"
+ */
+ hlist_for_each_entry_rcu(key, &ao_info->head, node) {
+ if (cmd.sndid != key->sndid ||
+ cmd.rcvid != key->rcvid)
+ continue;
+
+ if (family != key->family ||
+ prefix != key->prefixlen ||
+ memcmp(addr, &key->addr, addr_len))
+ continue;
+
+ if ((cmd.keyflags & TCP_AO_KEYF_IFINDEX) !=
+ (key->keyflags & TCP_AO_KEYF_IFINDEX))
+ continue;
+
+ if (key->l3index != l3index)
+ continue;
+
+ if (key == new_current || key == new_rnext)
+ continue;
+
+ return tcp_ao_delete_key(sk, ao_info, cmd.del_async, key,
+ new_current, new_rnext);
+ }
+ return -ENOENT;
+}
+
+/* cmd.ao_required makes a socket TCP-AO only.
+ * Don't allow any md5 keys for any l3intf on the socket together with it.
+ * Restricting it early in setsockopt() removes a check for
+ * ao_info->ao_required on inbound tcp segment fast-path.
+ */
+static int tcp_ao_required_verify(struct sock *sk)
+{
+#ifdef CONFIG_TCP_MD5SIG
+ const struct tcp_md5sig_info *md5sig;
+
+ if (!static_branch_unlikely(&tcp_md5_needed.key))
+ return 0;
+
+ md5sig = rcu_dereference_check(tcp_sk(sk)->md5sig_info,
+ lockdep_sock_is_held(sk));
+ if (!md5sig)
+ return 0;
+
+ if (rcu_dereference_check(hlist_first_rcu(&md5sig->head),
+ lockdep_sock_is_held(sk)))
+ return 1;
+#endif
+ return 0;
+}
+
+static int tcp_ao_info_cmd(struct sock *sk, unsigned short int family,
+ sockptr_t optval, int optlen)
+{
+ struct tcp_ao_key *new_current = NULL, *new_rnext = NULL;
+ struct tcp_ao_info *ao_info;
+ struct tcp_ao_info_opt cmd;
+ bool first = false;
+ int err;
+
+ if (optlen < sizeof(cmd))
+ return -EINVAL;
+
+ err = copy_struct_from_sockptr(&cmd, sizeof(cmd), optval, optlen);
+ if (err)
+ return err;
+
+ if (cmd.set_current || cmd.set_rnext) {
+ if (!tcp_ao_can_set_current_rnext(sk))
+ return -EINVAL;
+ }
+
+ if (cmd.reserved != 0 || cmd.reserved2 != 0)
+ return -EINVAL;
+
+ ao_info = setsockopt_ao_info(sk);
+ if (IS_ERR(ao_info))
+ return PTR_ERR(ao_info);
+ if (!ao_info) {
+ if (!((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)))
+ return -EINVAL;
+ ao_info = tcp_ao_alloc_info(GFP_KERNEL);
+ if (!ao_info)
+ return -ENOMEM;
+ first = true;
+ }
+
+ if (cmd.ao_required && tcp_ao_required_verify(sk))
+ return -EKEYREJECTED;
+
+ /* For sockets in TCP_CLOSED it's possible set keys that aren't
+ * matching the future peer (address/port/VRF/etc),
+ * tcp_ao_connect_init() will choose a correct matching MKT
+ * if there's any.
+ */
+ if (cmd.set_current) {
+ new_current = tcp_ao_established_key(ao_info, cmd.current_key, -1);
+ if (!new_current) {
+ err = -ENOENT;
+ goto out;
+ }
+ }
+ if (cmd.set_rnext) {
+ new_rnext = tcp_ao_established_key(ao_info, -1, cmd.rnext);
+ if (!new_rnext) {
+ err = -ENOENT;
+ goto out;
+ }
+ }
+ if (cmd.set_counters) {
+ atomic64_set(&ao_info->counters.pkt_good, cmd.pkt_good);
+ atomic64_set(&ao_info->counters.pkt_bad, cmd.pkt_bad);
+ atomic64_set(&ao_info->counters.key_not_found, cmd.pkt_key_not_found);
+ atomic64_set(&ao_info->counters.ao_required, cmd.pkt_ao_required);
+ atomic64_set(&ao_info->counters.dropped_icmp, cmd.pkt_dropped_icmp);
+ }
+
+ ao_info->ao_required = cmd.ao_required;
+ ao_info->accept_icmps = cmd.accept_icmps;
+ if (new_current)
+ WRITE_ONCE(ao_info->current_key, new_current);
+ if (new_rnext)
+ WRITE_ONCE(ao_info->rnext_key, new_rnext);
+ if (first) {
+ if (!static_branch_inc(&tcp_ao_needed.key)) {
+ err = -EUSERS;
+ goto out;
+ }
+ sk_gso_disable(sk);
+ rcu_assign_pointer(tcp_sk(sk)->ao_info, ao_info);
+ }
+ return 0;
+out:
+ if (first)
+ kfree(ao_info);
+ return err;
+}
+
+int tcp_parse_ao(struct sock *sk, int cmd, unsigned short int family,
+ sockptr_t optval, int optlen)
+{
+ if (WARN_ON_ONCE(family != AF_INET && family != AF_INET6))
+ return -EAFNOSUPPORT;
+
+ switch (cmd) {
+ case TCP_AO_ADD_KEY:
+ return tcp_ao_add_cmd(sk, family, optval, optlen);
+ case TCP_AO_DEL_KEY:
+ return tcp_ao_del_cmd(sk, family, optval, optlen);
+ case TCP_AO_INFO:
+ return tcp_ao_info_cmd(sk, family, optval, optlen);
+ default:
+ WARN_ON_ONCE(1);
+ return -EINVAL;
+ }
+}
+
+int tcp_v4_parse_ao(struct sock *sk, int cmd, sockptr_t optval, int optlen)
+{
+ return tcp_parse_ao(sk, cmd, AF_INET, optval, optlen);
+}
+
+/* tcp_ao_copy_mkts_to_user(ao_info, optval, optlen)
+ *
+ * @ao_info: struct tcp_ao_info on the socket that
+ * socket getsockopt(TCP_AO_GET_KEYS) is executed on
+ * @optval: pointer to array of tcp_ao_getsockopt structures in user space.
+ * Must be != NULL.
+ * @optlen: pointer to size of tcp_ao_getsockopt structure.
+ * Must be != NULL.
+ *
+ * Return value: 0 on success, a negative error number otherwise.
+ *
+ * optval points to an array of tcp_ao_getsockopt structures in user space.
+ * optval[0] is used as both input and output to getsockopt. It determines
+ * which keys are returned by the kernel.
+ * optval[0].nkeys is the size of the array in user space. On return it contains
+ * the number of keys matching the search criteria.
+ * If tcp_ao_getsockopt::get_all is set, then all keys in the socket are
+ * returned, otherwise only keys matching <addr, prefix, sndid, rcvid>
+ * in optval[0] are returned.
+ * optlen is also used as both input and output. The user provides the size
+ * of struct tcp_ao_getsockopt in user space, and the kernel returns the size
+ * of the structure in kernel space.
+ * The size of struct tcp_ao_getsockopt may differ between user and kernel.
+ * There are three cases to consider:
+ * * If usize == ksize, then keys are copied verbatim.
+ * * If usize < ksize, then the userspace has passed an old struct to a
+ * newer kernel. The rest of the trailing bytes in optval[0]
+ * (ksize - usize) are interpreted as 0 by the kernel.
+ * * If usize > ksize, then the userspace has passed a new struct to an
+ * older kernel. The trailing bytes unknown to the kernel (usize - ksize)
+ * are checked to ensure they are zeroed, otherwise -E2BIG is returned.
+ * On return the kernel fills in min(usize, ksize) in each entry of the array.
+ * The layout of the fields in the user and kernel structures is expected to
+ * be the same (including in the 32bit vs 64bit case).
+ */
+static int tcp_ao_copy_mkts_to_user(struct tcp_ao_info *ao_info,
+ sockptr_t optval, sockptr_t optlen)
+{
+ struct tcp_ao_getsockopt opt_in, opt_out;
+ struct tcp_ao_key *key, *current_key;
+ bool do_address_matching = true;
+ union tcp_ao_addr *addr = NULL;
+ int err, l3index, user_len;
+ unsigned int max_keys; /* maximum number of keys to copy to user */
+ size_t out_offset = 0;
+ size_t bytes_to_write; /* number of bytes to write to user level */
+ u32 matched_keys; /* keys from ao_info matched so far */
+ int optlen_out;
+ __be16 port = 0;
+
+ if (copy_from_sockptr(&user_len, optlen, sizeof(int)))
+ return -EFAULT;
+
+ if (user_len <= 0)
+ return -EINVAL;
+
+ memset(&opt_in, 0, sizeof(struct tcp_ao_getsockopt));
+ err = copy_struct_from_sockptr(&opt_in, sizeof(opt_in),
+ optval, user_len);
+ if (err < 0)
+ return err;
+
+ if (opt_in.pkt_good || opt_in.pkt_bad)
+ return -EINVAL;
+ if (opt_in.keyflags & ~TCP_AO_GET_KEYF_VALID)
+ return -EINVAL;
+ if (opt_in.ifindex && !(opt_in.keyflags & TCP_AO_KEYF_IFINDEX))
+ return -EINVAL;
+
+ if (opt_in.reserved != 0)
+ return -EINVAL;
+
+ max_keys = opt_in.nkeys;
+ l3index = (opt_in.keyflags & TCP_AO_KEYF_IFINDEX) ? opt_in.ifindex : -1;
+
+ if (opt_in.get_all || opt_in.is_current || opt_in.is_rnext) {
+ if (opt_in.get_all && (opt_in.is_current || opt_in.is_rnext))
+ return -EINVAL;
+ do_address_matching = false;
+ }
+
+ switch (opt_in.addr.ss_family) {
+ case AF_INET: {
+ struct sockaddr_in *sin;
+ __be32 mask;
+
+ sin = (struct sockaddr_in *)&opt_in.addr;
+ port = sin->sin_port;
+ addr = (union tcp_ao_addr *)&sin->sin_addr;
+
+ if (opt_in.prefix > 32)
+ return -EINVAL;
+
+ if (ntohl(sin->sin_addr.s_addr) == INADDR_ANY &&
+ opt_in.prefix != 0)
+ return -EINVAL;
+
+ mask = inet_make_mask(opt_in.prefix);
+ if (sin->sin_addr.s_addr & ~mask)
+ return -EINVAL;
+
+ break;
+ }
+ case AF_INET6: {
+ struct sockaddr_in6 *sin6;
+ struct in6_addr *addr6;
+
+ sin6 = (struct sockaddr_in6 *)&opt_in.addr;
+ addr = (union tcp_ao_addr *)&sin6->sin6_addr;
+ addr6 = &sin6->sin6_addr;
+ port = sin6->sin6_port;
+
+ /* We don't have to change family and @addr here if
+ * ipv6_addr_v4mapped() like in key adding:
+ * tcp_ao_key_cmp() does it. Do the sanity checks though.
+ */
+ if (opt_in.prefix != 0) {
+ if (ipv6_addr_v4mapped(addr6)) {
+ __be32 mask, addr4 = addr6->s6_addr32[3];
+
+ if (opt_in.prefix > 32 ||
+ ntohl(addr4) == INADDR_ANY)
+ return -EINVAL;
+ mask = inet_make_mask(opt_in.prefix);
+ if (addr4 & ~mask)
+ return -EINVAL;
+ } else {
+ struct in6_addr pfx;
+
+ if (ipv6_addr_any(addr6) ||
+ opt_in.prefix > 128)
+ return -EINVAL;
+
+ ipv6_addr_prefix(&pfx, addr6, opt_in.prefix);
+ if (ipv6_addr_cmp(&pfx, addr6))
+ return -EINVAL;
+ }
+ } else if (!ipv6_addr_any(addr6)) {
+ return -EINVAL;
+ }
+ break;
+ }
+ case 0:
+ if (!do_address_matching)
+ break;
+ fallthrough;
+ default:
+ return -EAFNOSUPPORT;
+ }
+
+ if (!do_address_matching) {
+ /* We could just ignore those, but let's do stricter checks */
+ if (addr || port)
+ return -EINVAL;
+ if (opt_in.prefix || opt_in.sndid || opt_in.rcvid)
+ return -EINVAL;
+ }
+
+ bytes_to_write = min_t(int, user_len, sizeof(struct tcp_ao_getsockopt));
+ matched_keys = 0;
+ /* May change in RX, while we're dumping, pre-fetch it */
+ current_key = READ_ONCE(ao_info->current_key);
+
+ hlist_for_each_entry_rcu(key, &ao_info->head, node) {
+ if (opt_in.get_all)
+ goto match;
+
+ if (opt_in.is_current || opt_in.is_rnext) {
+ if (opt_in.is_current && key == current_key)
+ goto match;
+ if (opt_in.is_rnext && key == ao_info->rnext_key)
+ goto match;
+ continue;
+ }
+
+ if (tcp_ao_key_cmp(key, l3index, addr, opt_in.prefix,
+ opt_in.addr.ss_family,
+ opt_in.sndid, opt_in.rcvid) != 0)
+ continue;
+match:
+ matched_keys++;
+ if (matched_keys > max_keys)
+ continue;
+
+ memset(&opt_out, 0, sizeof(struct tcp_ao_getsockopt));
+
+ if (key->family == AF_INET) {
+ struct sockaddr_in *sin_out = (struct sockaddr_in *)&opt_out.addr;
+
+ sin_out->sin_family = key->family;
+ sin_out->sin_port = 0;
+ memcpy(&sin_out->sin_addr, &key->addr, sizeof(struct in_addr));
+ } else {
+ struct sockaddr_in6 *sin6_out = (struct sockaddr_in6 *)&opt_out.addr;
+
+ sin6_out->sin6_family = key->family;
+ sin6_out->sin6_port = 0;
+ memcpy(&sin6_out->sin6_addr, &key->addr, sizeof(struct in6_addr));
+ }
+ opt_out.sndid = key->sndid;
+ opt_out.rcvid = key->rcvid;
+ opt_out.prefix = key->prefixlen;
+ opt_out.keyflags = key->keyflags;
+ opt_out.is_current = (key == current_key);
+ opt_out.is_rnext = (key == ao_info->rnext_key);
+ opt_out.nkeys = 0;
+ opt_out.maclen = key->maclen;
+ opt_out.keylen = key->keylen;
+ opt_out.ifindex = key->l3index;
+ opt_out.pkt_good = atomic64_read(&key->pkt_good);
+ opt_out.pkt_bad = atomic64_read(&key->pkt_bad);
+ memcpy(&opt_out.key, key->key, key->keylen);
+ tcp_sigpool_algo(key->tcp_sigpool_id, opt_out.alg_name, 64);
+
+ /* Copy key to user */
+ if (copy_to_sockptr_offset(optval, out_offset,
+ &opt_out, bytes_to_write))
+ return -EFAULT;
+ out_offset += user_len;
+ }
+
+ optlen_out = (int)sizeof(struct tcp_ao_getsockopt);
+ if (copy_to_sockptr(optlen, &optlen_out, sizeof(int)))
+ return -EFAULT;
+
+ out_offset = offsetof(struct tcp_ao_getsockopt, nkeys);
+ if (copy_to_sockptr_offset(optval, out_offset,
+ &matched_keys, sizeof(u32)))
+ return -EFAULT;
+
+ return 0;
+}
+
+int tcp_ao_get_mkts(struct sock *sk, sockptr_t optval, sockptr_t optlen)
+{
+ struct tcp_ao_info *ao_info;
+
+ ao_info = setsockopt_ao_info(sk);
+ if (IS_ERR(ao_info))
+ return PTR_ERR(ao_info);
+ if (!ao_info)
+ return -ENOENT;
+
+ return tcp_ao_copy_mkts_to_user(ao_info, optval, optlen);
+}
+
+int tcp_ao_get_sock_info(struct sock *sk, sockptr_t optval, sockptr_t optlen)
+{
+ struct tcp_ao_info_opt out, in = {};
+ struct tcp_ao_key *current_key;
+ struct tcp_ao_info *ao;
+ int err, len;
+
+ if (copy_from_sockptr(&len, optlen, sizeof(int)))
+ return -EFAULT;
+
+ if (len <= 0)
+ return -EINVAL;
+
+ /* Copying this "in" only to check ::reserved, ::reserved2,
+ * that may be needed to extend (struct tcp_ao_info_opt) and
+ * what getsockopt() provides in future.
+ */
+ err = copy_struct_from_sockptr(&in, sizeof(in), optval, len);
+ if (err)
+ return err;
+
+ if (in.reserved != 0 || in.reserved2 != 0)
+ return -EINVAL;
+
+ ao = setsockopt_ao_info(sk);
+ if (IS_ERR(ao))
+ return PTR_ERR(ao);
+ if (!ao)
+ return -ENOENT;
+
+ memset(&out, 0, sizeof(out));
+ out.ao_required = ao->ao_required;
+ out.accept_icmps = ao->accept_icmps;
+ out.pkt_good = atomic64_read(&ao->counters.pkt_good);
+ out.pkt_bad = atomic64_read(&ao->counters.pkt_bad);
+ out.pkt_key_not_found = atomic64_read(&ao->counters.key_not_found);
+ out.pkt_ao_required = atomic64_read(&ao->counters.ao_required);
+ out.pkt_dropped_icmp = atomic64_read(&ao->counters.dropped_icmp);
+
+ current_key = READ_ONCE(ao->current_key);
+ if (current_key) {
+ out.set_current = 1;
+ out.current_key = current_key->sndid;
+ }
+ if (ao->rnext_key) {
+ out.set_rnext = 1;
+ out.rnext = ao->rnext_key->rcvid;
+ }
+
+ if (copy_to_sockptr(optval, &out, min_t(int, len, sizeof(out))))
+ return -EFAULT;
+
+ return 0;
+}
+
+int tcp_ao_set_repair(struct sock *sk, sockptr_t optval, unsigned int optlen)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct tcp_ao_repair cmd;
+ struct tcp_ao_key *key;
+ struct tcp_ao_info *ao;
+ int err;
+
+ if (optlen < sizeof(cmd))
+ return -EINVAL;
+
+ err = copy_struct_from_sockptr(&cmd, sizeof(cmd), optval, optlen);
+ if (err)
+ return err;
+
+ if (!tp->repair)
+ return -EPERM;
+
+ ao = setsockopt_ao_info(sk);
+ if (IS_ERR(ao))
+ return PTR_ERR(ao);
+ if (!ao)
+ return -ENOENT;
+
+ WRITE_ONCE(ao->lisn, cmd.snt_isn);
+ WRITE_ONCE(ao->risn, cmd.rcv_isn);
+ WRITE_ONCE(ao->snd_sne, cmd.snd_sne);
+ WRITE_ONCE(ao->rcv_sne, cmd.rcv_sne);
+
+ hlist_for_each_entry_rcu(key, &ao->head, node)
+ tcp_ao_cache_traffic_keys(sk, ao, key);
+
+ return 0;
+}
+
+int tcp_ao_get_repair(struct sock *sk, sockptr_t optval, sockptr_t optlen)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct tcp_ao_repair opt;
+ struct tcp_ao_info *ao;
+ int len;
+
+ if (copy_from_sockptr(&len, optlen, sizeof(int)))
+ return -EFAULT;
+
+ if (len <= 0)
+ return -EINVAL;
+
+ if (!tp->repair)
+ return -EPERM;
+
+ rcu_read_lock();
+ ao = getsockopt_ao_info(sk);
+ if (IS_ERR_OR_NULL(ao)) {
+ rcu_read_unlock();
+ return ao ? PTR_ERR(ao) : -ENOENT;
+ }
+
+ opt.snt_isn = ao->lisn;
+ opt.rcv_isn = ao->risn;
+ opt.snd_sne = READ_ONCE(ao->snd_sne);
+ opt.rcv_sne = READ_ONCE(ao->rcv_sne);
+ rcu_read_unlock();
+
+ if (copy_to_sockptr(optval, &opt, min_t(int, len, sizeof(opt))))
+ return -EFAULT;
+ return 0;
+}
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 146792cd26fe..22358032dd48 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -258,7 +258,7 @@ static unsigned long bbr_bw_to_pacing_rate(struct sock *sk, u32 bw, int gain)
u64 rate = bw;
rate = bbr_rate_bytes_per_sec(sk, rate, gain);
- rate = min_t(u64, rate, sk->sk_max_pacing_rate);
+ rate = min_t(u64, rate, READ_ONCE(sk->sk_max_pacing_rate));
return rate;
}
@@ -278,7 +278,8 @@ static void bbr_init_pacing_rate_from_rtt(struct sock *sk)
}
bw = (u64)tcp_snd_cwnd(tp) * BW_UNIT;
do_div(bw, rtt_us);
- sk->sk_pacing_rate = bbr_bw_to_pacing_rate(sk, bw, bbr_high_gain);
+ WRITE_ONCE(sk->sk_pacing_rate,
+ bbr_bw_to_pacing_rate(sk, bw, bbr_high_gain));
}
/* Pace using current bw estimate and a gain factor. */
@@ -290,14 +291,14 @@ static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain)
if (unlikely(!bbr->has_seen_rtt && tp->srtt_us))
bbr_init_pacing_rate_from_rtt(sk);
- if (bbr_full_bw_reached(sk) || rate > sk->sk_pacing_rate)
- sk->sk_pacing_rate = rate;
+ if (bbr_full_bw_reached(sk) || rate > READ_ONCE(sk->sk_pacing_rate))
+ WRITE_ONCE(sk->sk_pacing_rate, rate);
}
/* override sysctl_tcp_min_tso_segs */
__bpf_kfunc static u32 bbr_min_tso_segs(struct sock *sk)
{
- return sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2;
+ return READ_ONCE(sk->sk_pacing_rate) < (bbr_min_tso_rate >> 3) ? 1 : 2;
}
static u32 bbr_tso_segs_goal(struct sock *sk)
@@ -309,7 +310,7 @@ static u32 bbr_tso_segs_goal(struct sock *sk)
* driver provided sk_gso_max_size.
*/
bytes = min_t(unsigned long,
- sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift),
+ READ_ONCE(sk->sk_pacing_rate) >> READ_ONCE(sk->sk_pacing_shift),
GSO_LEGACY_MAX_SIZE - 1 - MAX_TCP_HEADER);
segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk));
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 81f0dff69e0b..53b0d62fd2c2 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -222,6 +222,7 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk,
int *addr_len)
{
struct tcp_sock *tcp = tcp_sk(sk);
+ int peek = flags & MSG_PEEK;
u32 seq = tcp->copied_seq;
struct sk_psock *psock;
int copied = 0;
@@ -306,15 +307,22 @@ msg_bytes_ready:
}
data = tcp_msg_wait_data(sk, psock, timeo);
+ if (data < 0) {
+ copied = data;
+ goto unlock;
+ }
if (data && !sk_psock_queue_empty(psock))
goto msg_bytes_ready;
copied = -EAGAIN;
}
out:
- WRITE_ONCE(tcp->copied_seq, seq);
+ if (!peek)
+ WRITE_ONCE(tcp->copied_seq, seq);
tcp_rcv_space_adjust(sk);
if (copied > 0)
__tcp_cleanup_rbuf(sk, copied);
+
+unlock:
release_sock(sk);
sk_psock_put(sk, psock);
return copied;
@@ -349,6 +357,10 @@ msg_bytes_ready:
timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
data = tcp_msg_wait_data(sk, psock, timeo);
+ if (data < 0) {
+ ret = data;
+ goto unlock;
+ }
if (data) {
if (!sk_psock_queue_empty(psock))
goto msg_bytes_ready;
@@ -359,6 +371,8 @@ msg_bytes_ready:
copied = -EAGAIN;
}
ret = copied;
+
+unlock:
release_sock(sk);
sk_psock_put(sk, psock);
return ret;
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index 01b50fa79189..4cbe4b44425a 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -247,4 +247,5 @@ static void __exit tcp_diag_exit(void)
module_init(tcp_diag_init);
module_exit(tcp_diag_exit);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("TCP socket monitoring via SOCK_DIAG");
MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-6 /* AF_INET - IPPROTO_TCP */);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 06fe1cf645d5..df7b13f0e5e0 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -202,23 +202,17 @@ static void bpf_skops_established(struct sock *sk, int bpf_op,
}
#endif
-static void tcp_gro_dev_warn(struct sock *sk, const struct sk_buff *skb,
- unsigned int len)
+static __cold void tcp_gro_dev_warn(const struct sock *sk, const struct sk_buff *skb,
+ unsigned int len)
{
- static bool __once __read_mostly;
+ struct net_device *dev;
- if (!__once) {
- struct net_device *dev;
-
- __once = true;
-
- rcu_read_lock();
- dev = dev_get_by_index_rcu(sock_net(sk), skb->skb_iif);
- if (!dev || len >= dev->mtu)
- pr_warn("%s: Driver has suspect GRO implementation, TCP performance may be compromised.\n",
- dev ? dev->name : "Unknown driver");
- rcu_read_unlock();
- }
+ rcu_read_lock();
+ dev = dev_get_by_index_rcu(sock_net(sk), skb->skb_iif);
+ if (!dev || len >= READ_ONCE(dev->mtu))
+ pr_warn("%s: Driver has suspect GRO implementation, TCP performance may be compromised.\n",
+ dev ? dev->name : "Unknown driver");
+ rcu_read_unlock();
}
/* Adapt the MSS value used to make delayed ack decision to the
@@ -250,9 +244,21 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
icsk->icsk_ack.rcv_mss = min_t(unsigned int, len,
tcp_sk(sk)->advmss);
/* Account for possibly-removed options */
- if (unlikely(len > icsk->icsk_ack.rcv_mss +
- MAX_TCP_OPTION_SPACE))
- tcp_gro_dev_warn(sk, skb, len);
+ DO_ONCE_LITE_IF(len > icsk->icsk_ack.rcv_mss + MAX_TCP_OPTION_SPACE,
+ tcp_gro_dev_warn, sk, skb, len);
+ /* If the skb has a len of exactly 1*MSS and has the PSH bit
+ * set then it is likely the end of an application write. So
+ * more data may not be arriving soon, and yet the data sender
+ * may be waiting for an ACK if cwnd-bound or using TX zero
+ * copy. So we set ICSK_ACK_PUSHED here so that
+ * tcp_cleanup_rbuf() will send an ACK immediately if the app
+ * reads all of the data and is not ping-pong. If len > MSS
+ * then this logic does not matter (and does not hurt) because
+ * tcp_cleanup_rbuf() will always ACK immediately if the app
+ * reads data and there is more than an MSS of unACKed data.
+ */
+ if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_PSH)
+ icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
} else {
/* Otherwise, we make more careful check taking into account,
* that SACKs block is variable.
@@ -680,6 +686,23 @@ new_measure:
tp->rcv_rtt_est.time = tp->tcp_mstamp;
}
+static s32 tcp_rtt_tsopt_us(const struct tcp_sock *tp)
+{
+ u32 delta, delta_us;
+
+ delta = tcp_time_stamp_ts(tp) - tp->rx_opt.rcv_tsecr;
+ if (tp->tcp_usec_ts)
+ return delta;
+
+ if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
+ if (!delta)
+ delta = 1;
+ delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
+ return delta_us;
+ }
+ return -1;
+}
+
static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
const struct sk_buff *skb)
{
@@ -691,15 +714,10 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
if (TCP_SKB_CB(skb)->end_seq -
TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss) {
- u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
- u32 delta_us;
-
- if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
- if (!delta)
- delta = 1;
- delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
- tcp_rcv_rtt_update(tp, delta_us, 0);
- }
+ s32 delta = tcp_rtt_tsopt_us(tp);
+
+ if (delta >= 0)
+ tcp_rcv_rtt_update(tp, delta, 0);
}
}
@@ -765,6 +783,16 @@ new_measure:
tp->rcvq_space.time = tp->tcp_mstamp;
}
+static void tcp_save_lrcv_flowlabel(struct sock *sk, const struct sk_buff *skb)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ struct inet_connection_sock *icsk = inet_csk(sk);
+
+ if (skb->protocol == htons(ETH_P_IPV6))
+ icsk->icsk_ack.lrcv_flowlabel = ntohl(ip6_flowlabel(ipv6_hdr(skb)));
+#endif
+}
+
/* There is something which you must keep in mind when you analyze the
* behavior of the tp->ato delayed ack timeout interval. When a
* connection starts up, we want to ack as quickly as possible. The
@@ -813,6 +841,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
}
}
icsk->icsk_ack.lrcvtime = now;
+ tcp_save_lrcv_flowlabel(sk, skb);
tcp_ecn_check_ce(sk, skb);
@@ -927,8 +956,8 @@ static void tcp_update_pacing_rate(struct sock *sk)
* without any lock. We want to make sure compiler wont store
* intermediate values in this location.
*/
- WRITE_ONCE(sk->sk_pacing_rate, min_t(u64, rate,
- sk->sk_max_pacing_rate));
+ WRITE_ONCE(sk->sk_pacing_rate,
+ min_t(u64, rate, READ_ONCE(sk->sk_max_pacing_rate)));
}
/* Calculate rto without backoff. This is the second half of Van Jacobson's
@@ -2088,6 +2117,10 @@ void tcp_clear_retrans(struct tcp_sock *tp)
tp->undo_marker = 0;
tp->undo_retrans = -1;
tp->sacked_out = 0;
+ tp->rto_stamp = 0;
+ tp->total_rto = 0;
+ tp->total_rto_recoveries = 0;
+ tp->total_rto_time = 0;
}
static inline void tcp_init_undo(struct tcp_sock *tp)
@@ -2194,16 +2227,17 @@ void tcp_enter_loss(struct sock *sk)
* restore sanity to the SACK scoreboard. If the apparent reneging
* persists until this RTO then we'll clear the SACK scoreboard.
*/
-static bool tcp_check_sack_reneging(struct sock *sk, int flag)
+static bool tcp_check_sack_reneging(struct sock *sk, int *ack_flag)
{
- if (flag & FLAG_SACK_RENEGING &&
- flag & FLAG_SND_UNA_ADVANCED) {
+ if (*ack_flag & FLAG_SACK_RENEGING &&
+ *ack_flag & FLAG_SND_UNA_ADVANCED) {
struct tcp_sock *tp = tcp_sk(sk);
unsigned long delay = max(usecs_to_jiffies(tp->srtt_us >> 4),
msecs_to_jiffies(10));
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
delay, TCP_RTO_MAX);
+ *ack_flag &= ~FLAG_SET_XMIT_TIMER;
return true;
}
return false;
@@ -2414,7 +2448,7 @@ static bool tcp_skb_spurious_retrans(const struct tcp_sock *tp,
const struct sk_buff *skb)
{
return (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) &&
- tcp_tsopt_ecr_before(tp, tcp_skb_timestamp(skb));
+ tcp_tsopt_ecr_before(tp, tcp_skb_timestamp_ts(tp->tcp_usec_ts, skb));
}
/* Nothing was retransmitted or returned timestamp is less
@@ -2825,6 +2859,14 @@ void tcp_enter_recovery(struct sock *sk, bool ece_ack)
tcp_set_ca_state(sk, TCP_CA_Recovery);
}
+static void tcp_update_rto_time(struct tcp_sock *tp)
+{
+ if (tp->rto_stamp) {
+ tp->total_rto_time += tcp_time_stamp_ms(tp) - tp->rto_stamp;
+ tp->rto_stamp = 0;
+ }
+}
+
/* Process an ACK in CA_Loss state. Move to CA_Open if lost data are
* recovered or spurious. Otherwise retransmits more on partial ACKs.
*/
@@ -2973,7 +3015,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
tp->prior_ssthresh = 0;
/* B. In all the states check for reneging SACKs. */
- if (tcp_check_sack_reneging(sk, flag))
+ if (tcp_check_sack_reneging(sk, ack_flag))
return;
/* C. Check consistency of the current state. */
@@ -3029,6 +3071,8 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
break;
case TCP_CA_Loss:
tcp_process_loss(sk, flag, num_dupack, rexmit);
+ if (icsk->icsk_ca_state != TCP_CA_Loss)
+ tcp_update_rto_time(tp);
tcp_identify_packet_loss(sk, ack_flag);
if (!(icsk->icsk_ca_state == TCP_CA_Open ||
(*ack_flag & FLAG_LOST_RETRANS)))
@@ -3108,17 +3152,10 @@ static bool tcp_ack_update_rtt(struct sock *sk, const int flag,
* left edge of the send window.
* See draft-ietf-tcplw-high-performance-00, section 3.3.
*/
- if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
- flag & FLAG_ACKED) {
- u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
-
- if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
- if (!delta)
- delta = 1;
- seq_rtt_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
- ca_rtt_us = seq_rtt_us;
- }
- }
+ if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp &&
+ tp->rx_opt.rcv_tsecr && flag & FLAG_ACKED)
+ seq_rtt_us = ca_rtt_us = tcp_rtt_tsopt_us(tp);
+
rs->rtt_us = ca_rtt_us; /* RTT of last (S)ACKed packet (or -1) */
if (seq_rtt_us < 0)
return false;
@@ -3528,6 +3565,21 @@ static inline bool tcp_may_update_window(const struct tcp_sock *tp,
(ack_seq == tp->snd_wl1 && (nwin > tp->snd_wnd || !nwin));
}
+static void tcp_snd_sne_update(struct tcp_sock *tp, u32 ack)
+{
+#ifdef CONFIG_TCP_AO
+ struct tcp_ao_info *ao;
+
+ if (!static_branch_unlikely(&tcp_ao_needed.key))
+ return;
+
+ ao = rcu_dereference_protected(tp->ao_info,
+ lockdep_sock_is_held((struct sock *)tp));
+ if (ao && ack < tp->snd_una)
+ ao->snd_sne++;
+#endif
+}
+
/* If we update tp->snd_una, also update tp->bytes_acked */
static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack)
{
@@ -3535,9 +3587,25 @@ static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack)
sock_owned_by_me((struct sock *)tp);
tp->bytes_acked += delta;
+ tcp_snd_sne_update(tp, ack);
tp->snd_una = ack;
}
+static void tcp_rcv_sne_update(struct tcp_sock *tp, u32 seq)
+{
+#ifdef CONFIG_TCP_AO
+ struct tcp_ao_info *ao;
+
+ if (!static_branch_unlikely(&tcp_ao_needed.key))
+ return;
+
+ ao = rcu_dereference_protected(tp->ao_info,
+ lockdep_sock_is_held((struct sock *)tp));
+ if (ao && seq < tp->rcv_nxt)
+ ao->rcv_sne++;
+#endif
+}
+
/* If we update tp->rcv_nxt, also update tp->bytes_received */
static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq)
{
@@ -3545,6 +3613,7 @@ static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq)
sock_owned_by_me((struct sock *)tp);
tp->bytes_received += delta;
+ tcp_rcv_sne_update(tp, seq);
WRITE_ONCE(tp->rcv_nxt, seq);
}
@@ -3795,8 +3864,12 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
* then we can probably ignore it.
*/
if (before(ack, prior_snd_una)) {
+ u32 max_window;
+
+ /* do not accept ACK for bytes we never sent. */
+ max_window = min_t(u64, tp->max_window, tp->bytes_acked);
/* RFC 5961 5.2 [Blind Data Injection Attack].[Mitigation] */
- if (before(ack, prior_snd_una - tp->max_window)) {
+ if (before(ack, prior_snd_una - max_window)) {
if (!(flag & FLAG_NO_CHALLENGE_ACK))
tcp_send_challenge_ack(sk);
return -SKB_DROP_REASON_TCP_TOO_OLD_ACK;
@@ -4211,39 +4284,58 @@ static bool tcp_fast_parse_options(const struct net *net,
return true;
}
-#ifdef CONFIG_TCP_MD5SIG
+#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
/*
- * Parse MD5 Signature option
+ * Parse Signature options
*/
-const u8 *tcp_parse_md5sig_option(const struct tcphdr *th)
+int tcp_do_parse_auth_options(const struct tcphdr *th,
+ const u8 **md5_hash, const u8 **ao_hash)
{
int length = (th->doff << 2) - sizeof(*th);
const u8 *ptr = (const u8 *)(th + 1);
+ unsigned int minlen = TCPOLEN_MD5SIG;
+
+ if (IS_ENABLED(CONFIG_TCP_AO))
+ minlen = sizeof(struct tcp_ao_hdr) + 1;
+
+ *md5_hash = NULL;
+ *ao_hash = NULL;
/* If not enough data remaining, we can short cut */
- while (length >= TCPOLEN_MD5SIG) {
+ while (length >= minlen) {
int opcode = *ptr++;
int opsize;
switch (opcode) {
case TCPOPT_EOL:
- return NULL;
+ return 0;
case TCPOPT_NOP:
length--;
continue;
default:
opsize = *ptr++;
if (opsize < 2 || opsize > length)
- return NULL;
- if (opcode == TCPOPT_MD5SIG)
- return opsize == TCPOLEN_MD5SIG ? ptr : NULL;
+ return -EINVAL;
+ if (opcode == TCPOPT_MD5SIG) {
+ if (opsize != TCPOLEN_MD5SIG)
+ return -EINVAL;
+ if (unlikely(*md5_hash || *ao_hash))
+ return -EEXIST;
+ *md5_hash = ptr;
+ } else if (opcode == TCPOPT_AO) {
+ if (opsize <= sizeof(struct tcp_ao_hdr))
+ return -EINVAL;
+ if (unlikely(*md5_hash || *ao_hash))
+ return -EEXIST;
+ *ao_hash = ptr;
+ }
}
ptr += opsize - 2;
length -= opsize;
}
- return NULL;
+ return 0;
}
-EXPORT_SYMBOL(tcp_parse_md5sig_option);
+EXPORT_SYMBOL(tcp_do_parse_auth_options);
#endif
/* Sorry, PAWS as specified is broken wrt. pure-ACKs -DaveM
@@ -4269,6 +4361,23 @@ EXPORT_SYMBOL(tcp_parse_md5sig_option);
* up to bandwidth of 18Gigabit/sec. 8) ]
*/
+/* Estimates max number of increments of remote peer TSval in
+ * a replay window (based on our current RTO estimation).
+ */
+static u32 tcp_tsval_replay(const struct sock *sk)
+{
+ /* If we use usec TS resolution,
+ * then expect the remote peer to use the same resolution.
+ */
+ if (tcp_sk(sk)->tcp_usec_ts)
+ return inet_csk(sk)->icsk_rto * (USEC_PER_SEC / HZ);
+
+ /* RFC 7323 recommends a TSval clock between 1ms and 1sec.
+ * We know that some OS (including old linux) can use 1200 Hz.
+ */
+ return inet_csk(sk)->icsk_rto * 1200 / HZ;
+}
+
static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb)
{
const struct tcp_sock *tp = tcp_sk(sk);
@@ -4276,7 +4385,7 @@ static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb)
u32 seq = TCP_SKB_CB(skb)->seq;
u32 ack = TCP_SKB_CB(skb)->ack_seq;
- return (/* 1. Pure ACK with correct sequence number. */
+ return /* 1. Pure ACK with correct sequence number. */
(th->ack && seq == TCP_SKB_CB(skb)->end_seq && seq == tp->rcv_nxt) &&
/* 2. ... and duplicate ACK. */
@@ -4286,7 +4395,8 @@ static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb)
!tcp_may_update_window(tp, ack, seq, ntohs(th->window) << tp->rx_opt.snd_wscale) &&
/* 4. ... and sits in replay window. */
- (s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (inet_csk(sk)->icsk_rto * 1024) / HZ);
+ (s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <=
+ tcp_tsval_replay(sk);
}
static inline bool tcp_paws_discard(const struct sock *sk,
@@ -4486,12 +4596,23 @@ static void tcp_rcv_spurious_retrans(struct sock *sk, const struct sk_buff *skb)
{
/* When the ACK path fails or drops most ACKs, the sender would
* timeout and spuriously retransmit the same segment repeatedly.
- * The receiver remembers and reflects via DSACKs. Leverage the
- * DSACK state and change the txhash to re-route speculatively.
+ * If it seems our ACKs are not reaching the other side,
+ * based on receiving a duplicate data segment with new flowlabel
+ * (suggesting the sender suffered an RTO), and we are not already
+ * repathing due to our own RTO, then rehash the socket to repath our
+ * packets.
*/
- if (TCP_SKB_CB(skb)->seq == tcp_sk(sk)->duplicate_sack[0].start_seq &&
+#if IS_ENABLED(CONFIG_IPV6)
+ if (inet_csk(sk)->icsk_ca_state != TCP_CA_Loss &&
+ skb->protocol == htons(ETH_P_IPV6) &&
+ (tcp_sk(sk)->inet_conn.icsk_ack.lrcv_flowlabel !=
+ ntohl(ip6_flowlabel(ipv6_hdr(skb)))) &&
sk_rethink_txhash(sk))
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDUPLICATEDATAREHASH);
+
+ /* Save last flowlabel after a spurious retrans. */
+ tcp_save_lrcv_flowlabel(sk, skb);
+#endif
}
static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
@@ -4808,6 +4929,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
u32 seq, end_seq;
bool fragstolen;
+ tcp_save_lrcv_flowlabel(sk, skb);
tcp_ecn_check_ce(sk, skb);
if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) {
@@ -5553,6 +5675,14 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
tcp_in_quickack_mode(sk) ||
/* Protocol state mandates a one-time immediate ACK */
inet_csk(sk)->icsk_ack.pending & ICSK_ACK_NOW) {
+ /* If we are running from __release_sock() in user context,
+ * Defer the ack until tcp_release_cb().
+ */
+ if (sock_owned_by_user_nocheck(sk) &&
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_backlog_ack_defer)) {
+ set_bit(TCP_ACK_DEFERRED, &sk->sk_tsq_flags);
+ return;
+ }
send_now:
tcp_send_ack(sk);
return;
@@ -6087,6 +6217,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
+ tcp_ao_finish_connect(sk, skb);
tcp_set_state(sk, TCP_ESTABLISHED);
icsk->icsk_ack.lrcvtime = tcp_jiffies32;
@@ -6235,7 +6366,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
!between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp,
- tcp_time_stamp(tp))) {
+ tcp_time_stamp_ts(tp))) {
NET_INC_STATS(sock_net(sk),
LINUX_MIB_PAWSACTIVEREJECTED);
goto reset_and_undo;
@@ -6372,6 +6503,16 @@ consume:
* simultaneous connect with crossed SYNs.
* Particularly, it can be connect to self.
*/
+#ifdef CONFIG_TCP_AO
+ struct tcp_ao_info *ao;
+
+ ao = rcu_dereference_protected(tp->ao_info,
+ lockdep_sock_is_held(sk));
+ if (ao) {
+ WRITE_ONCE(ao->risn, th->seq);
+ ao->rcv_sne = 0;
+ }
+#endif
tcp_set_state(sk, TCP_SYN_RECV);
if (tp->rx_opt.saw_tstamp) {
@@ -6436,22 +6577,24 @@ reset_and_undo:
static void tcp_rcv_synrecv_state_fastopen(struct sock *sk)
{
+ struct tcp_sock *tp = tcp_sk(sk);
struct request_sock *req;
/* If we are still handling the SYNACK RTO, see if timestamp ECR allows
* undo. If peer SACKs triggered fast recovery, we can't undo here.
*/
- if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
- tcp_try_undo_loss(sk, false);
+ if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss && !tp->packets_out)
+ tcp_try_undo_recovery(sk);
/* Reset rtx states to prevent spurious retransmits_timed_out() */
- tcp_sk(sk)->retrans_stamp = 0;
+ tcp_update_rto_time(tp);
+ tp->retrans_stamp = 0;
inet_csk(sk)->icsk_retransmits = 0;
/* Once we leave TCP_SYN_RECV or TCP_FIN_WAIT_1,
* we no longer need req so release it.
*/
- req = rcu_dereference_protected(tcp_sk(sk)->fastopen_rsk,
+ req = rcu_dereference_protected(tp->fastopen_rsk,
lockdep_sock_is_held(sk));
reqsk_fastopen_remove(sk, req, false);
@@ -6582,6 +6725,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
skb);
WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
}
+ tcp_ao_established(sk);
smp_mb();
tcp_set_state(sk, TCP_ESTABLISHED);
sk->sk_state_change(sk);
@@ -6958,6 +7102,10 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
struct flowi fl;
u8 syncookies;
+#ifdef CONFIG_TCP_AO
+ const struct tcp_ao_hdr *aoh;
+#endif
+
syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies);
/* TW buckets are converted to open requests without
@@ -6982,6 +7130,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
req->syncookie = want_cookie;
tcp_rsk(req)->af_specific = af_ops;
tcp_rsk(req)->ts_off = 0;
+ tcp_rsk(req)->req_usec_ts = false;
#if IS_ENABLED(CONFIG_MPTCP)
tcp_rsk(req)->is_mptcp = 0;
#endif
@@ -7009,9 +7158,10 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
if (!dst)
goto drop_and_free;
- if (tmp_opt.tstamp_ok)
+ if (tmp_opt.tstamp_ok) {
+ tcp_rsk(req)->req_usec_ts = dst_tcp_usec_ts(dst);
tcp_rsk(req)->ts_off = af_ops->init_ts_off(net, skb);
-
+ }
if (!want_cookie && !isn) {
int max_syn_backlog = READ_ONCE(net->ipv4.sysctl_max_syn_backlog);
@@ -7043,6 +7193,18 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
inet_rsk(req)->ecn_ok = 0;
}
+#ifdef CONFIG_TCP_AO
+ if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh))
+ goto drop_and_release; /* Invalid TCP options */
+ if (aoh) {
+ tcp_rsk(req)->used_tcp_ao = true;
+ tcp_rsk(req)->ao_rcv_next = aoh->keyid;
+ tcp_rsk(req)->ao_keyid = aoh->rnext_keyid;
+
+ } else {
+ tcp_rsk(req)->used_tcp_ao = false;
+ }
+#endif
tcp_rsk(req)->snt_isn = isn;
tcp_rsk(req)->txhash = net_tx_rndhash();
tcp_rsk(req)->syn_tos = TCP_SKB_CB(skb)->ip_dsfield;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 27140e5cdc06..0c50c5a32b84 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -194,7 +194,7 @@ static int tcp_v4_pre_connect(struct sock *sk, struct sockaddr *uaddr,
sock_owned_by_me(sk);
- return BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr);
+ return BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr, &addr_len);
}
/* This will initiate an outgoing connection. */
@@ -296,6 +296,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
rt = NULL;
goto failure;
}
+ tp->tcp_usec_ts = dst_tcp_usec_ts(&rt->dst);
/* OK, now commit destination to socket. */
sk->sk_gso_type = SKB_GSO_TCPV4;
sk_setup_caps(sk, &rt->dst);
@@ -493,6 +494,8 @@ int tcp_v4_err(struct sk_buff *skb, u32 info)
return -ENOENT;
}
if (sk->sk_state == TCP_TIME_WAIT) {
+ /* To increase the counter of ignored icmps for TCP-AO */
+ tcp_ao_ignore_icmp(sk, AF_INET, type, code);
inet_twsk_put(inet_twsk(sk));
return 0;
}
@@ -506,6 +509,11 @@ int tcp_v4_err(struct sk_buff *skb, u32 info)
return 0;
}
+ if (tcp_ao_ignore_icmp(sk, AF_INET, type, code)) {
+ sock_put(sk);
+ return 0;
+ }
+
bh_lock_sock(sk);
/* If too many ICMPs get dropped on busy
* servers this needs to be solved differently.
@@ -656,6 +664,52 @@ void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
}
EXPORT_SYMBOL(tcp_v4_send_check);
+#define REPLY_OPTIONS_LEN (MAX_TCP_OPTION_SPACE / sizeof(__be32))
+
+static bool tcp_v4_ao_sign_reset(const struct sock *sk, struct sk_buff *skb,
+ const struct tcp_ao_hdr *aoh,
+ struct ip_reply_arg *arg, struct tcphdr *reply,
+ __be32 reply_options[REPLY_OPTIONS_LEN])
+{
+#ifdef CONFIG_TCP_AO
+ int sdif = tcp_v4_sdif(skb);
+ int dif = inet_iif(skb);
+ int l3index = sdif ? dif : 0;
+ bool allocated_traffic_key;
+ struct tcp_ao_key *key;
+ char *traffic_key;
+ bool drop = true;
+ u32 ao_sne = 0;
+ u8 keyid;
+
+ rcu_read_lock();
+ if (tcp_ao_prepare_reset(sk, skb, aoh, l3index, ntohl(reply->seq),
+ &key, &traffic_key, &allocated_traffic_key,
+ &keyid, &ao_sne))
+ goto out;
+
+ reply_options[0] = htonl((TCPOPT_AO << 24) | (tcp_ao_len(key) << 16) |
+ (aoh->rnext_keyid << 8) | keyid);
+ arg->iov[0].iov_len += tcp_ao_len_aligned(key);
+ reply->doff = arg->iov[0].iov_len / 4;
+
+ if (tcp_ao_hash_hdr(AF_INET, (char *)&reply_options[1],
+ key, traffic_key,
+ (union tcp_ao_addr *)&ip_hdr(skb)->saddr,
+ (union tcp_ao_addr *)&ip_hdr(skb)->daddr,
+ reply, ao_sne))
+ goto out;
+ drop = false;
+out:
+ rcu_read_unlock();
+ if (allocated_traffic_key)
+ kfree(traffic_key);
+ return drop;
+#else
+ return true;
+#endif
+}
+
/*
* This routine will send an RST to the other tcp.
*
@@ -669,26 +723,21 @@ EXPORT_SYMBOL(tcp_v4_send_check);
* Exception: precedence violation. We do not implement it in any case.
*/
-#ifdef CONFIG_TCP_MD5SIG
-#define OPTION_BYTES TCPOLEN_MD5SIG_ALIGNED
-#else
-#define OPTION_BYTES sizeof(__be32)
-#endif
-
static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
{
const struct tcphdr *th = tcp_hdr(skb);
struct {
struct tcphdr th;
- __be32 opt[OPTION_BYTES / sizeof(__be32)];
+ __be32 opt[REPLY_OPTIONS_LEN];
} rep;
+ const __u8 *md5_hash_location = NULL;
+ const struct tcp_ao_hdr *aoh;
struct ip_reply_arg arg;
#ifdef CONFIG_TCP_MD5SIG
struct tcp_md5sig_key *key = NULL;
- const __u8 *hash_location = NULL;
unsigned char newhash[16];
- int genhash;
struct sock *sk1 = NULL;
+ int genhash;
#endif
u64 transmit_time = 0;
struct sock *ctl_sk;
@@ -725,9 +774,16 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
arg.iov[0].iov_len = sizeof(rep.th);
net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
+
+ /* Invalid TCP option size or twice included auth */
+ if (tcp_parse_auth_options(tcp_hdr(skb), &md5_hash_location, &aoh))
+ return;
+
+ if (aoh && tcp_v4_ao_sign_reset(sk, skb, aoh, &arg, &rep.th, rep.opt))
+ return;
+
#ifdef CONFIG_TCP_MD5SIG
rcu_read_lock();
- hash_location = tcp_parse_md5sig_option(th);
if (sk && sk_fullsock(sk)) {
const union tcp_md5_addr *addr;
int l3index;
@@ -738,7 +794,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
l3index = tcp_v4_sdif(skb) ? inet_iif(skb) : 0;
addr = (union tcp_md5_addr *)&ip_hdr(skb)->saddr;
key = tcp_md5_do_lookup(sk, l3index, addr, AF_INET);
- } else if (hash_location) {
+ } else if (md5_hash_location) {
const union tcp_md5_addr *addr;
int sdif = tcp_v4_sdif(skb);
int dif = inet_iif(skb);
@@ -770,7 +826,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb);
- if (genhash || memcmp(hash_location, newhash, 16) != 0)
+ if (genhash || memcmp(md5_hash_location, newhash, 16) != 0)
goto out;
}
@@ -828,7 +884,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
inet_twsk(sk)->tw_mark : sk->sk_mark;
ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ?
- inet_twsk(sk)->tw_priority : sk->sk_priority;
+ inet_twsk(sk)->tw_priority : READ_ONCE(sk->sk_priority);
transmit_time = tcp_transmit_time(sk);
xfrm_sk_clone_policy(ctl_sk, sk);
txhash = (sk->sk_state == TCP_TIME_WAIT) ?
@@ -862,17 +918,13 @@ out:
static void tcp_v4_send_ack(const struct sock *sk,
struct sk_buff *skb, u32 seq, u32 ack,
u32 win, u32 tsval, u32 tsecr, int oif,
- struct tcp_md5sig_key *key,
+ struct tcp_key *key,
int reply_flags, u8 tos, u32 txhash)
{
const struct tcphdr *th = tcp_hdr(skb);
struct {
struct tcphdr th;
- __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
-#ifdef CONFIG_TCP_MD5SIG
- + (TCPOLEN_MD5SIG_ALIGNED >> 2)
-#endif
- ];
+ __be32 opt[(MAX_TCP_OPTION_SPACE >> 2)];
} rep;
struct net *net = sock_net(sk);
struct ip_reply_arg arg;
@@ -903,7 +955,7 @@ static void tcp_v4_send_ack(const struct sock *sk,
rep.th.window = htons(win);
#ifdef CONFIG_TCP_MD5SIG
- if (key) {
+ if (tcp_key_is_md5(key)) {
int offset = (tsecr) ? 3 : 0;
rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
@@ -914,10 +966,28 @@ static void tcp_v4_send_ack(const struct sock *sk,
rep.th.doff = arg.iov[0].iov_len/4;
tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
- key, ip_hdr(skb)->saddr,
+ key->md5_key, ip_hdr(skb)->saddr,
ip_hdr(skb)->daddr, &rep.th);
}
#endif
+#ifdef CONFIG_TCP_AO
+ if (tcp_key_is_ao(key)) {
+ int offset = (tsecr) ? 3 : 0;
+
+ rep.opt[offset++] = htonl((TCPOPT_AO << 24) |
+ (tcp_ao_len(key->ao_key) << 16) |
+ (key->ao_key->sndid << 8) |
+ key->rcv_next);
+ arg.iov[0].iov_len += tcp_ao_len_aligned(key->ao_key);
+ rep.th.doff = arg.iov[0].iov_len / 4;
+
+ tcp_ao_hash_hdr(AF_INET, (char *)&rep.opt[offset],
+ key->ao_key, key->traffic_key,
+ (union tcp_ao_addr *)&ip_hdr(skb)->saddr,
+ (union tcp_ao_addr *)&ip_hdr(skb)->daddr,
+ &rep.th, key->sne);
+ }
+#endif
arg.flags = reply_flags;
arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
ip_hdr(skb)->saddr, /* XXX */
@@ -950,18 +1020,53 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
struct inet_timewait_sock *tw = inet_twsk(sk);
struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
+ struct tcp_key key = {};
+#ifdef CONFIG_TCP_AO
+ struct tcp_ao_info *ao_info;
+
+ if (static_branch_unlikely(&tcp_ao_needed.key)) {
+ /* FIXME: the segment to-be-acked is not verified yet */
+ ao_info = rcu_dereference(tcptw->ao_info);
+ if (ao_info) {
+ const struct tcp_ao_hdr *aoh;
+
+ if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh)) {
+ inet_twsk_put(tw);
+ return;
+ }
+
+ if (aoh)
+ key.ao_key = tcp_ao_established_key(ao_info, aoh->rnext_keyid, -1);
+ }
+ }
+ if (key.ao_key) {
+ struct tcp_ao_key *rnext_key;
+
+ key.traffic_key = snd_other_key(key.ao_key);
+ key.sne = READ_ONCE(ao_info->snd_sne);
+ rnext_key = READ_ONCE(ao_info->rnext_key);
+ key.rcv_next = rnext_key->rcvid;
+ key.type = TCP_KEY_AO;
+#else
+ if (0) {
+#endif
+#ifdef CONFIG_TCP_MD5SIG
+ } else if (static_branch_unlikely(&tcp_md5_needed.key)) {
+ key.md5_key = tcp_twsk_md5_key(tcptw);
+ if (key.md5_key)
+ key.type = TCP_KEY_MD5;
+#endif
+ }
tcp_v4_send_ack(sk, skb,
tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
- tcp_time_stamp_raw() + tcptw->tw_ts_offset,
+ tcp_tw_tsval(tcptw),
tcptw->tw_ts_recent,
- tw->tw_bound_dev_if,
- tcp_twsk_md5_key(tcptw),
+ tw->tw_bound_dev_if, &key,
tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
tw->tw_tos,
- tw->tw_txhash
- );
+ tw->tw_txhash);
inet_twsk_put(tw);
}
@@ -969,8 +1074,7 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
struct request_sock *req)
{
- const union tcp_md5_addr *addr;
- int l3index;
+ struct tcp_key key = {};
/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
* sk->sk_state == TCP_SYN_RECV -> for Fast Open.
@@ -978,23 +1082,77 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 :
tcp_sk(sk)->snd_nxt;
+#ifdef CONFIG_TCP_AO
+ if (static_branch_unlikely(&tcp_ao_needed.key) &&
+ tcp_rsk_used_ao(req)) {
+ const union tcp_md5_addr *addr;
+ const struct tcp_ao_hdr *aoh;
+ int l3index;
+
+ /* Invalid TCP option size or twice included auth */
+ if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh))
+ return;
+ if (!aoh)
+ return;
+
+ addr = (union tcp_md5_addr *)&ip_hdr(skb)->saddr;
+ l3index = tcp_v4_sdif(skb) ? inet_iif(skb) : 0;
+ key.ao_key = tcp_ao_do_lookup(sk, l3index, addr, AF_INET,
+ aoh->rnext_keyid, -1);
+ if (unlikely(!key.ao_key)) {
+ /* Send ACK with any matching MKT for the peer */
+ key.ao_key = tcp_ao_do_lookup(sk, l3index, addr, AF_INET, -1, -1);
+ /* Matching key disappeared (user removed the key?)
+ * let the handshake timeout.
+ */
+ if (!key.ao_key) {
+ net_info_ratelimited("TCP-AO key for (%pI4, %d)->(%pI4, %d) suddenly disappeared, won't ACK new connection\n",
+ addr,
+ ntohs(tcp_hdr(skb)->source),
+ &ip_hdr(skb)->daddr,
+ ntohs(tcp_hdr(skb)->dest));
+ return;
+ }
+ }
+ key.traffic_key = kmalloc(tcp_ao_digest_size(key.ao_key), GFP_ATOMIC);
+ if (!key.traffic_key)
+ return;
+
+ key.type = TCP_KEY_AO;
+ key.rcv_next = aoh->keyid;
+ tcp_v4_ao_calc_key_rsk(key.ao_key, key.traffic_key, req);
+#else
+ if (0) {
+#endif
+#ifdef CONFIG_TCP_MD5SIG
+ } else if (static_branch_unlikely(&tcp_md5_needed.key)) {
+ const union tcp_md5_addr *addr;
+ int l3index;
+
+ addr = (union tcp_md5_addr *)&ip_hdr(skb)->saddr;
+ l3index = tcp_v4_sdif(skb) ? inet_iif(skb) : 0;
+ key.md5_key = tcp_md5_do_lookup(sk, l3index, addr, AF_INET);
+ if (key.md5_key)
+ key.type = TCP_KEY_MD5;
+#endif
+ }
+
/* RFC 7323 2.3
* The window field (SEG.WND) of every outgoing segment, with the
* exception of <SYN> segments, MUST be right-shifted by
* Rcv.Wind.Shift bits:
*/
- addr = (union tcp_md5_addr *)&ip_hdr(skb)->saddr;
- l3index = tcp_v4_sdif(skb) ? inet_iif(skb) : 0;
tcp_v4_send_ack(sk, skb, seq,
tcp_rsk(req)->rcv_nxt,
req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
- tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
+ tcp_rsk_tsval(tcp_rsk(req)),
READ_ONCE(req->ts_recent),
- 0,
- tcp_md5_do_lookup(sk, l3index, addr, AF_INET),
+ 0, &key,
inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
ip_hdr(skb)->tos,
READ_ONCE(tcp_rsk(req)->txhash));
+ if (tcp_key_is_ao(&key))
+ kfree(key.traffic_key);
}
/*
@@ -1024,10 +1182,11 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
if (skb) {
__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
- tos = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
- (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
- (inet_sk(sk)->tos & INET_ECN_MASK) :
- inet_sk(sk)->tos;
+ tos = READ_ONCE(inet_sk(sk)->tos);
+
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
+ tos = (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
+ (tos & INET_ECN_MASK);
if (!INET_ECN_is_capable(tos) &&
tcp_bpf_ca_needs_ecn((struct sock *)req))
@@ -1080,7 +1239,7 @@ static bool better_md5_match(struct tcp_md5sig_key *old, struct tcp_md5sig_key *
/* Find the Key structure for an address. */
struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, int l3index,
const union tcp_md5_addr *addr,
- int family)
+ int family, bool any_l3index)
{
const struct tcp_sock *tp = tcp_sk(sk);
struct tcp_md5sig_key *key;
@@ -1099,7 +1258,8 @@ struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, int l3index,
lockdep_sock_is_held(sk)) {
if (key->family != family)
continue;
- if (key->flags & TCP_MD5SIG_FLAG_IFINDEX && key->l3index != l3index)
+ if (!any_l3index && key->flags & TCP_MD5SIG_FLAG_IFINDEX &&
+ key->l3index != l3index)
continue;
if (family == AF_INET) {
mask = inet_make_mask(key->prefixlen);
@@ -1219,10 +1379,6 @@ static int __tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
key = sock_kmalloc(sk, sizeof(*key), gfp | __GFP_ZERO);
if (!key)
return -ENOMEM;
- if (!tcp_alloc_md5sig_pool()) {
- sock_kfree_s(sk, key, sizeof(*key));
- return -ENOMEM;
- }
memcpy(key->key, newkey, newkeylen);
key->keylen = newkeylen;
@@ -1244,15 +1400,21 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
struct tcp_sock *tp = tcp_sk(sk);
if (!rcu_dereference_protected(tp->md5sig_info, lockdep_sock_is_held(sk))) {
- if (tcp_md5sig_info_add(sk, GFP_KERNEL))
+ if (tcp_md5_alloc_sigpool())
return -ENOMEM;
+ if (tcp_md5sig_info_add(sk, GFP_KERNEL)) {
+ tcp_md5_release_sigpool();
+ return -ENOMEM;
+ }
+
if (!static_branch_inc(&tcp_md5_needed.key)) {
struct tcp_md5sig_info *md5sig;
md5sig = rcu_dereference_protected(tp->md5sig_info, lockdep_sock_is_held(sk));
rcu_assign_pointer(tp->md5sig_info, NULL);
kfree_rcu(md5sig, rcu);
+ tcp_md5_release_sigpool();
return -EUSERS;
}
}
@@ -1269,8 +1431,12 @@ int tcp_md5_key_copy(struct sock *sk, const union tcp_md5_addr *addr,
struct tcp_sock *tp = tcp_sk(sk);
if (!rcu_dereference_protected(tp->md5sig_info, lockdep_sock_is_held(sk))) {
- if (tcp_md5sig_info_add(sk, sk_gfp_mask(sk, GFP_ATOMIC)))
+ tcp_md5_add_sigpool();
+
+ if (tcp_md5sig_info_add(sk, sk_gfp_mask(sk, GFP_ATOMIC))) {
+ tcp_md5_release_sigpool();
return -ENOMEM;
+ }
if (!static_key_fast_inc_not_disabled(&tcp_md5_needed.key.key)) {
struct tcp_md5sig_info *md5sig;
@@ -1279,6 +1445,7 @@ int tcp_md5_key_copy(struct sock *sk, const union tcp_md5_addr *addr,
net_warn_ratelimited("Too many TCP-MD5 keys in the system\n");
rcu_assign_pointer(tp->md5sig_info, NULL);
kfree_rcu(md5sig, rcu);
+ tcp_md5_release_sigpool();
return -EUSERS;
}
}
@@ -1304,7 +1471,7 @@ int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family,
}
EXPORT_SYMBOL(tcp_md5_do_del);
-static void tcp_clear_md5_list(struct sock *sk)
+void tcp_clear_md5_list(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_md5sig_key *key;
@@ -1328,6 +1495,7 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, int optname,
const union tcp_md5_addr *addr;
u8 prefixlen = 32;
int l3index = 0;
+ bool l3flag;
u8 flags;
if (optlen < sizeof(cmd))
@@ -1340,6 +1508,7 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, int optname,
return -EINVAL;
flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;
+ l3flag = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;
if (optname == TCP_MD5SIG_EXT &&
cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
@@ -1374,11 +1543,17 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, int optname,
if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
return -EINVAL;
+ /* Don't allow keys for peers that have a matching TCP-AO key.
+ * See the comment in tcp_ao_add_cmd()
+ */
+ if (tcp_ao_required(sk, addr, AF_INET, l3flag ? l3index : -1, false))
+ return -EKEYREJECTED;
+
return tcp_md5_do_add(sk, addr, AF_INET, prefixlen, l3index, flags,
cmd.tcpm_key, cmd.tcpm_keylen);
}
-static int tcp_v4_md5_hash_headers(struct tcp_md5sig_pool *hp,
+static int tcp_v4_md5_hash_headers(struct tcp_sigpool *hp,
__be32 daddr, __be32 saddr,
const struct tcphdr *th, int nbytes)
{
@@ -1398,38 +1573,35 @@ static int tcp_v4_md5_hash_headers(struct tcp_md5sig_pool *hp,
_th->check = 0;
sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
- ahash_request_set_crypt(hp->md5_req, &sg, NULL,
+ ahash_request_set_crypt(hp->req, &sg, NULL,
sizeof(*bp) + sizeof(*th));
- return crypto_ahash_update(hp->md5_req);
+ return crypto_ahash_update(hp->req);
}
static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
__be32 daddr, __be32 saddr, const struct tcphdr *th)
{
- struct tcp_md5sig_pool *hp;
- struct ahash_request *req;
+ struct tcp_sigpool hp;
- hp = tcp_get_md5sig_pool();
- if (!hp)
- goto clear_hash_noput;
- req = hp->md5_req;
+ if (tcp_sigpool_start(tcp_md5_sigpool_id, &hp))
+ goto clear_hash_nostart;
- if (crypto_ahash_init(req))
+ if (crypto_ahash_init(hp.req))
goto clear_hash;
- if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
+ if (tcp_v4_md5_hash_headers(&hp, daddr, saddr, th, th->doff << 2))
goto clear_hash;
- if (tcp_md5_hash_key(hp, key))
+ if (tcp_md5_hash_key(&hp, key))
goto clear_hash;
- ahash_request_set_crypt(req, NULL, md5_hash, 0);
- if (crypto_ahash_final(req))
+ ahash_request_set_crypt(hp.req, NULL, md5_hash, 0);
+ if (crypto_ahash_final(hp.req))
goto clear_hash;
- tcp_put_md5sig_pool();
+ tcp_sigpool_end(&hp);
return 0;
clear_hash:
- tcp_put_md5sig_pool();
-clear_hash_noput:
+ tcp_sigpool_end(&hp);
+clear_hash_nostart:
memset(md5_hash, 0, 16);
return 1;
}
@@ -1438,9 +1610,8 @@ int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
const struct sock *sk,
const struct sk_buff *skb)
{
- struct tcp_md5sig_pool *hp;
- struct ahash_request *req;
const struct tcphdr *th = tcp_hdr(skb);
+ struct tcp_sigpool hp;
__be32 saddr, daddr;
if (sk) { /* valid for establish/request sockets */
@@ -1452,30 +1623,28 @@ int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
daddr = iph->daddr;
}
- hp = tcp_get_md5sig_pool();
- if (!hp)
- goto clear_hash_noput;
- req = hp->md5_req;
+ if (tcp_sigpool_start(tcp_md5_sigpool_id, &hp))
+ goto clear_hash_nostart;
- if (crypto_ahash_init(req))
+ if (crypto_ahash_init(hp.req))
goto clear_hash;
- if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, skb->len))
+ if (tcp_v4_md5_hash_headers(&hp, daddr, saddr, th, skb->len))
goto clear_hash;
- if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
+ if (tcp_sigpool_hash_skb_data(&hp, skb, th->doff << 2))
goto clear_hash;
- if (tcp_md5_hash_key(hp, key))
+ if (tcp_md5_hash_key(&hp, key))
goto clear_hash;
- ahash_request_set_crypt(req, NULL, md5_hash, 0);
- if (crypto_ahash_final(req))
+ ahash_request_set_crypt(hp.req, NULL, md5_hash, 0);
+ if (crypto_ahash_final(hp.req))
goto clear_hash;
- tcp_put_md5sig_pool();
+ tcp_sigpool_end(&hp);
return 0;
clear_hash:
- tcp_put_md5sig_pool();
-clear_hash_noput:
+ tcp_sigpool_end(&hp);
+clear_hash_nostart:
memset(md5_hash, 0, 16);
return 1;
}
@@ -1524,6 +1693,11 @@ const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
.req_md5_lookup = tcp_v4_md5_lookup,
.calc_md5_hash = tcp_v4_md5_hash_skb,
#endif
+#ifdef CONFIG_TCP_AO
+ .ao_lookup = tcp_v4_ao_lookup_rsk,
+ .ao_calc_key = tcp_v4_ao_calc_key_rsk,
+ .ao_synack_hash = tcp_v4_ao_synack_hash,
+#endif
#ifdef CONFIG_SYN_COOKIES
.cookie_init_seq = cookie_v4_init_sequence,
#endif
@@ -1625,12 +1799,16 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
/* Copy over the MD5 key from the original socket */
addr = (union tcp_md5_addr *)&newinet->inet_daddr;
key = tcp_md5_do_lookup(sk, l3index, addr, AF_INET);
- if (key) {
+ if (key && !tcp_rsk_used_ao(req)) {
if (tcp_md5_key_copy(newsk, addr, AF_INET, 32, l3index, key))
goto put_and_exit;
sk_gso_disable(newsk);
}
#endif
+#ifdef CONFIG_TCP_AO
+ if (tcp_ao_copy_all_matching(sk, newsk, req, skb, AF_INET))
+ goto put_and_exit; /* OOM, release back memory */
+#endif
if (__inet_inherit_port(sk, newsk) < 0)
goto put_and_exit;
@@ -1869,6 +2047,7 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb,
#ifdef CONFIG_TLS_DEVICE
tail->decrypted != skb->decrypted ||
#endif
+ !mptcp_skb_can_collapse(tail, skb) ||
thtail->doff != th->doff ||
memcmp(thtail + 1, th + 1, hdrlen - sizeof(*th)))
goto no_coalesce;
@@ -2040,9 +2219,9 @@ process:
if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
drop_reason = SKB_DROP_REASON_XFRM_POLICY;
else
- drop_reason = tcp_inbound_md5_hash(sk, skb,
- &iph->saddr, &iph->daddr,
- AF_INET, dif, sdif);
+ drop_reason = tcp_inbound_hash(sk, req, skb,
+ &iph->saddr, &iph->daddr,
+ AF_INET, dif, sdif);
if (unlikely(drop_reason)) {
sk_drops_add(sk, skb);
reqsk_put(req);
@@ -2119,8 +2298,8 @@ process:
goto discard_and_relse;
}
- drop_reason = tcp_inbound_md5_hash(sk, skb, &iph->saddr,
- &iph->daddr, AF_INET, dif, sdif);
+ drop_reason = tcp_inbound_hash(sk, NULL, skb, &iph->saddr, &iph->daddr,
+ AF_INET, dif, sdif);
if (drop_reason)
goto discard_and_relse;
@@ -2267,11 +2446,19 @@ const struct inet_connection_sock_af_ops ipv4_specific = {
};
EXPORT_SYMBOL(ipv4_specific);
-#ifdef CONFIG_TCP_MD5SIG
+#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
+#ifdef CONFIG_TCP_MD5SIG
.md5_lookup = tcp_v4_md5_lookup,
.calc_md5_hash = tcp_v4_md5_hash_skb,
.md5_parse = tcp_v4_parse_md5_keys,
+#endif
+#ifdef CONFIG_TCP_AO
+ .ao_lookup = tcp_v4_ao_lookup,
+ .calc_ao_hash = tcp_v4_ao_hash_skb,
+ .ao_parse = tcp_v4_parse_ao,
+ .ao_calc_key_sk = tcp_v4_ao_calc_key_sk,
+#endif
};
#endif
@@ -2286,13 +2473,25 @@ static int tcp_v4_init_sock(struct sock *sk)
icsk->icsk_af_ops = &ipv4_specific;
-#ifdef CONFIG_TCP_MD5SIG
+#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
#endif
return 0;
}
+#ifdef CONFIG_TCP_MD5SIG
+static void tcp_md5sig_info_free_rcu(struct rcu_head *head)
+{
+ struct tcp_md5sig_info *md5sig;
+
+ md5sig = container_of(head, struct tcp_md5sig_info, rcu);
+ kfree(md5sig);
+ static_branch_slow_dec_deferred(&tcp_md5_needed);
+ tcp_md5_release_sigpool();
+}
+#endif
+
void tcp_v4_destroy_sock(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -2317,12 +2516,15 @@ void tcp_v4_destroy_sock(struct sock *sk)
#ifdef CONFIG_TCP_MD5SIG
/* Clean up the MD5 key list, if any */
if (tp->md5sig_info) {
+ struct tcp_md5sig_info *md5sig;
+
+ md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
tcp_clear_md5_list(sk);
- kfree_rcu(rcu_dereference_protected(tp->md5sig_info, 1), rcu);
- tp->md5sig_info = NULL;
- static_branch_slow_dec_deferred(&tcp_md5_needed);
+ call_rcu(&md5sig->rcu, tcp_md5sig_info_free_rcu);
+ rcu_assign_pointer(tp->md5sig_info, NULL);
}
#endif
+ tcp_ao_destroy_sock(sk, false);
/* Clean up a referenced TCP bind bucket. */
if (inet_csk(sk)->icsk_bind_hash)
@@ -3263,6 +3465,7 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_comp_sack_delay_ns = NSEC_PER_MSEC;
net->ipv4.sysctl_tcp_comp_sack_slack_ns = 100 * NSEC_PER_USEC;
net->ipv4.sysctl_tcp_comp_sack_nr = 44;
+ net->ipv4.sysctl_tcp_backlog_ack_defer = 1;
net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 0;
atomic_set(&net->ipv4.tfo_active_disable_times, 0);
@@ -3286,6 +3489,8 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_syn_linear_timeouts = 4;
net->ipv4.sysctl_tcp_shrink_window = 0;
+ net->ipv4.sysctl_tcp_pingpong_thresh = 1;
+
return 0;
}
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
index ae36780977d2..52fe17167460 100644
--- a/net/ipv4/tcp_lp.c
+++ b/net/ipv4/tcp_lp.c
@@ -272,7 +272,7 @@ static void tcp_lp_pkts_acked(struct sock *sk, const struct ack_sample *sample)
{
struct tcp_sock *tp = tcp_sk(sk);
struct lp *lp = inet_csk_ca(sk);
- u32 now = tcp_time_stamp(tp);
+ u32 now = tcp_time_stamp_ts(tp);
u32 delta;
if (sample->rtt_us > 0)
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index c196759f1d3b..c2a925538542 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -470,11 +470,15 @@ void tcp_init_metrics(struct sock *sk)
u32 val, crtt = 0; /* cached RTT scaled by 8 */
sk_dst_confirm(sk);
+ /* ssthresh may have been reduced unnecessarily during.
+ * 3WHS. Restore it back to its initial default.
+ */
+ tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
if (!dst)
goto reset;
rcu_read_lock();
- tm = tcp_get_metrics(sk, dst, true);
+ tm = tcp_get_metrics(sk, dst, false);
if (!tm) {
rcu_read_unlock();
goto reset;
@@ -489,11 +493,6 @@ void tcp_init_metrics(struct sock *sk)
tp->snd_ssthresh = val;
if (tp->snd_ssthresh > tp->snd_cwnd_clamp)
tp->snd_ssthresh = tp->snd_cwnd_clamp;
- } else {
- /* ssthresh may have been reduced unnecessarily during.
- * 3WHS. Restore it back to its initial default.
- */
- tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
}
val = tcp_metric_get(tm, TCP_METRIC_REORDERING);
if (val && tp->reordering != val)
@@ -899,22 +898,25 @@ static void tcp_metrics_flush_all(struct net *net)
unsigned int row;
for (row = 0; row < max_rows; row++, hb++) {
- struct tcp_metrics_block __rcu **pp;
+ struct tcp_metrics_block __rcu **pp = &hb->chain;
bool match;
+ if (!rcu_access_pointer(*pp))
+ continue;
+
spin_lock_bh(&tcp_metrics_lock);
- pp = &hb->chain;
for (tm = deref_locked(*pp); tm; tm = deref_locked(*pp)) {
match = net ? net_eq(tm_net(tm), net) :
!refcount_read(&tm_net(tm)->ns.count);
if (match) {
- *pp = tm->tcpm_next;
+ rcu_assign_pointer(*pp, tm->tcpm_next);
kfree_rcu(tm, rcu_head);
} else {
pp = &tm->tcpm_next;
}
}
spin_unlock_bh(&tcp_metrics_lock);
+ cond_resched();
}
}
@@ -949,7 +951,7 @@ static int tcp_metrics_nl_cmd_del(struct sk_buff *skb, struct genl_info *info)
if (addr_same(&tm->tcpm_daddr, &daddr) &&
(!src || addr_same(&tm->tcpm_saddr, &saddr)) &&
net_eq(tm_net(tm), net)) {
- *pp = tm->tcpm_next;
+ rcu_assign_pointer(*pp, tm->tcpm_next);
kfree_rcu(tm, rcu_head);
found = true;
} else {
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index b98d476f1594..9e85f2a0bddd 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -51,6 +51,18 @@ tcp_timewait_check_oow_rate_limit(struct inet_timewait_sock *tw,
return TCP_TW_SUCCESS;
}
+static void twsk_rcv_nxt_update(struct tcp_timewait_sock *tcptw, u32 seq)
+{
+#ifdef CONFIG_TCP_AO
+ struct tcp_ao_info *ao;
+
+ ao = rcu_dereference(tcptw->ao_info);
+ if (unlikely(ao && seq < tcptw->tw_rcv_nxt))
+ WRITE_ONCE(ao->rcv_sne, ao->rcv_sne + 1);
+#endif
+ tcptw->tw_rcv_nxt = seq;
+}
+
/*
* * Main purpose of TIME-WAIT state is to close connection gracefully,
* when one of ends sits in LAST-ACK or CLOSING retransmitting FIN
@@ -136,7 +148,8 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
/* FIN arrived, enter true time-wait state. */
tw->tw_substate = TCP_TIME_WAIT;
- tcptw->tw_rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+ twsk_rcv_nxt_update(tcptw, TCP_SKB_CB(skb)->end_seq);
+
if (tmp_opt.saw_tstamp) {
tcptw->tw_ts_recent_stamp = ktime_get_seconds();
tcptw->tw_ts_recent = tmp_opt.rcv_tsval;
@@ -261,10 +274,9 @@ static void tcp_time_wait_init(struct sock *sk, struct tcp_timewait_sock *tcptw)
tcptw->tw_md5_key = kmemdup(key, sizeof(*key), GFP_ATOMIC);
if (!tcptw->tw_md5_key)
return;
- if (!tcp_alloc_md5sig_pool())
- goto out_free;
if (!static_key_fast_inc_not_disabled(&tcp_md5_needed.key.key))
goto out_free;
+ tcp_md5_add_sigpool();
}
return;
out_free:
@@ -280,7 +292,7 @@ out_free:
void tcp_time_wait(struct sock *sk, int state, int timeo)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
- const struct tcp_sock *tp = tcp_sk(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
struct net *net = sock_net(sk);
struct inet_timewait_sock *tw;
@@ -292,7 +304,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
tw->tw_transparent = inet_test_bit(TRANSPARENT, sk);
tw->tw_mark = sk->sk_mark;
- tw->tw_priority = sk->sk_priority;
+ tw->tw_priority = READ_ONCE(sk->sk_priority);
tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale;
tcptw->tw_rcv_nxt = tp->rcv_nxt;
tcptw->tw_snd_nxt = tp->snd_nxt;
@@ -300,6 +312,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
tcptw->tw_ts_recent = tp->rx_opt.ts_recent;
tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp;
tcptw->tw_ts_offset = tp->tsoffset;
+ tw->tw_usec_ts = tp->tcp_usec_ts;
tcptw->tw_last_oow_ack_time = 0;
tcptw->tw_tx_delay = tp->tcp_tx_delay;
tw->tw_txhash = sk->sk_txhash;
@@ -316,6 +329,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
#endif
tcp_time_wait_init(sk, tcptw);
+ tcp_ao_time_wait(tcptw, tp);
/* Get the TIME_WAIT timeout firing. */
if (timeo < rto)
@@ -348,18 +362,29 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
}
EXPORT_SYMBOL(tcp_time_wait);
+#ifdef CONFIG_TCP_MD5SIG
+static void tcp_md5_twsk_free_rcu(struct rcu_head *head)
+{
+ struct tcp_md5sig_key *key;
+
+ key = container_of(head, struct tcp_md5sig_key, rcu);
+ kfree(key);
+ static_branch_slow_dec_deferred(&tcp_md5_needed);
+ tcp_md5_release_sigpool();
+}
+#endif
+
void tcp_twsk_destructor(struct sock *sk)
{
#ifdef CONFIG_TCP_MD5SIG
if (static_branch_unlikely(&tcp_md5_needed.key)) {
struct tcp_timewait_sock *twsk = tcp_twsk(sk);
- if (twsk->tw_md5_key) {
- kfree_rcu(twsk->tw_md5_key, rcu);
- static_branch_slow_dec_deferred(&tcp_md5_needed);
- }
+ if (twsk->tw_md5_key)
+ call_rcu(&twsk->tw_md5_key->rcu, tcp_md5_twsk_free_rcu);
}
#endif
+ tcp_ao_destroy_sock(sk, true);
}
EXPORT_SYMBOL_GPL(tcp_twsk_destructor);
@@ -494,6 +519,9 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
const struct tcp_sock *oldtp;
struct tcp_sock *newtp;
u32 seq;
+#ifdef CONFIG_TCP_AO
+ struct tcp_ao_key *ao_key;
+#endif
if (!newsk)
return NULL;
@@ -554,22 +582,41 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newtp->max_window = newtp->snd_wnd;
if (newtp->rx_opt.tstamp_ok) {
+ newtp->tcp_usec_ts = treq->req_usec_ts;
newtp->rx_opt.ts_recent = READ_ONCE(req->ts_recent);
newtp->rx_opt.ts_recent_stamp = ktime_get_seconds();
newtp->tcp_header_len = sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
} else {
+ newtp->tcp_usec_ts = 0;
newtp->rx_opt.ts_recent_stamp = 0;
newtp->tcp_header_len = sizeof(struct tcphdr);
}
if (req->num_timeout) {
+ newtp->total_rto = req->num_timeout;
newtp->undo_marker = treq->snt_isn;
- newtp->retrans_stamp = div_u64(treq->snt_synack,
- USEC_PER_SEC / TCP_TS_HZ);
+ if (newtp->tcp_usec_ts) {
+ newtp->retrans_stamp = treq->snt_synack;
+ newtp->total_rto_time = (u32)(tcp_clock_us() -
+ newtp->retrans_stamp) / USEC_PER_MSEC;
+ } else {
+ newtp->retrans_stamp = div_u64(treq->snt_synack,
+ USEC_PER_SEC / TCP_TS_HZ);
+ newtp->total_rto_time = tcp_clock_ms() -
+ newtp->retrans_stamp;
+ }
+ newtp->total_rto_recoveries = 1;
}
newtp->tsoffset = treq->ts_off;
#ifdef CONFIG_TCP_MD5SIG
newtp->md5sig_info = NULL; /*XXX*/
#endif
+#ifdef CONFIG_TCP_AO
+ newtp->ao_info = NULL;
+ ao_key = treq->af_specific->ao_lookup(sk, req,
+ tcp_rsk(req)->ao_keyid, -1);
+ if (ao_key)
+ newtp->tcp_header_len += tcp_ao_len_aligned(ao_key);
+ #endif
if (skb->len >= TCP_MSS_DEFAULT + newtp->tcp_header_len)
newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len;
newtp->rx_opt.mss_clamp = req->mss;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index e6b4fbd642f7..e3167ad96567 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -170,15 +170,14 @@ static void tcp_event_data_sent(struct tcp_sock *tp,
tp->lsndtime = now;
/* If it is a reply for ato after last received
- * packet, enter pingpong mode.
+ * packet, increase pingpong count.
*/
if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
- inet_csk_enter_pingpong_mode(sk);
+ inet_csk_inc_pingpong_cnt(sk);
}
/* Account for an ACK we sent. */
-static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts,
- u32 rcv_nxt)
+static inline void tcp_event_ack_sent(struct sock *sk, u32 rcv_nxt)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -192,7 +191,7 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts,
if (unlikely(rcv_nxt != tp->rcv_nxt))
return; /* Special ACK sent by DCTCP to reflect ECN */
- tcp_dec_quickack_mode(sk, pkts);
+ tcp_dec_quickack_mode(sk);
inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
}
@@ -423,6 +422,7 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp)
#define OPTION_FAST_OPEN_COOKIE BIT(8)
#define OPTION_SMC BIT(9)
#define OPTION_MPTCP BIT(10)
+#define OPTION_AO BIT(11)
static void smc_options_write(__be32 *ptr, u16 *options)
{
@@ -601,6 +601,44 @@ static void bpf_skops_write_hdr_opt(struct sock *sk, struct sk_buff *skb,
}
#endif
+static __be32 *process_tcp_ao_options(struct tcp_sock *tp,
+ const struct tcp_request_sock *tcprsk,
+ struct tcp_out_options *opts,
+ struct tcp_key *key, __be32 *ptr)
+{
+#ifdef CONFIG_TCP_AO
+ u8 maclen = tcp_ao_maclen(key->ao_key);
+
+ if (tcprsk) {
+ u8 aolen = maclen + sizeof(struct tcp_ao_hdr);
+
+ *ptr++ = htonl((TCPOPT_AO << 24) | (aolen << 16) |
+ (tcprsk->ao_keyid << 8) |
+ (tcprsk->ao_rcv_next));
+ } else {
+ struct tcp_ao_key *rnext_key;
+ struct tcp_ao_info *ao_info;
+
+ ao_info = rcu_dereference_check(tp->ao_info,
+ lockdep_sock_is_held(&tp->inet_conn.icsk_inet.sk));
+ rnext_key = READ_ONCE(ao_info->rnext_key);
+ if (WARN_ON_ONCE(!rnext_key))
+ return ptr;
+ *ptr++ = htonl((TCPOPT_AO << 24) |
+ (tcp_ao_len(key->ao_key) << 16) |
+ (key->ao_key->sndid << 8) |
+ (rnext_key->rcvid));
+ }
+ opts->hash_location = (__u8 *)ptr;
+ ptr += maclen / sizeof(*ptr);
+ if (unlikely(maclen % sizeof(*ptr))) {
+ memset(ptr, TCPOPT_NOP, sizeof(*ptr));
+ ptr++;
+ }
+#endif
+ return ptr;
+}
+
/* Write previously computed TCP options to the packet.
*
* Beware: Something in the Internet is very sensitive to the ordering of
@@ -615,19 +653,22 @@ static void bpf_skops_write_hdr_opt(struct sock *sk, struct sk_buff *skb,
* (but it may well be that other scenarios fail similarly).
*/
static void tcp_options_write(struct tcphdr *th, struct tcp_sock *tp,
- struct tcp_out_options *opts)
+ const struct tcp_request_sock *tcprsk,
+ struct tcp_out_options *opts,
+ struct tcp_key *key)
{
__be32 *ptr = (__be32 *)(th + 1);
u16 options = opts->options; /* mungable copy */
- if (unlikely(OPTION_MD5 & options)) {
+ if (tcp_key_is_md5(key)) {
*ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
/* overload cookie hash location */
opts->hash_location = (__u8 *)ptr;
ptr += 4;
+ } else if (tcp_key_is_ao(key)) {
+ ptr = process_tcp_ao_options(tp, tcprsk, opts, key, ptr);
}
-
if (unlikely(opts->mss)) {
*ptr++ = htonl((TCPOPT_MSS << 24) |
(TCPOLEN_MSS << 16) |
@@ -768,23 +809,25 @@ static void mptcp_set_option_cond(const struct request_sock *req,
*/
static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
struct tcp_out_options *opts,
- struct tcp_md5sig_key **md5)
+ struct tcp_key *key)
{
struct tcp_sock *tp = tcp_sk(sk);
unsigned int remaining = MAX_TCP_OPTION_SPACE;
struct tcp_fastopen_request *fastopen = tp->fastopen_req;
+ bool timestamps;
- *md5 = NULL;
-#ifdef CONFIG_TCP_MD5SIG
- if (static_branch_unlikely(&tcp_md5_needed.key) &&
- rcu_access_pointer(tp->md5sig_info)) {
- *md5 = tp->af_specific->md5_lookup(sk, sk);
- if (*md5) {
- opts->options |= OPTION_MD5;
- remaining -= TCPOLEN_MD5SIG_ALIGNED;
+ /* Better than switch (key.type) as it has static branches */
+ if (tcp_key_is_md5(key)) {
+ timestamps = false;
+ opts->options |= OPTION_MD5;
+ remaining -= TCPOLEN_MD5SIG_ALIGNED;
+ } else {
+ timestamps = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps);
+ if (tcp_key_is_ao(key)) {
+ opts->options |= OPTION_AO;
+ remaining -= tcp_ao_len_aligned(key->ao_key);
}
}
-#endif
/* We always get an MSS option. The option bytes which will be seen in
* normal data packets should timestamps be used, must be in the MSS
@@ -798,9 +841,9 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
opts->mss = tcp_advertise_mss(sk);
remaining -= TCPOLEN_MSS_ALIGNED;
- if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps) && !*md5)) {
+ if (likely(timestamps)) {
opts->options |= OPTION_TS;
- opts->tsval = tcp_skb_timestamp(skb) + tp->tsoffset;
+ opts->tsval = tcp_skb_timestamp_ts(tp->tcp_usec_ts, skb) + tp->tsoffset;
opts->tsecr = tp->rx_opt.ts_recent;
remaining -= TCPOLEN_TSTAMP_ALIGNED;
}
@@ -851,7 +894,7 @@ static unsigned int tcp_synack_options(const struct sock *sk,
struct request_sock *req,
unsigned int mss, struct sk_buff *skb,
struct tcp_out_options *opts,
- const struct tcp_md5sig_key *md5,
+ const struct tcp_key *key,
struct tcp_fastopen_cookie *foc,
enum tcp_synack_type synack_type,
struct sk_buff *syn_skb)
@@ -859,8 +902,7 @@ static unsigned int tcp_synack_options(const struct sock *sk,
struct inet_request_sock *ireq = inet_rsk(req);
unsigned int remaining = MAX_TCP_OPTION_SPACE;
-#ifdef CONFIG_TCP_MD5SIG
- if (md5) {
+ if (tcp_key_is_md5(key)) {
opts->options |= OPTION_MD5;
remaining -= TCPOLEN_MD5SIG_ALIGNED;
@@ -871,8 +913,11 @@ static unsigned int tcp_synack_options(const struct sock *sk,
*/
if (synack_type != TCP_SYNACK_COOKIE)
ireq->tstamp_ok &= !ireq->sack_ok;
+ } else if (tcp_key_is_ao(key)) {
+ opts->options |= OPTION_AO;
+ remaining -= tcp_ao_len_aligned(key->ao_key);
+ ireq->tstamp_ok &= !ireq->sack_ok;
}
-#endif
/* We always send an MSS option. */
opts->mss = mss;
@@ -885,7 +930,8 @@ static unsigned int tcp_synack_options(const struct sock *sk,
}
if (likely(ireq->tstamp_ok)) {
opts->options |= OPTION_TS;
- opts->tsval = tcp_skb_timestamp(skb) + tcp_rsk(req)->ts_off;
+ opts->tsval = tcp_skb_timestamp_ts(tcp_rsk(req)->req_usec_ts, skb) +
+ tcp_rsk(req)->ts_off;
opts->tsecr = READ_ONCE(req->ts_recent);
remaining -= TCPOLEN_TSTAMP_ALIGNED;
}
@@ -922,7 +968,7 @@ static unsigned int tcp_synack_options(const struct sock *sk,
*/
static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb,
struct tcp_out_options *opts,
- struct tcp_md5sig_key **md5)
+ struct tcp_key *key)
{
struct tcp_sock *tp = tcp_sk(sk);
unsigned int size = 0;
@@ -930,21 +976,19 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
opts->options = 0;
- *md5 = NULL;
-#ifdef CONFIG_TCP_MD5SIG
- if (static_branch_unlikely(&tcp_md5_needed.key) &&
- rcu_access_pointer(tp->md5sig_info)) {
- *md5 = tp->af_specific->md5_lookup(sk, sk);
- if (*md5) {
- opts->options |= OPTION_MD5;
- size += TCPOLEN_MD5SIG_ALIGNED;
- }
+ /* Better than switch (key.type) as it has static branches */
+ if (tcp_key_is_md5(key)) {
+ opts->options |= OPTION_MD5;
+ size += TCPOLEN_MD5SIG_ALIGNED;
+ } else if (tcp_key_is_ao(key)) {
+ opts->options |= OPTION_AO;
+ size += tcp_ao_len_aligned(key->ao_key);
}
-#endif
if (likely(tp->rx_opt.tstamp_ok)) {
opts->options |= OPTION_TS;
- opts->tsval = skb ? tcp_skb_timestamp(skb) + tp->tsoffset : 0;
+ opts->tsval = skb ? tcp_skb_timestamp_ts(tp->tcp_usec_ts, skb) +
+ tp->tsoffset : 0;
opts->tsecr = tp->rx_opt.ts_recent;
size += TCPOLEN_TSTAMP_ALIGNED;
}
@@ -1077,7 +1121,8 @@ static void tcp_tasklet_func(struct tasklet_struct *t)
#define TCP_DEFERRED_ALL (TCPF_TSQ_DEFERRED | \
TCPF_WRITE_TIMER_DEFERRED | \
TCPF_DELACK_TIMER_DEFERRED | \
- TCPF_MTU_REDUCED_DEFERRED)
+ TCPF_MTU_REDUCED_DEFERRED | \
+ TCPF_ACK_DEFERRED)
/**
* tcp_release_cb - tcp release_sock() callback
* @sk: socket
@@ -1101,16 +1146,6 @@ void tcp_release_cb(struct sock *sk)
tcp_tsq_write(sk);
__sock_put(sk);
}
- /* Here begins the tricky part :
- * We are called from release_sock() with :
- * 1) BH disabled
- * 2) sk_lock.slock spinlock held
- * 3) socket owned by us (sk->sk_lock.owned == 1)
- *
- * But following code is meant to be called from BH handlers,
- * so we should keep BH disabled, but early release socket ownership
- */
- sock_release_ownership(sk);
if (flags & TCPF_WRITE_TIMER_DEFERRED) {
tcp_write_timer_handler(sk);
@@ -1124,6 +1159,8 @@ void tcp_release_cb(struct sock *sk)
inet_csk(sk)->icsk_af_ops->mtu_reduced(sk);
__sock_put(sk);
}
+ if ((flags & TCPF_ACK_DEFERRED) && inet_csk_ack_scheduled(sk))
+ tcp_send_ack(sk);
}
EXPORT_SYMBOL(tcp_release_cb);
@@ -1208,7 +1245,7 @@ static void tcp_update_skb_after_send(struct sock *sk, struct sk_buff *skb,
struct tcp_sock *tp = tcp_sk(sk);
if (sk->sk_pacing_status != SK_PACING_NONE) {
- unsigned long rate = sk->sk_pacing_rate;
+ unsigned long rate = READ_ONCE(sk->sk_pacing_rate);
/* Original sch_fq does not pace first 10 MSS
* Note that tp->data_segs_out overflows after 2^32 packets,
@@ -1251,7 +1288,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
struct tcp_out_options opts;
unsigned int tcp_options_size, tcp_header_size;
struct sk_buff *oskb = NULL;
- struct tcp_md5sig_key *md5;
+ struct tcp_key key;
struct tcphdr *th;
u64 prior_wstamp;
int err;
@@ -1283,11 +1320,11 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
tcb = TCP_SKB_CB(skb);
memset(&opts, 0, sizeof(opts));
+ tcp_get_current_key(sk, &key);
if (unlikely(tcb->tcp_flags & TCPHDR_SYN)) {
- tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5);
+ tcp_options_size = tcp_syn_options(sk, skb, &opts, &key);
} else {
- tcp_options_size = tcp_established_options(sk, skb, &opts,
- &md5);
+ tcp_options_size = tcp_established_options(sk, skb, &opts, &key);
/* Force a PSH flag on all (GSO) packets to expedite GRO flush
* at receiver : This slightly improve GRO performance.
* Note that we do not force the PSH flag for non GSO packets,
@@ -1332,7 +1369,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
skb->destructor = skb_is_tcp_pure_ack(skb) ? __sock_wfree : tcp_wfree;
refcount_add(skb->truesize, &sk->sk_wmem_alloc);
- skb_set_dst_pending_confirm(skb, sk->sk_dst_pending_confirm);
+ skb_set_dst_pending_confirm(skb, READ_ONCE(sk->sk_dst_pending_confirm));
/* Build TCP header and checksum it. */
th = (struct tcphdr *)skb->data;
@@ -1368,16 +1405,25 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
th->window = htons(min(tp->rcv_wnd, 65535U));
}
- tcp_options_write(th, tp, &opts);
+ tcp_options_write(th, tp, NULL, &opts, &key);
+ if (tcp_key_is_md5(&key)) {
#ifdef CONFIG_TCP_MD5SIG
- /* Calculate the MD5 hash, as we have all we need now */
- if (md5) {
+ /* Calculate the MD5 hash, as we have all we need now */
sk_gso_disable(sk);
tp->af_specific->calc_md5_hash(opts.hash_location,
- md5, sk, skb);
- }
+ key.md5_key, sk, skb);
#endif
+ } else if (tcp_key_is_ao(&key)) {
+ int err;
+
+ err = tcp_ao_transmit_skb(sk, skb, key.ao_key, th,
+ opts.hash_location);
+ if (err) {
+ kfree_skb_reason(skb, SKB_DROP_REASON_NOT_SPECIFIED);
+ return -ENOMEM;
+ }
+ }
/* BPF prog is the last one writing header option */
bpf_skops_write_hdr_opt(sk, skb, NULL, NULL, 0, &opts);
@@ -1387,7 +1433,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
sk, skb);
if (likely(tcb->tcp_flags & TCPHDR_ACK))
- tcp_event_ack_sent(sk, tcp_skb_pcount(skb), rcv_nxt);
+ tcp_event_ack_sent(sk, rcv_nxt);
if (skb->len != tcp_header_size) {
tcp_event_data_sent(tp, sk);
@@ -1704,14 +1750,6 @@ static inline int __tcp_mtu_to_mss(struct sock *sk, int pmtu)
*/
mss_now = pmtu - icsk->icsk_af_ops->net_header_len - sizeof(struct tcphdr);
- /* IPv6 adds a frag_hdr in case RTAX_FEATURE_ALLFRAG is set */
- if (icsk->icsk_af_ops->net_frag_header_len) {
- const struct dst_entry *dst = __sk_dst_get(sk);
-
- if (dst && dst_allfrag(dst))
- mss_now -= icsk->icsk_af_ops->net_frag_header_len;
- }
-
/* Clamp it (mss_clamp does not include tcp options) */
if (mss_now > tp->rx_opt.mss_clamp)
mss_now = tp->rx_opt.mss_clamp;
@@ -1739,21 +1777,11 @@ int tcp_mss_to_mtu(struct sock *sk, int mss)
{
const struct tcp_sock *tp = tcp_sk(sk);
const struct inet_connection_sock *icsk = inet_csk(sk);
- int mtu;
- mtu = mss +
+ return mss +
tp->tcp_header_len +
icsk->icsk_ext_hdr_len +
icsk->icsk_af_ops->net_header_len;
-
- /* IPv6 adds a frag_hdr in case RTAX_FEATURE_ALLFRAG is set */
- if (icsk->icsk_af_ops->net_frag_header_len) {
- const struct dst_entry *dst = __sk_dst_get(sk);
-
- if (dst && dst_allfrag(dst))
- mtu += icsk->icsk_af_ops->net_frag_header_len;
- }
- return mtu;
}
EXPORT_SYMBOL(tcp_mss_to_mtu);
@@ -1828,7 +1856,7 @@ unsigned int tcp_current_mss(struct sock *sk)
u32 mss_now;
unsigned int header_len;
struct tcp_out_options opts;
- struct tcp_md5sig_key *md5;
+ struct tcp_key key;
mss_now = tp->mss_cache;
@@ -1837,8 +1865,8 @@ unsigned int tcp_current_mss(struct sock *sk)
if (mtu != inet_csk(sk)->icsk_pmtu_cookie)
mss_now = tcp_sync_mss(sk, mtu);
}
-
- header_len = tcp_established_options(sk, NULL, &opts, &md5) +
+ tcp_get_current_key(sk, &key);
+ header_len = tcp_established_options(sk, NULL, &opts, &key) +
sizeof(struct tcphdr);
/* The mss_cache is sized based on tp->tcp_header_len, which assumes
* some common options. If this is an odd packet (because we have SACK
@@ -1980,7 +2008,7 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
unsigned long bytes;
u32 r;
- bytes = sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift);
+ bytes = READ_ONCE(sk->sk_pacing_rate) >> READ_ONCE(sk->sk_pacing_shift);
r = tcp_min_rtt(tcp_sk(sk)) >> READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tso_rtt_log);
if (r < BITS_PER_TYPE(sk->sk_gso_max_size))
@@ -2457,6 +2485,7 @@ static int tcp_mtu_probe(struct sock *sk)
/* build the payload, and be prepared to abort if this fails. */
if (tcp_clone_payload(sk, nskb, probe_size)) {
+ tcp_skb_tsorted_anchor_cleanup(nskb);
consume_skb(nskb);
return -1;
}
@@ -2542,6 +2571,18 @@ static bool tcp_pacing_check(struct sock *sk)
return true;
}
+static bool tcp_rtx_queue_empty_or_single_skb(const struct sock *sk)
+{
+ const struct rb_node *node = sk->tcp_rtx_queue.rb_node;
+
+ /* No skb in the rtx queue. */
+ if (!node)
+ return true;
+
+ /* Only one skb in rtx queue. */
+ return !node->rb_left && !node->rb_right;
+}
+
/* TCP Small Queues :
* Control number of packets in qdisc/devices to two packets / or ~1 ms.
* (These limits are doubled for retransmits)
@@ -2560,7 +2601,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
limit = max_t(unsigned long,
2 * skb->truesize,
- sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift));
+ READ_ONCE(sk->sk_pacing_rate) >> READ_ONCE(sk->sk_pacing_shift));
if (sk->sk_pacing_status == SK_PACING_NONE)
limit = min_t(unsigned long, limit,
READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes));
@@ -2568,7 +2609,8 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
if (static_branch_unlikely(&tcp_tx_delay_enabled) &&
tcp_sk(sk)->tcp_tx_delay) {
- u64 extra_bytes = (u64)sk->sk_pacing_rate * tcp_sk(sk)->tcp_tx_delay;
+ u64 extra_bytes = (u64)READ_ONCE(sk->sk_pacing_rate) *
+ tcp_sk(sk)->tcp_tx_delay;
/* TSQ is based on skb truesize sum (sk_wmem_alloc), so we
* approximate our needs assuming an ~100% skb->truesize overhead.
@@ -2579,12 +2621,12 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
limit += extra_bytes;
}
if (refcount_read(&sk->sk_wmem_alloc) > limit) {
- /* Always send skb if rtx queue is empty.
+ /* Always send skb if rtx queue is empty or has one skb.
* No need to wait for TX completion to call us back,
* after softirq/tasklet schedule.
* This helps when TX completions are delayed too much.
*/
- if (tcp_rtx_queue_empty(sk))
+ if (tcp_rtx_queue_empty_or_single_skb(sk))
return false;
set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags);
@@ -2788,7 +2830,7 @@ bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
- u32 timeout, rto_delta_us;
+ u32 timeout, timeout_us, rto_delta_us;
int early_retrans;
/* Don't do any loss probe on a Fast Open connection before 3WHS
@@ -2812,11 +2854,12 @@ bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto)
* sample is available then probe after TCP_TIMEOUT_INIT.
*/
if (tp->srtt_us) {
- timeout = usecs_to_jiffies(tp->srtt_us >> 2);
+ timeout_us = tp->srtt_us >> 2;
if (tp->packets_out == 1)
- timeout += TCP_RTO_MIN;
+ timeout_us += tcp_rto_min_us(sk);
else
- timeout += TCP_TIMEOUT_MIN;
+ timeout_us += TCP_TIMEOUT_MIN_US;
+ timeout = usecs_to_jiffies(timeout_us);
} else {
timeout = TCP_TIMEOUT_INIT;
}
@@ -3250,7 +3293,13 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
if (skb_still_in_host_queue(sk, skb))
return -EBUSY;
+start:
if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) {
+ if (unlikely(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
+ TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_SYN;
+ TCP_SKB_CB(skb)->seq++;
+ goto start;
+ }
if (unlikely(before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))) {
WARN_ON_ONCE(1);
return -EINVAL;
@@ -3372,7 +3421,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
/* Save stamp of the first (attempted) retransmit. */
if (!tp->retrans_stamp)
- tp->retrans_stamp = tcp_skb_timestamp(skb);
+ tp->retrans_stamp = tcp_skb_timestamp_ts(tp->tcp_usec_ts, skb);
if (tp->undo_retrans < 0)
tp->undo_retrans = 0;
@@ -3474,7 +3523,7 @@ void sk_forced_mem_schedule(struct sock *sk, int size)
if (delta <= 0)
return;
amt = sk_mem_pages(delta);
- sk->sk_forward_alloc += amt << PAGE_SHIFT;
+ sk_forward_alloc_add(sk, amt << PAGE_SHIFT);
sk_memory_allocated_add(sk, amt);
if (mem_cgroup_sockets_enabled && sk->sk_memcg)
@@ -3620,8 +3669,8 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
{
struct inet_request_sock *ireq = inet_rsk(req);
const struct tcp_sock *tp = tcp_sk(sk);
- struct tcp_md5sig_key *md5 = NULL;
struct tcp_out_options opts;
+ struct tcp_key key = {};
struct sk_buff *skb;
int tcp_header_size;
struct tcphdr *th;
@@ -3671,16 +3720,45 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
tcp_rsk(req)->snt_synack = tcp_skb_timestamp_us(skb);
}
-#ifdef CONFIG_TCP_MD5SIG
+#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
rcu_read_lock();
- md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req));
#endif
+ if (tcp_rsk_used_ao(req)) {
+#ifdef CONFIG_TCP_AO
+ struct tcp_ao_key *ao_key = NULL;
+ u8 keyid = tcp_rsk(req)->ao_keyid;
+
+ ao_key = tcp_sk(sk)->af_specific->ao_lookup(sk, req_to_sk(req),
+ keyid, -1);
+ /* If there is no matching key - avoid sending anything,
+ * especially usigned segments. It could try harder and lookup
+ * for another peer-matching key, but the peer has requested
+ * ao_keyid (RFC5925 RNextKeyID), so let's keep it simple here.
+ */
+ if (unlikely(!ao_key)) {
+ rcu_read_unlock();
+ kfree_skb(skb);
+ net_warn_ratelimited("TCP-AO: the keyid %u from SYN packet is not present - not sending SYNACK\n",
+ keyid);
+ return NULL;
+ }
+ key.ao_key = ao_key;
+ key.type = TCP_KEY_AO;
+#endif
+ } else {
+#ifdef CONFIG_TCP_MD5SIG
+ key.md5_key = tcp_rsk(req)->af_specific->req_md5_lookup(sk,
+ req_to_sk(req));
+ if (key.md5_key)
+ key.type = TCP_KEY_MD5;
+#endif
+ }
skb_set_hash(skb, READ_ONCE(tcp_rsk(req)->txhash), PKT_HASH_TYPE_L4);
/* bpf program will be interested in the tcp_flags */
TCP_SKB_CB(skb)->tcp_flags = TCPHDR_SYN | TCPHDR_ACK;
- tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5,
- foc, synack_type,
- syn_skb) + sizeof(*th);
+ tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts,
+ &key, foc, synack_type, syn_skb)
+ + sizeof(*th);
skb_push(skb, tcp_header_size);
skb_reset_transport_header(skb);
@@ -3700,15 +3778,24 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
/* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
th->window = htons(min(req->rsk_rcv_wnd, 65535U));
- tcp_options_write(th, NULL, &opts);
+ tcp_options_write(th, NULL, tcp_rsk(req), &opts, &key);
th->doff = (tcp_header_size >> 2);
TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS);
-#ifdef CONFIG_TCP_MD5SIG
/* Okay, we have all we need - do the md5 hash if needed */
- if (md5)
+ if (tcp_key_is_md5(&key)) {
+#ifdef CONFIG_TCP_MD5SIG
tcp_rsk(req)->af_specific->calc_md5_hash(opts.hash_location,
- md5, req_to_sk(req), skb);
+ key.md5_key, req_to_sk(req), skb);
+#endif
+ } else if (tcp_key_is_ao(&key)) {
+#ifdef CONFIG_TCP_AO
+ tcp_rsk(req)->af_specific->ao_synack_hash(opts.hash_location,
+ key.ao_key, req, skb,
+ opts.hash_location - (u8 *)th, 0);
+#endif
+ }
+#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
rcu_read_unlock();
#endif
@@ -3756,6 +3843,8 @@ static void tcp_connect_init(struct sock *sk)
if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps))
tp->tcp_header_len += TCPOLEN_TSTAMP_ALIGNED;
+ tcp_ao_connect_init(sk);
+
/* If user gave his TCP_MAXSEG, record it to clamp */
if (tp->rx_opt.user_mss)
tp->rx_opt.mss_clamp = tp->rx_opt.user_mss;
@@ -3938,6 +4027,53 @@ int tcp_connect(struct sock *sk)
tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_CONNECT_CB, 0, NULL);
+#if defined(CONFIG_TCP_MD5SIG) && defined(CONFIG_TCP_AO)
+ /* Has to be checked late, after setting daddr/saddr/ops.
+ * Return error if the peer has both a md5 and a tcp-ao key
+ * configured as this is ambiguous.
+ */
+ if (unlikely(rcu_dereference_protected(tp->md5sig_info,
+ lockdep_sock_is_held(sk)))) {
+ bool needs_ao = !!tp->af_specific->ao_lookup(sk, sk, -1, -1);
+ bool needs_md5 = !!tp->af_specific->md5_lookup(sk, sk);
+ struct tcp_ao_info *ao_info;
+
+ ao_info = rcu_dereference_check(tp->ao_info,
+ lockdep_sock_is_held(sk));
+ if (ao_info) {
+ /* This is an extra check: tcp_ao_required() in
+ * tcp_v{4,6}_parse_md5_keys() should prevent adding
+ * md5 keys on ao_required socket.
+ */
+ needs_ao |= ao_info->ao_required;
+ WARN_ON_ONCE(ao_info->ao_required && needs_md5);
+ }
+ if (needs_md5 && needs_ao)
+ return -EKEYREJECTED;
+
+ /* If we have a matching md5 key and no matching tcp-ao key
+ * then free up ao_info if allocated.
+ */
+ if (needs_md5) {
+ tcp_ao_destroy_sock(sk, false);
+ } else if (needs_ao) {
+ tcp_clear_md5_list(sk);
+ kfree(rcu_replace_pointer(tp->md5sig_info, NULL,
+ lockdep_sock_is_held(sk)));
+ }
+ }
+#endif
+#ifdef CONFIG_TCP_AO
+ if (unlikely(rcu_dereference_protected(tp->ao_info,
+ lockdep_sock_is_held(sk)))) {
+ /* Don't allow connecting if ao is configured but no
+ * matching key is found.
+ */
+ if (!tp->af_specific->ao_lookup(sk, sk, -1, -1))
+ return -EKEYREJECTED;
+ }
+#endif
+
if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
return -EHOSTUNREACH; /* Routing failure or similar. */
@@ -3954,7 +4090,7 @@ int tcp_connect(struct sock *sk)
tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN);
tcp_mstamp_refresh(tp);
- tp->retrans_stamp = tcp_time_stamp(tp);
+ tp->retrans_stamp = tcp_time_stamp_ts(tp);
tcp_connect_queue_skb(sk, buff);
tcp_ecn_send_syn(sk, buff);
tcp_rbtree_insert(&sk->tcp_rtx_queue, buff);
@@ -3984,6 +4120,20 @@ int tcp_connect(struct sock *sk)
}
EXPORT_SYMBOL(tcp_connect);
+u32 tcp_delack_max(const struct sock *sk)
+{
+ const struct dst_entry *dst = __sk_dst_get(sk);
+ u32 delack_max = inet_csk(sk)->icsk_delack_max;
+
+ if (dst && dst_metric_locked(dst, RTAX_RTO_MIN)) {
+ u32 rto_min = dst_metric_rtt(dst, RTAX_RTO_MIN);
+ u32 delack_from_rto_min = max_t(int, 1, rto_min - 1);
+
+ delack_max = min_t(u32, delack_max, delack_from_rto_min);
+ }
+ return delack_max;
+}
+
/* Send out a delayed ack, the caller does the policy checking
* to see if we should even be here. See tcp_input.c:tcp_ack_snd_check()
* for details.
@@ -4019,7 +4169,7 @@ void tcp_send_delayed_ack(struct sock *sk)
ato = min(ato, max_ato);
}
- ato = min_t(u32, ato, inet_csk(sk)->icsk_delack_max);
+ ato = min_t(u32, ato, tcp_delack_max(sk));
/* Stay within the limit we were given */
timeout = jiffies + ato;
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
index acf4869c5d3b..bba10110fbbc 100644
--- a/net/ipv4/tcp_recovery.c
+++ b/net/ipv4/tcp_recovery.c
@@ -104,7 +104,7 @@ bool tcp_rack_mark_lost(struct sock *sk)
tp->rack.advanced = 0;
tcp_rack_detect_loss(sk, &timeout);
if (timeout) {
- timeout = usecs_to_jiffies(timeout) + TCP_TIMEOUT_MIN;
+ timeout = usecs_to_jiffies(timeout + TCP_TIMEOUT_MIN_US);
inet_csk_reset_xmit_timer(sk, ICSK_TIME_REO_TIMEOUT,
timeout, inet_csk(sk)->icsk_rto);
}
diff --git a/net/ipv4/tcp_sigpool.c b/net/ipv4/tcp_sigpool.c
new file mode 100644
index 000000000000..8512cb09ebc0
--- /dev/null
+++ b/net/ipv4/tcp_sigpool.c
@@ -0,0 +1,357 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <crypto/hash.h>
+#include <linux/cpu.h>
+#include <linux/kref.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/percpu.h>
+#include <linux/workqueue.h>
+#include <net/tcp.h>
+
+static size_t __scratch_size;
+static DEFINE_PER_CPU(void __rcu *, sigpool_scratch);
+
+struct sigpool_entry {
+ struct crypto_ahash *hash;
+ const char *alg;
+ struct kref kref;
+ uint16_t needs_key:1,
+ reserved:15;
+};
+
+#define CPOOL_SIZE (PAGE_SIZE / sizeof(struct sigpool_entry))
+static struct sigpool_entry cpool[CPOOL_SIZE];
+static unsigned int cpool_populated;
+static DEFINE_MUTEX(cpool_mutex);
+
+/* Slow-path */
+struct scratches_to_free {
+ struct rcu_head rcu;
+ unsigned int cnt;
+ void *scratches[];
+};
+
+static void free_old_scratches(struct rcu_head *head)
+{
+ struct scratches_to_free *stf;
+
+ stf = container_of(head, struct scratches_to_free, rcu);
+ while (stf->cnt--)
+ kfree(stf->scratches[stf->cnt]);
+ kfree(stf);
+}
+
+/**
+ * sigpool_reserve_scratch - re-allocates scratch buffer, slow-path
+ * @size: request size for the scratch/temp buffer
+ */
+static int sigpool_reserve_scratch(size_t size)
+{
+ struct scratches_to_free *stf;
+ size_t stf_sz = struct_size(stf, scratches, num_possible_cpus());
+ int cpu, err = 0;
+
+ lockdep_assert_held(&cpool_mutex);
+ if (__scratch_size >= size)
+ return 0;
+
+ stf = kmalloc(stf_sz, GFP_KERNEL);
+ if (!stf)
+ return -ENOMEM;
+ stf->cnt = 0;
+
+ size = max(size, __scratch_size);
+ cpus_read_lock();
+ for_each_possible_cpu(cpu) {
+ void *scratch, *old_scratch;
+
+ scratch = kmalloc_node(size, GFP_KERNEL, cpu_to_node(cpu));
+ if (!scratch) {
+ err = -ENOMEM;
+ break;
+ }
+
+ old_scratch = rcu_replace_pointer(per_cpu(sigpool_scratch, cpu),
+ scratch, lockdep_is_held(&cpool_mutex));
+ if (!cpu_online(cpu) || !old_scratch) {
+ kfree(old_scratch);
+ continue;
+ }
+ stf->scratches[stf->cnt++] = old_scratch;
+ }
+ cpus_read_unlock();
+ if (!err)
+ __scratch_size = size;
+
+ call_rcu(&stf->rcu, free_old_scratches);
+ return err;
+}
+
+static void sigpool_scratch_free(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ kfree(rcu_replace_pointer(per_cpu(sigpool_scratch, cpu),
+ NULL, lockdep_is_held(&cpool_mutex)));
+ __scratch_size = 0;
+}
+
+static int __cpool_try_clone(struct crypto_ahash *hash)
+{
+ struct crypto_ahash *tmp;
+
+ tmp = crypto_clone_ahash(hash);
+ if (IS_ERR(tmp))
+ return PTR_ERR(tmp);
+
+ crypto_free_ahash(tmp);
+ return 0;
+}
+
+static int __cpool_alloc_ahash(struct sigpool_entry *e, const char *alg)
+{
+ struct crypto_ahash *cpu0_hash;
+ int ret;
+
+ e->alg = kstrdup(alg, GFP_KERNEL);
+ if (!e->alg)
+ return -ENOMEM;
+
+ cpu0_hash = crypto_alloc_ahash(alg, 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR(cpu0_hash)) {
+ ret = PTR_ERR(cpu0_hash);
+ goto out_free_alg;
+ }
+
+ e->needs_key = crypto_ahash_get_flags(cpu0_hash) & CRYPTO_TFM_NEED_KEY;
+
+ ret = __cpool_try_clone(cpu0_hash);
+ if (ret)
+ goto out_free_cpu0_hash;
+ e->hash = cpu0_hash;
+ kref_init(&e->kref);
+ return 0;
+
+out_free_cpu0_hash:
+ crypto_free_ahash(cpu0_hash);
+out_free_alg:
+ kfree(e->alg);
+ e->alg = NULL;
+ return ret;
+}
+
+/**
+ * tcp_sigpool_alloc_ahash - allocates pool for ahash requests
+ * @alg: name of async hash algorithm
+ * @scratch_size: reserve a tcp_sigpool::scratch buffer of this size
+ */
+int tcp_sigpool_alloc_ahash(const char *alg, size_t scratch_size)
+{
+ int i, ret;
+
+ /* slow-path */
+ mutex_lock(&cpool_mutex);
+ ret = sigpool_reserve_scratch(scratch_size);
+ if (ret)
+ goto out;
+ for (i = 0; i < cpool_populated; i++) {
+ if (!cpool[i].alg)
+ continue;
+ if (strcmp(cpool[i].alg, alg))
+ continue;
+
+ /* pairs with tcp_sigpool_release() */
+ if (!kref_get_unless_zero(&cpool[i].kref))
+ kref_init(&cpool[i].kref);
+ ret = i;
+ goto out;
+ }
+
+ for (i = 0; i < cpool_populated; i++) {
+ if (!cpool[i].alg)
+ break;
+ }
+ if (i >= CPOOL_SIZE) {
+ ret = -ENOSPC;
+ goto out;
+ }
+
+ ret = __cpool_alloc_ahash(&cpool[i], alg);
+ if (!ret) {
+ ret = i;
+ if (i == cpool_populated)
+ cpool_populated++;
+ }
+out:
+ mutex_unlock(&cpool_mutex);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(tcp_sigpool_alloc_ahash);
+
+static void __cpool_free_entry(struct sigpool_entry *e)
+{
+ crypto_free_ahash(e->hash);
+ kfree(e->alg);
+ memset(e, 0, sizeof(*e));
+}
+
+static void cpool_cleanup_work_cb(struct work_struct *work)
+{
+ bool free_scratch = true;
+ unsigned int i;
+
+ mutex_lock(&cpool_mutex);
+ for (i = 0; i < cpool_populated; i++) {
+ if (kref_read(&cpool[i].kref) > 0) {
+ free_scratch = false;
+ continue;
+ }
+ if (!cpool[i].alg)
+ continue;
+ __cpool_free_entry(&cpool[i]);
+ }
+ if (free_scratch)
+ sigpool_scratch_free();
+ mutex_unlock(&cpool_mutex);
+}
+
+static DECLARE_WORK(cpool_cleanup_work, cpool_cleanup_work_cb);
+static void cpool_schedule_cleanup(struct kref *kref)
+{
+ schedule_work(&cpool_cleanup_work);
+}
+
+/**
+ * tcp_sigpool_release - decreases number of users for a pool. If it was
+ * the last user of the pool, releases any memory that was consumed.
+ * @id: tcp_sigpool that was previously allocated by tcp_sigpool_alloc_ahash()
+ */
+void tcp_sigpool_release(unsigned int id)
+{
+ if (WARN_ON_ONCE(id >= cpool_populated || !cpool[id].alg))
+ return;
+
+ /* slow-path */
+ kref_put(&cpool[id].kref, cpool_schedule_cleanup);
+}
+EXPORT_SYMBOL_GPL(tcp_sigpool_release);
+
+/**
+ * tcp_sigpool_get - increases number of users (refcounter) for a pool
+ * @id: tcp_sigpool that was previously allocated by tcp_sigpool_alloc_ahash()
+ */
+void tcp_sigpool_get(unsigned int id)
+{
+ if (WARN_ON_ONCE(id >= cpool_populated || !cpool[id].alg))
+ return;
+ kref_get(&cpool[id].kref);
+}
+EXPORT_SYMBOL_GPL(tcp_sigpool_get);
+
+int tcp_sigpool_start(unsigned int id, struct tcp_sigpool *c) __cond_acquires(RCU_BH)
+{
+ struct crypto_ahash *hash;
+
+ rcu_read_lock_bh();
+ if (WARN_ON_ONCE(id >= cpool_populated || !cpool[id].alg)) {
+ rcu_read_unlock_bh();
+ return -EINVAL;
+ }
+
+ hash = crypto_clone_ahash(cpool[id].hash);
+ if (IS_ERR(hash)) {
+ rcu_read_unlock_bh();
+ return PTR_ERR(hash);
+ }
+
+ c->req = ahash_request_alloc(hash, GFP_ATOMIC);
+ if (!c->req) {
+ crypto_free_ahash(hash);
+ rcu_read_unlock_bh();
+ return -ENOMEM;
+ }
+ ahash_request_set_callback(c->req, 0, NULL, NULL);
+
+ /* Pairs with tcp_sigpool_reserve_scratch(), scratch area is
+ * valid (allocated) until tcp_sigpool_end().
+ */
+ c->scratch = rcu_dereference_bh(*this_cpu_ptr(&sigpool_scratch));
+ return 0;
+}
+EXPORT_SYMBOL_GPL(tcp_sigpool_start);
+
+void tcp_sigpool_end(struct tcp_sigpool *c) __releases(RCU_BH)
+{
+ struct crypto_ahash *hash = crypto_ahash_reqtfm(c->req);
+
+ rcu_read_unlock_bh();
+ ahash_request_free(c->req);
+ crypto_free_ahash(hash);
+}
+EXPORT_SYMBOL_GPL(tcp_sigpool_end);
+
+/**
+ * tcp_sigpool_algo - return algorithm of tcp_sigpool
+ * @id: tcp_sigpool that was previously allocated by tcp_sigpool_alloc_ahash()
+ * @buf: buffer to return name of algorithm
+ * @buf_len: size of @buf
+ */
+size_t tcp_sigpool_algo(unsigned int id, char *buf, size_t buf_len)
+{
+ if (WARN_ON_ONCE(id >= cpool_populated || !cpool[id].alg))
+ return -EINVAL;
+
+ return strscpy(buf, cpool[id].alg, buf_len);
+}
+EXPORT_SYMBOL_GPL(tcp_sigpool_algo);
+
+/**
+ * tcp_sigpool_hash_skb_data - hash data in skb with initialized tcp_sigpool
+ * @hp: tcp_sigpool pointer
+ * @skb: buffer to add sign for
+ * @header_len: TCP header length for this segment
+ */
+int tcp_sigpool_hash_skb_data(struct tcp_sigpool *hp,
+ const struct sk_buff *skb,
+ unsigned int header_len)
+{
+ const unsigned int head_data_len = skb_headlen(skb) > header_len ?
+ skb_headlen(skb) - header_len : 0;
+ const struct skb_shared_info *shi = skb_shinfo(skb);
+ const struct tcphdr *tp = tcp_hdr(skb);
+ struct ahash_request *req = hp->req;
+ struct sk_buff *frag_iter;
+ struct scatterlist sg;
+ unsigned int i;
+
+ sg_init_table(&sg, 1);
+
+ sg_set_buf(&sg, ((u8 *)tp) + header_len, head_data_len);
+ ahash_request_set_crypt(req, &sg, NULL, head_data_len);
+ if (crypto_ahash_update(req))
+ return 1;
+
+ for (i = 0; i < shi->nr_frags; ++i) {
+ const skb_frag_t *f = &shi->frags[i];
+ unsigned int offset = skb_frag_off(f);
+ struct page *page;
+
+ page = skb_frag_page(f) + (offset >> PAGE_SHIFT);
+ sg_set_page(&sg, page, skb_frag_size(f), offset_in_page(offset));
+ ahash_request_set_crypt(req, &sg, NULL, skb_frag_size(f));
+ if (crypto_ahash_update(req))
+ return 1;
+ }
+
+ skb_walk_frags(skb, frag_iter)
+ if (tcp_sigpool_hash_skb_data(hp, frag_iter, 0))
+ return 1;
+
+ return 0;
+}
+EXPORT_SYMBOL(tcp_sigpool_hash_skb_data);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Per-CPU pool of crypto requests");
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 984ab4a0421e..d1ad20ce1c8c 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -26,14 +26,18 @@
static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
- u32 elapsed, start_ts, user_timeout;
+ const struct tcp_sock *tp = tcp_sk(sk);
+ u32 elapsed, user_timeout;
s32 remaining;
- start_ts = tcp_sk(sk)->retrans_stamp;
user_timeout = READ_ONCE(icsk->icsk_user_timeout);
if (!user_timeout)
return icsk->icsk_rto;
- elapsed = tcp_time_stamp(tcp_sk(sk)) - start_ts;
+
+ elapsed = tcp_time_stamp_ts(tp) - tp->retrans_stamp;
+ if (tp->tcp_usec_ts)
+ elapsed /= USEC_PER_MSEC;
+
remaining = user_timeout - elapsed;
if (remaining <= 0)
return 1; /* user timeout has passed; fire ASAP */
@@ -212,12 +216,13 @@ static bool retransmits_timed_out(struct sock *sk,
unsigned int boundary,
unsigned int timeout)
{
- unsigned int start_ts;
+ struct tcp_sock *tp = tcp_sk(sk);
+ unsigned int start_ts, delta;
if (!inet_csk(sk)->icsk_retransmits)
return false;
- start_ts = tcp_sk(sk)->retrans_stamp;
+ start_ts = tp->retrans_stamp;
if (likely(timeout == 0)) {
unsigned int rto_base = TCP_RTO_MIN;
@@ -226,7 +231,12 @@ static bool retransmits_timed_out(struct sock *sk,
timeout = tcp_model_timeout(sk, boundary, rto_base);
}
- return (s32)(tcp_time_stamp(tcp_sk(sk)) - start_ts - timeout) >= 0;
+ if (tp->tcp_usec_ts) {
+ /* delta maybe off up to a jiffy due to timer granularity. */
+ delta = tp->tcp_mstamp - start_ts + jiffies_to_usecs(1);
+ return (s32)(delta - timeout * USEC_PER_MSEC) >= 0;
+ }
+ return (s32)(tcp_time_stamp_ts(tp) - start_ts - timeout) >= 0;
}
/* A write timeout has occurred. Process the after effects. */
@@ -322,7 +332,7 @@ void tcp_delack_timer_handler(struct sock *sk)
if (inet_csk_ack_scheduled(sk)) {
if (!inet_csk_in_pingpong_mode(sk)) {
/* Delayed ACK missed: inflate ATO. */
- icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, icsk->icsk_rto);
+ icsk->icsk_ack.ato = min_t(u32, icsk->icsk_ack.ato << 1, icsk->icsk_rto);
} else {
/* Delayed ACK missed: leave pingpong mode and
* deflate ATO.
@@ -394,7 +404,7 @@ static void tcp_probe_timer(struct sock *sk)
if (user_timeout &&
(s32)(tcp_jiffies32 - icsk->icsk_probes_tstamp) >=
msecs_to_jiffies(user_timeout))
- goto abort;
+ goto abort;
}
max_probes = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retries2);
if (sock_flag(sk, SOCK_DEAD)) {
@@ -415,6 +425,19 @@ abort: tcp_write_err(sk);
}
}
+static void tcp_update_rto_stats(struct sock *sk)
+{
+ struct inet_connection_sock *icsk = inet_csk(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (!icsk->icsk_retransmits) {
+ tp->total_rto_recoveries++;
+ tp->rto_stamp = tcp_time_stamp_ms(tp);
+ }
+ icsk->icsk_retransmits++;
+ tp->total_rto++;
+}
+
/*
* Timer for Fast Open socket to retransmit SYNACK. Note that the
* sk here is the child socket, not the parent (listener) socket.
@@ -447,28 +470,26 @@ static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req)
*/
inet_rtx_syn_ack(sk, req);
req->num_timeout++;
- icsk->icsk_retransmits++;
+ tcp_update_rto_stats(sk);
if (!tp->retrans_stamp)
- tp->retrans_stamp = tcp_time_stamp(tp);
+ tp->retrans_stamp = tcp_time_stamp_ts(tp);
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
req->timeout << req->num_timeout, TCP_RTO_MAX);
}
static bool tcp_rtx_probe0_timed_out(const struct sock *sk,
- const struct sk_buff *skb)
+ const struct sk_buff *skb,
+ u32 rtx_delta)
{
const struct tcp_sock *tp = tcp_sk(sk);
const int timeout = TCP_RTO_MAX * 2;
- u32 rcv_delta, rtx_delta;
+ u32 rcv_delta;
rcv_delta = inet_csk(sk)->icsk_timeout - tp->rcv_tstamp;
if (rcv_delta <= timeout)
return false;
- rtx_delta = (u32)msecs_to_jiffies(tcp_time_stamp(tp) -
- (tp->retrans_stamp ?: tcp_skb_timestamp(skb)));
-
- return rtx_delta > timeout;
+ return msecs_to_jiffies(rtx_delta) > timeout;
}
/**
@@ -521,7 +542,11 @@ void tcp_retransmit_timer(struct sock *sk)
struct inet_sock *inet = inet_sk(sk);
u32 rtx_delta;
- rtx_delta = tcp_time_stamp(tp) - (tp->retrans_stamp ?: tcp_skb_timestamp(skb));
+ rtx_delta = tcp_time_stamp_ts(tp) - (tp->retrans_stamp ?:
+ tcp_skb_timestamp_ts(tp->tcp_usec_ts, skb));
+ if (tp->tcp_usec_ts)
+ rtx_delta /= USEC_PER_MSEC;
+
if (sk->sk_family == AF_INET) {
net_dbg_ratelimited("Probing zero-window on %pI4:%u/%u, seq=%u:%u, recv %ums ago, lasting %ums\n",
&inet->inet_daddr, ntohs(inet->inet_dport),
@@ -538,7 +563,7 @@ void tcp_retransmit_timer(struct sock *sk)
rtx_delta);
}
#endif
- if (tcp_rtx_probe0_timed_out(sk, skb)) {
+ if (tcp_rtx_probe0_timed_out(sk, skb, rtx_delta)) {
tcp_write_err(sk);
goto out;
}
@@ -575,7 +600,7 @@ void tcp_retransmit_timer(struct sock *sk)
tcp_enter_loss(sk);
- icsk->icsk_retransmits++;
+ tcp_update_rto_stats(sk);
if (tcp_retransmit_skb(sk, tcp_rtx_queue_head(sk), 1) > 0) {
/* Retransmission failed because of local congestion,
* Let senders fight for local resources conservatively.
@@ -601,7 +626,6 @@ void tcp_retransmit_timer(struct sock *sk)
* implemented ftp to mars will work nicely. We will have to fix
* the 120 second clamps though!
*/
- icsk->icsk_backoff++;
out_reset_timer:
/* If stream is thin, use linear timeouts. Since 'icsk_backoff' is
@@ -622,11 +646,12 @@ out_reset_timer:
tcp_rto_min(sk),
TCP_RTO_MAX);
} else if (sk->sk_state != TCP_SYN_SENT ||
- icsk->icsk_backoff >
+ tp->total_rto >
READ_ONCE(net->ipv4.sysctl_tcp_syn_linear_timeouts)) {
/* Use normal (exponential) backoff unless linear timeouts are
* activated.
*/
+ icsk->icsk_backoff++;
icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
}
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 0794a2c46a56..f631b0a21af4 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -714,7 +714,7 @@ int __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
iph->saddr, uh->source, skb->dev->ifindex,
inet_sdif(skb), udptable, NULL);
- if (!sk || udp_sk(sk)->encap_type) {
+ if (!sk || READ_ONCE(udp_sk(sk)->encap_type)) {
/* No socket for error: try tunnels before discarding */
if (static_branch_unlikely(&udp_encap_needed_key)) {
sk = __udp4_lib_err_encap(net, iph, uh, udptable, sk, skb,
@@ -750,7 +750,7 @@ int __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
case ICMP_DEST_UNREACH:
if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */
ipv4_sk_update_pmtu(skb, sk, info);
- if (inet->pmtudisc != IP_PMTUDISC_DONT) {
+ if (READ_ONCE(inet->pmtudisc) != IP_PMTUDISC_DONT) {
err = EMSGSIZE;
harderr = 1;
break;
@@ -805,7 +805,7 @@ void udp_flush_pending_frames(struct sock *sk)
if (up->pending) {
up->len = 0;
- up->pending = 0;
+ WRITE_ONCE(up->pending, 0);
ip_flush_pending_frames(sk);
}
}
@@ -993,7 +993,7 @@ int udp_push_pending_frames(struct sock *sk)
out:
up->len = 0;
- up->pending = 0;
+ WRITE_ONCE(up->pending, 0);
return err;
}
EXPORT_SYMBOL(udp_push_pending_frames);
@@ -1051,10 +1051,11 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
u8 tos, scope;
__be16 dport;
int err, is_udplite = IS_UDPLITE(sk);
- int corkreq = READ_ONCE(up->corkflag) || msg->msg_flags&MSG_MORE;
+ int corkreq = udp_test_bit(CORK, sk) || msg->msg_flags & MSG_MORE;
int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
struct sk_buff *skb;
struct ip_options_data opt_copy;
+ int uc_index;
if (len > 0xFFFF)
return -EMSGSIZE;
@@ -1069,7 +1070,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
fl4 = &inet->cork.fl.u.ip4;
- if (up->pending) {
+ if (READ_ONCE(up->pending)) {
/*
* There are pending frames.
* The socket lock must be held while it's corked.
@@ -1143,7 +1144,9 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (cgroup_bpf_enabled(CGROUP_UDP4_SENDMSG) && !connected) {
err = BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk,
- (struct sockaddr *)usin, &ipc.addr);
+ (struct sockaddr *)usin,
+ &msg->msg_namelen,
+ &ipc.addr);
if (err)
goto out_free;
if (usin) {
@@ -1173,25 +1176,26 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (scope == RT_SCOPE_LINK)
connected = 0;
+ uc_index = READ_ONCE(inet->uc_index);
if (ipv4_is_multicast(daddr)) {
if (!ipc.oif || netif_index_is_l3_master(sock_net(sk), ipc.oif))
- ipc.oif = inet->mc_index;
+ ipc.oif = READ_ONCE(inet->mc_index);
if (!saddr)
- saddr = inet->mc_addr;
+ saddr = READ_ONCE(inet->mc_addr);
connected = 0;
} else if (!ipc.oif) {
- ipc.oif = inet->uc_index;
- } else if (ipv4_is_lbcast(daddr) && inet->uc_index) {
+ ipc.oif = uc_index;
+ } else if (ipv4_is_lbcast(daddr) && uc_index) {
/* oif is set, packet is to local broadcast and
* uc_index is set. oif is most likely set
* by sk_bound_dev_if. If uc_index != oif check if the
* oif is an L3 master and uc_index is an L3 slave.
* If so, we want to allow the send using the uc_index.
*/
- if (ipc.oif != inet->uc_index &&
+ if (ipc.oif != uc_index &&
ipc.oif == l3mdev_master_ifindex_by_index(sock_net(sk),
- inet->uc_index)) {
- ipc.oif = inet->uc_index;
+ uc_index)) {
+ ipc.oif = uc_index;
}
}
@@ -1265,7 +1269,7 @@ back_from_confirm:
fl4->saddr = saddr;
fl4->fl4_dport = dport;
fl4->fl4_sport = inet->inet_sport;
- up->pending = AF_INET;
+ WRITE_ONCE(up->pending, AF_INET);
do_append_data:
up->len += ulen;
@@ -1277,7 +1281,7 @@ do_append_data:
else if (!corkreq)
err = udp_push_pending_frames(sk);
else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))
- up->pending = 0;
+ WRITE_ONCE(up->pending, 0);
release_sock(sk);
out:
@@ -1315,11 +1319,11 @@ void udp_splice_eof(struct socket *sock)
struct sock *sk = sock->sk;
struct udp_sock *up = udp_sk(sk);
- if (!up->pending || READ_ONCE(up->corkflag))
+ if (!READ_ONCE(up->pending) || udp_test_bit(CORK, sk))
return;
lock_sock(sk);
- if (up->pending && !READ_ONCE(up->corkflag))
+ if (up->pending && !udp_test_bit(CORK, sk))
udp_push_pending_frames(sk);
release_sock(sk);
}
@@ -1414,9 +1418,9 @@ static void udp_rmem_release(struct sock *sk, int size, int partial,
spin_lock(&sk_queue->lock);
- sk->sk_forward_alloc += size;
+ sk_forward_alloc_add(sk, size);
amt = (sk->sk_forward_alloc - partial) & ~(PAGE_SIZE - 1);
- sk->sk_forward_alloc -= amt;
+ sk_forward_alloc_add(sk, -amt);
if (amt)
__sk_mem_reduce_allocated(sk, amt >> PAGE_SHIFT);
@@ -1527,7 +1531,7 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
goto uncharge_drop;
}
- sk->sk_forward_alloc -= size;
+ sk_forward_alloc_add(sk, -size);
/* no need to setup a destructor, we will explicitly release the
* forward allocated memory on dequeue
@@ -1865,10 +1869,11 @@ try_again:
*addr_len = sizeof(*sin);
BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk,
- (struct sockaddr *)sin);
+ (struct sockaddr *)sin,
+ addr_len);
}
- if (udp_sk(sk)->gro_enabled)
+ if (udp_test_bit(GRO_ENABLED, sk))
udp_cmsg_recv(msg, sk, skb);
if (inet_cmsg_flags(inet))
@@ -1904,7 +1909,7 @@ int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if (addr_len < sizeof(struct sockaddr_in))
return -EINVAL;
- return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr);
+ return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, &addr_len);
}
EXPORT_SYMBOL(udp_pre_connect);
@@ -2081,7 +2086,8 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
}
nf_reset_ct(skb);
- if (static_branch_unlikely(&udp_encap_needed_key) && up->encap_type) {
+ if (static_branch_unlikely(&udp_encap_needed_key) &&
+ READ_ONCE(up->encap_type)) {
int (*encap_rcv)(struct sock *sk, struct sk_buff *skb);
/*
@@ -2119,7 +2125,8 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
/*
* UDP-Lite specific tests, ignored on UDP sockets
*/
- if ((up->pcflag & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) {
+ if (udp_test_bit(UDPLITE_RECV_CC, sk) && UDP_SKB_CB(skb)->partial_cov) {
+ u16 pcrlen = READ_ONCE(up->pcrlen);
/*
* MIB statistics other than incrementing the error count are
@@ -2132,7 +2139,7 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
* delivery of packets with coverage values less than a value
* provided by the application."
*/
- if (up->pcrlen == 0) { /* full coverage was set */
+ if (pcrlen == 0) { /* full coverage was set */
net_dbg_ratelimited("UDPLite: partial coverage %d while full coverage %d requested\n",
UDP_SKB_CB(skb)->cscov, skb->len);
goto drop;
@@ -2143,9 +2150,9 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
* that it wants x while sender emits packets of smaller size y.
* Therefore the above ...()->partial_cov statement is essential.
*/
- if (UDP_SKB_CB(skb)->cscov < up->pcrlen) {
+ if (UDP_SKB_CB(skb)->cscov < pcrlen) {
net_dbg_ratelimited("UDPLite: coverage %d too small, need min %d\n",
- UDP_SKB_CB(skb)->cscov, up->pcrlen);
+ UDP_SKB_CB(skb)->cscov, pcrlen);
goto drop;
}
}
@@ -2162,7 +2169,7 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
udp_csum_pull_header(skb);
- ipv4_pktinfo_prepare(sk, skb);
+ ipv4_pktinfo_prepare(sk, skb, true);
return __udp_queue_rcv_skb(sk, skb);
csum_error:
@@ -2618,11 +2625,24 @@ void udp_destroy_sock(struct sock *sk)
if (encap_destroy)
encap_destroy(sk);
}
- if (up->encap_enabled)
+ if (udp_test_bit(ENCAP_ENABLED, sk))
static_branch_dec(&udp_encap_needed_key);
}
}
+static void set_xfrm_gro_udp_encap_rcv(__u16 encap_type, unsigned short family,
+ struct sock *sk)
+{
+#ifdef CONFIG_XFRM
+ if (udp_test_bit(GRO_ENABLED, sk) && encap_type == UDP_ENCAP_ESPINUDP) {
+ if (family == AF_INET)
+ WRITE_ONCE(udp_sk(sk)->gro_receive, xfrm4_gro_udp_encap_rcv);
+ else if (IS_ENABLED(CONFIG_IPV6) && family == AF_INET6)
+ WRITE_ONCE(udp_sk(sk)->gro_receive, ipv6_stub->xfrm6_gro_udp_encap_rcv);
+ }
+#endif
+}
+
/*
* Socket option code for UDP
*/
@@ -2658,9 +2678,9 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
switch (optname) {
case UDP_CORK:
if (val != 0) {
- WRITE_ONCE(up->corkflag, 1);
+ udp_set_bit(CORK, sk);
} else {
- WRITE_ONCE(up->corkflag, 0);
+ udp_clear_bit(CORK, sk);
lock_sock(sk);
push_pending_frames(sk);
release_sock(sk);
@@ -2672,20 +2692,22 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
case 0:
#ifdef CONFIG_XFRM
case UDP_ENCAP_ESPINUDP:
+ set_xfrm_gro_udp_encap_rcv(val, sk->sk_family, sk);
+ fallthrough;
case UDP_ENCAP_ESPINUDP_NON_IKE:
#if IS_ENABLED(CONFIG_IPV6)
if (sk->sk_family == AF_INET6)
- up->encap_rcv = ipv6_stub->xfrm6_udp_encap_rcv;
+ WRITE_ONCE(up->encap_rcv,
+ ipv6_stub->xfrm6_udp_encap_rcv);
else
#endif
- up->encap_rcv = xfrm4_udp_encap_rcv;
+ WRITE_ONCE(up->encap_rcv,
+ xfrm4_udp_encap_rcv);
#endif
fallthrough;
case UDP_ENCAP_L2TPINUDP:
- up->encap_type = val;
- lock_sock(sk);
- udp_tunnel_encap_enable(sk->sk_socket);
- release_sock(sk);
+ WRITE_ONCE(up->encap_type, val);
+ udp_tunnel_encap_enable(sk);
break;
default:
err = -ENOPROTOOPT;
@@ -2694,11 +2716,11 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
break;
case UDP_NO_CHECK6_TX:
- up->no_check6_tx = valbool;
+ udp_set_no_check6_tx(sk, valbool);
break;
case UDP_NO_CHECK6_RX:
- up->no_check6_rx = valbool;
+ udp_set_no_check6_rx(sk, valbool);
break;
case UDP_SEGMENT:
@@ -2708,14 +2730,13 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
break;
case UDP_GRO:
- lock_sock(sk);
/* when enabling GRO, accept the related GSO packet type */
if (valbool)
- udp_tunnel_encap_enable(sk->sk_socket);
- up->gro_enabled = valbool;
- up->accept_udp_l4 = valbool;
- release_sock(sk);
+ udp_tunnel_encap_enable(sk);
+ udp_assign_bit(GRO_ENABLED, sk, valbool);
+ udp_assign_bit(ACCEPT_L4, sk, valbool);
+ set_xfrm_gro_udp_encap_rcv(up->encap_type, sk->sk_family, sk);
break;
/*
@@ -2730,8 +2751,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
val = 8;
else if (val > USHRT_MAX)
val = USHRT_MAX;
- up->pcslen = val;
- up->pcflag |= UDPLITE_SEND_CC;
+ WRITE_ONCE(up->pcslen, val);
+ udp_set_bit(UDPLITE_SEND_CC, sk);
break;
/* The receiver specifies a minimum checksum coverage value. To make
@@ -2744,8 +2765,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
val = 8;
else if (val > USHRT_MAX)
val = USHRT_MAX;
- up->pcrlen = val;
- up->pcflag |= UDPLITE_RECV_CC;
+ WRITE_ONCE(up->pcrlen, val);
+ udp_set_bit(UDPLITE_RECV_CC, sk);
break;
default:
@@ -2783,19 +2804,19 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
switch (optname) {
case UDP_CORK:
- val = READ_ONCE(up->corkflag);
+ val = udp_test_bit(CORK, sk);
break;
case UDP_ENCAP:
- val = up->encap_type;
+ val = READ_ONCE(up->encap_type);
break;
case UDP_NO_CHECK6_TX:
- val = up->no_check6_tx;
+ val = udp_get_no_check6_tx(sk);
break;
case UDP_NO_CHECK6_RX:
- val = up->no_check6_rx;
+ val = udp_get_no_check6_rx(sk);
break;
case UDP_SEGMENT:
@@ -2803,17 +2824,17 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
break;
case UDP_GRO:
- val = up->gro_enabled;
+ val = udp_test_bit(GRO_ENABLED, sk);
break;
/* The following two cannot be changed on UDP sockets, the return is
* always 0 (which corresponds to the full checksum coverage of UDP). */
case UDPLITE_SEND_CSCOV:
- val = up->pcslen;
+ val = READ_ONCE(up->pcslen);
break;
case UDPLITE_RECV_CSCOV:
- val = up->pcrlen;
+ val = READ_ONCE(up->pcrlen);
break;
default:
@@ -3116,16 +3137,18 @@ static struct sock *bpf_iter_udp_batch(struct seq_file *seq)
struct bpf_udp_iter_state *iter = seq->private;
struct udp_iter_state *state = &iter->state;
struct net *net = seq_file_net(seq);
+ int resume_bucket, resume_offset;
struct udp_table *udptable;
unsigned int batch_sks = 0;
bool resized = false;
struct sock *sk;
+ resume_bucket = state->bucket;
+ resume_offset = iter->offset;
+
/* The current batch is done, so advance the bucket. */
- if (iter->st_bucket_done) {
+ if (iter->st_bucket_done)
state->bucket++;
- iter->offset = 0;
- }
udptable = udp_get_table_seq(seq, net);
@@ -3145,19 +3168,19 @@ again:
for (; state->bucket <= udptable->mask; state->bucket++) {
struct udp_hslot *hslot2 = &udptable->hash2[state->bucket];
- if (hlist_empty(&hslot2->head)) {
- iter->offset = 0;
+ if (hlist_empty(&hslot2->head))
continue;
- }
+ iter->offset = 0;
spin_lock_bh(&hslot2->lock);
udp_portaddr_for_each_entry(sk, &hslot2->head) {
if (seq_sk_match(seq, sk)) {
/* Resume from the last iterated socket at the
* offset in the bucket before iterator was stopped.
*/
- if (iter->offset) {
- --iter->offset;
+ if (state->bucket == resume_bucket &&
+ iter->offset < resume_offset) {
+ ++iter->offset;
continue;
}
if (iter->end_sk < iter->max_sk) {
@@ -3171,9 +3194,6 @@ again:
if (iter->end_sk)
break;
-
- /* Reset the current bucket's offset before moving to the next bucket. */
- iter->offset = 0;
}
/* All done: no batch made. */
@@ -3192,7 +3212,6 @@ again:
/* After allocating a larger batch, retry one more time to grab
* the whole bucket.
*/
- state->bucket--;
goto again;
}
done:
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index de3f2d31f510..dc41a22ee80e 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -296,5 +296,6 @@ static void __exit udp_diag_exit(void)
module_init(udp_diag_init);
module_exit(udp_diag_exit);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("UDP socket monitoring via SOCK_DIAG");
MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-17 /* AF_INET - IPPROTO_UDP */);
MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-136 /* AF_INET - IPPROTO_UDPLITE */);
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 0f46b3c2e4ac..6c95d28d0c4a 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -557,10 +557,10 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
NAPI_GRO_CB(skb)->is_flist = 0;
if (!sk || !udp_sk(sk)->gro_receive) {
if (skb->dev->features & NETIF_F_GRO_FRAGLIST)
- NAPI_GRO_CB(skb)->is_flist = sk ? !udp_sk(sk)->gro_enabled : 1;
+ NAPI_GRO_CB(skb)->is_flist = sk ? !udp_test_bit(GRO_ENABLED, sk) : 1;
if ((!sk && (skb->dev->features & NETIF_F_GRO_UDP_FWD)) ||
- (sk && udp_sk(sk)->gro_enabled) || NAPI_GRO_CB(skb)->is_flist)
+ (sk && udp_test_bit(GRO_ENABLED, sk)) || NAPI_GRO_CB(skb)->is_flist)
return call_gro_receive(udp_gro_receive_segment, head, skb);
/* no GRO, be sure flush the current packet */
diff --git a/net/ipv4/udp_tunnel_core.c b/net/ipv4/udp_tunnel_core.c
index 9b18f371af0d..a87defb2b167 100644
--- a/net/ipv4/udp_tunnel_core.c
+++ b/net/ipv4/udp_tunnel_core.c
@@ -78,7 +78,7 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock,
udp_sk(sk)->gro_receive = cfg->gro_receive;
udp_sk(sk)->gro_complete = cfg->gro_complete;
- udp_tunnel_encap_enable(sock);
+ udp_tunnel_encap_enable(sk);
}
EXPORT_SYMBOL_GPL(setup_udp_tunnel_sock);
@@ -204,4 +204,53 @@ struct metadata_dst *udp_tun_rx_dst(struct sk_buff *skb, unsigned short family,
}
EXPORT_SYMBOL_GPL(udp_tun_rx_dst);
+struct rtable *udp_tunnel_dst_lookup(struct sk_buff *skb,
+ struct net_device *dev,
+ struct net *net, int oif,
+ __be32 *saddr,
+ const struct ip_tunnel_key *key,
+ __be16 sport, __be16 dport, u8 tos,
+ struct dst_cache *dst_cache)
+{
+ struct rtable *rt = NULL;
+ struct flowi4 fl4;
+
+#ifdef CONFIG_DST_CACHE
+ if (dst_cache) {
+ rt = dst_cache_get_ip4(dst_cache, saddr);
+ if (rt)
+ return rt;
+ }
+#endif
+
+ memset(&fl4, 0, sizeof(fl4));
+ fl4.flowi4_mark = skb->mark;
+ fl4.flowi4_proto = IPPROTO_UDP;
+ fl4.flowi4_oif = oif;
+ fl4.daddr = key->u.ipv4.dst;
+ fl4.saddr = key->u.ipv4.src;
+ fl4.fl4_dport = dport;
+ fl4.fl4_sport = sport;
+ fl4.flowi4_tos = RT_TOS(tos);
+ fl4.flowi4_flags = key->flow_flags;
+
+ rt = ip_route_output_key(net, &fl4);
+ if (IS_ERR(rt)) {
+ netdev_dbg(dev, "no route to %pI4\n", &fl4.daddr);
+ return ERR_PTR(-ENETUNREACH);
+ }
+ if (rt->dst.dev == dev) { /* is this necessary? */
+ netdev_dbg(dev, "circular route to %pI4\n", &fl4.daddr);
+ ip_rt_put(rt);
+ return ERR_PTR(-ELOOP);
+ }
+#ifdef CONFIG_DST_CACHE
+ if (dst_cache)
+ dst_cache_set_ip4(dst_cache, &rt->dst, fl4.saddr);
+#endif
+ *saddr = fl4.saddr;
+ return rt;
+}
+EXPORT_SYMBOL_GPL(udp_tunnel_dst_lookup);
+
MODULE_LICENSE("GPL");
diff --git a/net/ipv4/udp_tunnel_nic.c b/net/ipv4/udp_tunnel_nic.c
index 029219749785..b6d2d16189c0 100644
--- a/net/ipv4/udp_tunnel_nic.c
+++ b/net/ipv4/udp_tunnel_nic.c
@@ -47,7 +47,7 @@ struct udp_tunnel_nic {
unsigned int n_tables;
unsigned long missed;
- struct udp_tunnel_nic_table_entry **entries;
+ struct udp_tunnel_nic_table_entry *entries[] __counted_by(n_tables);
};
/* We ensure all work structs are done using driver state, but not the code.
@@ -725,16 +725,12 @@ udp_tunnel_nic_alloc(const struct udp_tunnel_nic_info *info,
struct udp_tunnel_nic *utn;
unsigned int i;
- utn = kzalloc(sizeof(*utn), GFP_KERNEL);
+ utn = kzalloc(struct_size(utn, entries, n_tables), GFP_KERNEL);
if (!utn)
return NULL;
utn->n_tables = n_tables;
INIT_WORK(&utn->work, udp_tunnel_nic_device_sync_work);
- utn->entries = kmalloc_array(n_tables, sizeof(void *), GFP_KERNEL);
- if (!utn->entries)
- goto err_free_utn;
-
for (i = 0; i < n_tables; i++) {
utn->entries[i] = kcalloc(info->tables[i].n_entries,
sizeof(*utn->entries[i]), GFP_KERNEL);
@@ -747,8 +743,6 @@ udp_tunnel_nic_alloc(const struct udp_tunnel_nic_info *info,
err_free_prev_entries:
while (i--)
kfree(utn->entries[i]);
- kfree(utn->entries);
-err_free_utn:
kfree(utn);
return NULL;
}
@@ -759,7 +753,6 @@ static void udp_tunnel_nic_free(struct udp_tunnel_nic *utn)
for (i = 0; i < utn->n_tables; i++)
kfree(utn->entries[i]);
- kfree(utn->entries);
kfree(utn);
}
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index 39ecdad1b50c..af37af3ab727 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -21,7 +21,6 @@ EXPORT_SYMBOL(udplite_table);
static int udplite_sk_init(struct sock *sk)
{
udp_init_sock(sk);
- udp_sk(sk)->pcflag = UDPLITE_BIT;
pr_warn_once("UDP-Lite is deprecated and scheduled to be removed in 2025, "
"please contact the netdev mailing list\n");
return 0;
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index eac206a290d0..c54676998eb6 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -17,6 +17,8 @@
#include <linux/netfilter_ipv4.h>
#include <net/ip.h>
#include <net/xfrm.h>
+#include <net/protocol.h>
+#include <net/gro.h>
static int xfrm4_rcv_encap_finish2(struct net *net, struct sock *sk,
struct sk_buff *skb)
@@ -72,24 +74,17 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async)
return 0;
}
-/* If it's a keepalive packet, then just eat it.
- * If it's an encapsulated packet, then pass it to the
- * IPsec xfrm input.
- * Returns 0 if skb passed to xfrm or was dropped.
- * Returns >0 if skb should be passed to UDP.
- * Returns <0 if skb should be resubmitted (-ret is protocol)
- */
-int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
+static int __xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb, bool pull)
{
struct udp_sock *up = udp_sk(sk);
struct udphdr *uh;
struct iphdr *iph;
int iphlen, len;
-
__u8 *udpdata;
__be32 *udpdata32;
- __u16 encap_type = up->encap_type;
+ u16 encap_type;
+ encap_type = READ_ONCE(up->encap_type);
/* if this is not encapsulated socket, then just return now */
if (!encap_type)
return 1;
@@ -110,7 +105,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
case UDP_ENCAP_ESPINUDP:
/* Check if this is a keepalive packet. If so, eat it. */
if (len == 1 && udpdata[0] == 0xff) {
- goto drop;
+ return -EINVAL;
} else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) {
/* ESP Packet without Non-ESP header */
len = sizeof(struct udphdr);
@@ -121,7 +116,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
case UDP_ENCAP_ESPINUDP_NON_IKE:
/* Check if this is a keepalive packet. If so, eat it. */
if (len == 1 && udpdata[0] == 0xff) {
- goto drop;
+ return -EINVAL;
} else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) &&
udpdata32[0] == 0 && udpdata32[1] == 0) {
@@ -139,7 +134,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
* protocol to ESP, and then call into the transform receiver.
*/
if (skb_unclone(skb, GFP_ATOMIC))
- goto drop;
+ return -EINVAL;
/* Now we can update and verify the packet length... */
iph = ip_hdr(skb);
@@ -147,25 +142,89 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
iph->tot_len = htons(ntohs(iph->tot_len) - len);
if (skb->len < iphlen + len) {
/* packet is too small!?! */
- goto drop;
+ return -EINVAL;
}
/* pull the data buffer up to the ESP header and set the
* transport header to point to ESP. Keep UDP on the stack
* for later.
*/
- __skb_pull(skb, len);
- skb_reset_transport_header(skb);
+ if (pull) {
+ __skb_pull(skb, len);
+ skb_reset_transport_header(skb);
+ } else {
+ skb_set_transport_header(skb, len);
+ }
/* process ESP */
- return xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, encap_type);
-
-drop:
- kfree_skb(skb);
return 0;
}
+
+/* If it's a keepalive packet, then just eat it.
+ * If it's an encapsulated packet, then pass it to the
+ * IPsec xfrm input.
+ * Returns 0 if skb passed to xfrm or was dropped.
+ * Returns >0 if skb should be passed to UDP.
+ * Returns <0 if skb should be resubmitted (-ret is protocol)
+ */
+int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
+{
+ int ret;
+
+ ret = __xfrm4_udp_encap_rcv(sk, skb, true);
+ if (!ret)
+ return xfrm4_rcv_encap(skb, IPPROTO_ESP, 0,
+ udp_sk(sk)->encap_type);
+
+ if (ret < 0) {
+ kfree_skb(skb);
+ return 0;
+ }
+
+ return ret;
+}
EXPORT_SYMBOL(xfrm4_udp_encap_rcv);
+struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
+ struct sk_buff *skb)
+{
+ int offset = skb_gro_offset(skb);
+ const struct net_offload *ops;
+ struct sk_buff *pp = NULL;
+ int ret;
+
+ offset = offset - sizeof(struct udphdr);
+
+ if (!pskb_pull(skb, offset))
+ return NULL;
+
+ rcu_read_lock();
+ ops = rcu_dereference(inet_offloads[IPPROTO_ESP]);
+ if (!ops || !ops->callbacks.gro_receive)
+ goto out;
+
+ ret = __xfrm4_udp_encap_rcv(sk, skb, false);
+ if (ret)
+ goto out;
+
+ skb_push(skb, offset);
+ NAPI_GRO_CB(skb)->proto = IPPROTO_UDP;
+
+ pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
+ rcu_read_unlock();
+
+ return pp;
+
+out:
+ rcu_read_unlock();
+ skb_push(skb, offset);
+ NAPI_GRO_CB(skb)->same_flow = 0;
+ NAPI_GRO_CB(skb)->flush = 1;
+
+ return NULL;
+}
+EXPORT_SYMBOL(xfrm4_gro_udp_encap_rcv);
+
int xfrm4_rcv(struct sk_buff *skb)
{
return xfrm4_rcv_spi(skb, ip_hdr(skb)->protocol, 0);
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 3036a45e8a1e..d283c59df4c1 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -52,4 +52,5 @@ obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o
ifneq ($(CONFIG_IPV6),)
obj-$(CONFIG_NET_UDP_TUNNEL) += ip6_udp_tunnel.o
obj-y += mcast_snoop.o
+obj-$(CONFIG_TCP_AO) += tcp_ao.o
endif
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 967913ad65e5..733ace18806c 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -236,6 +236,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
.ioam6_id = IOAM6_DEFAULT_IF_ID,
.ioam6_id_wide = IOAM6_DEFAULT_IF_ID_WIDE,
.ndisc_evict_nocarrier = 1,
+ .ra_honor_pio_life = 0,
};
static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
@@ -297,6 +298,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
.ioam6_id = IOAM6_DEFAULT_IF_ID,
.ioam6_id_wide = IOAM6_DEFAULT_IF_ID_WIDE,
.ndisc_evict_nocarrier = 1,
+ .ra_honor_pio_life = 0,
};
/* Check if link is ready: is it up and is a valid qdisc available */
@@ -1378,7 +1380,7 @@ retry:
* idev->desync_factor if it's larger
*/
cnf_temp_preferred_lft = READ_ONCE(idev->cnf.temp_prefered_lft);
- max_desync_factor = min_t(__u32,
+ max_desync_factor = min_t(long,
idev->cnf.max_desync_factor,
cnf_temp_preferred_lft - regen_advance);
@@ -1397,6 +1399,7 @@ retry:
idev->cnf.temp_valid_lft + age);
cfg.preferred_lft = cnf_temp_preferred_lft + age - idev->desync_factor;
cfg.preferred_lft = min_t(__u32, ifp->prefered_lft, cfg.preferred_lft);
+ cfg.preferred_lft = min_t(__u32, cfg.valid_lft, cfg.preferred_lft);
cfg.plen = ifp->prefix_len;
tmp_tstamp = ifp->tstamp;
@@ -2657,22 +2660,23 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ;
else
stored_lft = 0;
- if (!create && stored_lft) {
+
+ /* RFC4862 Section 5.5.3e:
+ * "Note that the preferred lifetime of the
+ * corresponding address is always reset to
+ * the Preferred Lifetime in the received
+ * Prefix Information option, regardless of
+ * whether the valid lifetime is also reset or
+ * ignored."
+ *
+ * So we should always update prefered_lft here.
+ */
+ update_lft = !create && stored_lft;
+
+ if (update_lft && !in6_dev->cnf.ra_honor_pio_life) {
const u32 minimum_lft = min_t(u32,
stored_lft, MIN_VALID_LIFETIME);
valid_lft = max(valid_lft, minimum_lft);
-
- /* RFC4862 Section 5.5.3e:
- * "Note that the preferred lifetime of the
- * corresponding address is always reset to
- * the Preferred Lifetime in the received
- * Prefix Information option, regardless of
- * whether the valid lifetime is also reset or
- * ignored."
- *
- * So we should always update prefered_lft here.
- */
- update_lft = 1;
}
if (update_lft) {
@@ -6137,11 +6141,7 @@ static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev,
pmsg->prefix_len = pinfo->prefix_len;
pmsg->prefix_type = pinfo->type;
pmsg->prefix_pad3 = 0;
- pmsg->prefix_flags = 0;
- if (pinfo->onlink)
- pmsg->prefix_flags |= IF_PREFIX_ONLINK;
- if (pinfo->autoconf)
- pmsg->prefix_flags |= IF_PREFIX_AUTOCONF;
+ pmsg->prefix_flags = pinfo->flags;
if (nla_put(skb, PREFIX_ADDRESS, sizeof(pinfo->prefix), &pinfo->prefix))
goto nla_put_failure;
@@ -6846,6 +6846,15 @@ static const struct ctl_table addrconf_sysctl[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+ {
+ .procname = "ra_honor_pio_life",
+ .data = &ipv6_devconf.ra_honor_pio_life,
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
#ifdef CONFIG_IPV6_ROUTER_PREF
{
.procname = "accept_ra_rtr_pref",
diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index 507a8353a6bd..c008d21925d7 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -220,19 +220,26 @@ const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) {
EXPORT_SYMBOL_GPL(ipv6_stub);
/* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */
-const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT;
+const struct in6_addr in6addr_loopback __aligned(BITS_PER_LONG/8)
+ = IN6ADDR_LOOPBACK_INIT;
EXPORT_SYMBOL(in6addr_loopback);
-const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
+const struct in6_addr in6addr_any __aligned(BITS_PER_LONG/8)
+ = IN6ADDR_ANY_INIT;
EXPORT_SYMBOL(in6addr_any);
-const struct in6_addr in6addr_linklocal_allnodes = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
+const struct in6_addr in6addr_linklocal_allnodes __aligned(BITS_PER_LONG/8)
+ = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
EXPORT_SYMBOL(in6addr_linklocal_allnodes);
-const struct in6_addr in6addr_linklocal_allrouters = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT;
+const struct in6_addr in6addr_linklocal_allrouters __aligned(BITS_PER_LONG/8)
+ = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT;
EXPORT_SYMBOL(in6addr_linklocal_allrouters);
-const struct in6_addr in6addr_interfacelocal_allnodes = IN6ADDR_INTERFACELOCAL_ALLNODES_INIT;
+const struct in6_addr in6addr_interfacelocal_allnodes __aligned(BITS_PER_LONG/8)
+ = IN6ADDR_INTERFACELOCAL_ALLNODES_INIT;
EXPORT_SYMBOL(in6addr_interfacelocal_allnodes);
-const struct in6_addr in6addr_interfacelocal_allrouters = IN6ADDR_INTERFACELOCAL_ALLROUTERS_INIT;
+const struct in6_addr in6addr_interfacelocal_allrouters __aligned(BITS_PER_LONG/8)
+ = IN6ADDR_INTERFACELOCAL_ALLROUTERS_INIT;
EXPORT_SYMBOL(in6addr_interfacelocal_allrouters);
-const struct in6_addr in6addr_sitelocal_allrouters = IN6ADDR_SITELOCAL_ALLROUTERS_INIT;
+const struct in6_addr in6addr_sitelocal_allrouters __aligned(BITS_PER_LONG/8)
+ = IN6ADDR_SITELOCAL_ALLROUTERS_INIT;
EXPORT_SYMBOL(in6addr_sitelocal_allrouters);
static void snmp6_free_dev(struct inet6_dev *idev)
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 368824fe9719..959bfd9f6344 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -199,6 +199,9 @@ lookup_protocol:
if (INET_PROTOSW_REUSE & answer_flags)
sk->sk_reuse = SK_CAN_REUSE;
+ if (INET_PROTOSW_ICSK & answer_flags)
+ inet_init_csk_locks(sk);
+
inet = inet_sk(sk);
inet_assign_bit(IS_ICSK, sk, INET_PROTOSW_ICSK & answer_flags);
@@ -217,10 +220,11 @@ lookup_protocol:
inet_sk(sk)->pinet6 = np = inet6_sk_generic(sk);
np->hop_limit = -1;
np->mcast_hops = IPV6_DEFAULT_MCASTHOPS;
- np->mc_loop = 1;
- np->mc_all = 1;
+ inet6_set_bit(MC6_LOOP, sk);
+ inet6_set_bit(MC6_ALL, sk);
np->pmtudisc = IPV6_PMTUDISC_WANT;
- np->repflow = net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ESTABLISHED;
+ inet6_assign_bit(REPFLOW, sk, net->ipv6.sysctl.flowlabel_reflect &
+ FLOWLABEL_REFLECT_ESTABLISHED);
sk->sk_ipv6only = net->ipv6.sysctl.bindv6only;
sk->sk_txrehash = READ_ONCE(net->core.sysctl_txrehash);
@@ -453,7 +457,7 @@ int inet6_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len)
/* BPF prog is run before any checks are done so that if the prog
* changes context in a wrong way it will be caught.
*/
- err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr,
+ err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, &addr_len,
CGROUP_INET6_BIND, &flags);
if (err)
return err;
@@ -519,6 +523,7 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
int peer)
{
struct sockaddr_in6 *sin = (struct sockaddr_in6 *)uaddr;
+ int sin_addr_len = sizeof(*sin);
struct sock *sk = sock->sk;
struct inet_sock *inet = inet_sk(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
@@ -536,9 +541,9 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
}
sin->sin6_port = inet->inet_dport;
sin->sin6_addr = sk->sk_v6_daddr;
- if (np->sndflow)
+ if (inet6_test_bit(SNDFLOW, sk))
sin->sin6_flowinfo = np->flow_label;
- BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin,
+ BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, &sin_addr_len,
CGROUP_INET6_GETPEERNAME);
} else {
if (ipv6_addr_any(&sk->sk_v6_rcv_saddr))
@@ -546,13 +551,13 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
else
sin->sin6_addr = sk->sk_v6_rcv_saddr;
sin->sin6_port = inet->inet_sport;
- BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin,
+ BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, &sin_addr_len,
CGROUP_INET6_GETSOCKNAME);
}
sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr,
sk->sk_bound_dev_if);
release_sock(sk);
- return sizeof(*sin);
+ return sin_addr_len;
}
EXPORT_SYMBOL(inet6_getname);
@@ -1048,6 +1053,7 @@ static const struct ipv6_stub ipv6_stub_impl = {
#if IS_ENABLED(CONFIG_XFRM)
.xfrm6_local_rxpmtu = xfrm6_local_rxpmtu,
.xfrm6_udp_encap_rcv = xfrm6_udp_encap_rcv,
+ .xfrm6_gro_udp_encap_rcv = xfrm6_gro_udp_encap_rcv,
.xfrm6_rcv_encap = xfrm6_rcv_encap,
#endif
.nd_tbl = &nd_tbl,
@@ -1060,6 +1066,7 @@ static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = {
.udp6_lib_lookup = __udp6_lib_lookup,
.ipv6_setsockopt = do_ipv6_setsockopt,
.ipv6_getsockopt = do_ipv6_getsockopt,
+ .ipv6_dev_get_saddr = ipv6_dev_get_saddr,
};
static int __init inet6_init(void)
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 01005035ad10..2016e90e6e1d 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -13,8 +13,8 @@
#define pr_fmt(fmt) "IPv6: " fmt
-#include <crypto/algapi.h>
#include <crypto/hash.h>
+#include <crypto/utils.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <net/ip.h>
@@ -51,9 +51,7 @@ static void *ah_alloc_tmp(struct crypto_ahash *ahash, int nfrags,
{
unsigned int len;
- len = size + crypto_ahash_digestsize(ahash) +
- (crypto_ahash_alignmask(ahash) &
- ~(crypto_tfm_ctx_alignment() - 1));
+ len = size + crypto_ahash_digestsize(ahash);
len = ALIGN(len, crypto_tfm_ctx_alignment());
@@ -75,10 +73,9 @@ static inline u8 *ah_tmp_auth(u8 *tmp, unsigned int offset)
return tmp + offset;
}
-static inline u8 *ah_tmp_icv(struct crypto_ahash *ahash, void *tmp,
- unsigned int offset)
+static inline u8 *ah_tmp_icv(void *tmp, unsigned int offset)
{
- return PTR_ALIGN((u8 *)tmp + offset, crypto_ahash_alignmask(ahash) + 1);
+ return tmp + offset;
}
static inline struct ahash_request *ah_tmp_req(struct crypto_ahash *ahash,
@@ -299,7 +296,7 @@ static void ah6_output_done(void *data, int err)
iph_base = AH_SKB_CB(skb)->tmp;
iph_ext = ah_tmp_ext(iph_base);
- icv = ah_tmp_icv(ahp->ahash, iph_ext, extlen);
+ icv = ah_tmp_icv(iph_ext, extlen);
memcpy(ah->auth_data, icv, ahp->icv_trunc_len);
memcpy(top_iph, iph_base, IPV6HDR_BASELEN);
@@ -362,7 +359,7 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
iph_ext = ah_tmp_ext(iph_base);
seqhi = (__be32 *)((char *)iph_ext + extlen);
- icv = ah_tmp_icv(ahash, seqhi, seqhi_len);
+ icv = ah_tmp_icv(seqhi, seqhi_len);
req = ah_tmp_req(ahash, icv);
sg = ah_req_sg(ahash, req);
seqhisg = sg + nfrags;
@@ -468,7 +465,7 @@ static void ah6_input_done(void *data, int err)
work_iph = AH_SKB_CB(skb)->tmp;
auth_data = ah_tmp_auth(work_iph, hdr_len);
- icv = ah_tmp_icv(ahp->ahash, auth_data, ahp->icv_trunc_len);
+ icv = ah_tmp_icv(auth_data, ahp->icv_trunc_len);
err = crypto_memneq(icv, auth_data, ahp->icv_trunc_len) ? -EBADMSG : 0;
if (err)
@@ -576,7 +573,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
auth_data = ah_tmp_auth((u8 *)work_iph, hdr_len);
seqhi = (__be32 *)(auth_data + ahp->icv_trunc_len);
- icv = ah_tmp_icv(ahash, seqhi, seqhi_len);
+ icv = ah_tmp_icv(seqhi, seqhi_len);
req = ah_tmp_req(ahash, icv);
sg = ah_req_sg(ahash, req);
seqhisg = sg + nfrags;
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 41ebc4e57473..fff78496803d 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -60,9 +60,9 @@ static void ip6_datagram_flow_key_init(struct flowi6 *fl6,
if (!oif) {
if (ipv6_addr_is_multicast(&fl6->daddr))
- oif = np->mcast_oif;
+ oif = READ_ONCE(np->mcast_oif);
else
- oif = np->ucast_oif;
+ oif = READ_ONCE(np->ucast_oif);
}
fl6->flowi6_oif = oif;
@@ -80,7 +80,8 @@ int ip6_datagram_dst_update(struct sock *sk, bool fix_sk_saddr)
struct flowi6 fl6;
int err = 0;
- if (np->sndflow && (np->flow_label & IPV6_FLOWLABEL_MASK)) {
+ if (inet6_test_bit(SNDFLOW, sk) &&
+ (np->flow_label & IPV6_FLOWLABEL_MASK)) {
flowlabel = fl6_sock_lookup(sk, np->flow_label);
if (IS_ERR(flowlabel))
return -EINVAL;
@@ -163,7 +164,7 @@ int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr,
if (usin->sin6_family != AF_INET6)
return -EAFNOSUPPORT;
- if (np->sndflow)
+ if (inet6_test_bit(SNDFLOW, sk))
fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
if (ipv6_addr_any(&usin->sin6_addr)) {
@@ -228,7 +229,7 @@ ipv4_connected:
}
if (!sk->sk_bound_dev_if && (addr_type & IPV6_ADDR_MULTICAST))
- WRITE_ONCE(sk->sk_bound_dev_if, np->mcast_oif);
+ WRITE_ONCE(sk->sk_bound_dev_if, READ_ONCE(np->mcast_oif));
/* Connect to link-local address requires an interface */
if (!sk->sk_bound_dev_if) {
@@ -305,11 +306,10 @@ static void ipv6_icmp_error_rfc4884(const struct sk_buff *skb,
void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
__be16 port, u32 info, u8 *payload)
{
- struct ipv6_pinfo *np = inet6_sk(sk);
struct icmp6hdr *icmph = icmp6_hdr(skb);
struct sock_exterr_skb *serr;
- if (!np->recverr)
+ if (!inet6_test_bit(RECVERR6, sk))
return;
skb = skb_clone(skb, GFP_ATOMIC);
@@ -332,7 +332,7 @@ void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
__skb_pull(skb, payload - skb->data);
- if (inet6_sk(sk)->recverr_rfc4884)
+ if (inet6_test_bit(RECVERR6_RFC4884, sk))
ipv6_icmp_error_rfc4884(skb, &serr->ee.ee_rfc4884);
skb_reset_transport_header(skb);
@@ -344,12 +344,11 @@ EXPORT_SYMBOL_GPL(ipv6_icmp_error);
void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info)
{
- const struct ipv6_pinfo *np = inet6_sk(sk);
struct sock_exterr_skb *serr;
struct ipv6hdr *iph;
struct sk_buff *skb;
- if (!np->recverr)
+ if (!inet6_test_bit(RECVERR6, sk))
return;
skb = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
@@ -493,7 +492,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
const struct ipv6hdr *ip6h = container_of((struct in6_addr *)(nh + serr->addr_offset),
struct ipv6hdr, daddr);
sin->sin6_addr = ip6h->daddr;
- if (np->sndflow)
+ if (inet6_test_bit(SNDFLOW, sk))
sin->sin6_flowinfo = ip6_flowinfo(ip6h);
sin->sin6_scope_id =
ipv6_iface_scope_id(&sin->sin6_addr,
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index fddd0cbdede1..2cc1a45742d8 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -770,7 +770,9 @@ static inline int esp_remove_trailer(struct sk_buff *skb)
skb->csum = csum_block_sub(skb->csum, csumdiff,
skb->len - trimlen);
}
- pskb_trim(skb, skb->len - trimlen);
+ ret = pskb_trim(skb, skb->len - trimlen);
+ if (unlikely(ret))
+ return ret;
ret = nexthdr[1];
@@ -831,7 +833,7 @@ int esp6_input_done2(struct sk_buff *skb, int err)
/*
* 1) if the NAT-T peer's IP or port changed then
- * advertize the change to the keying daemon.
+ * advertise the change to the keying daemon.
* This is an inbound SA, so just compare
* SRC ports.
*/
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index a189e08370a5..527b7caddbc6 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -34,7 +34,9 @@ static __u16 esp6_nexthdr_esp_offset(struct ipv6hdr *ipv6_hdr, int nhlen)
int off = sizeof(struct ipv6hdr);
struct ipv6_opt_hdr *exthdr;
- if (likely(ipv6_hdr->nexthdr == NEXTHDR_ESP))
+ /* ESP or ESPINUDP */
+ if (likely(ipv6_hdr->nexthdr == NEXTHDR_ESP ||
+ ipv6_hdr->nexthdr == NEXTHDR_UDP))
return offsetof(struct ipv6hdr, nexthdr);
while (off < nhlen) {
@@ -54,10 +56,14 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head,
int offset = skb_gro_offset(skb);
struct xfrm_offload *xo;
struct xfrm_state *x;
+ int encap_type = 0;
__be32 seq;
__be32 spi;
int nhoff;
+ if (NAPI_GRO_CB(skb)->proto == IPPROTO_UDP)
+ encap_type = UDP_ENCAP_ESPINUDP;
+
if (!pskb_pull(skb, offset))
return NULL;
@@ -104,7 +110,7 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head,
/* We don't need to handle errors from xfrm_input, it does all
* the error handling and frees the resources on error. */
- xfrm_input(skb, IPPROTO_ESP, spi, -2);
+ xfrm_input(skb, IPPROTO_ESP, spi, encap_type);
return ERR_PTR(-EINPROGRESS);
out_reset:
diff --git a/net/ipv6/exthdrs_offload.c b/net/ipv6/exthdrs_offload.c
index 06750d65d480..4c00398f4dca 100644
--- a/net/ipv6/exthdrs_offload.c
+++ b/net/ipv6/exthdrs_offload.c
@@ -16,6 +16,10 @@ static const struct net_offload dstopt_offload = {
.flags = INET6_PROTO_GSO_EXTHDR,
};
+static const struct net_offload hbh_offload = {
+ .flags = INET6_PROTO_GSO_EXTHDR,
+};
+
int __init ipv6_exthdrs_offload_init(void)
{
int ret;
@@ -28,9 +32,16 @@ int __init ipv6_exthdrs_offload_init(void)
if (ret)
goto out_rt;
+ ret = inet6_add_offload(&hbh_offload, IPPROTO_HOPOPTS);
+ if (ret)
+ goto out_dstopts;
+
out:
return ret;
+out_dstopts:
+ inet6_del_offload(&dstopt_offload, IPPROTO_DSTOPTS);
+
out_rt:
inet6_del_offload(&rthdr_offload, IPPROTO_ROUTING);
goto out;
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 7c2003833010..7523c4baef35 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -475,11 +475,11 @@ static int __net_init fib6_rules_net_init(struct net *net)
if (IS_ERR(ops))
return PTR_ERR(ops);
- err = fib_default_rule_add(ops, 0, RT6_TABLE_LOCAL, 0);
+ err = fib_default_rule_add(ops, 0, RT6_TABLE_LOCAL);
if (err)
goto out_fib6_rules_ops;
- err = fib_default_rule_add(ops, 0x7FFE, RT6_TABLE_MAIN, 0);
+ err = fib_default_rule_add(ops, 0x7FFE, RT6_TABLE_MAIN);
if (err)
goto out_fib6_rules_ops;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 93a594a901d1..1635da07285f 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -385,7 +385,7 @@ static struct dst_entry *icmpv6_route_lookup(struct net *net,
return dst;
}
- err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
+ err = xfrm_decode_session_reverse(net, skb, flowi6_to_flowi(&fl2), AF_INET6);
if (err)
goto relookup_failed;
@@ -584,11 +584,11 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
tmp_hdr.icmp6_pointer = htonl(info);
if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
- fl6.flowi6_oif = np->mcast_oif;
+ fl6.flowi6_oif = READ_ONCE(np->mcast_oif);
else if (!fl6.flowi6_oif)
- fl6.flowi6_oif = np->ucast_oif;
+ fl6.flowi6_oif = READ_ONCE(np->ucast_oif);
- ipcm6_init_sk(&ipc6, np);
+ ipcm6_init_sk(&ipc6, sk);
ipc6.sockc.mark = mark;
fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
@@ -770,9 +770,9 @@ static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb)
np = inet6_sk(sk);
if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
- fl6.flowi6_oif = np->mcast_oif;
+ fl6.flowi6_oif = READ_ONCE(np->mcast_oif);
else if (!fl6.flowi6_oif)
- fl6.flowi6_oif = np->ucast_oif;
+ fl6.flowi6_oif = READ_ONCE(np->ucast_oif);
if (ip6_dst_lookup(net, sk, &dst, &fl6))
goto out;
@@ -791,7 +791,7 @@ static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb)
msg.offset = 0;
msg.type = type;
- ipcm6_init_sk(&ipc6, np);
+ ipcm6_init_sk(&ipc6, sk);
ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
ipc6.sockc.mark = mark;
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 0c50dcd35fe8..80043e46117c 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -133,7 +133,7 @@ int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused
fl6.daddr = sk->sk_v6_daddr;
res = ip6_xmit(sk, skb, &fl6, sk->sk_mark, rcu_dereference(np->opt),
- np->tclass, sk->sk_priority);
+ np->tclass, READ_ONCE(sk->sk_priority));
rcu_read_unlock();
return res;
}
diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c
index f6f5b83dd954..7563f8c6aa87 100644
--- a/net/ipv6/ioam6_iptunnel.c
+++ b/net/ipv6/ioam6_iptunnel.c
@@ -46,7 +46,7 @@ struct ioam6_lwt {
struct ioam6_lwt_encap tuninfo;
};
-static struct netlink_range_validation freq_range = {
+static const struct netlink_range_validation freq_range = {
.min = IOAM6_IPTUNNEL_FREQ_MIN,
.max = IOAM6_IPTUNNEL_FREQ_MAX,
};
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 28b01a068412..4fc2cae0d116 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -160,8 +160,6 @@ struct fib6_info *fib6_info_alloc(gfp_t gfp_flags, bool with_fib6_nh)
INIT_LIST_HEAD(&f6i->fib6_siblings);
refcount_set(&f6i->fib6_ref, 1);
- INIT_HLIST_NODE(&f6i->gc_link);
-
return f6i;
}
@@ -248,7 +246,6 @@ static struct fib6_table *fib6_alloc_table(struct net *net, u32 id)
net->ipv6.fib6_null_entry);
table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
inet_peer_base_init(&table->tb6_peers);
- INIT_HLIST_HEAD(&table->tb6_gc_hlist);
}
return table;
@@ -1060,8 +1057,6 @@ static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn,
lockdep_is_held(&table->tb6_lock));
}
}
-
- fib6_clean_expires_locked(rt);
}
/*
@@ -1123,10 +1118,9 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
if (!(iter->fib6_flags & RTF_EXPIRES))
return -EEXIST;
if (!(rt->fib6_flags & RTF_EXPIRES))
- fib6_clean_expires_locked(iter);
+ fib6_clean_expires(iter);
else
- fib6_set_expires_locked(iter,
- rt->expires);
+ fib6_set_expires(iter, rt->expires);
if (rt->fib6_pmtu)
fib6_metric_set(iter, RTAX_MTU,
@@ -1485,10 +1479,6 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt,
if (rt->nh)
list_add(&rt->nh_list, &rt->nh->f6i_list);
__fib6_update_sernum_upto_root(rt, fib6_new_sernum(info->nl_net));
-
- if (fib6_has_expires(rt))
- hlist_add_head(&rt->gc_link, &table->tb6_gc_hlist);
-
fib6_start_gc(info->nl_net, rt);
}
@@ -1511,13 +1501,9 @@ out:
if (!pn_leaf && !(pn->fn_flags & RTN_RTINFO)) {
pn_leaf = fib6_find_prefix(info->nl_net, table,
pn);
-#if RT6_DEBUG >= 2
- if (!pn_leaf) {
- WARN_ON(!pn_leaf);
+ if (!pn_leaf)
pn_leaf =
info->nl_net->ipv6.fib6_null_entry;
- }
-#endif
fib6_info_hold(pn_leaf);
rcu_assign_pointer(pn->leaf, pn_leaf);
}
@@ -2295,8 +2281,9 @@ static void fib6_flush_trees(struct net *net)
* Garbage collection
*/
-static int fib6_age(struct fib6_info *rt, struct fib6_gc_args *gc_args)
+static int fib6_age(struct fib6_info *rt, void *arg)
{
+ struct fib6_gc_args *gc_args = arg;
unsigned long now = jiffies;
/*
@@ -2304,7 +2291,7 @@ static int fib6_age(struct fib6_info *rt, struct fib6_gc_args *gc_args)
* Routes are expired even if they are in use.
*/
- if (fib6_has_expires(rt) && rt->expires) {
+ if (rt->fib6_flags & RTF_EXPIRES && rt->expires) {
if (time_after(now, rt->expires)) {
RT6_TRACE("expiring %p\n", rt);
return -1;
@@ -2321,40 +2308,6 @@ static int fib6_age(struct fib6_info *rt, struct fib6_gc_args *gc_args)
return 0;
}
-static void fib6_gc_table(struct net *net,
- struct fib6_table *tb6,
- struct fib6_gc_args *gc_args)
-{
- struct fib6_info *rt;
- struct hlist_node *n;
- struct nl_info info = {
- .nl_net = net,
- .skip_notify = false,
- };
-
- hlist_for_each_entry_safe(rt, n, &tb6->tb6_gc_hlist, gc_link)
- if (fib6_age(rt, gc_args) == -1)
- fib6_del(rt, &info);
-}
-
-static void fib6_gc_all(struct net *net, struct fib6_gc_args *gc_args)
-{
- struct fib6_table *table;
- struct hlist_head *head;
- unsigned int h;
-
- rcu_read_lock();
- for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
- head = &net->ipv6.fib_table_hash[h];
- hlist_for_each_entry_rcu(table, head, tb6_hlist) {
- spin_lock_bh(&table->tb6_lock);
- fib6_gc_table(net, table, gc_args);
- spin_unlock_bh(&table->tb6_lock);
- }
- }
- rcu_read_unlock();
-}
-
void fib6_run_gc(unsigned long expires, struct net *net, bool force)
{
struct fib6_gc_args gc_args;
@@ -2370,7 +2323,7 @@ void fib6_run_gc(unsigned long expires, struct net *net, bool force)
net->ipv6.sysctl.ip6_rt_gc_interval;
gc_args.more = 0;
- fib6_gc_all(net, &gc_args);
+ fib6_clean_all(net, fib6_age, &gc_args);
now = jiffies;
net->ipv6.ip6_rt_last_gc = now;
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index b3ca4beb4405..eca07e10e21f 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -513,7 +513,7 @@ int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq,
return 0;
}
- if (np->repflow) {
+ if (inet6_test_bit(REPFLOW, sk)) {
freq->flr_label = np->flow_label;
return 0;
}
@@ -551,10 +551,10 @@ static int ipv6_flowlabel_put(struct sock *sk, struct in6_flowlabel_req *freq)
if (freq->flr_flags & IPV6_FL_F_REFLECT) {
if (sk->sk_protocol != IPPROTO_TCP)
return -ENOPROTOOPT;
- if (!np->repflow)
+ if (!inet6_test_bit(REPFLOW, sk))
return -ESRCH;
np->flow_label = 0;
- np->repflow = 0;
+ inet6_clear_bit(REPFLOW, sk);
return 0;
}
@@ -626,7 +626,7 @@ static int ipv6_flowlabel_get(struct sock *sk, struct in6_flowlabel_req *freq,
if (sk->sk_protocol != IPPROTO_TCP)
return -ENOPROTOOPT;
- np->repflow = 1;
+ inet6_set_bit(REPFLOW, sk);
return 0;
}
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index d94041bb4287..b8378814532c 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -99,7 +99,8 @@ static bool ip6_can_use_hint(const struct sk_buff *skb,
static struct sk_buff *ip6_extract_route_hint(const struct net *net,
struct sk_buff *skb)
{
- if (fib6_routes_require_src(net) || fib6_has_custom_rules(net))
+ if (fib6_routes_require_src(net) || fib6_has_custom_rules(net) ||
+ IP6CB(skb)->flags & IP6SKB_MULTIPATH)
return NULL;
return skb;
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index d6314287338d..cca64c7809be 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -37,6 +37,40 @@
INDIRECT_CALL_L4(cb, f2, f1, head, skb); \
})
+static int ipv6_gro_pull_exthdrs(struct sk_buff *skb, int off, int proto)
+{
+ const struct net_offload *ops = NULL;
+ struct ipv6_opt_hdr *opth;
+
+ for (;;) {
+ int len;
+
+ ops = rcu_dereference(inet6_offloads[proto]);
+
+ if (unlikely(!ops))
+ break;
+
+ if (!(ops->flags & INET6_PROTO_GSO_EXTHDR))
+ break;
+
+ opth = skb_gro_header(skb, off + sizeof(*opth), off);
+ if (unlikely(!opth))
+ break;
+
+ len = ipv6_optlen(opth);
+
+ opth = skb_gro_header(skb, off + len, off);
+ if (unlikely(!opth))
+ break;
+ proto = opth->nexthdr;
+
+ off += len;
+ }
+
+ skb_gro_pull(skb, off - skb_network_offset(skb));
+ return proto;
+}
+
static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto)
{
const struct net_offload *ops = NULL;
@@ -45,15 +79,13 @@ static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto)
struct ipv6_opt_hdr *opth;
int len;
- if (proto != NEXTHDR_HOP) {
- ops = rcu_dereference(inet6_offloads[proto]);
+ ops = rcu_dereference(inet6_offloads[proto]);
- if (unlikely(!ops))
- break;
+ if (unlikely(!ops))
+ break;
- if (!(ops->flags & INET6_PROTO_GSO_EXTHDR))
- break;
- }
+ if (!(ops->flags & INET6_PROTO_GSO_EXTHDR))
+ break;
if (unlikely(!pskb_may_pull(skb, 8)))
break;
@@ -171,13 +203,12 @@ static int ipv6_exthdrs_len(struct ipv6hdr *iph,
proto = iph->nexthdr;
for (;;) {
- if (proto != NEXTHDR_HOP) {
- *opps = rcu_dereference(inet6_offloads[proto]);
- if (unlikely(!(*opps)))
- break;
- if (!((*opps)->flags & INET6_PROTO_GSO_EXTHDR))
- break;
- }
+ *opps = rcu_dereference(inet6_offloads[proto]);
+ if (unlikely(!(*opps)))
+ break;
+ if (!((*opps)->flags & INET6_PROTO_GSO_EXTHDR))
+ break;
+
opth = (void *)opth + optlen;
optlen = ipv6_optlen(opth);
len += optlen;
@@ -206,28 +237,25 @@ INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head,
goto out;
skb_set_network_header(skb, off);
- skb_gro_pull(skb, sizeof(*iph));
- skb_set_transport_header(skb, skb_gro_offset(skb));
- flush += ntohs(iph->payload_len) != skb_gro_len(skb);
+ flush += ntohs(iph->payload_len) != skb->len - hlen;
proto = iph->nexthdr;
ops = rcu_dereference(inet6_offloads[proto]);
if (!ops || !ops->callbacks.gro_receive) {
- pskb_pull(skb, skb_gro_offset(skb));
- skb_gro_frag0_invalidate(skb);
- proto = ipv6_gso_pull_exthdrs(skb, proto);
- skb_gro_pull(skb, -skb_transport_offset(skb));
- skb_reset_transport_header(skb);
- __skb_push(skb, skb_gro_offset(skb));
+ proto = ipv6_gro_pull_exthdrs(skb, hlen, proto);
ops = rcu_dereference(inet6_offloads[proto]);
if (!ops || !ops->callbacks.gro_receive)
goto out;
- iph = ipv6_hdr(skb);
+ iph = skb_gro_network_header(skb);
+ } else {
+ skb_gro_pull(skb, sizeof(*iph));
}
+ skb_set_transport_header(skb, skb_gro_offset(skb));
+
NAPI_GRO_CB(skb)->proto = proto;
flush--;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 0665e8b09968..a722a43dd668 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -117,6 +117,8 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
return res;
}
+ IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
+
rcu_read_lock();
nexthop = rt6_nexthop((struct rt6_info *)dst, daddr);
neigh = __ipv6_neigh_lookup_noref(dev, nexthop);
@@ -162,7 +164,13 @@ ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk,
int err;
skb_mark_not_on_list(segs);
- err = ip6_fragment(net, sk, segs, ip6_finish_output2);
+ /* Last GSO segment can be smaller than gso_size (and MTU).
+ * Adding a fragment header would produce an "atomic fragment",
+ * which is considered harmful (RFC-8021). Avoid that.
+ */
+ err = segs->len > mtu ?
+ ip6_fragment(net, sk, segs, ip6_finish_output2) :
+ ip6_finish_output2(net, sk, segs);
if (err && ret == 0)
ret = err;
}
@@ -170,6 +178,16 @@ ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk,
return ret;
}
+static int ip6_finish_output_gso(struct net *net, struct sock *sk,
+ struct sk_buff *skb, unsigned int mtu)
+{
+ if (!(IP6CB(skb)->flags & IP6SKB_FAKEJUMBO) &&
+ !skb_gso_validate_network_len(skb, mtu))
+ return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu);
+
+ return ip6_finish_output2(net, sk, skb);
+}
+
static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
unsigned int mtu;
@@ -183,17 +201,14 @@ static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff
#endif
mtu = ip6_skb_dst_mtu(skb);
- if (skb_is_gso(skb) &&
- !(IP6CB(skb)->flags & IP6SKB_FAKEJUMBO) &&
- !skb_gso_validate_network_len(skb, mtu))
- return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu);
+ if (skb_is_gso(skb))
+ return ip6_finish_output_gso(net, sk, skb, mtu);
- if ((skb->len > mtu && !skb_is_gso(skb)) ||
- dst_allfrag(skb_dst(skb)) ||
+ if (skb->len > mtu ||
(IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
return ip6_fragment(net, sk, skb, ip6_finish_output2);
- else
- return ip6_finish_output2(net, sk, skb);
+
+ return ip6_finish_output2(net, sk, skb);
}
static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
@@ -232,12 +247,11 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
}
EXPORT_SYMBOL(ip6_output);
-bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
+bool ip6_autoflowlabel(struct net *net, const struct sock *sk)
{
- if (!np->autoflowlabel_set)
+ if (!inet6_test_bit(AUTOFLOWLABEL_SET, sk))
return ip6_default_np_autolabel(net);
- else
- return np->autoflowlabel;
+ return inet6_test_bit(AUTOFLOWLABEL, sk);
}
/*
@@ -309,12 +323,12 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
* Fill in the IPv6 header
*/
if (np)
- hlimit = np->hop_limit;
+ hlimit = READ_ONCE(np->hop_limit);
if (hlimit < 0)
hlimit = ip6_dst_hoplimit(dst);
ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
- ip6_autoflowlabel(net, np), fl6));
+ ip6_autoflowlabel(net, sk), fl6));
hdr->payload_len = htons(seg_len);
hdr->nexthdr = proto;
@@ -329,7 +343,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
mtu = dst_mtu(dst);
if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
- IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
+ IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS);
/* if egress device is enslaved to an L3 master device pass the
* skb to its handler for processing
@@ -369,9 +383,8 @@ static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
if (sk && ra->sel == sel &&
(!sk->sk_bound_dev_if ||
sk->sk_bound_dev_if == skb->dev->ifindex)) {
- struct ipv6_pinfo *np = inet6_sk(sk);
- if (np && np->rtalert_isolate &&
+ if (inet6_test_bit(RTALERT_ISOLATE, sk) &&
!net_eq(sock_net(sk), dev_net(skb->dev))) {
continue;
}
@@ -448,11 +461,6 @@ static int ip6_forward_proxy_check(struct sk_buff *skb)
static inline int ip6_forward_finish(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
- struct dst_entry *dst = skb_dst(skb);
-
- __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
- __IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
-
#ifdef CONFIG_NET_SWITCHDEV
if (skb->offload_l3_fwd_mark) {
consume_skb(skb);
@@ -620,6 +628,8 @@ int ip6_forward(struct sk_buff *skb)
}
}
+ __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
+
mtu = ip6_dst_mtu_maybe_forward(dst, true);
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
@@ -882,9 +892,11 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
mtu = IPV6_MIN_MTU;
}
- if (np && np->frag_size < mtu) {
- if (np->frag_size)
- mtu = np->frag_size;
+ if (np) {
+ u32 frag_size = READ_ONCE(np->frag_size);
+
+ if (frag_size && frag_size < mtu)
+ mtu = frag_size;
}
if (mtu < hlen + sizeof(struct frag_hdr) + 8)
goto fail_toobig;
@@ -1018,9 +1030,6 @@ slow_path:
return err;
fail_toobig:
- if (skb->sk && dst_allfrag(skb_dst(skb)))
- sk_gso_disable(skb->sk);
-
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
err = -EMSGSIZE;
@@ -1114,7 +1123,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
rcu_read_lock();
from = rt ? rcu_dereference(rt->from) : NULL;
err = ip6_route_get_saddr(net, from, &fl6->daddr,
- sk ? inet6_sk(sk)->srcprefs : 0,
+ sk ? READ_ONCE(inet6_sk(sk)->srcprefs) : 0,
&fl6->saddr);
rcu_read_unlock();
@@ -1284,74 +1293,6 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
-/**
- * ip6_dst_lookup_tunnel - perform route lookup on tunnel
- * @skb: Packet for which lookup is done
- * @dev: Tunnel device
- * @net: Network namespace of tunnel device
- * @sock: Socket which provides route info
- * @saddr: Memory to store the src ip address
- * @info: Tunnel information
- * @protocol: IP protocol
- * @use_cache: Flag to enable cache usage
- * This function performs a route lookup on a tunnel
- *
- * It returns a valid dst pointer and stores src address to be used in
- * tunnel in param saddr on success, else a pointer encoded error code.
- */
-
-struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb,
- struct net_device *dev,
- struct net *net,
- struct socket *sock,
- struct in6_addr *saddr,
- const struct ip_tunnel_info *info,
- u8 protocol,
- bool use_cache)
-{
- struct dst_entry *dst = NULL;
-#ifdef CONFIG_DST_CACHE
- struct dst_cache *dst_cache;
-#endif
- struct flowi6 fl6;
- __u8 prio;
-
-#ifdef CONFIG_DST_CACHE
- dst_cache = (struct dst_cache *)&info->dst_cache;
- if (use_cache) {
- dst = dst_cache_get_ip6(dst_cache, saddr);
- if (dst)
- return dst;
- }
-#endif
- memset(&fl6, 0, sizeof(fl6));
- fl6.flowi6_mark = skb->mark;
- fl6.flowi6_proto = protocol;
- fl6.daddr = info->key.u.ipv6.dst;
- fl6.saddr = info->key.u.ipv6.src;
- prio = info->key.tos;
- fl6.flowlabel = ip6_make_flowinfo(prio, info->key.label);
-
- dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6,
- NULL);
- if (IS_ERR(dst)) {
- netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr);
- return ERR_PTR(-ENETUNREACH);
- }
- if (dst->dev == dev) { /* is this necessary? */
- netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr);
- dst_release(dst);
- return ERR_PTR(-ELOOP);
- }
-#ifdef CONFIG_DST_CACHE
- if (use_cache)
- dst_cache_set_ip6(dst_cache, dst, &fl6.saddr);
-#endif
- *saddr = fl6.saddr;
- return dst;
-}
-EXPORT_SYMBOL_GPL(ip6_dst_lookup_tunnel);
-
static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
gfp_t gfp)
{
@@ -1393,7 +1334,7 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
struct rt6_info *rt)
{
struct ipv6_pinfo *np = inet6_sk(sk);
- unsigned int mtu;
+ unsigned int mtu, frag_size;
struct ipv6_txoptions *nopt, *opt = ipc6->opt;
/* callers pass dst together with a reference, set it first so
@@ -1437,25 +1378,23 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
v6_cork->hop_limit = ipc6->hlimit;
v6_cork->tclass = ipc6->tclass;
if (rt->dst.flags & DST_XFRM_TUNNEL)
- mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
+ mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ?
READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
else
- mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
+ mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ?
READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
- if (np->frag_size < mtu) {
- if (np->frag_size)
- mtu = np->frag_size;
- }
+
+ frag_size = READ_ONCE(np->frag_size);
+ if (frag_size && frag_size < mtu)
+ mtu = frag_size;
+
cork->base.fragsize = mtu;
cork->base.gso_size = ipc6->gso_size;
cork->base.tx_flags = 0;
cork->base.mark = ipc6->sockc.mark;
sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);
- if (dst_allfrag(xfrm_dst_path(&rt->dst)))
- cork->base.flags |= IPCORK_ALLFRAG;
cork->base.length = 0;
-
cork->base.transmit_time = ipc6->sockc.transmit_time;
return 0;
@@ -1502,7 +1441,7 @@ static int __ip6_append_data(struct sock *sk,
orig_mtu = mtu;
if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
- sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
+ READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID)
tskey = atomic_inc_return(&sk->sk_tskey) - 1;
hh_len = LL_RESERVED_SPACE(rt->dst.dev);
@@ -1512,8 +1451,6 @@ static int __ip6_append_data(struct sock *sk,
headersize = sizeof(struct ipv6hdr) +
(opt ? opt->opt_flen + opt->opt_nflen : 0) +
- (dst_allfrag(&rt->dst) ?
- sizeof(struct frag_hdr) : 0) +
rt->rt6i_nfheader_len;
if (mtu <= fragheaderlen ||
@@ -1623,7 +1560,7 @@ emsgsize:
while (length > 0) {
/* Check if the remaining data fits into current packet. */
- copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
+ copy = (cork->length <= mtu ? mtu : maxfraglen) - skb->len;
if (copy < length)
copy = maxfraglen - skb->len;
@@ -1654,7 +1591,7 @@ alloc_new_skb:
*/
datalen = length + fraggap;
- if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
+ if (datalen > (cork->length <= mtu ? mtu : maxfraglen) - fragheaderlen)
datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
fraglen = datalen + fragheaderlen;
pagedlen = 0;
@@ -1903,7 +1840,6 @@ static void ip6_cork_steal_dst(struct sk_buff *skb, struct inet_cork_full *cork)
struct dst_entry *dst = cork->base.dst;
cork->base.dst = NULL;
- cork->base.flags &= ~IPCORK_ALLFRAG;
skb_dst_set(skb, dst);
}
@@ -1924,7 +1860,6 @@ static void ip6_cork_release(struct inet_cork_full *cork,
if (cork->base.dst) {
dst_release(cork->base.dst);
cork->base.dst = NULL;
- cork->base.flags &= ~IPCORK_ALLFRAG;
}
}
@@ -1936,7 +1871,6 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
struct sk_buff *skb, *tmp_skb;
struct sk_buff **tail_skb;
struct in6_addr *final_dst;
- struct ipv6_pinfo *np = inet6_sk(sk);
struct net *net = sock_net(sk);
struct ipv6hdr *hdr;
struct ipv6_txoptions *opt = v6_cork->opt;
@@ -1979,18 +1913,18 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
ip6_flow_hdr(hdr, v6_cork->tclass,
ip6_make_flowlabel(net, skb, fl6->flowlabel,
- ip6_autoflowlabel(net, np), fl6));
+ ip6_autoflowlabel(net, sk), fl6));
hdr->hop_limit = v6_cork->hop_limit;
hdr->nexthdr = proto;
hdr->saddr = fl6->saddr;
hdr->daddr = *final_dst;
- skb->priority = sk->sk_priority;
+ skb->priority = READ_ONCE(sk->sk_priority);
skb->mark = cork->base.mark;
skb->tstamp = cork->base.transmit_time;
ip6_cork_steal_dst(skb, cork);
- IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
+ IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
if (proto == IPPROTO_ICMPV6) {
struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
u8 icmp6_type;
@@ -2092,7 +2026,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
return ERR_PTR(err);
}
if (ipc6->dontfrag < 0)
- ipc6->dontfrag = inet6_sk(sk)->dontfrag;
+ ipc6->dontfrag = inet6_test_bit(DONTFRAG, sk);
err = __ip6_append_data(sk, &queue, cork, &v6_cork,
&current->task_frag, getfrag, from,
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 5e80e517f071..9bbabf750a21 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -399,7 +399,7 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw)
const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)raw;
unsigned int nhoff = raw - skb->data;
unsigned int off = nhoff + sizeof(*ipv6h);
- u8 next, nexthdr = ipv6h->nexthdr;
+ u8 nexthdr = ipv6h->nexthdr;
while (ipv6_ext_hdr(nexthdr) && nexthdr != NEXTHDR_NONE) {
struct ipv6_opt_hdr *hdr;
@@ -410,25 +410,25 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw)
hdr = (struct ipv6_opt_hdr *)(skb->data + off);
if (nexthdr == NEXTHDR_FRAGMENT) {
- struct frag_hdr *frag_hdr = (struct frag_hdr *) hdr;
- if (frag_hdr->frag_off)
- break;
optlen = 8;
} else if (nexthdr == NEXTHDR_AUTH) {
optlen = ipv6_authlen(hdr);
} else {
optlen = ipv6_optlen(hdr);
}
- /* cache hdr->nexthdr, since pskb_may_pull() might
- * invalidate hdr
- */
- next = hdr->nexthdr;
- if (nexthdr == NEXTHDR_DEST) {
- u16 i = 2;
- /* Remember : hdr is no longer valid at this point. */
- if (!pskb_may_pull(skb, off + optlen))
+ if (!pskb_may_pull(skb, off + optlen))
+ break;
+
+ hdr = (struct ipv6_opt_hdr *)(skb->data + off);
+ if (nexthdr == NEXTHDR_FRAGMENT) {
+ struct frag_hdr *frag_hdr = (struct frag_hdr *)hdr;
+
+ if (frag_hdr->frag_off)
break;
+ }
+ if (nexthdr == NEXTHDR_DEST) {
+ u16 i = 2;
while (1) {
struct ipv6_tlv_tnl_enc_lim *tel;
@@ -449,7 +449,7 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw)
i++;
}
}
- nexthdr = next;
+ nexthdr = hdr->nexthdr;
off += optlen;
}
return 0;
@@ -796,8 +796,8 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb,
struct sk_buff *skb),
bool log_ecn_err)
{
- const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
- int err;
+ const struct ipv6hdr *ipv6h;
+ int nh, err;
if ((!(tpi->flags & TUNNEL_CSUM) &&
(tunnel->parms.i_flags & TUNNEL_CSUM)) ||
@@ -829,7 +829,6 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb,
goto drop;
}
- ipv6h = ipv6_hdr(skb);
skb->protocol = eth_type_trans(skb, tunnel->dev);
skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
} else {
@@ -837,7 +836,23 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb,
skb_reset_mac_header(skb);
}
+ /* Save offset of outer header relative to skb->head,
+ * because we are going to reset the network header to the inner header
+ * and might change skb->head.
+ */
+ nh = skb_network_header(skb) - skb->head;
+
skb_reset_network_header(skb);
+
+ if (!pskb_inet_may_pull(skb)) {
+ DEV_STATS_INC(tunnel->dev, rx_length_errors);
+ DEV_STATS_INC(tunnel->dev, rx_errors);
+ goto drop;
+ }
+
+ /* Get the outer header. */
+ ipv6h = (struct ipv6hdr *)(skb->head + nh);
+
memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
__skb_tunnel_rx(skb, tunnel->dev, tunnel->net);
diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c
index cdc4d4ee2420..a7bf0327b380 100644
--- a/net/ipv6/ip6_udp_tunnel.c
+++ b/net/ipv6/ip6_udp_tunnel.c
@@ -1,3 +1,4 @@
+
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/module.h>
#include <linux/errno.h>
@@ -75,8 +76,9 @@ EXPORT_SYMBOL_GPL(udp_sock_create6);
int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb,
- struct net_device *dev, struct in6_addr *saddr,
- struct in6_addr *daddr,
+ struct net_device *dev,
+ const struct in6_addr *saddr,
+ const struct in6_addr *daddr,
__u8 prio, __u8 ttl, __be32 label,
__be16 src_port, __be16 dst_port, bool nocheck)
{
@@ -111,4 +113,73 @@ int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk,
}
EXPORT_SYMBOL_GPL(udp_tunnel6_xmit_skb);
+/**
+ * udp_tunnel6_dst_lookup - perform route lookup on UDP tunnel
+ * @skb: Packet for which lookup is done
+ * @dev: Tunnel device
+ * @net: Network namespace of tunnel device
+ * @sock: Socket which provides route info
+ * @oif: Index of the output interface
+ * @saddr: Memory to store the src ip address
+ * @key: Tunnel information
+ * @sport: UDP source port
+ * @dport: UDP destination port
+ * @dsfield: The traffic class field
+ * @dst_cache: The dst cache to use for lookup
+ * This function performs a route lookup on a UDP tunnel
+ *
+ * It returns a valid dst pointer and stores src address to be used in
+ * tunnel in param saddr on success, else a pointer encoded error code.
+ */
+
+struct dst_entry *udp_tunnel6_dst_lookup(struct sk_buff *skb,
+ struct net_device *dev,
+ struct net *net,
+ struct socket *sock,
+ int oif,
+ struct in6_addr *saddr,
+ const struct ip_tunnel_key *key,
+ __be16 sport, __be16 dport, u8 dsfield,
+ struct dst_cache *dst_cache)
+{
+ struct dst_entry *dst = NULL;
+ struct flowi6 fl6;
+
+#ifdef CONFIG_DST_CACHE
+ if (dst_cache) {
+ dst = dst_cache_get_ip6(dst_cache, saddr);
+ if (dst)
+ return dst;
+ }
+#endif
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_mark = skb->mark;
+ fl6.flowi6_proto = IPPROTO_UDP;
+ fl6.flowi6_oif = oif;
+ fl6.daddr = key->u.ipv6.dst;
+ fl6.saddr = key->u.ipv6.src;
+ fl6.fl6_sport = sport;
+ fl6.fl6_dport = dport;
+ fl6.flowlabel = ip6_make_flowinfo(dsfield, key->label);
+
+ dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6,
+ NULL);
+ if (IS_ERR(dst)) {
+ netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr);
+ return ERR_PTR(-ENETUNREACH);
+ }
+ if (dst->dev == dev) { /* is this necessary? */
+ netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr);
+ dst_release(dst);
+ return ERR_PTR(-ELOOP);
+ }
+#ifdef CONFIG_DST_CACHE
+ if (dst_cache)
+ dst_cache_set_ip6(dst_cache, dst, &fl6.saddr);
+#endif
+ *saddr = fl6.saddr;
+ return dst;
+}
+EXPORT_SYMBOL_GPL(udp_tunnel6_dst_lookup);
+
MODULE_LICENSE("GPL");
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 73c85d4e0e9c..e550240c85e1 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -569,11 +569,11 @@ vti6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
goto tx_err;
memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
- xfrm_decode_session(skb, &fl, AF_INET6);
+ xfrm_decode_session(dev_net(dev), skb, &fl, AF_INET6);
break;
case htons(ETH_P_IP):
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
- xfrm_decode_session(skb, &fl, AF_INET);
+ xfrm_decode_session(dev_net(dev), skb, &fl, AF_INET);
break;
default:
goto tx_err;
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 67a3b8f6e72b..9782c180fee6 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -242,7 +242,7 @@ static int __net_init ip6mr_rules_init(struct net *net)
goto err1;
}
- err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
+ err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT);
if (err < 0)
goto err2;
@@ -2010,8 +2010,6 @@ static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct
{
IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_OUTFORWDATAGRAMS);
- IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_OUTOCTETS, skb->len);
return dst_output(net, sk, skb);
}
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 0e2a0847b387..56c3c467f9de 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -210,7 +210,7 @@ static int ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
if (optlen < GROUP_FILTER_SIZE(0))
return -EINVAL;
- if (optlen > READ_ONCE(sysctl_optmem_max))
+ if (optlen > READ_ONCE(sock_net(sk)->core.sysctl_optmem_max))
return -ENOBUFS;
gsf = memdup_sockptr(optval, optlen);
@@ -244,7 +244,7 @@ static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
if (optlen < size0)
return -EINVAL;
- if (optlen > READ_ONCE(sysctl_optmem_max) - 4)
+ if (optlen > READ_ONCE(sock_net(sk)->core.sysctl_optmem_max) - 4)
return -ENOBUFS;
p = kmalloc(optlen + 4, GFP_KERNEL);
@@ -415,6 +415,154 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
if (ip6_mroute_opt(optname))
return ip6_mroute_setsockopt(sk, optname, optval, optlen);
+ /* Handle options that can be set without locking the socket. */
+ switch (optname) {
+ case IPV6_UNICAST_HOPS:
+ if (optlen < sizeof(int))
+ return -EINVAL;
+ if (val > 255 || val < -1)
+ return -EINVAL;
+ WRITE_ONCE(np->hop_limit, val);
+ return 0;
+ case IPV6_MULTICAST_LOOP:
+ if (optlen < sizeof(int))
+ return -EINVAL;
+ if (val != valbool)
+ return -EINVAL;
+ inet6_assign_bit(MC6_LOOP, sk, valbool);
+ return 0;
+ case IPV6_MULTICAST_HOPS:
+ if (sk->sk_type == SOCK_STREAM)
+ return retv;
+ if (optlen < sizeof(int))
+ return -EINVAL;
+ if (val > 255 || val < -1)
+ return -EINVAL;
+ WRITE_ONCE(np->mcast_hops,
+ val == -1 ? IPV6_DEFAULT_MCASTHOPS : val);
+ return 0;
+ case IPV6_MTU:
+ if (optlen < sizeof(int))
+ return -EINVAL;
+ if (val && val < IPV6_MIN_MTU)
+ return -EINVAL;
+ WRITE_ONCE(np->frag_size, val);
+ return 0;
+ case IPV6_MINHOPCOUNT:
+ if (optlen < sizeof(int))
+ return -EINVAL;
+ if (val < 0 || val > 255)
+ return -EINVAL;
+
+ if (val)
+ static_branch_enable(&ip6_min_hopcount);
+
+ /* tcp_v6_err() and tcp_v6_rcv() might read min_hopcount
+ * while we are changing it.
+ */
+ WRITE_ONCE(np->min_hopcount, val);
+ return 0;
+ case IPV6_RECVERR_RFC4884:
+ if (optlen < sizeof(int))
+ return -EINVAL;
+ if (val < 0 || val > 1)
+ return -EINVAL;
+ inet6_assign_bit(RECVERR6_RFC4884, sk, valbool);
+ return 0;
+ case IPV6_MULTICAST_ALL:
+ if (optlen < sizeof(int))
+ return -EINVAL;
+ inet6_assign_bit(MC6_ALL, sk, valbool);
+ return 0;
+ case IPV6_AUTOFLOWLABEL:
+ inet6_assign_bit(AUTOFLOWLABEL, sk, valbool);
+ inet6_set_bit(AUTOFLOWLABEL_SET, sk);
+ return 0;
+ case IPV6_DONTFRAG:
+ inet6_assign_bit(DONTFRAG, sk, valbool);
+ return 0;
+ case IPV6_RECVERR:
+ if (optlen < sizeof(int))
+ return -EINVAL;
+ inet6_assign_bit(RECVERR6, sk, valbool);
+ if (!val)
+ skb_errqueue_purge(&sk->sk_error_queue);
+ return 0;
+ case IPV6_ROUTER_ALERT_ISOLATE:
+ if (optlen < sizeof(int))
+ return -EINVAL;
+ inet6_assign_bit(RTALERT_ISOLATE, sk, valbool);
+ return 0;
+ case IPV6_MTU_DISCOVER:
+ if (optlen < sizeof(int))
+ return -EINVAL;
+ if (val < IPV6_PMTUDISC_DONT || val > IPV6_PMTUDISC_OMIT)
+ return -EINVAL;
+ WRITE_ONCE(np->pmtudisc, val);
+ return 0;
+ case IPV6_FLOWINFO_SEND:
+ if (optlen < sizeof(int))
+ return -EINVAL;
+ inet6_assign_bit(SNDFLOW, sk, valbool);
+ return 0;
+ case IPV6_ADDR_PREFERENCES:
+ if (optlen < sizeof(int))
+ return -EINVAL;
+ return ip6_sock_set_addr_preferences(sk, val);
+ case IPV6_MULTICAST_IF:
+ if (sk->sk_type == SOCK_STREAM)
+ return -ENOPROTOOPT;
+ if (optlen < sizeof(int))
+ return -EINVAL;
+ if (val) {
+ struct net_device *dev;
+ int bound_dev_if, midx;
+
+ rcu_read_lock();
+
+ dev = dev_get_by_index_rcu(net, val);
+ if (!dev) {
+ rcu_read_unlock();
+ return -ENODEV;
+ }
+ midx = l3mdev_master_ifindex_rcu(dev);
+
+ rcu_read_unlock();
+
+ bound_dev_if = READ_ONCE(sk->sk_bound_dev_if);
+ if (bound_dev_if &&
+ bound_dev_if != val &&
+ (!midx || midx != bound_dev_if))
+ return -EINVAL;
+ }
+ WRITE_ONCE(np->mcast_oif, val);
+ return 0;
+ case IPV6_UNICAST_IF:
+ {
+ struct net_device *dev;
+ int ifindex;
+
+ if (optlen != sizeof(int))
+ return -EINVAL;
+
+ ifindex = (__force int)ntohl((__force __be32)val);
+ if (!ifindex) {
+ WRITE_ONCE(np->ucast_oif, 0);
+ return 0;
+ }
+
+ dev = dev_get_by_index(net, ifindex);
+ if (!dev)
+ return -EADDRNOTAVAIL;
+ dev_put(dev);
+
+ if (READ_ONCE(sk->sk_bound_dev_if))
+ return -EINVAL;
+
+ WRITE_ONCE(np->ucast_oif, ifindex);
+ return 0;
+ }
+ }
if (needs_rtnl)
rtnl_lock();
sockopt_lock_sock(sk);
@@ -733,95 +881,7 @@ done:
}
break;
}
- case IPV6_UNICAST_HOPS:
- if (optlen < sizeof(int))
- goto e_inval;
- if (val > 255 || val < -1)
- goto e_inval;
- np->hop_limit = val;
- retv = 0;
- break;
-
- case IPV6_MULTICAST_HOPS:
- if (sk->sk_type == SOCK_STREAM)
- break;
- if (optlen < sizeof(int))
- goto e_inval;
- if (val > 255 || val < -1)
- goto e_inval;
- np->mcast_hops = (val == -1 ? IPV6_DEFAULT_MCASTHOPS : val);
- retv = 0;
- break;
-
- case IPV6_MULTICAST_LOOP:
- if (optlen < sizeof(int))
- goto e_inval;
- if (val != valbool)
- goto e_inval;
- np->mc_loop = valbool;
- retv = 0;
- break;
-
- case IPV6_UNICAST_IF:
- {
- struct net_device *dev = NULL;
- int ifindex;
-
- if (optlen != sizeof(int))
- goto e_inval;
-
- ifindex = (__force int)ntohl((__force __be32)val);
- if (ifindex == 0) {
- np->ucast_oif = 0;
- retv = 0;
- break;
- }
-
- dev = dev_get_by_index(net, ifindex);
- retv = -EADDRNOTAVAIL;
- if (!dev)
- break;
- dev_put(dev);
-
- retv = -EINVAL;
- if (sk->sk_bound_dev_if)
- break;
-
- np->ucast_oif = ifindex;
- retv = 0;
- break;
- }
- case IPV6_MULTICAST_IF:
- if (sk->sk_type == SOCK_STREAM)
- break;
- if (optlen < sizeof(int))
- goto e_inval;
-
- if (val) {
- struct net_device *dev;
- int midx;
-
- rcu_read_lock();
-
- dev = dev_get_by_index_rcu(net, val);
- if (!dev) {
- rcu_read_unlock();
- retv = -ENODEV;
- break;
- }
- midx = l3mdev_master_ifindex_rcu(dev);
-
- rcu_read_unlock();
-
- if (sk->sk_bound_dev_if &&
- sk->sk_bound_dev_if != val &&
- (!midx || midx != sk->sk_bound_dev_if))
- goto e_inval;
- }
- np->mcast_oif = val;
- retv = 0;
- break;
case IPV6_ADD_MEMBERSHIP:
case IPV6_DROP_MEMBERSHIP:
{
@@ -862,13 +922,6 @@ done:
retv = ipv6_sock_ac_drop(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_acaddr);
break;
}
- case IPV6_MULTICAST_ALL:
- if (optlen < sizeof(int))
- goto e_inval;
- np->mc_all = valbool;
- retv = 0;
- break;
-
case MCAST_JOIN_GROUP:
case MCAST_LEAVE_GROUP:
if (in_compat_syscall())
@@ -896,42 +949,6 @@ done:
goto e_inval;
retv = ip6_ra_control(sk, val);
break;
- case IPV6_ROUTER_ALERT_ISOLATE:
- if (optlen < sizeof(int))
- goto e_inval;
- np->rtalert_isolate = valbool;
- retv = 0;
- break;
- case IPV6_MTU_DISCOVER:
- if (optlen < sizeof(int))
- goto e_inval;
- if (val < IPV6_PMTUDISC_DONT || val > IPV6_PMTUDISC_OMIT)
- goto e_inval;
- np->pmtudisc = val;
- retv = 0;
- break;
- case IPV6_MTU:
- if (optlen < sizeof(int))
- goto e_inval;
- if (val && val < IPV6_MIN_MTU)
- goto e_inval;
- np->frag_size = val;
- retv = 0;
- break;
- case IPV6_RECVERR:
- if (optlen < sizeof(int))
- goto e_inval;
- np->recverr = valbool;
- if (!val)
- skb_errqueue_purge(&sk->sk_error_queue);
- retv = 0;
- break;
- case IPV6_FLOWINFO_SEND:
- if (optlen < sizeof(int))
- goto e_inval;
- np->sndflow = valbool;
- retv = 0;
- break;
case IPV6_FLOWLABEL_MGR:
retv = ipv6_flowlabel_opt(sk, optval, optlen);
break;
@@ -943,47 +960,10 @@ done:
retv = xfrm_user_policy(sk, optname, optval, optlen);
break;
- case IPV6_ADDR_PREFERENCES:
- if (optlen < sizeof(int))
- goto e_inval;
- retv = __ip6_sock_set_addr_preferences(sk, val);
- break;
- case IPV6_MINHOPCOUNT:
- if (optlen < sizeof(int))
- goto e_inval;
- if (val < 0 || val > 255)
- goto e_inval;
-
- if (val)
- static_branch_enable(&ip6_min_hopcount);
-
- /* tcp_v6_err() and tcp_v6_rcv() might read min_hopcount
- * while we are changing it.
- */
- WRITE_ONCE(np->min_hopcount, val);
- retv = 0;
- break;
- case IPV6_DONTFRAG:
- np->dontfrag = valbool;
- retv = 0;
- break;
- case IPV6_AUTOFLOWLABEL:
- np->autoflowlabel = valbool;
- np->autoflowlabel_set = 1;
- retv = 0;
- break;
case IPV6_RECVFRAGSIZE:
np->rxopt.bits.recvfragsize = valbool;
retv = 0;
break;
- case IPV6_RECVERR_RFC4884:
- if (optlen < sizeof(int))
- goto e_inval;
- if (val < 0 || val > 1)
- goto e_inval;
- np->recverr_rfc4884 = valbool;
- retv = 0;
- break;
}
unlock:
@@ -1173,14 +1153,17 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
sockopt_release_sock(sk);
if (!skb) {
if (np->rxopt.bits.rxinfo) {
+ int mcast_oif = READ_ONCE(np->mcast_oif);
struct in6_pktinfo src_info;
- src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif :
+
+ src_info.ipi6_ifindex = mcast_oif ? :
np->sticky_pktinfo.ipi6_ifindex;
- src_info.ipi6_addr = np->mcast_oif ? sk->sk_v6_daddr : np->sticky_pktinfo.ipi6_addr;
+ src_info.ipi6_addr = mcast_oif ? sk->sk_v6_daddr : np->sticky_pktinfo.ipi6_addr;
put_cmsg(&msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info);
}
if (np->rxopt.bits.rxhlim) {
- int hlim = np->mcast_hops;
+ int hlim = READ_ONCE(np->mcast_hops);
+
put_cmsg(&msg, SOL_IPV6, IPV6_HOPLIMIT, sizeof(hlim), &hlim);
}
if (np->rxopt.bits.rxtclass) {
@@ -1189,15 +1172,18 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
put_cmsg(&msg, SOL_IPV6, IPV6_TCLASS, sizeof(tclass), &tclass);
}
if (np->rxopt.bits.rxoinfo) {
+ int mcast_oif = READ_ONCE(np->mcast_oif);
struct in6_pktinfo src_info;
- src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif :
+
+ src_info.ipi6_ifindex = mcast_oif ? :
np->sticky_pktinfo.ipi6_ifindex;
- src_info.ipi6_addr = np->mcast_oif ? sk->sk_v6_daddr :
- np->sticky_pktinfo.ipi6_addr;
+ src_info.ipi6_addr = mcast_oif ? sk->sk_v6_daddr :
+ np->sticky_pktinfo.ipi6_addr;
put_cmsg(&msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info);
}
if (np->rxopt.bits.rxohlim) {
- int hlim = np->mcast_hops;
+ int hlim = READ_ONCE(np->mcast_hops);
+
put_cmsg(&msg, SOL_IPV6, IPV6_2292HOPLIMIT, sizeof(hlim), &hlim);
}
if (np->rxopt.bits.rxflow) {
@@ -1347,9 +1333,9 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
struct dst_entry *dst;
if (optname == IPV6_UNICAST_HOPS)
- val = np->hop_limit;
+ val = READ_ONCE(np->hop_limit);
else
- val = np->mcast_hops;
+ val = READ_ONCE(np->mcast_hops);
if (val < 0) {
rcu_read_lock();
@@ -1365,31 +1351,31 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
}
case IPV6_MULTICAST_LOOP:
- val = np->mc_loop;
+ val = inet6_test_bit(MC6_LOOP, sk);
break;
case IPV6_MULTICAST_IF:
- val = np->mcast_oif;
+ val = READ_ONCE(np->mcast_oif);
break;
case IPV6_MULTICAST_ALL:
- val = np->mc_all;
+ val = inet6_test_bit(MC6_ALL, sk);
break;
case IPV6_UNICAST_IF:
- val = (__force int)htonl((__u32) np->ucast_oif);
+ val = (__force int)htonl((__u32) READ_ONCE(np->ucast_oif));
break;
case IPV6_MTU_DISCOVER:
- val = np->pmtudisc;
+ val = READ_ONCE(np->pmtudisc);
break;
case IPV6_RECVERR:
- val = np->recverr;
+ val = inet6_test_bit(RECVERR6, sk);
break;
case IPV6_FLOWINFO_SEND:
- val = np->sndflow;
+ val = inet6_test_bit(SNDFLOW, sk);
break;
case IPV6_FLOWLABEL_MGR:
@@ -1424,33 +1410,35 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
}
case IPV6_ADDR_PREFERENCES:
+ {
+ u8 srcprefs = READ_ONCE(np->srcprefs);
val = 0;
- if (np->srcprefs & IPV6_PREFER_SRC_TMP)
+ if (srcprefs & IPV6_PREFER_SRC_TMP)
val |= IPV6_PREFER_SRC_TMP;
- else if (np->srcprefs & IPV6_PREFER_SRC_PUBLIC)
+ else if (srcprefs & IPV6_PREFER_SRC_PUBLIC)
val |= IPV6_PREFER_SRC_PUBLIC;
else {
/* XXX: should we return system default? */
val |= IPV6_PREFER_SRC_PUBTMP_DEFAULT;
}
- if (np->srcprefs & IPV6_PREFER_SRC_COA)
+ if (srcprefs & IPV6_PREFER_SRC_COA)
val |= IPV6_PREFER_SRC_COA;
else
val |= IPV6_PREFER_SRC_HOME;
break;
-
+ }
case IPV6_MINHOPCOUNT:
- val = np->min_hopcount;
+ val = READ_ONCE(np->min_hopcount);
break;
case IPV6_DONTFRAG:
- val = np->dontfrag;
+ val = inet6_test_bit(DONTFRAG, sk);
break;
case IPV6_AUTOFLOWLABEL:
- val = ip6_autoflowlabel(sock_net(sk), np);
+ val = ip6_autoflowlabel(sock_net(sk), sk);
break;
case IPV6_RECVFRAGSIZE:
@@ -1458,11 +1446,11 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
break;
case IPV6_ROUTER_ALERT_ISOLATE:
- val = np->rtalert_isolate;
+ val = inet6_test_bit(RTALERT_ISOLATE, sk);
break;
case IPV6_RECVERR_RFC4884:
- val = np->recverr_rfc4884;
+ val = inet6_test_bit(RECVERR6_RFC4884, sk);
break;
default:
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 5ce25bcb9974..bc6e0a0bad3c 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -642,7 +642,7 @@ bool inet6_mc_check(const struct sock *sk, const struct in6_addr *mc_addr,
}
if (!mc) {
rcu_read_unlock();
- return np->mc_all;
+ return inet6_test_bit(MC6_ALL, sk);
}
psl = rcu_dereference(mc->sflist);
if (!psl) {
@@ -1716,7 +1716,7 @@ static void ip6_mc_hdr(const struct sock *sk, struct sk_buff *skb,
hdr->payload_len = htons(len);
hdr->nexthdr = proto;
- hdr->hop_limit = inet6_sk(sk)->hop_limit;
+ hdr->hop_limit = READ_ONCE(inet6_sk(sk)->hop_limit);
hdr->saddr = *saddr;
hdr->daddr = *daddr;
@@ -1789,7 +1789,7 @@ static void mld_sendpack(struct sk_buff *skb)
rcu_read_lock();
idev = __in6_dev_get(skb->dev);
- IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
+ IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS);
payload_len = (skb_tail_pointer(skb) - skb_network_header(skb)) -
sizeof(*pip6);
@@ -2147,8 +2147,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
full_len = sizeof(struct ipv6hdr) + payload_len;
rcu_read_lock();
- IP6_UPD_PO_STATS(net, __in6_dev_get(dev),
- IPSTATS_MIB_OUT, full_len);
+ IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_OUTREQUESTS);
rcu_read_unlock();
skb = sock_alloc_send_skb(sk, hlen + tlen + full_len, 1, &err);
@@ -2723,8 +2722,12 @@ void ipv6_mc_down(struct inet6_dev *idev)
synchronize_net();
mld_query_stop_work(idev);
mld_report_stop_work(idev);
+
+ mutex_lock(&idev->mc_lock);
mld_ifc_stop_work(idev);
mld_gq_stop_work(idev);
+ mutex_unlock(&idev->mc_lock);
+
mld_dad_stop_work(idev);
}
@@ -3011,8 +3014,6 @@ static struct ip6_sf_list *igmp6_mcf_get_next(struct seq_file *seq, struct ip6_s
continue;
state->im = rcu_dereference(state->idev->mc_list);
}
- if (!state->im)
- break;
psf = rcu_dereference(state->im->mca_sources);
}
out:
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 553c8664e0a7..a19999b30bc0 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -500,11 +500,11 @@ void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr,
csum_partial(icmp6h,
skb->len, 0));
- ip6_nd_hdr(skb, saddr, daddr, inet6_sk(sk)->hop_limit, skb->len);
+ ip6_nd_hdr(skb, saddr, daddr, READ_ONCE(inet6_sk(sk)->hop_limit), skb->len);
rcu_read_lock();
idev = __in6_dev_get(dst->dev);
- IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
+ IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS);
err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
net, sk, skb, NULL, dst->dev,
@@ -1996,7 +1996,7 @@ static int __net_init ndisc_net_init(struct net *net)
np = inet6_sk(sk);
np->hop_limit = 255;
/* Do not loopback ndisc messages */
- np->mc_loop = 0;
+ inet6_clear_bit(MC6_LOOP, sk);
return 0;
}
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 857713d7a38a..53d255838e6a 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -61,7 +61,7 @@ int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff
#ifdef CONFIG_XFRM
if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
- xfrm_decode_session(skb, flowi6_to_flowi(&fl6), AF_INET6) == 0) {
+ xfrm_decode_session(net, skb, flowi6_to_flowi(&fl6), AF_INET6) == 0) {
skb_dst_set(skb, NULL);
dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
if (IS_ERR(dst))
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index a88b2ce4a3cb..8dd4cd0c47bd 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -31,10 +31,10 @@ static const struct xt_table packet_mangler = {
static unsigned int
ip6t_mangle_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state)
{
- unsigned int ret;
struct in6_addr saddr, daddr;
- u_int8_t hop_limit;
- u_int32_t flowlabel, mark;
+ unsigned int ret, verdict;
+ u32 flowlabel, mark;
+ u8 hop_limit;
int err;
/* save source/dest address, mark, hoplimit, flowlabel, priority, */
@@ -47,8 +47,9 @@ ip6t_mangle_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *sta
flowlabel = *((u_int32_t *)ipv6_hdr(skb));
ret = ip6t_do_table(priv, skb, state);
+ verdict = ret & NF_VERDICT_MASK;
- if (ret != NF_DROP && ret != NF_STOLEN &&
+ if (verdict != NF_DROP && verdict != NF_STOLEN &&
(!ipv6_addr_equal(&ipv6_hdr(skb)->saddr, &saddr) ||
!ipv6_addr_equal(&ipv6_hdr(skb)->daddr, &daddr) ||
skb->mark != mark ||
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index bf3cb3a13600..52cf104e3478 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -170,3 +170,4 @@ module_init(ip6table_nat_init);
module_exit(ip6table_nat_exit);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Ip6tables legacy nat table");
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index 08861d5d1f4d..fc9f6754028f 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -106,3 +106,4 @@ static void __exit ip6table_raw_fini(void)
module_init(ip6table_raw_init);
module_exit(ip6table_raw_fini);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Ip6tables legacy raw table");
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index d59b296b4f51..be7817fbc024 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -182,3 +182,4 @@ module_init(nf_defrag_init);
module_exit(nf_defrag_fini);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("IPv6 defragmentation support");
diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c
index 58ccdb08c0fd..196dd4ecb5e2 100644
--- a/net/ipv6/netfilter/nf_reject_ipv6.c
+++ b/net/ipv6/netfilter/nf_reject_ipv6.c
@@ -278,7 +278,6 @@ static int nf_reject6_fill_skb_dst(struct sk_buff *skb_in)
void nf_send_reset6(struct net *net, struct sock *sk, struct sk_buff *oldskb,
int hook)
{
- struct net_device *br_indev __maybe_unused;
struct sk_buff *nskb;
struct tcphdr _otcph;
const struct tcphdr *otcph;
@@ -354,9 +353,15 @@ void nf_send_reset6(struct net *net, struct sock *sk, struct sk_buff *oldskb,
* build the eth header using the original destination's MAC as the
* source, and send the RST packet directly.
*/
- br_indev = nf_bridge_get_physindev(oldskb);
- if (br_indev) {
+ if (nf_bridge_info_exists(oldskb)) {
struct ethhdr *oeth = eth_hdr(oldskb);
+ struct net_device *br_indev;
+
+ br_indev = nf_bridge_get_physindev(oldskb, net);
+ if (!br_indev) {
+ kfree_skb(nskb);
+ return;
+ }
nskb->dev = br_indev;
nskb->protocol = htons(ETH_P_IPV6);
@@ -413,3 +418,4 @@ void nf_send_unreach6(struct net *net, struct sk_buff *skb_in,
EXPORT_SYMBOL_GPL(nf_send_unreach6);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("IPv6 packet rejection core");
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 1b2772834972..ef2059c88955 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -56,7 +56,7 @@ static int ping_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
if (addr_len < SIN6_LEN_RFC2133)
return -EINVAL;
- return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr);
+ return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, &addr_len);
}
static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
@@ -89,7 +89,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
return -EAFNOSUPPORT;
}
daddr = &(u->sin6_addr);
- if (np->sndflow)
+ if (inet6_test_bit(SNDFLOW, sk))
fl6.flowlabel = u->sin6_flowinfo & IPV6_FLOWINFO_MASK;
if (__ipv6_addr_needs_scope_id(ipv6_addr_type(daddr)))
oif = u->sin6_scope_id;
@@ -107,9 +107,9 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
oif = np->sticky_pktinfo.ipi6_ifindex;
if (!oif && ipv6_addr_is_multicast(daddr))
- oif = np->mcast_oif;
+ oif = READ_ONCE(np->mcast_oif);
else if (!oif)
- oif = np->ucast_oif;
+ oif = READ_ONCE(np->ucast_oif);
addr_type = ipv6_addr_type(daddr);
if ((__ipv6_addr_needs_scope_id(addr_type) && !oif) ||
@@ -118,8 +118,8 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
l3mdev_master_ifindex_by_index(sock_net(sk), oif) != sk->sk_bound_dev_if))
return -EINVAL;
- ipcm6_init_sk(&ipc6, np);
- ipc6.sockc.tsflags = sk->sk_tsflags;
+ ipcm6_init_sk(&ipc6, sk);
+ ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags);
ipc6.sockc.mark = READ_ONCE(sk->sk_mark);
fl6.flowi6_oif = oif;
@@ -157,9 +157,9 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
rt = (struct rt6_info *) dst;
if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
- fl6.flowi6_oif = np->mcast_oif;
+ fl6.flowi6_oif = READ_ONCE(np->mcast_oif);
else if (!fl6.flowi6_oif)
- fl6.flowi6_oif = np->ucast_oif;
+ fl6.flowi6_oif = READ_ONCE(np->ucast_oif);
pfh.icmph.type = user_icmph.icmp6_type;
pfh.icmph.code = user_icmph.icmp6_code;
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index e20b3705c2d2..6d1d9221649d 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -61,7 +61,7 @@ static const struct snmp_mib snmp6_ipstats_list[] = {
SNMP_MIB_ITEM("Ip6InDiscards", IPSTATS_MIB_INDISCARDS),
SNMP_MIB_ITEM("Ip6InDelivers", IPSTATS_MIB_INDELIVERS),
SNMP_MIB_ITEM("Ip6OutForwDatagrams", IPSTATS_MIB_OUTFORWDATAGRAMS),
- SNMP_MIB_ITEM("Ip6OutRequests", IPSTATS_MIB_OUTPKTS),
+ SNMP_MIB_ITEM("Ip6OutRequests", IPSTATS_MIB_OUTREQUESTS),
SNMP_MIB_ITEM("Ip6OutDiscards", IPSTATS_MIB_OUTDISCARDS),
SNMP_MIB_ITEM("Ip6OutNoRoutes", IPSTATS_MIB_OUTNOROUTES),
SNMP_MIB_ITEM("Ip6ReasmTimeout", IPSTATS_MIB_REASMTIMEOUT),
@@ -84,6 +84,7 @@ static const struct snmp_mib snmp6_ipstats_list[] = {
SNMP_MIB_ITEM("Ip6InECT1Pkts", IPSTATS_MIB_ECT1PKTS),
SNMP_MIB_ITEM("Ip6InECT0Pkts", IPSTATS_MIB_ECT0PKTS),
SNMP_MIB_ITEM("Ip6InCEPkts", IPSTATS_MIB_CEPKTS),
+ SNMP_MIB_ITEM("Ip6OutTransmits", IPSTATS_MIB_OUTPKTS),
SNMP_MIB_SENTINEL
};
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 0eae7661a85c..03dbb874c363 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -291,6 +291,7 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb,
struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
+ bool recverr = inet6_test_bit(RECVERR6, sk);
struct ipv6_pinfo *np = inet6_sk(sk);
int err;
int harderr;
@@ -300,26 +301,26 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb,
2. Socket is connected (otherwise the error indication
is useless without recverr and error is hard.
*/
- if (!np->recverr && sk->sk_state != TCP_ESTABLISHED)
+ if (!recverr && sk->sk_state != TCP_ESTABLISHED)
return;
harderr = icmpv6_err_convert(type, code, &err);
if (type == ICMPV6_PKT_TOOBIG) {
ip6_sk_update_pmtu(skb, sk, info);
- harderr = (np->pmtudisc == IPV6_PMTUDISC_DO);
+ harderr = (READ_ONCE(np->pmtudisc) == IPV6_PMTUDISC_DO);
}
if (type == NDISC_REDIRECT) {
ip6_sk_redirect(skb, sk);
return;
}
- if (np->recverr) {
+ if (recverr) {
u8 *payload = skb->data;
if (!inet_test_bit(HDRINCL, sk))
payload += offset;
ipv6_icmp_error(sk, skb, err, 0, ntohl(info), payload);
}
- if (np->recverr || harderr) {
+ if (recverr || harderr) {
sk->sk_err = err;
sk_error_report(sk);
}
@@ -587,7 +588,6 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
struct flowi6 *fl6, struct dst_entry **dstp,
unsigned int flags, const struct sockcm_cookie *sockc)
{
- struct ipv6_pinfo *np = inet6_sk(sk);
struct net *net = sock_net(sk);
struct ipv6hdr *iph;
struct sk_buff *skb;
@@ -651,7 +651,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
* have been queued for deletion.
*/
rcu_read_lock();
- IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
+ IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb,
NULL, rt->dst.dev, dst_output);
if (err > 0)
@@ -668,7 +668,7 @@ out:
error:
IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
error_check:
- if (err == -ENOBUFS && !np->recverr)
+ if (err == -ENOBUFS && !inet6_test_bit(RECVERR6, sk))
err = 0;
return err;
}
@@ -772,7 +772,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
fl6.flowi6_uid = sk->sk_uid;
ipcm6_init(&ipc6);
- ipc6.sockc.tsflags = sk->sk_tsflags;
+ ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags);
ipc6.sockc.mark = fl6.flowi6_mark;
if (sin6) {
@@ -795,7 +795,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
return -EINVAL;
daddr = &sin6->sin6_addr;
- if (np->sndflow) {
+ if (inet6_test_bit(SNDFLOW, sk)) {
fl6.flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
@@ -876,9 +876,9 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
final_p = fl6_update_dst(&fl6, opt, &final);
if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
- fl6.flowi6_oif = np->mcast_oif;
+ fl6.flowi6_oif = READ_ONCE(np->mcast_oif);
else if (!fl6.flowi6_oif)
- fl6.flowi6_oif = np->ucast_oif;
+ fl6.flowi6_oif = READ_ONCE(np->ucast_oif);
security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
if (hdrincl)
@@ -898,7 +898,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
if (ipc6.dontfrag < 0)
- ipc6.dontfrag = np->dontfrag;
+ ipc6.dontfrag = inet6_test_bit(DONTFRAG, sk);
if (msg->msg_flags&MSG_CONFIRM)
goto do_confirm;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index d15a9e3aa24a..ea1dec8448fc 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -341,7 +341,7 @@ struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
int flags)
{
struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
- 1, DST_OBSOLETE_FORCE_CHK, flags);
+ DST_OBSOLETE_FORCE_CHK, flags);
if (rt) {
rt6_info_init(rt);
@@ -423,6 +423,9 @@ void fib6_select_path(const struct net *net, struct fib6_result *res,
if (match->nh && have_oif_match && res->nh)
return;
+ if (skb)
+ IP6CB(skb)->flags |= IP6SKB_MULTIPATH;
+
/* We might have already computed the hash for ICMPv6 errors. In such
* case it will always be non-zero. Otherwise now is the time to do it.
*/
@@ -2619,7 +2622,7 @@ static struct dst_entry *ip6_route_output_flags_noref(struct net *net,
if (!any_src)
flags |= RT6_LOOKUP_F_HAS_SADDR;
else if (sk)
- flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
+ flags |= rt6_srcprefs2flags(READ_ONCE(inet6_sk(sk)->srcprefs));
return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
}
@@ -2652,7 +2655,7 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
struct net_device *loopback_dev = net->loopback_dev;
struct dst_entry *new = NULL;
- rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
+ rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev,
DST_OBSOLETE_DEAD, 0);
if (rt) {
rt6_info_init(rt);
@@ -3760,10 +3763,10 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
rt->dst_nocount = true;
if (cfg->fc_flags & RTF_EXPIRES)
- fib6_set_expires_locked(rt, jiffies +
- clock_t_to_jiffies(cfg->fc_expires));
+ fib6_set_expires(rt, jiffies +
+ clock_t_to_jiffies(cfg->fc_expires));
else
- fib6_clean_expires_locked(rt);
+ fib6_clean_expires(rt);
if (cfg->fc_protocol == RTPROT_UNSPEC)
cfg->fc_protocol = RTPROT_BOOT;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 5014aa663452..c8d2ca27220c 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -114,80 +114,88 @@ __u32 cookie_v6_init_sequence(const struct sk_buff *skb, __u16 *mssp)
return __cookie_v6_init_sequence(iph, th, mssp);
}
-int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th,
- __u32 cookie)
+int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th)
{
+ __u32 cookie = ntohl(th->ack_seq) - 1;
__u32 seq = ntohl(th->seq) - 1;
- __u32 mssind = check_tcp_syn_cookie(cookie, &iph->saddr, &iph->daddr,
- th->source, th->dest, seq);
+ __u32 mssind;
+
+ mssind = check_tcp_syn_cookie(cookie, &iph->saddr, &iph->daddr,
+ th->source, th->dest, seq);
return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0;
}
EXPORT_SYMBOL_GPL(__cookie_v6_check);
-struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
+static struct request_sock *cookie_tcp_check(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
{
struct tcp_options_received tcp_opt;
- struct inet_request_sock *ireq;
- struct tcp_request_sock *treq;
- struct ipv6_pinfo *np = inet6_sk(sk);
- struct tcp_sock *tp = tcp_sk(sk);
- const struct tcphdr *th = tcp_hdr(skb);
- __u32 cookie = ntohl(th->ack_seq) - 1;
- struct sock *ret = sk;
- struct request_sock *req;
- int full_space, mss;
- struct dst_entry *dst;
- __u8 rcv_wscale;
u32 tsoff = 0;
-
- if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) ||
- !th->ack || th->rst)
- goto out;
+ int mss;
if (tcp_synq_no_recent_overflow(sk))
goto out;
- mss = __cookie_v6_check(ipv6_hdr(skb), th, cookie);
- if (mss == 0) {
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED);
+ mss = __cookie_v6_check(ipv6_hdr(skb), tcp_hdr(skb));
+ if (!mss) {
+ __NET_INC_STATS(net, LINUX_MIB_SYNCOOKIESFAILED);
goto out;
}
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV);
+ __NET_INC_STATS(net, LINUX_MIB_SYNCOOKIESRECV);
/* check for timestamp cookie support */
memset(&tcp_opt, 0, sizeof(tcp_opt));
- tcp_parse_options(sock_net(sk), skb, &tcp_opt, 0, NULL);
+ tcp_parse_options(net, skb, &tcp_opt, 0, NULL);
if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) {
- tsoff = secure_tcpv6_ts_off(sock_net(sk),
+ tsoff = secure_tcpv6_ts_off(net,
ipv6_hdr(skb)->daddr.s6_addr32,
ipv6_hdr(skb)->saddr.s6_addr32);
tcp_opt.rcv_tsecr -= tsoff;
}
- if (!cookie_timestamp_decode(sock_net(sk), &tcp_opt))
+ if (!cookie_timestamp_decode(net, &tcp_opt))
goto out;
- ret = NULL;
- req = cookie_tcp_reqsk_alloc(&tcp6_request_sock_ops,
- &tcp_request_sock_ipv6_ops, sk, skb);
- if (!req)
+ return cookie_tcp_reqsk_alloc(&tcp6_request_sock_ops, sk, skb,
+ &tcp_opt, mss, tsoff);
+out:
+ return ERR_PTR(-EINVAL);
+}
+
+struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
+{
+ const struct tcphdr *th = tcp_hdr(skb);
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct inet_request_sock *ireq;
+ struct net *net = sock_net(sk);
+ struct request_sock *req;
+ struct dst_entry *dst;
+ struct sock *ret = sk;
+ __u8 rcv_wscale;
+ int full_space;
+
+ if (!READ_ONCE(net->ipv4.sysctl_tcp_syncookies) ||
+ !th->ack || th->rst)
goto out;
+ req = cookie_tcp_check(net, sk, skb);
+ if (IS_ERR(req))
+ goto out;
+ if (!req)
+ goto out_drop;
+
ireq = inet_rsk(req);
- treq = tcp_rsk(req);
- treq->tfo_listener = false;
+
+ ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
+ ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
if (security_inet_conn_request(sk, skb, req))
goto out_free;
- req->mss = mss;
- ireq->ir_rmt_port = th->source;
- ireq->ir_num = ntohs(th->dest);
- ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
- ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
if (ipv6_opt_accepted(sk, skb, &TCP_SKB_CB(skb)->header.h6) ||
np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
@@ -195,27 +203,12 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
ireq->pktopts = skb;
}
- ireq->ir_iif = inet_request_bound_dev_if(sk, skb);
/* So that link locals have meaning */
if (!sk->sk_bound_dev_if &&
ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
ireq->ir_iif = tcp_v6_iif(skb);
- ireq->ir_mark = inet_request_mark(sk, skb);
-
- req->num_retrans = 0;
- ireq->snd_wscale = tcp_opt.snd_wscale;
- ireq->sack_ok = tcp_opt.sack_ok;
- ireq->wscale_ok = tcp_opt.wscale_ok;
- ireq->tstamp_ok = tcp_opt.saw_tstamp;
- req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
- treq->snt_synack = 0;
- treq->rcv_isn = ntohl(th->seq) - 1;
- treq->snt_isn = cookie;
- treq->ts_off = 0;
- treq->txhash = net_tx_rndhash();
- if (IS_ENABLED(CONFIG_SMC))
- ireq->smc_ok = 0;
+ tcp_ao_syncookie(sk, skb, req, AF_INET6);
/*
* We need to lookup the dst_entry to get the correct window size.
@@ -237,7 +230,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
fl6.flowi6_uid = sk->sk_uid;
security_req_classify_flow(req, flowi6_to_flowi_common(&fl6));
- dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
+ dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p);
if (IS_ERR(dst))
goto out_free;
}
@@ -255,12 +248,13 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
dst_metric(dst, RTAX_INITRWND));
ireq->rcv_wscale = rcv_wscale;
- ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, sock_net(sk), dst);
+ ireq->ecn_ok &= cookie_ecn_ok(net, dst);
- ret = tcp_get_cookie_sock(sk, skb, req, dst, tsoff);
+ ret = tcp_get_cookie_sock(sk, skb, req, dst);
out:
return ret;
out_free:
reqsk_free(req);
+out_drop:
return NULL;
}
diff --git a/net/ipv6/tcp_ao.c b/net/ipv6/tcp_ao.c
new file mode 100644
index 000000000000..3c09ac26206e
--- /dev/null
+++ b/net/ipv6/tcp_ao.c
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * INET An implementation of the TCP Authentication Option (TCP-AO).
+ * See RFC5925.
+ *
+ * Authors: Dmitry Safonov <dima@arista.com>
+ * Francesco Ruggeri <fruggeri@arista.com>
+ * Salam Noureddine <noureddine@arista.com>
+ */
+#include <crypto/hash.h>
+#include <linux/tcp.h>
+
+#include <net/tcp.h>
+#include <net/ipv6.h>
+
+static int tcp_v6_ao_calc_key(struct tcp_ao_key *mkt, u8 *key,
+ const struct in6_addr *saddr,
+ const struct in6_addr *daddr,
+ __be16 sport, __be16 dport,
+ __be32 sisn, __be32 disn)
+{
+ struct kdf_input_block {
+ u8 counter;
+ u8 label[6];
+ struct tcp6_ao_context ctx;
+ __be16 outlen;
+ } __packed * tmp;
+ struct tcp_sigpool hp;
+ int err;
+
+ err = tcp_sigpool_start(mkt->tcp_sigpool_id, &hp);
+ if (err)
+ return err;
+
+ tmp = hp.scratch;
+ tmp->counter = 1;
+ memcpy(tmp->label, "TCP-AO", 6);
+ tmp->ctx.saddr = *saddr;
+ tmp->ctx.daddr = *daddr;
+ tmp->ctx.sport = sport;
+ tmp->ctx.dport = dport;
+ tmp->ctx.sisn = sisn;
+ tmp->ctx.disn = disn;
+ tmp->outlen = htons(tcp_ao_digest_size(mkt) * 8); /* in bits */
+
+ err = tcp_ao_calc_traffic_key(mkt, key, tmp, sizeof(*tmp), &hp);
+ tcp_sigpool_end(&hp);
+
+ return err;
+}
+
+int tcp_v6_ao_calc_key_skb(struct tcp_ao_key *mkt, u8 *key,
+ const struct sk_buff *skb,
+ __be32 sisn, __be32 disn)
+{
+ const struct ipv6hdr *iph = ipv6_hdr(skb);
+ const struct tcphdr *th = tcp_hdr(skb);
+
+ return tcp_v6_ao_calc_key(mkt, key, &iph->saddr,
+ &iph->daddr, th->source,
+ th->dest, sisn, disn);
+}
+
+int tcp_v6_ao_calc_key_sk(struct tcp_ao_key *mkt, u8 *key,
+ const struct sock *sk, __be32 sisn,
+ __be32 disn, bool send)
+{
+ if (send)
+ return tcp_v6_ao_calc_key(mkt, key, &sk->sk_v6_rcv_saddr,
+ &sk->sk_v6_daddr, htons(sk->sk_num),
+ sk->sk_dport, sisn, disn);
+ else
+ return tcp_v6_ao_calc_key(mkt, key, &sk->sk_v6_daddr,
+ &sk->sk_v6_rcv_saddr, sk->sk_dport,
+ htons(sk->sk_num), disn, sisn);
+}
+
+int tcp_v6_ao_calc_key_rsk(struct tcp_ao_key *mkt, u8 *key,
+ struct request_sock *req)
+{
+ struct inet_request_sock *ireq = inet_rsk(req);
+
+ return tcp_v6_ao_calc_key(mkt, key,
+ &ireq->ir_v6_loc_addr, &ireq->ir_v6_rmt_addr,
+ htons(ireq->ir_num), ireq->ir_rmt_port,
+ htonl(tcp_rsk(req)->snt_isn),
+ htonl(tcp_rsk(req)->rcv_isn));
+}
+
+struct tcp_ao_key *tcp_v6_ao_lookup(const struct sock *sk,
+ struct sock *addr_sk,
+ int sndid, int rcvid)
+{
+ int l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
+ addr_sk->sk_bound_dev_if);
+ struct in6_addr *addr = &addr_sk->sk_v6_daddr;
+
+ return tcp_ao_do_lookup(sk, l3index, (union tcp_ao_addr *)addr,
+ AF_INET6, sndid, rcvid);
+}
+
+struct tcp_ao_key *tcp_v6_ao_lookup_rsk(const struct sock *sk,
+ struct request_sock *req,
+ int sndid, int rcvid)
+{
+ struct inet_request_sock *ireq = inet_rsk(req);
+ struct in6_addr *addr = &ireq->ir_v6_rmt_addr;
+ int l3index;
+
+ l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
+ return tcp_ao_do_lookup(sk, l3index, (union tcp_ao_addr *)addr,
+ AF_INET6, sndid, rcvid);
+}
+
+int tcp_v6_ao_hash_pseudoheader(struct tcp_sigpool *hp,
+ const struct in6_addr *daddr,
+ const struct in6_addr *saddr, int nbytes)
+{
+ struct tcp6_pseudohdr *bp;
+ struct scatterlist sg;
+
+ bp = hp->scratch;
+ /* 1. TCP pseudo-header (RFC2460) */
+ bp->saddr = *saddr;
+ bp->daddr = *daddr;
+ bp->len = cpu_to_be32(nbytes);
+ bp->protocol = cpu_to_be32(IPPROTO_TCP);
+
+ sg_init_one(&sg, bp, sizeof(*bp));
+ ahash_request_set_crypt(hp->req, &sg, NULL, sizeof(*bp));
+ return crypto_ahash_update(hp->req);
+}
+
+int tcp_v6_ao_hash_skb(char *ao_hash, struct tcp_ao_key *key,
+ const struct sock *sk, const struct sk_buff *skb,
+ const u8 *tkey, int hash_offset, u32 sne)
+{
+ return tcp_ao_hash_skb(AF_INET6, ao_hash, key, sk, skb, tkey,
+ hash_offset, sne);
+}
+
+int tcp_v6_parse_ao(struct sock *sk, int cmd,
+ sockptr_t optval, int optlen)
+{
+ return tcp_parse_ao(sk, cmd, AF_INET6, optval, optlen);
+}
+
+int tcp_v6_ao_synack_hash(char *ao_hash, struct tcp_ao_key *ao_key,
+ struct request_sock *req, const struct sk_buff *skb,
+ int hash_offset, u32 sne)
+{
+ void *hash_buf = NULL;
+ int err;
+
+ hash_buf = kmalloc(tcp_ao_digest_size(ao_key), GFP_ATOMIC);
+ if (!hash_buf)
+ return -ENOMEM;
+
+ err = tcp_v6_ao_calc_key_rsk(ao_key, hash_buf, req);
+ if (err)
+ goto out;
+
+ err = tcp_ao_hash_skb(AF_INET6, ao_hash, ao_key, req_to_sk(req), skb,
+ hash_buf, hash_offset, sne);
+out:
+ kfree(hash_buf);
+ return err;
+}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 3a88545a265d..57b25b1fc9d9 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -76,16 +76,9 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
static const struct inet_connection_sock_af_ops ipv6_mapped;
const struct inet_connection_sock_af_ops ipv6_specific;
-#ifdef CONFIG_TCP_MD5SIG
+#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
-#else
-static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
- const struct in6_addr *addr,
- int l3index)
-{
- return NULL;
-}
#endif
/* Helper returning the inet6 address from a given tcp socket.
@@ -135,7 +128,7 @@ static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
sock_owned_by_me(sk);
- return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
+ return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, &addr_len);
}
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
@@ -163,7 +156,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
memset(&fl6, 0, sizeof(fl6));
- if (np->sndflow) {
+ if (inet6_test_bit(SNDFLOW, sk)) {
fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
IP6_ECN_flow_init(fl6.flowlabel);
if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
@@ -239,7 +232,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
if (sk_is_mptcp(sk))
mptcpv6_handle_mapped(sk, true);
sk->sk_backlog_rcv = tcp_v4_do_rcv;
-#ifdef CONFIG_TCP_MD5SIG
+#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
tp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif
@@ -252,7 +245,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
if (sk_is_mptcp(sk))
mptcpv6_handle_mapped(sk, false);
sk->sk_backlog_rcv = tcp_v6_do_rcv;
-#ifdef CONFIG_TCP_MD5SIG
+#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
tp->af_specific = &tcp_sock_ipv6_specific;
#endif
goto failure;
@@ -286,6 +279,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
goto failure;
}
+ tp->tcp_usec_ts = dst_tcp_usec_ts(dst);
tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
if (!saddr) {
@@ -402,6 +396,8 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
}
if (sk->sk_state == TCP_TIME_WAIT) {
+ /* To increase the counter of ignored icmps for TCP-AO */
+ tcp_ao_ignore_icmp(sk, AF_INET6, type, code);
inet_twsk_put(inet_twsk(sk));
return 0;
}
@@ -412,6 +408,11 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
return 0;
}
+ if (tcp_ao_ignore_icmp(sk, AF_INET6, type, code)) {
+ sock_put(sk);
+ return 0;
+ }
+
bh_lock_sock(sk);
if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
@@ -508,7 +509,7 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
tcp_ld_RTO_revert(sk, seq);
}
- if (!sock_owned_by_user(sk) && np->recverr) {
+ if (!sock_owned_by_user(sk) && inet6_test_bit(RECVERR6, sk)) {
WRITE_ONCE(sk->sk_err, err);
sk_error_report(sk);
} else {
@@ -548,7 +549,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
&ireq->ir_v6_rmt_addr);
fl6->daddr = ireq->ir_v6_rmt_addr;
- if (np->repflow && ireq->pktopts)
+ if (inet6_test_bit(REPFLOW, sk) && ireq->pktopts)
fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
@@ -565,7 +566,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
if (!opt)
opt = rcu_dereference(np->opt);
err = ip6_xmit(sk, skb, fl6, skb->mark ? : READ_ONCE(sk->sk_mark),
- opt, tclass, sk->sk_priority);
+ opt, tclass, READ_ONCE(sk->sk_priority));
rcu_read_unlock();
err = net_xmit_eval(err);
}
@@ -606,8 +607,10 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
{
struct tcp_md5sig cmd;
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
+ union tcp_ao_addr *addr;
int l3index = 0;
u8 prefixlen;
+ bool l3flag;
u8 flags;
if (optlen < sizeof(cmd))
@@ -620,6 +623,7 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
return -EINVAL;
flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;
+ l3flag = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;
if (optname == TCP_MD5SIG_EXT &&
cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
@@ -660,17 +664,33 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
return -EINVAL;
- if (ipv6_addr_v4mapped(&sin6->sin6_addr))
- return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
+ if (ipv6_addr_v4mapped(&sin6->sin6_addr)) {
+ addr = (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3];
+
+ /* Don't allow keys for peers that have a matching TCP-AO key.
+ * See the comment in tcp_ao_add_cmd()
+ */
+ if (tcp_ao_required(sk, addr, AF_INET,
+ l3flag ? l3index : -1, false))
+ return -EKEYREJECTED;
+ return tcp_md5_do_add(sk, addr,
AF_INET, prefixlen, l3index, flags,
cmd.tcpm_key, cmd.tcpm_keylen);
+ }
+
+ addr = (union tcp_md5_addr *)&sin6->sin6_addr;
- return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
- AF_INET6, prefixlen, l3index, flags,
+ /* Don't allow keys for peers that have a matching TCP-AO key.
+ * See the comment in tcp_ao_add_cmd()
+ */
+ if (tcp_ao_required(sk, addr, AF_INET6, l3flag ? l3index : -1, false))
+ return -EKEYREJECTED;
+
+ return tcp_md5_do_add(sk, addr, AF_INET6, prefixlen, l3index, flags,
cmd.tcpm_key, cmd.tcpm_keylen);
}
-static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
+static int tcp_v6_md5_hash_headers(struct tcp_sigpool *hp,
const struct in6_addr *daddr,
const struct in6_addr *saddr,
const struct tcphdr *th, int nbytes)
@@ -691,39 +711,36 @@ static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
_th->check = 0;
sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
- ahash_request_set_crypt(hp->md5_req, &sg, NULL,
+ ahash_request_set_crypt(hp->req, &sg, NULL,
sizeof(*bp) + sizeof(*th));
- return crypto_ahash_update(hp->md5_req);
+ return crypto_ahash_update(hp->req);
}
static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
const struct in6_addr *daddr, struct in6_addr *saddr,
const struct tcphdr *th)
{
- struct tcp_md5sig_pool *hp;
- struct ahash_request *req;
+ struct tcp_sigpool hp;
- hp = tcp_get_md5sig_pool();
- if (!hp)
- goto clear_hash_noput;
- req = hp->md5_req;
+ if (tcp_sigpool_start(tcp_md5_sigpool_id, &hp))
+ goto clear_hash_nostart;
- if (crypto_ahash_init(req))
+ if (crypto_ahash_init(hp.req))
goto clear_hash;
- if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
+ if (tcp_v6_md5_hash_headers(&hp, daddr, saddr, th, th->doff << 2))
goto clear_hash;
- if (tcp_md5_hash_key(hp, key))
+ if (tcp_md5_hash_key(&hp, key))
goto clear_hash;
- ahash_request_set_crypt(req, NULL, md5_hash, 0);
- if (crypto_ahash_final(req))
+ ahash_request_set_crypt(hp.req, NULL, md5_hash, 0);
+ if (crypto_ahash_final(hp.req))
goto clear_hash;
- tcp_put_md5sig_pool();
+ tcp_sigpool_end(&hp);
return 0;
clear_hash:
- tcp_put_md5sig_pool();
-clear_hash_noput:
+ tcp_sigpool_end(&hp);
+clear_hash_nostart:
memset(md5_hash, 0, 16);
return 1;
}
@@ -733,10 +750,9 @@ static int tcp_v6_md5_hash_skb(char *md5_hash,
const struct sock *sk,
const struct sk_buff *skb)
{
- const struct in6_addr *saddr, *daddr;
- struct tcp_md5sig_pool *hp;
- struct ahash_request *req;
const struct tcphdr *th = tcp_hdr(skb);
+ const struct in6_addr *saddr, *daddr;
+ struct tcp_sigpool hp;
if (sk) { /* valid for establish/request sockets */
saddr = &sk->sk_v6_rcv_saddr;
@@ -747,34 +763,31 @@ static int tcp_v6_md5_hash_skb(char *md5_hash,
daddr = &ip6h->daddr;
}
- hp = tcp_get_md5sig_pool();
- if (!hp)
- goto clear_hash_noput;
- req = hp->md5_req;
+ if (tcp_sigpool_start(tcp_md5_sigpool_id, &hp))
+ goto clear_hash_nostart;
- if (crypto_ahash_init(req))
+ if (crypto_ahash_init(hp.req))
goto clear_hash;
- if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
+ if (tcp_v6_md5_hash_headers(&hp, daddr, saddr, th, skb->len))
goto clear_hash;
- if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
+ if (tcp_sigpool_hash_skb_data(&hp, skb, th->doff << 2))
goto clear_hash;
- if (tcp_md5_hash_key(hp, key))
+ if (tcp_md5_hash_key(&hp, key))
goto clear_hash;
- ahash_request_set_crypt(req, NULL, md5_hash, 0);
- if (crypto_ahash_final(req))
+ ahash_request_set_crypt(hp.req, NULL, md5_hash, 0);
+ if (crypto_ahash_final(hp.req))
goto clear_hash;
- tcp_put_md5sig_pool();
+ tcp_sigpool_end(&hp);
return 0;
clear_hash:
- tcp_put_md5sig_pool();
-clear_hash_noput:
+ tcp_sigpool_end(&hp);
+clear_hash_nostart:
memset(md5_hash, 0, 16);
return 1;
}
-
#endif
static void tcp_v6_init_req(struct request_sock *req,
@@ -797,7 +810,7 @@ static void tcp_v6_init_req(struct request_sock *req,
(ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
np->rxopt.bits.rxinfo ||
np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
- np->rxopt.bits.rxohlim || np->repflow)) {
+ np->rxopt.bits.rxohlim || inet6_test_bit(REPFLOW, sk_listener))) {
refcount_inc(&skb->users);
ireq->pktopts = skb;
}
@@ -833,6 +846,11 @@ const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
.req_md5_lookup = tcp_v6_md5_lookup,
.calc_md5_hash = tcp_v6_md5_hash_skb,
#endif
+#ifdef CONFIG_TCP_AO
+ .ao_lookup = tcp_v6_ao_lookup_rsk,
+ .ao_calc_key = tcp_v6_ao_calc_key_rsk,
+ .ao_synack_hash = tcp_v6_ao_synack_hash,
+#endif
#ifdef CONFIG_SYN_COOKIES
.cookie_init_seq = cookie_v6_init_sequence,
#endif
@@ -844,8 +862,8 @@ const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
u32 ack, u32 win, u32 tsval, u32 tsecr,
- int oif, struct tcp_md5sig_key *key, int rst,
- u8 tclass, __be32 label, u32 priority, u32 txhash)
+ int oif, int rst, u8 tclass, __be32 label,
+ u32 priority, u32 txhash, struct tcp_key *key)
{
const struct tcphdr *th = tcp_hdr(skb);
struct tcphdr *t1;
@@ -860,13 +878,13 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
if (tsecr)
tot_len += TCPOLEN_TSTAMP_ALIGNED;
-#ifdef CONFIG_TCP_MD5SIG
- if (key)
+ if (tcp_key_is_md5(key))
tot_len += TCPOLEN_MD5SIG_ALIGNED;
-#endif
+ if (tcp_key_is_ao(key))
+ tot_len += tcp_ao_len_aligned(key->ao_key);
#ifdef CONFIG_MPTCP
- if (rst && !key) {
+ if (rst && !tcp_key_is_md5(key)) {
mrst = mptcp_reset_option(skb);
if (mrst)
@@ -907,14 +925,28 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
*topt++ = mrst;
#ifdef CONFIG_TCP_MD5SIG
- if (key) {
+ if (tcp_key_is_md5(key)) {
*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
- tcp_v6_md5_hash_hdr((__u8 *)topt, key,
+ tcp_v6_md5_hash_hdr((__u8 *)topt, key->md5_key,
&ipv6_hdr(skb)->saddr,
&ipv6_hdr(skb)->daddr, t1);
}
#endif
+#ifdef CONFIG_TCP_AO
+ if (tcp_key_is_ao(key)) {
+ *topt++ = htonl((TCPOPT_AO << 24) |
+ (tcp_ao_len(key->ao_key) << 16) |
+ (key->ao_key->sndid << 8) |
+ (key->rcv_next));
+
+ tcp_ao_hash_hdr(AF_INET6, (char *)topt, key->ao_key,
+ key->traffic_key,
+ (union tcp_ao_addr *)&ipv6_hdr(skb)->saddr,
+ (union tcp_ao_addr *)&ipv6_hdr(skb)->daddr,
+ t1, key->sne);
+ }
+#endif
memset(&fl6, 0, sizeof(fl6));
fl6.daddr = ipv6_hdr(skb)->saddr;
@@ -977,19 +1009,23 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
{
const struct tcphdr *th = tcp_hdr(skb);
struct ipv6hdr *ipv6h = ipv6_hdr(skb);
- u32 seq = 0, ack_seq = 0;
- struct tcp_md5sig_key *key = NULL;
-#ifdef CONFIG_TCP_MD5SIG
- const __u8 *hash_location = NULL;
- unsigned char newhash[16];
- int genhash;
- struct sock *sk1 = NULL;
+ const __u8 *md5_hash_location = NULL;
+#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
+ bool allocated_traffic_key = false;
#endif
+ const struct tcp_ao_hdr *aoh;
+ struct tcp_key key = {};
+ u32 seq = 0, ack_seq = 0;
__be32 label = 0;
u32 priority = 0;
struct net *net;
u32 txhash = 0;
int oif = 0;
+#ifdef CONFIG_TCP_MD5SIG
+ unsigned char newhash[16];
+ int genhash;
+ struct sock *sk1 = NULL;
+#endif
if (th->rst)
return;
@@ -1001,9 +1037,13 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
return;
net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
-#ifdef CONFIG_TCP_MD5SIG
+ /* Invalid TCP option size or twice included auth */
+ if (tcp_parse_auth_options(th, &md5_hash_location, &aoh))
+ return;
+#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
rcu_read_lock();
- hash_location = tcp_parse_md5sig_option(th);
+#endif
+#ifdef CONFIG_TCP_MD5SIG
if (sk && sk_fullsock(sk)) {
int l3index;
@@ -1011,8 +1051,10 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
* in an L3 domain and inet_iif is set to it.
*/
l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
- key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
- } else if (hash_location) {
+ key.md5_key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
+ if (key.md5_key)
+ key.type = TCP_KEY_MD5;
+ } else if (md5_hash_location) {
int dif = tcp_v6_iif_l3_slave(skb);
int sdif = tcp_v6_sdif(skb);
int l3index;
@@ -1036,12 +1078,13 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
*/
l3index = tcp_v6_sdif(skb) ? dif : 0;
- key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
- if (!key)
+ key.md5_key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
+ if (!key.md5_key)
goto out;
+ key.type = TCP_KEY_MD5;
- genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
- if (genhash || memcmp(hash_location, newhash, 16) != 0)
+ genhash = tcp_v6_md5_hash_skb(newhash, key.md5_key, NULL, skb);
+ if (genhash || memcmp(md5_hash_location, newhash, 16) != 0)
goto out;
}
#endif
@@ -1052,15 +1095,27 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
(th->doff << 2);
+#ifdef CONFIG_TCP_AO
+ if (aoh) {
+ int l3index;
+
+ l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
+ if (tcp_ao_prepare_reset(sk, skb, aoh, l3index, seq,
+ &key.ao_key, &key.traffic_key,
+ &allocated_traffic_key,
+ &key.rcv_next, &key.sne))
+ goto out;
+ key.type = TCP_KEY_AO;
+ }
+#endif
+
if (sk) {
oif = sk->sk_bound_dev_if;
if (sk_fullsock(sk)) {
- const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
-
trace_tcp_send_reset(sk, skb);
- if (np->repflow)
+ if (inet6_test_bit(REPFLOW, sk))
label = ip6_flowlabel(ipv6h);
- priority = sk->sk_priority;
+ priority = READ_ONCE(sk->sk_priority);
txhash = sk->sk_txhash;
}
if (sk->sk_state == TCP_TIME_WAIT) {
@@ -1073,45 +1128,141 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
label = ip6_flowlabel(ipv6h);
}
- tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
- ipv6_get_dsfield(ipv6h), label, priority, txhash);
+ tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, 1,
+ ipv6_get_dsfield(ipv6h), label, priority, txhash,
+ &key);
-#ifdef CONFIG_TCP_MD5SIG
+#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
out:
+ if (allocated_traffic_key)
+ kfree(key.traffic_key);
rcu_read_unlock();
#endif
}
static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
- struct tcp_md5sig_key *key, u8 tclass,
+ struct tcp_key *key, u8 tclass,
__be32 label, u32 priority, u32 txhash)
{
- tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
- tclass, label, priority, txhash);
+ tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, 0,
+ tclass, label, priority, txhash, key);
}
static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
struct inet_timewait_sock *tw = inet_twsk(sk);
struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
+ struct tcp_key key = {};
+#ifdef CONFIG_TCP_AO
+ struct tcp_ao_info *ao_info;
+
+ if (static_branch_unlikely(&tcp_ao_needed.key)) {
+
+ /* FIXME: the segment to-be-acked is not verified yet */
+ ao_info = rcu_dereference(tcptw->ao_info);
+ if (ao_info) {
+ const struct tcp_ao_hdr *aoh;
+
+ /* Invalid TCP option size or twice included auth */
+ if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh))
+ goto out;
+ if (aoh)
+ key.ao_key = tcp_ao_established_key(ao_info,
+ aoh->rnext_keyid, -1);
+ }
+ }
+ if (key.ao_key) {
+ struct tcp_ao_key *rnext_key;
+
+ key.traffic_key = snd_other_key(key.ao_key);
+ /* rcv_next switches to our rcv_next */
+ rnext_key = READ_ONCE(ao_info->rnext_key);
+ key.rcv_next = rnext_key->rcvid;
+ key.sne = READ_ONCE(ao_info->snd_sne);
+ key.type = TCP_KEY_AO;
+#else
+ if (0) {
+#endif
+#ifdef CONFIG_TCP_MD5SIG
+ } else if (static_branch_unlikely(&tcp_md5_needed.key)) {
+ key.md5_key = tcp_twsk_md5_key(tcptw);
+ if (key.md5_key)
+ key.type = TCP_KEY_MD5;
+#endif
+ }
tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
- tcp_time_stamp_raw() + tcptw->tw_ts_offset,
- tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
+ tcp_tw_tsval(tcptw),
+ tcptw->tw_ts_recent, tw->tw_bound_dev_if, &key,
tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority,
tw->tw_txhash);
+#ifdef CONFIG_TCP_AO
+out:
+#endif
inet_twsk_put(tw);
}
static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
struct request_sock *req)
{
- int l3index;
+ struct tcp_key key = {};
- l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
+#ifdef CONFIG_TCP_AO
+ if (static_branch_unlikely(&tcp_ao_needed.key) &&
+ tcp_rsk_used_ao(req)) {
+ const struct in6_addr *addr = &ipv6_hdr(skb)->saddr;
+ const struct tcp_ao_hdr *aoh;
+ int l3index;
+
+ l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
+ /* Invalid TCP option size or twice included auth */
+ if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh))
+ return;
+ if (!aoh)
+ return;
+ key.ao_key = tcp_ao_do_lookup(sk, l3index,
+ (union tcp_ao_addr *)addr,
+ AF_INET6, aoh->rnext_keyid, -1);
+ if (unlikely(!key.ao_key)) {
+ /* Send ACK with any matching MKT for the peer */
+ key.ao_key = tcp_ao_do_lookup(sk, l3index,
+ (union tcp_ao_addr *)addr,
+ AF_INET6, -1, -1);
+ /* Matching key disappeared (user removed the key?)
+ * let the handshake timeout.
+ */
+ if (!key.ao_key) {
+ net_info_ratelimited("TCP-AO key for (%pI6, %d)->(%pI6, %d) suddenly disappeared, won't ACK new connection\n",
+ addr,
+ ntohs(tcp_hdr(skb)->source),
+ &ipv6_hdr(skb)->daddr,
+ ntohs(tcp_hdr(skb)->dest));
+ return;
+ }
+ }
+ key.traffic_key = kmalloc(tcp_ao_digest_size(key.ao_key), GFP_ATOMIC);
+ if (!key.traffic_key)
+ return;
+
+ key.type = TCP_KEY_AO;
+ key.rcv_next = aoh->keyid;
+ tcp_v6_ao_calc_key_rsk(key.ao_key, key.traffic_key, req);
+#else
+ if (0) {
+#endif
+#ifdef CONFIG_TCP_MD5SIG
+ } else if (static_branch_unlikely(&tcp_md5_needed.key)) {
+ int l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
+
+ key.md5_key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr,
+ l3index);
+ if (key.md5_key)
+ key.type = TCP_KEY_MD5;
+#endif
+ }
/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
* sk->sk_state == TCP_SYN_RECV -> for Fast Open.
@@ -1125,12 +1276,13 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
tcp_rsk(req)->rcv_nxt,
req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
- tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
+ tcp_rsk_tsval(tcp_rsk(req)),
READ_ONCE(req->ts_recent), sk->sk_bound_dev_if,
- tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
- ipv6_get_dsfield(ipv6_hdr(skb)), 0,
+ &key, ipv6_get_dsfield(ipv6_hdr(skb)), 0,
READ_ONCE(sk->sk_priority),
READ_ONCE(tcp_rsk(req)->txhash));
+ if (tcp_key_is_ao(&key))
+ kfree(key.traffic_key);
}
@@ -1235,7 +1387,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
if (sk_is_mptcp(newsk))
mptcpv6_handle_mapped(newsk, true);
newsk->sk_backlog_rcv = tcp_v4_do_rcv;
-#ifdef CONFIG_TCP_MD5SIG
+#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif
@@ -1247,7 +1399,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
newnp->mcast_oif = inet_iif(skb);
newnp->mcast_hops = ip_hdr(skb)->ttl;
newnp->rcv_flowinfo = 0;
- if (np->repflow)
+ if (inet6_test_bit(REPFLOW, sk))
newnp->flow_label = 0;
/*
@@ -1320,7 +1472,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
newnp->mcast_oif = tcp_v6_iif(skb);
newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
- if (np->repflow)
+ if (inet6_test_bit(REPFLOW, sk))
newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
/* Set ToS of the new socket based upon the value of incoming SYN.
@@ -1360,19 +1512,26 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
#ifdef CONFIG_TCP_MD5SIG
l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
- /* Copy over the MD5 key from the original socket */
- key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
- if (key) {
- const union tcp_md5_addr *addr;
-
- addr = (union tcp_md5_addr *)&newsk->sk_v6_daddr;
- if (tcp_md5_key_copy(newsk, addr, AF_INET6, 128, l3index, key)) {
- inet_csk_prepare_forced_close(newsk);
- tcp_done(newsk);
- goto out;
+ if (!tcp_rsk_used_ao(req)) {
+ /* Copy over the MD5 key from the original socket */
+ key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
+ if (key) {
+ const union tcp_md5_addr *addr;
+
+ addr = (union tcp_md5_addr *)&newsk->sk_v6_daddr;
+ if (tcp_md5_key_copy(newsk, addr, AF_INET6, 128, l3index, key)) {
+ inet_csk_prepare_forced_close(newsk);
+ tcp_done(newsk);
+ goto out;
+ }
}
}
#endif
+#ifdef CONFIG_TCP_AO
+ /* Copy over tcp_ao_info if any */
+ if (tcp_ao_copy_all_matching(sk, newsk, req, skb, AF_INET6))
+ goto out; /* OOM */
+#endif
if (__inet_inherit_port(sk, newsk) < 0) {
inet_csk_prepare_forced_close(newsk);
@@ -1540,12 +1699,13 @@ ipv6_pktoptions:
if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
!((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
- np->mcast_oif = tcp_v6_iif(opt_skb);
+ WRITE_ONCE(np->mcast_oif, tcp_v6_iif(opt_skb));
if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
- np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
+ WRITE_ONCE(np->mcast_hops,
+ ipv6_hdr(opt_skb)->hop_limit);
if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
- if (np->repflow)
+ if (inet6_test_bit(REPFLOW, sk))
np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
tcp_v6_restore_cb(opt_skb);
@@ -1640,9 +1800,12 @@ process:
struct sock *nsk;
sk = req->rsk_listener;
- drop_reason = tcp_inbound_md5_hash(sk, skb,
- &hdr->saddr, &hdr->daddr,
- AF_INET6, dif, sdif);
+ if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
+ drop_reason = SKB_DROP_REASON_XFRM_POLICY;
+ else
+ drop_reason = tcp_inbound_hash(sk, req, skb,
+ &hdr->saddr, &hdr->daddr,
+ AF_INET6, dif, sdif);
if (drop_reason) {
sk_drops_add(sk, skb);
reqsk_put(req);
@@ -1689,6 +1852,7 @@ process:
}
goto discard_and_relse;
}
+ nf_reset_ct(skb);
if (nsk == sk) {
reqsk_put(req);
tcp_v6_restore_cb(skb);
@@ -1715,8 +1879,8 @@ process:
goto discard_and_relse;
}
- drop_reason = tcp_inbound_md5_hash(sk, skb, &hdr->saddr, &hdr->daddr,
- AF_INET6, dif, sdif);
+ drop_reason = tcp_inbound_hash(sk, NULL, skb, &hdr->saddr, &hdr->daddr,
+ AF_INET6, dif, sdif);
if (drop_reason)
goto discard_and_relse;
@@ -1891,7 +2055,6 @@ const struct inet_connection_sock_af_ops ipv6_specific = {
.conn_request = tcp_v6_conn_request,
.syn_recv_sock = tcp_v6_syn_recv_sock,
.net_header_len = sizeof(struct ipv6hdr),
- .net_frag_header_len = sizeof(struct frag_hdr),
.setsockopt = ipv6_setsockopt,
.getsockopt = ipv6_getsockopt,
.addr2sockaddr = inet6_csk_addr2sockaddr,
@@ -1899,11 +2062,19 @@ const struct inet_connection_sock_af_ops ipv6_specific = {
.mtu_reduced = tcp_v6_mtu_reduced,
};
-#ifdef CONFIG_TCP_MD5SIG
+#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
+#ifdef CONFIG_TCP_MD5SIG
.md5_lookup = tcp_v6_md5_lookup,
.calc_md5_hash = tcp_v6_md5_hash_skb,
.md5_parse = tcp_v6_parse_md5_keys,
+#endif
+#ifdef CONFIG_TCP_AO
+ .ao_lookup = tcp_v6_ao_lookup,
+ .calc_ao_hash = tcp_v6_ao_hash_skb,
+ .ao_parse = tcp_v6_parse_ao,
+ .ao_calc_key_sk = tcp_v6_ao_calc_key_sk,
+#endif
};
#endif
@@ -1925,11 +2096,19 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = {
.mtu_reduced = tcp_v4_mtu_reduced,
};
-#ifdef CONFIG_TCP_MD5SIG
+#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
+#ifdef CONFIG_TCP_MD5SIG
.md5_lookup = tcp_v4_md5_lookup,
.calc_md5_hash = tcp_v4_md5_hash_skb,
.md5_parse = tcp_v6_parse_md5_keys,
+#endif
+#ifdef CONFIG_TCP_AO
+ .ao_lookup = tcp_v6_ao_lookup,
+ .calc_ao_hash = tcp_v4_ao_hash_skb,
+ .ao_parse = tcp_v6_parse_ao,
+ .ao_calc_key_sk = tcp_v4_ao_calc_key_sk,
+#endif
};
#endif
@@ -1944,7 +2123,7 @@ static int tcp_v6_init_sock(struct sock *sk)
icsk->icsk_af_ops = &ipv6_specific;
-#ifdef CONFIG_TCP_MD5SIG
+#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
#endif
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index ebc6ae47cfea..3f2249b4cd5f 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -410,10 +410,11 @@ try_again:
*addr_len = sizeof(*sin6);
BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk,
- (struct sockaddr *)sin6);
+ (struct sockaddr *)sin6,
+ addr_len);
}
- if (udp_sk(sk)->gro_enabled)
+ if (udp_test_bit(GRO_ENABLED, sk))
udp_cmsg_recv(msg, sk, skb);
if (np->rxopt.all)
@@ -571,7 +572,7 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source,
inet6_iif(skb), inet6_sdif(skb), udptable, NULL);
- if (!sk || udp_sk(sk)->encap_type) {
+ if (!sk || READ_ONCE(udp_sk(sk)->encap_type)) {
/* No socket for error: try tunnels before discarding */
if (static_branch_unlikely(&udpv6_encap_needed_key)) {
sk = __udp6_lib_err_encap(net, hdr, offset, uh,
@@ -598,7 +599,7 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (!ip6_sk_accept_pmtu(sk))
goto out;
ip6_sk_update_pmtu(skb, sk, info);
- if (np->pmtudisc != IPV6_PMTUDISC_DONT)
+ if (READ_ONCE(np->pmtudisc) != IPV6_PMTUDISC_DONT)
harderr = 1;
}
if (type == NDISC_REDIRECT) {
@@ -619,7 +620,7 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
goto out;
}
- if (!np->recverr) {
+ if (!inet6_test_bit(RECVERR6, sk)) {
if (!harderr || sk->sk_state != TCP_ESTABLISHED)
goto out;
} else {
@@ -688,7 +689,8 @@ static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
}
nf_reset_ct(skb);
- if (static_branch_unlikely(&udpv6_encap_needed_key) && up->encap_type) {
+ if (static_branch_unlikely(&udpv6_encap_needed_key) &&
+ READ_ONCE(up->encap_type)) {
int (*encap_rcv)(struct sock *sk, struct sk_buff *skb);
/*
@@ -726,16 +728,17 @@ static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
/*
* UDP-Lite specific tests, ignored on UDP sockets (see net/ipv4/udp.c).
*/
- if ((up->pcflag & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) {
+ if (udp_test_bit(UDPLITE_RECV_CC, sk) && UDP_SKB_CB(skb)->partial_cov) {
+ u16 pcrlen = READ_ONCE(up->pcrlen);
- if (up->pcrlen == 0) { /* full coverage was set */
+ if (pcrlen == 0) { /* full coverage was set */
net_dbg_ratelimited("UDPLITE6: partial coverage %d while full coverage %d requested\n",
UDP_SKB_CB(skb)->cscov, skb->len);
goto drop;
}
- if (UDP_SKB_CB(skb)->cscov < up->pcrlen) {
+ if (UDP_SKB_CB(skb)->cscov < pcrlen) {
net_dbg_ratelimited("UDPLITE6: coverage %d too small, need min %d\n",
- UDP_SKB_CB(skb)->cscov, up->pcrlen);
+ UDP_SKB_CB(skb)->cscov, pcrlen);
goto drop;
}
}
@@ -858,7 +861,7 @@ start_lookup:
/* If zero checksum and no_check is not on for
* the socket then skip it.
*/
- if (!uh->check && !udp_sk(sk)->no_check6_rx)
+ if (!uh->check && !udp_get_no_check6_rx(sk))
continue;
if (!first) {
first = sk;
@@ -980,7 +983,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
if (unlikely(rcu_dereference(sk->sk_rx_dst) != dst))
udp6_sk_rx_dst_set(sk, dst);
- if (!uh->check && !udp_sk(sk)->no_check6_rx) {
+ if (!uh->check && !udp_get_no_check6_rx(sk)) {
if (refcounted)
sock_put(sk);
goto report_csum_error;
@@ -1002,7 +1005,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
/* Unicast */
sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable);
if (sk) {
- if (!uh->check && !udp_sk(sk)->no_check6_rx)
+ if (!uh->check && !udp_get_no_check6_rx(sk))
goto report_csum_error;
return udp6_unicast_rcv_skb(sk, skb, uh);
}
@@ -1132,7 +1135,7 @@ static void udp_v6_flush_pending_frames(struct sock *sk)
udp_flush_pending_frames(sk);
else if (up->pending) {
up->len = 0;
- up->pending = 0;
+ WRITE_ONCE(up->pending, 0);
ip6_flush_pending_frames(sk);
}
}
@@ -1155,7 +1158,7 @@ static int udpv6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
if (addr_len < SIN6_LEN_RFC2133)
return -EINVAL;
- return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr);
+ return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, &addr_len);
}
/**
@@ -1241,7 +1244,7 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6,
kfree_skb(skb);
return -EINVAL;
}
- if (udp_sk(sk)->no_check6_tx) {
+ if (udp_get_no_check6_tx(sk)) {
kfree_skb(skb);
return -EINVAL;
}
@@ -1262,7 +1265,7 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6,
if (is_udplite)
csum = udplite_csum(skb);
- else if (udp_sk(sk)->no_check6_tx) { /* UDP csum disabled */
+ else if (udp_get_no_check6_tx(sk)) { /* UDP csum disabled */
skb->ip_summed = CHECKSUM_NONE;
goto send;
} else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
@@ -1281,7 +1284,7 @@ csum_partial:
send:
err = ip6_send_skb(skb);
if (err) {
- if (err == -ENOBUFS && !inet6_sk(sk)->recverr) {
+ if (err == -ENOBUFS && !inet6_test_bit(RECVERR6, sk)) {
UDP6_INC_STATS(sock_net(sk),
UDP_MIB_SNDBUFERRORS, is_udplite);
err = 0;
@@ -1310,7 +1313,7 @@ static int udp_v6_push_pending_frames(struct sock *sk)
&inet_sk(sk)->cork.base);
out:
up->len = 0;
- up->pending = 0;
+ WRITE_ONCE(up->pending, 0);
return err;
}
@@ -1332,14 +1335,14 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
int addr_len = msg->msg_namelen;
bool connected = false;
int ulen = len;
- int corkreq = READ_ONCE(up->corkflag) || msg->msg_flags&MSG_MORE;
+ int corkreq = udp_test_bit(CORK, sk) || msg->msg_flags & MSG_MORE;
int err;
int is_udplite = IS_UDPLITE(sk);
int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
ipcm6_init(&ipc6);
ipc6.gso_size = READ_ONCE(up->gso_size);
- ipc6.sockc.tsflags = sk->sk_tsflags;
+ ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags);
ipc6.sockc.mark = READ_ONCE(sk->sk_mark);
/* destination address check */
@@ -1367,7 +1370,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
default:
return -EINVAL;
}
- } else if (!up->pending) {
+ } else if (!READ_ONCE(up->pending)) {
if (sk->sk_state != TCP_ESTABLISHED)
return -EDESTADDRREQ;
daddr = &sk->sk_v6_daddr;
@@ -1398,8 +1401,8 @@ do_udp_sendmsg:
return -EMSGSIZE;
getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
- if (up->pending) {
- if (up->pending == AF_INET)
+ if (READ_ONCE(up->pending)) {
+ if (READ_ONCE(up->pending) == AF_INET)
return udp_sendmsg(sk, msg, len);
/*
* There are pending frames.
@@ -1427,7 +1430,7 @@ do_udp_sendmsg:
fl6->fl6_dport = sin6->sin6_port;
daddr = &sin6->sin6_addr;
- if (np->sndflow) {
+ if (inet6_test_bit(SNDFLOW, sk)) {
fl6->flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
if (fl6->flowlabel & IPV6_FLOWLABEL_MASK) {
flowlabel = fl6_sock_lookup(sk, fl6->flowlabel);
@@ -1508,6 +1511,7 @@ do_udp_sendmsg:
if (cgroup_bpf_enabled(CGROUP_UDP6_SENDMSG) && !connected) {
err = BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk,
(struct sockaddr *)sin6,
+ &addr_len,
&fl6->saddr);
if (err)
goto out_no_dst;
@@ -1537,10 +1541,10 @@ do_udp_sendmsg:
connected = false;
if (!fl6->flowi6_oif && ipv6_addr_is_multicast(&fl6->daddr)) {
- fl6->flowi6_oif = np->mcast_oif;
+ fl6->flowi6_oif = READ_ONCE(np->mcast_oif);
connected = false;
} else if (!fl6->flowi6_oif)
- fl6->flowi6_oif = np->ucast_oif;
+ fl6->flowi6_oif = READ_ONCE(np->ucast_oif);
security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));
@@ -1589,11 +1593,11 @@ back_from_confirm:
goto out;
}
- up->pending = AF_INET6;
+ WRITE_ONCE(up->pending, AF_INET6);
do_append_data:
if (ipc6.dontfrag < 0)
- ipc6.dontfrag = np->dontfrag;
+ ipc6.dontfrag = inet6_test_bit(DONTFRAG, sk);
up->len += ulen;
err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr),
&ipc6, fl6, (struct rt6_info *)dst,
@@ -1603,10 +1607,10 @@ do_append_data:
else if (!corkreq)
err = udp_v6_push_pending_frames(sk);
else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))
- up->pending = 0;
+ WRITE_ONCE(up->pending, 0);
if (err > 0)
- err = np->recverr ? net_xmit_errno(err) : 0;
+ err = inet6_test_bit(RECVERR6, sk) ? net_xmit_errno(err) : 0;
release_sock(sk);
out:
@@ -1644,11 +1648,11 @@ static void udpv6_splice_eof(struct socket *sock)
struct sock *sk = sock->sk;
struct udp_sock *up = udp_sk(sk);
- if (!up->pending || READ_ONCE(up->corkflag))
+ if (!READ_ONCE(up->pending) || udp_test_bit(CORK, sk))
return;
lock_sock(sk);
- if (up->pending && !READ_ONCE(up->corkflag))
+ if (up->pending && !udp_test_bit(CORK, sk))
udp_v6_push_pending_frames(sk);
release_sock(sk);
}
@@ -1670,7 +1674,7 @@ void udpv6_destroy_sock(struct sock *sk)
if (encap_destroy)
encap_destroy(sk);
}
- if (up->encap_enabled) {
+ if (udp_test_bit(ENCAP_ENABLED, sk)) {
static_branch_dec(&udpv6_encap_needed_key);
udp_encap_disable();
}
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index 267d491e9707..a60bec9b14f1 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -17,7 +17,6 @@
static int udplitev6_sk_init(struct sock *sk)
{
udpv6_init_sock(sk);
- udp_sk(sk)->pcflag = UDPLITE_BIT;
pr_warn_once("UDP-Lite is deprecated and scheduled to be removed in 2025, "
"please contact the netdev mailing list\n");
return 0;
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 4907ab241d6b..6e36e5047fba 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -16,6 +16,8 @@
#include <linux/netfilter_ipv6.h>
#include <net/ipv6.h>
#include <net/xfrm.h>
+#include <net/protocol.h>
+#include <net/gro.h>
int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi,
struct ip6_tnl *t)
@@ -67,28 +69,18 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async)
return 0;
}
-/* If it's a keepalive packet, then just eat it.
- * If it's an encapsulated packet, then pass it to the
- * IPsec xfrm input.
- * Returns 0 if skb passed to xfrm or was dropped.
- * Returns >0 if skb should be passed to UDP.
- * Returns <0 if skb should be resubmitted (-ret is protocol)
- */
-int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
+static int __xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb, bool pull)
{
struct udp_sock *up = udp_sk(sk);
struct udphdr *uh;
struct ipv6hdr *ip6h;
int len;
int ip6hlen = sizeof(struct ipv6hdr);
-
__u8 *udpdata;
__be32 *udpdata32;
- __u16 encap_type = up->encap_type;
-
- if (skb->protocol == htons(ETH_P_IP))
- return xfrm4_udp_encap_rcv(sk, skb);
+ u16 encap_type;
+ encap_type = READ_ONCE(up->encap_type);
/* if this is not encapsulated socket, then just return now */
if (!encap_type)
return 1;
@@ -109,7 +101,7 @@ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
case UDP_ENCAP_ESPINUDP:
/* Check if this is a keepalive packet. If so, eat it. */
if (len == 1 && udpdata[0] == 0xff) {
- goto drop;
+ return -EINVAL;
} else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) {
/* ESP Packet without Non-ESP header */
len = sizeof(struct udphdr);
@@ -120,7 +112,7 @@ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
case UDP_ENCAP_ESPINUDP_NON_IKE:
/* Check if this is a keepalive packet. If so, eat it. */
if (len == 1 && udpdata[0] == 0xff) {
- goto drop;
+ return -EINVAL;
} else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) &&
udpdata32[0] == 0 && udpdata32[1] == 0) {
@@ -138,31 +130,100 @@ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
* protocol to ESP, and then call into the transform receiver.
*/
if (skb_unclone(skb, GFP_ATOMIC))
- goto drop;
+ return -EINVAL;
/* Now we can update and verify the packet length... */
ip6h = ipv6_hdr(skb);
ip6h->payload_len = htons(ntohs(ip6h->payload_len) - len);
if (skb->len < ip6hlen + len) {
/* packet is too small!?! */
- goto drop;
+ return -EINVAL;
}
/* pull the data buffer up to the ESP header and set the
* transport header to point to ESP. Keep UDP on the stack
* for later.
*/
- __skb_pull(skb, len);
- skb_reset_transport_header(skb);
+ if (pull) {
+ __skb_pull(skb, len);
+ skb_reset_transport_header(skb);
+ } else {
+ skb_set_transport_header(skb, len);
+ }
/* process ESP */
- return xfrm6_rcv_encap(skb, IPPROTO_ESP, 0, encap_type);
-
-drop:
- kfree_skb(skb);
return 0;
}
+/* If it's a keepalive packet, then just eat it.
+ * If it's an encapsulated packet, then pass it to the
+ * IPsec xfrm input.
+ * Returns 0 if skb passed to xfrm or was dropped.
+ * Returns >0 if skb should be passed to UDP.
+ * Returns <0 if skb should be resubmitted (-ret is protocol)
+ */
+int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
+{
+ int ret;
+
+ if (skb->protocol == htons(ETH_P_IP))
+ return xfrm4_udp_encap_rcv(sk, skb);
+
+ ret = __xfrm6_udp_encap_rcv(sk, skb, true);
+ if (!ret)
+ return xfrm6_rcv_encap(skb, IPPROTO_ESP, 0,
+ udp_sk(sk)->encap_type);
+
+ if (ret < 0) {
+ kfree_skb(skb);
+ return 0;
+ }
+
+ return ret;
+}
+
+struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
+ struct sk_buff *skb)
+{
+ int offset = skb_gro_offset(skb);
+ const struct net_offload *ops;
+ struct sk_buff *pp = NULL;
+ int ret;
+
+ if (skb->protocol == htons(ETH_P_IP))
+ return xfrm4_gro_udp_encap_rcv(sk, head, skb);
+
+ offset = offset - sizeof(struct udphdr);
+
+ if (!pskb_pull(skb, offset))
+ return NULL;
+
+ rcu_read_lock();
+ ops = rcu_dereference(inet6_offloads[IPPROTO_ESP]);
+ if (!ops || !ops->callbacks.gro_receive)
+ goto out;
+
+ ret = __xfrm6_udp_encap_rcv(sk, skb, false);
+ if (ret)
+ goto out;
+
+ skb_push(skb, offset);
+ NAPI_GRO_CB(skb)->proto = IPPROTO_UDP;
+
+ pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
+ rcu_read_unlock();
+
+ return pp;
+
+out:
+ rcu_read_unlock();
+ skb_push(skb, offset);
+ NAPI_GRO_CB(skb)->same_flow = 0;
+ NAPI_GRO_CB(skb)->flush = 1;
+
+ return NULL;
+}
+
int xfrm6_rcv_tnl(struct sk_buff *skb, struct ip6_tnl *t)
{
return xfrm6_rcv_spi(skb, skb_network_header(skb)[IP6CB(skb)->nhoff],
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index ad07904642ca..5f7b1fdbffe6 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -95,7 +95,7 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
return -EMSGSIZE;
}
- if (toobig || dst_allfrag(skb_dst(skb)))
+ if (toobig)
return ip6_fragment(net, sk, skb,
__xfrm6_output_finish);
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 41a680c76d2e..42fb6996b077 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -117,10 +117,10 @@ static void xfrm6_dst_destroy(struct dst_entry *dst)
{
struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
- if (likely(xdst->u.rt6.rt6i_idev))
- in6_dev_put(xdst->u.rt6.rt6i_idev);
dst_destroy_metrics_generic(dst);
rt6_uncached_list_del(&xdst->u.rt6);
+ if (likely(xdst->u.rt6.rt6i_idev))
+ in6_dev_put(xdst->u.rt6.rt6i_idev);
xfrm_dst_destroy(xdst);
}
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index fc3fddeb6f36..6334f64f04d5 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -67,7 +67,7 @@ static int iucv_bus_match(struct device *dev, struct device_driver *drv)
return 0;
}
-struct bus_type iucv_bus = {
+const struct bus_type iucv_bus = {
.name = "iucv",
.match = iucv_bus_match,
};
@@ -1823,7 +1823,7 @@ static int __init iucv_init(void)
rc = -EPROTONOSUPPORT;
goto out;
}
- ctl_set_bit(0, 1);
+ system_ctl_set_bit(0, CR0_IUCV_BIT);
rc = iucv_query_maxconn();
if (rc)
goto out_ctl;
@@ -1871,7 +1871,7 @@ out_dev:
out_int:
unregister_external_irq(EXT_IRQ_IUCV, iucv_external_interrupt);
out_ctl:
- ctl_clear_bit(0, 1);
+ system_ctl_clear_bit(0, 1);
out:
return rc;
}
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 393f01b2a7e6..1184d40167b8 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -634,7 +634,7 @@ retry:
msize = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- msize += skb_shinfo(skb)->frags[i].bv_len;
+ msize += skb_frag_size(&skb_shinfo(skb)->frags[i]);
iov_iter_bvec(&msg.msg_iter, ITER_SOURCE,
skb_shinfo(skb)->frags, skb_shinfo(skb)->nr_frags,
@@ -930,15 +930,18 @@ partial_message:
out_error:
kcm_push(kcm);
- if (copied && sock->type == SOCK_SEQPACKET) {
+ if (sock->type == SOCK_SEQPACKET) {
/* Wrote some bytes before encountering an
* error, return partial success.
*/
- goto partial_message;
- }
-
- if (head != kcm->seq_skb)
+ if (copied)
+ goto partial_message;
+ if (head != kcm->seq_skb)
+ kfree_skb(head);
+ } else {
kfree_skb(head);
+ kcm->seq_skb = NULL;
+ }
err = sk_stream_error(sk, msg->msg_flags, err);
@@ -1859,6 +1862,8 @@ static __net_exit void kcm_exit_net(struct net *net)
* that all multiplexors and psocks have been destroyed.
*/
WARN_ON(!list_empty(&knet->mux_list));
+
+ mutex_destroy(&knet->mutex);
}
static struct pernet_operations kcm_net_ops = {
@@ -1941,4 +1946,5 @@ module_init(kcm_init);
module_exit(kcm_exit);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("KCM (Kernel Connection Multiplexor) sockets");
MODULE_ALIAS_NETPROTO(PF_KCM);
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 03608d3ded4b..8d21ff25f160 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1139,9 +1139,9 @@ static void l2tp_tunnel_destruct(struct sock *sk)
switch (tunnel->encap) {
case L2TP_ENCAPTYPE_UDP:
/* No longer an encapsulation socket. See net/ipv4/udp.c */
- (udp_sk(sk))->encap_type = 0;
- (udp_sk(sk))->encap_rcv = NULL;
- (udp_sk(sk))->encap_destroy = NULL;
+ WRITE_ONCE(udp_sk(sk)->encap_type, 0);
+ udp_sk(sk)->encap_rcv = NULL;
+ udp_sk(sk)->encap_destroy = NULL;
break;
case L2TP_ENCAPTYPE_IP:
break;
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index f2ae03c40473..25ca89f80414 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -37,12 +37,6 @@
/* via netdev_priv() */
struct l2tp_eth {
struct l2tp_session *session;
- atomic_long_t tx_bytes;
- atomic_long_t tx_packets;
- atomic_long_t tx_dropped;
- atomic_long_t rx_bytes;
- atomic_long_t rx_packets;
- atomic_long_t rx_errors;
};
/* via l2tp_session_priv() */
@@ -79,10 +73,10 @@ static netdev_tx_t l2tp_eth_dev_xmit(struct sk_buff *skb, struct net_device *dev
int ret = l2tp_xmit_skb(session, skb);
if (likely(ret == NET_XMIT_SUCCESS)) {
- atomic_long_add(len, &priv->tx_bytes);
- atomic_long_inc(&priv->tx_packets);
+ DEV_STATS_ADD(dev, tx_bytes, len);
+ DEV_STATS_INC(dev, tx_packets);
} else {
- atomic_long_inc(&priv->tx_dropped);
+ DEV_STATS_INC(dev, tx_dropped);
}
return NETDEV_TX_OK;
}
@@ -90,14 +84,12 @@ static netdev_tx_t l2tp_eth_dev_xmit(struct sk_buff *skb, struct net_device *dev
static void l2tp_eth_get_stats64(struct net_device *dev,
struct rtnl_link_stats64 *stats)
{
- struct l2tp_eth *priv = netdev_priv(dev);
-
- stats->tx_bytes = (unsigned long)atomic_long_read(&priv->tx_bytes);
- stats->tx_packets = (unsigned long)atomic_long_read(&priv->tx_packets);
- stats->tx_dropped = (unsigned long)atomic_long_read(&priv->tx_dropped);
- stats->rx_bytes = (unsigned long)atomic_long_read(&priv->rx_bytes);
- stats->rx_packets = (unsigned long)atomic_long_read(&priv->rx_packets);
- stats->rx_errors = (unsigned long)atomic_long_read(&priv->rx_errors);
+ stats->tx_bytes = DEV_STATS_READ(dev, tx_bytes);
+ stats->tx_packets = DEV_STATS_READ(dev, tx_packets);
+ stats->tx_dropped = DEV_STATS_READ(dev, tx_dropped);
+ stats->rx_bytes = DEV_STATS_READ(dev, rx_bytes);
+ stats->rx_packets = DEV_STATS_READ(dev, rx_packets);
+ stats->rx_errors = DEV_STATS_READ(dev, rx_errors);
}
static const struct net_device_ops l2tp_eth_netdev_ops = {
@@ -126,7 +118,6 @@ static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb,
{
struct l2tp_eth_sess *spriv = l2tp_session_priv(session);
struct net_device *dev;
- struct l2tp_eth *priv;
if (!pskb_may_pull(skb, ETH_HLEN))
goto error;
@@ -144,12 +135,11 @@ static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb,
if (!dev)
goto error_rcu;
- priv = netdev_priv(dev);
if (dev_forward_skb(dev, skb) == NET_RX_SUCCESS) {
- atomic_long_inc(&priv->rx_packets);
- atomic_long_add(data_len, &priv->rx_bytes);
+ DEV_STATS_INC(dev, rx_packets);
+ DEV_STATS_ADD(dev, rx_bytes, data_len);
} else {
- atomic_long_inc(&priv->rx_errors);
+ DEV_STATS_INC(dev, rx_errors);
}
rcu_read_unlock();
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index ed8ebb6f5909..dd3153966173 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -431,7 +431,7 @@ static int l2tp_ip6_getname(struct socket *sock, struct sockaddr *uaddr,
return -ENOTCONN;
lsa->l2tp_conn_id = lsk->peer_conn_id;
lsa->l2tp_addr = sk->sk_v6_daddr;
- if (np->sndflow)
+ if (inet6_test_bit(SNDFLOW, sk))
lsa->l2tp_flowinfo = np->flow_label;
} else {
if (ipv6_addr_any(&sk->sk_v6_rcv_saddr))
@@ -507,7 +507,6 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
*/
if (len > INT_MAX - transhdrlen)
return -EMSGSIZE;
- ulen = len + transhdrlen;
/* Mirror BSD error message compatibility */
if (msg->msg_flags & MSG_OOB)
@@ -529,7 +528,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
return -EAFNOSUPPORT;
daddr = &lsa->l2tp_addr;
- if (np->sndflow) {
+ if (inet6_test_bit(SNDFLOW, sk)) {
fl6.flowlabel = lsa->l2tp_flowinfo & IPV6_FLOWINFO_MASK;
if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) {
flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
@@ -600,9 +599,9 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
final_p = fl6_update_dst(&fl6, opt, &final);
if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
- fl6.flowi6_oif = np->mcast_oif;
+ fl6.flowi6_oif = READ_ONCE(np->mcast_oif);
else if (!fl6.flowi6_oif)
- fl6.flowi6_oif = np->ucast_oif;
+ fl6.flowi6_oif = READ_ONCE(np->ucast_oif);
security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
@@ -621,13 +620,14 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
if (ipc6.dontfrag < 0)
- ipc6.dontfrag = np->dontfrag;
+ ipc6.dontfrag = inet6_test_bit(DONTFRAG, sk);
if (msg->msg_flags & MSG_CONFIRM)
goto do_confirm;
back_from_confirm:
lock_sock(sk);
+ ulen = len + skb_queue_empty(&sk->sk_write_queue) ? transhdrlen : 0;
err = ip6_append_data(sk, ip_generic_getfrag, msg,
ulen, transhdrlen, &ipc6,
&fl6, (struct rt6_info *)dst,
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 9b06c380866b..fde1140d899e 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -226,6 +226,8 @@ static int llc_ui_release(struct socket *sock)
}
netdev_put(llc->dev, &llc->dev_tracker);
sock_put(sk);
+ sock_orphan(sk);
+ sock->sk = NULL;
llc_sk_free(sk);
out:
return 0;
@@ -928,14 +930,15 @@ copy_uaddr:
*/
static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
{
+ DECLARE_SOCKADDR(struct sockaddr_llc *, addr, msg->msg_name);
struct sock *sk = sock->sk;
struct llc_sock *llc = llc_sk(sk);
- DECLARE_SOCKADDR(struct sockaddr_llc *, addr, msg->msg_name);
int flags = msg->msg_flags;
int noblock = flags & MSG_DONTWAIT;
+ int rc = -EINVAL, copied = 0, hdrlen, hh_len;
struct sk_buff *skb = NULL;
+ struct net_device *dev;
size_t size = 0;
- int rc = -EINVAL, copied = 0, hdrlen;
dprintk("%s: sending from %02X to %02X\n", __func__,
llc->laddr.lsap, llc->daddr.lsap);
@@ -955,22 +958,29 @@ static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
if (rc)
goto out;
}
- hdrlen = llc->dev->hard_header_len + llc_ui_header_len(sk, addr);
+ dev = llc->dev;
+ hh_len = LL_RESERVED_SPACE(dev);
+ hdrlen = llc_ui_header_len(sk, addr);
size = hdrlen + len;
- if (size > llc->dev->mtu)
- size = llc->dev->mtu;
+ size = min_t(size_t, size, READ_ONCE(dev->mtu));
copied = size - hdrlen;
rc = -EINVAL;
if (copied < 0)
goto out;
release_sock(sk);
- skb = sock_alloc_send_skb(sk, size, noblock, &rc);
+ skb = sock_alloc_send_skb(sk, hh_len + size, noblock, &rc);
lock_sock(sk);
if (!skb)
goto out;
- skb->dev = llc->dev;
+ if (sock_flag(sk, SOCK_ZAPPED) ||
+ llc->dev != dev ||
+ hdrlen != llc_ui_header_len(sk, addr) ||
+ hh_len != LL_RESERVED_SPACE(dev) ||
+ size > READ_ONCE(dev->mtu))
+ goto out;
+ skb->dev = dev;
skb->protocol = llc_proto_type(addr->sllc_arphrd);
- skb_reserve(skb, hdrlen);
+ skb_reserve(skb, hh_len + hdrlen);
rc = memcpy_from_msg(skb_put(skb, copied), msg, copied);
if (rc)
goto out;
diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c
index 6e387aadffce..4f16d9c88350 100644
--- a/net/llc/llc_core.c
+++ b/net/llc/llc_core.c
@@ -135,22 +135,15 @@ static struct packet_type llc_packet_type __read_mostly = {
.func = llc_rcv,
};
-static struct packet_type llc_tr_packet_type __read_mostly = {
- .type = cpu_to_be16(ETH_P_TR_802_2),
- .func = llc_rcv,
-};
-
static int __init llc_init(void)
{
dev_add_pack(&llc_packet_type);
- dev_add_pack(&llc_tr_packet_type);
return 0;
}
static void __exit llc_exit(void)
{
dev_remove_pack(&llc_packet_type);
- dev_remove_pack(&llc_tr_packet_type);
}
module_init(llc_init);
diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c
index 7cac441862e2..51bccfb00a9c 100644
--- a/net/llc/llc_input.c
+++ b/net/llc/llc_input.c
@@ -127,8 +127,14 @@ static inline int llc_fixup_skb(struct sk_buff *skb)
skb->transport_header += llc_len;
skb_pull(skb, llc_len);
if (skb->protocol == htons(ETH_P_802_2)) {
- __be16 pdulen = eth_hdr(skb)->h_proto;
- s32 data_size = ntohs(pdulen) - llc_len;
+ __be16 pdulen;
+ s32 data_size;
+
+ if (skb->mac_len < ETH_HLEN)
+ return 0;
+
+ pdulen = eth_hdr(skb)->h_proto;
+ data_size = ntohs(pdulen) - llc_len;
if (data_size < 0 ||
!pskb_may_pull(skb, data_size))
diff --git a/net/llc/llc_s_ac.c b/net/llc/llc_s_ac.c
index 79d1cef8f15a..06fb8e6944b0 100644
--- a/net/llc/llc_s_ac.c
+++ b/net/llc/llc_s_ac.c
@@ -153,6 +153,9 @@ int llc_sap_action_send_test_r(struct llc_sap *sap, struct sk_buff *skb)
int rc = 1;
u32 data_size;
+ if (skb->mac_len < ETH_HLEN)
+ return 1;
+
llc_pdu_decode_sa(skb, mac_da);
llc_pdu_decode_da(skb, mac_sa);
llc_pdu_decode_ssap(skb, &dsap);
diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c
index 05c6ae092053..f50654292510 100644
--- a/net/llc/llc_station.c
+++ b/net/llc/llc_station.c
@@ -76,6 +76,9 @@ static int llc_station_ac_send_test_r(struct sk_buff *skb)
u32 data_size;
struct sk_buff *nskb;
+ if (skb->mac_len < ETH_HLEN)
+ goto out;
+
/* The test request command is type U (llc_len = 3) */
data_size = ntohs(eth_hdr(skb)->h_proto) - 3;
nskb = llc_alloc_frame(NULL, skb->dev, LLC_PDU_TYPE_U, data_size);
diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index 51ec8256b7fa..13438cc0a6b1 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -57,6 +57,16 @@ endif
comment "Some wireless drivers require a rate control algorithm"
depends on MAC80211 && MAC80211_HAS_RC=n
+config MAC80211_KUNIT_TEST
+ tristate "KUnit tests for mac80211" if !KUNIT_ALL_TESTS
+ depends on KUNIT
+ depends on MAC80211
+ default KUNIT_ALL_TESTS
+ help
+ Enable this option to test mac80211 internals with kunit.
+
+ If unsure, say N.
+
config MAC80211_MESH
bool "Enable mac80211 mesh networking support"
depends on MAC80211
@@ -77,7 +87,7 @@ config MAC80211_LEDS
config MAC80211_DEBUGFS
bool "Export mac80211 internals in DebugFS"
- depends on MAC80211 && DEBUG_FS
+ depends on MAC80211 && CFG80211_DEBUGFS
help
Select this to see extensive information about
the internal state of mac80211 in debugfs.
diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile
index b8de44da1fb8..4406b4f8f3b9 100644
--- a/net/mac80211/Makefile
+++ b/net/mac80211/Makefile
@@ -65,4 +65,8 @@ rc80211_minstrel-$(CONFIG_MAC80211_DEBUGFS) += \
mac80211-$(CONFIG_MAC80211_RC_MINSTREL) += $(rc80211_minstrel-y)
+obj-y += tests/
+
+mac80211-y += wbrf.o
+
ccflags-y += -DDEBUG
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index c6fa53230450..9bffac7a4974 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -9,7 +9,7 @@
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2007-2010, Intel Corporation
* Copyright(c) 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2022 Intel Corporation
+ * Copyright (C) 2018-2023 Intel Corporation
*/
/**
@@ -55,8 +55,8 @@ static void ieee80211_free_tid_rx(struct rcu_head *h)
kfree(tid_rx);
}
-void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
- u16 initiator, u16 reason, bool tx)
+void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
+ u16 initiator, u16 reason, bool tx)
{
struct ieee80211_local *local = sta->local;
struct tid_ampdu_rx *tid_rx;
@@ -69,10 +69,10 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
.ssn = 0,
};
- lockdep_assert_held(&sta->ampdu_mlme.mtx);
+ lockdep_assert_wiphy(sta->local->hw.wiphy);
tid_rx = rcu_dereference_protected(sta->ampdu_mlme.tid_rx[tid],
- lockdep_is_held(&sta->ampdu_mlme.mtx));
+ lockdep_is_held(&sta->local->hw.wiphy->mtx));
if (!test_bit(tid, sta->ampdu_mlme.agg_session_valid))
return;
@@ -114,14 +114,6 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
call_rcu(&tid_rx->rcu_head, ieee80211_free_tid_rx);
}
-void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
- u16 initiator, u16 reason, bool tx)
-{
- mutex_lock(&sta->ampdu_mlme.mtx);
- ___ieee80211_stop_rx_ba_session(sta, tid, initiator, reason, tx);
- mutex_unlock(&sta->ampdu_mlme.mtx);
-}
-
void ieee80211_stop_rx_ba_session(struct ieee80211_vif *vif, u16 ba_rx_bitmap,
const u8 *addr)
{
@@ -140,7 +132,7 @@ void ieee80211_stop_rx_ba_session(struct ieee80211_vif *vif, u16 ba_rx_bitmap,
if (ba_rx_bitmap & BIT(i))
set_bit(i, sta->ampdu_mlme.tid_rx_stop_requested);
- ieee80211_queue_work(&sta->local->hw, &sta->ampdu_mlme.work);
+ wiphy_work_queue(sta->local->hw.wiphy, &sta->ampdu_mlme.work);
rcu_read_unlock();
}
EXPORT_SYMBOL(ieee80211_stop_rx_ba_session);
@@ -166,7 +158,7 @@ static void sta_rx_agg_session_timer_expired(struct timer_list *t)
sta->sta.addr, tid);
set_bit(tid, sta->ampdu_mlme.tid_rx_timer_expired);
- ieee80211_queue_work(&sta->local->hw, &sta->ampdu_mlme.work);
+ wiphy_work_queue(sta->local->hw.wiphy, &sta->ampdu_mlme.work);
}
static void sta_rx_agg_reorder_timer_expired(struct timer_list *t)
@@ -250,11 +242,11 @@ static void ieee80211_send_addba_resp(struct sta_info *sta, u8 *da, u16 tid,
ieee80211_tx_skb(sdata, skb);
}
-void ___ieee80211_start_rx_ba_session(struct sta_info *sta,
- u8 dialog_token, u16 timeout,
- u16 start_seq_num, u16 ba_policy, u16 tid,
- u16 buf_size, bool tx, bool auto_seq,
- const struct ieee80211_addba_ext_ie *addbaext)
+void __ieee80211_start_rx_ba_session(struct sta_info *sta,
+ u8 dialog_token, u16 timeout,
+ u16 start_seq_num, u16 ba_policy, u16 tid,
+ u16 buf_size, bool tx, bool auto_seq,
+ const struct ieee80211_addba_ext_ie *addbaext)
{
struct ieee80211_local *local = sta->sdata->local;
struct tid_ampdu_rx *tid_agg_rx;
@@ -270,6 +262,8 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta,
u16 status = WLAN_STATUS_REQUEST_DECLINED;
u16 max_buf_size;
+ lockdep_assert_wiphy(sta->local->hw.wiphy);
+
if (tid >= IEEE80211_FIRST_TSPEC_TSID) {
ht_dbg(sta->sdata,
"STA %pM requests BA session on unsupported tid %d\n",
@@ -325,9 +319,6 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta,
ht_dbg(sta->sdata, "AddBA Req buf_size=%d for %pM\n",
buf_size, sta->sta.addr);
- /* examine state machine */
- lockdep_assert_held(&sta->ampdu_mlme.mtx);
-
if (test_bit(tid, sta->ampdu_mlme.agg_session_valid)) {
if (sta->ampdu_mlme.tid_rx_token[tid] == dialog_token) {
struct tid_ampdu_rx *tid_rx;
@@ -355,9 +346,9 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta,
sta->sta.addr, tid);
/* delete existing Rx BA session on the same tid */
- ___ieee80211_stop_rx_ba_session(sta, tid, WLAN_BACK_RECIPIENT,
- WLAN_STATUS_UNSPECIFIED_QOS,
- false);
+ __ieee80211_stop_rx_ba_session(sta, tid, WLAN_BACK_RECIPIENT,
+ WLAN_STATUS_UNSPECIFIED_QOS,
+ false);
}
if (ieee80211_hw_check(&local->hw, SUPPORTS_REORDERING_BUFFER)) {
@@ -444,20 +435,6 @@ end:
timeout, addbaext);
}
-static void __ieee80211_start_rx_ba_session(struct sta_info *sta,
- u8 dialog_token, u16 timeout,
- u16 start_seq_num, u16 ba_policy,
- u16 tid, u16 buf_size, bool tx,
- bool auto_seq,
- const struct ieee80211_addba_ext_ie *addbaext)
-{
- mutex_lock(&sta->ampdu_mlme.mtx);
- ___ieee80211_start_rx_ba_session(sta, dialog_token, timeout,
- start_seq_num, ba_policy, tid,
- buf_size, tx, auto_seq, addbaext);
- mutex_unlock(&sta->ampdu_mlme.mtx);
-}
-
void ieee80211_process_addba_request(struct ieee80211_local *local,
struct sta_info *sta,
struct ieee80211_mgmt *mgmt,
@@ -507,7 +484,6 @@ void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif,
const u8 *addr, unsigned int tid)
{
struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
- struct ieee80211_local *local = sdata->local;
struct sta_info *sta;
rcu_read_lock();
@@ -516,7 +492,7 @@ void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif,
goto unlock;
set_bit(tid, sta->ampdu_mlme.tid_rx_manage_offl);
- ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work);
+ wiphy_work_queue(sta->local->hw.wiphy, &sta->ampdu_mlme.work);
unlock:
rcu_read_unlock();
}
@@ -526,7 +502,6 @@ void ieee80211_rx_ba_timer_expired(struct ieee80211_vif *vif,
const u8 *addr, unsigned int tid)
{
struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
- struct ieee80211_local *local = sdata->local;
struct sta_info *sta;
rcu_read_lock();
@@ -535,7 +510,7 @@ void ieee80211_rx_ba_timer_expired(struct ieee80211_vif *vif,
goto unlock;
set_bit(tid, sta->ampdu_mlme.tid_rx_timer_expired);
- ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work);
+ wiphy_work_queue(sta->local->hw.wiphy, &sta->ampdu_mlme.work);
unlock:
rcu_read_unlock();
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index b6b772685881..b8a278355e18 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -142,7 +142,7 @@ EXPORT_SYMBOL(ieee80211_send_bar);
void ieee80211_assign_tid_tx(struct sta_info *sta, int tid,
struct tid_ampdu_tx *tid_tx)
{
- lockdep_assert_held(&sta->ampdu_mlme.mtx);
+ lockdep_assert_wiphy(sta->local->hw.wiphy);
lockdep_assert_held(&sta->lock);
rcu_assign_pointer(sta->ampdu_mlme.tid_tx[tid], tid_tx);
}
@@ -213,7 +213,7 @@ ieee80211_agg_start_txq(struct sta_info *sta, int tid, bool enable)
struct ieee80211_txq *txq = sta->sta.txq[tid];
struct txq_info *txqi;
- lockdep_assert_held(&sta->ampdu_mlme.mtx);
+ lockdep_assert_wiphy(sta->local->hw.wiphy);
if (!txq)
return;
@@ -271,7 +271,7 @@ static void ieee80211_remove_tid_tx(struct sta_info *sta, int tid)
{
struct tid_ampdu_tx *tid_tx;
- lockdep_assert_held(&sta->ampdu_mlme.mtx);
+ lockdep_assert_wiphy(sta->local->hw.wiphy);
lockdep_assert_held(&sta->lock);
tid_tx = rcu_dereference_protected_tid_tx(sta, tid);
@@ -296,8 +296,8 @@ static void ieee80211_remove_tid_tx(struct sta_info *sta, int tid)
kfree_rcu(tid_tx, rcu_head);
}
-int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
- enum ieee80211_agg_stop_reason reason)
+int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
+ enum ieee80211_agg_stop_reason reason)
{
struct ieee80211_local *local = sta->local;
struct tid_ampdu_tx *tid_tx;
@@ -311,7 +311,7 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
};
int ret;
- lockdep_assert_held(&sta->ampdu_mlme.mtx);
+ lockdep_assert_wiphy(sta->local->hw.wiphy);
switch (reason) {
case AGG_STOP_DECLINED:
@@ -461,7 +461,7 @@ static void ieee80211_send_addba_with_timeout(struct sta_info *sta,
test_bit(HT_AGG_STATE_WANT_STOP, &tid_tx->state)))
return;
- lockdep_assert_held(&sta->ampdu_mlme.mtx);
+ lockdep_assert_wiphy(sta->local->hw.wiphy);
/* activate the timer for the recipient's addBA response */
mod_timer(&tid_tx->addba_resp_timer, jiffies + ADDBA_RESP_INTERVAL);
@@ -497,7 +497,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
{
struct tid_ampdu_tx *tid_tx;
struct ieee80211_local *local = sta->local;
- struct ieee80211_sub_if_data *sdata;
+ struct ieee80211_sub_if_data *sdata = sta->sdata;
struct ieee80211_ampdu_params params = {
.sta = &sta->sta,
.action = IEEE80211_AMPDU_TX_START,
@@ -525,7 +525,6 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
*/
synchronize_net();
- sdata = sta->sdata;
params.ssn = sta->tid_seq[tid] >> 4;
ret = drv_ampdu_action(local, sdata, &params);
tid_tx->ssn = params.ssn;
@@ -539,9 +538,6 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
*/
set_bit(HT_AGG_STATE_DRV_READY, &tid_tx->state);
} else if (ret) {
- if (!sdata)
- return;
-
ht_dbg(sdata,
"BA request denied - HW unavailable for %pM tid %d\n",
sta->sta.addr, tid);
@@ -743,7 +739,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid,
*/
sta->ampdu_mlme.tid_start_tx[tid] = tid_tx;
- ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work);
+ wiphy_work_queue(local->hw.wiphy, &sta->ampdu_mlme.work);
/* this flow continues off the work */
err_unlock_sta:
@@ -764,7 +760,7 @@ static void ieee80211_agg_tx_operational(struct ieee80211_local *local,
.ssn = 0,
};
- lockdep_assert_held(&sta->ampdu_mlme.mtx);
+ lockdep_assert_wiphy(sta->local->hw.wiphy);
tid_tx = rcu_dereference_protected_tid_tx(sta, tid);
params.buf_size = tid_tx->buf_size;
@@ -801,7 +797,7 @@ void ieee80211_start_tx_ba_cb(struct sta_info *sta, int tid,
struct ieee80211_sub_if_data *sdata = sta->sdata;
struct ieee80211_local *local = sdata->local;
- lockdep_assert_held(&sta->ampdu_mlme.mtx);
+ lockdep_assert_wiphy(sta->local->hw.wiphy);
if (WARN_ON(test_and_set_bit(HT_AGG_STATE_DRV_READY, &tid_tx->state)))
return;
@@ -862,26 +858,12 @@ void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_vif *vif,
goto out;
set_bit(HT_AGG_STATE_START_CB, &tid_tx->state);
- ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work);
+ wiphy_work_queue(local->hw.wiphy, &sta->ampdu_mlme.work);
out:
rcu_read_unlock();
}
EXPORT_SYMBOL(ieee80211_start_tx_ba_cb_irqsafe);
-int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
- enum ieee80211_agg_stop_reason reason)
-{
- int ret;
-
- mutex_lock(&sta->ampdu_mlme.mtx);
-
- ret = ___ieee80211_stop_tx_ba_session(sta, tid, reason);
-
- mutex_unlock(&sta->ampdu_mlme.mtx);
-
- return ret;
-}
-
int ieee80211_stop_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid)
{
struct sta_info *sta = container_of(pubsta, struct sta_info, sta);
@@ -916,7 +898,7 @@ int ieee80211_stop_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid)
}
set_bit(HT_AGG_STATE_WANT_STOP, &tid_tx->state);
- ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work);
+ wiphy_work_queue(local->hw.wiphy, &sta->ampdu_mlme.work);
unlock:
spin_unlock_bh(&sta->lock);
@@ -976,7 +958,7 @@ void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_vif *vif,
goto out;
set_bit(HT_AGG_STATE_STOP_CB, &tid_tx->state);
- ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work);
+ wiphy_work_queue(local->hw.wiphy, &sta->ampdu_mlme.work);
out:
rcu_read_unlock();
}
@@ -993,6 +975,8 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local,
u16 capab, tid, buf_size;
bool amsdu;
+ lockdep_assert_wiphy(sta->local->hw.wiphy);
+
capab = le16_to_cpu(mgmt->u.action.u.addba_resp.capab);
amsdu = capab & IEEE80211_ADDBA_PARAM_AMSDU_MASK;
tid = u16_get_bits(capab, IEEE80211_ADDBA_PARAM_TID_MASK);
@@ -1003,16 +987,14 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local,
if (!amsdu && txq)
set_bit(IEEE80211_TXQ_NO_AMSDU, &to_txq_info(txq)->flags);
- mutex_lock(&sta->ampdu_mlme.mtx);
-
tid_tx = rcu_dereference_protected_tid_tx(sta, tid);
if (!tid_tx)
- goto out;
+ return;
if (mgmt->u.action.u.addba_resp.dialog_token != tid_tx->dialog_token) {
ht_dbg(sta->sdata, "wrong addBA response token, %pM tid %d\n",
sta->sta.addr, tid);
- goto out;
+ return;
}
del_timer_sync(&tid_tx->addba_resp_timer);
@@ -1030,7 +1012,7 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local,
ht_dbg(sta->sdata,
"got addBA resp for %pM tid %d but we already gave up\n",
sta->sta.addr, tid);
- goto out;
+ return;
}
/*
@@ -1044,7 +1026,7 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local,
if (test_and_set_bit(HT_AGG_STATE_RESPONSE_RECEIVED,
&tid_tx->state)) {
/* ignore duplicate response */
- goto out;
+ return;
}
tid_tx->buf_size = buf_size;
@@ -1065,9 +1047,6 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local,
}
} else {
- ___ieee80211_stop_tx_ba_session(sta, tid, AGG_STOP_DECLINED);
+ __ieee80211_stop_tx_ba_session(sta, tid, AGG_STOP_DECLINED);
}
-
- out:
- mutex_unlock(&sta->ampdu_mlme.mtx);
}
diff --git a/net/mac80211/airtime.c b/net/mac80211/airtime.c
index e8ebd343e2bf..fdf8b658fede 100644
--- a/net/mac80211/airtime.c
+++ b/net/mac80211/airtime.c
@@ -557,7 +557,7 @@ static int ieee80211_fill_rx_status(struct ieee80211_rx_status *stat,
if (ieee80211_fill_rate_info(hw, stat, band, ri))
return 0;
- if (rate->idx < 0 || !rate->count)
+ if (!ieee80211_rate_valid(rate))
return -1;
if (rate->flags & IEEE80211_TX_RC_160_MHZ_WIDTH)
@@ -632,7 +632,7 @@ u32 ieee80211_calc_expected_tx_airtime(struct ieee80211_hw *hw,
{
struct ieee80211_supported_band *sband;
struct ieee80211_chanctx_conf *conf;
- int rateidx, shift = 0;
+ int rateidx;
bool cck, short_pream;
u32 basic_rates;
u8 band = 0;
@@ -641,10 +641,8 @@ u32 ieee80211_calc_expected_tx_airtime(struct ieee80211_hw *hw,
len += 38; /* Ethernet header length */
conf = rcu_dereference(vif->bss_conf.chanctx_conf);
- if (conf) {
+ if (conf)
band = conf->def.chan->band;
- shift = ieee80211_chandef_get_shift(&conf->def);
- }
if (pubsta) {
struct sta_info *sta = container_of(pubsta, struct sta_info,
@@ -704,7 +702,7 @@ u32 ieee80211_calc_expected_tx_airtime(struct ieee80211_hw *hw,
short_pream = vif->bss_conf.use_short_preamble;
rateidx = basic_rates ? ffs(basic_rates) - 1 : 0;
- rate = sband->bitrates[rateidx].bitrate << shift;
+ rate = sband->bitrates[rateidx].bitrate;
cck = sband->bitrates[rateidx].flags & IEEE80211_RATE_MANDATORY_B;
return ieee80211_calc_legacy_rate_duration(rate, short_pream, cck, len);
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 45e7a5d9c7d9..327682995c92 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -5,7 +5,7 @@
* Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2015 Intel Mobile Communications GmbH
* Copyright (C) 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2022 Intel Corporation
+ * Copyright (C) 2018-2024 Intel Corporation
*/
#include <linux/ieee80211.h>
@@ -214,6 +214,8 @@ static int ieee80211_change_iface(struct wiphy *wiphy,
struct sta_info *sta;
int ret;
+ lockdep_assert_wiphy(local->hw.wiphy);
+
ret = ieee80211_if_change_type(sdata, type);
if (ret)
return ret;
@@ -235,12 +237,10 @@ static int ieee80211_change_iface(struct wiphy *wiphy,
if (!ifmgd->associated)
return 0;
- mutex_lock(&local->sta_mtx);
sta = sta_info_get(sdata, sdata->deflink.u.mgd.bssid);
if (sta)
drv_sta_set_4addr(local, sdata, &sta->sta,
params->use_4addr);
- mutex_unlock(&local->sta_mtx);
if (params->use_4addr)
ieee80211_send_4addr_nullfunc(local, sdata);
@@ -261,9 +261,9 @@ static int ieee80211_start_p2p_device(struct wiphy *wiphy,
struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
int ret;
- mutex_lock(&sdata->local->chanctx_mtx);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
+
ret = ieee80211_check_combinations(sdata, NULL, 0, 0);
- mutex_unlock(&sdata->local->chanctx_mtx);
if (ret < 0)
return ret;
@@ -283,9 +283,9 @@ static int ieee80211_start_nan(struct wiphy *wiphy,
struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
int ret;
- mutex_lock(&sdata->local->chanctx_mtx);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
+
ret = ieee80211_check_combinations(sdata, NULL, 0, 0);
- mutex_unlock(&sdata->local->chanctx_mtx);
if (ret < 0)
return ret;
@@ -452,13 +452,11 @@ static int ieee80211_set_tx(struct ieee80211_sub_if_data *sdata,
if (sta->ptk_idx == key_idx)
return 0;
- mutex_lock(&local->key_mtx);
- key = key_mtx_dereference(local, sta->ptk[key_idx]);
+ key = wiphy_dereference(local->hw.wiphy, sta->ptk[key_idx]);
if (key && key->conf.flags & IEEE80211_KEY_FLAG_NO_AUTO_TX)
ret = ieee80211_set_tx_key(key);
- mutex_unlock(&local->key_mtx);
return ret;
}
@@ -474,6 +472,8 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
struct ieee80211_key *key;
int err;
+ lockdep_assert_wiphy(local->hw.wiphy);
+
if (!ieee80211_sdata_running(sdata))
return -ENETDOWN;
@@ -510,8 +510,6 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
if (params->mode == NL80211_KEY_NO_TX)
key->conf.flags |= IEEE80211_KEY_FLAG_NO_AUTO_TX;
- mutex_lock(&local->sta_mtx);
-
if (mac_addr) {
sta = sta_info_get_bss(sdata, mac_addr);
/*
@@ -526,8 +524,7 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
*/
if (!sta || !test_sta_flag(sta, WLAN_STA_ASSOC)) {
ieee80211_key_free_unused(key);
- err = -ENOENT;
- goto out_unlock;
+ return -ENOENT;
}
}
@@ -566,9 +563,9 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
}
err = ieee80211_key_link(key, link, sta);
-
- out_unlock:
- mutex_unlock(&local->sta_mtx);
+ /* KRACK protection, shouldn't happen but just silently accept key */
+ if (err == -EALREADY)
+ err = 0;
return err;
}
@@ -582,8 +579,7 @@ ieee80211_lookup_key(struct ieee80211_sub_if_data *sdata, int link_id,
struct ieee80211_key *key;
if (link_id >= 0) {
- link = rcu_dereference_check(sdata->link[link_id],
- lockdep_is_held(&sdata->wdev.mtx));
+ link = sdata_dereference(sdata->link[link_id], sdata);
if (!link)
return NULL;
}
@@ -598,7 +594,7 @@ ieee80211_lookup_key(struct ieee80211_sub_if_data *sdata, int link_id,
if (link_id >= 0) {
link_sta = rcu_dereference_check(sta->link[link_id],
- lockdep_is_held(&local->sta_mtx));
+ lockdep_is_held(&local->hw.wiphy->mtx));
if (!link_sta)
return NULL;
} else {
@@ -606,30 +602,29 @@ ieee80211_lookup_key(struct ieee80211_sub_if_data *sdata, int link_id,
}
if (pairwise && key_idx < NUM_DEFAULT_KEYS)
- return rcu_dereference_check_key_mtx(local,
- sta->ptk[key_idx]);
+ return wiphy_dereference(local->hw.wiphy,
+ sta->ptk[key_idx]);
if (!pairwise &&
key_idx < NUM_DEFAULT_KEYS +
NUM_DEFAULT_MGMT_KEYS +
NUM_DEFAULT_BEACON_KEYS)
- return rcu_dereference_check_key_mtx(local,
- link_sta->gtk[key_idx]);
+ return wiphy_dereference(local->hw.wiphy,
+ link_sta->gtk[key_idx]);
return NULL;
}
if (pairwise && key_idx < NUM_DEFAULT_KEYS)
- return rcu_dereference_check_key_mtx(local,
- sdata->keys[key_idx]);
+ return wiphy_dereference(local->hw.wiphy, sdata->keys[key_idx]);
- key = rcu_dereference_check_key_mtx(local, link->gtk[key_idx]);
+ key = wiphy_dereference(local->hw.wiphy, link->gtk[key_idx]);
if (key)
return key;
/* or maybe it was a WEP key */
if (key_idx < NUM_DEFAULT_KEYS)
- return rcu_dereference_check_key_mtx(local, sdata->keys[key_idx]);
+ return wiphy_dereference(local->hw.wiphy, sdata->keys[key_idx]);
return NULL;
}
@@ -641,25 +636,16 @@ static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev,
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
struct ieee80211_local *local = sdata->local;
struct ieee80211_key *key;
- int ret;
- mutex_lock(&local->sta_mtx);
- mutex_lock(&local->key_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
key = ieee80211_lookup_key(sdata, link_id, key_idx, pairwise, mac_addr);
- if (!key) {
- ret = -ENOENT;
- goto out_unlock;
- }
+ if (!key)
+ return -ENOENT;
ieee80211_key_free(key, sdata->vif.type == NL80211_IFTYPE_STATION);
- ret = 0;
- out_unlock:
- mutex_unlock(&local->key_mtx);
- mutex_unlock(&local->sta_mtx);
-
- return ret;
+ return 0;
}
static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev,
@@ -830,15 +816,11 @@ void sta_set_rate_info_tx(struct sta_info *sta,
rinfo->nss = ieee80211_rate_get_vht_nss(rate);
} else {
struct ieee80211_supported_band *sband;
- int shift = ieee80211_vif_get_shift(&sta->sdata->vif);
- u16 brate;
sband = ieee80211_get_sband(sta->sdata);
WARN_ON_ONCE(sband && !sband->bitrates);
- if (sband && sband->bitrates) {
- brate = sband->bitrates[rate->idx].bitrate;
- rinfo->legacy = DIV_ROUND_UP(brate, 1 << shift);
- }
+ if (sband && sband->bitrates)
+ rinfo->legacy = sband->bitrates[rate->idx].bitrate;
}
if (rate->flags & IEEE80211_TX_RC_40_MHZ_WIDTH)
rinfo->bw = RATE_INFO_BW_40;
@@ -860,7 +842,7 @@ static int ieee80211_dump_station(struct wiphy *wiphy, struct net_device *dev,
struct sta_info *sta;
int ret = -ENOENT;
- mutex_lock(&local->sta_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
sta = sta_info_get_by_idx(sdata, idx);
if (sta) {
@@ -869,8 +851,6 @@ static int ieee80211_dump_station(struct wiphy *wiphy, struct net_device *dev,
sta_set_sinfo(sta, sinfo, true);
}
- mutex_unlock(&local->sta_mtx);
-
return ret;
}
@@ -890,7 +870,7 @@ static int ieee80211_get_station(struct wiphy *wiphy, struct net_device *dev,
struct sta_info *sta;
int ret = -ENOENT;
- mutex_lock(&local->sta_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
sta = sta_info_get_bss(sdata, mac);
if (sta) {
@@ -898,8 +878,6 @@ static int ieee80211_get_station(struct wiphy *wiphy, struct net_device *dev,
sta_set_sinfo(sta, sinfo, true);
}
- mutex_unlock(&local->sta_mtx);
-
return ret;
}
@@ -910,6 +888,8 @@ static int ieee80211_set_monitor_channel(struct wiphy *wiphy,
struct ieee80211_sub_if_data *sdata;
int ret = 0;
+ lockdep_assert_wiphy(local->hw.wiphy);
+
if (cfg80211_chandef_identical(&local->monitor_chandef, chandef))
return 0;
@@ -917,22 +897,16 @@ static int ieee80211_set_monitor_channel(struct wiphy *wiphy,
sdata = wiphy_dereference(local->hw.wiphy,
local->monitor_sdata);
if (sdata) {
- sdata_lock(sdata);
- mutex_lock(&local->mtx);
ieee80211_link_release_channel(&sdata->deflink);
ret = ieee80211_link_use_channel(&sdata->deflink,
chandef,
IEEE80211_CHANCTX_EXCLUSIVE);
- mutex_unlock(&local->mtx);
- sdata_unlock(sdata);
}
} else {
- mutex_lock(&local->mtx);
if (local->open_count == local->monitors) {
local->_oper_chandef = *chandef;
ieee80211_hw_config(local, 0);
}
- mutex_unlock(&local->mtx);
}
if (ret == 0)
@@ -984,51 +958,61 @@ static int ieee80211_set_fils_discovery(struct ieee80211_sub_if_data *sdata,
struct fils_discovery_data *new, *old = NULL;
struct ieee80211_fils_discovery *fd;
- if (!params->tmpl || !params->tmpl_len)
- return -EINVAL;
+ if (!params->update)
+ return 0;
fd = &link_conf->fils_discovery;
fd->min_interval = params->min_interval;
fd->max_interval = params->max_interval;
old = sdata_dereference(link->u.ap.fils_discovery, sdata);
- new = kzalloc(sizeof(*new) + params->tmpl_len, GFP_KERNEL);
- if (!new)
- return -ENOMEM;
- new->len = params->tmpl_len;
- memcpy(new->data, params->tmpl, params->tmpl_len);
- rcu_assign_pointer(link->u.ap.fils_discovery, new);
-
if (old)
kfree_rcu(old, rcu_head);
- return 0;
+ if (params->tmpl && params->tmpl_len) {
+ new = kzalloc(sizeof(*new) + params->tmpl_len, GFP_KERNEL);
+ if (!new)
+ return -ENOMEM;
+ new->len = params->tmpl_len;
+ memcpy(new->data, params->tmpl, params->tmpl_len);
+ rcu_assign_pointer(link->u.ap.fils_discovery, new);
+ } else {
+ RCU_INIT_POINTER(link->u.ap.fils_discovery, NULL);
+ }
+
+ return BSS_CHANGED_FILS_DISCOVERY;
}
static int
ieee80211_set_unsol_bcast_probe_resp(struct ieee80211_sub_if_data *sdata,
struct cfg80211_unsol_bcast_probe_resp *params,
struct ieee80211_link_data *link,
- struct ieee80211_bss_conf *link_conf)
+ struct ieee80211_bss_conf *link_conf,
+ u64 *changed)
{
struct unsol_bcast_probe_resp_data *new, *old = NULL;
- if (!params->tmpl || !params->tmpl_len)
- return -EINVAL;
+ if (!params->update)
+ return 0;
- old = sdata_dereference(link->u.ap.unsol_bcast_probe_resp, sdata);
- new = kzalloc(sizeof(*new) + params->tmpl_len, GFP_KERNEL);
- if (!new)
- return -ENOMEM;
- new->len = params->tmpl_len;
- memcpy(new->data, params->tmpl, params->tmpl_len);
- rcu_assign_pointer(link->u.ap.unsol_bcast_probe_resp, new);
+ link_conf->unsol_bcast_probe_resp_interval = params->interval;
+ old = sdata_dereference(link->u.ap.unsol_bcast_probe_resp, sdata);
if (old)
kfree_rcu(old, rcu_head);
- link_conf->unsol_bcast_probe_resp_interval = params->interval;
+ if (params->tmpl && params->tmpl_len) {
+ new = kzalloc(sizeof(*new) + params->tmpl_len, GFP_KERNEL);
+ if (!new)
+ return -ENOMEM;
+ new->len = params->tmpl_len;
+ memcpy(new->data, params->tmpl, params->tmpl_len);
+ rcu_assign_pointer(link->u.ap.unsol_bcast_probe_resp, new);
+ } else {
+ RCU_INIT_POINTER(link->u.ap.unsol_bcast_probe_resp, NULL);
+ }
+ *changed |= BSS_CHANGED_UNSOL_BCAST_PROBE_RESP;
return 0;
}
@@ -1275,6 +1259,8 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
struct ieee80211_link_data *link;
struct ieee80211_bss_conf *link_conf;
+ lockdep_assert_wiphy(local->hw.wiphy);
+
link = sdata_dereference(sdata->link[link_id], sdata);
if (!link)
return -ENOLINK;
@@ -1286,7 +1272,7 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
return -EALREADY;
if (params->smps_mode != NL80211_SMPS_OFF)
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
link->smps_mode = IEEE80211_SMPS_OFF;
@@ -1384,12 +1370,10 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
return err;
}
- mutex_lock(&local->mtx);
err = ieee80211_link_use_channel(link, &params->chandef,
IEEE80211_CHANCTX_SHARED);
if (!err)
ieee80211_link_copy_chanctx_to_vlans(link, false);
- mutex_unlock(&local->mtx);
if (err) {
link_conf->beacon_int = prev_beacon_int;
return err;
@@ -1460,23 +1444,17 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
if (err < 0)
goto error;
- if (params->fils_discovery.max_interval) {
- err = ieee80211_set_fils_discovery(sdata,
- &params->fils_discovery,
- link, link_conf);
- if (err < 0)
- goto error;
- changed |= BSS_CHANGED_FILS_DISCOVERY;
- }
+ err = ieee80211_set_fils_discovery(sdata, &params->fils_discovery,
+ link, link_conf);
+ if (err < 0)
+ goto error;
+ changed |= err;
- if (params->unsol_bcast_probe_resp.interval) {
- err = ieee80211_set_unsol_bcast_probe_resp(sdata,
- &params->unsol_bcast_probe_resp,
- link, link_conf);
- if (err < 0)
- goto error;
- changed |= BSS_CHANGED_UNSOL_BCAST_PROBE_RESP;
- }
+ err = ieee80211_set_unsol_bcast_probe_resp(sdata,
+ &params->unsol_bcast_probe_resp,
+ link, link_conf, &changed);
+ if (err < 0)
+ goto error;
err = drv_start_ap(sdata->local, sdata, link_conf);
if (err) {
@@ -1500,26 +1478,26 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
return 0;
error:
- mutex_lock(&local->mtx);
ieee80211_link_release_channel(link);
- mutex_unlock(&local->mtx);
return err;
}
static int ieee80211_change_beacon(struct wiphy *wiphy, struct net_device *dev,
- struct cfg80211_beacon_data *params)
+ struct cfg80211_ap_update *params)
+
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
struct ieee80211_link_data *link;
+ struct cfg80211_beacon_data *beacon = &params->beacon;
struct beacon_data *old;
int err;
struct ieee80211_bss_conf *link_conf;
u64 changed = 0;
- sdata_assert_lock(sdata);
+ lockdep_assert_wiphy(wiphy);
- link = sdata_dereference(sdata->link[params->link_id], sdata);
+ link = sdata_dereference(sdata->link[beacon->link_id], sdata);
if (!link)
return -ENOLINK;
@@ -1535,14 +1513,26 @@ static int ieee80211_change_beacon(struct wiphy *wiphy, struct net_device *dev,
if (!old)
return -ENOENT;
- err = ieee80211_assign_beacon(sdata, link, params, NULL, NULL,
+ err = ieee80211_assign_beacon(sdata, link, beacon, NULL, NULL,
&changed);
if (err < 0)
return err;
- if (params->he_bss_color_valid &&
- params->he_bss_color.enabled != link_conf->he_bss_color.enabled) {
- link_conf->he_bss_color.enabled = params->he_bss_color.enabled;
+ err = ieee80211_set_fils_discovery(sdata, &params->fils_discovery,
+ link, link_conf);
+ if (err < 0)
+ return err;
+ changed |= err;
+
+ err = ieee80211_set_unsol_bcast_probe_resp(sdata,
+ &params->unsol_bcast_probe_resp,
+ link, link_conf, &changed);
+ if (err < 0)
+ return err;
+
+ if (beacon->he_bss_color_valid &&
+ beacon->he_bss_color.enabled != link_conf->he_bss_color.enabled) {
+ link_conf->he_bss_color.enabled = beacon->he_bss_color.enabled;
changed |= BSS_CHANGED_HE_BSS_COLOR;
}
@@ -1576,7 +1566,7 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev,
sdata_dereference(sdata->link[link_id], sdata);
struct ieee80211_bss_conf *link_conf = link->conf;
- sdata_assert_lock(sdata);
+ lockdep_assert_wiphy(local->hw.wiphy);
old_beacon = sdata_dereference(link->u.ap.beacon, sdata);
if (!old_beacon)
@@ -1590,7 +1580,6 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev,
sdata);
/* abort any running channel switch or color change */
- mutex_lock(&local->mtx);
link_conf->csa_active = false;
link_conf->color_change_active = false;
if (link->csa_block_tx) {
@@ -1599,8 +1588,6 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev,
link->csa_block_tx = false;
}
- mutex_unlock(&local->mtx);
-
ieee80211_free_next_beacon(link);
/* turn off carrier for this interface and dependent VLANs */
@@ -1643,7 +1630,7 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev,
if (sdata->wdev.cac_started) {
chandef = link_conf->chandef;
- cancel_delayed_work_sync(&link->dfs_cac_timer_work);
+ wiphy_delayed_work_cancel(wiphy, &link->dfs_cac_timer_work);
cfg80211_cac_event(sdata->dev, &chandef,
NL80211_RADAR_CAC_ABORTED,
GFP_KERNEL);
@@ -1655,10 +1642,8 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev,
local->total_ps_buffered -= skb_queue_len(&sdata->u.ap.ps.bc_buf);
ieee80211_purge_tx_queue(&local->hw, &sdata->u.ap.ps.bc_buf);
- mutex_lock(&local->mtx);
ieee80211_link_copy_chanctx_to_vlans(link, true);
ieee80211_link_release_channel(link);
- mutex_unlock(&local->mtx);
return 0;
}
@@ -1800,13 +1785,13 @@ static int sta_link_apply_parameters(struct ieee80211_local *local,
sdata_dereference(sdata->link[link_id], sdata);
struct link_sta_info *link_sta =
rcu_dereference_protected(sta->link[link_id],
- lockdep_is_held(&local->sta_mtx));
+ lockdep_is_held(&local->hw.wiphy->mtx));
/*
- * If there are no changes, then accept a link that doesn't exist,
+ * If there are no changes, then accept a link that exist,
* unless it's a new link.
*/
- if (params->link_id < 0 && !new_link &&
+ if (params->link_id >= 0 && !new_link &&
!params->link_mac && !params->txpwr_set &&
!params->supported_rates_len &&
!params->ht_capa && !params->vht_capa &&
@@ -1857,7 +1842,8 @@ static int sta_link_apply_parameters(struct ieee80211_local *local,
/* VHT can override some HT caps such as the A-MSDU max length */
if (params->vht_capa)
ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband,
- params->vht_capa, link_sta);
+ params->vht_capa, NULL,
+ link_sta);
if (params->he_capa)
ieee80211_he_cap_ie_to_sta_he_cap(sdata, sband,
@@ -1883,6 +1869,8 @@ static int sta_link_apply_parameters(struct ieee80211_local *local,
sband->band);
}
+ ieee80211_sta_set_rx_nss(link_sta);
+
return ret;
}
@@ -2034,6 +2022,8 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev,
struct ieee80211_sub_if_data *sdata;
int err;
+ lockdep_assert_wiphy(local->hw.wiphy);
+
if (params->vlan) {
sdata = IEEE80211_DEV_TO_SUB_IF(params->vlan);
@@ -2077,9 +2067,7 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev,
* visible yet), sta_apply_parameters (and inner functions) require
* the mutex due to other paths.
*/
- mutex_lock(&local->sta_mtx);
err = sta_apply_parameters(local, sta, params);
- mutex_unlock(&local->sta_mtx);
if (err) {
sta_info_free(local, sta);
return err;
@@ -2122,13 +2110,11 @@ static int ieee80211_change_station(struct wiphy *wiphy,
enum cfg80211_station_type statype;
int err;
- mutex_lock(&local->sta_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
sta = sta_info_get_bss(sdata, mac);
- if (!sta) {
- err = -ENOENT;
- goto out_err;
- }
+ if (!sta)
+ return -ENOENT;
switch (sdata->vif.type) {
case NL80211_IFTYPE_MESH_POINT:
@@ -2158,22 +2144,19 @@ static int ieee80211_change_station(struct wiphy *wiphy,
statype = CFG80211_STA_AP_CLIENT_UNASSOC;
break;
default:
- err = -EOPNOTSUPP;
- goto out_err;
+ return -EOPNOTSUPP;
}
err = cfg80211_check_station_change(wiphy, params, statype);
if (err)
- goto out_err;
+ return err;
if (params->vlan && params->vlan != sta->sdata->dev) {
vlansdata = IEEE80211_DEV_TO_SUB_IF(params->vlan);
if (params->vlan->ieee80211_ptr->use_4addr) {
- if (vlansdata->u.vlan.sta) {
- err = -EBUSY;
- goto out_err;
- }
+ if (vlansdata->u.vlan.sta)
+ return -EBUSY;
rcu_assign_pointer(vlansdata->u.vlan.sta, sta);
__ieee80211_check_fast_rx_iface(vlansdata);
@@ -2199,18 +2182,9 @@ static int ieee80211_change_station(struct wiphy *wiphy,
}
}
- /* we use sta_info_get_bss() so this might be different */
- if (sdata != sta->sdata) {
- mutex_lock_nested(&sta->sdata->wdev.mtx, 1);
- err = sta_apply_parameters(local, sta, params);
- mutex_unlock(&sta->sdata->wdev.mtx);
- } else {
- err = sta_apply_parameters(local, sta, params);
- }
+ err = sta_apply_parameters(local, sta, params);
if (err)
- goto out_err;
-
- mutex_unlock(&local->sta_mtx);
+ return err;
if (sdata->vif.type == NL80211_IFTYPE_STATION &&
params->sta_flags_mask & BIT(NL80211_STA_FLAG_AUTHORIZED)) {
@@ -2219,9 +2193,6 @@ static int ieee80211_change_station(struct wiphy *wiphy,
}
return 0;
-out_err:
- mutex_unlock(&local->sta_mtx);
- return err;
}
#ifdef CONFIG_MAC80211_MESH
@@ -2587,7 +2558,7 @@ static int ieee80211_update_mesh_config(struct wiphy *wiphy,
* devices that report signal in dBm.
*/
if (!ieee80211_hw_check(&sdata->local->hw, SIGNAL_DBM))
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
conf->rssi_threshold = nconf->rssi_threshold;
}
if (_chg_mesh_attr(NL80211_MESHCONF_HT_OPMODE, mask)) {
@@ -2634,6 +2605,8 @@ static int ieee80211_join_mesh(struct wiphy *wiphy, struct net_device *dev,
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
int err;
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
+
memcpy(&ifmsh->mshcfg, conf, sizeof(struct mesh_config));
err = copy_mesh_setup(ifmsh, setup);
if (err)
@@ -2645,10 +2618,8 @@ static int ieee80211_join_mesh(struct wiphy *wiphy, struct net_device *dev,
sdata->deflink.smps_mode = IEEE80211_SMPS_OFF;
sdata->deflink.needed_rx_chains = sdata->local->rx_chains;
- mutex_lock(&sdata->local->mtx);
err = ieee80211_link_use_channel(&sdata->deflink, &setup->chandef,
IEEE80211_CHANCTX_SHARED);
- mutex_unlock(&sdata->local->mtx);
if (err)
return err;
@@ -2659,11 +2630,11 @@ static int ieee80211_leave_mesh(struct wiphy *wiphy, struct net_device *dev)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
+
ieee80211_stop_mesh(sdata);
- mutex_lock(&sdata->local->mtx);
ieee80211_link_release_channel(&sdata->deflink);
kfree(sdata->u.mesh.ie);
- mutex_unlock(&sdata->local->mtx);
return 0;
}
@@ -3021,6 +2992,8 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy,
bool update_txp_type = false;
bool has_monitor = false;
+ lockdep_assert_wiphy(local->hw.wiphy);
+
if (wdev) {
sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
@@ -3068,7 +3041,6 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy,
break;
}
- mutex_lock(&local->iflist_mtx);
list_for_each_entry(sdata, &local->interfaces, list) {
if (sdata->vif.type == NL80211_IFTYPE_MONITOR) {
has_monitor = true;
@@ -3084,7 +3056,6 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy,
continue;
ieee80211_recalc_txpower(sdata, update_txp_type);
}
- mutex_unlock(&local->iflist_mtx);
if (has_monitor) {
sdata = wiphy_dereference(local->hw.wiphy,
@@ -3117,6 +3088,10 @@ static int ieee80211_get_tx_power(struct wiphy *wiphy,
else
*dbm = sdata->vif.bss_conf.txpower;
+ /* INT_MIN indicates no power level was set yet */
+ if (*dbm == INT_MIN)
+ return -EINVAL;
+
return 0;
}
@@ -3173,14 +3148,24 @@ int __ieee80211_request_smps_mgd(struct ieee80211_sub_if_data *sdata,
struct sta_info *sta;
bool tdls_peer_found = false;
- lockdep_assert_held(&sdata->wdev.mtx);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
if (WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_STATION))
return -EINVAL;
+ if (ieee80211_vif_is_mld(&sdata->vif) &&
+ !(sdata->vif.active_links & BIT(link->link_id)))
+ return 0;
+
old_req = link->u.mgd.req_smps;
link->u.mgd.req_smps = smps_mode;
+ /* The driver indicated that EML is enabled for the interface, which
+ * implies that SMPS flows towards the AP should be stopped.
+ */
+ if (sdata->vif.driver_flags & IEEE80211_VIF_EML_ACTIVE)
+ return 0;
+
if (old_req == smps_mode &&
smps_mode != IEEE80211_SMPS_AUTOMATIC)
return 0;
@@ -3194,7 +3179,7 @@ int __ieee80211_request_smps_mgd(struct ieee80211_sub_if_data *sdata,
link->conf->chandef.width == NL80211_CHAN_WIDTH_20_NOHT)
return 0;
- ap = link->u.mgd.bssid;
+ ap = sdata->vif.cfg.ap_addr;
rcu_read_lock();
list_for_each_entry_rcu(sta, &sdata->local->sta_list, list) {
@@ -3216,7 +3201,9 @@ int __ieee80211_request_smps_mgd(struct ieee80211_sub_if_data *sdata,
/* send SM PS frame to AP */
err = ieee80211_send_smps_action(sdata, smps_mode,
- ap, ap);
+ ap, ap,
+ ieee80211_vif_is_mld(&sdata->vif) ?
+ link->link_id : -1);
if (err)
link->u.mgd.req_smps = old_req;
else if (smps_mode != IEEE80211_SMPS_OFF && tdls_peer_found)
@@ -3246,7 +3233,6 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev,
local->dynamic_ps_forced_timeout = timeout;
/* no change, but if automatic follow powersave */
- sdata_lock(sdata);
for (link_id = 0; link_id < ARRAY_SIZE(sdata->link); link_id++) {
struct ieee80211_link_data *link;
@@ -3257,7 +3243,6 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev,
__ieee80211_request_smps_mgd(sdata, link,
link->u.mgd.req_smps);
}
- sdata_unlock(sdata);
if (ieee80211_hw_check(&local->hw, SUPPORTS_DYNAMIC_PS))
ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS);
@@ -3403,7 +3388,8 @@ static int ieee80211_start_radar_detection(struct wiphy *wiphy,
struct ieee80211_local *local = sdata->local;
int err;
- mutex_lock(&local->mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
+
if (!list_empty(&local->roc_list) || local->scanning) {
err = -EBUSY;
goto out_unlock;
@@ -3418,12 +3404,10 @@ static int ieee80211_start_radar_detection(struct wiphy *wiphy,
if (err)
goto out_unlock;
- ieee80211_queue_delayed_work(&sdata->local->hw,
- &sdata->deflink.dfs_cac_timer_work,
- msecs_to_jiffies(cac_time_ms));
+ wiphy_delayed_work_queue(wiphy, &sdata->deflink.dfs_cac_timer_work,
+ msecs_to_jiffies(cac_time_ms));
out_unlock:
- mutex_unlock(&local->mtx);
return err;
}
@@ -3433,20 +3417,21 @@ static void ieee80211_end_cac(struct wiphy *wiphy,
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
struct ieee80211_local *local = sdata->local;
- mutex_lock(&local->mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
+
list_for_each_entry(sdata, &local->interfaces, list) {
/* it might be waiting for the local->mtx, but then
* by the time it gets it, sdata->wdev.cac_started
* will no longer be true
*/
- cancel_delayed_work(&sdata->deflink.dfs_cac_timer_work);
+ wiphy_delayed_work_cancel(wiphy,
+ &sdata->deflink.dfs_cac_timer_work);
if (sdata->wdev.cac_started) {
ieee80211_link_release_channel(&sdata->deflink);
sdata->wdev.cac_started = false;
}
}
- mutex_unlock(&local->mtx);
}
static struct cfg80211_beacon_data *
@@ -3578,11 +3563,11 @@ void ieee80211_csa_finish(struct ieee80211_vif *vif)
if (iter == sdata || iter->vif.mbssid_tx_vif != vif)
continue;
- ieee80211_queue_work(&iter->local->hw,
- &iter->deflink.csa_finalize_work);
+ wiphy_work_queue(iter->local->hw.wiphy,
+ &iter->deflink.csa_finalize_work);
}
}
- ieee80211_queue_work(&local->hw, &sdata->deflink.csa_finalize_work);
+ wiphy_work_queue(local->hw.wiphy, &sdata->deflink.csa_finalize_work);
rcu_read_unlock();
}
@@ -3638,15 +3623,14 @@ static int ieee80211_set_after_csa_beacon(struct ieee80211_sub_if_data *sdata,
return 0;
}
-static int __ieee80211_csa_finalize(struct ieee80211_sub_if_data *sdata)
+static int __ieee80211_csa_finalize(struct ieee80211_link_data *link_data)
{
+ struct ieee80211_sub_if_data *sdata = link_data->sdata;
struct ieee80211_local *local = sdata->local;
u64 changed = 0;
int err;
- sdata_assert_lock(sdata);
- lockdep_assert_held(&local->mtx);
- lockdep_assert_held(&local->chanctx_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
/*
* using reservation isn't immediate as it may be deferred until later
@@ -3655,20 +3639,20 @@ static int __ieee80211_csa_finalize(struct ieee80211_sub_if_data *sdata)
* completed successfully
*/
- if (sdata->deflink.reserved_chanctx) {
+ if (link_data->reserved_chanctx) {
/*
* with multi-vif csa driver may call ieee80211_csa_finish()
* many times while waiting for other interfaces to use their
* reservations
*/
- if (sdata->deflink.reserved_ready)
+ if (link_data->reserved_ready)
return 0;
return ieee80211_link_use_reserved_context(&sdata->deflink);
}
- if (!cfg80211_chandef_identical(&sdata->vif.bss_conf.chandef,
- &sdata->deflink.csa_chandef))
+ if (!cfg80211_chandef_identical(&link_data->conf->chandef,
+ &link_data->csa_chandef))
return -EINVAL;
sdata->vif.bss_conf.csa_active = false;
@@ -3683,57 +3667,53 @@ static int __ieee80211_csa_finalize(struct ieee80211_sub_if_data *sdata)
changed |= BSS_CHANGED_EHT_PUNCTURING;
}
- ieee80211_link_info_change_notify(sdata, &sdata->deflink, changed);
+ ieee80211_link_info_change_notify(sdata, link_data, changed);
- if (sdata->deflink.csa_block_tx) {
+ if (link_data->csa_block_tx) {
ieee80211_wake_vif_queues(local, sdata,
IEEE80211_QUEUE_STOP_REASON_CSA);
- sdata->deflink.csa_block_tx = false;
+ link_data->csa_block_tx = false;
}
- err = drv_post_channel_switch(sdata);
+ err = drv_post_channel_switch(link_data);
if (err)
return err;
- cfg80211_ch_switch_notify(sdata->dev, &sdata->deflink.csa_chandef, 0,
- sdata->vif.bss_conf.eht_puncturing);
+ cfg80211_ch_switch_notify(sdata->dev, &link_data->csa_chandef,
+ link_data->link_id,
+ link_data->conf->eht_puncturing);
return 0;
}
-static void ieee80211_csa_finalize(struct ieee80211_sub_if_data *sdata)
+static void ieee80211_csa_finalize(struct ieee80211_link_data *link_data)
{
- if (__ieee80211_csa_finalize(sdata)) {
+ struct ieee80211_sub_if_data *sdata = link_data->sdata;
+
+ if (__ieee80211_csa_finalize(link_data)) {
sdata_info(sdata, "failed to finalize CSA, disconnecting\n");
cfg80211_stop_iface(sdata->local->hw.wiphy, &sdata->wdev,
GFP_KERNEL);
}
}
-void ieee80211_csa_finalize_work(struct work_struct *work)
+void ieee80211_csa_finalize_work(struct wiphy *wiphy, struct wiphy_work *work)
{
- struct ieee80211_sub_if_data *sdata =
- container_of(work, struct ieee80211_sub_if_data,
- deflink.csa_finalize_work);
+ struct ieee80211_link_data *link =
+ container_of(work, struct ieee80211_link_data, csa_finalize_work);
+ struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_local *local = sdata->local;
- sdata_lock(sdata);
- mutex_lock(&local->mtx);
- mutex_lock(&local->chanctx_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
/* AP might have been stopped while waiting for the lock. */
- if (!sdata->vif.bss_conf.csa_active)
- goto unlock;
+ if (!link->conf->csa_active)
+ return;
if (!ieee80211_sdata_running(sdata))
- goto unlock;
-
- ieee80211_csa_finalize(sdata);
+ return;
-unlock:
- mutex_unlock(&local->chanctx_mtx);
- mutex_unlock(&local->mtx);
- sdata_unlock(sdata);
+ ieee80211_csa_finalize(link);
}
static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata,
@@ -3889,8 +3869,7 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
u64 changed = 0;
int err;
- sdata_assert_lock(sdata);
- lockdep_assert_held(&local->mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
if (!list_empty(&local->roc_list) || local->scanning)
return -EBUSY;
@@ -3906,9 +3885,8 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
if (sdata->vif.bss_conf.csa_active)
return -EBUSY;
- mutex_lock(&local->chanctx_mtx);
conf = rcu_dereference_protected(sdata->vif.bss_conf.chanctx_conf,
- lockdep_is_held(&local->chanctx_mtx));
+ lockdep_is_held(&local->hw.wiphy->mtx));
if (!conf) {
err = -EBUSY;
goto out;
@@ -3978,11 +3956,10 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
drv_channel_switch_beacon(sdata, &params->chandef);
} else {
/* if the beacon didn't change, we can finalize immediately */
- ieee80211_csa_finalize(sdata);
+ ieee80211_csa_finalize(&sdata->deflink);
}
out:
- mutex_unlock(&local->chanctx_mtx);
return err;
}
@@ -3991,18 +3968,15 @@ int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
struct ieee80211_local *local = sdata->local;
- int err;
- mutex_lock(&local->mtx);
- err = __ieee80211_channel_switch(wiphy, dev, params);
- mutex_unlock(&local->mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
- return err;
+ return __ieee80211_channel_switch(wiphy, dev, params);
}
u64 ieee80211_mgmt_tx_cookie(struct ieee80211_local *local)
{
- lockdep_assert_held(&local->mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
local->roc_cookie_counter++;
@@ -4034,7 +4008,8 @@ int ieee80211_attach_ack_skb(struct ieee80211_local *local, struct sk_buff *skb,
return -ENOMEM;
}
- IEEE80211_SKB_CB(skb)->ack_frame_id = id;
+ IEEE80211_SKB_CB(skb)->status_data_idr = 1;
+ IEEE80211_SKB_CB(skb)->status_data = id;
*cookie = ieee80211_mgmt_tx_cookie(local);
IEEE80211_SKB_CB(ack_skb)->ack.cookie = *cookie;
@@ -4084,11 +4059,17 @@ ieee80211_update_mgmt_frame_registrations(struct wiphy *wiphy,
static int ieee80211_set_antenna(struct wiphy *wiphy, u32 tx_ant, u32 rx_ant)
{
struct ieee80211_local *local = wiphy_priv(wiphy);
+ int ret;
if (local->started)
return -EOPNOTSUPP;
- return drv_set_antenna(local, tx_ant, rx_ant);
+ ret = drv_set_antenna(local, tx_ant, rx_ant);
+ if (ret)
+ return ret;
+
+ local->rx_chains = hweight8(rx_ant);
+ return 0;
}
static int ieee80211_get_antenna(struct wiphy *wiphy, u32 *tx_ant, u32 *rx_ant)
@@ -4130,7 +4111,7 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev,
int ret;
/* the lock is needed to assign the cookie later */
- mutex_lock(&local->mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
rcu_read_lock();
sta = sta_info_get_bss(sdata, peer);
@@ -4201,7 +4182,6 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev,
ret = 0;
unlock:
rcu_read_unlock();
- mutex_unlock(&local->mtx);
return ret;
}
@@ -4559,7 +4539,8 @@ static int ieee80211_set_tid_config(struct wiphy *wiphy,
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
struct sta_info *sta;
- int ret;
+
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
if (!sdata->local->ops->set_tid_config)
return -EOPNOTSUPP;
@@ -4567,17 +4548,11 @@ static int ieee80211_set_tid_config(struct wiphy *wiphy,
if (!tid_conf->peer)
return drv_set_tid_config(sdata->local, sdata, NULL, tid_conf);
- mutex_lock(&sdata->local->sta_mtx);
sta = sta_info_get_bss(sdata, tid_conf->peer);
- if (!sta) {
- mutex_unlock(&sdata->local->sta_mtx);
+ if (!sta)
return -ENOENT;
- }
- ret = drv_set_tid_config(sdata->local, sdata, &sta->sta, tid_conf);
- mutex_unlock(&sdata->local->sta_mtx);
-
- return ret;
+ return drv_set_tid_config(sdata->local, sdata, &sta->sta, tid_conf);
}
static int ieee80211_reset_tid_config(struct wiphy *wiphy,
@@ -4586,7 +4561,8 @@ static int ieee80211_reset_tid_config(struct wiphy *wiphy,
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
struct sta_info *sta;
- int ret;
+
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
if (!sdata->local->ops->reset_tid_config)
return -EOPNOTSUPP;
@@ -4594,17 +4570,11 @@ static int ieee80211_reset_tid_config(struct wiphy *wiphy,
if (!peer)
return drv_reset_tid_config(sdata->local, sdata, NULL, tids);
- mutex_lock(&sdata->local->sta_mtx);
sta = sta_info_get_bss(sdata, peer);
- if (!sta) {
- mutex_unlock(&sdata->local->sta_mtx);
+ if (!sta)
return -ENOENT;
- }
- ret = drv_reset_tid_config(sdata->local, sdata, &sta->sta, tids);
- mutex_unlock(&sdata->local->sta_mtx);
-
- return ret;
+ return drv_reset_tid_config(sdata->local, sdata, &sta->sta, tids);
}
static int ieee80211_set_sar_specs(struct wiphy *wiphy,
@@ -4690,6 +4660,8 @@ static void
ieee80211_color_change_bss_config_notify(struct ieee80211_sub_if_data *sdata,
u8 color, int enable, u64 changed)
{
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
+
sdata->vif.bss_conf.he_bss_color.color = color;
sdata->vif.bss_conf.he_bss_color.enabled = enable;
changed |= BSS_CHANGED_HE_BSS_COLOR;
@@ -4699,7 +4671,6 @@ ieee80211_color_change_bss_config_notify(struct ieee80211_sub_if_data *sdata,
if (!sdata->vif.bss_conf.nontransmitted && sdata->vif.mbssid_tx_vif) {
struct ieee80211_sub_if_data *child;
- mutex_lock(&sdata->local->iflist_mtx);
list_for_each_entry(child, &sdata->local->interfaces, list) {
if (child != sdata && child->vif.mbssid_tx_vif == &sdata->vif) {
child->vif.bss_conf.he_bss_color.color = color;
@@ -4709,7 +4680,6 @@ ieee80211_color_change_bss_config_notify(struct ieee80211_sub_if_data *sdata,
BSS_CHANGED_HE_BSS_COLOR);
}
}
- mutex_unlock(&sdata->local->iflist_mtx);
}
}
@@ -4719,8 +4689,7 @@ static int ieee80211_color_change_finalize(struct ieee80211_sub_if_data *sdata)
u64 changed = 0;
int err;
- sdata_assert_lock(sdata);
- lockdep_assert_held(&local->mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
sdata->vif.bss_conf.color_change_active = false;
@@ -4738,28 +4707,24 @@ static int ieee80211_color_change_finalize(struct ieee80211_sub_if_data *sdata)
return 0;
}
-void ieee80211_color_change_finalize_work(struct work_struct *work)
+void ieee80211_color_change_finalize_work(struct wiphy *wiphy,
+ struct wiphy_work *work)
{
struct ieee80211_sub_if_data *sdata =
container_of(work, struct ieee80211_sub_if_data,
deflink.color_change_finalize_work);
struct ieee80211_local *local = sdata->local;
- sdata_lock(sdata);
- mutex_lock(&local->mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
/* AP might have been stopped while waiting for the lock. */
if (!sdata->vif.bss_conf.color_change_active)
- goto unlock;
+ return;
if (!ieee80211_sdata_running(sdata))
- goto unlock;
+ return;
ieee80211_color_change_finalize(sdata);
-
-unlock:
- mutex_unlock(&local->mtx);
- sdata_unlock(sdata);
}
void ieee80211_color_collision_detection_work(struct work_struct *work)
@@ -4770,17 +4735,15 @@ void ieee80211_color_collision_detection_work(struct work_struct *work)
color_collision_detect_work);
struct ieee80211_sub_if_data *sdata = link->sdata;
- sdata_lock(sdata);
cfg80211_obss_color_collision_notify(sdata->dev, link->color_bitmap);
- sdata_unlock(sdata);
}
void ieee80211_color_change_finish(struct ieee80211_vif *vif)
{
struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
- ieee80211_queue_work(&sdata->local->hw,
- &sdata->deflink.color_change_finalize_work);
+ wiphy_work_queue(sdata->local->hw.wiphy,
+ &sdata->deflink.color_change_finalize_work);
}
EXPORT_SYMBOL_GPL(ieee80211_color_change_finish);
@@ -4816,13 +4779,11 @@ ieee80211_color_change(struct wiphy *wiphy, struct net_device *dev,
u64 changed = 0;
int err;
- sdata_assert_lock(sdata);
+ lockdep_assert_wiphy(local->hw.wiphy);
if (sdata->vif.bss_conf.nontransmitted)
return -EINVAL;
- mutex_lock(&local->mtx);
-
/* don't allow another color change if one is already active or if csa
* is active
*/
@@ -4847,7 +4808,6 @@ ieee80211_color_change(struct wiphy *wiphy, struct net_device *dev,
ieee80211_color_change_finalize(sdata);
out:
- mutex_unlock(&local->mtx);
return err;
}
@@ -4869,16 +4829,13 @@ static int ieee80211_add_intf_link(struct wiphy *wiphy,
unsigned int link_id)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
- int res;
+
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
if (wdev->use_4addr)
return -EOPNOTSUPP;
- mutex_lock(&sdata->local->mtx);
- res = ieee80211_vif_set_links(sdata, wdev->valid_links, 0);
- mutex_unlock(&sdata->local->mtx);
-
- return res;
+ return ieee80211_vif_set_links(sdata, wdev->valid_links, 0);
}
static void ieee80211_del_intf_link(struct wiphy *wiphy,
@@ -4887,9 +4844,9 @@ static void ieee80211_del_intf_link(struct wiphy *wiphy,
{
struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
- mutex_lock(&sdata->local->mtx);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
+
ieee80211_vif_set_links(sdata, wdev->valid_links, 0);
- mutex_unlock(&sdata->local->mtx);
}
static int sta_add_link_station(struct ieee80211_local *local,
@@ -4929,13 +4886,10 @@ ieee80211_add_link_station(struct wiphy *wiphy, struct net_device *dev,
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
struct ieee80211_local *local = wiphy_priv(wiphy);
- int ret;
- mutex_lock(&sdata->local->sta_mtx);
- ret = sta_add_link_station(local, sdata, params);
- mutex_unlock(&sdata->local->sta_mtx);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
- return ret;
+ return sta_add_link_station(local, sdata, params);
}
static int sta_mod_link_station(struct ieee80211_local *local,
@@ -4960,13 +4914,10 @@ ieee80211_mod_link_station(struct wiphy *wiphy, struct net_device *dev,
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
struct ieee80211_local *local = wiphy_priv(wiphy);
- int ret;
- mutex_lock(&sdata->local->sta_mtx);
- ret = sta_mod_link_station(local, sdata, params);
- mutex_unlock(&sdata->local->sta_mtx);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
- return ret;
+ return sta_mod_link_station(local, sdata, params);
}
static int sta_del_link_station(struct ieee80211_sub_if_data *sdata,
@@ -4995,13 +4946,10 @@ ieee80211_del_link_station(struct wiphy *wiphy, struct net_device *dev,
struct link_station_del_parameters *params)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
- int ret;
- mutex_lock(&sdata->local->sta_mtx);
- ret = sta_del_link_station(sdata, params);
- mutex_unlock(&sdata->local->sta_mtx);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
- return ret;
+ return sta_del_link_station(sdata, params);
}
static int ieee80211_set_hw_timestamp(struct wiphy *wiphy,
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index 68952752b599..ef4c2cebc080 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -18,7 +18,7 @@ static int ieee80211_chanctx_num_assigned(struct ieee80211_local *local,
struct ieee80211_link_data *link;
int num = 0;
- lockdep_assert_held(&local->chanctx_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
list_for_each_entry(link, &ctx->assigned_links, assigned_chanctx_list)
num++;
@@ -32,7 +32,7 @@ static int ieee80211_chanctx_num_reserved(struct ieee80211_local *local,
struct ieee80211_link_data *link;
int num = 0;
- lockdep_assert_held(&local->chanctx_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
list_for_each_entry(link, &ctx->reserved_links, reserved_chanctx_list)
num++;
@@ -52,7 +52,7 @@ static int ieee80211_num_chanctx(struct ieee80211_local *local)
struct ieee80211_chanctx *ctx;
int num = 0;
- lockdep_assert_held(&local->chanctx_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
list_for_each_entry(ctx, &local->chanctx_list, list)
num++;
@@ -62,7 +62,8 @@ static int ieee80211_num_chanctx(struct ieee80211_local *local)
static bool ieee80211_can_create_new_chanctx(struct ieee80211_local *local)
{
- lockdep_assert_held(&local->chanctx_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
+
return ieee80211_num_chanctx(local) < ieee80211_max_num_channels(local);
}
@@ -73,7 +74,7 @@ ieee80211_link_get_chanctx(struct ieee80211_link_data *link)
struct ieee80211_chanctx_conf *conf;
conf = rcu_dereference_protected(link->conf->chanctx_conf,
- lockdep_is_held(&local->chanctx_mtx));
+ lockdep_is_held(&local->hw.wiphy->mtx));
if (!conf)
return NULL;
@@ -87,7 +88,7 @@ ieee80211_chanctx_reserved_chandef(struct ieee80211_local *local,
{
struct ieee80211_link_data *link;
- lockdep_assert_held(&local->chanctx_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
list_for_each_entry(link, &ctx->reserved_links,
reserved_chanctx_list) {
@@ -110,7 +111,7 @@ ieee80211_chanctx_non_reserved_chandef(struct ieee80211_local *local,
{
struct ieee80211_link_data *link;
- lockdep_assert_held(&local->chanctx_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
list_for_each_entry(link, &ctx->assigned_links,
assigned_chanctx_list) {
@@ -136,7 +137,7 @@ ieee80211_chanctx_combined_chandef(struct ieee80211_local *local,
struct ieee80211_chanctx *ctx,
const struct cfg80211_chan_def *compat)
{
- lockdep_assert_held(&local->chanctx_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
compat = ieee80211_chanctx_reserved_chandef(local, ctx, compat);
if (!compat)
@@ -154,7 +155,7 @@ ieee80211_chanctx_can_reserve_chandef(struct ieee80211_local *local,
struct ieee80211_chanctx *ctx,
const struct cfg80211_chan_def *def)
{
- lockdep_assert_held(&local->chanctx_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
if (ieee80211_chanctx_combined_chandef(local, ctx, def))
return true;
@@ -173,7 +174,7 @@ ieee80211_find_reservation_chanctx(struct ieee80211_local *local,
{
struct ieee80211_chanctx *ctx;
- lockdep_assert_held(&local->chanctx_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
if (mode == IEEE80211_CHANCTX_EXCLUSIVE)
return NULL;
@@ -361,7 +362,7 @@ _ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local,
enum nl80211_chan_width max_bw;
struct cfg80211_chan_def min_def;
- lockdep_assert_held(&local->chanctx_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
/* don't optimize non-20MHz based and radar_enabled confs */
if (ctx->conf.def.width == NL80211_CHAN_WIDTH_5 ||
@@ -506,11 +507,16 @@ static void _ieee80211_change_chanctx(struct ieee80211_local *local,
WARN_ON(!cfg80211_chandef_compatible(&ctx->conf.def, chandef));
+ ieee80211_remove_wbrf(local, &ctx->conf.def);
+
ctx->conf.def = *chandef;
/* check if min chanctx also changed */
changed = IEEE80211_CHANCTX_CHANGE_WIDTH |
_ieee80211_recalc_chanctx_min_def(local, ctx, rsvd_for);
+
+ ieee80211_add_wbrf(local, &ctx->conf.def);
+
drv_change_chanctx(local, ctx, changed);
if (!local->use_chanctx) {
@@ -537,7 +543,7 @@ ieee80211_find_chanctx(struct ieee80211_local *local,
{
struct ieee80211_chanctx *ctx;
- lockdep_assert_held(&local->chanctx_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
if (mode == IEEE80211_CHANCTX_EXCLUSIVE)
return NULL;
@@ -572,7 +578,7 @@ bool ieee80211_is_radar_required(struct ieee80211_local *local)
{
struct ieee80211_sub_if_data *sdata;
- lockdep_assert_held(&local->mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
rcu_read_lock();
list_for_each_entry_rcu(sdata, &local->interfaces, list) {
@@ -602,8 +608,7 @@ ieee80211_chanctx_radar_required(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata;
bool required = false;
- lockdep_assert_held(&local->chanctx_mtx);
- lockdep_assert_held(&local->mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
rcu_read_lock();
list_for_each_entry_rcu(sdata, &local->interfaces, list) {
@@ -641,7 +646,7 @@ ieee80211_alloc_chanctx(struct ieee80211_local *local,
{
struct ieee80211_chanctx *ctx;
- lockdep_assert_held(&local->chanctx_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
ctx = kzalloc(sizeof(*ctx) + local->hw.chanctx_data_size, GFP_KERNEL);
if (!ctx)
@@ -665,8 +670,9 @@ static int ieee80211_add_chanctx(struct ieee80211_local *local,
u32 changed;
int err;
- lockdep_assert_held(&local->mtx);
- lockdep_assert_held(&local->chanctx_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
+
+ ieee80211_add_wbrf(local, &ctx->conf.def);
if (!local->use_chanctx)
local->hw.conf.radar_enabled = ctx->conf.radar_enabled;
@@ -698,8 +704,7 @@ ieee80211_new_chanctx(struct ieee80211_local *local,
struct ieee80211_chanctx *ctx;
int err;
- lockdep_assert_held(&local->mtx);
- lockdep_assert_held(&local->chanctx_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
ctx = ieee80211_alloc_chanctx(local, chandef, mode);
if (!ctx)
@@ -718,7 +723,7 @@ ieee80211_new_chanctx(struct ieee80211_local *local,
static void ieee80211_del_chanctx(struct ieee80211_local *local,
struct ieee80211_chanctx *ctx)
{
- lockdep_assert_held(&local->chanctx_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
if (!local->use_chanctx) {
struct cfg80211_chan_def *chandef = &local->_oper_chandef;
@@ -748,12 +753,14 @@ static void ieee80211_del_chanctx(struct ieee80211_local *local,
}
ieee80211_recalc_idle(local);
+
+ ieee80211_remove_wbrf(local, &ctx->conf.def);
}
static void ieee80211_free_chanctx(struct ieee80211_local *local,
struct ieee80211_chanctx *ctx)
{
- lockdep_assert_held(&local->chanctx_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
WARN_ON_ONCE(ieee80211_chanctx_refcount(local, ctx) != 0);
@@ -770,7 +777,7 @@ void ieee80211_recalc_chanctx_chantype(struct ieee80211_local *local,
const struct cfg80211_chan_def *compat = NULL;
struct sta_info *sta;
- lockdep_assert_held(&local->chanctx_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
rcu_read_lock();
list_for_each_entry_rcu(sdata, &local->interfaces, list) {
@@ -833,9 +840,7 @@ static void ieee80211_recalc_radar_chanctx(struct ieee80211_local *local,
{
bool radar_enabled;
- lockdep_assert_held(&local->chanctx_mtx);
- /* for ieee80211_is_radar_required */
- lockdep_assert_held(&local->mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
radar_enabled = ieee80211_chanctx_radar_required(local, chanctx);
@@ -862,10 +867,10 @@ static int ieee80211_assign_link_chanctx(struct ieee80211_link_data *link,
int ret = 0;
if (WARN_ON(sdata->vif.type == NL80211_IFTYPE_NAN))
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
conf = rcu_dereference_protected(link->conf->chanctx_conf,
- lockdep_is_held(&local->chanctx_mtx));
+ lockdep_is_held(&local->hw.wiphy->mtx));
if (conf) {
curr_ctx = container_of(conf, struct ieee80211_chanctx, conf);
@@ -920,7 +925,7 @@ void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata;
u8 rx_chains_static, rx_chains_dynamic;
- lockdep_assert_held(&local->chanctx_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
rx_chains_static = 1;
rx_chains_dynamic = 1;
@@ -1023,7 +1028,7 @@ __ieee80211_link_copy_chanctx_to_vlans(struct ieee80211_link_data *link,
if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_AP))
return;
- lockdep_assert_held(&local->mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
/* Check that conf exists, even when clearing this function
* must be called with the AP's channel context still there
@@ -1032,7 +1037,7 @@ __ieee80211_link_copy_chanctx_to_vlans(struct ieee80211_link_data *link,
* to a channel context that has already been freed.
*/
conf = rcu_dereference_protected(link_conf->chanctx_conf,
- lockdep_is_held(&local->chanctx_mtx));
+ lockdep_is_held(&local->hw.wiphy->mtx));
WARN_ON(!conf);
if (clear)
@@ -1056,11 +1061,9 @@ void ieee80211_link_copy_chanctx_to_vlans(struct ieee80211_link_data *link,
{
struct ieee80211_local *local = link->sdata->local;
- mutex_lock(&local->chanctx_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
__ieee80211_link_copy_chanctx_to_vlans(link, clear);
-
- mutex_unlock(&local->chanctx_mtx);
}
int ieee80211_link_unreserve_chanctx(struct ieee80211_link_data *link)
@@ -1068,7 +1071,7 @@ int ieee80211_link_unreserve_chanctx(struct ieee80211_link_data *link)
struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_chanctx *ctx = link->reserved_chanctx;
- lockdep_assert_held(&sdata->local->chanctx_mtx);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
if (WARN_ON(!ctx))
return -EINVAL;
@@ -1108,11 +1111,11 @@ int ieee80211_link_reserve_chanctx(struct ieee80211_link_data *link,
struct ieee80211_local *local = sdata->local;
struct ieee80211_chanctx *new_ctx, *curr_ctx, *ctx;
- lockdep_assert_held(&local->chanctx_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
curr_ctx = ieee80211_link_get_chanctx(link);
if (curr_ctx && local->use_chanctx && !local->ops->switch_vif_chanctx)
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
new_ctx = ieee80211_find_reservation_chanctx(local, chandef, mode);
if (!new_ctx) {
@@ -1206,8 +1209,8 @@ ieee80211_link_chanctx_reservation_complete(struct ieee80211_link_data *link)
case NL80211_IFTYPE_AP:
case NL80211_IFTYPE_MESH_POINT:
case NL80211_IFTYPE_OCB:
- ieee80211_queue_work(&sdata->local->hw,
- &link->csa_finalize_work);
+ wiphy_work_queue(sdata->local->hw.wiphy,
+ &link->csa_finalize_work);
break;
case NL80211_IFTYPE_STATION:
wiphy_delayed_work_queue(sdata->local->hw.wiphy,
@@ -1265,8 +1268,7 @@ ieee80211_link_use_reserved_reassign(struct ieee80211_link_data *link)
u64 changed = 0;
int err;
- lockdep_assert_held(&local->mtx);
- lockdep_assert_held(&local->chanctx_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
new_ctx = link->reserved_chanctx;
old_ctx = ieee80211_link_get_chanctx(link);
@@ -1390,7 +1392,7 @@ ieee80211_link_has_in_place_reservation(struct ieee80211_link_data *link)
struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_chanctx *old_ctx, *new_ctx;
- lockdep_assert_held(&sdata->local->chanctx_mtx);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
new_ctx = link->reserved_chanctx;
old_ctx = ieee80211_link_get_chanctx(link);
@@ -1415,8 +1417,7 @@ static int ieee80211_chsw_switch_hwconf(struct ieee80211_local *local,
{
const struct cfg80211_chan_def *chandef;
- lockdep_assert_held(&local->mtx);
- lockdep_assert_held(&local->chanctx_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
chandef = ieee80211_chanctx_reserved_chandef(local, new_ctx, NULL);
if (WARN_ON(!chandef))
@@ -1437,8 +1438,7 @@ static int ieee80211_chsw_switch_vifs(struct ieee80211_local *local,
struct ieee80211_chanctx *ctx, *old_ctx;
int i, err;
- lockdep_assert_held(&local->mtx);
- lockdep_assert_held(&local->chanctx_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
vif_chsw = kcalloc(n_vifs, sizeof(vif_chsw[0]), GFP_KERNEL);
if (!vif_chsw)
@@ -1482,8 +1482,7 @@ static int ieee80211_chsw_switch_ctxs(struct ieee80211_local *local)
struct ieee80211_chanctx *ctx;
int err;
- lockdep_assert_held(&local->mtx);
- lockdep_assert_held(&local->chanctx_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
list_for_each_entry(ctx, &local->chanctx_list, list) {
if (ctx->replace_state != IEEE80211_CHANCTX_REPLACES_OTHER)
@@ -1523,8 +1522,7 @@ static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local)
int err, n_assigned, n_reserved, n_ready;
int n_ctx = 0, n_vifs_switch = 0, n_vifs_assign = 0, n_vifs_ctxless = 0;
- lockdep_assert_held(&local->mtx);
- lockdep_assert_held(&local->chanctx_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
/*
* If there are 2 independent pairs of channel contexts performing
@@ -1783,10 +1781,10 @@ static void __ieee80211_link_release_channel(struct ieee80211_link_data *link)
struct ieee80211_chanctx *ctx;
bool use_reserved_switch = false;
- lockdep_assert_held(&local->chanctx_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
conf = rcu_dereference_protected(link_conf->chanctx_conf,
- lockdep_is_held(&local->chanctx_mtx));
+ lockdep_is_held(&local->hw.wiphy->mtx));
if (!conf)
return;
@@ -1821,7 +1819,7 @@ int ieee80211_link_use_channel(struct ieee80211_link_data *link,
u8 radar_detect_width = 0;
int ret;
- lockdep_assert_held(&local->mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
if (sdata->vif.active_links &&
!(sdata->vif.active_links & BIT(link->link_id))) {
@@ -1829,8 +1827,6 @@ int ieee80211_link_use_channel(struct ieee80211_link_data *link,
return 0;
}
- mutex_lock(&local->chanctx_mtx);
-
ret = cfg80211_chandef_dfs_required(local->hw.wiphy,
chandef,
sdata->wdev.iftype);
@@ -1872,7 +1868,6 @@ int ieee80211_link_use_channel(struct ieee80211_link_data *link,
if (ret)
link->radar_required = false;
- mutex_unlock(&local->chanctx_mtx);
return ret;
}
@@ -1884,8 +1879,7 @@ int ieee80211_link_use_reserved_context(struct ieee80211_link_data *link)
struct ieee80211_chanctx *old_ctx;
int err;
- lockdep_assert_held(&local->mtx);
- lockdep_assert_held(&local->chanctx_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
new_ctx = link->reserved_chanctx;
old_ctx = ieee80211_link_get_chanctx(link);
@@ -1948,51 +1942,40 @@ int ieee80211_link_change_bandwidth(struct ieee80211_link_data *link,
struct ieee80211_chanctx_conf *conf;
struct ieee80211_chanctx *ctx;
const struct cfg80211_chan_def *compat;
- int ret;
+
+ lockdep_assert_wiphy(local->hw.wiphy);
if (!cfg80211_chandef_usable(sdata->local->hw.wiphy, chandef,
IEEE80211_CHAN_DISABLED))
return -EINVAL;
- mutex_lock(&local->chanctx_mtx);
- if (cfg80211_chandef_identical(chandef, &link_conf->chandef)) {
- ret = 0;
- goto out;
- }
+ if (cfg80211_chandef_identical(chandef, &link_conf->chandef))
+ return 0;
if (chandef->width == NL80211_CHAN_WIDTH_20_NOHT ||
- link_conf->chandef.width == NL80211_CHAN_WIDTH_20_NOHT) {
- ret = -EINVAL;
- goto out;
- }
+ link_conf->chandef.width == NL80211_CHAN_WIDTH_20_NOHT)
+ return -EINVAL;
conf = rcu_dereference_protected(link_conf->chanctx_conf,
- lockdep_is_held(&local->chanctx_mtx));
- if (!conf) {
- ret = -EINVAL;
- goto out;
- }
+ lockdep_is_held(&local->hw.wiphy->mtx));
+ if (!conf)
+ return -EINVAL;
ctx = container_of(conf, struct ieee80211_chanctx, conf);
compat = cfg80211_chandef_compatible(&conf->def, chandef);
- if (!compat) {
- ret = -EINVAL;
- goto out;
- }
+ if (!compat)
+ return -EINVAL;
switch (ctx->replace_state) {
case IEEE80211_CHANCTX_REPLACE_NONE:
- if (!ieee80211_chanctx_reserved_chandef(local, ctx, compat)) {
- ret = -EBUSY;
- goto out;
- }
+ if (!ieee80211_chanctx_reserved_chandef(local, ctx, compat))
+ return -EBUSY;
break;
case IEEE80211_CHANCTX_WILL_BE_REPLACED:
/* TODO: Perhaps the bandwidth change could be treated as a
* reservation itself? */
- ret = -EBUSY;
- goto out;
+ return -EBUSY;
case IEEE80211_CHANCTX_REPLACES_OTHER:
/* channel context that is going to replace another channel
* context doesn't really exist and shouldn't be assigned
@@ -2006,22 +1989,17 @@ int ieee80211_link_change_bandwidth(struct ieee80211_link_data *link,
ieee80211_recalc_chanctx_chantype(local, ctx);
*changed |= BSS_CHANGED_BANDWIDTH;
- ret = 0;
- out:
- mutex_unlock(&local->chanctx_mtx);
- return ret;
+ return 0;
}
void ieee80211_link_release_channel(struct ieee80211_link_data *link)
{
struct ieee80211_sub_if_data *sdata = link->sdata;
- mutex_lock(&sdata->local->chanctx_mtx);
- if (rcu_access_pointer(link->conf->chanctx_conf)) {
- lockdep_assert_held(&sdata->local->mtx);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
+
+ if (rcu_access_pointer(link->conf->chanctx_conf))
__ieee80211_link_release_channel(link);
- }
- mutex_unlock(&sdata->local->chanctx_mtx);
}
void ieee80211_link_vlan_copy_chanctx(struct ieee80211_link_data *link)
@@ -2034,20 +2012,19 @@ void ieee80211_link_vlan_copy_chanctx(struct ieee80211_link_data *link)
struct ieee80211_sub_if_data *ap;
struct ieee80211_chanctx_conf *conf;
+ lockdep_assert_wiphy(local->hw.wiphy);
+
if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_AP_VLAN || !sdata->bss))
return;
ap = container_of(sdata->bss, struct ieee80211_sub_if_data, u.ap);
- mutex_lock(&local->chanctx_mtx);
-
rcu_read_lock();
ap_conf = rcu_dereference(ap->vif.link_conf[link_id]);
conf = rcu_dereference_protected(ap_conf->chanctx_conf,
- lockdep_is_held(&local->chanctx_mtx));
+ lockdep_is_held(&local->hw.wiphy->mtx));
rcu_assign_pointer(link_conf->chanctx_conf, conf);
rcu_read_unlock();
- mutex_unlock(&local->chanctx_mtx);
}
void ieee80211_iter_chan_contexts_atomic(
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 207f772bd8ce..74be49191e70 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -4,7 +4,7 @@
*
* Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
- * Copyright (C) 2018 - 2019, 2021-2022 Intel Corporation
+ * Copyright (C) 2018 - 2019, 2021-2023 Intel Corporation
*/
#include <linux/debugfs.h>
@@ -288,10 +288,10 @@ static ssize_t aql_txq_limit_write(struct file *file,
q_limit_low_old = local->aql_txq_limit_low[ac];
q_limit_high_old = local->aql_txq_limit_high[ac];
+ wiphy_lock(local->hw.wiphy);
local->aql_txq_limit_low[ac] = q_limit_low;
local->aql_txq_limit_high[ac] = q_limit_high;
- mutex_lock(&local->sta_mtx);
list_for_each_entry(sta, &local->sta_list, list) {
/* If a sta has customized queue limits, keep it */
if (sta->airtime[ac].aql_limit_low == q_limit_low_old &&
@@ -300,7 +300,8 @@ static ssize_t aql_txq_limit_write(struct file *file,
sta->airtime[ac].aql_limit_high = q_limit_high;
}
}
- mutex_unlock(&local->sta_mtx);
+ wiphy_unlock(local->hw.wiphy);
+
return count;
}
@@ -496,6 +497,7 @@ static const char *hw_flag_names[] = {
FLAG(SUPPORTS_CONC_MON_RX_DECAP),
FLAG(DETECTS_COLOR_COLLISION),
FLAG(MLO_MCAST_MULTI_LINK_TX),
+ FLAG(DISALLOW_PUNCTURING),
#undef FLAG
};
@@ -594,9 +596,9 @@ static ssize_t format_devstat_counter(struct ieee80211_local *local,
char buf[20];
int res;
- rtnl_lock();
+ wiphy_lock(local->hw.wiphy);
res = drv_get_stats(local, &stats);
- rtnl_unlock();
+ wiphy_unlock(local->hw.wiphy);
if (res)
return res;
res = printvalue(&stats, buf, sizeof(buf));
diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c
index 16a04330e7dc..7e54da508765 100644
--- a/net/mac80211/debugfs_key.c
+++ b/net/mac80211/debugfs_key.c
@@ -4,7 +4,7 @@
* Copyright (c) 2006 Jiri Benc <jbenc@suse.cz>
* Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
* Copyright (C) 2015 Intel Deutschland GmbH
- * Copyright (C) 2021-2022 Intel Corporation
+ * Copyright (C) 2021-2023 Intel Corporation
*/
#include <linux/kobject.h>
@@ -378,14 +378,14 @@ void ieee80211_debugfs_key_update_default(struct ieee80211_sub_if_data *sdata)
if (!sdata->vif.debugfs_dir)
return;
- lockdep_assert_held(&sdata->local->key_mtx);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
debugfs_remove(sdata->debugfs.default_unicast_key);
sdata->debugfs.default_unicast_key = NULL;
if (sdata->default_unicast_key) {
- key = key_mtx_dereference(sdata->local,
- sdata->default_unicast_key);
+ key = wiphy_dereference(sdata->local->hw.wiphy,
+ sdata->default_unicast_key);
sprintf(buf, "../keys/%d", key->debugfs.cnt);
sdata->debugfs.default_unicast_key =
debugfs_create_symlink("default_unicast_key",
@@ -396,8 +396,8 @@ void ieee80211_debugfs_key_update_default(struct ieee80211_sub_if_data *sdata)
sdata->debugfs.default_multicast_key = NULL;
if (sdata->deflink.default_multicast_key) {
- key = key_mtx_dereference(sdata->local,
- sdata->deflink.default_multicast_key);
+ key = wiphy_dereference(sdata->local->hw.wiphy,
+ sdata->deflink.default_multicast_key);
sprintf(buf, "../keys/%d", key->debugfs.cnt);
sdata->debugfs.default_multicast_key =
debugfs_create_symlink("default_multicast_key",
@@ -413,8 +413,8 @@ void ieee80211_debugfs_key_add_mgmt_default(struct ieee80211_sub_if_data *sdata)
if (!sdata->vif.debugfs_dir)
return;
- key = key_mtx_dereference(sdata->local,
- sdata->deflink.default_mgmt_key);
+ key = wiphy_dereference(sdata->local->hw.wiphy,
+ sdata->deflink.default_mgmt_key);
if (key) {
sprintf(buf, "../keys/%d", key->debugfs.cnt);
sdata->debugfs.default_mgmt_key =
@@ -442,8 +442,8 @@ ieee80211_debugfs_key_add_beacon_default(struct ieee80211_sub_if_data *sdata)
if (!sdata->vif.debugfs_dir)
return;
- key = key_mtx_dereference(sdata->local,
- sdata->deflink.default_beacon_key);
+ key = wiphy_dereference(sdata->local->hw.wiphy,
+ sdata->deflink.default_beacon_key);
if (key) {
sprintf(buf, "../keys/%d", key->debugfs.cnt);
sdata->debugfs.default_beacon_key =
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index 63250286dc8b..68596ef78b15 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -22,46 +22,148 @@
#include "debugfs_netdev.h"
#include "driver-ops.h"
-static ssize_t ieee80211_if_read(
- void *data,
+struct ieee80211_if_read_sdata_data {
+ ssize_t (*format)(const struct ieee80211_sub_if_data *, char *, int);
+ struct ieee80211_sub_if_data *sdata;
+};
+
+static ssize_t ieee80211_if_read_sdata_handler(struct wiphy *wiphy,
+ struct file *file,
+ char *buf,
+ size_t bufsize,
+ void *data)
+{
+ struct ieee80211_if_read_sdata_data *d = data;
+
+ return d->format(d->sdata, buf, bufsize);
+}
+
+static ssize_t ieee80211_if_read_sdata(
+ struct file *file,
char __user *userbuf,
size_t count, loff_t *ppos,
- ssize_t (*format)(const void *, char *, int))
+ ssize_t (*format)(const struct ieee80211_sub_if_data *sdata, char *, int))
{
+ struct ieee80211_sub_if_data *sdata = file->private_data;
+ struct ieee80211_if_read_sdata_data data = {
+ .format = format,
+ .sdata = sdata,
+ };
char buf[200];
- ssize_t ret = -EINVAL;
- read_lock(&dev_base_lock);
- ret = (*format)(data, buf, sizeof(buf));
- read_unlock(&dev_base_lock);
+ return wiphy_locked_debugfs_read(sdata->local->hw.wiphy,
+ file, buf, sizeof(buf),
+ userbuf, count, ppos,
+ ieee80211_if_read_sdata_handler,
+ &data);
+}
+
+struct ieee80211_if_write_sdata_data {
+ ssize_t (*write)(struct ieee80211_sub_if_data *, const char *, int);
+ struct ieee80211_sub_if_data *sdata;
+};
- if (ret >= 0)
- ret = simple_read_from_buffer(userbuf, count, ppos, buf, ret);
+static ssize_t ieee80211_if_write_sdata_handler(struct wiphy *wiphy,
+ struct file *file,
+ char *buf,
+ size_t count,
+ void *data)
+{
+ struct ieee80211_if_write_sdata_data *d = data;
- return ret;
+ return d->write(d->sdata, buf, count);
}
-static ssize_t ieee80211_if_write(
- void *data,
+static ssize_t ieee80211_if_write_sdata(
+ struct file *file,
const char __user *userbuf,
size_t count, loff_t *ppos,
- ssize_t (*write)(void *, const char *, int))
+ ssize_t (*write)(struct ieee80211_sub_if_data *sdata, const char *, int))
{
+ struct ieee80211_sub_if_data *sdata = file->private_data;
+ struct ieee80211_if_write_sdata_data data = {
+ .write = write,
+ .sdata = sdata,
+ };
char buf[64];
- ssize_t ret;
- if (count >= sizeof(buf))
- return -E2BIG;
+ return wiphy_locked_debugfs_write(sdata->local->hw.wiphy,
+ file, buf, sizeof(buf),
+ userbuf, count,
+ ieee80211_if_write_sdata_handler,
+ &data);
+}
+
+struct ieee80211_if_read_link_data {
+ ssize_t (*format)(const struct ieee80211_link_data *, char *, int);
+ struct ieee80211_link_data *link;
+};
+
+static ssize_t ieee80211_if_read_link_handler(struct wiphy *wiphy,
+ struct file *file,
+ char *buf,
+ size_t bufsize,
+ void *data)
+{
+ struct ieee80211_if_read_link_data *d = data;
+
+ return d->format(d->link, buf, bufsize);
+}
+
+static ssize_t ieee80211_if_read_link(
+ struct file *file,
+ char __user *userbuf,
+ size_t count, loff_t *ppos,
+ ssize_t (*format)(const struct ieee80211_link_data *link, char *, int))
+{
+ struct ieee80211_link_data *link = file->private_data;
+ struct ieee80211_if_read_link_data data = {
+ .format = format,
+ .link = link,
+ };
+ char buf[200];
+
+ return wiphy_locked_debugfs_read(link->sdata->local->hw.wiphy,
+ file, buf, sizeof(buf),
+ userbuf, count, ppos,
+ ieee80211_if_read_link_handler,
+ &data);
+}
+
+struct ieee80211_if_write_link_data {
+ ssize_t (*write)(struct ieee80211_link_data *, const char *, int);
+ struct ieee80211_link_data *link;
+};
+
+static ssize_t ieee80211_if_write_link_handler(struct wiphy *wiphy,
+ struct file *file,
+ char *buf,
+ size_t count,
+ void *data)
+{
+ struct ieee80211_if_write_sdata_data *d = data;
- if (copy_from_user(buf, userbuf, count))
- return -EFAULT;
- buf[count] = '\0';
+ return d->write(d->sdata, buf, count);
+}
- rtnl_lock();
- ret = (*write)(data, buf, count);
- rtnl_unlock();
+static ssize_t ieee80211_if_write_link(
+ struct file *file,
+ const char __user *userbuf,
+ size_t count, loff_t *ppos,
+ ssize_t (*write)(struct ieee80211_link_data *link, const char *, int))
+{
+ struct ieee80211_link_data *link = file->private_data;
+ struct ieee80211_if_write_link_data data = {
+ .write = write,
+ .link = link,
+ };
+ char buf[64];
- return ret;
+ return wiphy_locked_debugfs_write(link->sdata->local->hw.wiphy,
+ file, buf, sizeof(buf),
+ userbuf, count,
+ ieee80211_if_write_link_handler,
+ &data);
}
#define IEEE80211_IF_FMT(name, type, field, format_string) \
@@ -126,41 +228,37 @@ static const struct file_operations name##_ops = { \
.llseek = generic_file_llseek, \
}
-#define _IEEE80211_IF_FILE_R_FN(name, type) \
+#define _IEEE80211_IF_FILE_R_FN(name) \
static ssize_t ieee80211_if_read_##name(struct file *file, \
char __user *userbuf, \
size_t count, loff_t *ppos) \
{ \
- ssize_t (*fn)(const void *, char *, int) = (void *) \
- ((ssize_t (*)(const type, char *, int)) \
- ieee80211_if_fmt_##name); \
- return ieee80211_if_read(file->private_data, \
- userbuf, count, ppos, fn); \
+ return ieee80211_if_read_sdata(file, \
+ userbuf, count, ppos, \
+ ieee80211_if_fmt_##name); \
}
-#define _IEEE80211_IF_FILE_W_FN(name, type) \
+#define _IEEE80211_IF_FILE_W_FN(name) \
static ssize_t ieee80211_if_write_##name(struct file *file, \
const char __user *userbuf, \
size_t count, loff_t *ppos) \
{ \
- ssize_t (*fn)(void *, const char *, int) = (void *) \
- ((ssize_t (*)(type, const char *, int)) \
- ieee80211_if_parse_##name); \
- return ieee80211_if_write(file->private_data, userbuf, count, \
- ppos, fn); \
+ return ieee80211_if_write_sdata(file, userbuf, \
+ count, ppos, \
+ ieee80211_if_parse_##name); \
}
#define IEEE80211_IF_FILE_R(name) \
- _IEEE80211_IF_FILE_R_FN(name, struct ieee80211_sub_if_data *) \
+ _IEEE80211_IF_FILE_R_FN(name) \
_IEEE80211_IF_FILE_OPS(name, ieee80211_if_read_##name, NULL)
#define IEEE80211_IF_FILE_W(name) \
- _IEEE80211_IF_FILE_W_FN(name, struct ieee80211_sub_if_data *) \
+ _IEEE80211_IF_FILE_W_FN(name) \
_IEEE80211_IF_FILE_OPS(name, NULL, ieee80211_if_write_##name)
#define IEEE80211_IF_FILE_RW(name) \
- _IEEE80211_IF_FILE_R_FN(name, struct ieee80211_sub_if_data *) \
- _IEEE80211_IF_FILE_W_FN(name, struct ieee80211_sub_if_data *) \
+ _IEEE80211_IF_FILE_R_FN(name) \
+ _IEEE80211_IF_FILE_W_FN(name) \
_IEEE80211_IF_FILE_OPS(name, ieee80211_if_read_##name, \
ieee80211_if_write_##name)
@@ -168,18 +266,37 @@ static ssize_t ieee80211_if_write_##name(struct file *file, \
IEEE80211_IF_FMT_##format(name, struct ieee80211_sub_if_data, field) \
IEEE80211_IF_FILE_R(name)
-/* Same but with a link_ prefix in the ops variable name and different type */
+#define _IEEE80211_IF_LINK_R_FN(name) \
+static ssize_t ieee80211_if_read_##name(struct file *file, \
+ char __user *userbuf, \
+ size_t count, loff_t *ppos) \
+{ \
+ return ieee80211_if_read_link(file, \
+ userbuf, count, ppos, \
+ ieee80211_if_fmt_##name); \
+}
+
+#define _IEEE80211_IF_LINK_W_FN(name) \
+static ssize_t ieee80211_if_write_##name(struct file *file, \
+ const char __user *userbuf, \
+ size_t count, loff_t *ppos) \
+{ \
+ return ieee80211_if_write_link(file, userbuf, \
+ count, ppos, \
+ ieee80211_if_parse_##name); \
+}
+
#define IEEE80211_IF_LINK_FILE_R(name) \
- _IEEE80211_IF_FILE_R_FN(name, struct ieee80211_link_data *) \
+ _IEEE80211_IF_LINK_R_FN(name) \
_IEEE80211_IF_FILE_OPS(link_##name, ieee80211_if_read_##name, NULL)
#define IEEE80211_IF_LINK_FILE_W(name) \
- _IEEE80211_IF_FILE_W_FN(name) \
+ _IEEE80211_IF_LINK_W_FN(name) \
_IEEE80211_IF_FILE_OPS(link_##name, NULL, ieee80211_if_write_##name)
#define IEEE80211_IF_LINK_FILE_RW(name) \
- _IEEE80211_IF_FILE_R_FN(name, struct ieee80211_link_data *) \
- _IEEE80211_IF_FILE_W_FN(name, struct ieee80211_link_data *) \
+ _IEEE80211_IF_LINK_R_FN(name) \
+ _IEEE80211_IF_LINK_W_FN(name) \
_IEEE80211_IF_FILE_OPS(link_##name, ieee80211_if_read_##name, \
ieee80211_if_write_##name)
@@ -265,9 +382,11 @@ static int ieee80211_set_smps(struct ieee80211_link_data *link,
{
struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_local *local = sdata->local;
- int err;
- if (sdata->vif.driver_flags & IEEE80211_VIF_DISABLE_SMPS_OVERRIDE)
+ /* The driver indicated that EML is enabled for the interface, thus do
+ * not allow to override the SMPS state.
+ */
+ if (sdata->vif.driver_flags & IEEE80211_VIF_EML_ACTIVE)
return -EOPNOTSUPP;
if (!(local->hw.wiphy->features & NL80211_FEATURE_STATIC_SMPS) &&
@@ -283,11 +402,7 @@ static int ieee80211_set_smps(struct ieee80211_link_data *link,
if (sdata->vif.type != NL80211_IFTYPE_STATION)
return -EOPNOTSUPP;
- sdata_lock(sdata);
- err = __ieee80211_request_smps_mgd(link->sdata, link, smps_mode);
- sdata_unlock(sdata);
-
- return err;
+ return __ieee80211_request_smps_mgd(link->sdata, link, smps_mode);
}
static const char *smps_modes[IEEE80211_SMPS_NUM_MODES] = {
@@ -359,16 +474,13 @@ static ssize_t ieee80211_if_parse_tkip_mic_test(
case NL80211_IFTYPE_STATION:
fc |= cpu_to_le16(IEEE80211_FCTL_TODS);
/* BSSID SA DA */
- sdata_lock(sdata);
if (!sdata->u.mgd.associated) {
- sdata_unlock(sdata);
dev_kfree_skb(skb);
return -ENOTCONN;
}
memcpy(hdr->addr1, sdata->deflink.u.mgd.bssid, ETH_ALEN);
memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN);
memcpy(hdr->addr3, addr, ETH_ALEN);
- sdata_unlock(sdata);
break;
default:
dev_kfree_skb(skb);
@@ -885,18 +997,20 @@ static void add_link_files(struct ieee80211_link_data *link,
}
}
-void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata)
+static void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata,
+ bool mld_vif)
{
char buf[10+IFNAMSIZ];
sprintf(buf, "netdev:%s", sdata->name);
sdata->vif.debugfs_dir = debugfs_create_dir(buf,
sdata->local->hw.wiphy->debugfsdir);
+ /* deflink also has this */
+ sdata->deflink.debugfs_dir = sdata->vif.debugfs_dir;
sdata->debugfs.subdir_stations = debugfs_create_dir("stations",
sdata->vif.debugfs_dir);
add_files(sdata);
-
- if (!(sdata->local->hw.wiphy->flags & WIPHY_FLAG_SUPPORTS_MLO))
+ if (!mld_vif)
add_link_files(&sdata->deflink, sdata->vif.debugfs_dir);
}
@@ -924,11 +1038,24 @@ void ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata)
debugfs_rename(dir->d_parent, dir, dir->d_parent, buf);
}
+void ieee80211_debugfs_recreate_netdev(struct ieee80211_sub_if_data *sdata,
+ bool mld_vif)
+{
+ ieee80211_debugfs_remove_netdev(sdata);
+ ieee80211_debugfs_add_netdev(sdata, mld_vif);
+
+ if (sdata->flags & IEEE80211_SDATA_IN_DRIVER) {
+ drv_vif_add_debugfs(sdata->local, sdata);
+ if (!mld_vif)
+ ieee80211_link_debugfs_drv_add(&sdata->deflink);
+ }
+}
+
void ieee80211_link_debugfs_add(struct ieee80211_link_data *link)
{
char link_dir_name[10];
- if (WARN_ON(!link->sdata->vif.debugfs_dir))
+ if (WARN_ON(!link->sdata->vif.debugfs_dir || link->debugfs_dir))
return;
/* For now, this should not be called for non-MLO capable drivers */
@@ -965,7 +1092,8 @@ void ieee80211_link_debugfs_remove(struct ieee80211_link_data *link)
void ieee80211_link_debugfs_drv_add(struct ieee80211_link_data *link)
{
- if (WARN_ON(!link->debugfs_dir))
+ if (link->sdata->vif.type == NL80211_IFTYPE_MONITOR ||
+ WARN_ON(!link->debugfs_dir))
return;
drv_link_add_debugfs(link->sdata->local, link->sdata,
diff --git a/net/mac80211/debugfs_netdev.h b/net/mac80211/debugfs_netdev.h
index 99e688dcabd6..a02ec0a413f6 100644
--- a/net/mac80211/debugfs_netdev.h
+++ b/net/mac80211/debugfs_netdev.h
@@ -1,4 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Portions:
+ * Copyright (C) 2023 Intel Corporation
+ */
/* routines exported for debugfs handling */
#ifndef __IEEE80211_DEBUGFS_NETDEV_H
@@ -7,9 +11,10 @@
#include "ieee80211_i.h"
#ifdef CONFIG_MAC80211_DEBUGFS
-void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata);
void ieee80211_debugfs_remove_netdev(struct ieee80211_sub_if_data *sdata);
void ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata);
+void ieee80211_debugfs_recreate_netdev(struct ieee80211_sub_if_data *sdata,
+ bool mld_vif);
void ieee80211_link_debugfs_add(struct ieee80211_link_data *link);
void ieee80211_link_debugfs_remove(struct ieee80211_link_data *link);
@@ -17,16 +22,15 @@ void ieee80211_link_debugfs_remove(struct ieee80211_link_data *link);
void ieee80211_link_debugfs_drv_add(struct ieee80211_link_data *link);
void ieee80211_link_debugfs_drv_remove(struct ieee80211_link_data *link);
#else
-static inline void ieee80211_debugfs_add_netdev(
- struct ieee80211_sub_if_data *sdata)
-{}
static inline void ieee80211_debugfs_remove_netdev(
struct ieee80211_sub_if_data *sdata)
{}
static inline void ieee80211_debugfs_rename_netdev(
struct ieee80211_sub_if_data *sdata)
{}
-
+static inline void ieee80211_debugfs_recreate_netdev(
+ struct ieee80211_sub_if_data *sdata, bool mld_vif)
+{}
static inline void ieee80211_link_debugfs_add(struct ieee80211_link_data *link)
{}
static inline void ieee80211_link_debugfs_remove(struct ieee80211_link_data *link)
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 5a97fb248c85..1e9389c49a57 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -5,7 +5,7 @@
* Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright(c) 2016 Intel Deutschland GmbH
- * Copyright (C) 2018 - 2022 Intel Corporation
+ * Copyright (C) 2018 - 2023 Intel Corporation
*/
#include <linux/debugfs.h>
@@ -16,7 +16,7 @@
#include "sta_info.h"
#include "driver-ops.h"
-/* sta attributtes */
+/* sta attributes */
#define STA_READ(name, field, format_string) \
static ssize_t sta_ ##name## _read(struct file *file, \
@@ -312,23 +312,14 @@ static ssize_t sta_aql_write(struct file *file, const char __user *userbuf,
STA_OPS_RW(aql);
-static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf,
- size_t count, loff_t *ppos)
+static ssize_t sta_agg_status_do_read(struct wiphy *wiphy, struct file *file,
+ char *buf, size_t bufsz, void *data)
{
- char *buf, *p;
- ssize_t bufsz = 71 + IEEE80211_NUM_TIDS * 40;
+ struct sta_info *sta = data;
+ char *p = buf;
int i;
- struct sta_info *sta = file->private_data;
struct tid_ampdu_rx *tid_rx;
struct tid_ampdu_tx *tid_tx;
- ssize_t ret;
-
- buf = kzalloc(bufsz, GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
- p = buf;
-
- rcu_read_lock();
p += scnprintf(p, bufsz + buf - p, "next dialog_token: %#02x\n",
sta->ampdu_mlme.dialog_token_allocator + 1);
@@ -338,8 +329,8 @@ static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf,
for (i = 0; i < IEEE80211_NUM_TIDS; i++) {
bool tid_rx_valid;
- tid_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[i]);
- tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[i]);
+ tid_rx = wiphy_dereference(wiphy, sta->ampdu_mlme.tid_rx[i]);
+ tid_tx = wiphy_dereference(wiphy, sta->ampdu_mlme.tid_tx[i]);
tid_rx_valid = test_bit(i, sta->ampdu_mlme.agg_session_valid);
p += scnprintf(p, bufsz + buf - p, "%02d", i);
@@ -358,31 +349,39 @@ static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf,
tid_tx ? skb_queue_len(&tid_tx->pending) : 0);
p += scnprintf(p, bufsz + buf - p, "\n");
}
- rcu_read_unlock();
- ret = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
+ return p - buf;
+}
+
+static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf,
+ size_t count, loff_t *ppos)
+{
+ struct sta_info *sta = file->private_data;
+ struct wiphy *wiphy = sta->local->hw.wiphy;
+ size_t bufsz = 71 + IEEE80211_NUM_TIDS * 40;
+ char *buf = kmalloc(bufsz, GFP_KERNEL);
+ ssize_t ret;
+
+ if (!buf)
+ return -ENOMEM;
+
+ ret = wiphy_locked_debugfs_read(wiphy, file, buf, bufsz,
+ userbuf, count, ppos,
+ sta_agg_status_do_read, sta);
kfree(buf);
+
return ret;
}
-static ssize_t sta_agg_status_write(struct file *file, const char __user *userbuf,
- size_t count, loff_t *ppos)
+static ssize_t sta_agg_status_do_write(struct wiphy *wiphy, struct file *file,
+ char *buf, size_t count, void *data)
{
- char _buf[25] = {}, *buf = _buf;
- struct sta_info *sta = file->private_data;
+ struct sta_info *sta = data;
bool start, tx;
unsigned long tid;
- char *pos;
+ char *pos = buf;
int ret, timeout = 5000;
- if (count > sizeof(_buf))
- return -EINVAL;
-
- if (copy_from_user(buf, userbuf, count))
- return -EFAULT;
-
- buf[sizeof(_buf) - 1] = '\0';
- pos = buf;
buf = strsep(&pos, " ");
if (!buf)
return -EINVAL;
@@ -434,6 +433,19 @@ static ssize_t sta_agg_status_write(struct file *file, const char __user *userbu
return ret ?: count;
}
+
+static ssize_t sta_agg_status_write(struct file *file,
+ const char __user *userbuf,
+ size_t count, loff_t *ppos)
+{
+ struct sta_info *sta = file->private_data;
+ struct wiphy *wiphy = sta->local->hw.wiphy;
+ char _buf[26];
+
+ return wiphy_locked_debugfs_write(wiphy, file, _buf, sizeof(_buf),
+ userbuf, count,
+ sta_agg_status_do_write, sta);
+}
STA_OPS_RW(agg_status);
/* link sta attributes */
diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c
index 30cd0c905a24..3b7f70073fc3 100644
--- a/net/mac80211/driver-ops.c
+++ b/net/mac80211/driver-ops.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright 2015 Intel Deutschland GmbH
- * Copyright (C) 2022 Intel Corporation
+ * Copyright (C) 2022-2023 Intel Corporation
*/
#include <net/mac80211.h>
#include "ieee80211_i.h"
@@ -15,6 +15,7 @@ int drv_start(struct ieee80211_local *local)
int ret;
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
if (WARN_ON(local->started))
return -EALREADY;
@@ -35,6 +36,7 @@ int drv_start(struct ieee80211_local *local)
void drv_stop(struct ieee80211_local *local)
{
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
if (WARN_ON(!local->started))
return;
@@ -58,6 +60,7 @@ int drv_add_interface(struct ieee80211_local *local,
int ret;
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
if (WARN_ON(sdata->vif.type == NL80211_IFTYPE_AP_VLAN ||
(sdata->vif.type == NL80211_IFTYPE_MONITOR &&
@@ -69,10 +72,18 @@ int drv_add_interface(struct ieee80211_local *local,
ret = local->ops->add_interface(&local->hw, &sdata->vif);
trace_drv_return_int(local, ret);
- if (ret == 0)
+ if (ret)
+ return ret;
+
+ if (!(sdata->flags & IEEE80211_SDATA_IN_DRIVER)) {
sdata->flags |= IEEE80211_SDATA_IN_DRIVER;
- return ret;
+ drv_vif_add_debugfs(local, sdata);
+ /* initially vif is not MLD */
+ ieee80211_link_debugfs_drv_add(&sdata->deflink);
+ }
+
+ return 0;
}
int drv_change_interface(struct ieee80211_local *local,
@@ -82,6 +93,7 @@ int drv_change_interface(struct ieee80211_local *local,
int ret;
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
if (!check_sdata_in_driver(sdata))
return -EIO;
@@ -96,13 +108,18 @@ void drv_remove_interface(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata)
{
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
if (!check_sdata_in_driver(sdata))
return;
+ sdata->flags &= ~IEEE80211_SDATA_IN_DRIVER;
+
+ /* Remove driver debugfs entries */
+ ieee80211_debugfs_recreate_netdev(sdata, sdata->vif.valid_links);
+
trace_drv_remove_interface(local, sdata);
local->ops->remove_interface(&local->hw, &sdata->vif);
- sdata->flags &= ~IEEE80211_SDATA_IN_DRIVER;
trace_drv_return_void(local);
}
@@ -116,6 +133,7 @@ int drv_sta_state(struct ieee80211_local *local,
int ret = 0;
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
sdata = get_bss_sdata(sdata);
if (!check_sdata_in_driver(sdata))
@@ -149,6 +167,7 @@ int drv_sta_set_txpwr(struct ieee80211_local *local,
int ret = -EOPNOTSUPP;
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
sdata = get_bss_sdata(sdata);
if (!check_sdata_in_driver(sdata))
@@ -190,6 +209,7 @@ int drv_conf_tx(struct ieee80211_local *local,
int ret = -EOPNOTSUPP;
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
if (!check_sdata_in_driver(sdata))
return -EIO;
@@ -223,6 +243,7 @@ u64 drv_get_tsf(struct ieee80211_local *local,
u64 ret = -1ULL;
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
if (!check_sdata_in_driver(sdata))
return ret;
@@ -239,6 +260,7 @@ void drv_set_tsf(struct ieee80211_local *local,
u64 tsf)
{
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
if (!check_sdata_in_driver(sdata))
return;
@@ -254,6 +276,7 @@ void drv_offset_tsf(struct ieee80211_local *local,
s64 offset)
{
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
if (!check_sdata_in_driver(sdata))
return;
@@ -268,6 +291,7 @@ void drv_reset_tsf(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata)
{
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
if (!check_sdata_in_driver(sdata))
return;
@@ -285,7 +309,9 @@ int drv_assign_vif_chanctx(struct ieee80211_local *local,
{
int ret = 0;
- drv_verify_link_exists(sdata, link_conf);
+ might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
+
if (!check_sdata_in_driver(sdata))
return -EIO;
@@ -312,8 +338,8 @@ void drv_unassign_vif_chanctx(struct ieee80211_local *local,
struct ieee80211_chanctx *ctx)
{
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
- drv_verify_link_exists(sdata, link_conf);
if (!check_sdata_in_driver(sdata))
return;
@@ -340,6 +366,7 @@ int drv_switch_vif_chanctx(struct ieee80211_local *local,
int i;
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
if (!local->ops->switch_vif_chanctx)
return -EOPNOTSUPP;
@@ -392,9 +419,7 @@ int drv_ampdu_action(struct ieee80211_local *local,
int ret = -EOPNOTSUPP;
might_sleep();
-
- if (!sdata)
- return -EIO;
+ lockdep_assert_wiphy(local->hw.wiphy);
sdata = get_bss_sdata(sdata);
if (!check_sdata_in_driver(sdata))
@@ -416,6 +441,7 @@ void drv_link_info_changed(struct ieee80211_local *local,
int link_id, u64 changed)
{
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
if (WARN_ON_ONCE(changed & (BSS_CHANGED_BEACON |
BSS_CHANGED_BEACON_ENABLED) &&
@@ -458,6 +484,7 @@ int drv_set_key(struct ieee80211_local *local,
int ret;
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
sdata = get_bss_sdata(sdata);
if (!check_sdata_in_driver(sdata))
@@ -485,6 +512,7 @@ int drv_change_vif_links(struct ieee80211_local *local,
int ret = -EOPNOTSUPP;
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
if (!check_sdata_in_driver(sdata))
return -EIO;
@@ -510,10 +538,13 @@ int drv_change_vif_links(struct ieee80211_local *local,
if (ret)
return ret;
- for_each_set_bit(link_id, &links_to_add, IEEE80211_MLD_MAX_NUM_LINKS) {
- link = rcu_access_pointer(sdata->link[link_id]);
+ if (!local->in_reconfig && !local->resuming) {
+ for_each_set_bit(link_id, &links_to_add,
+ IEEE80211_MLD_MAX_NUM_LINKS) {
+ link = rcu_access_pointer(sdata->link[link_id]);
- ieee80211_link_debugfs_drv_add(link);
+ ieee80211_link_debugfs_drv_add(link);
+ }
}
return 0;
@@ -532,6 +563,7 @@ int drv_change_sta_links(struct ieee80211_local *local,
int ret = -EOPNOTSUPP;
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
if (!check_sdata_in_driver(sdata))
return -EIO;
@@ -547,7 +579,7 @@ int drv_change_sta_links(struct ieee80211_local *local,
for_each_set_bit(link_id, &links_to_rem, IEEE80211_MLD_MAX_NUM_LINKS) {
link_sta = rcu_dereference_protected(info->link[link_id],
- lockdep_is_held(&local->sta_mtx));
+ lockdep_is_held(&local->hw.wiphy->mtx));
ieee80211_link_sta_debugfs_drv_remove(link_sta);
}
@@ -561,9 +593,13 @@ int drv_change_sta_links(struct ieee80211_local *local,
if (ret)
return ret;
+ /* during reconfig don't add it to debugfs again */
+ if (local->in_reconfig || local->resuming)
+ return 0;
+
for_each_set_bit(link_id, &links_to_add, IEEE80211_MLD_MAX_NUM_LINKS) {
link_sta = rcu_dereference_protected(info->link[link_id],
- lockdep_is_held(&local->sta_mtx));
+ lockdep_is_held(&local->hw.wiphy->mtx));
ieee80211_link_sta_debugfs_drv_add(link_sta);
}
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index c4505593ba7a..eb482fb8c3af 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -23,7 +23,7 @@
static inline struct ieee80211_sub_if_data *
get_bss_sdata(struct ieee80211_sub_if_data *sdata)
{
- if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
+ if (sdata && sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
sdata = container_of(sdata->bss, struct ieee80211_sub_if_data,
u.ap);
@@ -40,6 +40,9 @@ static inline void drv_tx(struct ieee80211_local *local,
static inline void drv_sync_rx_queues(struct ieee80211_local *local,
struct sta_info *sta)
{
+ might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
+
if (local->ops->sync_rx_queues) {
trace_drv_sync_rx_queues(local, sta->sdata, &sta->sta);
local->ops->sync_rx_queues(&local->hw);
@@ -94,6 +97,7 @@ static inline int drv_suspend(struct ieee80211_local *local,
int ret;
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
trace_drv_suspend(local);
ret = local->ops->suspend(&local->hw, wowlan);
@@ -106,6 +110,7 @@ static inline int drv_resume(struct ieee80211_local *local)
int ret;
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
trace_drv_resume(local);
ret = local->ops->resume(&local->hw);
@@ -117,6 +122,7 @@ static inline void drv_set_wakeup(struct ieee80211_local *local,
bool enabled)
{
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
if (!local->ops->set_wakeup)
return;
@@ -142,6 +148,7 @@ static inline int drv_config(struct ieee80211_local *local, u32 changed)
int ret;
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
trace_drv_config(local, changed);
ret = local->ops->config(&local->hw, changed);
@@ -154,6 +161,7 @@ static inline void drv_vif_cfg_changed(struct ieee80211_local *local,
u64 changed)
{
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
if (!check_sdata_in_driver(sdata))
return;
@@ -193,6 +201,7 @@ static inline void drv_configure_filter(struct ieee80211_local *local,
u64 multicast)
{
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
trace_drv_configure_filter(local, changed_flags, total_flags,
multicast);
@@ -207,6 +216,7 @@ static inline void drv_config_iface_filter(struct ieee80211_local *local,
unsigned int changed_flags)
{
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
trace_drv_config_iface_filter(local, sdata, filter_flags,
changed_flags);
@@ -263,6 +273,7 @@ static inline int drv_hw_scan(struct ieee80211_local *local,
int ret;
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
if (!check_sdata_in_driver(sdata))
return -EIO;
@@ -277,6 +288,7 @@ static inline void drv_cancel_hw_scan(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata)
{
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
if (!check_sdata_in_driver(sdata))
return;
@@ -295,6 +307,7 @@ drv_sched_scan_start(struct ieee80211_local *local,
int ret;
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
if (!check_sdata_in_driver(sdata))
return -EIO;
@@ -312,6 +325,7 @@ static inline int drv_sched_scan_stop(struct ieee80211_local *local,
int ret;
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
if (!check_sdata_in_driver(sdata))
return -EIO;
@@ -328,6 +342,7 @@ static inline void drv_sw_scan_start(struct ieee80211_local *local,
const u8 *mac_addr)
{
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
trace_drv_sw_scan_start(local, sdata, mac_addr);
if (local->ops->sw_scan_start)
@@ -339,6 +354,7 @@ static inline void drv_sw_scan_complete(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata)
{
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
trace_drv_sw_scan_complete(local, sdata);
if (local->ops->sw_scan_complete)
@@ -352,6 +368,7 @@ static inline int drv_get_stats(struct ieee80211_local *local,
int ret = -EOPNOTSUPP;
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
if (local->ops->get_stats)
ret = local->ops->get_stats(&local->hw, stats);
@@ -375,6 +392,7 @@ static inline int drv_set_frag_threshold(struct ieee80211_local *local,
int ret = 0;
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
trace_drv_set_frag_threshold(local, value);
if (local->ops->set_frag_threshold)
@@ -389,6 +407,7 @@ static inline int drv_set_rts_threshold(struct ieee80211_local *local,
int ret = 0;
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
trace_drv_set_rts_threshold(local, value);
if (local->ops->set_rts_threshold)
@@ -402,6 +421,7 @@ static inline int drv_set_coverage_class(struct ieee80211_local *local,
{
int ret = 0;
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
trace_drv_set_coverage_class(local, value);
if (local->ops->set_coverage_class)
@@ -435,6 +455,7 @@ static inline int drv_sta_add(struct ieee80211_local *local,
int ret = 0;
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
sdata = get_bss_sdata(sdata);
if (!check_sdata_in_driver(sdata))
@@ -454,6 +475,7 @@ static inline void drv_sta_remove(struct ieee80211_local *local,
struct ieee80211_sta *sta)
{
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
sdata = get_bss_sdata(sdata);
if (!check_sdata_in_driver(sdata))
@@ -467,12 +489,30 @@ static inline void drv_sta_remove(struct ieee80211_local *local,
}
#ifdef CONFIG_MAC80211_DEBUGFS
+static inline void drv_vif_add_debugfs(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata)
+{
+ might_sleep();
+
+ if (sdata->vif.type == NL80211_IFTYPE_MONITOR ||
+ WARN_ON(!sdata->vif.debugfs_dir))
+ return;
+
+ sdata = get_bss_sdata(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return;
+
+ if (local->ops->vif_add_debugfs)
+ local->ops->vif_add_debugfs(&local->hw, &sdata->vif);
+}
+
static inline void drv_link_add_debugfs(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
struct ieee80211_bss_conf *link_conf,
struct dentry *dir)
{
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
sdata = get_bss_sdata(sdata);
if (!check_sdata_in_driver(sdata))
@@ -489,6 +529,7 @@ static inline void drv_sta_add_debugfs(struct ieee80211_local *local,
struct dentry *dir)
{
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
sdata = get_bss_sdata(sdata);
if (!check_sdata_in_driver(sdata))
@@ -505,6 +546,7 @@ static inline void drv_link_sta_add_debugfs(struct ieee80211_local *local,
struct dentry *dir)
{
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
sdata = get_bss_sdata(sdata);
if (!check_sdata_in_driver(sdata))
@@ -514,6 +556,12 @@ static inline void drv_link_sta_add_debugfs(struct ieee80211_local *local,
local->ops->link_sta_add_debugfs(&local->hw, &sdata->vif,
link_sta, dir);
}
+#else
+static inline void drv_vif_add_debugfs(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata)
+{
+ might_sleep();
+}
#endif
static inline void drv_sta_pre_rcu_remove(struct ieee80211_local *local,
@@ -521,6 +569,7 @@ static inline void drv_sta_pre_rcu_remove(struct ieee80211_local *local,
struct sta_info *sta)
{
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
sdata = get_bss_sdata(sdata);
if (!check_sdata_in_driver(sdata))
@@ -569,6 +618,9 @@ static inline void drv_sta_statistics(struct ieee80211_local *local,
struct ieee80211_sta *sta,
struct station_info *sinfo)
{
+ might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
+
sdata = get_bss_sdata(sdata);
if (!check_sdata_in_driver(sdata))
return;
@@ -599,6 +651,7 @@ static inline int drv_tx_last_beacon(struct ieee80211_local *local)
int ret = 0; /* default unsupported op for less congestion */
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
trace_drv_tx_last_beacon(local);
if (local->ops->tx_last_beacon)
@@ -616,6 +669,9 @@ static inline int drv_get_survey(struct ieee80211_local *local, int idx,
{
int ret = -EOPNOTSUPP;
+ might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
+
trace_drv_get_survey(local, idx, survey);
if (local->ops->get_survey)
@@ -629,6 +685,7 @@ static inline int drv_get_survey(struct ieee80211_local *local, int idx,
static inline void drv_rfkill_poll(struct ieee80211_local *local)
{
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
if (local->ops->rfkill_poll)
local->ops->rfkill_poll(&local->hw);
@@ -638,9 +695,13 @@ static inline void drv_flush(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
u32 queues, bool drop)
{
- struct ieee80211_vif *vif = sdata ? &sdata->vif : NULL;
+ struct ieee80211_vif *vif;
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
+
+ sdata = get_bss_sdata(sdata);
+ vif = sdata ? &sdata->vif : NULL;
if (sdata && !check_sdata_in_driver(sdata))
return;
@@ -656,6 +717,9 @@ static inline void drv_flush_sta(struct ieee80211_local *local,
struct sta_info *sta)
{
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
+
+ sdata = get_bss_sdata(sdata);
if (sdata && !check_sdata_in_driver(sdata))
return;
@@ -671,6 +735,7 @@ static inline void drv_channel_switch(struct ieee80211_local *local,
struct ieee80211_channel_switch *ch_switch)
{
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
trace_drv_channel_switch(local, sdata, ch_switch);
local->ops->channel_switch(&local->hw, &sdata->vif, ch_switch);
@@ -683,6 +748,7 @@ static inline int drv_set_antenna(struct ieee80211_local *local,
{
int ret = -EOPNOTSUPP;
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
if (local->ops->set_antenna)
ret = local->ops->set_antenna(&local->hw, tx_ant, rx_ant);
trace_drv_set_antenna(local, tx_ant, rx_ant, ret);
@@ -694,6 +760,7 @@ static inline int drv_get_antenna(struct ieee80211_local *local,
{
int ret = -EOPNOTSUPP;
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
if (local->ops->get_antenna)
ret = local->ops->get_antenna(&local->hw, tx_ant, rx_ant);
trace_drv_get_antenna(local, *tx_ant, *rx_ant, ret);
@@ -709,6 +776,7 @@ static inline int drv_remain_on_channel(struct ieee80211_local *local,
int ret;
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
trace_drv_remain_on_channel(local, sdata, chan, duration, type);
ret = local->ops->remain_on_channel(&local->hw, &sdata->vif,
@@ -725,6 +793,7 @@ drv_cancel_remain_on_channel(struct ieee80211_local *local,
int ret;
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
trace_drv_cancel_remain_on_channel(local, sdata);
ret = local->ops->cancel_remain_on_channel(&local->hw, &sdata->vif);
@@ -736,9 +805,10 @@ drv_cancel_remain_on_channel(struct ieee80211_local *local,
static inline int drv_set_ringparam(struct ieee80211_local *local,
u32 tx, u32 rx)
{
- int ret = -ENOTSUPP;
+ int ret = -EOPNOTSUPP;
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
trace_drv_set_ringparam(local, tx, rx);
if (local->ops->set_ringparam)
@@ -752,6 +822,7 @@ static inline void drv_get_ringparam(struct ieee80211_local *local,
u32 *tx, u32 *tx_max, u32 *rx, u32 *rx_max)
{
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
trace_drv_get_ringparam(local, tx, tx_max, rx, rx_max);
if (local->ops->get_ringparam)
@@ -764,6 +835,7 @@ static inline bool drv_tx_frames_pending(struct ieee80211_local *local)
bool ret = false;
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
trace_drv_tx_frames_pending(local);
if (local->ops->tx_frames_pending)
@@ -780,6 +852,7 @@ static inline int drv_set_bitrate_mask(struct ieee80211_local *local,
int ret = -EOPNOTSUPP;
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
if (!check_sdata_in_driver(sdata))
return -EIO;
@@ -797,6 +870,9 @@ static inline void drv_set_rekey_data(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
struct cfg80211_gtk_rekey_data *data)
{
+ might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
+
if (!check_sdata_in_driver(sdata))
return;
@@ -851,11 +927,13 @@ static inline void drv_mgd_prepare_tx(struct ieee80211_local *local,
struct ieee80211_prep_tx_info *info)
{
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
if (!check_sdata_in_driver(sdata))
return;
WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_STATION);
+ info->link_id = info->link_id < 0 ? 0 : info->link_id;
trace_drv_mgd_prepare_tx(local, sdata, info->duration,
info->subtype, info->success);
if (local->ops->mgd_prepare_tx)
@@ -868,6 +946,7 @@ static inline void drv_mgd_complete_tx(struct ieee80211_local *local,
struct ieee80211_prep_tx_info *info)
{
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
if (!check_sdata_in_driver(sdata))
return;
@@ -882,17 +961,22 @@ static inline void drv_mgd_complete_tx(struct ieee80211_local *local,
static inline void
drv_mgd_protect_tdls_discover(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata)
+ struct ieee80211_sub_if_data *sdata,
+ int link_id)
{
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
if (!check_sdata_in_driver(sdata))
return;
WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_STATION);
+ link_id = link_id > 0 ? link_id : 0;
+
trace_drv_mgd_protect_tdls_discover(local, sdata);
if (local->ops->mgd_protect_tdls_discover)
- local->ops->mgd_protect_tdls_discover(&local->hw, &sdata->vif);
+ local->ops->mgd_protect_tdls_discover(&local->hw, &sdata->vif,
+ link_id);
trace_drv_return_void(local);
}
@@ -902,6 +986,7 @@ static inline int drv_add_chanctx(struct ieee80211_local *local,
int ret = -EOPNOTSUPP;
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
trace_drv_add_chanctx(local, ctx);
if (local->ops->add_chanctx)
@@ -917,6 +1002,7 @@ static inline void drv_remove_chanctx(struct ieee80211_local *local,
struct ieee80211_chanctx *ctx)
{
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
if (WARN_ON(!ctx->driver_present))
return;
@@ -933,6 +1019,7 @@ static inline void drv_change_chanctx(struct ieee80211_local *local,
u32 changed)
{
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
trace_drv_change_chanctx(local, ctx, changed);
if (local->ops->change_chanctx) {
@@ -942,14 +1029,6 @@ static inline void drv_change_chanctx(struct ieee80211_local *local,
trace_drv_return_void(local);
}
-static inline void drv_verify_link_exists(struct ieee80211_sub_if_data *sdata,
- struct ieee80211_bss_conf *link_conf)
-{
- /* deflink always exists, so need to check only for other links */
- if (sdata->deflink.conf != link_conf)
- sdata_assert_lock(sdata);
-}
-
int drv_assign_vif_chanctx(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
struct ieee80211_bss_conf *link_conf,
@@ -968,10 +1047,8 @@ static inline int drv_start_ap(struct ieee80211_local *local,
{
int ret = 0;
- /* make sure link_conf is protected */
- drv_verify_link_exists(sdata, link_conf);
-
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
if (!check_sdata_in_driver(sdata))
return -EIO;
@@ -987,8 +1064,8 @@ static inline void drv_stop_ap(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
struct ieee80211_bss_conf *link_conf)
{
- /* make sure link_conf is protected */
- drv_verify_link_exists(sdata, link_conf);
+ might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
if (!check_sdata_in_driver(sdata))
return;
@@ -1004,6 +1081,7 @@ drv_reconfig_complete(struct ieee80211_local *local,
enum ieee80211_reconfig_type reconfig_type)
{
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
trace_drv_reconfig_complete(local, reconfig_type);
if (local->ops->reconfig_complete)
@@ -1016,6 +1094,9 @@ drv_set_default_unicast_key(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
int key_idx)
{
+ might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
+
if (!check_sdata_in_driver(sdata))
return;
@@ -1046,6 +1127,9 @@ drv_channel_switch_beacon(struct ieee80211_sub_if_data *sdata,
{
struct ieee80211_local *local = sdata->local;
+ might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
+
if (local->ops->channel_switch_beacon) {
trace_drv_channel_switch_beacon(local, sdata, chandef);
local->ops->channel_switch_beacon(&local->hw, &sdata->vif,
@@ -1060,6 +1144,9 @@ drv_pre_channel_switch(struct ieee80211_sub_if_data *sdata,
struct ieee80211_local *local = sdata->local;
int ret = 0;
+ might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
+
if (!check_sdata_in_driver(sdata))
return -EIO;
@@ -1072,17 +1159,22 @@ drv_pre_channel_switch(struct ieee80211_sub_if_data *sdata,
}
static inline int
-drv_post_channel_switch(struct ieee80211_sub_if_data *sdata)
+drv_post_channel_switch(struct ieee80211_link_data *link)
{
+ struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_local *local = sdata->local;
int ret = 0;
+ might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
+
if (!check_sdata_in_driver(sdata))
return -EIO;
trace_drv_post_channel_switch(local, sdata);
if (local->ops->post_channel_switch)
- ret = local->ops->post_channel_switch(&local->hw, &sdata->vif);
+ ret = local->ops->post_channel_switch(&local->hw, &sdata->vif,
+ link->conf);
trace_drv_return_int(local, ret);
return ret;
}
@@ -1092,6 +1184,9 @@ drv_abort_channel_switch(struct ieee80211_sub_if_data *sdata)
{
struct ieee80211_local *local = sdata->local;
+ might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
+
if (!check_sdata_in_driver(sdata))
return;
@@ -1107,6 +1202,9 @@ drv_channel_switch_rx_beacon(struct ieee80211_sub_if_data *sdata,
{
struct ieee80211_local *local = sdata->local;
+ might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
+
if (!check_sdata_in_driver(sdata))
return;
@@ -1122,6 +1220,7 @@ static inline int drv_join_ibss(struct ieee80211_local *local,
int ret = 0;
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
if (!check_sdata_in_driver(sdata))
return -EIO;
@@ -1136,6 +1235,7 @@ static inline void drv_leave_ibss(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata)
{
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
if (!check_sdata_in_driver(sdata))
return;
@@ -1163,6 +1263,9 @@ static inline int drv_get_txpower(struct ieee80211_local *local,
{
int ret;
+ might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
+
if (!local->ops->get_txpower)
return -EOPNOTSUPP;
@@ -1182,6 +1285,7 @@ drv_tdls_channel_switch(struct ieee80211_local *local,
int ret;
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
if (!check_sdata_in_driver(sdata))
return -EIO;
@@ -1202,6 +1306,7 @@ drv_tdls_cancel_channel_switch(struct ieee80211_local *local,
struct ieee80211_sta *sta)
{
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
if (!check_sdata_in_driver(sdata))
return;
@@ -1267,6 +1372,11 @@ drv_get_ftm_responder_stats(struct ieee80211_local *local,
{
u32 ret = -EOPNOTSUPP;
+ might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
+ if (!check_sdata_in_driver(sdata))
+ return -EIO;
+
if (local->ops->get_ftm_responder_stats)
ret = local->ops->get_ftm_responder_stats(&local->hw,
&sdata->vif,
@@ -1283,6 +1393,7 @@ static inline int drv_start_pmsr(struct ieee80211_local *local,
int ret = -EOPNOTSUPP;
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
if (!check_sdata_in_driver(sdata))
return -EIO;
@@ -1302,6 +1413,7 @@ static inline void drv_abort_pmsr(struct ieee80211_local *local,
trace_drv_abort_pmsr(local, sdata);
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
if (!check_sdata_in_driver(sdata))
return;
@@ -1317,6 +1429,7 @@ static inline int drv_start_nan(struct ieee80211_local *local,
int ret;
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
check_sdata_in_driver(sdata);
trace_drv_start_nan(local, sdata, conf);
@@ -1329,6 +1442,7 @@ static inline void drv_stop_nan(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata)
{
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
check_sdata_in_driver(sdata);
trace_drv_stop_nan(local, sdata);
@@ -1344,6 +1458,7 @@ static inline int drv_nan_change_conf(struct ieee80211_local *local,
int ret;
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
check_sdata_in_driver(sdata);
if (!local->ops->nan_change_conf)
@@ -1364,6 +1479,7 @@ static inline int drv_add_nan_func(struct ieee80211_local *local,
int ret;
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
check_sdata_in_driver(sdata);
if (!local->ops->add_nan_func)
@@ -1381,6 +1497,7 @@ static inline void drv_del_nan_func(struct ieee80211_local *local,
u8 instance_id)
{
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
check_sdata_in_driver(sdata);
trace_drv_del_nan_func(local, sdata, instance_id);
@@ -1397,6 +1514,7 @@ static inline int drv_set_tid_config(struct ieee80211_local *local,
int ret;
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
ret = local->ops->set_tid_config(&local->hw, &sdata->vif, sta,
tid_conf);
trace_drv_return_int(local, ret);
@@ -1411,6 +1529,7 @@ static inline int drv_reset_tid_config(struct ieee80211_local *local,
int ret;
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
ret = local->ops->reset_tid_config(&local->hw, &sdata->vif, sta, tids);
trace_drv_return_int(local, ret);
@@ -1421,6 +1540,7 @@ static inline void drv_update_vif_offload(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata)
{
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
check_sdata_in_driver(sdata);
if (!local->ops->update_vif_offload)
@@ -1436,6 +1556,9 @@ static inline void drv_sta_set_4addr(struct ieee80211_local *local,
struct ieee80211_sta *sta, bool enabled)
{
sdata = get_bss_sdata(sdata);
+
+ might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
if (!check_sdata_in_driver(sdata))
return;
@@ -1451,6 +1574,9 @@ static inline void drv_sta_set_decap_offload(struct ieee80211_local *local,
bool enabled)
{
sdata = get_bss_sdata(sdata);
+
+ might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
if (!check_sdata_in_driver(sdata))
return;
@@ -1469,6 +1595,7 @@ static inline void drv_add_twt_setup(struct ieee80211_local *local,
struct ieee80211_twt_params *twt_agrt;
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
if (!check_sdata_in_driver(sdata))
return;
@@ -1486,6 +1613,7 @@ static inline void drv_twt_teardown_request(struct ieee80211_local *local,
u8 flowid)
{
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
if (!check_sdata_in_driver(sdata))
return;
@@ -1526,6 +1654,8 @@ static inline int drv_net_setup_tc(struct ieee80211_local *local,
{
int ret = -EOPNOTSUPP;
+ might_sleep();
+
sdata = get_bss_sdata(sdata);
trace_drv_net_setup_tc(local, sdata, type);
if (local->ops->net_setup_tc)
@@ -1536,6 +1666,26 @@ static inline int drv_net_setup_tc(struct ieee80211_local *local,
return ret;
}
+static inline bool drv_can_activate_links(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ u16 active_links)
+{
+ bool ret = true;
+
+ lockdep_assert_wiphy(local->hw.wiphy);
+
+ if (!check_sdata_in_driver(sdata))
+ return false;
+
+ trace_drv_can_activate_links(local, sdata, active_links);
+ if (local->ops->can_activate_links)
+ ret = local->ops->can_activate_links(&local->hw, &sdata->vif,
+ active_links);
+ trace_drv_return_bool(local, ret);
+
+ return ret;
+}
+
int drv_change_vif_links(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
u16 old_links, u16 new_links,
diff --git a/net/mac80211/drop.h b/net/mac80211/drop.h
index 49dc809cab29..12a6f0e9eca6 100644
--- a/net/mac80211/drop.h
+++ b/net/mac80211/drop.h
@@ -18,9 +18,54 @@ typedef unsigned int __bitwise ieee80211_rx_result;
/* this line for the trailing \ - add before this */
#define MAC80211_DROP_REASONS_UNUSABLE(R) \
+ /* 0x00 == ___RX_DROP_UNUSABLE */ \
R(RX_DROP_U_MIC_FAIL) \
R(RX_DROP_U_REPLAY) \
R(RX_DROP_U_BAD_MMIE) \
+ R(RX_DROP_U_DUP) \
+ R(RX_DROP_U_SPURIOUS) \
+ R(RX_DROP_U_DECRYPT_FAIL) \
+ R(RX_DROP_U_NO_KEY_ID) \
+ R(RX_DROP_U_BAD_CIPHER) \
+ R(RX_DROP_U_OOM) \
+ R(RX_DROP_U_NONSEQ_PN) \
+ R(RX_DROP_U_BAD_KEY_COLOR) \
+ R(RX_DROP_U_BAD_4ADDR) \
+ R(RX_DROP_U_BAD_AMSDU) \
+ R(RX_DROP_U_BAD_AMSDU_CIPHER) \
+ R(RX_DROP_U_INVALID_8023) \
+ /* 0x10 */ \
+ R(RX_DROP_U_RUNT_ACTION) \
+ R(RX_DROP_U_UNPROT_ACTION) \
+ R(RX_DROP_U_UNPROT_DUAL) \
+ R(RX_DROP_U_UNPROT_UCAST_MGMT) \
+ R(RX_DROP_U_UNPROT_MCAST_MGMT) \
+ R(RX_DROP_U_UNPROT_BEACON) \
+ R(RX_DROP_U_UNPROT_UNICAST_PUB_ACTION) \
+ R(RX_DROP_U_UNPROT_ROBUST_ACTION) \
+ R(RX_DROP_U_ACTION_UNKNOWN_SRC) \
+ R(RX_DROP_U_REJECTED_ACTION_RESPONSE) \
+ R(RX_DROP_U_EXPECT_DEFRAG_PROT) \
+ R(RX_DROP_U_WEP_DEC_FAIL) \
+ R(RX_DROP_U_NO_IV) \
+ R(RX_DROP_U_NO_ICV) \
+ R(RX_DROP_U_AP_RX_GROUPCAST) \
+ R(RX_DROP_U_SHORT_MMIC) \
+ /* 0x20 */ \
+ R(RX_DROP_U_MMIC_FAIL) \
+ R(RX_DROP_U_SHORT_TKIP) \
+ R(RX_DROP_U_TKIP_FAIL) \
+ R(RX_DROP_U_SHORT_CCMP) \
+ R(RX_DROP_U_SHORT_CCMP_MIC) \
+ R(RX_DROP_U_SHORT_GCMP) \
+ R(RX_DROP_U_SHORT_GCMP_MIC) \
+ R(RX_DROP_U_SHORT_CMAC) \
+ R(RX_DROP_U_SHORT_CMAC256) \
+ R(RX_DROP_U_SHORT_GMAC) \
+ R(RX_DROP_U_UNEXPECTED_VLAN_4ADDR) \
+ R(RX_DROP_U_UNEXPECTED_STA_4ADDR) \
+ R(RX_DROP_U_UNEXPECTED_VLAN_MCAST) \
+ R(RX_DROP_U_NOT_PORT_CONTROL) \
/* this line for the trailing \ - add before this */
/* having two enums allows for checking ieee80211_rx_result use with sparse */
@@ -46,11 +91,13 @@ enum mac80211_drop_reason {
RX_CONTINUE = (__force ieee80211_rx_result)___RX_CONTINUE,
RX_QUEUED = (__force ieee80211_rx_result)___RX_QUEUED,
RX_DROP_MONITOR = (__force ieee80211_rx_result)___RX_DROP_MONITOR,
- RX_DROP_UNUSABLE = (__force ieee80211_rx_result)___RX_DROP_UNUSABLE,
#define DEF(x) x = (__force ieee80211_rx_result)___ ## x,
MAC80211_DROP_REASONS_MONITOR(DEF)
MAC80211_DROP_REASONS_UNUSABLE(DEF)
#undef DEF
};
+#define RX_RES_IS_UNUSABLE(result) \
+ (((__force u32)(result) & SKB_DROP_REASON_SUBSYS_MASK) == ___RX_DROP_UNUSABLE)
+
#endif /* MAC80211_DROP_H */
diff --git a/net/mac80211/ethtool.c b/net/mac80211/ethtool.c
index a3830d925cc2..99f6174a9d69 100644
--- a/net/mac80211/ethtool.c
+++ b/net/mac80211/ethtool.c
@@ -5,7 +5,7 @@
* Copied from cfg.c - originally
* Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2014 Intel Corporation (Author: Johannes Berg)
- * Copyright (C) 2018, 2022 Intel Corporation
+ * Copyright (C) 2018, 2022-2023 Intel Corporation
*/
#include <linux/types.h>
#include <net/cfg80211.h>
@@ -19,11 +19,16 @@ static int ieee80211_set_ringparam(struct net_device *dev,
struct netlink_ext_ack *extack)
{
struct ieee80211_local *local = wiphy_priv(dev->ieee80211_ptr->wiphy);
+ int ret;
if (rp->rx_mini_pending != 0 || rp->rx_jumbo_pending != 0)
return -EINVAL;
- return drv_set_ringparam(local, rp->tx_pending, rp->rx_pending);
+ wiphy_lock(local->hw.wiphy);
+ ret = drv_set_ringparam(local, rp->tx_pending, rp->rx_pending);
+ wiphy_unlock(local->hw.wiphy);
+
+ return ret;
}
static void ieee80211_get_ringparam(struct net_device *dev,
@@ -35,8 +40,10 @@ static void ieee80211_get_ringparam(struct net_device *dev,
memset(rp, 0, sizeof(*rp));
+ wiphy_lock(local->hw.wiphy);
drv_get_ringparam(local, &rp->tx_pending, &rp->tx_max_pending,
&rp->rx_pending, &rp->rx_max_pending);
+ wiphy_unlock(local->hw.wiphy);
}
static const char ieee80211_gstrings_sta_stats[][ETH_GSTRING_LEN] = {
@@ -102,7 +109,7 @@ static void ieee80211_get_stats(struct net_device *dev,
* network device.
*/
- mutex_lock(&local->sta_mtx);
+ wiphy_lock(local->hw.wiphy);
if (sdata->vif.type == NL80211_IFTYPE_STATION) {
sta = sta_info_get_bss(sdata, sdata->deflink.u.mgd.bssid);
@@ -198,12 +205,13 @@ do_survey:
else
data[i++] = -1LL;
- mutex_unlock(&local->sta_mtx);
-
- if (WARN_ON(i != STA_STATS_LEN))
+ if (WARN_ON(i != STA_STATS_LEN)) {
+ wiphy_unlock(local->hw.wiphy);
return;
+ }
drv_get_et_stats(sdata, stats, &(data[STA_STATS_LEN]));
+ wiphy_unlock(local->hw.wiphy);
}
static void ieee80211_get_strings(struct net_device *dev, u32 sset, u8 *data)
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 33729870ad8a..749f4ecab990 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -271,6 +271,7 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata,
case NL80211_CHAN_WIDTH_80:
case NL80211_CHAN_WIDTH_80P80:
case NL80211_CHAN_WIDTH_160:
+ case NL80211_CHAN_WIDTH_320:
bw = ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ?
IEEE80211_STA_RX_BW_40 : IEEE80211_STA_RX_BW_20;
break;
@@ -316,16 +317,16 @@ void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta,
{
int i;
- mutex_lock(&sta->ampdu_mlme.mtx);
+ lockdep_assert_wiphy(sta->local->hw.wiphy);
+
for (i = 0; i < IEEE80211_NUM_TIDS; i++)
- ___ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT,
- WLAN_REASON_QSTA_LEAVE_QBSS,
- reason != AGG_STOP_DESTROY_STA &&
- reason != AGG_STOP_PEER_REQUEST);
+ __ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT,
+ WLAN_REASON_QSTA_LEAVE_QBSS,
+ reason != AGG_STOP_DESTROY_STA &&
+ reason != AGG_STOP_PEER_REQUEST);
for (i = 0; i < IEEE80211_NUM_TIDS; i++)
- ___ieee80211_stop_tx_ba_session(sta, i, reason);
- mutex_unlock(&sta->ampdu_mlme.mtx);
+ __ieee80211_stop_tx_ba_session(sta, i, reason);
/*
* In case the tear down is part of a reconfigure due to HW restart
@@ -333,9 +334,8 @@ void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta,
* the BA session, so handle it to properly clean tid_tx data.
*/
if(reason == AGG_STOP_DESTROY_STA) {
- cancel_work_sync(&sta->ampdu_mlme.work);
+ wiphy_work_cancel(sta->local->hw.wiphy, &sta->ampdu_mlme.work);
- mutex_lock(&sta->ampdu_mlme.mtx);
for (i = 0; i < IEEE80211_NUM_TIDS; i++) {
struct tid_ampdu_tx *tid_tx =
rcu_dereference_protected_tid_tx(sta, i);
@@ -346,11 +346,10 @@ void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta,
if (test_and_clear_bit(HT_AGG_STATE_STOP_CB, &tid_tx->state))
ieee80211_stop_tx_ba_cb(sta, i, tid_tx);
}
- mutex_unlock(&sta->ampdu_mlme.mtx);
}
}
-void ieee80211_ba_session_work(struct work_struct *work)
+void ieee80211_ba_session_work(struct wiphy *wiphy, struct wiphy_work *work)
{
struct sta_info *sta =
container_of(work, struct sta_info, ampdu_mlme.work);
@@ -358,32 +357,33 @@ void ieee80211_ba_session_work(struct work_struct *work)
bool blocked;
int tid;
+ lockdep_assert_wiphy(sta->local->hw.wiphy);
+
/* When this flag is set, new sessions should be blocked. */
blocked = test_sta_flag(sta, WLAN_STA_BLOCK_BA);
- mutex_lock(&sta->ampdu_mlme.mtx);
for (tid = 0; tid < IEEE80211_NUM_TIDS; tid++) {
if (test_and_clear_bit(tid, sta->ampdu_mlme.tid_rx_timer_expired))
- ___ieee80211_stop_rx_ba_session(
+ __ieee80211_stop_rx_ba_session(
sta, tid, WLAN_BACK_RECIPIENT,
WLAN_REASON_QSTA_TIMEOUT, true);
if (test_and_clear_bit(tid,
sta->ampdu_mlme.tid_rx_stop_requested))
- ___ieee80211_stop_rx_ba_session(
+ __ieee80211_stop_rx_ba_session(
sta, tid, WLAN_BACK_RECIPIENT,
WLAN_REASON_UNSPECIFIED, true);
if (!blocked &&
test_and_clear_bit(tid,
sta->ampdu_mlme.tid_rx_manage_offl))
- ___ieee80211_start_rx_ba_session(sta, 0, 0, 0, 1, tid,
- IEEE80211_MAX_AMPDU_BUF_HT,
- false, true, NULL);
+ __ieee80211_start_rx_ba_session(sta, 0, 0, 0, 1, tid,
+ IEEE80211_MAX_AMPDU_BUF_HT,
+ false, true, NULL);
if (test_and_clear_bit(tid + IEEE80211_NUM_TIDS,
sta->ampdu_mlme.tid_rx_manage_offl))
- ___ieee80211_stop_rx_ba_session(
+ __ieee80211_stop_rx_ba_session(
sta, tid, WLAN_BACK_RECIPIENT,
0, false);
@@ -414,9 +414,7 @@ void ieee80211_ba_session_work(struct work_struct *work)
*/
synchronize_net();
- mutex_unlock(&sta->ampdu_mlme.mtx);
-
- ieee80211_queue_work(&sdata->local->hw, work);
+ wiphy_work_queue(sdata->local->hw.wiphy, work);
return;
}
@@ -448,12 +446,11 @@ void ieee80211_ba_session_work(struct work_struct *work)
test_and_clear_bit(HT_AGG_STATE_START_CB, &tid_tx->state))
ieee80211_start_tx_ba_cb(sta, tid, tid_tx);
if (test_and_clear_bit(HT_AGG_STATE_WANT_STOP, &tid_tx->state))
- ___ieee80211_stop_tx_ba_session(sta, tid,
- AGG_STOP_LOCAL_REQUEST);
+ __ieee80211_stop_tx_ba_session(sta, tid,
+ AGG_STOP_LOCAL_REQUEST);
if (test_and_clear_bit(HT_AGG_STATE_STOP_CB, &tid_tx->state))
ieee80211_stop_tx_ba_cb(sta, tid, tid_tx);
}
- mutex_unlock(&sta->ampdu_mlme.mtx);
}
void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata,
@@ -538,11 +535,13 @@ ieee80211_smps_mode_to_smps_mode(enum ieee80211_smps_mode smps)
int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata,
enum ieee80211_smps_mode smps, const u8 *da,
- const u8 *bssid)
+ const u8 *bssid, int link_id)
{
struct ieee80211_local *local = sdata->local;
struct sk_buff *skb;
struct ieee80211_mgmt *action_frame;
+ struct ieee80211_tx_info *info;
+ u8 status_link_id = link_id < 0 ? 0 : link_id;
/* 27 = header + category + action + smps mode */
skb = dev_alloc_skb(27 + local->hw.extra_tx_headroom);
@@ -562,6 +561,7 @@ int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata,
case IEEE80211_SMPS_AUTOMATIC:
case IEEE80211_SMPS_NUM_MODES:
WARN_ON(1);
+ smps = IEEE80211_SMPS_OFF;
fallthrough;
case IEEE80211_SMPS_OFF:
action_frame->u.action.u.ht_smps.smps_control =
@@ -578,8 +578,13 @@ int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata,
}
/* we'll do more on status of this frame */
- IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS;
- ieee80211_tx_skb(sdata, skb);
+ info = IEEE80211_SKB_CB(skb);
+ info->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS;
+ /* we have 12 bits, and need 6: link_id 4, smps 2 */
+ info->status_data = IEEE80211_STATUS_TYPE_SMPS |
+ u16_encode_bits(status_link_id << 2 | smps,
+ IEEE80211_STATUS_SUBDATA_MASK);
+ ieee80211_tx_skb_tid(sdata, skb, 7, link_id);
return 0;
}
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index e1900077bc4b..8f2b445a5ec3 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -51,7 +51,6 @@ ieee80211_ibss_build_presp(struct ieee80211_sub_if_data *sdata,
u32 rate_flags, rates = 0, rates_added = 0;
struct beacon_data *presp;
int frame_len;
- int shift;
/* Build IBSS probe response */
frame_len = sizeof(struct ieee80211_hdr_3addr) +
@@ -92,7 +91,6 @@ ieee80211_ibss_build_presp(struct ieee80211_sub_if_data *sdata,
sband = local->hw.wiphy->bands[chandef->chan->band];
rate_flags = ieee80211_chandef_rate_flags(chandef);
- shift = ieee80211_chandef_get_shift(chandef);
rates_n = 0;
if (have_higher_than_11mbit)
*have_higher_than_11mbit = false;
@@ -111,8 +109,7 @@ ieee80211_ibss_build_presp(struct ieee80211_sub_if_data *sdata,
*pos++ = WLAN_EID_SUPP_RATES;
*pos++ = min_t(int, 8, rates_n);
for (ri = 0; ri < sband->n_bitrates; ri++) {
- int rate = DIV_ROUND_UP(sband->bitrates[ri].bitrate,
- 5 * (1 << shift));
+ int rate = DIV_ROUND_UP(sband->bitrates[ri].bitrate, 5);
u8 basic = 0;
if (!(rates & BIT(ri)))
continue;
@@ -155,8 +152,7 @@ ieee80211_ibss_build_presp(struct ieee80211_sub_if_data *sdata,
*pos++ = WLAN_EID_EXT_SUPP_RATES;
*pos++ = rates_n - 8;
for (; ri < sband->n_bitrates; ri++) {
- int rate = DIV_ROUND_UP(sband->bitrates[ri].bitrate,
- 5 * (1 << shift));
+ int rate = DIV_ROUND_UP(sband->bitrates[ri].bitrate, 5);
u8 basic = 0;
if (!(rates & BIT(ri)))
continue;
@@ -235,7 +231,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
bool radar_required;
int err;
- sdata_assert_lock(sdata);
+ lockdep_assert_wiphy(local->hw.wiphy);
/* Reset own TSF to allow time synchronization work. */
drv_reset_tsf(local, sdata);
@@ -299,17 +295,14 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
radar_required = err;
- mutex_lock(&local->mtx);
if (ieee80211_link_use_channel(&sdata->deflink, &chandef,
ifibss->fixed_channel ?
IEEE80211_CHANCTX_SHARED :
IEEE80211_CHANCTX_EXCLUSIVE)) {
sdata_info(sdata, "Failed to join IBSS, no channel context\n");
- mutex_unlock(&local->mtx);
return;
}
sdata->deflink.radar_required = radar_required;
- mutex_unlock(&local->mtx);
memcpy(ifibss->bssid, bssid, ETH_ALEN);
@@ -367,9 +360,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
sdata->vif.cfg.ssid_len = 0;
RCU_INIT_POINTER(ifibss->presp, NULL);
kfree_rcu(presp, rcu_head);
- mutex_lock(&local->mtx);
ieee80211_link_release_channel(&sdata->deflink);
- mutex_unlock(&local->mtx);
sdata_info(sdata, "Failed to join IBSS, driver failure: %d\n",
err);
return;
@@ -382,7 +373,6 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
round_jiffies(jiffies + IEEE80211_IBSS_MERGE_INTERVAL));
bss_meta.chan = chan;
- bss_meta.scan_width = cfg80211_chandef_to_scan_width(&chandef);
bss = cfg80211_inform_bss_frame_data(local->hw.wiphy, &bss_meta, mgmt,
presp->head_len, GFP_KERNEL);
@@ -405,9 +395,8 @@ static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
enum nl80211_channel_type chan_type;
u64 tsf;
u32 rate_flags;
- int shift;
- sdata_assert_lock(sdata);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
if (beacon_int < 10)
beacon_int = 10;
@@ -440,7 +429,6 @@ static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
sband = sdata->local->hw.wiphy->bands[cbss->channel->band];
rate_flags = ieee80211_chandef_rate_flags(&sdata->u.ibss.chandef);
- shift = ieee80211_vif_get_shift(&sdata->vif);
basic_rates = 0;
@@ -454,8 +442,7 @@ static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
!= rate_flags)
continue;
- brate = DIV_ROUND_UP(sband->bitrates[j].bitrate,
- 5 * (1 << shift));
+ brate = DIV_ROUND_UP(sband->bitrates[j].bitrate, 5);
if (brate == rate) {
if (is_basic)
basic_rates |= BIT(j);
@@ -488,7 +475,7 @@ int ieee80211_ibss_csa_beacon(struct ieee80211_sub_if_data *sdata,
u16 capability = WLAN_CAPABILITY_IBSS;
u64 tsf;
- sdata_assert_lock(sdata);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
if (ifibss->privacy)
capability |= WLAN_CAPABILITY_PRIVACY;
@@ -498,7 +485,7 @@ int ieee80211_ibss_csa_beacon(struct ieee80211_sub_if_data *sdata,
ifibss->ssid_len, IEEE80211_BSS_TYPE_IBSS,
IEEE80211_PRIVACY(ifibss->privacy));
- if (WARN_ON(!cbss))
+ if (unlikely(!cbss))
return -EINVAL;
rcu_read_lock();
@@ -530,7 +517,7 @@ int ieee80211_ibss_finish_csa(struct ieee80211_sub_if_data *sdata, u64 *changed)
struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
struct cfg80211_bss *cbss;
- sdata_assert_lock(sdata);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
/* When not connected/joined, sending CSA doesn't make sense. */
if (ifibss->state != IEEE80211_IBSS_MLME_JOINED)
@@ -600,7 +587,6 @@ ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, const u8 *bssid,
struct sta_info *sta;
struct ieee80211_chanctx_conf *chanctx_conf;
struct ieee80211_supported_band *sband;
- enum nl80211_bss_scan_width scan_width;
int band;
/*
@@ -629,7 +615,6 @@ ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, const u8 *bssid,
if (WARN_ON_ONCE(!chanctx_conf))
return NULL;
band = chanctx_conf->def.chan->band;
- scan_width = cfg80211_chandef_to_scan_width(&chanctx_conf->def);
rcu_read_unlock();
sta = sta_info_alloc(sdata, addr, GFP_KERNEL);
@@ -641,7 +626,7 @@ ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, const u8 *bssid,
/* make sure mandatory rates are always added */
sband = local->hw.wiphy->bands[band];
sta->sta.deflink.supp_rates[band] = supp_rates |
- ieee80211_mandatory_rates(sband, scan_width);
+ ieee80211_mandatory_rates(sband);
return ieee80211_ibss_finish_sta(sta);
}
@@ -652,7 +637,7 @@ static int ieee80211_sta_active_ibss(struct ieee80211_sub_if_data *sdata)
int active = 0;
struct sta_info *sta;
- sdata_assert_lock(sdata);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
rcu_read_lock();
@@ -680,6 +665,8 @@ static void ieee80211_ibss_disconnect(struct ieee80211_sub_if_data *sdata)
struct beacon_data *presp;
struct sta_info *sta;
+ lockdep_assert_wiphy(local->hw.wiphy);
+
if (!is_zero_ether_addr(ifibss->bssid)) {
cbss = cfg80211_get_bss(local->hw.wiphy, ifibss->chandef.chan,
ifibss->bssid, ifibss->ssid,
@@ -726,9 +713,7 @@ static void ieee80211_ibss_disconnect(struct ieee80211_sub_if_data *sdata)
ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED |
BSS_CHANGED_IBSS);
drv_leave_ibss(local, sdata);
- mutex_lock(&local->mtx);
ieee80211_link_release_channel(&sdata->deflink);
- mutex_unlock(&local->mtx);
}
static void ieee80211_csa_connection_drop_work(struct wiphy *wiphy,
@@ -738,16 +723,12 @@ static void ieee80211_csa_connection_drop_work(struct wiphy *wiphy,
container_of(work, struct ieee80211_sub_if_data,
u.ibss.csa_connection_drop_work);
- sdata_lock(sdata);
-
ieee80211_ibss_disconnect(sdata);
synchronize_rcu();
skb_queue_purge(&sdata->skb_queue);
/* trigger a scan to find another IBSS network to join */
wiphy_work_queue(sdata->local->hw.wiphy, &sdata->work);
-
- sdata_unlock(sdata);
}
static void ieee80211_ibss_csa_mark_radar(struct ieee80211_sub_if_data *sdata)
@@ -779,7 +760,7 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata,
ieee80211_conn_flags_t conn_flags;
u32 vht_cap_info = 0;
- sdata_assert_lock(sdata);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
conn_flags = IEEE80211_CONN_DISABLE_VHT;
@@ -951,7 +932,7 @@ static void ieee80211_rx_mgmt_auth_ibss(struct ieee80211_sub_if_data *sdata,
{
u16 auth_alg, auth_transaction;
- sdata_assert_lock(sdata);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
if (len < 24 + 6)
return;
@@ -984,7 +965,6 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata,
{
struct sta_info *sta;
enum nl80211_band band = rx_status->band;
- enum nl80211_bss_scan_width scan_width;
struct ieee80211_local *local = sdata->local;
struct ieee80211_supported_band *sband;
bool rates_updated = false;
@@ -1010,15 +990,9 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata,
u32 prev_rates;
prev_rates = sta->sta.deflink.supp_rates[band];
- /* make sure mandatory rates are always added */
- scan_width = NL80211_BSS_CHAN_WIDTH_20;
- if (rx_status->bw == RATE_INFO_BW_5)
- scan_width = NL80211_BSS_CHAN_WIDTH_5;
- else if (rx_status->bw == RATE_INFO_BW_10)
- scan_width = NL80211_BSS_CHAN_WIDTH_10;
sta->sta.deflink.supp_rates[band] = supp_rates |
- ieee80211_mandatory_rates(sband, scan_width);
+ ieee80211_mandatory_rates(sband);
if (sta->sta.deflink.supp_rates[band] != prev_rates) {
ibss_dbg(sdata,
"updated supp_rates set for %pM based on beacon/probe_resp (0x%x -> 0x%x)\n",
@@ -1072,7 +1046,7 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata,
&chandef);
memcpy(&cap_ie, elems->vht_cap_elem, sizeof(cap_ie));
ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband,
- &cap_ie,
+ &cap_ie, NULL,
&sta->deflink);
if (memcmp(&cap, &sta->sta.deflink.vht_cap, sizeof(cap)))
rates_updated |= true;
@@ -1205,7 +1179,6 @@ void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata,
struct sta_info *sta;
struct ieee80211_chanctx_conf *chanctx_conf;
struct ieee80211_supported_band *sband;
- enum nl80211_bss_scan_width scan_width;
int band;
/*
@@ -1231,7 +1204,6 @@ void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata,
return;
}
band = chanctx_conf->def.chan->band;
- scan_width = cfg80211_chandef_to_scan_width(&chanctx_conf->def);
rcu_read_unlock();
sta = sta_info_alloc(sdata, addr, GFP_ATOMIC);
@@ -1241,7 +1213,7 @@ void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata,
/* make sure mandatory rates are always added */
sband = local->hw.wiphy->bands[band];
sta->sta.deflink.supp_rates[band] = supp_rates |
- ieee80211_mandatory_rates(sband, scan_width);
+ ieee80211_mandatory_rates(sband);
spin_lock(&ifibss->incomplete_lock);
list_add(&sta->list, &ifibss->incomplete_stations);
@@ -1257,7 +1229,7 @@ static void ieee80211_ibss_sta_expire(struct ieee80211_sub_if_data *sdata)
unsigned long exp_time = IEEE80211_IBSS_INACTIVITY_LIMIT;
unsigned long exp_rsn = IEEE80211_IBSS_RSN_INACTIVITY_LIMIT;
- mutex_lock(&local->sta_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
list_for_each_entry_safe(sta, tmp, &local->sta_list, list) {
unsigned long last_active = ieee80211_sta_last_active(sta);
@@ -1282,8 +1254,6 @@ static void ieee80211_ibss_sta_expire(struct ieee80211_sub_if_data *sdata)
WARN_ON(__sta_info_destroy(sta));
}
}
-
- mutex_unlock(&local->sta_mtx);
}
/*
@@ -1293,9 +1263,8 @@ static void ieee80211_ibss_sta_expire(struct ieee80211_sub_if_data *sdata)
static void ieee80211_sta_merge_ibss(struct ieee80211_sub_if_data *sdata)
{
struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
- enum nl80211_bss_scan_width scan_width;
- sdata_assert_lock(sdata);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
mod_timer(&ifibss->timer,
round_jiffies(jiffies + IEEE80211_IBSS_MERGE_INTERVAL));
@@ -1315,9 +1284,8 @@ static void ieee80211_sta_merge_ibss(struct ieee80211_sub_if_data *sdata)
sdata_info(sdata,
"No active IBSS STAs - trying to scan for other IBSS networks with same SSID (merge)\n");
- scan_width = cfg80211_chandef_to_scan_width(&ifibss->chandef);
ieee80211_request_ibss_scan(sdata, ifibss->ssid, ifibss->ssid_len,
- NULL, 0, scan_width);
+ NULL, 0);
}
static void ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata)
@@ -1327,7 +1295,7 @@ static void ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata)
u16 capability;
int i;
- sdata_assert_lock(sdata);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
if (ifibss->fixed_bssid) {
memcpy(bssid, ifibss->bssid, ETH_ALEN);
@@ -1435,10 +1403,9 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata)
struct cfg80211_bss *cbss;
struct ieee80211_channel *chan = NULL;
const u8 *bssid = NULL;
- enum nl80211_bss_scan_width scan_width;
int active_ibss;
- sdata_assert_lock(sdata);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
active_ibss = ieee80211_sta_active_ibss(sdata);
ibss_dbg(sdata, "sta_find_ibss (active_ibss=%d)\n", active_ibss);
@@ -1494,8 +1461,6 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata)
sdata_info(sdata, "Trigger new scan to find an IBSS to join\n");
- scan_width = cfg80211_chandef_to_scan_width(&ifibss->chandef);
-
if (ifibss->fixed_channel) {
num = ieee80211_ibss_setup_scan_channels(local->hw.wiphy,
&ifibss->chandef,
@@ -1503,11 +1468,10 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata)
ARRAY_SIZE(channels));
ieee80211_request_ibss_scan(sdata, ifibss->ssid,
ifibss->ssid_len, channels,
- num, scan_width);
+ num);
} else {
ieee80211_request_ibss_scan(sdata, ifibss->ssid,
- ifibss->ssid_len, NULL,
- 0, scan_width);
+ ifibss->ssid_len, NULL, 0);
}
} else {
int interval = IEEE80211_SCAN_INTERVAL;
@@ -1532,7 +1496,7 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata,
struct beacon_data *presp;
u8 *pos, *end;
- sdata_assert_lock(sdata);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
presp = sdata_dereference(ifibss->presp, sdata);
@@ -1628,10 +1592,8 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
mgmt = (struct ieee80211_mgmt *) skb->data;
fc = le16_to_cpu(mgmt->frame_control);
- sdata_lock(sdata);
-
if (!sdata->u.ibss.ssid_len)
- goto mgmt_out; /* not ready to merge yet */
+ return; /* not ready to merge yet */
switch (fc & IEEE80211_FCTL_STYPE) {
case IEEE80211_STYPE_PROBE_REQ:
@@ -1671,9 +1633,6 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
break;
}
}
-
- mgmt_out:
- sdata_unlock(sdata);
}
void ieee80211_ibss_work(struct ieee80211_sub_if_data *sdata)
@@ -1681,15 +1640,13 @@ void ieee80211_ibss_work(struct ieee80211_sub_if_data *sdata)
struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
struct sta_info *sta;
- sdata_lock(sdata);
-
/*
* Work could be scheduled after scan or similar
* when we aren't even joined (or trying) with a
* network.
*/
if (!ifibss->ssid_len)
- goto out;
+ return;
spin_lock_bh(&ifibss->incomplete_lock);
while (!list_empty(&ifibss->incomplete_stations)) {
@@ -1715,9 +1672,6 @@ void ieee80211_ibss_work(struct ieee80211_sub_if_data *sdata)
WARN_ON(1);
break;
}
-
- out:
- sdata_unlock(sdata);
}
static void ieee80211_ibss_timer(struct timer_list *t)
@@ -1744,7 +1698,8 @@ void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local)
{
struct ieee80211_sub_if_data *sdata;
- mutex_lock(&local->iflist_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
+
list_for_each_entry(sdata, &local->interfaces, list) {
if (!ieee80211_sdata_running(sdata))
continue;
@@ -1752,7 +1707,6 @@ void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local)
continue;
sdata->u.ibss.last_scan_completed = jiffies;
}
- mutex_unlock(&local->iflist_mtx);
}
int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,
@@ -1767,6 +1721,8 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,
int i;
int ret;
+ lockdep_assert_wiphy(local->hw.wiphy);
+
if (params->chandef.chan->freq_offset) {
/* this may work, but is untested */
return -EOPNOTSUPP;
@@ -1787,10 +1743,8 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,
chanmode = (params->channel_fixed && !ret) ?
IEEE80211_CHANCTX_SHARED : IEEE80211_CHANCTX_EXCLUSIVE;
- mutex_lock(&local->chanctx_mtx);
ret = ieee80211_check_combinations(sdata, &params->chandef, chanmode,
radar_detect_width);
- mutex_unlock(&local->chanctx_mtx);
if (ret < 0)
return ret;
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 06bd406846d2..0b2b53550bd9 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -85,12 +85,21 @@ extern const u8 ieee80211_ac_to_qos_mask[IEEE80211_NUM_ACS];
#define IEEE80211_MAX_NAN_INSTANCE_ID 255
+enum ieee80211_status_data {
+ IEEE80211_STATUS_TYPE_MASK = 0x00f,
+ IEEE80211_STATUS_TYPE_INVALID = 0,
+ IEEE80211_STATUS_TYPE_SMPS = 1,
+ IEEE80211_STATUS_SUBDATA_MASK = 0xff0,
+};
-/*
- * Keep a station's queues on the active list for deficit accounting purposes
- * if it was active or queued during the last 100ms
- */
-#define AIRTIME_ACTIVE_DURATION (HZ / 10)
+static inline bool
+ieee80211_sta_keep_active(struct sta_info *sta, u8 ac)
+{
+ /* Keep a station's queues on the active list for deficit accounting
+ * purposes if it was active or queued during the last 100ms.
+ */
+ return time_before_eq(jiffies, sta->airtime[ac].last_active + HZ / 10);
+}
struct ieee80211_bss {
u32 device_ts_beacon, device_ts_presp;
@@ -430,6 +439,7 @@ struct ieee80211_mgd_assoc_data {
bool need_beacon;
bool synced;
bool timeout_started;
+ bool comeback; /* whether the AP has requested association comeback */
bool s1g;
unsigned int assoc_link_id;
@@ -461,13 +471,24 @@ struct ieee80211_sta_tx_tspec {
bool downgraded;
};
+/* Advertised TID-to-link mapping info */
+struct ieee80211_adv_ttlm_info {
+ /* time in TUs at which the new mapping is established, or 0 if there is
+ * no planned advertised TID-to-link mapping
+ */
+ u16 switch_time;
+ u32 duration; /* duration of the planned T2L map in TUs */
+ u16 map; /* map of usable links for all TIDs */
+ bool active; /* whether the advertised mapping is active or not */
+};
+
DECLARE_EWMA(beacon_signal, 4, 4)
struct ieee80211_if_managed {
struct timer_list timer;
struct timer_list conn_mon_timer;
struct timer_list bcn_mon_timer;
- struct work_struct monitor_work;
+ struct wiphy_work monitor_work;
struct wiphy_work beacon_connection_loss_work;
struct wiphy_work csa_connection_drop_work;
@@ -530,7 +551,7 @@ struct ieee80211_if_managed {
/* TDLS support */
u8 tdls_peer[ETH_ALEN] __aligned(2);
- struct delayed_work tdls_peer_del_work;
+ struct wiphy_delayed_work tdls_peer_del_work;
struct sk_buff *orig_teardown_skb; /* The original teardown skb */
struct sk_buff *teardown_skb; /* A copy to send through the AP */
spinlock_t teardown_lock; /* To lock changing teardown_skb */
@@ -544,7 +565,7 @@ struct ieee80211_if_managed {
* on the BE queue, but there's a lot of VO traffic, we might
* get stuck in a downgraded situation and flush takes forever.
*/
- struct delayed_work tx_tspec_wk;
+ struct wiphy_delayed_work tx_tspec_wk;
/* Information elements from the last transmitted (Re)Association
* Request frame.
@@ -554,6 +575,10 @@ struct ieee80211_if_managed {
struct wiphy_delayed_work ml_reconf_work;
u16 removed_links;
+
+ /* TID-to-link mapping support */
+ struct wiphy_delayed_work ttlm_work;
+ struct ieee80211_adv_ttlm_info ttlm_info;
};
struct ieee80211_if_ibss {
@@ -618,8 +643,9 @@ struct ieee80211_if_ocb {
* these declarations define the interface, which enables
* vendor-specific mesh synchronization
*
+ * @rx_bcn_presp: beacon/probe response was received
+ * @adjust_tsf: TSF adjustment method
*/
-struct ieee802_11_elems;
struct ieee80211_mesh_sync_ops {
void (*rx_bcn_presp)(struct ieee80211_sub_if_data *sdata, u16 stype,
struct ieee80211_mgmt *mgmt, unsigned int len,
@@ -676,7 +702,7 @@ struct ieee80211_if_mesh {
struct timer_list mesh_path_root_timer;
unsigned long wrkq_flags;
- unsigned long mbss_changed;
+ unsigned long mbss_changed[64 / BITS_PER_LONG];
bool userspace_handles_dfs;
@@ -859,12 +885,13 @@ enum txq_info_flags {
* struct txq_info - per tid queue
*
* @tin: contains packets split into multiple flows
- * @def_flow: used as a fallback flow when a packet destined to @tin hashes to
- * a fq_flow which is already owned by a different tin
- * @def_cvars: codel vars for @def_flow
+ * @def_cvars: codel vars for the @tin's default_flow
+ * @cstats: code statistics for this queue
* @frags: used to keep fragments created after dequeue
* @schedule_order: used with ieee80211_local->active_txqs
* @schedule_round: counter to prevent infinite loops on TXQ scheduling
+ * @flags: TXQ flags from &enum txq_info_flags
+ * @txq: the driver visible part
*/
struct txq_info {
struct fq_tin tin;
@@ -893,7 +920,8 @@ struct ieee80211_if_mntr {
* struct ieee80211_if_nan - NAN state
*
* @conf: current NAN configuration
- * @func_ids: a bitmap of available instance_id's
+ * @func_lock: lock for @func_inst_ids
+ * @function_inst_ids: a bitmap of available instance_id's
*/
struct ieee80211_if_nan {
struct cfg80211_nan_conf conf;
@@ -926,6 +954,9 @@ struct ieee80211_link_data_managed {
struct wiphy_delayed_work chswitch_work;
struct wiphy_work request_smps_work;
+ /* used to reconfigure hardware SM PS */
+ struct wiphy_work recalc_smps;
+
bool beacon_crc_valid;
u32 beacon_crc;
struct ewma_beacon_signal ave_beacon_signal;
@@ -970,8 +1001,8 @@ struct ieee80211_link_data {
struct ieee80211_sub_if_data *sdata;
unsigned int link_id;
- struct list_head assigned_chanctx_list; /* protected by chanctx_mtx */
- struct list_head reserved_chanctx_list; /* protected by chanctx_mtx */
+ struct list_head assigned_chanctx_list; /* protected by wiphy mutex */
+ struct list_head reserved_chanctx_list; /* protected by wiphy mutex */
/* multicast keys only */
struct ieee80211_key __rcu *gtk[NUM_DEFAULT_KEYS +
@@ -981,18 +1012,18 @@ struct ieee80211_link_data {
struct ieee80211_key __rcu *default_mgmt_key;
struct ieee80211_key __rcu *default_beacon_key;
- struct work_struct csa_finalize_work;
- bool csa_block_tx; /* write-protected by sdata_lock and local->mtx */
+ struct wiphy_work csa_finalize_work;
+ bool csa_block_tx;
bool operating_11g_mode;
struct cfg80211_chan_def csa_chandef;
- struct work_struct color_change_finalize_work;
+ struct wiphy_work color_change_finalize_work;
struct delayed_work color_collision_detect_work;
u64 color_bitmap;
- /* context reservation -- protected with chanctx_mtx */
+ /* context reservation -- protected with wiphy mutex */
struct ieee80211_chanctx *reserved_chanctx;
struct cfg80211_chan_def reserved_chandef;
bool reserved_radar_required;
@@ -1005,7 +1036,7 @@ struct ieee80211_link_data {
int ap_power_level; /* in dBm */
bool radar_required;
- struct delayed_work dfs_cac_timer_work;
+ struct wiphy_delayed_work dfs_cac_timer_work;
union {
struct ieee80211_link_data_managed mgd;
@@ -1032,7 +1063,7 @@ struct ieee80211_sub_if_data {
/* count for keys needing tailroom space allocation */
int crypto_tx_tailroom_needed_cnt;
int crypto_tx_tailroom_pending_dec;
- struct delayed_work dec_tailroom_needed_wk;
+ struct wiphy_delayed_work dec_tailroom_needed_wk;
struct net_device *dev;
struct ieee80211_local *local;
@@ -1064,9 +1095,6 @@ struct ieee80211_sub_if_data {
atomic_t num_tx_queued;
struct mac80211_qos_map __rcu *qos_map;
- /* used to reconfigure hardware SM PS */
- struct work_struct recalc_smps;
-
struct wiphy_work work;
struct sk_buff_head skb_queue;
struct sk_buff_head status_queue;
@@ -1106,7 +1134,7 @@ struct ieee80211_sub_if_data {
struct ieee80211_link_data __rcu *link[IEEE80211_MLD_MAX_NUM_LINKS];
/* for ieee80211_set_active_links_async() */
- struct work_struct activate_links_work;
+ struct wiphy_work activate_links_work;
u16 desired_active_links;
#ifdef CONFIG_MAC80211_DEBUGFS
@@ -1129,62 +1157,8 @@ struct ieee80211_sub_if_data *vif_to_sdata(struct ieee80211_vif *p)
return container_of(p, struct ieee80211_sub_if_data, vif);
}
-static inline void sdata_lock(struct ieee80211_sub_if_data *sdata)
- __acquires(&sdata->wdev.mtx)
-{
- mutex_lock(&sdata->wdev.mtx);
- __acquire(&sdata->wdev.mtx);
-}
-
-static inline void sdata_unlock(struct ieee80211_sub_if_data *sdata)
- __releases(&sdata->wdev.mtx)
-{
- mutex_unlock(&sdata->wdev.mtx);
- __release(&sdata->wdev.mtx);
-}
-
#define sdata_dereference(p, sdata) \
- rcu_dereference_protected(p, lockdep_is_held(&sdata->wdev.mtx))
-
-static inline void
-sdata_assert_lock(struct ieee80211_sub_if_data *sdata)
-{
- lockdep_assert_held(&sdata->wdev.mtx);
-}
-
-static inline int
-ieee80211_chanwidth_get_shift(enum nl80211_chan_width width)
-{
- switch (width) {
- case NL80211_CHAN_WIDTH_5:
- return 2;
- case NL80211_CHAN_WIDTH_10:
- return 1;
- default:
- return 0;
- }
-}
-
-static inline int
-ieee80211_chandef_get_shift(struct cfg80211_chan_def *chandef)
-{
- return ieee80211_chanwidth_get_shift(chandef->width);
-}
-
-static inline int
-ieee80211_vif_get_shift(struct ieee80211_vif *vif)
-{
- struct ieee80211_chanctx_conf *chanctx_conf;
- int shift = 0;
-
- rcu_read_lock();
- chanctx_conf = rcu_dereference(vif->bss_conf.chanctx_conf);
- if (chanctx_conf)
- shift = ieee80211_chandef_get_shift(&chanctx_conf->def);
- rcu_read_unlock();
-
- return shift;
-}
+ wiphy_dereference(sdata->local->hw.wiphy, p)
static inline int
ieee80211_get_mbssid_beacon_len(struct cfg80211_mbssid_elems *elems,
@@ -1254,7 +1228,7 @@ struct tpt_led_trigger {
#endif
/**
- * mac80211 scan flags - currently active scan mode
+ * enum mac80211_scan_flags - currently active scan mode
*
* @SCAN_SW_SCANNING: We're currently in the process of scanning but may as
* well be on the operating channel
@@ -1272,7 +1246,7 @@ struct tpt_led_trigger {
* and could send a probe request after receiving a beacon.
* @SCAN_BEACON_DONE: Beacon received, we can now send a probe request
*/
-enum {
+enum mac80211_scan_flags {
SCAN_SW_SCANNING,
SCAN_HW_SCANNING,
SCAN_ONCHANNEL_SCANNING,
@@ -1362,7 +1336,7 @@ struct ieee80211_local {
spinlock_t filter_lock;
/* used for uploading changed mc list */
- struct work_struct reconfig_filter;
+ struct wiphy_work reconfig_filter;
/* aggregated multicast list */
struct netdev_hw_addr_list mc_list;
@@ -1406,7 +1380,7 @@ struct ieee80211_local {
/* wowlan is enabled -- don't reconfig on resume */
bool wowlan;
- struct work_struct radar_detected_work;
+ struct wiphy_work radar_detected_work;
/* number of RX chains the hardware has */
u8 rx_chains;
@@ -1429,10 +1403,9 @@ struct ieee80211_local {
/* Station data */
/*
- * The mutex only protects the list, hash table and
- * counter, reads are done with RCU.
+ * The list, hash table and counter are protected
+ * by the wiphy mutex, reads are done with RCU.
*/
- struct mutex sta_mtx;
spinlock_t tim_lock;
unsigned long num_sta;
struct list_head sta_list;
@@ -1461,15 +1434,6 @@ struct ieee80211_local {
struct list_head mon_list; /* only that are IFF_UP && !cooked */
struct mutex iflist_mtx;
- /*
- * Key mutex, protects sdata's key_list and sta_info's
- * key pointers and ptk_idx (write access, they're RCU.)
- */
- struct mutex key_mtx;
-
- /* mutex for scan and work locking */
- struct mutex mtx;
-
/* Scanning and BSS list */
unsigned long scanning;
struct cfg80211_ssid scan_ssid;
@@ -1483,14 +1447,14 @@ struct ieee80211_local {
int hw_scan_ies_bufsize;
struct cfg80211_scan_info scan_info;
- struct work_struct sched_scan_stopped_work;
+ struct wiphy_work sched_scan_stopped_work;
struct ieee80211_sub_if_data __rcu *sched_scan_sdata;
struct cfg80211_sched_scan_request __rcu *sched_scan_req;
u8 scan_addr[ETH_ALEN];
unsigned long leave_oper_channel_time;
enum mac80211_scan_state next_scan_state;
- struct delayed_work scan_work;
+ struct wiphy_delayed_work scan_work;
struct ieee80211_sub_if_data __rcu *scan_sdata;
/* For backward compatibility only -- do not use */
struct cfg80211_chan_def _oper_chandef;
@@ -1500,7 +1464,6 @@ struct ieee80211_local {
/* channel contexts */
struct list_head chanctx_list;
- struct mutex chanctx_mtx;
#ifdef CONFIG_MAC80211_LEDS
struct led_trigger tx_led, rx_led, assoc_led, radio_led;
@@ -1554,8 +1517,8 @@ struct ieee80211_local {
* interface (and monitors) in PS, this then points there.
*/
struct ieee80211_sub_if_data *ps_sdata;
- struct work_struct dynamic_ps_enable_work;
- struct work_struct dynamic_ps_disable_work;
+ struct wiphy_work dynamic_ps_enable_work;
+ struct wiphy_work dynamic_ps_disable_work;
struct timer_list dynamic_ps_timer;
struct notifier_block ifa_notifier;
struct notifier_block ifa6_notifier;
@@ -1583,9 +1546,9 @@ struct ieee80211_local {
/*
* Remain-on-channel support
*/
- struct delayed_work roc_work;
+ struct wiphy_delayed_work roc_work;
struct list_head roc_list;
- struct work_struct hw_roc_start, hw_roc_done;
+ struct wiphy_work hw_roc_start, hw_roc_done;
unsigned long hw_roc_start_time;
u64 roc_cookie_counter;
@@ -1600,6 +1563,8 @@ struct ieee80211_local {
/* extended capabilities provided by mac80211 */
u8 ext_capa[8];
+
+ bool wbrf_supported;
};
static inline struct ieee80211_sub_if_data *
@@ -1733,6 +1698,8 @@ struct ieee802_11_elems {
const struct ieee80211_eht_operation *eht_operation;
const struct ieee80211_multi_link_elem *ml_basic;
const struct ieee80211_multi_link_elem *ml_reconf;
+ const struct ieee80211_bandwidth_indication *bandwidth_indication;
+ const struct ieee80211_ttlm_elem *ttlm[IEEE80211_TTLM_MAX_CNT];
/* length of them, respectively */
u8 ext_capab_len;
@@ -1766,6 +1733,8 @@ struct ieee802_11_elems {
/* The reconfiguration Multi-Link element in the original IEs */
const struct element *ml_reconf_elem;
+ u8 ttlm_num;
+
/*
* store the per station profile pointer and length in case that the
* parsing also handled Multi-Link element parsing for a specific link
@@ -1783,7 +1752,7 @@ struct ieee802_11_elems {
*/
size_t scratch_len;
u8 *scratch_pos;
- u8 scratch[];
+ u8 scratch[] __counted_by(scratch_len);
};
static inline struct ieee80211_local *hw_to_local(
@@ -1807,10 +1776,7 @@ static inline bool txq_has_queue(struct ieee80211_txq *txq)
static inline bool
ieee80211_have_rx_timestamp(struct ieee80211_rx_status *status)
{
- WARN_ON_ONCE(status->flag & RX_FLAG_MACTIME_START &&
- status->flag & RX_FLAG_MACTIME_END);
- return !!(status->flag & (RX_FLAG_MACTIME_START | RX_FLAG_MACTIME_END |
- RX_FLAG_MACTIME_PLCP_START));
+ return status->flag & RX_FLAG_MACTIME;
}
void ieee80211_vif_inc_num_mcast(struct ieee80211_sub_if_data *sdata);
@@ -1929,12 +1895,11 @@ int ieee80211_mesh_finish_csa(struct ieee80211_sub_if_data *sdata,
u64 *changed);
/* scan/BSS handling */
-void ieee80211_scan_work(struct work_struct *work);
+void ieee80211_scan_work(struct wiphy *wiphy, struct wiphy_work *work);
int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata,
const u8 *ssid, u8 ssid_len,
struct ieee80211_channel **channels,
- unsigned int n_channels,
- enum nl80211_bss_scan_width scan_width);
+ unsigned int n_channels);
int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata,
struct cfg80211_scan_request *req);
void ieee80211_scan_cancel(struct ieee80211_local *local);
@@ -1962,7 +1927,8 @@ int ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata,
struct cfg80211_sched_scan_request *req);
int ieee80211_request_sched_scan_stop(struct ieee80211_local *local);
void ieee80211_sched_scan_end(struct ieee80211_local *local);
-void ieee80211_sched_scan_stopped_work(struct work_struct *work);
+void ieee80211_sched_scan_stopped_work(struct wiphy *wiphy,
+ struct wiphy_work *work);
/* off-channel/mgmt-tx */
void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local);
@@ -1982,12 +1948,13 @@ int ieee80211_mgmt_tx_cancel_wait(struct wiphy *wiphy,
struct wireless_dev *wdev, u64 cookie);
/* channel switch handling */
-void ieee80211_csa_finalize_work(struct work_struct *work);
+void ieee80211_csa_finalize_work(struct wiphy *wiphy, struct wiphy_work *work);
int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
struct cfg80211_csa_settings *params);
/* color change handling */
-void ieee80211_color_change_finalize_work(struct work_struct *work);
+void ieee80211_color_change_finalize_work(struct wiphy *wiphy,
+ struct wiphy_work *work);
void ieee80211_color_collision_detection_work(struct work_struct *work);
/* interface handling */
@@ -2037,8 +2004,10 @@ void ieee80211_link_init(struct ieee80211_sub_if_data *sdata,
void ieee80211_link_stop(struct ieee80211_link_data *link);
int ieee80211_vif_set_links(struct ieee80211_sub_if_data *sdata,
u16 new_links, u16 dormant_links);
-void ieee80211_vif_clear_links(struct ieee80211_sub_if_data *sdata);
-int __ieee80211_set_active_links(struct ieee80211_vif *vif, u16 active_links);
+static inline void ieee80211_vif_clear_links(struct ieee80211_sub_if_data *sdata)
+{
+ ieee80211_vif_set_links(sdata, 0, 0);
+}
/* tx handling */
void ieee80211_clear_tx_pending(struct ieee80211_local *local);
@@ -2060,7 +2029,7 @@ struct sk_buff *
ieee80211_build_data_template(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb, u32 info_flags);
void ieee80211_tx_monitor(struct ieee80211_local *local, struct sk_buff *skb,
- int retry_count, int shift, bool send_to_cooked,
+ int retry_count, bool send_to_cooked,
struct ieee80211_tx_status *status);
void ieee80211_check_fast_xmit(struct sta_info *sta);
@@ -2093,19 +2062,17 @@ void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata,
u16 initiator, u16 reason_code);
int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata,
enum ieee80211_smps_mode smps, const u8 *da,
- const u8 *bssid);
+ const u8 *bssid, int link_id);
bool ieee80211_smps_is_restrictive(enum ieee80211_smps_mode smps_mode_old,
enum ieee80211_smps_mode smps_mode_new);
-void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
- u16 initiator, u16 reason, bool stop);
void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
u16 initiator, u16 reason, bool stop);
-void ___ieee80211_start_rx_ba_session(struct sta_info *sta,
- u8 dialog_token, u16 timeout,
- u16 start_seq_num, u16 ba_policy, u16 tid,
- u16 buf_size, bool tx, bool auto_seq,
- const struct ieee80211_addba_ext_ie *addbaext);
+void __ieee80211_start_rx_ba_session(struct sta_info *sta,
+ u8 dialog_token, u16 timeout,
+ u16 start_seq_num, u16 ba_policy, u16 tid,
+ u16 buf_size, bool tx, bool auto_seq,
+ const struct ieee80211_addba_ext_ie *addbaext);
void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta,
enum ieee80211_agg_stop_reason reason);
void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata,
@@ -2122,13 +2089,11 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
enum ieee80211_agg_stop_reason reason);
-int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
- enum ieee80211_agg_stop_reason reason);
void ieee80211_start_tx_ba_cb(struct sta_info *sta, int tid,
struct tid_ampdu_tx *tid_tx);
void ieee80211_stop_tx_ba_cb(struct sta_info *sta, int tid,
struct tid_ampdu_tx *tid_tx);
-void ieee80211_ba_session_work(struct work_struct *work);
+void ieee80211_ba_session_work(struct wiphy *wiphy, struct wiphy_work *work);
void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid);
void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid);
@@ -2141,6 +2106,7 @@ void
ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
struct ieee80211_supported_band *sband,
const struct ieee80211_vht_cap *vht_cap_ie,
+ const struct ieee80211_vht_cap *vht_cap_ie2,
struct link_sta_info *link_sta);
enum ieee80211_sta_rx_bandwidth
ieee80211_sta_cap_rx_bw(struct link_sta_info *link_sta);
@@ -2205,7 +2171,7 @@ void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata,
* flags from &enum ieee80211_conn_flags.
* @bssid: the currently connected bssid (for reporting)
* @csa_ie: parsed 802.11 csa elements on count, mode, chandef and mesh ttl.
- All of them will be filled with if success only.
+ * All of them will be filled with if success only.
* Return: 0 on success, <0 on error and >0 if there is nothing to parse.
*/
int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata,
@@ -2237,8 +2203,7 @@ static inline int __ieee80211_resume(struct ieee80211_hw *hw)
/* utility functions/constants */
extern const void *const mac80211_wiphy_privid; /* for wiphy privid */
int ieee80211_frame_duration(enum nl80211_band band, size_t len,
- int rate, int erp, int short_preamble,
- int shift);
+ int rate, int erp, int short_preamble);
void ieee80211_regulatory_limit_wmm_params(struct ieee80211_sub_if_data *sdata,
struct ieee80211_tx_queue_params *qparam,
int ac);
@@ -2333,8 +2298,6 @@ ieee802_11_parse_elems(const u8 *start, size_t len, bool action,
return ieee802_11_parse_elems_crc(start, len, action, 0, 0, bss);
}
-void ieee80211_fragment_element(struct sk_buff *skb, u8 *len_pos, u8 frag_id);
-
extern const int ieee802_1d_to_ac[8];
static inline int ieee80211_ac_from_tid(int tid)
@@ -2342,8 +2305,10 @@ static inline int ieee80211_ac_from_tid(int tid)
return ieee802_1d_to_ac[tid & 7];
}
-void ieee80211_dynamic_ps_enable_work(struct work_struct *work);
-void ieee80211_dynamic_ps_disable_work(struct work_struct *work);
+void ieee80211_dynamic_ps_enable_work(struct wiphy *wiphy,
+ struct wiphy_work *work);
+void ieee80211_dynamic_ps_disable_work(struct wiphy *wiphy,
+ struct wiphy_work *work);
void ieee80211_dynamic_ps_timer(struct timer_list *t);
void ieee80211_send_nullfunc(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
@@ -2428,6 +2393,7 @@ void ieee80211_txq_init(struct ieee80211_sub_if_data *sdata,
struct txq_info *txq, int tid);
void ieee80211_txq_purge(struct ieee80211_local *local,
struct txq_info *txqi);
+void ieee80211_purge_sta_txqs(struct sta_info *sta);
void ieee80211_txq_remove_vlan(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata);
void ieee80211_fill_txq_stats(struct cfg80211_txq_stats *txqstats,
@@ -2521,7 +2487,7 @@ bool ieee80211_chandef_vht_oper(struct ieee80211_hw *hw, u32 vht_cap_info,
const struct ieee80211_vht_operation *oper,
const struct ieee80211_ht_operation *htop,
struct cfg80211_chan_def *chandef);
-void ieee80211_chandef_eht_oper(const struct ieee80211_eht_operation *eht_oper,
+void ieee80211_chandef_eht_oper(const struct ieee80211_eht_operation_info *info,
bool support_160, bool support_320,
struct cfg80211_chan_def *chandef);
bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_sub_if_data *sdata,
@@ -2563,9 +2529,10 @@ void ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local,
struct ieee80211_link_data *rsvd_for);
bool ieee80211_is_radar_required(struct ieee80211_local *local);
-void ieee80211_dfs_cac_timer_work(struct work_struct *work);
+void ieee80211_dfs_cac_timer_work(struct wiphy *wiphy, struct wiphy_work *work);
void ieee80211_dfs_cac_cancel(struct ieee80211_local *local);
-void ieee80211_dfs_radar_detected_work(struct work_struct *work);
+void ieee80211_dfs_radar_detected_work(struct wiphy *wiphy,
+ struct wiphy_work *work);
int ieee80211_send_action_csa(struct ieee80211_sub_if_data *sdata,
struct cfg80211_csa_settings *csa_settings);
@@ -2587,7 +2554,7 @@ int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev,
const u8 *extra_ies, size_t extra_ies_len);
int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev,
const u8 *peer, enum nl80211_tdls_operation oper);
-void ieee80211_tdls_peer_del_work(struct work_struct *wk);
+void ieee80211_tdls_peer_del_work(struct wiphy *wiphy, struct wiphy_work *wk);
int ieee80211_tdls_channel_switch(struct wiphy *wiphy, struct net_device *dev,
const u8 *addr, u8 oper_class,
struct cfg80211_chan_def *chandef);
@@ -2636,4 +2603,19 @@ ieee80211_eht_cap_ie_to_sta_eht_cap(struct ieee80211_sub_if_data *sdata,
const struct ieee80211_eht_cap_elem *eht_cap_ie_elem,
u8 eht_cap_len,
struct link_sta_info *link_sta);
+
+void ieee80211_check_wbrf_support(struct ieee80211_local *local);
+void ieee80211_add_wbrf(struct ieee80211_local *local, struct cfg80211_chan_def *chandef);
+void ieee80211_remove_wbrf(struct ieee80211_local *local, struct cfg80211_chan_def *chandef);
+
+#if IS_ENABLED(CONFIG_MAC80211_KUNIT_TEST)
+#define EXPORT_SYMBOL_IF_MAC80211_KUNIT(sym) EXPORT_SYMBOL_IF_KUNIT(sym)
+#define VISIBLE_IF_MAC80211_KUNIT
+ieee80211_rx_result
+ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx);
+#else
+#define EXPORT_SYMBOL_IF_MAC80211_KUNIT(sym)
+#define VISIBLE_IF_MAC80211_KUNIT static
+#endif
+
#endif /* IEEE80211_I_H */
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index be586bc0b5b7..11c4caa4748e 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -33,14 +33,13 @@
* The interface list in each struct ieee80211_local is protected
* three-fold:
*
- * (1) modifications may only be done under the RTNL
- * (2) modifications and readers are protected against each other by
- * the iflist_mtx.
- * (3) modifications are done in an RCU manner so atomic readers
+ * (1) modifications may only be done under the RTNL *and* wiphy mutex
+ * *and* iflist_mtx
+ * (2) modifications are done in an RCU manner so atomic readers
* can traverse the list in RCU-safe blocks.
*
* As a consequence, reads (traversals) of the list can be protected
- * by either the RTNL, the iflist_mtx or RCU.
+ * by either the RTNL, the wiphy mutex, the iflist_mtx or RCU.
*/
static void ieee80211_iface_work(struct wiphy *wiphy, struct wiphy_work *work);
@@ -110,7 +109,7 @@ static u32 __ieee80211_recalc_idle(struct ieee80211_local *local,
bool working, scanning, active;
unsigned int led_trig_start = 0, led_trig_stop = 0;
- lockdep_assert_held(&local->mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
active = force_active ||
!list_empty(&local->chanctx_list) ||
@@ -160,6 +159,8 @@ static int ieee80211_verify_mac(struct ieee80211_sub_if_data *sdata, u8 *addr,
u8 *m;
int ret = 0;
+ lockdep_assert_wiphy(local->hw.wiphy);
+
if (is_zero_ether_addr(local->hw.wiphy->addr_mask))
return 0;
@@ -176,7 +177,6 @@ static int ieee80211_verify_mac(struct ieee80211_sub_if_data *sdata, u8 *addr,
if (!check_dup)
return ret;
- mutex_lock(&local->iflist_mtx);
list_for_each_entry(iter, &local->interfaces, list) {
if (iter == sdata)
continue;
@@ -195,7 +195,6 @@ static int ieee80211_verify_mac(struct ieee80211_sub_if_data *sdata, u8 *addr,
break;
}
}
- mutex_unlock(&local->iflist_mtx);
return ret;
}
@@ -207,6 +206,8 @@ static int ieee80211_can_powered_addr_change(struct ieee80211_sub_if_data *sdata
struct ieee80211_sub_if_data *scan_sdata;
int ret = 0;
+ lockdep_assert_wiphy(local->hw.wiphy);
+
/* To be the most flexible here we want to only limit changing the
* address if the specific interface is doing offchannel work or
* scanning.
@@ -214,8 +215,6 @@ static int ieee80211_can_powered_addr_change(struct ieee80211_sub_if_data *sdata
if (netif_carrier_ok(sdata->dev))
return -EBUSY;
- mutex_lock(&local->mtx);
-
/* First check no ROC work is happening on this iface */
list_for_each_entry(roc, &local->roc_list, list) {
if (roc->sdata != sdata)
@@ -230,7 +229,7 @@ static int ieee80211_can_powered_addr_change(struct ieee80211_sub_if_data *sdata
/* And if this iface is scanning */
if (local->scanning) {
scan_sdata = rcu_dereference_protected(local->scan_sdata,
- lockdep_is_held(&local->mtx));
+ lockdep_is_held(&local->hw.wiphy->mtx));
if (sdata == scan_sdata)
ret = -EBUSY;
}
@@ -247,13 +246,12 @@ static int ieee80211_can_powered_addr_change(struct ieee80211_sub_if_data *sdata
}
unlock:
- mutex_unlock(&local->mtx);
return ret;
}
-static int ieee80211_change_mac(struct net_device *dev, void *addr)
+static int _ieee80211_change_mac(struct ieee80211_sub_if_data *sdata,
+ void *addr)
{
- struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
struct ieee80211_local *local = sdata->local;
struct sockaddr *sa = addr;
bool check_dup = true;
@@ -278,7 +276,7 @@ static int ieee80211_change_mac(struct net_device *dev, void *addr)
if (live)
drv_remove_interface(local, sdata);
- ret = eth_mac_addr(dev, sa);
+ ret = eth_mac_addr(sdata->dev, sa);
if (ret == 0) {
memcpy(sdata->vif.addr, sa->sa_data, ETH_ALEN);
@@ -294,6 +292,27 @@ static int ieee80211_change_mac(struct net_device *dev, void *addr)
return ret;
}
+static int ieee80211_change_mac(struct net_device *dev, void *addr)
+{
+ struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ struct ieee80211_local *local = sdata->local;
+ int ret;
+
+ /*
+ * This happens during unregistration if there's a bond device
+ * active (maybe other cases?) and we must get removed from it.
+ * But we really don't care anymore if it's not registered now.
+ */
+ if (!dev->ieee80211_ptr->registered)
+ return 0;
+
+ wiphy_lock(local->hw.wiphy);
+ ret = _ieee80211_change_mac(sdata, addr);
+ wiphy_unlock(local->hw.wiphy);
+
+ return ret;
+}
+
static inline int identical_mac_addr_allowed(int type1, int type2)
{
return type1 == NL80211_IFTYPE_MONITOR ||
@@ -311,9 +330,9 @@ static int ieee80211_check_concurrent_iface(struct ieee80211_sub_if_data *sdata,
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_sub_if_data *nsdata;
- int ret;
ASSERT_RTNL();
+ lockdep_assert_wiphy(local->hw.wiphy);
/* we hold the RTNL here so can safely walk the list */
list_for_each_entry(nsdata, &local->interfaces, list) {
@@ -378,10 +397,7 @@ static int ieee80211_check_concurrent_iface(struct ieee80211_sub_if_data *sdata,
}
}
- mutex_lock(&local->chanctx_mtx);
- ret = ieee80211_check_combinations(sdata, NULL, 0, 0);
- mutex_unlock(&local->chanctx_mtx);
- return ret;
+ return ieee80211_check_combinations(sdata, NULL, 0, 0);
}
static int ieee80211_check_queues(struct ieee80211_sub_if_data *sdata,
@@ -430,12 +446,13 @@ static int ieee80211_open(struct net_device *dev)
if (!is_valid_ether_addr(dev->dev_addr))
return -EADDRNOTAVAIL;
+ wiphy_lock(sdata->local->hw.wiphy);
err = ieee80211_check_concurrent_iface(sdata, sdata->vif.type);
if (err)
- return err;
+ goto out;
- wiphy_lock(sdata->local->hw.wiphy);
err = ieee80211_do_open(&sdata->wdev, true);
+out:
wiphy_unlock(sdata->local->hw.wiphy);
return err;
@@ -453,6 +470,8 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do
bool cancel_scan;
struct cfg80211_nan_func *func;
+ lockdep_assert_wiphy(local->hw.wiphy);
+
clear_bit(SDATA_STATE_RUNNING, &sdata->state);
synchronize_rcu(); /* flush _ieee80211_wake_txqs() */
@@ -516,16 +535,12 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do
}
del_timer_sync(&local->dynamic_ps_timer);
- cancel_work_sync(&local->dynamic_ps_enable_work);
+ wiphy_work_cancel(local->hw.wiphy, &local->dynamic_ps_enable_work);
- cancel_work_sync(&sdata->recalc_smps);
-
- sdata_lock(sdata);
WARN(ieee80211_vif_is_mld(&sdata->vif),
"destroying interface with valid links 0x%04x\n",
sdata->vif.valid_links);
- mutex_lock(&local->mtx);
sdata->vif.bss_conf.csa_active = false;
if (sdata->vif.type == NL80211_IFTYPE_STATION)
sdata->deflink.u.mgd.csa_waiting_bcn = false;
@@ -534,20 +549,17 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do
IEEE80211_QUEUE_STOP_REASON_CSA);
sdata->deflink.csa_block_tx = false;
}
- mutex_unlock(&local->mtx);
- sdata_unlock(sdata);
-
- cancel_work_sync(&sdata->deflink.csa_finalize_work);
- cancel_work_sync(&sdata->deflink.color_change_finalize_work);
- cancel_delayed_work_sync(&sdata->deflink.dfs_cac_timer_work);
+ wiphy_work_cancel(local->hw.wiphy, &sdata->deflink.csa_finalize_work);
+ wiphy_work_cancel(local->hw.wiphy,
+ &sdata->deflink.color_change_finalize_work);
+ wiphy_delayed_work_cancel(local->hw.wiphy,
+ &sdata->deflink.dfs_cac_timer_work);
if (sdata->wdev.cac_started) {
chandef = sdata->vif.bss_conf.chandef;
WARN_ON(local->suspended);
- mutex_lock(&local->mtx);
ieee80211_link_release_channel(&sdata->deflink);
- mutex_unlock(&local->mtx);
cfg80211_cac_event(sdata->dev, &chandef,
NL80211_RADAR_CAC_ABORTED,
GFP_KERNEL);
@@ -575,9 +587,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do
switch (sdata->vif.type) {
case NL80211_IFTYPE_AP_VLAN:
- mutex_lock(&local->mtx);
list_del(&sdata->u.vlan.list);
- mutex_unlock(&local->mtx);
RCU_INIT_POINTER(sdata->vif.bss_conf.chanctx_conf, NULL);
/* see comment in the default case below */
ieee80211_free_keys(sdata, true);
@@ -675,9 +685,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do
if (local->monitors == 0)
ieee80211_del_virtual_monitor(local);
- mutex_lock(&local->mtx);
ieee80211_recalc_idle(local);
- mutex_unlock(&local->mtx);
if (!(sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE))
break;
@@ -691,7 +699,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do
ieee80211_recalc_ps(local);
if (cancel_scan)
- flush_delayed_work(&local->scan_work);
+ wiphy_delayed_work_flush(local->hw.wiphy, &local->scan_work);
if (local->open_count == 0) {
ieee80211_stop_device(local);
@@ -750,9 +758,9 @@ static int ieee80211_stop(struct net_device *dev)
ieee80211_stop_mbssid(sdata);
}
- cancel_work_sync(&sdata->activate_links_work);
-
wiphy_lock(sdata->local->hw.wiphy);
+ wiphy_work_cancel(sdata->local->hw.wiphy, &sdata->activate_links_work);
+
ieee80211_do_stop(sdata, true);
wiphy_unlock(sdata->local->hw.wiphy);
@@ -779,7 +787,7 @@ static void ieee80211_set_multicast_list(struct net_device *dev)
spin_lock_bh(&local->filter_lock);
__hw_addr_sync(&local->mc_list, &dev->mc, dev->addr_len);
spin_unlock_bh(&local->filter_lock);
- ieee80211_queue_work(&local->hw, &local->reconfig_filter);
+ wiphy_work_queue(local->hw.wiphy, &local->reconfig_filter);
}
/*
@@ -1046,7 +1054,7 @@ void ieee80211_recalc_offload(struct ieee80211_local *local)
if (!ieee80211_hw_check(&local->hw, SUPPORTS_TX_ENCAP_OFFLOAD))
return;
- mutex_lock(&local->iflist_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
list_for_each_entry(sdata, &local->interfaces, list) {
if (!ieee80211_sdata_running(sdata))
@@ -1054,8 +1062,6 @@ void ieee80211_recalc_offload(struct ieee80211_local *local)
ieee80211_recalc_sdata_offload(sdata);
}
-
- mutex_unlock(&local->iflist_mtx);
}
void ieee80211_adjust_monitor_flags(struct ieee80211_sub_if_data *sdata,
@@ -1133,7 +1139,7 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local)
snprintf(sdata->name, IFNAMSIZ, "%s-monitor",
wiphy_name(local->hw.wiphy));
sdata->wdev.iftype = NL80211_IFTYPE_MONITOR;
- mutex_init(&sdata->wdev.mtx);
+ sdata->wdev.wiphy = local->hw.wiphy;
ieee80211_sdata_init(local, sdata);
@@ -1158,19 +1164,14 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local)
rcu_assign_pointer(local->monitor_sdata, sdata);
mutex_unlock(&local->iflist_mtx);
- sdata_lock(sdata);
- mutex_lock(&local->mtx);
ret = ieee80211_link_use_channel(&sdata->deflink, &local->monitor_chandef,
IEEE80211_CHANCTX_EXCLUSIVE);
- mutex_unlock(&local->mtx);
- sdata_unlock(sdata);
if (ret) {
mutex_lock(&local->iflist_mtx);
RCU_INIT_POINTER(local->monitor_sdata, NULL);
mutex_unlock(&local->iflist_mtx);
synchronize_net();
drv_remove_interface(local, sdata);
- mutex_destroy(&sdata->wdev.mtx);
kfree(sdata);
return ret;
}
@@ -1206,15 +1207,10 @@ void ieee80211_del_virtual_monitor(struct ieee80211_local *local)
synchronize_net();
- sdata_lock(sdata);
- mutex_lock(&local->mtx);
ieee80211_link_release_channel(&sdata->deflink);
- mutex_unlock(&local->mtx);
- sdata_unlock(sdata);
drv_remove_interface(local, sdata);
- mutex_destroy(&sdata->wdev.mtx);
kfree(sdata);
}
@@ -1232,6 +1228,8 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
int res;
u32 hw_reconf_flags = 0;
+ lockdep_assert_wiphy(local->hw.wiphy);
+
switch (sdata->vif.type) {
case NL80211_IFTYPE_AP_VLAN: {
struct ieee80211_sub_if_data *master;
@@ -1239,9 +1237,7 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
if (!sdata->bss)
return -ENOLINK;
- mutex_lock(&local->mtx);
list_add(&sdata->u.vlan.list, &sdata->bss->vlans);
- mutex_unlock(&local->mtx);
master = container_of(sdata->bss,
struct ieee80211_sub_if_data, u.ap);
@@ -1258,10 +1254,8 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
sizeof(sdata->vif.hw_queue));
sdata->vif.bss_conf.chandef = master->vif.bss_conf.chandef;
- mutex_lock(&local->key_mtx);
sdata->crypto_tx_tailroom_needed_cnt +=
master->crypto_tx_tailroom_needed_cnt;
- mutex_unlock(&local->key_mtx);
break;
}
@@ -1352,9 +1346,7 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
ieee80211_adjust_monitor_flags(sdata, 1);
ieee80211_configure_filter(local);
ieee80211_recalc_offload(local);
- mutex_lock(&local->mtx);
ieee80211_recalc_idle(local);
- mutex_unlock(&local->mtx);
netif_carrier_on(dev);
break;
@@ -1459,11 +1451,8 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
drv_stop(local);
err_del_bss:
sdata->bss = NULL;
- if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) {
- mutex_lock(&local->mtx);
+ if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
list_del(&sdata->u.vlan.list);
- mutex_unlock(&local->mtx);
- }
/* might already be clear but that doesn't matter */
clear_bit(SDATA_STATE_RUNNING, &sdata->state);
return res;
@@ -1490,12 +1479,13 @@ static void ieee80211_iface_process_skb(struct ieee80211_local *local,
{
struct ieee80211_mgmt *mgmt = (void *)skb->data;
+ lockdep_assert_wiphy(local->hw.wiphy);
+
if (ieee80211_is_action(mgmt->frame_control) &&
mgmt->u.action.category == WLAN_CATEGORY_BACK) {
struct sta_info *sta;
int len = skb->len;
- mutex_lock(&local->sta_mtx);
sta = sta_info_get_bss(sdata, mgmt->sa);
if (sta) {
switch (mgmt->u.action.u.addba_req.action_code) {
@@ -1516,7 +1506,6 @@ static void ieee80211_iface_process_skb(struct ieee80211_local *local,
break;
}
}
- mutex_unlock(&local->sta_mtx);
} else if (ieee80211_is_action(mgmt->frame_control) &&
mgmt->u.action.category == WLAN_CATEGORY_VHT) {
switch (mgmt->u.action.u.vht_group_notif.action_code) {
@@ -1530,7 +1519,6 @@ static void ieee80211_iface_process_skb(struct ieee80211_local *local,
band = status->band;
opmode = mgmt->u.action.u.vht_opmode_notif.operating_mode;
- mutex_lock(&local->sta_mtx);
sta = sta_info_get_bss(sdata, mgmt->sa);
if (sta)
@@ -1538,7 +1526,6 @@ static void ieee80211_iface_process_skb(struct ieee80211_local *local,
&sta->deflink,
opmode, band);
- mutex_unlock(&local->sta_mtx);
break;
}
case WLAN_VHT_ACTION_GROUPID_MGMT:
@@ -1585,7 +1572,6 @@ static void ieee80211_iface_process_skb(struct ieee80211_local *local,
* a block-ack session was active. That cannot be
* right, so terminate the session.
*/
- mutex_lock(&local->sta_mtx);
sta = sta_info_get_bss(sdata, mgmt->sa);
if (sta) {
u16 tid = ieee80211_get_tid(hdr);
@@ -1595,7 +1581,6 @@ static void ieee80211_iface_process_skb(struct ieee80211_local *local,
WLAN_REASON_QSTA_REQUIRE_SETUP,
true);
}
- mutex_unlock(&local->sta_mtx);
} else switch (sdata->vif.type) {
case NL80211_IFTYPE_STATION:
ieee80211_sta_rx_queued_mgmt(sdata, skb);
@@ -1692,15 +1677,8 @@ static void ieee80211_iface_work(struct wiphy *wiphy, struct wiphy_work *work)
}
}
-static void ieee80211_recalc_smps_work(struct work_struct *work)
-{
- struct ieee80211_sub_if_data *sdata =
- container_of(work, struct ieee80211_sub_if_data, recalc_smps);
-
- ieee80211_recalc_smps(sdata, &sdata->deflink);
-}
-
-static void ieee80211_activate_links_work(struct work_struct *work)
+static void ieee80211_activate_links_work(struct wiphy *wiphy,
+ struct wiphy_work *work)
{
struct ieee80211_sub_if_data *sdata =
container_of(work, struct ieee80211_sub_if_data,
@@ -1745,8 +1723,8 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
skb_queue_head_init(&sdata->skb_queue);
skb_queue_head_init(&sdata->status_queue);
wiphy_work_init(&sdata->work, ieee80211_iface_work);
- INIT_WORK(&sdata->recalc_smps, ieee80211_recalc_smps_work);
- INIT_WORK(&sdata->activate_links_work, ieee80211_activate_links_work);
+ wiphy_work_init(&sdata->activate_links_work,
+ ieee80211_activate_links_work);
switch (type) {
case NL80211_IFTYPE_P2P_GO:
@@ -1805,7 +1783,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
/* need to do this after the switch so vif.type is correct */
ieee80211_link_setup(&sdata->deflink);
- ieee80211_debugfs_add_netdev(sdata);
+ ieee80211_debugfs_recreate_netdev(sdata, false);
}
static int ieee80211_runtime_change_iftype(struct ieee80211_sub_if_data *sdata,
@@ -1936,6 +1914,8 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local,
u8 tmp_addr[ETH_ALEN];
int i;
+ lockdep_assert_wiphy(local->hw.wiphy);
+
/* default ... something at least */
memcpy(perm_addr, local->hw.wiphy->perm_addr, ETH_ALEN);
@@ -1943,8 +1923,6 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local,
local->hw.wiphy->n_addresses <= 1)
return;
- mutex_lock(&local->iflist_mtx);
-
switch (type) {
case NL80211_IFTYPE_MONITOR:
/* doesn't matter */
@@ -1968,7 +1946,7 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local,
if (!ieee80211_sdata_running(sdata))
continue;
memcpy(perm_addr, sdata->vif.addr, ETH_ALEN);
- goto out_unlock;
+ return;
}
}
fallthrough;
@@ -2054,9 +2032,6 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local,
break;
}
-
- out_unlock:
- mutex_unlock(&local->iflist_mtx);
}
int ieee80211_if_add(struct ieee80211_local *local, const char *name,
@@ -2070,6 +2045,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
int ret, i;
ASSERT_RTNL();
+ lockdep_assert_wiphy(local->hw.wiphy);
if (type == NL80211_IFTYPE_P2P_DEVICE || type == NL80211_IFTYPE_NAN) {
struct wireless_dev *wdev;
@@ -2157,8 +2133,8 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
INIT_LIST_HEAD(&sdata->key_list);
- INIT_DELAYED_WORK(&sdata->dec_tailroom_needed_wk,
- ieee80211_delayed_tailroom_dec);
+ wiphy_delayed_work_init(&sdata->dec_tailroom_needed_wk,
+ ieee80211_delayed_tailroom_dec);
for (i = 0; i < NUM_NL80211_BANDS; i++) {
struct ieee80211_supported_band *sband;
@@ -2236,6 +2212,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
void ieee80211_if_remove(struct ieee80211_sub_if_data *sdata)
{
ASSERT_RTNL();
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
mutex_lock(&sdata->local->iflist_mtx);
list_del_rcu(&sdata->list);
@@ -2281,19 +2258,30 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local)
*/
cfg80211_shutdown_all_interfaces(local->hw.wiphy);
+ wiphy_lock(local->hw.wiphy);
+
WARN(local->open_count, "%s: open count remains %d\n",
wiphy_name(local->hw.wiphy), local->open_count);
- ieee80211_txq_teardown_flows(local);
-
mutex_lock(&local->iflist_mtx);
list_splice_init(&local->interfaces, &unreg_list);
mutex_unlock(&local->iflist_mtx);
- wiphy_lock(local->hw.wiphy);
list_for_each_entry_safe(sdata, tmp, &unreg_list, list) {
bool netdev = sdata->dev;
+ /*
+ * Remove IP addresses explicitly, since the notifier will
+ * skip the callbacks if wdev->registered is false, since
+ * we can't acquire the wiphy_lock() again there if already
+ * inside this locked section.
+ */
+ sdata->vif.cfg.arp_addr_cnt = 0;
+ if (sdata->vif.type == NL80211_IFTYPE_STATION &&
+ sdata->u.mgd.associated)
+ ieee80211_vif_cfg_change_notify(sdata,
+ BSS_CHANGED_ARP_FILTER);
+
list_del(&sdata->list);
cfg80211_unregister_wdev(&sdata->wdev);
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 13050dc9321f..af74d7f9d94d 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -53,11 +53,6 @@
static const u8 bcast_addr[ETH_ALEN] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF };
-static void assert_key_lock(struct ieee80211_local *local)
-{
- lockdep_assert_held(&local->key_mtx);
-}
-
static void
update_vlan_tailroom_need_count(struct ieee80211_sub_if_data *sdata, int delta)
{
@@ -67,7 +62,7 @@ update_vlan_tailroom_need_count(struct ieee80211_sub_if_data *sdata, int delta)
return;
/* crypto_tx_tailroom_needed_cnt is protected by this */
- assert_key_lock(sdata->local);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
rcu_read_lock();
@@ -98,7 +93,7 @@ static void increment_tailroom_need_count(struct ieee80211_sub_if_data *sdata)
* http://mid.gmane.org/1308590980.4322.19.camel@jlt3.sipsolutions.net
*/
- assert_key_lock(sdata->local);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
update_vlan_tailroom_need_count(sdata, 1);
@@ -114,7 +109,7 @@ static void increment_tailroom_need_count(struct ieee80211_sub_if_data *sdata)
static void decrease_tailroom_need_count(struct ieee80211_sub_if_data *sdata,
int delta)
{
- assert_key_lock(sdata->local);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
WARN_ON_ONCE(sdata->crypto_tx_tailroom_needed_cnt < delta);
@@ -129,6 +124,7 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
int ret = -EOPNOTSUPP;
might_sleep();
+ lockdep_assert_wiphy(key->local->hw.wiphy);
if (key->flags & KEY_FLAG_TAINTED) {
/* If we get here, it's during resume and the key is
@@ -151,8 +147,6 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
if (!key->local->ops->set_key)
goto out_unsupported;
- assert_key_lock(key->local);
-
sta = key->sta;
/*
@@ -242,14 +236,14 @@ static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key)
if (!key || !key->local->ops->set_key)
return;
- assert_key_lock(key->local);
-
if (!(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE))
return;
sta = key->sta;
sdata = key->sdata;
+ lockdep_assert_wiphy(key->local->hw.wiphy);
+
if (key->conf.link_id >= 0 && sdata->vif.active_links &&
!(sdata->vif.active_links & BIT(key->conf.link_id)))
return;
@@ -275,7 +269,7 @@ static int _ieee80211_set_tx_key(struct ieee80211_key *key, bool force)
struct sta_info *sta = key->sta;
struct ieee80211_local *local = key->local;
- assert_key_lock(local);
+ lockdep_assert_wiphy(local->hw.wiphy);
set_sta_flag(sta, WLAN_STA_USES_ENCRYPTION);
@@ -300,7 +294,7 @@ static void ieee80211_pairwise_rekey(struct ieee80211_key *old,
struct sta_info *sta = new->sta;
int i;
- assert_key_lock(local);
+ lockdep_assert_wiphy(local->hw.wiphy);
if (new->conf.flags & IEEE80211_KEY_FLAG_NO_AUTO_TX) {
/* Extended Key ID key install, initial one or rekey */
@@ -317,11 +311,9 @@ static void ieee80211_pairwise_rekey(struct ieee80211_key *old,
* job done for the few ms we need it.)
*/
set_sta_flag(sta, WLAN_STA_BLOCK_BA);
- mutex_lock(&sta->ampdu_mlme.mtx);
for (i = 0; i < IEEE80211_NUM_TIDS; i++)
- ___ieee80211_stop_tx_ba_session(sta, i,
- AGG_STOP_LOCAL_REQUEST);
- mutex_unlock(&sta->ampdu_mlme.mtx);
+ __ieee80211_stop_tx_ba_session(sta, i,
+ AGG_STOP_LOCAL_REQUEST);
}
} else if (old) {
/* Rekey without Extended Key ID.
@@ -358,12 +350,14 @@ static void __ieee80211_set_default_key(struct ieee80211_link_data *link,
struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_key *key = NULL;
- assert_key_lock(sdata->local);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
if (idx >= 0 && idx < NUM_DEFAULT_KEYS) {
- key = key_mtx_dereference(sdata->local, sdata->keys[idx]);
+ key = wiphy_dereference(sdata->local->hw.wiphy,
+ sdata->keys[idx]);
if (!key)
- key = key_mtx_dereference(sdata->local, link->gtk[idx]);
+ key = wiphy_dereference(sdata->local->hw.wiphy,
+ link->gtk[idx]);
}
if (uni) {
@@ -382,9 +376,9 @@ static void __ieee80211_set_default_key(struct ieee80211_link_data *link,
void ieee80211_set_default_key(struct ieee80211_link_data *link, int idx,
bool uni, bool multi)
{
- mutex_lock(&link->sdata->local->key_mtx);
+ lockdep_assert_wiphy(link->sdata->local->hw.wiphy);
+
__ieee80211_set_default_key(link, idx, uni, multi);
- mutex_unlock(&link->sdata->local->key_mtx);
}
static void
@@ -393,11 +387,12 @@ __ieee80211_set_default_mgmt_key(struct ieee80211_link_data *link, int idx)
struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_key *key = NULL;
- assert_key_lock(sdata->local);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
if (idx >= NUM_DEFAULT_KEYS &&
idx < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS)
- key = key_mtx_dereference(sdata->local, link->gtk[idx]);
+ key = wiphy_dereference(sdata->local->hw.wiphy,
+ link->gtk[idx]);
rcu_assign_pointer(link->default_mgmt_key, key);
@@ -407,9 +402,9 @@ __ieee80211_set_default_mgmt_key(struct ieee80211_link_data *link, int idx)
void ieee80211_set_default_mgmt_key(struct ieee80211_link_data *link,
int idx)
{
- mutex_lock(&link->sdata->local->key_mtx);
+ lockdep_assert_wiphy(link->sdata->local->hw.wiphy);
+
__ieee80211_set_default_mgmt_key(link, idx);
- mutex_unlock(&link->sdata->local->key_mtx);
}
static void
@@ -418,12 +413,13 @@ __ieee80211_set_default_beacon_key(struct ieee80211_link_data *link, int idx)
struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_key *key = NULL;
- assert_key_lock(sdata->local);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
if (idx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS &&
idx < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS +
NUM_DEFAULT_BEACON_KEYS)
- key = key_mtx_dereference(sdata->local, link->gtk[idx]);
+ key = wiphy_dereference(sdata->local->hw.wiphy,
+ link->gtk[idx]);
rcu_assign_pointer(link->default_beacon_key, key);
@@ -433,9 +429,9 @@ __ieee80211_set_default_beacon_key(struct ieee80211_link_data *link, int idx)
void ieee80211_set_default_beacon_key(struct ieee80211_link_data *link,
int idx)
{
- mutex_lock(&link->sdata->local->key_mtx);
+ lockdep_assert_wiphy(link->sdata->local->hw.wiphy);
+
__ieee80211_set_default_beacon_key(link, idx);
- mutex_unlock(&link->sdata->local->key_mtx);
}
static int ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
@@ -452,6 +448,8 @@ static int ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
bool defunikey, defmultikey, defmgmtkey, defbeaconkey;
bool is_wep;
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
+
/* caller must provide at least one old/new */
if (WARN_ON(!new && !old))
return 0;
@@ -482,7 +480,7 @@ static int ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
if (sta) {
link_sta = rcu_dereference_protected(sta->link[link_id],
- lockdep_is_held(&sta->local->sta_mtx));
+ lockdep_is_held(&sta->local->hw.wiphy->mtx));
if (!link_sta)
return -ENOLINK;
}
@@ -510,12 +508,10 @@ static int ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
ret = ieee80211_key_enable_hw_accel(new);
}
} else {
- if (!new->local->wowlan) {
+ if (!new->local->wowlan)
ret = ieee80211_key_enable_hw_accel(new);
- } else {
- assert_key_lock(new->local);
+ else
new->flags |= KEY_FLAG_UPLOADED_TO_HARDWARE;
- }
}
if (ret)
@@ -541,17 +537,17 @@ static int ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
ieee80211_check_fast_rx(sta);
} else {
defunikey = old &&
- old == key_mtx_dereference(sdata->local,
- sdata->default_unicast_key);
+ old == wiphy_dereference(sdata->local->hw.wiphy,
+ sdata->default_unicast_key);
defmultikey = old &&
- old == key_mtx_dereference(sdata->local,
- link->default_multicast_key);
+ old == wiphy_dereference(sdata->local->hw.wiphy,
+ link->default_multicast_key);
defmgmtkey = old &&
- old == key_mtx_dereference(sdata->local,
- link->default_mgmt_key);
+ old == wiphy_dereference(sdata->local->hw.wiphy,
+ link->default_mgmt_key);
defbeaconkey = old &&
- old == key_mtx_dereference(sdata->local,
- link->default_beacon_key);
+ old == wiphy_dereference(sdata->local->hw.wiphy,
+ link->default_beacon_key);
if (defunikey && !new)
__ieee80211_set_default_key(link, -1, true, false);
@@ -775,8 +771,9 @@ static void __ieee80211_key_destroy(struct ieee80211_key *key,
if (delay_tailroom) {
/* see ieee80211_delayed_tailroom_dec */
sdata->crypto_tx_tailroom_pending_dec++;
- schedule_delayed_work(&sdata->dec_tailroom_needed_wk,
- HZ/2);
+ wiphy_delayed_work_queue(sdata->local->hw.wiphy,
+ &sdata->dec_tailroom_needed_wk,
+ HZ / 2);
} else {
decrease_tailroom_need_count(sdata, 1);
}
@@ -802,6 +799,9 @@ static void ieee80211_key_destroy(struct ieee80211_key *key,
void ieee80211_key_free_unused(struct ieee80211_key *key)
{
+ if (!key)
+ return;
+
WARN_ON(key->sdata || key->local);
ieee80211_key_free_common(key);
}
@@ -854,49 +854,56 @@ int ieee80211_key_link(struct ieee80211_key *key,
* can cause warnings to appear.
*/
bool delay_tailroom = sdata->vif.type == NL80211_IFTYPE_STATION;
- int ret = -EOPNOTSUPP;
+ int ret;
- mutex_lock(&sdata->local->key_mtx);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
if (sta && pairwise) {
struct ieee80211_key *alt_key;
- old_key = key_mtx_dereference(sdata->local, sta->ptk[idx]);
- alt_key = key_mtx_dereference(sdata->local, sta->ptk[idx ^ 1]);
+ old_key = wiphy_dereference(sdata->local->hw.wiphy,
+ sta->ptk[idx]);
+ alt_key = wiphy_dereference(sdata->local->hw.wiphy,
+ sta->ptk[idx ^ 1]);
/* The rekey code assumes that the old and new key are using
* the same cipher. Enforce the assumption for pairwise keys.
*/
if ((alt_key && alt_key->conf.cipher != key->conf.cipher) ||
- (old_key && old_key->conf.cipher != key->conf.cipher))
+ (old_key && old_key->conf.cipher != key->conf.cipher)) {
+ ret = -EOPNOTSUPP;
goto out;
+ }
} else if (sta) {
struct link_sta_info *link_sta = &sta->deflink;
int link_id = key->conf.link_id;
if (link_id >= 0) {
link_sta = rcu_dereference_protected(sta->link[link_id],
- lockdep_is_held(&sta->local->sta_mtx));
+ lockdep_is_held(&sta->local->hw.wiphy->mtx));
if (!link_sta) {
ret = -ENOLINK;
goto out;
}
}
- old_key = key_mtx_dereference(sdata->local, link_sta->gtk[idx]);
+ old_key = wiphy_dereference(sdata->local->hw.wiphy,
+ link_sta->gtk[idx]);
} else {
if (idx < NUM_DEFAULT_KEYS)
- old_key = key_mtx_dereference(sdata->local,
- sdata->keys[idx]);
+ old_key = wiphy_dereference(sdata->local->hw.wiphy,
+ sdata->keys[idx]);
if (!old_key)
- old_key = key_mtx_dereference(sdata->local,
- link->gtk[idx]);
+ old_key = wiphy_dereference(sdata->local->hw.wiphy,
+ link->gtk[idx]);
}
/* Non-pairwise keys must also not switch the cipher on rekey */
if (!pairwise) {
- if (old_key && old_key->conf.cipher != key->conf.cipher)
+ if (old_key && old_key->conf.cipher != key->conf.cipher) {
+ ret = -EOPNOTSUPP;
goto out;
+ }
}
/*
@@ -904,8 +911,7 @@ int ieee80211_key_link(struct ieee80211_key *key,
* new version of the key to avoid nonce reuse or replay issues.
*/
if (ieee80211_key_identical(sdata, old_key, key)) {
- ieee80211_key_free_unused(key);
- ret = 0;
+ ret = -EALREADY;
goto out;
}
@@ -930,9 +936,10 @@ int ieee80211_key_link(struct ieee80211_key *key,
ieee80211_key_free(key, delay_tailroom);
}
- out:
- mutex_unlock(&sdata->local->key_mtx);
+ key = NULL;
+ out:
+ ieee80211_key_free_unused(key);
return ret;
}
@@ -958,8 +965,6 @@ void ieee80211_reenable_keys(struct ieee80211_sub_if_data *sdata)
lockdep_assert_wiphy(sdata->local->hw.wiphy);
- mutex_lock(&sdata->local->key_mtx);
-
sdata->crypto_tx_tailroom_needed_cnt = 0;
sdata->crypto_tx_tailroom_pending_dec = 0;
@@ -976,8 +981,6 @@ void ieee80211_reenable_keys(struct ieee80211_sub_if_data *sdata)
ieee80211_key_enable_hw_accel(key);
}
}
-
- mutex_unlock(&sdata->local->key_mtx);
}
void ieee80211_iter_keys(struct ieee80211_hw *hw,
@@ -995,7 +998,6 @@ void ieee80211_iter_keys(struct ieee80211_hw *hw,
lockdep_assert_wiphy(hw->wiphy);
- mutex_lock(&local->key_mtx);
if (vif) {
sdata = vif_to_sdata(vif);
list_for_each_entry_safe(key, tmp, &sdata->key_list, list)
@@ -1010,7 +1012,6 @@ void ieee80211_iter_keys(struct ieee80211_hw *hw,
key->sta ? &key->sta->sta : NULL,
&key->conf, iter_data);
}
- mutex_unlock(&local->key_mtx);
}
EXPORT_SYMBOL(ieee80211_iter_keys);
@@ -1090,7 +1091,8 @@ void ieee80211_remove_link_keys(struct ieee80211_link_data *link,
struct ieee80211_local *local = sdata->local;
struct ieee80211_key *key, *tmp;
- mutex_lock(&local->key_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
+
list_for_each_entry_safe(key, tmp, &sdata->key_list, list) {
if (key->conf.link_id != link->link_id)
continue;
@@ -1099,7 +1101,6 @@ void ieee80211_remove_link_keys(struct ieee80211_link_data *link,
key, NULL);
list_add_tail(&key->list, keys);
}
- mutex_unlock(&local->key_mtx);
}
void ieee80211_free_key_list(struct ieee80211_local *local,
@@ -1107,10 +1108,10 @@ void ieee80211_free_key_list(struct ieee80211_local *local,
{
struct ieee80211_key *key, *tmp;
- mutex_lock(&local->key_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
+
list_for_each_entry_safe(key, tmp, keys, list)
__ieee80211_key_destroy(key, false);
- mutex_unlock(&local->key_mtx);
}
void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata,
@@ -1122,9 +1123,10 @@ void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata,
struct ieee80211_key *key, *tmp;
LIST_HEAD(keys);
- cancel_delayed_work_sync(&sdata->dec_tailroom_needed_wk);
+ wiphy_delayed_work_cancel(local->hw.wiphy,
+ &sdata->dec_tailroom_needed_wk);
- mutex_lock(&local->key_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
ieee80211_free_keys_iface(sdata, &keys);
@@ -1157,8 +1159,6 @@ void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata,
WARN_ON_ONCE(vlan->crypto_tx_tailroom_needed_cnt ||
vlan->crypto_tx_tailroom_pending_dec);
}
-
- mutex_unlock(&local->key_mtx);
}
void ieee80211_free_sta_keys(struct ieee80211_local *local,
@@ -1167,9 +1167,10 @@ void ieee80211_free_sta_keys(struct ieee80211_local *local,
struct ieee80211_key *key;
int i;
- mutex_lock(&local->key_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
+
for (i = 0; i < ARRAY_SIZE(sta->deflink.gtk); i++) {
- key = key_mtx_dereference(local, sta->deflink.gtk[i]);
+ key = wiphy_dereference(local->hw.wiphy, sta->deflink.gtk[i]);
if (!key)
continue;
ieee80211_key_replace(key->sdata, NULL, key->sta,
@@ -1180,7 +1181,7 @@ void ieee80211_free_sta_keys(struct ieee80211_local *local,
}
for (i = 0; i < NUM_DEFAULT_KEYS; i++) {
- key = key_mtx_dereference(local, sta->ptk[i]);
+ key = wiphy_dereference(local->hw.wiphy, sta->ptk[i]);
if (!key)
continue;
ieee80211_key_replace(key->sdata, NULL, key->sta,
@@ -1189,11 +1190,10 @@ void ieee80211_free_sta_keys(struct ieee80211_local *local,
__ieee80211_key_destroy(key, key->sdata->vif.type ==
NL80211_IFTYPE_STATION);
}
-
- mutex_unlock(&local->key_mtx);
}
-void ieee80211_delayed_tailroom_dec(struct work_struct *wk)
+void ieee80211_delayed_tailroom_dec(struct wiphy *wiphy,
+ struct wiphy_work *wk)
{
struct ieee80211_sub_if_data *sdata;
@@ -1216,11 +1216,9 @@ void ieee80211_delayed_tailroom_dec(struct work_struct *wk)
* within an ESS this usually won't happen.
*/
- mutex_lock(&sdata->local->key_mtx);
decrease_tailroom_need_count(sdata,
sdata->crypto_tx_tailroom_pending_dec);
sdata->crypto_tx_tailroom_pending_dec = 0;
- mutex_unlock(&sdata->local->key_mtx);
}
void ieee80211_gtk_rekey_notify(struct ieee80211_vif *vif, const u8 *bssid,
@@ -1349,7 +1347,7 @@ void ieee80211_remove_key(struct ieee80211_key_conf *keyconf)
key = container_of(keyconf, struct ieee80211_key, conf);
- assert_key_lock(key->local);
+ lockdep_assert_wiphy(key->local->hw.wiphy);
/*
* if key was uploaded, we assume the driver will/has remove(d)
diff --git a/net/mac80211/key.h b/net/mac80211/key.h
index f3df97df4b72..1fa0f4f78962 100644
--- a/net/mac80211/key.h
+++ b/net/mac80211/key.h
@@ -2,7 +2,7 @@
/*
* Copyright 2002-2004, Instant802 Networks, Inc.
* Copyright 2005, Devicescape Software, Inc.
- * Copyright (C) 2019, 2022 Intel Corporation
+ * Copyright (C) 2019, 2022-2023 Intel Corporation
*/
#ifndef IEEE80211_KEY_H
@@ -168,12 +168,7 @@ void ieee80211_reenable_keys(struct ieee80211_sub_if_data *sdata);
int ieee80211_key_switch_links(struct ieee80211_sub_if_data *sdata,
unsigned long del_links_mask,
unsigned long add_links_mask);
-
-#define key_mtx_dereference(local, ref) \
- rcu_dereference_protected(ref, lockdep_is_held(&((local)->key_mtx)))
-#define rcu_dereference_check_key_mtx(local, ref) \
- rcu_dereference_check(ref, lockdep_is_held(&((local)->key_mtx)))
-
-void ieee80211_delayed_tailroom_dec(struct work_struct *wk);
+void ieee80211_delayed_tailroom_dec(struct wiphy *wiphy,
+ struct wiphy_work *wk);
#endif /* IEEE80211_KEY_H */
diff --git a/net/mac80211/link.c b/net/mac80211/link.c
index 6148208b320e..d4f86955afa6 100644
--- a/net/mac80211/link.c
+++ b/net/mac80211/link.c
@@ -37,16 +37,16 @@ void ieee80211_link_init(struct ieee80211_sub_if_data *sdata,
link_conf->link_id = link_id;
link_conf->vif = &sdata->vif;
- INIT_WORK(&link->csa_finalize_work,
- ieee80211_csa_finalize_work);
- INIT_WORK(&link->color_change_finalize_work,
- ieee80211_color_change_finalize_work);
+ wiphy_work_init(&link->csa_finalize_work,
+ ieee80211_csa_finalize_work);
+ wiphy_work_init(&link->color_change_finalize_work,
+ ieee80211_color_change_finalize_work);
INIT_DELAYED_WORK(&link->color_collision_detect_work,
ieee80211_color_collision_detection_work);
INIT_LIST_HEAD(&link->assigned_chanctx_list);
INIT_LIST_HEAD(&link->reserved_chanctx_list);
- INIT_DELAYED_WORK(&link->dfs_cac_timer_work,
- ieee80211_dfs_cac_timer_work);
+ wiphy_delayed_work_init(&link->dfs_cac_timer_work,
+ ieee80211_dfs_cac_timer_work);
if (!deflink) {
switch (sdata->vif.type) {
@@ -191,11 +191,11 @@ static int ieee80211_vif_update_links(struct ieee80211_sub_if_data *sdata,
struct ieee80211_link_data *old_data[IEEE80211_MLD_MAX_NUM_LINKS];
bool use_deflink = old_links == 0; /* set for error case */
- sdata_assert_lock(sdata);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
memset(to_free, 0, sizeof(links));
- if (old_links == new_links)
+ if (old_links == new_links && dormant_links == sdata->vif.dormant_links)
return 0;
/* if there were no old links, need to clear the pointers to deflink */
@@ -235,6 +235,9 @@ static int ieee80211_vif_update_links(struct ieee80211_sub_if_data *sdata,
RCU_INIT_POINTER(sdata->vif.link_conf[link_id], NULL);
}
+ if (!old_links)
+ ieee80211_debugfs_recreate_netdev(sdata, true);
+
/* link them into data structures */
for_each_set_bit(link_id, &add, IEEE80211_MLD_MAX_NUM_LINKS) {
WARN_ON(!use_deflink &&
@@ -261,6 +264,8 @@ static int ieee80211_vif_update_links(struct ieee80211_sub_if_data *sdata,
old_links & old_active,
new_links & sdata->vif.active_links,
old);
+ if (!new_links)
+ ieee80211_debugfs_recreate_netdev(sdata, false);
}
if (ret) {
@@ -303,23 +308,6 @@ int ieee80211_vif_set_links(struct ieee80211_sub_if_data *sdata,
return ret;
}
-void ieee80211_vif_clear_links(struct ieee80211_sub_if_data *sdata)
-{
- struct link_container *links[IEEE80211_MLD_MAX_NUM_LINKS];
-
- /*
- * The locking here is different because when we free links
- * in the station case we need to be able to cancel_work_sync()
- * something that also takes the lock.
- */
-
- sdata_lock(sdata);
- ieee80211_vif_update_links(sdata, links, 0, 0);
- sdata_unlock(sdata);
-
- ieee80211_free_links(sdata, links);
-}
-
static int _ieee80211_set_active_links(struct ieee80211_sub_if_data *sdata,
u16 active_links)
{
@@ -447,17 +435,18 @@ static int _ieee80211_set_active_links(struct ieee80211_sub_if_data *sdata,
return 0;
}
-int __ieee80211_set_active_links(struct ieee80211_vif *vif, u16 active_links)
+int ieee80211_set_active_links(struct ieee80211_vif *vif, u16 active_links)
{
struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
struct ieee80211_local *local = sdata->local;
u16 old_active;
int ret;
- sdata_assert_lock(sdata);
- mutex_lock(&local->sta_mtx);
- mutex_lock(&local->mtx);
- mutex_lock(&local->key_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
+
+ if (!drv_can_activate_links(local, sdata, active_links))
+ return -EINVAL;
+
old_active = sdata->vif.active_links;
if (old_active & active_links) {
/*
@@ -473,21 +462,6 @@ int __ieee80211_set_active_links(struct ieee80211_vif *vif, u16 active_links)
/* otherwise switch directly */
ret = _ieee80211_set_active_links(sdata, active_links);
}
- mutex_unlock(&local->key_mtx);
- mutex_unlock(&local->mtx);
- mutex_unlock(&local->sta_mtx);
-
- return ret;
-}
-
-int ieee80211_set_active_links(struct ieee80211_vif *vif, u16 active_links)
-{
- struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
- int ret;
-
- sdata_lock(sdata);
- ret = __ieee80211_set_active_links(vif, active_links);
- sdata_unlock(sdata);
return ret;
}
@@ -512,6 +486,6 @@ void ieee80211_set_active_links_async(struct ieee80211_vif *vif,
return;
sdata->desired_active_links = active_links;
- schedule_work(&sdata->activate_links_work);
+ wiphy_work_queue(sdata->local->hw.wiphy, &sdata->activate_links_work);
}
EXPORT_SYMBOL_GPL(ieee80211_set_active_links_async);
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 24315d7b3126..f2ece7793573 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -84,7 +84,8 @@ void ieee80211_configure_filter(struct ieee80211_local *local)
local->filter_flags = new_flags & ~(1<<31);
}
-static void ieee80211_reconfig_filter(struct work_struct *work)
+static void ieee80211_reconfig_filter(struct wiphy *wiphy,
+ struct wiphy_work *work)
{
struct ieee80211_local *local =
container_of(work, struct ieee80211_local, reconfig_filter);
@@ -206,7 +207,8 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed)
BSS_CHANGED_PS |\
BSS_CHANGED_IBSS |\
BSS_CHANGED_ARP_FILTER |\
- BSS_CHANGED_SSID)
+ BSS_CHANGED_SSID |\
+ BSS_CHANGED_MLD_VALID_LINKS)
void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata,
u64 changed)
@@ -317,7 +319,7 @@ static void ieee80211_tasklet_handler(struct tasklet_struct *t)
break;
case IEEE80211_TX_STATUS_MSG:
skb->pkt_type = 0;
- ieee80211_tx_status(&local->hw, skb);
+ ieee80211_tx_status_skb(&local->hw, skb);
break;
default:
WARN(1, "mac80211: Packet is of unknown type %d\n",
@@ -335,14 +337,12 @@ static void ieee80211_restart_work(struct work_struct *work)
struct ieee80211_sub_if_data *sdata;
int ret;
- /* wait for scan work complete */
flush_workqueue(local->workqueue);
- flush_work(&local->sched_scan_stopped_work);
- flush_work(&local->radar_detected_work);
rtnl_lock();
/* we might do interface manipulations, so need both */
wiphy_lock(local->hw.wiphy);
+ wiphy_work_flush(local->hw.wiphy, NULL);
WARN(test_bit(SCAN_HW_SCANNING, &local->scanning),
"%s called with hardware scan in progress\n", __func__);
@@ -366,21 +366,19 @@ static void ieee80211_restart_work(struct work_struct *work)
*/
wiphy_work_cancel(local->hw.wiphy,
&sdata->u.mgd.csa_connection_drop_work);
- if (sdata->vif.bss_conf.csa_active) {
- sdata_lock(sdata);
+ if (sdata->vif.bss_conf.csa_active)
ieee80211_sta_connection_lost(sdata,
WLAN_REASON_UNSPECIFIED,
false);
- sdata_unlock(sdata);
- }
}
- flush_delayed_work(&sdata->dec_tailroom_needed_wk);
+ wiphy_delayed_work_flush(local->hw.wiphy,
+ &sdata->dec_tailroom_needed_wk);
}
ieee80211_scan_cancel(local);
/* make sure any new ROC will consider local->in_reconfig */
- flush_delayed_work(&local->roc_work);
- flush_work(&local->hw_roc_done);
+ wiphy_delayed_work_flush(local->hw.wiphy, &local->roc_work);
+ wiphy_work_flush(local->hw.wiphy, &local->hw_roc_done);
/* wait for all packet processing to be done */
synchronize_net();
@@ -439,7 +437,7 @@ static int ieee80211_ifa_changed(struct notifier_block *nb,
if (!wdev)
return NOTIFY_DONE;
- if (wdev->wiphy != local->hw.wiphy)
+ if (wdev->wiphy != local->hw.wiphy || !wdev->registered)
return NOTIFY_DONE;
sdata = IEEE80211_DEV_TO_SUB_IF(ndev);
@@ -454,7 +452,25 @@ static int ieee80211_ifa_changed(struct notifier_block *nb,
return NOTIFY_DONE;
ifmgd = &sdata->u.mgd;
- sdata_lock(sdata);
+
+ /*
+ * The nested here is needed to convince lockdep that this is
+ * all OK. Yes, we lock the wiphy mutex here while we already
+ * hold the notifier rwsem, that's the normal case. And yes,
+ * we also acquire the notifier rwsem again when unregistering
+ * a netdev while we already hold the wiphy mutex, so it does
+ * look like a typical ABBA deadlock.
+ *
+ * However, both of these things happen with the RTNL held
+ * already. Therefore, they can't actually happen, since the
+ * lock orders really are ABC and ACB, which is fine due to
+ * the RTNL (A).
+ *
+ * We still need to prevent recursion, which is accomplished
+ * by the !wdev->registered check above.
+ */
+ mutex_lock_nested(&local->hw.wiphy->mtx, 1);
+ __acquire(&local->hw.wiphy->mtx);
/* Copy the addresses to the vif config list */
ifa = rtnl_dereference(idev->ifa_list);
@@ -471,7 +487,7 @@ static int ieee80211_ifa_changed(struct notifier_block *nb,
if (ifmgd->associated)
ieee80211_vif_cfg_change_notify(sdata, BSS_CHANGED_ARP_FILTER);
- sdata_unlock(sdata);
+ wiphy_unlock(local->hw.wiphy);
return NOTIFY_OK;
}
@@ -784,9 +800,6 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
__hw_addr_init(&local->mc_list);
mutex_init(&local->iflist_mtx);
- mutex_init(&local->mtx);
-
- mutex_init(&local->key_mtx);
spin_lock_init(&local->filter_lock);
spin_lock_init(&local->rx_path_lock);
spin_lock_init(&local->queue_stop_reason_lock);
@@ -807,26 +820,25 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
spin_lock_init(&local->handle_wake_tx_queue_lock);
INIT_LIST_HEAD(&local->chanctx_list);
- mutex_init(&local->chanctx_mtx);
- INIT_DELAYED_WORK(&local->scan_work, ieee80211_scan_work);
+ wiphy_delayed_work_init(&local->scan_work, ieee80211_scan_work);
INIT_WORK(&local->restart_work, ieee80211_restart_work);
- INIT_WORK(&local->radar_detected_work,
- ieee80211_dfs_radar_detected_work);
+ wiphy_work_init(&local->radar_detected_work,
+ ieee80211_dfs_radar_detected_work);
- INIT_WORK(&local->reconfig_filter, ieee80211_reconfig_filter);
+ wiphy_work_init(&local->reconfig_filter, ieee80211_reconfig_filter);
local->smps_mode = IEEE80211_SMPS_OFF;
- INIT_WORK(&local->dynamic_ps_enable_work,
- ieee80211_dynamic_ps_enable_work);
- INIT_WORK(&local->dynamic_ps_disable_work,
- ieee80211_dynamic_ps_disable_work);
+ wiphy_work_init(&local->dynamic_ps_enable_work,
+ ieee80211_dynamic_ps_enable_work);
+ wiphy_work_init(&local->dynamic_ps_disable_work,
+ ieee80211_dynamic_ps_disable_work);
timer_setup(&local->dynamic_ps_timer, ieee80211_dynamic_ps_timer, 0);
- INIT_WORK(&local->sched_scan_stopped_work,
- ieee80211_sched_scan_stopped_work);
+ wiphy_work_init(&local->sched_scan_stopped_work,
+ ieee80211_sched_scan_stopped_work);
spin_lock_init(&local->ack_status_lock);
idr_init(&local->ack_status_frames);
@@ -1055,6 +1067,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
supp_he = false;
supp_eht = false;
for (band = 0; band < NUM_NL80211_BANDS; band++) {
+ const struct ieee80211_sband_iftype_data *iftd;
struct ieee80211_supported_band *sband;
sband = local->hw.wiphy->bands[band];
@@ -1101,11 +1114,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
supp_ht = supp_ht || sband->ht_cap.ht_supported;
supp_vht = supp_vht || sband->vht_cap.vht_supported;
- for (i = 0; i < sband->n_iftype_data; i++) {
- const struct ieee80211_sband_iftype_data *iftd;
-
- iftd = &sband->iftype_data[i];
-
+ for_each_sband_iftype_data(sband, i, iftd) {
supp_he = supp_he || iftd->he_cap.has_he;
supp_eht = supp_eht || iftd->eht_cap.has_eht;
}
@@ -1396,6 +1405,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
debugfs_hw_add(local);
rate_control_add_debugfs(local);
+ ieee80211_check_wbrf_support(local);
+
rtnl_lock();
wiphy_lock(hw->wiphy);
@@ -1446,6 +1457,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
ieee80211_remove_interfaces(local);
rtnl_unlock();
fail_rate:
+ ieee80211_txq_teardown_flows(local);
fail_flows:
ieee80211_led_exit(local);
destroy_workqueue(local->workqueue);
@@ -1482,13 +1494,17 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw)
*/
ieee80211_remove_interfaces(local);
+ ieee80211_txq_teardown_flows(local);
+
+ wiphy_lock(local->hw.wiphy);
+ wiphy_delayed_work_cancel(local->hw.wiphy, &local->roc_work);
+ wiphy_work_cancel(local->hw.wiphy, &local->reconfig_filter);
+ wiphy_work_cancel(local->hw.wiphy, &local->sched_scan_stopped_work);
+ wiphy_work_cancel(local->hw.wiphy, &local->radar_detected_work);
+ wiphy_unlock(local->hw.wiphy);
rtnl_unlock();
- cancel_delayed_work_sync(&local->roc_work);
cancel_work_sync(&local->restart_work);
- cancel_work_sync(&local->reconfig_filter);
- flush_work(&local->sched_scan_stopped_work);
- flush_work(&local->radar_detected_work);
ieee80211_clear_tx_pending(local);
rate_control_deinitialize(local);
@@ -1519,7 +1535,6 @@ void ieee80211_free_hw(struct ieee80211_hw *hw)
enum nl80211_band band;
mutex_destroy(&local->iflist_mtx);
- mutex_destroy(&local->mtx);
if (local->wiphy_ciphers_allocated) {
kfree(local->hw.wiphy->cipher_suites);
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index af8c5fc2db14..fccbcde3359a 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -56,6 +56,8 @@ static void ieee80211_mesh_housekeeping_timer(struct timer_list *t)
*
* This function checks if the mesh configuration of a mesh point matches the
* local mesh configuration, i.e. if both nodes belong to the same mesh network.
+ *
+ * Returns: %true if both nodes belong to the same mesh
*/
bool mesh_matches_local(struct ieee80211_sub_if_data *sdata,
struct ieee802_11_elems *ie)
@@ -119,6 +121,8 @@ bool mesh_matches_local(struct ieee80211_sub_if_data *sdata,
* mesh_peer_accepts_plinks - check if an mp is willing to establish peer links
*
* @ie: information elements of a management frame from the mesh peer
+ *
+ * Returns: %true if the mesh peer is willing to establish peer links
*/
bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie)
{
@@ -858,7 +862,7 @@ bool ieee80211_mesh_xmit_fast(struct ieee80211_sub_if_data *sdata,
* @meshsa: source address in the mesh. Same as TA, as frame is
* locally originated.
*
- * Return the length of the 802.11 (does not include a mesh control header)
+ * Returns: the length of the 802.11 frame header (excludes mesh control header)
*/
int ieee80211_fill_mesh_addresses(struct ieee80211_hdr *hdr, __le16 *fc,
const u8 *meshda, const u8 *meshsa)
@@ -891,7 +895,7 @@ int ieee80211_fill_mesh_addresses(struct ieee80211_hdr *hdr, __le16 *fc,
* @addr6: 2nd address in the ae header, which corresponds to addr6 of the
* mesh frame
*
- * Return the header length.
+ * Returns: the header length
*/
unsigned int ieee80211_new_mesh_header(struct ieee80211_sub_if_data *sdata,
struct ieee80211s_hdr *meshhdr,
@@ -1175,7 +1179,7 @@ void ieee80211_mbss_info_change_notify(struct ieee80211_sub_if_data *sdata,
/* if we race with running work, worst case this work becomes a noop */
for_each_set_bit(bit, &bits, sizeof(changed) * BITS_PER_BYTE)
- set_bit(bit, &ifmsh->mbss_changed);
+ set_bit(bit, ifmsh->mbss_changed);
set_bit(MESH_WORK_MBSS_CHANGED, &ifmsh->wrkq_flags);
wiphy_work_queue(sdata->local->hw.wiphy, &sdata->work);
}
@@ -1257,7 +1261,7 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata)
/* clear any mesh work (for next join) we may have accrued */
ifmsh->wrkq_flags = 0;
- ifmsh->mbss_changed = 0;
+ memset(ifmsh->mbss_changed, 0, sizeof(ifmsh->mbss_changed));
local->fif_other_bss--;
atomic_dec(&local->iff_allmultis);
@@ -1291,7 +1295,7 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata,
ieee80211_conn_flags_t conn_flags = 0;
u32 vht_cap_info = 0;
- sdata_assert_lock(sdata);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
sband = ieee80211_get_sband(sdata);
if (!sband)
@@ -1559,7 +1563,7 @@ int ieee80211_mesh_csa_beacon(struct ieee80211_sub_if_data *sdata,
struct mesh_csa_settings *tmp_csa_settings;
int ret = 0;
- lockdep_assert_held(&sdata->wdev.mtx);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
tmp_csa_settings = kmalloc(sizeof(*tmp_csa_settings),
GFP_ATOMIC);
@@ -1691,11 +1695,11 @@ void ieee80211_mesh_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
struct ieee80211_mgmt *mgmt;
u16 stype;
- sdata_lock(sdata);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
/* mesh already went down */
if (!sdata->u.mesh.mesh_id_len)
- goto out;
+ return;
rx_status = IEEE80211_SKB_RXCB(skb);
mgmt = (struct ieee80211_mgmt *) skb->data;
@@ -1714,8 +1718,6 @@ void ieee80211_mesh_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
ieee80211_mesh_rx_mgmt_action(sdata, mgmt, skb->len, rx_status);
break;
}
-out:
- sdata_unlock(sdata);
}
static void mesh_bss_info_changed(struct ieee80211_sub_if_data *sdata)
@@ -1724,9 +1726,9 @@ static void mesh_bss_info_changed(struct ieee80211_sub_if_data *sdata)
u32 bit;
u64 changed = 0;
- for_each_set_bit(bit, &ifmsh->mbss_changed,
+ for_each_set_bit(bit, ifmsh->mbss_changed,
sizeof(changed) * BITS_PER_BYTE) {
- clear_bit(bit, &ifmsh->mbss_changed);
+ clear_bit(bit, ifmsh->mbss_changed);
changed |= BIT(bit);
}
@@ -1745,11 +1747,11 @@ void ieee80211_mesh_work(struct ieee80211_sub_if_data *sdata)
{
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
- sdata_lock(sdata);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
/* mesh already went down */
if (!sdata->u.mesh.mesh_id_len)
- goto out;
+ return;
if (ifmsh->preq_queue_len &&
time_after(jiffies,
@@ -1767,8 +1769,6 @@ void ieee80211_mesh_work(struct ieee80211_sub_if_data *sdata)
if (test_and_clear_bit(MESH_WORK_MBSS_CHANGED, &ifmsh->wrkq_flags))
mesh_bss_info_changed(sdata);
-out:
- sdata_unlock(sdata);
}
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 51369072984e..024f48db6b05 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -151,7 +151,7 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags,
break;
default:
kfree_skb(skb);
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
*pos++ = ie_len;
*pos++ = flags;
@@ -230,6 +230,8 @@ static void prepare_frame_for_deferred_tx(struct ieee80211_sub_if_data *sdata,
* Note: This function may be called with driver locks taken that the driver
* also acquires in the TX path. To avoid a deadlock we don't transmit the
* frame directly but add it to the pending queue instead.
+ *
+ * Returns: 0 on success
*/
int mesh_path_error_tx(struct ieee80211_sub_if_data *sdata,
u8 ttl, const u8 *target, u32 target_sn,
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index d32e304eeb4b..735edde1bd81 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2008, 2009 open80211s Ltd.
+ * Copyright (C) 2023 Intel Corporation
* Author: Luis Carlos Cobo <luisca@cozybit.com>
*/
@@ -173,6 +174,11 @@ static void prepare_for_gate(struct sk_buff *skb, char *dst_addr,
/**
* mesh_path_move_to_queue - Move or copy frames from one mpath queue to another
*
+ * @gate_mpath: An active mpath the frames will be sent to (i.e. the gate)
+ * @from_mpath: The failed mpath
+ * @copy: When true, copy all the frames to the new mpath queue. When false,
+ * move them.
+ *
* This function is used to transfer or copy frames from an unresolved mpath to
* a gate mpath. The function also adds the Address Extension field and
* updates the next hop.
@@ -181,11 +187,6 @@ static void prepare_for_gate(struct sk_buff *skb, char *dst_addr,
* destination addresses are updated.
*
* The gate mpath must be an active mpath with a valid mpath->next_hop.
- *
- * @gate_mpath: An active mpath the frames will be sent to (i.e. the gate)
- * @from_mpath: The failed mpath
- * @copy: When true, copy all the frames to the new mpath queue. When false,
- * move them.
*/
static void mesh_path_move_to_queue(struct mesh_path *gate_mpath,
struct mesh_path *from_mpath,
@@ -330,6 +331,8 @@ mpp_path_lookup_by_idx(struct ieee80211_sub_if_data *sdata, int idx)
/**
* mesh_path_add_gate - add the given mpath to a mesh gate to our path table
* @mpath: gate path to add to table
+ *
+ * Returns: 0 on success, -EEXIST
*/
int mesh_path_add_gate(struct mesh_path *mpath)
{
@@ -388,6 +391,8 @@ static void mesh_gate_del(struct mesh_table *tbl, struct mesh_path *mpath)
/**
* mesh_gate_num - number of gates known to this interface
* @sdata: subif data
+ *
+ * Returns: The number of gates
*/
int mesh_gate_num(struct ieee80211_sub_if_data *sdata)
{
@@ -648,7 +653,7 @@ void mesh_fast_tx_flush_addr(struct ieee80211_sub_if_data *sdata,
cache = &sdata->u.mesh.tx_cache;
spin_lock_bh(&cache->walk_lock);
- entry = rhashtable_lookup(&cache->rht, addr, fast_tx_rht_params);
+ entry = rhashtable_lookup_fast(&cache->rht, addr, fast_tx_rht_params);
if (entry)
mesh_fast_tx_entry_free(cache, entry);
spin_unlock_bh(&cache->walk_lock);
@@ -671,10 +676,10 @@ struct mesh_path *mesh_path_add(struct ieee80211_sub_if_data *sdata,
if (ether_addr_equal(dst, sdata->vif.addr))
/* never add ourselves as neighbours */
- return ERR_PTR(-ENOTSUPP);
+ return ERR_PTR(-EOPNOTSUPP);
if (is_multicast_ether_addr(dst))
- return ERR_PTR(-ENOTSUPP);
+ return ERR_PTR(-EOPNOTSUPP);
if (atomic_add_unless(&sdata->u.mesh.mpaths, 1, MESH_MAX_MPATHS) == 0)
return ERR_PTR(-ENOSPC);
@@ -714,10 +719,10 @@ int mpp_path_add(struct ieee80211_sub_if_data *sdata,
if (ether_addr_equal(dst, sdata->vif.addr))
/* never add ourselves as neighbours */
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
if (is_multicast_ether_addr(dst))
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
new_mpath = mesh_path_new(sdata, dst, GFP_ATOMIC);
@@ -861,10 +866,9 @@ static void table_flush_by_iface(struct mesh_table *tbl)
/**
* mesh_path_flush_by_iface - Deletes all mesh paths associated with a given iface
*
- * This function deletes both mesh paths as well as mesh portal paths.
- *
* @sdata: interface data to match
*
+ * This function deletes both mesh paths as well as mesh portal paths.
*/
void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata)
{
@@ -944,6 +948,8 @@ void mesh_path_tx_pending(struct mesh_path *mpath)
* queue to that gate's queue. If there are more than one gates, the frames
* are copied from each gate to the next. After frames are copied, the
* mpath queues are emptied onto the transmission queue.
+ *
+ * Returns: 0 on success, -EHOSTUNREACH
*/
int mesh_path_send_to_gates(struct mesh_path *mpath)
{
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index f3d5bb0a59f1..28bf794f67f8 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -153,6 +153,8 @@ out:
* selected if any non-HT peers are present in our MBSS. 20MHz-protection mode
* is selected if all peers in our 20/40MHz MBSS support HT and at least one
* HT20 peer is present. Otherwise no-protection mode is selected.
+ *
+ * Returns: BSS_CHANGED_HT or 0 for no change
*/
static u64 mesh_set_ht_prot_mode(struct ieee80211_sub_if_data *sdata)
{
@@ -362,7 +364,7 @@ free:
* Mesh paths with this peer as next hop should be flushed
* by the caller outside of plink_lock.
*
- * Returns beacon changed flag if the beacon content changed.
+ * Returns: beacon changed flag if the beacon content changed.
*
* Locking: the caller must hold sta->mesh->plink_lock
*/
@@ -390,6 +392,8 @@ static u64 __mesh_plink_deactivate(struct sta_info *sta)
* @sta: mesh peer link to deactivate
*
* All mesh paths with this peer as next hop will be flushed
+ *
+ * Returns: beacon changed flag if the beacon content changed.
*/
u64 mesh_plink_deactivate(struct sta_info *sta)
{
@@ -451,7 +455,7 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata,
changed |= IEEE80211_RC_BW_CHANGED;
ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband,
- elems->vht_cap_elem,
+ elems->vht_cap_elem, NULL,
&sta->deflink);
ieee80211_he_cap_ie_to_sta_he_cap(sdata, sband, elems->he_cap,
@@ -1064,8 +1068,8 @@ mesh_plink_get_event(struct ieee80211_sub_if_data *sdata,
case WLAN_SP_MESH_PEERING_OPEN:
if (!matches_local)
event = OPN_RJCT;
- if (!mesh_plink_free_count(sdata) ||
- (sta->mesh->plid && sta->mesh->plid != plid))
+ else if (!mesh_plink_free_count(sdata) ||
+ (sta->mesh->plid && sta->mesh->plid != plid))
event = OPN_IGNR;
else
event = OPN_ACPT;
@@ -1073,9 +1077,9 @@ mesh_plink_get_event(struct ieee80211_sub_if_data *sdata,
case WLAN_SP_MESH_PEERING_CONFIRM:
if (!matches_local)
event = CNF_RJCT;
- if (!mesh_plink_free_count(sdata) ||
- sta->mesh->llid != llid ||
- (sta->mesh->plid && sta->mesh->plid != plid))
+ else if (!mesh_plink_free_count(sdata) ||
+ sta->mesh->llid != llid ||
+ (sta->mesh->plid && sta->mesh->plid != plid))
event = CNF_IGNR;
else
event = CNF_ACPT;
@@ -1243,6 +1247,8 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata,
return;
}
elems = ieee802_11_parse_elems(baseaddr, len - baselen, true, NULL);
- mesh_process_plink_frame(sdata, mgmt, elems, rx_status);
- kfree(elems);
+ if (elems) {
+ mesh_process_plink_frame(sdata, mgmt, elems, rx_status);
+ kfree(elems);
+ }
}
diff --git a/net/mac80211/mesh_ps.c b/net/mac80211/mesh_ps.c
index 35eacca43e49..20e022a03933 100644
--- a/net/mac80211/mesh_ps.c
+++ b/net/mac80211/mesh_ps.c
@@ -15,6 +15,8 @@
/**
* mps_qos_null_get - create pre-addressed QoS Null frame for mesh powersave
* @sta: the station to get the frame for
+ *
+ * Returns: A newly allocated SKB
*/
static struct sk_buff *mps_qos_null_get(struct sta_info *sta)
{
@@ -77,6 +79,8 @@ static void mps_qos_null_tx(struct sta_info *sta)
*
* sets the non-peer power mode and triggers the driver PS (re-)configuration
* Return BSS_CHANGED_BEACON if a beacon update is necessary.
+ *
+ * Returns: BSS_CHANGED_BEACON if a beacon update is in order.
*/
u64 ieee80211_mps_local_status_update(struct ieee80211_sub_if_data *sdata)
{
@@ -147,7 +151,7 @@ u64 ieee80211_mps_local_status_update(struct ieee80211_sub_if_data *sdata)
*
* @sta: mesh STA
* @pm: the power mode to set
- * Return BSS_CHANGED_BEACON if a beacon update is in order.
+ * Returns: BSS_CHANGED_BEACON if a beacon update is in order.
*/
u64 ieee80211_mps_set_sta_local_pm(struct sta_info *sta,
enum nl80211_mesh_power_mode pm)
diff --git a/net/mac80211/mesh_sync.c b/net/mac80211/mesh_sync.c
index 9e342cc2504c..8cf3f395f52f 100644
--- a/net/mac80211/mesh_sync.c
+++ b/net/mac80211/mesh_sync.c
@@ -3,7 +3,7 @@
* Copyright 2011-2012, Pavel Zubarev <pavel.zubarev@gmail.com>
* Copyright 2011-2012, Marco Porsch <marco.porsch@s2005.tu-chemnitz.de>
* Copyright 2011-2012, cozybit Inc.
- * Copyright (C) 2021 Intel Corporation
+ * Copyright (C) 2021,2023 Intel Corporation
*/
#include "ieee80211_i.h"
@@ -37,6 +37,8 @@ struct sync_method {
* mesh_peer_tbtt_adjusting - check if an mp is currently adjusting its TBTT
*
* @cfg: mesh config element from the mesh peer (or %NULL)
+ *
+ * Returns: If the mesh peer is currently adjusting its TBTT
*/
static bool mesh_peer_tbtt_adjusting(const struct ieee80211_meshconf_ie *cfg)
{
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index f93eb38ae0b8..2022a26eb881 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -8,7 +8,7 @@
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018 - 2023 Intel Corporation
+ * Copyright (C) 2018 - 2024 Intel Corporation
*/
#include <linux/delay.h>
@@ -43,6 +43,9 @@
#define IEEE80211_ASSOC_TIMEOUT_SHORT (HZ / 10)
#define IEEE80211_ASSOC_MAX_TRIES 3
+#define IEEE80211_ADV_TTLM_SAFETY_BUFFER_MS msecs_to_jiffies(100)
+#define IEEE80211_ADV_TTLM_ST_UNDERFLOW 0xff00
+
static int max_nullfunc_tries = 2;
module_param(max_nullfunc_tries, int, 0644);
MODULE_PARM_DESC(max_nullfunc_tries,
@@ -110,7 +113,8 @@ ieee80211_extract_dis_subch_bmap(const struct ieee80211_eht_operation *eht_oper,
return 0;
/* set 160/320 supported to get the full AP definition */
- ieee80211_chandef_eht_oper(eht_oper, true, true, &ap_chandef);
+ ieee80211_chandef_eht_oper((const void *)eht_oper->optional,
+ true, true, &ap_chandef);
ap_center_freq = ap_chandef.center_freq1;
ap_bw = 20 * BIT(u8_get_bits(info->control,
IEEE80211_EHT_OPER_CHAN_WIDTH));
@@ -134,6 +138,7 @@ ieee80211_handle_puncturing_bitmap(struct ieee80211_link_data *link,
u16 bitmap, u64 *changed)
{
struct cfg80211_chan_def *chandef = &link->conf->chandef;
+ struct ieee80211_local *local = link->sdata->local;
u16 extracted;
u64 _changed = 0;
@@ -146,7 +151,9 @@ ieee80211_handle_puncturing_bitmap(struct ieee80211_link_data *link,
bitmap);
if (cfg80211_valid_disable_subchannel_bitmap(&bitmap,
- chandef))
+ chandef) &&
+ !(bitmap && ieee80211_hw_check(&local->hw,
+ DISALLOW_PUNCTURING)))
break;
link->u.mgd.conn_flags |=
ieee80211_chandef_downgrade(chandef);
@@ -175,7 +182,7 @@ ieee80211_handle_puncturing_bitmap(struct ieee80211_link_data *link,
static void run_again(struct ieee80211_sub_if_data *sdata,
unsigned long timeout)
{
- sdata_assert_lock(sdata);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
if (!timer_pending(&sdata->u.mgd.timer) ||
time_before(timeout, sdata->u.mgd.timer.expires))
@@ -388,7 +395,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
if (eht_oper && (eht_oper->params & IEEE80211_EHT_OPER_INFO_PRESENT)) {
struct cfg80211_chan_def eht_chandef = *chandef;
- ieee80211_chandef_eht_oper(eht_oper,
+ ieee80211_chandef_eht_oper((const void *)eht_oper->optional,
eht_chandef.width ==
NL80211_CHAN_WIDTH_160,
false, &eht_chandef);
@@ -594,6 +601,7 @@ static int ieee80211_config_bw(struct ieee80211_link_data *link,
return ret;
}
+ cfg80211_schedule_channels_check(&sdata->wdev);
return 0;
}
@@ -830,7 +838,6 @@ static void ieee80211_assoc_add_rates(struct sk_buff *skb,
struct ieee80211_supported_band *sband,
struct ieee80211_mgd_assoc_data *assoc_data)
{
- unsigned int shift = ieee80211_chanwidth_get_shift(width);
unsigned int rates_len, supp_rates_len;
u32 rates = 0;
int i, count;
@@ -869,8 +876,7 @@ static void ieee80211_assoc_add_rates(struct sk_buff *skb,
count = 0;
for (i = 0; i < sband->n_bitrates; i++) {
if (BIT(i) & rates) {
- int rate = DIV_ROUND_UP(sband->bitrates[i].bitrate,
- 5 * (1 << shift));
+ int rate = DIV_ROUND_UP(sband->bitrates[i].bitrate, 5);
*pos++ = (u8)rate;
if (++count == 8)
break;
@@ -886,8 +892,7 @@ static void ieee80211_assoc_add_rates(struct sk_buff *skb,
if (BIT(i) & rates) {
int rate;
- rate = DIV_ROUND_UP(sband->bitrates[i].bitrate,
- 5 * (1 << shift));
+ rate = DIV_ROUND_UP(sband->bitrates[i].bitrate, 5);
*pos++ = (u8)rate;
}
}
@@ -1384,7 +1389,7 @@ static int ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
struct ieee80211_mgmt *mgmt;
u8 *pos, qos_info, *ie_start;
size_t offset, noffset;
- u16 capab = WLAN_CAPABILITY_ESS, link_capab;
+ u16 capab = 0, link_capab;
__le16 listen_int;
struct element *ext_capa = NULL;
enum nl80211_iftype iftype = ieee80211_vif_type_p2p(&sdata->vif);
@@ -1401,7 +1406,7 @@ static int ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
assoc_data->ie,
assoc_data->ie_len);
- sdata_assert_lock(sdata);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
size = local->hw.extra_tx_headroom +
sizeof(*mgmt) + /* bit too much but doesn't matter */
@@ -1531,6 +1536,17 @@ static int ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
*pos++ = assoc_data->ssid_len;
memcpy(pos, assoc_data->ssid, assoc_data->ssid_len);
+ /*
+ * This bit is technically reserved, so it shouldn't matter for either
+ * the AP or us, but it also means we shouldn't set it. However, we've
+ * always set it in the past, and apparently some EHT APs check that
+ * we don't set it. To avoid interoperability issues with old APs that
+ * for some reason check it and want it to be set, set the bit for all
+ * pre-EHT connections as we used to do.
+ */
+ if (link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_EHT)
+ capab |= WLAN_CAPABILITY_ESS;
+
/* add the elements for the assoc (main) link */
link_capab = capab;
offset = ieee80211_assoc_link_elems(sdata, skb, &link_capab,
@@ -1586,6 +1602,7 @@ static int ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
ifmgd->assoc_req_ies_len = pos - ie_start;
+ info.link_id = assoc_data->assoc_link_id;
drv_mgd_prepare_tx(local, sdata, &info);
IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT;
@@ -1689,15 +1706,13 @@ static void ieee80211_chswitch_work(struct wiphy *wiphy,
if (!ieee80211_sdata_running(sdata))
return;
- sdata_lock(sdata);
- mutex_lock(&local->mtx);
- mutex_lock(&local->chanctx_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
if (!ifmgd->associated)
- goto out;
+ return;
if (!link->conf->csa_active)
- goto out;
+ return;
/*
* using reservation isn't immediate as it may be deferred until later
@@ -1713,7 +1728,7 @@ static void ieee80211_chswitch_work(struct wiphy *wiphy,
* reservations
*/
if (link->reserved_ready)
- goto out;
+ return;
ret = ieee80211_link_use_reserved_context(link);
if (ret) {
@@ -1722,10 +1737,8 @@ static void ieee80211_chswitch_work(struct wiphy *wiphy,
ret);
wiphy_work_queue(sdata->local->hw.wiphy,
&ifmgd->csa_connection_drop_work);
- goto out;
}
-
- goto out;
+ return;
}
if (!cfg80211_chandef_identical(&link->conf->chandef,
@@ -1734,18 +1747,13 @@ static void ieee80211_chswitch_work(struct wiphy *wiphy,
"failed to finalize channel switch, disconnecting\n");
wiphy_work_queue(sdata->local->hw.wiphy,
&ifmgd->csa_connection_drop_work);
- goto out;
+ return;
}
link->u.mgd.csa_waiting_bcn = true;
ieee80211_sta_reset_beacon_monitor(sdata);
ieee80211_sta_reset_conn_monitor(sdata);
-
-out:
- mutex_unlock(&local->chanctx_mtx);
- mutex_unlock(&local->mtx);
- sdata_unlock(sdata);
}
static void ieee80211_chswitch_post_beacon(struct ieee80211_link_data *link)
@@ -1755,7 +1763,7 @@ static void ieee80211_chswitch_post_beacon(struct ieee80211_link_data *link)
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
int ret;
- sdata_assert_lock(sdata);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
WARN_ON(!link->conf->csa_active);
@@ -1773,7 +1781,7 @@ static void ieee80211_chswitch_post_beacon(struct ieee80211_link_data *link)
*/
link->u.mgd.beacon_crc_valid = false;
- ret = drv_post_channel_switch(sdata);
+ ret = drv_post_channel_switch(link);
if (ret) {
sdata_info(sdata,
"driver post channel switch failed, disconnecting\n");
@@ -1782,28 +1790,38 @@ static void ieee80211_chswitch_post_beacon(struct ieee80211_link_data *link)
return;
}
- cfg80211_ch_switch_notify(sdata->dev, &link->reserved_chandef, 0, 0);
+ cfg80211_ch_switch_notify(sdata->dev, &link->reserved_chandef,
+ link->link_id, 0);
}
-void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success)
+void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success,
+ unsigned int link_id)
{
struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
- struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
- if (WARN_ON(ieee80211_vif_is_mld(&sdata->vif)))
- success = false;
+ trace_api_chswitch_done(sdata, success, link_id);
+
+ rcu_read_lock();
- trace_api_chswitch_done(sdata, success);
if (!success) {
sdata_info(sdata,
"driver channel switch failed, disconnecting\n");
wiphy_work_queue(sdata->local->hw.wiphy,
- &ifmgd->csa_connection_drop_work);
+ &sdata->u.mgd.csa_connection_drop_work);
} else {
+ struct ieee80211_link_data *link =
+ rcu_dereference(sdata->link[link_id]);
+
+ if (WARN_ON(!link)) {
+ rcu_read_unlock();
+ return;
+ }
+
wiphy_delayed_work_queue(sdata->local->hw.wiphy,
- &sdata->deflink.u.mgd.chswitch_work,
- 0);
+ &link->u.mgd.chswitch_work, 0);
}
+
+ rcu_read_unlock();
}
EXPORT_SYMBOL(ieee80211_chswitch_done);
@@ -1813,14 +1831,12 @@ ieee80211_sta_abort_chanswitch(struct ieee80211_link_data *link)
struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_local *local = sdata->local;
+ lockdep_assert_wiphy(local->hw.wiphy);
+
if (!local->ops->abort_channel_switch)
return;
- mutex_lock(&local->mtx);
-
- mutex_lock(&local->chanctx_mtx);
ieee80211_link_unreserve_chanctx(link);
- mutex_unlock(&local->chanctx_mtx);
if (link->csa_block_tx)
ieee80211_wake_vif_queues(local, sdata,
@@ -1829,8 +1845,6 @@ ieee80211_sta_abort_chanswitch(struct ieee80211_link_data *link)
link->csa_block_tx = false;
link->conf->csa_active = false;
- mutex_unlock(&local->mtx);
-
drv_abort_channel_switch(sdata);
}
@@ -1853,7 +1867,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link,
unsigned long timeout;
int res;
- sdata_assert_lock(sdata);
+ lockdep_assert_wiphy(local->hw.wiphy);
if (!cbss)
return;
@@ -1875,7 +1889,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link,
}
if (res < 0)
- goto lock_and_drop_connection;
+ goto drop_connection;
if (beacon && link->conf->csa_active &&
!link->u.mgd.csa_waiting_bcn) {
@@ -1897,7 +1911,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link,
csa_ie.chandef.chan->center_freq,
csa_ie.chandef.width, csa_ie.chandef.center_freq1,
csa_ie.chandef.center_freq2);
- goto lock_and_drop_connection;
+ goto drop_connection;
}
if (!cfg80211_chandef_usable(local->hw.wiphy, &csa_ie.chandef,
@@ -1912,7 +1926,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link,
csa_ie.chandef.width, csa_ie.chandef.center_freq1,
csa_ie.chandef.freq1_offset,
csa_ie.chandef.center_freq2);
- goto lock_and_drop_connection;
+ goto drop_connection;
}
if (cfg80211_chandef_identical(&csa_ie.chandef,
@@ -1935,10 +1949,8 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link,
*/
ieee80211_teardown_tdls_peers(sdata);
- mutex_lock(&local->mtx);
- mutex_lock(&local->chanctx_mtx);
conf = rcu_dereference_protected(link->conf->chanctx_conf,
- lockdep_is_held(&local->chanctx_mtx));
+ lockdep_is_held(&local->hw.wiphy->mtx));
if (!conf) {
sdata_info(sdata,
"no channel context assigned to vif?, disconnecting\n");
@@ -1968,7 +1980,6 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link,
res);
goto drop_connection;
}
- mutex_unlock(&local->chanctx_mtx);
link->conf->csa_active = true;
link->csa_chandef = csa_ie.chandef;
@@ -1979,7 +1990,6 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link,
if (link->csa_block_tx)
ieee80211_stop_vif_queues(local, sdata,
IEEE80211_QUEUE_STOP_REASON_CSA);
- mutex_unlock(&local->mtx);
cfg80211_ch_switch_started_notify(sdata->dev, &csa_ie.chandef,
link->link_id, csa_ie.count,
@@ -1998,9 +2008,6 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link,
&link->u.mgd.chswitch_work,
timeout);
return;
- lock_and_drop_connection:
- mutex_lock(&local->mtx);
- mutex_lock(&local->chanctx_mtx);
drop_connection:
/*
* This is just so that the disconnect flow will know that
@@ -2014,8 +2021,6 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link,
wiphy_work_queue(sdata->local->hw.wiphy,
&ifmgd->csa_connection_drop_work);
- mutex_unlock(&local->chanctx_mtx);
- mutex_unlock(&local->mtx);
}
static bool
@@ -2211,7 +2216,8 @@ static void ieee80211_change_ps(struct ieee80211_local *local)
conf->flags &= ~IEEE80211_CONF_PS;
ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS);
del_timer_sync(&local->dynamic_ps_timer);
- cancel_work_sync(&local->dynamic_ps_enable_work);
+ wiphy_work_cancel(local->hw.wiphy,
+ &local->dynamic_ps_enable_work);
}
}
@@ -2308,7 +2314,8 @@ void ieee80211_recalc_ps_vif(struct ieee80211_sub_if_data *sdata)
}
}
-void ieee80211_dynamic_ps_disable_work(struct work_struct *work)
+void ieee80211_dynamic_ps_disable_work(struct wiphy *wiphy,
+ struct wiphy_work *work)
{
struct ieee80211_local *local =
container_of(work, struct ieee80211_local,
@@ -2325,7 +2332,8 @@ void ieee80211_dynamic_ps_disable_work(struct work_struct *work)
false);
}
-void ieee80211_dynamic_ps_enable_work(struct work_struct *work)
+void ieee80211_dynamic_ps_enable_work(struct wiphy *wiphy,
+ struct wiphy_work *work)
{
struct ieee80211_local *local =
container_of(work, struct ieee80211_local,
@@ -2398,26 +2406,25 @@ void ieee80211_dynamic_ps_timer(struct timer_list *t)
{
struct ieee80211_local *local = from_timer(local, t, dynamic_ps_timer);
- ieee80211_queue_work(&local->hw, &local->dynamic_ps_enable_work);
+ wiphy_work_queue(local->hw.wiphy, &local->dynamic_ps_enable_work);
}
-void ieee80211_dfs_cac_timer_work(struct work_struct *work)
+void ieee80211_dfs_cac_timer_work(struct wiphy *wiphy, struct wiphy_work *work)
{
- struct delayed_work *delayed_work = to_delayed_work(work);
struct ieee80211_link_data *link =
- container_of(delayed_work, struct ieee80211_link_data,
- dfs_cac_timer_work);
+ container_of(work, struct ieee80211_link_data,
+ dfs_cac_timer_work.work);
struct cfg80211_chan_def chandef = link->conf->chandef;
struct ieee80211_sub_if_data *sdata = link->sdata;
- mutex_lock(&sdata->local->mtx);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
+
if (sdata->wdev.cac_started) {
ieee80211_link_release_channel(link);
cfg80211_cac_event(sdata->dev, &chandef,
NL80211_RADAR_CAC_FINISHED,
GFP_KERNEL);
}
- mutex_unlock(&sdata->local->mtx);
}
static bool
@@ -2487,8 +2494,10 @@ __ieee80211_sta_handle_tspec_ac_params(struct ieee80211_sub_if_data *sdata)
ac);
tx_tspec->action = TX_TSPEC_ACTION_NONE;
ret = true;
- schedule_delayed_work(&ifmgd->tx_tspec_wk,
- tx_tspec->time_slice_start + HZ - now + 1);
+ wiphy_delayed_work_queue(local->hw.wiphy,
+ &ifmgd->tx_tspec_wk,
+ tx_tspec->time_slice_start +
+ HZ - now + 1);
break;
case TX_TSPEC_ACTION_NONE:
/* nothing now */
@@ -2506,7 +2515,8 @@ void ieee80211_sta_handle_tspec_ac_params(struct ieee80211_sub_if_data *sdata)
BSS_CHANGED_QOS);
}
-static void ieee80211_sta_handle_tspec_ac_params_wk(struct work_struct *work)
+static void ieee80211_sta_handle_tspec_ac_params_wk(struct wiphy *wiphy,
+ struct wiphy_work *work)
{
struct ieee80211_sub_if_data *sdata;
@@ -2681,7 +2691,7 @@ ieee80211_sta_wmm_params(struct ieee80211_local *local,
static void __ieee80211_stop_poll(struct ieee80211_sub_if_data *sdata)
{
- lockdep_assert_held(&sdata->local->mtx);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
sdata->u.mgd.flags &= ~IEEE80211_STA_CONNECTION_POLL;
ieee80211_run_deferred_scan(sdata->local);
@@ -2689,9 +2699,9 @@ static void __ieee80211_stop_poll(struct ieee80211_sub_if_data *sdata)
static void ieee80211_stop_poll(struct ieee80211_sub_if_data *sdata)
{
- mutex_lock(&sdata->local->mtx);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
+
__ieee80211_stop_poll(sdata);
- mutex_unlock(&sdata->local->mtx);
}
static u64 ieee80211_handle_bss_capability(struct ieee80211_link_data *link,
@@ -2809,6 +2819,8 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
u64 vif_changed = BSS_CHANGED_ASSOC;
unsigned int link_id;
+ lockdep_assert_wiphy(local->hw.wiphy);
+
sdata->u.mgd.associated = true;
for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) {
@@ -2870,9 +2882,7 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
vif_changed | changed[0]);
}
- mutex_lock(&local->iflist_mtx);
ieee80211_recalc_ps(local);
- mutex_unlock(&local->iflist_mtx);
/* leave this here to not change ordering in non-MLO cases */
if (!ieee80211_vif_is_mld(&sdata->vif))
@@ -2894,7 +2904,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
.subtype = stype,
};
- sdata_assert_lock(sdata);
+ lockdep_assert_wiphy(local->hw.wiphy);
if (WARN_ON_ONCE(tx && !frame_buf))
return;
@@ -2908,6 +2918,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
/* other links will be destroyed */
sdata->deflink.u.mgd.bss = NULL;
+ sdata->deflink.smps_mode = IEEE80211_SMPS_OFF;
netif_carrier_off(sdata->dev);
@@ -2945,9 +2956,22 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
* deauthentication frame by calling mgd_prepare_tx, if the
* driver requested so.
*/
- if (ieee80211_hw_check(&local->hw, DEAUTH_NEED_MGD_TX_PREP) &&
- !sdata->deflink.u.mgd.have_beacon) {
- drv_mgd_prepare_tx(sdata->local, sdata, &info);
+ if (ieee80211_hw_check(&local->hw, DEAUTH_NEED_MGD_TX_PREP)) {
+ for (link_id = 0; link_id < ARRAY_SIZE(sdata->link);
+ link_id++) {
+ struct ieee80211_link_data *link;
+
+ link = sdata_dereference(sdata->link[link_id],
+ sdata);
+ if (!link)
+ continue;
+ if (link->u.mgd.have_beacon)
+ break;
+ }
+ if (link_id == IEEE80211_MLD_MAX_NUM_LINKS) {
+ info.link_id = ffs(sdata->vif.active_links) - 1;
+ drv_mgd_prepare_tx(sdata->local, sdata, &info);
+ }
}
ieee80211_send_deauth_disassoc(sdata, sdata->vif.cfg.ap_addr,
@@ -3003,7 +3027,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
sdata->deflink.ap_power_level = IEEE80211_UNSET_POWER_LEVEL;
del_timer_sync(&local->dynamic_ps_timer);
- cancel_work_sync(&local->dynamic_ps_enable_work);
+ wiphy_work_cancel(local->hw.wiphy, &local->dynamic_ps_enable_work);
/* Disable ARP filtering */
if (sdata->vif.cfg.arp_addr_cnt)
@@ -3035,7 +3059,6 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
ifmgd->flags = 0;
sdata->deflink.u.mgd.conn_flags = 0;
- mutex_lock(&local->mtx);
for (link_id = 0; link_id < ARRAY_SIZE(sdata->link); link_id++) {
struct ieee80211_link_data *link;
@@ -3054,17 +3077,19 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
IEEE80211_QUEUE_STOP_REASON_CSA);
sdata->deflink.csa_block_tx = false;
}
- mutex_unlock(&local->mtx);
/* existing TX TSPEC sessions no longer exist */
memset(ifmgd->tx_tspec, 0, sizeof(ifmgd->tx_tspec));
- cancel_delayed_work_sync(&ifmgd->tx_tspec_wk);
+ wiphy_delayed_work_cancel(local->hw.wiphy, &ifmgd->tx_tspec_wk);
sdata->vif.bss_conf.pwr_reduction = 0;
sdata->vif.bss_conf.tx_pwr_env_num = 0;
memset(sdata->vif.bss_conf.tx_pwr_env, 0,
sizeof(sdata->vif.bss_conf.tx_pwr_env));
+ memset(&sdata->u.mgd.ttlm_info, 0,
+ sizeof(sdata->u.mgd.ttlm_info));
+ wiphy_delayed_work_cancel(sdata->local->hw.wiphy, &ifmgd->ttlm_work);
ieee80211_vif_set_links(sdata, 0, 0);
}
@@ -3073,18 +3098,17 @@ static void ieee80211_reset_ap_probe(struct ieee80211_sub_if_data *sdata)
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
struct ieee80211_local *local = sdata->local;
- mutex_lock(&local->mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
+
if (!(ifmgd->flags & IEEE80211_STA_CONNECTION_POLL))
- goto out;
+ return;
__ieee80211_stop_poll(sdata);
- mutex_lock(&local->iflist_mtx);
ieee80211_recalc_ps(local);
- mutex_unlock(&local->iflist_mtx);
if (ieee80211_hw_check(&sdata->local->hw, CONNECTION_MONITOR))
- goto out;
+ return;
/*
* We've received a probe response, but are not sure whether
@@ -3096,8 +3120,6 @@ static void ieee80211_reset_ap_probe(struct ieee80211_sub_if_data *sdata)
mod_timer(&ifmgd->conn_mon_timer,
round_jiffies_up(jiffies +
IEEE80211_CONNECTION_IDLE_TIME));
-out:
- mutex_unlock(&local->mtx);
}
static void ieee80211_sta_tx_wmm_ac_notify(struct ieee80211_sub_if_data *sdata,
@@ -3126,7 +3148,8 @@ static void ieee80211_sta_tx_wmm_ac_notify(struct ieee80211_sub_if_data *sdata,
if (tx_tspec->downgraded) {
tx_tspec->action = TX_TSPEC_ACTION_STOP_DOWNGRADE;
- schedule_delayed_work(&ifmgd->tx_tspec_wk, 0);
+ wiphy_delayed_work_queue(sdata->local->hw.wiphy,
+ &ifmgd->tx_tspec_wk, 0);
}
}
@@ -3138,7 +3161,8 @@ static void ieee80211_sta_tx_wmm_ac_notify(struct ieee80211_sub_if_data *sdata,
if (tx_tspec->consumed_tx_time >= tx_tspec->admitted_time) {
tx_tspec->downgraded = true;
tx_tspec->action = TX_TSPEC_ACTION_DOWNGRADE;
- schedule_delayed_work(&ifmgd->tx_tspec_wk, 0);
+ wiphy_delayed_work_queue(sdata->local->hw.wiphy,
+ &ifmgd->tx_tspec_wk, 0);
}
}
@@ -3179,6 +3203,8 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata)
u8 unicast_limit = max(1, max_probe_tries - 3);
struct sta_info *sta;
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
+
if (WARN_ON(ieee80211_vif_is_mld(&sdata->vif)))
return;
@@ -3200,11 +3226,9 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata)
ifmgd->probe_send_count++;
if (dst) {
- mutex_lock(&sdata->local->sta_mtx);
sta = sta_info_get(sdata, dst);
if (!WARN_ON(!sta))
ieee80211_check_fast_rx(sta);
- mutex_unlock(&sdata->local->sta_mtx);
}
if (ieee80211_hw_check(&sdata->local->hw, REPORTS_TX_ACK_STATUS)) {
@@ -3227,29 +3251,24 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata,
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
bool already = false;
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
+
if (WARN_ON_ONCE(ieee80211_vif_is_mld(&sdata->vif)))
return;
if (!ieee80211_sdata_running(sdata))
return;
- sdata_lock(sdata);
-
if (!ifmgd->associated)
- goto out;
-
- mutex_lock(&sdata->local->mtx);
+ return;
- if (sdata->local->tmp_channel || sdata->local->scanning) {
- mutex_unlock(&sdata->local->mtx);
- goto out;
- }
+ if (sdata->local->tmp_channel || sdata->local->scanning)
+ return;
if (sdata->local->suspending) {
/* reschedule after resume */
- mutex_unlock(&sdata->local->mtx);
ieee80211_reset_ap_probe(sdata);
- goto out;
+ return;
}
if (beacon) {
@@ -3276,19 +3295,13 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata,
ifmgd->flags |= IEEE80211_STA_CONNECTION_POLL;
- mutex_unlock(&sdata->local->mtx);
-
if (already)
- goto out;
+ return;
- mutex_lock(&sdata->local->iflist_mtx);
ieee80211_recalc_ps(sdata->local);
- mutex_unlock(&sdata->local->iflist_mtx);
ifmgd->probe_send_count = 0;
ieee80211_mgd_probe_ap_send(sdata);
- out:
- sdata_unlock(sdata);
}
struct sk_buff *ieee80211_ap_probereq_get(struct ieee80211_hw *hw,
@@ -3301,12 +3314,12 @@ struct sk_buff *ieee80211_ap_probereq_get(struct ieee80211_hw *hw,
const struct element *ssid;
int ssid_len;
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
+
if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION ||
ieee80211_vif_is_mld(&sdata->vif)))
return NULL;
- sdata_assert_lock(sdata);
-
if (ifmgd->associated)
cbss = sdata->deflink.u.mgd.bss;
else if (ifmgd->auth_data)
@@ -3353,13 +3366,15 @@ static void ieee80211_report_disconnect(struct ieee80211_sub_if_data *sdata,
drv_event_callback(sdata->local, sdata, &event);
}
-static void ___ieee80211_disconnect(struct ieee80211_sub_if_data *sdata)
+static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata)
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN];
bool tx;
+ lockdep_assert_wiphy(local->hw.wiphy);
+
if (!ifmgd->associated)
return;
@@ -3395,7 +3410,6 @@ static void ___ieee80211_disconnect(struct ieee80211_sub_if_data *sdata)
WLAN_REASON_DEAUTH_LEAVING :
WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY,
tx, frame_buf);
- mutex_lock(&local->mtx);
/* the other links will be destroyed */
sdata->vif.bss_conf.csa_active = false;
sdata->deflink.u.mgd.csa_waiting_bcn = false;
@@ -3404,7 +3418,6 @@ static void ___ieee80211_disconnect(struct ieee80211_sub_if_data *sdata)
IEEE80211_QUEUE_STOP_REASON_CSA);
sdata->deflink.csa_block_tx = false;
}
- mutex_unlock(&local->mtx);
ieee80211_report_disconnect(sdata, frame_buf, sizeof(frame_buf), tx,
WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY,
@@ -3412,13 +3425,6 @@ static void ___ieee80211_disconnect(struct ieee80211_sub_if_data *sdata)
ifmgd->reconnect = false;
}
-static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata)
-{
- sdata_lock(sdata);
- ___ieee80211_disconnect(sdata);
- sdata_unlock(sdata);
-}
-
static void ieee80211_beacon_connection_loss_work(struct wiphy *wiphy,
struct wiphy_work *work)
{
@@ -3500,7 +3506,7 @@ static void ieee80211_destroy_auth_data(struct ieee80211_sub_if_data *sdata,
{
struct ieee80211_mgd_auth_data *auth_data = sdata->u.mgd.auth_data;
- sdata_assert_lock(sdata);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
if (!assoc) {
/*
@@ -3518,10 +3524,8 @@ static void ieee80211_destroy_auth_data(struct ieee80211_sub_if_data *sdata,
BSS_CHANGED_BSSID);
sdata->u.mgd.flags = 0;
- mutex_lock(&sdata->local->mtx);
ieee80211_link_release_channel(&sdata->deflink);
ieee80211_vif_set_links(sdata, 0, 0);
- mutex_unlock(&sdata->local->mtx);
}
cfg80211_put_bss(sdata->local->hw.wiphy, auth_data->bss);
@@ -3541,7 +3545,7 @@ static void ieee80211_destroy_assoc_data(struct ieee80211_sub_if_data *sdata,
{
struct ieee80211_mgd_assoc_data *assoc_data = sdata->u.mgd.assoc_data;
- sdata_assert_lock(sdata);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
if (status != ASSOC_SUCCESS) {
/*
@@ -3577,10 +3581,8 @@ static void ieee80211_destroy_assoc_data(struct ieee80211_sub_if_data *sdata,
cfg80211_assoc_failure(sdata->dev, &data);
}
- mutex_lock(&sdata->local->mtx);
ieee80211_link_release_channel(&sdata->deflink);
ieee80211_vif_set_links(sdata, 0, 0);
- mutex_unlock(&sdata->local->mtx);
}
kfree(assoc_data);
@@ -3597,6 +3599,7 @@ static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata,
u32 tx_flags = 0;
struct ieee80211_prep_tx_info info = {
.subtype = IEEE80211_STYPE_AUTH,
+ .link_id = auth_data->link_id,
};
pos = mgmt->u.auth.variable;
@@ -3622,7 +3625,8 @@ static bool ieee80211_mark_sta_auth(struct ieee80211_sub_if_data *sdata)
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
const u8 *ap_addr = ifmgd->auth_data->ap_addr;
struct sta_info *sta;
- bool result = true;
+
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
sdata_info(sdata, "authenticated\n");
ifmgd->auth_data->done = true;
@@ -3631,22 +3635,17 @@ static bool ieee80211_mark_sta_auth(struct ieee80211_sub_if_data *sdata)
run_again(sdata, ifmgd->auth_data->timeout);
/* move station state to auth */
- mutex_lock(&sdata->local->sta_mtx);
sta = sta_info_get(sdata, ap_addr);
if (!sta) {
WARN_ONCE(1, "%s: STA %pM not found", sdata->name, ap_addr);
- result = false;
- goto out;
+ return false;
}
if (sta_info_move_state(sta, IEEE80211_STA_AUTH)) {
sdata_info(sdata, "failed moving %pM to auth\n", ap_addr);
- result = false;
- goto out;
+ return false;
}
-out:
- mutex_unlock(&sdata->local->sta_mtx);
- return result;
+ return true;
}
static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
@@ -3662,7 +3661,7 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
.subtype = IEEE80211_STYPE_AUTH,
};
- sdata_assert_lock(sdata);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
if (len < 24 + 6)
return;
@@ -3820,7 +3819,7 @@ static void ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata,
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
u16 reason_code = le16_to_cpu(mgmt->u.deauth.reason_code);
- sdata_assert_lock(sdata);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
if (len < 24 + 2)
return;
@@ -3864,7 +3863,7 @@ static void ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata,
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
u16 reason_code;
- sdata_assert_lock(sdata);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
if (len < 24 + 2)
return;
@@ -3894,8 +3893,7 @@ static void ieee80211_get_rates(struct ieee80211_supported_band *sband,
u8 *supp_rates, unsigned int supp_rates_len,
u32 *rates, u32 *basic_rates,
bool *have_higher_than_11mbit,
- int *min_rate, int *min_rate_index,
- int shift)
+ int *min_rate, int *min_rate_index)
{
int i, j;
@@ -3903,7 +3901,7 @@ static void ieee80211_get_rates(struct ieee80211_supported_band *sband,
int rate = supp_rates[i] & 0x7f;
bool is_basic = !!(supp_rates[i] & 0x80);
- if ((rate * 5 * (1 << shift)) > 110)
+ if ((rate * 5) > 110)
*have_higher_than_11mbit = true;
/*
@@ -3927,7 +3925,7 @@ static void ieee80211_get_rates(struct ieee80211_supported_band *sband,
br = &sband->bitrates[j];
- brate = DIV_ROUND_UP(br->bitrate, (1 << shift) * 5);
+ brate = DIV_ROUND_UP(br->bitrate, 5);
if (brate == rate) {
*rates |= BIT(j);
if (is_basic)
@@ -4202,10 +4200,33 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link,
elems->ht_cap_elem,
link_sta);
- if (elems->vht_cap_elem && !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_VHT))
+ if (elems->vht_cap_elem &&
+ !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_VHT)) {
+ const struct ieee80211_vht_cap *bss_vht_cap = NULL;
+ const struct cfg80211_bss_ies *ies;
+
+ /*
+ * Cisco AP module 9115 with FW 17.3 has a bug and sends a
+ * too large maximum MPDU length in the association response
+ * (indicating 12k) that it cannot actually process ...
+ * Work around that.
+ */
+ rcu_read_lock();
+ ies = rcu_dereference(cbss->ies);
+ if (ies) {
+ const struct element *elem;
+
+ elem = cfg80211_find_elem(WLAN_EID_VHT_CAPABILITY,
+ ies->data, ies->len);
+ if (elem && elem->datalen >= sizeof(*bss_vht_cap))
+ bss_vht_cap = (const void *)elem->data;
+ }
+
ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband,
elems->vht_cap_elem,
- link_sta);
+ bss_vht_cap, link_sta);
+ rcu_read_unlock();
+ }
if (elems->he_operation && !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HE) &&
elems->he_cap) {
@@ -4371,8 +4392,6 @@ static int ieee80211_mgd_setup_link_sta(struct ieee80211_link_data *link,
u32 rates = 0, basic_rates = 0;
bool have_higher_than_11mbit = false;
int min_rate = INT_MAX, min_rate_index = -1;
- /* this is clearly wrong for MLO but we'll just remove it later */
- int shift = ieee80211_vif_get_shift(&sdata->vif);
struct ieee80211_supported_band *sband;
memcpy(link_sta->addr, cbss->bssid, ETH_ALEN);
@@ -4388,7 +4407,7 @@ static int ieee80211_mgd_setup_link_sta(struct ieee80211_link_data *link,
ieee80211_get_rates(sband, bss->supp_rates, bss->supp_rates_len,
&rates, &basic_rates, &have_higher_than_11mbit,
- &min_rate, &min_rate_index, shift);
+ &min_rate, &min_rate_index);
/*
* This used to be a workaround for basic rates missing
@@ -4794,6 +4813,7 @@ ieee80211_verify_sta_eht_mcs_support(struct ieee80211_sub_if_data *sdata,
static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
struct ieee80211_link_data *link,
struct cfg80211_bss *cbss,
+ bool mlo,
ieee80211_conn_flags_t *conn_flags)
{
struct ieee80211_local *local = sdata->local;
@@ -4807,6 +4827,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
struct cfg80211_chan_def chandef;
bool is_6ghz = cbss->channel->band == NL80211_BAND_6GHZ;
bool is_5ghz = cbss->channel->band == NL80211_BAND_5GHZ;
+ bool supports_mlo = false;
struct ieee80211_bss *bss = (void *)cbss->priv;
struct ieee80211_elems_parse_params parse_params = {
.link_id = -1,
@@ -4818,6 +4839,8 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
u32 i;
bool have_80mhz;
+ lockdep_assert_wiphy(local->hw.wiphy);
+
rcu_read_lock();
ies = rcu_dereference(cbss->ies);
@@ -4958,6 +4981,8 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
ieee80211_mle_type_ok(eht_ml_elem->data + 1,
IEEE80211_ML_CONTROL_TYPE_BASIC,
eht_ml_elem->datalen - 1)) {
+ supports_mlo = true;
+
sdata->vif.cfg.eml_cap =
ieee80211_mle_get_eml_cap(eht_ml_elem->data + 1);
sdata->vif.cfg.eml_med_sync_delay =
@@ -5013,13 +5038,14 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
return -EINVAL;
}
+ if (mlo && !supports_mlo) {
+ sdata_info(sdata, "Rejecting MLO as it is not supported by AP\n");
+ return -EINVAL;
+ }
+
if (!link)
return 0;
- /* will change later if needed */
- link->smps_mode = IEEE80211_SMPS_OFF;
-
- mutex_lock(&local->mtx);
/*
* If this fails (possibly due to channel context sharing
* on incompatible channels, e.g. 80+80 and 160 sharing the
@@ -5040,7 +5066,6 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
IEEE80211_CHANCTX_SHARED);
}
out:
- mutex_unlock(&local->mtx);
return ret;
}
@@ -5092,7 +5117,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
u16 valid_links = 0, dormant_links = 0;
int err;
- mutex_lock(&sdata->local->sta_mtx);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
/*
* station info was already allocated and inserted before
* the association and should be available to us
@@ -5107,9 +5132,10 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
continue;
valid_links |= BIT(link_id);
- if (assoc_data->link[link_id].disabled) {
+ if (assoc_data->link[link_id].disabled)
dormant_links |= BIT(link_id);
- } else if (link_id != assoc_data->assoc_link_id) {
+
+ if (link_id != assoc_data->assoc_link_id) {
err = ieee80211_sta_allocate_link(sta, link_id);
if (err)
goto out_err;
@@ -5124,7 +5150,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
struct ieee80211_link_data *link;
struct link_sta_info *link_sta;
- if (!cbss || assoc_data->link[link_id].disabled)
+ if (!cbss)
continue;
link = sdata_dereference(sdata->link[link_id], sdata);
@@ -5140,7 +5166,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
" (assoc)" : "");
link_sta = rcu_dereference_protected(sta->link[link_id],
- lockdep_is_held(&local->sta_mtx));
+ lockdep_is_held(&local->hw.wiphy->mtx));
if (WARN_ON(!link_sta))
goto out_err;
@@ -5163,7 +5189,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
link->conf->dtim_period = link->u.mgd.dtim_period ?: 1;
if (link_id != assoc_data->assoc_link_id) {
- err = ieee80211_prep_channel(sdata, link, cbss,
+ err = ieee80211_prep_channel(sdata, link, cbss, true,
&link->u.mgd.conn_flags);
if (err) {
link_info(link, "prep_channel failed\n");
@@ -5227,8 +5253,6 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
if (sdata->wdev.use_4addr)
drv_sta_set_4addr(local, sdata, &sta->sta, true);
- mutex_unlock(&sdata->local->sta_mtx);
-
ieee80211_set_associated(sdata, assoc_data, changed);
/*
@@ -5248,7 +5272,6 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
return true;
out_err:
eth_zero_addr(sdata->vif.cfg.ap_addr);
- mutex_unlock(&sdata->local->sta_mtx);
return false;
}
@@ -5274,13 +5297,13 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
.u.mlme.data = ASSOC_EVENT,
};
struct ieee80211_prep_tx_info info = {};
- struct cfg80211_rx_assoc_resp resp = {
+ struct cfg80211_rx_assoc_resp_data resp = {
.uapsd_queues = -1,
};
u8 ap_mld_addr[ETH_ALEN] __aligned(2);
unsigned int link_id;
- sdata_assert_lock(sdata);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
if (!assoc_data)
return;
@@ -5360,6 +5383,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
assoc_data->ap_addr, tu, ms);
assoc_data->timeout = jiffies + msecs_to_jiffies(ms);
assoc_data->timeout_started = true;
+ assoc_data->comeback = true;
if (ms > IEEE80211_ASSOC_TIMEOUT)
run_again(sdata, assoc_data->timeout);
goto notify_driver;
@@ -5381,33 +5405,24 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
}
if (ieee80211_vif_is_mld(&sdata->vif)) {
+ struct ieee80211_mle_basic_common_info *common;
+
if (!elems->ml_basic) {
sdata_info(sdata,
- "MLO association with %pM but no multi-link element in response!\n",
+ "MLO association with %pM but no (basic) multi-link element in response!\n",
assoc_data->ap_addr);
goto abandon_assoc;
}
- if (le16_get_bits(elems->ml_basic->control,
- IEEE80211_ML_CONTROL_TYPE) !=
- IEEE80211_ML_CONTROL_TYPE_BASIC) {
+ common = (void *)elems->ml_basic->variable;
+
+ if (memcmp(assoc_data->ap_addr,
+ common->mld_mac_addr, ETH_ALEN)) {
sdata_info(sdata,
- "bad multi-link element (control=0x%x)\n",
- le16_to_cpu(elems->ml_basic->control));
+ "AP MLD MAC address mismatch: got %pM expected %pM\n",
+ common->mld_mac_addr,
+ assoc_data->ap_addr);
goto abandon_assoc;
- } else {
- struct ieee80211_mle_basic_common_info *common;
-
- common = (void *)elems->ml_basic->variable;
-
- if (memcmp(assoc_data->ap_addr,
- common->mld_mac_addr, ETH_ALEN)) {
- sdata_info(sdata,
- "AP MLD MAC address mismatch: got %pM expected %pM\n",
- common->mld_mac_addr,
- assoc_data->ap_addr);
- goto abandon_assoc;
- }
}
}
@@ -5429,17 +5444,18 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) {
struct ieee80211_link_data *link;
- link = sdata_dereference(sdata->link[link_id], sdata);
- if (!link)
- continue;
-
if (!assoc_data->link[link_id].bss)
continue;
resp.links[link_id].bss = assoc_data->link[link_id].bss;
- resp.links[link_id].addr = link->conf->addr;
+ ether_addr_copy(resp.links[link_id].addr,
+ assoc_data->link[link_id].addr);
resp.links[link_id].status = assoc_data->link[link_id].status;
+ link = sdata_dereference(sdata->link[link_id], sdata);
+ if (!link)
+ continue;
+
/* get uapsd queues configuration - same for all links */
resp.uapsd_queues = 0;
for (ac = 0; ac < IEEE80211_NUM_ACS; ac++)
@@ -5480,7 +5496,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_link_data *link,
struct ieee80211_bss *bss;
struct ieee80211_channel *channel;
- sdata_assert_lock(sdata);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
channel = ieee80211_get_channel_khz(local->hw.wiphy,
ieee80211_rx_status_to_khz(rx_status));
@@ -5507,7 +5523,7 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_link_data *link,
ifmgd = &sdata->u.mgd;
- sdata_assert_lock(sdata);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
/*
* According to Draft P802.11ax D6.0 clause 26.17.2.3.2:
@@ -5674,6 +5690,7 @@ static bool ieee80211_config_puncturing(struct ieee80211_link_data *link,
const struct ieee80211_eht_operation *eht_oper,
u64 *changed)
{
+ struct ieee80211_local *local = link->sdata->local;
u16 bitmap = 0, extracted;
if ((eht_oper->params & IEEE80211_EHT_OPER_INFO_PRESENT) &&
@@ -5705,6 +5722,9 @@ static bool ieee80211_config_puncturing(struct ieee80211_link_data *link,
return false;
}
+ if (bitmap && ieee80211_hw_check(&local->hw, DISALLOW_PUNCTURING))
+ return false;
+
ieee80211_handle_puncturing_bitmap(link, eht_oper, bitmap, changed);
return true;
}
@@ -5718,21 +5738,16 @@ static void ieee80211_ml_reconf_work(struct wiphy *wiphy,
u16 new_valid_links, new_active_links, new_dormant_links;
int ret;
- sdata_lock(sdata);
- if (!sdata->u.mgd.removed_links) {
- sdata_unlock(sdata);
+ if (!sdata->u.mgd.removed_links)
return;
- }
sdata_info(sdata,
"MLO Reconfiguration: work: valid=0x%x, removed=0x%x\n",
sdata->vif.valid_links, sdata->u.mgd.removed_links);
new_valid_links = sdata->vif.valid_links & ~sdata->u.mgd.removed_links;
- if (new_valid_links == sdata->vif.valid_links) {
- sdata_unlock(sdata);
+ if (new_valid_links == sdata->vif.valid_links)
return;
- }
if (!new_valid_links ||
!(new_valid_links & ~sdata->vif.dormant_links)) {
@@ -5748,8 +5763,7 @@ static void ieee80211_ml_reconf_work(struct wiphy *wiphy,
BIT(ffs(new_valid_links &
~sdata->vif.dormant_links) - 1);
- ret = __ieee80211_set_active_links(&sdata->vif,
- new_active_links);
+ ret = ieee80211_set_active_links(&sdata->vif, new_active_links);
if (ret) {
sdata_info(sdata,
"Failed setting active links\n");
@@ -5764,15 +5778,15 @@ static void ieee80211_ml_reconf_work(struct wiphy *wiphy,
if (ret)
sdata_info(sdata, "Failed setting valid links\n");
+ ieee80211_vif_cfg_change_notify(sdata, BSS_CHANGED_MLD_VALID_LINKS);
+
out:
if (!ret)
cfg80211_links_removed(sdata->dev, sdata->u.mgd.removed_links);
else
- ___ieee80211_disconnect(sdata);
+ __ieee80211_disconnect(sdata);
sdata->u.mgd.removed_links = 0;
-
- sdata_unlock(sdata);
}
static void ieee80211_ml_reconfiguration(struct ieee80211_sub_if_data *sdata,
@@ -5780,7 +5794,7 @@ static void ieee80211_ml_reconfiguration(struct ieee80211_sub_if_data *sdata,
{
const struct ieee80211_multi_link_elem *ml;
const struct element *sub;
- size_t ml_len;
+ ssize_t ml_len;
unsigned long removed_links = 0;
u16 link_removal_timeout[IEEE80211_MLD_MAX_NUM_LINKS] = {};
u8 link_id;
@@ -5796,6 +5810,8 @@ static void ieee80211_ml_reconfiguration(struct ieee80211_sub_if_data *sdata,
elems->scratch + elems->scratch_len -
elems->scratch_pos,
WLAN_EID_FRAGMENT);
+ if (ml_len < 0)
+ return;
elems->ml_reconf = (const void *)elems->scratch_pos;
elems->ml_reconf_len = ml_len;
@@ -5872,6 +5888,222 @@ static void ieee80211_ml_reconfiguration(struct ieee80211_sub_if_data *sdata,
TU_TO_JIFFIES(delay));
}
+static void ieee80211_tid_to_link_map_work(struct wiphy *wiphy,
+ struct wiphy_work *work)
+{
+ u16 new_active_links, new_dormant_links;
+ struct ieee80211_sub_if_data *sdata =
+ container_of(work, struct ieee80211_sub_if_data,
+ u.mgd.ttlm_work.work);
+ int ret;
+
+ new_active_links = sdata->u.mgd.ttlm_info.map &
+ sdata->vif.valid_links;
+ new_dormant_links = ~sdata->u.mgd.ttlm_info.map &
+ sdata->vif.valid_links;
+ if (!new_active_links) {
+ ieee80211_disconnect(&sdata->vif, false);
+ return;
+ }
+
+ ieee80211_vif_set_links(sdata, sdata->vif.valid_links, 0);
+ new_active_links = BIT(ffs(new_active_links) - 1);
+ ieee80211_set_active_links(&sdata->vif, new_active_links);
+
+ ret = ieee80211_vif_set_links(sdata, sdata->vif.valid_links,
+ new_dormant_links);
+
+ sdata->u.mgd.ttlm_info.active = true;
+ sdata->u.mgd.ttlm_info.switch_time = 0;
+
+ if (!ret)
+ ieee80211_vif_cfg_change_notify(sdata,
+ BSS_CHANGED_MLD_VALID_LINKS);
+}
+
+static u16 ieee80211_get_ttlm(u8 bm_size, u8 *data)
+{
+ if (bm_size == 1)
+ return *data;
+ else
+ return get_unaligned_le16(data);
+}
+
+static int
+ieee80211_parse_adv_t2l(struct ieee80211_sub_if_data *sdata,
+ const struct ieee80211_ttlm_elem *ttlm,
+ struct ieee80211_adv_ttlm_info *ttlm_info)
+{
+ /* The element size was already validated in
+ * ieee80211_tid_to_link_map_size_ok()
+ */
+ u8 control, link_map_presence, map_size, tid;
+ u8 *pos;
+
+ memset(ttlm_info, 0, sizeof(*ttlm_info));
+ pos = (void *)ttlm->optional;
+ control = ttlm->control;
+
+ if ((control & IEEE80211_TTLM_CONTROL_DEF_LINK_MAP) ||
+ !(control & IEEE80211_TTLM_CONTROL_SWITCH_TIME_PRESENT))
+ return 0;
+
+ if ((control & IEEE80211_TTLM_CONTROL_DIRECTION) !=
+ IEEE80211_TTLM_DIRECTION_BOTH) {
+ sdata_info(sdata, "Invalid advertised T2L map direction\n");
+ return -EINVAL;
+ }
+
+ link_map_presence = *pos;
+ pos++;
+
+ ttlm_info->switch_time = get_unaligned_le16(pos);
+
+ /* Since ttlm_info->switch_time == 0 means no switch time, bump it
+ * by 1.
+ */
+ if (!ttlm_info->switch_time)
+ ttlm_info->switch_time = 1;
+
+ pos += 2;
+
+ if (control & IEEE80211_TTLM_CONTROL_EXPECTED_DUR_PRESENT) {
+ ttlm_info->duration = pos[0] | pos[1] << 8 | pos[2] << 16;
+ pos += 3;
+ }
+
+ if (control & IEEE80211_TTLM_CONTROL_LINK_MAP_SIZE)
+ map_size = 1;
+ else
+ map_size = 2;
+
+ /* According to Draft P802.11be_D3.0 clause 35.3.7.1.7, an AP MLD shall
+ * not advertise a TID-to-link mapping that does not map all TIDs to the
+ * same link set, reject frame if not all links have mapping
+ */
+ if (link_map_presence != 0xff) {
+ sdata_info(sdata,
+ "Invalid advertised T2L mapping presence indicator\n");
+ return -EINVAL;
+ }
+
+ ttlm_info->map = ieee80211_get_ttlm(map_size, pos);
+ if (!ttlm_info->map) {
+ sdata_info(sdata,
+ "Invalid advertised T2L map for TID 0\n");
+ return -EINVAL;
+ }
+
+ pos += map_size;
+
+ for (tid = 1; tid < 8; tid++) {
+ u16 map = ieee80211_get_ttlm(map_size, pos);
+
+ if (map != ttlm_info->map) {
+ sdata_info(sdata, "Invalid advertised T2L map for tid %d\n",
+ tid);
+ return -EINVAL;
+ }
+
+ pos += map_size;
+ }
+ return 0;
+}
+
+static void ieee80211_process_adv_ttlm(struct ieee80211_sub_if_data *sdata,
+ struct ieee802_11_elems *elems,
+ u64 beacon_ts)
+{
+ u8 i;
+ int ret;
+
+ if (!ieee80211_vif_is_mld(&sdata->vif))
+ return;
+
+ if (!elems->ttlm_num) {
+ if (sdata->u.mgd.ttlm_info.switch_time) {
+ /* if a planned TID-to-link mapping was cancelled -
+ * abort it
+ */
+ wiphy_delayed_work_cancel(sdata->local->hw.wiphy,
+ &sdata->u.mgd.ttlm_work);
+ } else if (sdata->u.mgd.ttlm_info.active) {
+ /* if no TID-to-link element, set to default mapping in
+ * which all TIDs are mapped to all setup links
+ */
+ ret = ieee80211_vif_set_links(sdata,
+ sdata->vif.valid_links,
+ 0);
+ if (ret) {
+ sdata_info(sdata, "Failed setting valid/dormant links\n");
+ return;
+ }
+ ieee80211_vif_cfg_change_notify(sdata,
+ BSS_CHANGED_MLD_VALID_LINKS);
+ }
+ memset(&sdata->u.mgd.ttlm_info, 0,
+ sizeof(sdata->u.mgd.ttlm_info));
+ return;
+ }
+
+ for (i = 0; i < elems->ttlm_num; i++) {
+ struct ieee80211_adv_ttlm_info ttlm_info;
+ u32 res;
+
+ res = ieee80211_parse_adv_t2l(sdata, elems->ttlm[i],
+ &ttlm_info);
+
+ if (res) {
+ __ieee80211_disconnect(sdata);
+ return;
+ }
+
+ if (ttlm_info.switch_time) {
+ u16 beacon_ts_tu, st_tu, delay;
+ u32 delay_jiffies;
+ u64 mask;
+
+ /* The t2l map switch time is indicated with a partial
+ * TSF value (bits 10 to 25), get the partial beacon TS
+ * as well, and calc the delay to the start time.
+ */
+ mask = GENMASK_ULL(25, 10);
+ beacon_ts_tu = (beacon_ts & mask) >> 10;
+ st_tu = ttlm_info.switch_time;
+ delay = st_tu - beacon_ts_tu;
+
+ /*
+ * If the switch time is far in the future, then it
+ * could also be the previous switch still being
+ * announced.
+ * We can simply ignore it for now, if it is a future
+ * switch the AP will continue to announce it anyway.
+ */
+ if (delay > IEEE80211_ADV_TTLM_ST_UNDERFLOW)
+ return;
+
+ delay_jiffies = TU_TO_JIFFIES(delay);
+
+ /* Link switching can take time, so schedule it
+ * 100ms before to be ready on time
+ */
+ if (delay_jiffies > IEEE80211_ADV_TTLM_SAFETY_BUFFER_MS)
+ delay_jiffies -=
+ IEEE80211_ADV_TTLM_SAFETY_BUFFER_MS;
+ else
+ delay_jiffies = 0;
+
+ sdata->u.mgd.ttlm_info = ttlm_info;
+ wiphy_delayed_work_cancel(sdata->local->hw.wiphy,
+ &sdata->u.mgd.ttlm_work);
+ wiphy_delayed_work_queue(sdata->local->hw.wiphy,
+ &sdata->u.mgd.ttlm_work,
+ delay_jiffies);
+ return;
+ }
+ }
+}
+
static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link,
struct ieee80211_hdr *hdr, size_t len,
struct ieee80211_rx_status *rx_status)
@@ -5900,7 +6132,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link,
.from_ap = true,
};
- sdata_assert_lock(sdata);
+ lockdep_assert_wiphy(local->hw.wiphy);
/* Process beacon from the current BSS */
bssid = ieee80211_get_bssid(hdr, len, sdata->vif.type);
@@ -6116,9 +6348,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link,
changed |= BSS_CHANGED_BEACON_INFO;
link->u.mgd.have_beacon = true;
- mutex_lock(&local->iflist_mtx);
ieee80211_recalc_ps(local);
- mutex_unlock(&local->iflist_mtx);
ieee80211_recalc_ps_vif(sdata);
}
@@ -6135,16 +6365,13 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link,
le16_to_cpu(mgmt->u.beacon.capab_info),
erp_valid, erp_value);
- mutex_lock(&local->sta_mtx);
sta = sta_info_get(sdata, sdata->vif.cfg.ap_addr);
if (WARN_ON(!sta)) {
- mutex_unlock(&local->sta_mtx);
goto free;
}
link_sta = rcu_dereference_protected(sta->link[link->link_id],
- lockdep_is_held(&local->sta_mtx));
+ lockdep_is_held(&local->hw.wiphy->mtx));
if (WARN_ON(!link_sta)) {
- mutex_unlock(&local->sta_mtx);
goto free;
}
@@ -6160,7 +6387,6 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link,
elems->vht_operation, elems->he_operation,
elems->eht_operation,
elems->s1g_oper, bssid, &changed)) {
- mutex_unlock(&local->sta_mtx);
sdata_info(sdata,
"failed to follow AP %pM bandwidth change, disconnect\n",
bssid);
@@ -6178,7 +6404,6 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link,
ieee80211_vht_handle_opmode(sdata, link_sta,
*elems->opmode_notif,
rx_status->band);
- mutex_unlock(&local->sta_mtx);
changed |= ieee80211_handle_pwr_constr(link, chan, mgmt,
elems->country_elem,
@@ -6202,6 +6427,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link,
}
ieee80211_ml_reconfiguration(sdata, elems);
+ ieee80211_process_adv_ttlm(sdata, elems,
+ le64_to_cpu(mgmt->u.beacon.timestamp));
ieee80211_link_info_change_notify(sdata, link, changed);
free:
@@ -6216,17 +6443,17 @@ void ieee80211_sta_rx_queued_ext(struct ieee80211_sub_if_data *sdata,
struct ieee80211_hdr *hdr;
u16 fc;
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
+
rx_status = (struct ieee80211_rx_status *) skb->cb;
hdr = (struct ieee80211_hdr *) skb->data;
fc = le16_to_cpu(hdr->frame_control);
- sdata_lock(sdata);
switch (fc & IEEE80211_FCTL_STYPE) {
case IEEE80211_STYPE_S1G_BEACON:
ieee80211_rx_mgmt_beacon(link, hdr, skb->len, rx_status);
break;
}
- sdata_unlock(sdata);
}
void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
@@ -6238,17 +6465,17 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
u16 fc;
int ies_len;
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
+
rx_status = (struct ieee80211_rx_status *) skb->cb;
mgmt = (struct ieee80211_mgmt *) skb->data;
fc = le16_to_cpu(mgmt->frame_control);
- sdata_lock(sdata);
-
if (rx_status->link_valid) {
link = sdata_dereference(sdata->link[rx_status->link_id],
sdata);
if (!link)
- goto out;
+ return;
}
switch (fc & IEEE80211_FCTL_STYPE) {
@@ -6331,8 +6558,6 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
}
break;
}
-out:
- sdata_unlock(sdata);
}
static void ieee80211_sta_timer(struct timer_list *t)
@@ -6367,7 +6592,7 @@ static int ieee80211_auth(struct ieee80211_sub_if_data *sdata)
.subtype = IEEE80211_STYPE_AUTH,
};
- sdata_assert_lock(sdata);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
if (WARN_ON_ONCE(!auth_data))
return -EINVAL;
@@ -6390,6 +6615,7 @@ static int ieee80211_auth(struct ieee80211_sub_if_data *sdata)
if (auth_data->algorithm == WLAN_AUTH_SAE)
info.duration = jiffies_to_msecs(IEEE80211_AUTH_TIMEOUT_SAE);
+ info.link_id = auth_data->link_id;
drv_mgd_prepare_tx(local, sdata, &info);
sdata_info(sdata, "send auth to %pM (try %d/%d)\n",
@@ -6436,7 +6662,7 @@ static int ieee80211_do_assoc(struct ieee80211_sub_if_data *sdata)
struct ieee80211_local *local = sdata->local;
int ret;
- sdata_assert_lock(sdata);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
assoc_data->tries++;
if (assoc_data->tries > IEEE80211_ASSOC_MAX_TRIES) {
@@ -6492,7 +6718,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
struct ieee80211_local *local = sdata->local;
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
- sdata_lock(sdata);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
if (ifmgd->status_received) {
__le16 fc = ifmgd->status_fc;
@@ -6516,8 +6742,18 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
}
ifmgd->auth_data->timeout_started = true;
} else if (ifmgd->assoc_data &&
+ !ifmgd->assoc_data->comeback &&
(ieee80211_is_assoc_req(fc) ||
ieee80211_is_reassoc_req(fc))) {
+ /*
+ * Update association timeout based on the TX status
+ * for the (Re)Association Request frame. Skip this if
+ * we have already processed a (Re)Association Response
+ * frame that indicated need for association comeback
+ * at a specific time in the future. This could happen
+ * if the TX status information is delayed enough for
+ * the response to be received and processed first.
+ */
if (status_acked) {
ifmgd->assoc_data->timeout =
jiffies + IEEE80211_ASSOC_TIMEOUT_SHORT;
@@ -6627,8 +6863,6 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, false);
}
}
-
- sdata_unlock(sdata);
}
static void ieee80211_sta_bcn_mon_timer(struct timer_list *t)
@@ -6684,10 +6918,11 @@ static void ieee80211_sta_conn_mon_timer(struct timer_list *t)
return;
}
- ieee80211_queue_work(&local->hw, &ifmgd->monitor_work);
+ wiphy_work_queue(local->hw.wiphy, &sdata->u.mgd.monitor_work);
}
-static void ieee80211_sta_monitor_work(struct work_struct *work)
+static void ieee80211_sta_monitor_work(struct wiphy *wiphy,
+ struct wiphy_work *work)
{
struct ieee80211_sub_if_data *sdata =
container_of(work, struct ieee80211_sub_if_data,
@@ -6703,8 +6938,8 @@ static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata)
/* let's probe the connection once */
if (!ieee80211_hw_check(&sdata->local->hw, CONNECTION_MONITOR))
- ieee80211_queue_work(&sdata->local->hw,
- &sdata->u.mgd.monitor_work);
+ wiphy_work_queue(sdata->local->hw.wiphy,
+ &sdata->u.mgd.monitor_work);
}
}
@@ -6714,7 +6949,7 @@ void ieee80211_mgd_quiesce(struct ieee80211_sub_if_data *sdata)
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN];
- sdata_lock(sdata);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
if (ifmgd->auth_data || ifmgd->assoc_data) {
const u8 *ap_addr = ifmgd->auth_data ?
@@ -6766,8 +7001,6 @@ void ieee80211_mgd_quiesce(struct ieee80211_sub_if_data *sdata)
memcpy(bssid, sdata->vif.cfg.ap_addr, ETH_ALEN);
ieee80211_mgd_deauth(sdata, &req);
}
-
- sdata_unlock(sdata);
}
#endif
@@ -6775,11 +7008,10 @@ void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata)
{
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
- sdata_lock(sdata);
- if (!ifmgd->associated) {
- sdata_unlock(sdata);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
+
+ if (!ifmgd->associated)
return;
- }
if (sdata->flags & IEEE80211_SDATA_DISCONNECT_RESUME) {
sdata->flags &= ~IEEE80211_SDATA_DISCONNECT_RESUME;
@@ -6787,7 +7019,6 @@ void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata)
ieee80211_sta_connection_lost(sdata,
WLAN_REASON_UNSPECIFIED,
true);
- sdata_unlock(sdata);
return;
}
@@ -6797,11 +7028,8 @@ void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata)
ieee80211_sta_connection_lost(sdata,
WLAN_REASON_UNSPECIFIED,
true);
- sdata_unlock(sdata);
return;
}
-
- sdata_unlock(sdata);
}
static void ieee80211_request_smps_mgd_work(struct wiphy *wiphy,
@@ -6811,10 +7039,8 @@ static void ieee80211_request_smps_mgd_work(struct wiphy *wiphy,
container_of(work, struct ieee80211_link_data,
u.mgd.request_smps_work);
- sdata_lock(link->sdata);
__ieee80211_request_smps_mgd(link->sdata, link,
link->u.mgd.driver_smps_mode);
- sdata_unlock(link->sdata);
}
/* interface setup */
@@ -6822,20 +7048,22 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata)
{
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
- INIT_WORK(&ifmgd->monitor_work, ieee80211_sta_monitor_work);
+ wiphy_work_init(&ifmgd->monitor_work, ieee80211_sta_monitor_work);
wiphy_work_init(&ifmgd->beacon_connection_loss_work,
ieee80211_beacon_connection_loss_work);
wiphy_work_init(&ifmgd->csa_connection_drop_work,
ieee80211_csa_connection_drop_work);
- INIT_DELAYED_WORK(&ifmgd->tdls_peer_del_work,
- ieee80211_tdls_peer_del_work);
+ wiphy_delayed_work_init(&ifmgd->tdls_peer_del_work,
+ ieee80211_tdls_peer_del_work);
wiphy_delayed_work_init(&ifmgd->ml_reconf_work,
ieee80211_ml_reconf_work);
timer_setup(&ifmgd->timer, ieee80211_sta_timer, 0);
timer_setup(&ifmgd->bcn_mon_timer, ieee80211_sta_bcn_mon_timer, 0);
timer_setup(&ifmgd->conn_mon_timer, ieee80211_sta_conn_mon_timer, 0);
- INIT_DELAYED_WORK(&ifmgd->tx_tspec_wk,
- ieee80211_sta_handle_tspec_ac_params_wk);
+ wiphy_delayed_work_init(&ifmgd->tx_tspec_wk,
+ ieee80211_sta_handle_tspec_ac_params_wk);
+ wiphy_delayed_work_init(&ifmgd->ttlm_work,
+ ieee80211_tid_to_link_map_work);
ifmgd->flags = 0;
ifmgd->powersave = sdata->wdev.ps;
@@ -6847,6 +7075,16 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata)
ifmgd->orig_teardown_skb = NULL;
}
+static void ieee80211_recalc_smps_work(struct wiphy *wiphy,
+ struct wiphy_work *work)
+{
+ struct ieee80211_link_data *link =
+ container_of(work, struct ieee80211_link_data,
+ u.mgd.recalc_smps);
+
+ ieee80211_recalc_smps(link->sdata, link);
+}
+
void ieee80211_mgd_setup_link(struct ieee80211_link_data *link)
{
struct ieee80211_sub_if_data *sdata = link->sdata;
@@ -6856,9 +7094,12 @@ void ieee80211_mgd_setup_link(struct ieee80211_link_data *link)
link->u.mgd.p2p_noa_index = -1;
link->u.mgd.conn_flags = 0;
link->conf->bssid = link->u.mgd.bssid;
+ link->smps_mode = IEEE80211_SMPS_OFF;
wiphy_work_init(&link->u.mgd.request_smps_work,
ieee80211_request_smps_mgd_work);
+ wiphy_work_init(&link->u.mgd.recalc_smps,
+ ieee80211_recalc_smps_work);
if (local->hw.wiphy->features & NL80211_FEATURE_DYNAMIC_SMPS)
link->u.mgd.req_smps = IEEE80211_SMPS_AUTOMATIC;
else
@@ -7022,7 +7263,7 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata,
}
if (new_sta || override) {
- err = ieee80211_prep_channel(sdata, link, cbss,
+ err = ieee80211_prep_channel(sdata, link, cbss, mlo,
&link->u.mgd.conn_flags);
if (err) {
if (new_sta)
@@ -7067,6 +7308,75 @@ out_err:
return err;
}
+static bool ieee80211_mgd_csa_present(struct ieee80211_sub_if_data *sdata,
+ const struct cfg80211_bss_ies *ies,
+ u8 cur_channel, bool ignore_ecsa)
+{
+ const struct element *csa_elem, *ecsa_elem;
+ struct ieee80211_channel_sw_ie *csa = NULL;
+ struct ieee80211_ext_chansw_ie *ecsa = NULL;
+
+ if (!ies)
+ return false;
+
+ csa_elem = cfg80211_find_elem(WLAN_EID_CHANNEL_SWITCH,
+ ies->data, ies->len);
+ if (csa_elem && csa_elem->datalen == sizeof(*csa))
+ csa = (void *)csa_elem->data;
+
+ ecsa_elem = cfg80211_find_elem(WLAN_EID_EXT_CHANSWITCH_ANN,
+ ies->data, ies->len);
+ if (ecsa_elem && ecsa_elem->datalen == sizeof(*ecsa))
+ ecsa = (void *)ecsa_elem->data;
+
+ if (csa && csa->count == 0)
+ csa = NULL;
+ if (csa && !csa->mode && csa->new_ch_num == cur_channel)
+ csa = NULL;
+
+ if (ecsa && ecsa->count == 0)
+ ecsa = NULL;
+ if (ecsa && !ecsa->mode && ecsa->new_ch_num == cur_channel)
+ ecsa = NULL;
+
+ if (ignore_ecsa && ecsa) {
+ sdata_info(sdata,
+ "Ignoring ECSA in probe response - was considered stuck!\n");
+ return csa;
+ }
+
+ return csa || ecsa;
+}
+
+static bool ieee80211_mgd_csa_in_process(struct ieee80211_sub_if_data *sdata,
+ struct cfg80211_bss *bss)
+{
+ u8 cur_channel;
+ bool ret;
+
+ cur_channel = ieee80211_frequency_to_channel(bss->channel->center_freq);
+
+ rcu_read_lock();
+ if (ieee80211_mgd_csa_present(sdata,
+ rcu_dereference(bss->beacon_ies),
+ cur_channel, false)) {
+ ret = true;
+ goto out;
+ }
+
+ if (ieee80211_mgd_csa_present(sdata,
+ rcu_dereference(bss->proberesp_ies),
+ cur_channel, bss->proberesp_ecsa_stuck)) {
+ ret = true;
+ goto out;
+ }
+
+ ret = false;
+out:
+ rcu_read_unlock();
+ return ret;
+}
+
/* config hooks */
int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
struct cfg80211_auth_request *req)
@@ -7074,10 +7384,13 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
struct ieee80211_local *local = sdata->local;
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
struct ieee80211_mgd_auth_data *auth_data;
+ struct ieee80211_link_data *link;
u16 auth_alg;
int err;
bool cont_auth;
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
+
/* prepare auth data structure */
switch (req->auth_type) {
@@ -7114,6 +7427,11 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
if (ifmgd->assoc_data)
return -EBUSY;
+ if (ieee80211_mgd_csa_in_process(sdata, req->bss)) {
+ sdata_info(sdata, "AP is in CSA process, reject auth\n");
+ return -EINVAL;
+ }
+
auth_data = kzalloc(sizeof(*auth_data) + req->auth_data_len +
req->ie_len, GFP_KERNEL);
if (!auth_data)
@@ -7197,8 +7515,6 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
false);
}
- sdata_info(sdata, "authenticate with %pM\n", auth_data->ap_addr);
-
/* needed for transmitting the auth frame(s) properly */
memcpy(sdata->vif.cfg.ap_addr, auth_data->ap_addr, ETH_ALEN);
@@ -7207,6 +7523,19 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
if (err)
goto err_clear;
+ if (req->link_id > 0)
+ link = sdata_dereference(sdata->link[req->link_id], sdata);
+ else
+ link = sdata_dereference(sdata->link[0], sdata);
+
+ if (WARN_ON(!link)) {
+ err = -ENOLINK;
+ goto err_clear;
+ }
+
+ sdata_info(sdata, "authenticate with %pM (local address=%pM)\n",
+ auth_data->ap_addr, link->conf->addr);
+
err = ieee80211_auth(sdata);
if (err) {
sta_info_destroy_addr(sdata, auth_data->ap_addr);
@@ -7222,9 +7551,7 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
eth_zero_addr(sdata->deflink.u.mgd.bssid);
ieee80211_link_info_change_notify(sdata, &sdata->deflink,
BSS_CHANGED_BSSID);
- mutex_lock(&sdata->local->mtx);
ieee80211_link_release_channel(&sdata->deflink);
- mutex_unlock(&sdata->local->mtx);
}
ifmgd->auth_data = NULL;
kfree(auth_data);
@@ -7239,7 +7566,7 @@ ieee80211_setup_assoc_link(struct ieee80211_sub_if_data *sdata,
unsigned int link_id)
{
struct ieee80211_local *local = sdata->local;
- const struct cfg80211_bss_ies *beacon_ies;
+ const struct cfg80211_bss_ies *bss_ies;
struct ieee80211_supported_band *sband;
const struct element *ht_elem, *vht_elem;
struct ieee80211_link_data *link;
@@ -7314,32 +7641,37 @@ ieee80211_setup_assoc_link(struct ieee80211_sub_if_data *sdata,
link->conf->eht_puncturing = 0;
rcu_read_lock();
- beacon_ies = rcu_dereference(cbss->beacon_ies);
- if (beacon_ies) {
- const struct ieee80211_eht_operation *eht_oper;
- const struct element *elem;
+ bss_ies = rcu_dereference(cbss->beacon_ies);
+ if (bss_ies) {
u8 dtim_count = 0;
- ieee80211_get_dtim(beacon_ies, &dtim_count,
+ ieee80211_get_dtim(bss_ies, &dtim_count,
&link->u.mgd.dtim_period);
sdata->deflink.u.mgd.have_beacon = true;
if (ieee80211_hw_check(&local->hw, TIMING_BEACON_ONLY)) {
- link->conf->sync_tsf = beacon_ies->tsf;
+ link->conf->sync_tsf = bss_ies->tsf;
link->conf->sync_device_ts = bss->device_ts_beacon;
link->conf->sync_dtim_count = dtim_count;
}
+ } else {
+ bss_ies = rcu_dereference(cbss->ies);
+ }
+
+ if (bss_ies) {
+ const struct ieee80211_eht_operation *eht_oper;
+ const struct element *elem;
elem = cfg80211_find_ext_elem(WLAN_EID_EXT_MULTIPLE_BSSID_CONFIGURATION,
- beacon_ies->data, beacon_ies->len);
+ bss_ies->data, bss_ies->len);
if (elem && elem->datalen >= 3)
link->conf->profile_periodicity = elem->data[2];
else
link->conf->profile_periodicity = 0;
elem = cfg80211_find_elem(WLAN_EID_EXT_CAPABILITY,
- beacon_ies->data, beacon_ies->len);
+ bss_ies->data, bss_ies->len);
if (elem && elem->datalen >= 11 &&
(elem->data[10] & WLAN_EXT_CAPA11_EMA_SUPPORT))
link->conf->ema_ap = true;
@@ -7347,7 +7679,7 @@ ieee80211_setup_assoc_link(struct ieee80211_sub_if_data *sdata,
link->conf->ema_ap = false;
elem = cfg80211_find_ext_elem(WLAN_EID_EXT_EHT_OPERATION,
- beacon_ies->data, beacon_ies->len);
+ bss_ies->data, bss_ies->len);
eht_oper = (const void *)(elem->data + 1);
if (elem &&
@@ -7362,7 +7694,8 @@ ieee80211_setup_assoc_link(struct ieee80211_sub_if_data *sdata,
bitmap = get_unaligned_le16(disable_subchannel_bitmap);
if (cfg80211_valid_disable_subchannel_bitmap(&bitmap,
- &link->conf->chandef))
+ &link->conf->chandef) &&
+ !(bitmap && ieee80211_hw_check(&local->hw, DISALLOW_PUNCTURING)))
ieee80211_handle_puncturing_bitmap(link,
eht_oper,
bitmap,
@@ -7430,6 +7763,12 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
cbss = req->link_id < 0 ? req->bss : req->links[req->link_id].bss;
+ if (ieee80211_mgd_csa_in_process(sdata, cbss)) {
+ sdata_info(sdata, "AP is in CSA process, reject assoc\n");
+ kfree(assoc_data);
+ return -EINVAL;
+ }
+
rcu_read_lock();
ssid_elem = ieee80211_bss_get_elem(cbss, WLAN_EID_SSID);
if (!ssid_elem || ssid_elem->datalen > sizeof(assoc_data->ssid)) {
@@ -7437,6 +7776,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
kfree(assoc_data);
return -EINVAL;
}
+
memcpy(assoc_data->ssid, ssid_elem->data, ssid_elem->datalen);
assoc_data->ssid_len = ssid_elem->datalen;
memcpy(vif_cfg->ssid, assoc_data->ssid, assoc_data->ssid_len);
@@ -7497,7 +7837,10 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
match = ether_addr_equal(ifmgd->auth_data->ap_addr,
assoc_data->ap_addr) &&
ifmgd->auth_data->link_id == req->link_id;
- ieee80211_destroy_auth_data(sdata, match);
+
+ /* Cleanup is delayed if auth_data matches */
+ if (!match)
+ ieee80211_destroy_auth_data(sdata, false);
}
/* prepare assoc data */
@@ -7678,10 +8021,13 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
if (i == assoc_data->assoc_link_id)
continue;
/* only calculate the flags, hence link == NULL */
- err = ieee80211_prep_channel(sdata, NULL, assoc_data->link[i].bss,
+ err = ieee80211_prep_channel(sdata, NULL,
+ assoc_data->link[i].bss, true,
&assoc_data->link[i].conn_flags);
- if (err)
+ if (err) {
+ req->links[i].error = err;
goto err_clear;
+ }
}
/* needed for transmitting the assoc frames properly */
@@ -7700,8 +8046,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
rcu_read_lock();
beacon_ies = rcu_dereference(req->bss->beacon_ies);
-
- if (beacon_ies) {
+ if (!beacon_ies) {
/*
* Wait up to one beacon interval ...
* should this be more if we miss one?
@@ -7717,11 +8062,17 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
run_again(sdata, assoc_data->timeout);
+ /* We are associating, clean up auth_data */
+ if (ifmgd->auth_data)
+ ieee80211_destroy_auth_data(sdata, true);
+
return 0;
err_clear:
- eth_zero_addr(sdata->deflink.u.mgd.bssid);
- ieee80211_link_info_change_notify(sdata, &sdata->deflink,
- BSS_CHANGED_BSSID);
+ if (!ifmgd->auth_data) {
+ eth_zero_addr(sdata->deflink.u.mgd.bssid);
+ ieee80211_link_info_change_notify(sdata, &sdata->deflink,
+ BSS_CHANGED_BSSID);
+ }
ifmgd->assoc_data = NULL;
err_free:
kfree(assoc_data);
@@ -7745,6 +8096,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
req->bssid, req->reason_code,
ieee80211_get_reason_code_string(req->reason_code));
+ info.link_id = ifmgd->auth_data->link_id;
drv_mgd_prepare_tx(sdata->local, sdata, &info);
ieee80211_send_deauth_disassoc(sdata, req->bssid, req->bssid,
IEEE80211_STYPE_DEAUTH,
@@ -7765,6 +8117,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
req->bssid, req->reason_code,
ieee80211_get_reason_code_string(req->reason_code));
+ info.link_id = ifmgd->assoc_data->assoc_link_id;
drv_mgd_prepare_tx(sdata->local, sdata, &info);
ieee80211_send_deauth_disassoc(sdata, req->bssid, req->bssid,
IEEE80211_STYPE_DEAUTH,
@@ -7774,6 +8127,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
ieee80211_report_disconnect(sdata, frame_buf,
sizeof(frame_buf), true,
req->reason_code, false);
+ drv_mgd_complete_tx(sdata->local, sdata, &info);
return 0;
}
@@ -7824,6 +8178,8 @@ void ieee80211_mgd_stop_link(struct ieee80211_link_data *link)
{
wiphy_work_cancel(link->sdata->local->hw.wiphy,
&link->u.mgd.request_smps_work);
+ wiphy_work_cancel(link->sdata->local->hw.wiphy,
+ &link->u.mgd.recalc_smps);
wiphy_delayed_work_cancel(link->sdata->local->hw.wiphy,
&link->u.mgd.chswitch_work);
}
@@ -7837,16 +8193,18 @@ void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata)
* they will not do anything but might not have been
* cancelled when disconnecting.
*/
- cancel_work_sync(&ifmgd->monitor_work);
+ wiphy_work_cancel(sdata->local->hw.wiphy,
+ &ifmgd->monitor_work);
wiphy_work_cancel(sdata->local->hw.wiphy,
&ifmgd->beacon_connection_loss_work);
wiphy_work_cancel(sdata->local->hw.wiphy,
&ifmgd->csa_connection_drop_work);
- cancel_delayed_work_sync(&ifmgd->tdls_peer_del_work);
+ wiphy_delayed_work_cancel(sdata->local->hw.wiphy,
+ &ifmgd->tdls_peer_del_work);
wiphy_delayed_work_cancel(sdata->local->hw.wiphy,
&ifmgd->ml_reconf_work);
+ wiphy_delayed_work_cancel(sdata->local->hw.wiphy, &ifmgd->ttlm_work);
- sdata_lock(sdata);
if (ifmgd->assoc_data)
ieee80211_destroy_assoc_data(sdata, ASSOC_TIMEOUT);
if (ifmgd->auth_data)
@@ -7862,7 +8220,6 @@ void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata)
ifmgd->assoc_req_ies_len = 0;
spin_unlock_bh(&ifmgd->teardown_lock);
del_timer_sync(&ifmgd->timer);
- sdata_unlock(sdata);
}
void ieee80211_cqm_rssi_notify(struct ieee80211_vif *vif,
diff --git a/net/mac80211/ocb.c b/net/mac80211/ocb.c
index b44896e14522..449af4e1cca4 100644
--- a/net/mac80211/ocb.c
+++ b/net/mac80211/ocb.c
@@ -44,7 +44,6 @@ void ieee80211_ocb_rx_no_sta(struct ieee80211_sub_if_data *sdata,
struct ieee80211_local *local = sdata->local;
struct ieee80211_chanctx_conf *chanctx_conf;
struct ieee80211_supported_band *sband;
- enum nl80211_bss_scan_width scan_width;
struct sta_info *sta;
int band;
@@ -66,7 +65,6 @@ void ieee80211_ocb_rx_no_sta(struct ieee80211_sub_if_data *sdata,
return;
}
band = chanctx_conf->def.chan->band;
- scan_width = cfg80211_chandef_to_scan_width(&chanctx_conf->def);
rcu_read_unlock();
sta = sta_info_alloc(sdata, addr, GFP_ATOMIC);
@@ -75,8 +73,7 @@ void ieee80211_ocb_rx_no_sta(struct ieee80211_sub_if_data *sdata,
/* Add only mandatory rates for now */
sband = local->hw.wiphy->bands[band];
- sta->sta.deflink.supp_rates[band] =
- ieee80211_mandatory_rates(sband, scan_width);
+ sta->sta.deflink.supp_rates[band] = ieee80211_mandatory_rates(sband);
spin_lock(&ifocb->incomplete_lock);
list_add(&sta->list, &ifocb->incomplete_stations);
@@ -124,11 +121,11 @@ void ieee80211_ocb_work(struct ieee80211_sub_if_data *sdata)
struct ieee80211_if_ocb *ifocb = &sdata->u.ocb;
struct sta_info *sta;
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
+
if (ifocb->joined != true)
return;
- sdata_lock(sdata);
-
spin_lock_bh(&ifocb->incomplete_lock);
while (!list_empty(&ifocb->incomplete_stations)) {
sta = list_first_entry(&ifocb->incomplete_stations,
@@ -144,8 +141,6 @@ void ieee80211_ocb_work(struct ieee80211_sub_if_data *sdata)
if (test_and_clear_bit(OCB_WORK_HOUSEKEEPING, &ifocb->wrkq_flags))
ieee80211_ocb_housekeeping(sdata);
-
- sdata_unlock(sdata);
}
static void ieee80211_ocb_housekeeping_timer(struct timer_list *t)
@@ -178,6 +173,8 @@ int ieee80211_ocb_join(struct ieee80211_sub_if_data *sdata,
u64 changed = BSS_CHANGED_OCB | BSS_CHANGED_BSSID;
int err;
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
+
if (ifocb->joined == true)
return -EINVAL;
@@ -185,10 +182,8 @@ int ieee80211_ocb_join(struct ieee80211_sub_if_data *sdata,
sdata->deflink.smps_mode = IEEE80211_SMPS_OFF;
sdata->deflink.needed_rx_chains = sdata->local->rx_chains;
- mutex_lock(&sdata->local->mtx);
err = ieee80211_link_use_channel(&sdata->deflink, &setup->chandef,
IEEE80211_CHANCTX_SHARED);
- mutex_unlock(&sdata->local->mtx);
if (err)
return err;
@@ -209,6 +204,8 @@ int ieee80211_ocb_leave(struct ieee80211_sub_if_data *sdata)
struct ieee80211_local *local = sdata->local;
struct sta_info *sta;
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
+
ifocb->joined = false;
sta_info_flush(sdata);
@@ -228,9 +225,7 @@ int ieee80211_ocb_leave(struct ieee80211_sub_if_data *sdata)
clear_bit(SDATA_STATE_OFFCHANNEL, &sdata->state);
ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_OCB);
- mutex_lock(&sdata->local->mtx);
ieee80211_link_release_channel(&sdata->deflink);
- mutex_unlock(&sdata->local->mtx);
skb_queue_purge(&sdata->skb_queue);
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index cdf991e74ab9..6c4080202573 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -34,7 +34,7 @@ static void ieee80211_offchannel_ps_enable(struct ieee80211_sub_if_data *sdata)
del_timer_sync(&ifmgd->bcn_mon_timer);
del_timer_sync(&ifmgd->conn_mon_timer);
- cancel_work_sync(&local->dynamic_ps_enable_work);
+ wiphy_work_cancel(local->hw.wiphy, &local->dynamic_ps_enable_work);
if (local->hw.conf.flags & IEEE80211_CONF_PS) {
offchannel_ps_enabled = true;
@@ -84,6 +84,8 @@ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local)
{
struct ieee80211_sub_if_data *sdata;
+ lockdep_assert_wiphy(local->hw.wiphy);
+
if (WARN_ON(local->use_chanctx))
return;
@@ -101,7 +103,6 @@ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local)
false);
ieee80211_flush_queues(local, NULL, false);
- mutex_lock(&local->iflist_mtx);
list_for_each_entry(sdata, &local->interfaces, list) {
if (!ieee80211_sdata_running(sdata))
continue;
@@ -127,17 +128,17 @@ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local)
sdata->u.mgd.associated)
ieee80211_offchannel_ps_enable(sdata);
}
- mutex_unlock(&local->iflist_mtx);
}
void ieee80211_offchannel_return(struct ieee80211_local *local)
{
struct ieee80211_sub_if_data *sdata;
+ lockdep_assert_wiphy(local->hw.wiphy);
+
if (WARN_ON(local->use_chanctx))
return;
- mutex_lock(&local->iflist_mtx);
list_for_each_entry(sdata, &local->interfaces, list) {
if (sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE)
continue;
@@ -161,7 +162,6 @@ void ieee80211_offchannel_return(struct ieee80211_local *local)
BSS_CHANGED_BEACON_ENABLED);
}
}
- mutex_unlock(&local->iflist_mtx);
ieee80211_wake_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP,
IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL,
@@ -197,7 +197,7 @@ static unsigned long ieee80211_end_finished_rocs(struct ieee80211_local *local,
struct ieee80211_roc_work *roc, *tmp;
long remaining_dur_min = LONG_MAX;
- lockdep_assert_held(&local->mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
list_for_each_entry_safe(roc, tmp, &local->roc_list, list) {
long remaining;
@@ -230,7 +230,7 @@ static bool ieee80211_recalc_sw_work(struct ieee80211_local *local,
if (dur == LONG_MAX)
return false;
- mod_delayed_work(local->workqueue, &local->roc_work, dur);
+ wiphy_delayed_work_queue(local->hw.wiphy, &local->roc_work, dur);
return true;
}
@@ -258,13 +258,13 @@ static void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc,
roc->notified = true;
}
-static void ieee80211_hw_roc_start(struct work_struct *work)
+static void ieee80211_hw_roc_start(struct wiphy *wiphy, struct wiphy_work *work)
{
struct ieee80211_local *local =
container_of(work, struct ieee80211_local, hw_roc_start);
struct ieee80211_roc_work *roc;
- mutex_lock(&local->mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
list_for_each_entry(roc, &local->roc_list, list) {
if (!roc->started)
@@ -273,8 +273,6 @@ static void ieee80211_hw_roc_start(struct work_struct *work)
roc->hw_begun = true;
ieee80211_handle_roc_started(roc, local->hw_roc_start_time);
}
-
- mutex_unlock(&local->mtx);
}
void ieee80211_ready_on_channel(struct ieee80211_hw *hw)
@@ -285,7 +283,7 @@ void ieee80211_ready_on_channel(struct ieee80211_hw *hw)
trace_api_ready_on_channel(local);
- ieee80211_queue_work(hw, &local->hw_roc_start);
+ wiphy_work_queue(hw->wiphy, &local->hw_roc_start);
}
EXPORT_SYMBOL_GPL(ieee80211_ready_on_channel);
@@ -295,7 +293,7 @@ static void _ieee80211_start_next_roc(struct ieee80211_local *local)
enum ieee80211_roc_type type;
u32 min_dur, max_dur;
- lockdep_assert_held(&local->mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
if (WARN_ON(list_empty(&local->roc_list)))
return;
@@ -338,7 +336,7 @@ static void _ieee80211_start_next_roc(struct ieee80211_local *local)
tmp->started = true;
tmp->abort = true;
}
- ieee80211_queue_work(&local->hw, &local->hw_roc_done);
+ wiphy_work_queue(local->hw.wiphy, &local->hw_roc_done);
return;
}
@@ -368,8 +366,8 @@ static void _ieee80211_start_next_roc(struct ieee80211_local *local)
ieee80211_hw_config(local, 0);
}
- ieee80211_queue_delayed_work(&local->hw, &local->roc_work,
- msecs_to_jiffies(min_dur));
+ wiphy_delayed_work_queue(local->hw.wiphy, &local->roc_work,
+ msecs_to_jiffies(min_dur));
/* tell userspace or send frame(s) */
list_for_each_entry(tmp, &local->roc_list, list) {
@@ -386,7 +384,7 @@ void ieee80211_start_next_roc(struct ieee80211_local *local)
{
struct ieee80211_roc_work *roc;
- lockdep_assert_held(&local->mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
if (list_empty(&local->roc_list)) {
ieee80211_run_deferred_scan(local);
@@ -407,8 +405,8 @@ void ieee80211_start_next_roc(struct ieee80211_local *local)
_ieee80211_start_next_roc(local);
} else {
/* delay it a bit */
- ieee80211_queue_delayed_work(&local->hw, &local->roc_work,
- round_jiffies_relative(HZ/2));
+ wiphy_delayed_work_queue(local->hw.wiphy, &local->roc_work,
+ round_jiffies_relative(HZ / 2));
}
}
@@ -417,7 +415,7 @@ static void __ieee80211_roc_work(struct ieee80211_local *local)
struct ieee80211_roc_work *roc;
bool on_channel;
- lockdep_assert_held(&local->mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
if (WARN_ON(local->ops->remain_on_channel))
return;
@@ -451,29 +449,27 @@ static void __ieee80211_roc_work(struct ieee80211_local *local)
}
}
-static void ieee80211_roc_work(struct work_struct *work)
+static void ieee80211_roc_work(struct wiphy *wiphy, struct wiphy_work *work)
{
struct ieee80211_local *local =
container_of(work, struct ieee80211_local, roc_work.work);
- mutex_lock(&local->mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
+
__ieee80211_roc_work(local);
- mutex_unlock(&local->mtx);
}
-static void ieee80211_hw_roc_done(struct work_struct *work)
+static void ieee80211_hw_roc_done(struct wiphy *wiphy, struct wiphy_work *work)
{
struct ieee80211_local *local =
container_of(work, struct ieee80211_local, hw_roc_done);
- mutex_lock(&local->mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
ieee80211_end_finished_rocs(local, jiffies);
/* if there's another roc, start it now */
ieee80211_start_next_roc(local);
-
- mutex_unlock(&local->mtx);
}
void ieee80211_remain_on_channel_expired(struct ieee80211_hw *hw)
@@ -482,7 +478,7 @@ void ieee80211_remain_on_channel_expired(struct ieee80211_hw *hw)
trace_api_remain_on_channel_expired(local);
- ieee80211_queue_work(hw, &local->hw_roc_done);
+ wiphy_work_queue(hw->wiphy, &local->hw_roc_done);
}
EXPORT_SYMBOL_GPL(ieee80211_remain_on_channel_expired);
@@ -537,7 +533,7 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local,
bool queued = false, combine_started = true;
int ret;
- lockdep_assert_held(&local->mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
if (channel->freq_offset)
/* this may work, but is untested */
@@ -586,8 +582,8 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local,
/* if not HW assist, just queue & schedule work */
if (!local->ops->remain_on_channel) {
list_add_tail(&roc->list, &local->roc_list);
- ieee80211_queue_delayed_work(&local->hw,
- &local->roc_work, 0);
+ wiphy_delayed_work_queue(local->hw.wiphy,
+ &local->roc_work, 0);
} else {
/* otherwise actually kick it off here
* (for error handling)
@@ -675,15 +671,12 @@ int ieee80211_remain_on_channel(struct wiphy *wiphy, struct wireless_dev *wdev,
{
struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
struct ieee80211_local *local = sdata->local;
- int ret;
- mutex_lock(&local->mtx);
- ret = ieee80211_start_roc_work(local, sdata, chan,
- duration, cookie, NULL,
- IEEE80211_ROC_TYPE_NORMAL);
- mutex_unlock(&local->mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
- return ret;
+ return ieee80211_start_roc_work(local, sdata, chan,
+ duration, cookie, NULL,
+ IEEE80211_ROC_TYPE_NORMAL);
}
static int ieee80211_cancel_roc(struct ieee80211_local *local,
@@ -692,12 +685,13 @@ static int ieee80211_cancel_roc(struct ieee80211_local *local,
struct ieee80211_roc_work *roc, *tmp, *found = NULL;
int ret;
+ lockdep_assert_wiphy(local->hw.wiphy);
+
if (!cookie)
return -ENOENT;
- flush_work(&local->hw_roc_start);
+ wiphy_work_flush(local->hw.wiphy, &local->hw_roc_start);
- mutex_lock(&local->mtx);
list_for_each_entry_safe(roc, tmp, &local->roc_list, list) {
if (!mgmt_tx && roc->cookie != cookie)
continue;
@@ -709,7 +703,6 @@ static int ieee80211_cancel_roc(struct ieee80211_local *local,
}
if (!found) {
- mutex_unlock(&local->mtx);
return -ENOENT;
}
@@ -721,10 +714,26 @@ static int ieee80211_cancel_roc(struct ieee80211_local *local,
if (local->ops->remain_on_channel) {
ret = drv_cancel_remain_on_channel(local, roc->sdata);
if (WARN_ON_ONCE(ret)) {
- mutex_unlock(&local->mtx);
return ret;
}
+ /*
+ * We could be racing against the notification from the driver:
+ * + driver is handling the notification on CPU0
+ * + user space is cancelling the remain on channel and
+ * schedules the hw_roc_done worker.
+ *
+ * Now hw_roc_done might start to run after the next roc will
+ * start and mac80211 will think that this second roc has
+ * ended prematurely.
+ * Cancel the work to make sure that all the pending workers
+ * have completed execution.
+ * Note that this assumes that by the time the driver returns
+ * from drv_cancel_remain_on_channel, it has completed all
+ * the processing of related notifications.
+ */
+ wiphy_work_cancel(local->hw.wiphy, &local->hw_roc_done);
+
/* TODO:
* if multiple items were combined here then we really shouldn't
* cancel them all - we should wait for as much time as needed
@@ -745,11 +754,10 @@ static int ieee80211_cancel_roc(struct ieee80211_local *local,
} else {
/* go through work struct to return to the operating channel */
found->abort = true;
- mod_delayed_work(local->workqueue, &local->roc_work, 0);
+ wiphy_delayed_work_queue(local->hw.wiphy, &local->roc_work, 0);
}
out_unlock:
- mutex_unlock(&local->mtx);
return 0;
}
@@ -778,6 +786,8 @@ int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
int ret;
u8 *data;
+ lockdep_assert_wiphy(local->hw.wiphy);
+
if (params->dont_wait_for_ack)
flags = IEEE80211_TX_CTL_NO_ACK;
else
@@ -833,13 +843,16 @@ int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
break;
case NL80211_IFTYPE_STATION:
case NL80211_IFTYPE_P2P_CLIENT:
- sdata_lock(sdata);
if (!sdata->u.mgd.associated ||
(params->offchan && params->wait &&
local->ops->remain_on_channel &&
- memcmp(sdata->vif.cfg.ap_addr, mgmt->bssid, ETH_ALEN)))
+ memcmp(sdata->vif.cfg.ap_addr, mgmt->bssid, ETH_ALEN))) {
need_offchan = true;
- sdata_unlock(sdata);
+ } else if (sdata->u.mgd.associated &&
+ ether_addr_equal(sdata->vif.cfg.ap_addr, mgmt->da)) {
+ sta = sta_info_get_bss(sdata, mgmt->da);
+ mlo_sta = sta && sta->sta.mlo;
+ }
break;
case NL80211_IFTYPE_P2P_DEVICE:
need_offchan = true;
@@ -855,8 +868,6 @@ int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
if (need_offchan && !params->chan)
return -EINVAL;
- mutex_lock(&local->mtx);
-
/* Check if the operating channel is the requested channel */
if (!params->chan && mlo_sta) {
need_offchan = false;
@@ -980,7 +991,6 @@ int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
if (ret)
ieee80211_free_txskb(&local->hw, skb);
out_unlock:
- mutex_unlock(&local->mtx);
return ret;
}
@@ -994,9 +1004,9 @@ int ieee80211_mgmt_tx_cancel_wait(struct wiphy *wiphy,
void ieee80211_roc_setup(struct ieee80211_local *local)
{
- INIT_WORK(&local->hw_roc_start, ieee80211_hw_roc_start);
- INIT_WORK(&local->hw_roc_done, ieee80211_hw_roc_done);
- INIT_DELAYED_WORK(&local->roc_work, ieee80211_roc_work);
+ wiphy_work_init(&local->hw_roc_start, ieee80211_hw_roc_start);
+ wiphy_work_init(&local->hw_roc_done, ieee80211_hw_roc_done);
+ wiphy_delayed_work_init(&local->roc_work, ieee80211_roc_work);
INIT_LIST_HEAD(&local->roc_list);
}
@@ -1006,7 +1016,8 @@ void ieee80211_roc_purge(struct ieee80211_local *local,
struct ieee80211_roc_work *roc, *tmp;
bool work_to_do = false;
- mutex_lock(&local->mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
+
list_for_each_entry_safe(roc, tmp, &local->roc_list, list) {
if (sdata && roc->sdata != sdata)
continue;
@@ -1026,5 +1037,4 @@ void ieee80211_roc_purge(struct ieee80211_local *local,
}
if (work_to_do)
__ieee80211_roc_work(local);
- mutex_unlock(&local->mtx);
}
diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c
index 0ccb5701c7f3..c1fa26e09479 100644
--- a/net/mac80211/pm.c
+++ b/net/mac80211/pm.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Portions
- * Copyright (C) 2020-2021 Intel Corporation
+ * Copyright (C) 2020-2021, 2023 Intel Corporation
*/
#include <net/mac80211.h>
#include <net/rtnetlink.h>
@@ -40,13 +40,12 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)
if (ieee80211_hw_check(hw, AMPDU_AGGREGATION) &&
!(wowlan && wowlan->any)) {
- mutex_lock(&local->sta_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
list_for_each_entry(sta, &local->sta_list, list) {
set_sta_flag(sta, WLAN_STA_BLOCK_BA);
ieee80211_sta_tear_down_BA_sessions(
sta, AGG_STOP_LOCAL_REQUEST);
}
- mutex_unlock(&local->sta_mtx);
}
/* keep sched_scan only in case of 'any' trigger */
@@ -76,7 +75,7 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)
* Note that this particular timer doesn't need to be
* restarted at resume.
*/
- cancel_work_sync(&local->dynamic_ps_enable_work);
+ wiphy_work_cancel(local->hw.wiphy, &local->dynamic_ps_enable_work);
del_timer_sync(&local->dynamic_ps_timer);
local->wowlan = wowlan;
@@ -119,12 +118,11 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)
local->quiescing = false;
local->wowlan = false;
if (ieee80211_hw_check(hw, AMPDU_AGGREGATION)) {
- mutex_lock(&local->sta_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
list_for_each_entry(sta,
&local->sta_list, list) {
clear_sta_flag(sta, WLAN_STA_BLOCK_BA);
}
- mutex_unlock(&local->sta_mtx);
}
ieee80211_wake_queues_by_reason(hw,
IEEE80211_MAX_QUEUE_MAP,
@@ -161,7 +159,8 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)
break;
}
- flush_delayed_work(&sdata->dec_tailroom_needed_wk);
+ wiphy_delayed_work_flush(local->hw.wiphy,
+ &sdata->dec_tailroom_needed_wk);
drv_remove_interface(local, sdata);
}
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index b34c80522047..6bf3b4444a43 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -1725,16 +1725,15 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband,
mi->band = sband->band;
mi->last_stats_update = jiffies;
- ack_dur = ieee80211_frame_duration(sband->band, 10, 60, 1, 1, 0);
- mi->overhead = ieee80211_frame_duration(sband->band, 0, 60, 1, 1, 0);
+ ack_dur = ieee80211_frame_duration(sband->band, 10, 60, 1, 1);
+ mi->overhead = ieee80211_frame_duration(sband->band, 0, 60, 1, 1);
mi->overhead += ack_dur;
mi->overhead_rtscts = mi->overhead + 2 * ack_dur;
ctl_rate = &sband->bitrates[rate_lowest_index(sband, sta)];
erp = ctl_rate->flags & IEEE80211_RATE_ERP_G;
ack_dur = ieee80211_frame_duration(sband->band, 10,
- ctl_rate->bitrate, erp, 1,
- ieee80211_chandef_get_shift(chandef));
+ ctl_rate->bitrate, erp, 1);
mi->overhead_legacy = ack_dur;
mi->overhead_legacy_rtscts = mi->overhead_legacy + 2 * ack_dur;
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index e751cda5eef6..0bf72928ccfc 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -19,6 +19,7 @@
#include <linux/export.h>
#include <linux/kcov.h>
#include <linux/bitops.h>
+#include <kunit/visibility.h>
#include <net/mac80211.h>
#include <net/ieee80211_radiotap.h>
#include <asm/unaligned.h>
@@ -566,7 +567,8 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
if (local->hw.radiotap_timestamp.units_pos >= 0) {
u16 accuracy = 0;
- u8 flags = IEEE80211_RADIOTAP_TIMESTAMP_FLAG_32BIT;
+ u8 flags;
+ u64 ts;
rthdr->it_present |=
cpu_to_le32(BIT(IEEE80211_RADIOTAP_TIMESTAMP));
@@ -575,7 +577,15 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
while ((pos - (u8 *)rthdr) & 7)
pos++;
- put_unaligned_le64(status->device_timestamp, pos);
+ if (status->flag & RX_FLAG_MACTIME_IS_RTAP_TS64) {
+ flags = IEEE80211_RADIOTAP_TIMESTAMP_FLAG_64BIT;
+ ts = status->mactime;
+ } else {
+ flags = IEEE80211_RADIOTAP_TIMESTAMP_FLAG_32BIT;
+ ts = status->device_timestamp;
+ }
+
+ put_unaligned_le64(ts, pos);
pos += sizeof(u64);
if (local->hw.radiotap_timestamp.accuracy >= 0) {
@@ -920,7 +930,7 @@ static void ieee80211_parse_qos(struct ieee80211_rx_data *rx)
* Drivers always need to pass packets that are aligned to two-byte boundaries
* to the stack.
*
- * Additionally, should, if possible, align the payload data in a way that
+ * Additionally, they should, if possible, align the payload data in a way that
* guarantees that the contained IP header is aligned to a four-byte
* boundary. In the case of regular frames, this simply means aligning the
* payload to a four-byte boundary (because either the IP header is directly
@@ -936,7 +946,7 @@ static void ieee80211_parse_qos(struct ieee80211_rx_data *rx)
* subframe to a length that is a multiple of four.
*
* Padding like Atheros hardware adds which is between the 802.11 header and
- * the payload is not supported, the driver is required to move the 802.11
+ * the payload is not supported; the driver is required to move the 802.11
* header to be directly in front of the payload in that case.
*/
static void ieee80211_verify_alignment(struct ieee80211_rx_data *rx)
@@ -1436,7 +1446,7 @@ ieee80211_rx_h_check_dup(struct ieee80211_rx_data *rx)
rx->sta->last_seq_ctrl[rx->seqno_idx] == hdr->seq_ctrl)) {
I802_DEBUG_INC(rx->local->dot11FrameDuplicateCount);
rx->link_sta->rx_stats.num_duplicates++;
- return RX_DROP_UNUSABLE;
+ return RX_DROP_U_DUP;
} else if (!(status->flag & RX_FLAG_AMSDU_MORE)) {
rx->sta->last_seq_ctrl[rx->seqno_idx] = hdr->seq_ctrl;
}
@@ -1490,7 +1500,7 @@ ieee80211_rx_h_check(struct ieee80211_rx_data *rx)
cfg80211_rx_spurious_frame(rx->sdata->dev,
hdr->addr2,
GFP_ATOMIC))
- return RX_DROP_UNUSABLE;
+ return RX_DROP_U_SPURIOUS;
return RX_DROP_MONITOR;
}
@@ -1883,7 +1893,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
int keyidx;
- ieee80211_rx_result result = RX_DROP_UNUSABLE;
+ ieee80211_rx_result result = RX_DROP_U_DECRYPT_FAIL;
struct ieee80211_key *sta_ptk = NULL;
struct ieee80211_key *ptk_idx = NULL;
int mmie_keyidx = -1;
@@ -1933,7 +1943,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
keyid = ieee80211_get_keyid(rx->skb);
if (unlikely(keyid < 0))
- return RX_DROP_UNUSABLE;
+ return RX_DROP_U_NO_KEY_ID;
ptk_idx = rcu_dereference(rx->sta->ptk[keyid]);
}
@@ -2038,7 +2048,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
keyidx = ieee80211_get_keyid(rx->skb);
if (unlikely(keyidx < 0))
- return RX_DROP_UNUSABLE;
+ return RX_DROP_U_NO_KEY_ID;
/* check per-station GTK first, if multicast packet */
if (is_multicast_ether_addr(hdr->addr1) && rx->link_sta)
@@ -2104,7 +2114,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
result = ieee80211_crypto_gcmp_decrypt(rx);
break;
default:
- result = RX_DROP_UNUSABLE;
+ result = RX_DROP_U_BAD_CIPHER;
}
/* the hdr variable is invalid after the decrypt handlers */
@@ -2112,7 +2122,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
/* either the frame has been decrypted or will be dropped */
status->flag |= RX_FLAG_DECRYPTED;
- if (unlikely(ieee80211_is_beacon(fc) && (result & RX_DROP_UNUSABLE) &&
+ if (unlikely(ieee80211_is_beacon(fc) && RX_RES_IS_UNUSABLE(result) &&
rx->sdata->dev))
cfg80211_rx_unprot_mlme_mgmt(rx->sdata->dev,
skb->data, skb->len);
@@ -2249,7 +2259,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
I802_DEBUG_INC(rx->local->rx_handlers_fragments);
if (skb_linearize(rx->skb))
- return RX_DROP_UNUSABLE;
+ return RX_DROP_U_OOM;
/*
* skb_linearize() might change the skb->data and
@@ -2312,11 +2322,11 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
u8 pn[IEEE80211_CCMP_PN_LEN], *rpn;
if (!requires_sequential_pn(rx, fc))
- return RX_DROP_UNUSABLE;
+ return RX_DROP_U_NONSEQ_PN;
/* Prevent mixed key and fragment cache attacks */
if (entry->key_color != rx->key->color)
- return RX_DROP_UNUSABLE;
+ return RX_DROP_U_BAD_KEY_COLOR;
memcpy(pn, entry->last_pn, IEEE80211_CCMP_PN_LEN);
for (i = IEEE80211_CCMP_PN_LEN - 1; i >= 0; i--) {
@@ -2327,7 +2337,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
rpn = rx->ccm_gcm.pn;
if (memcmp(pn, rpn, IEEE80211_CCMP_PN_LEN))
- return RX_DROP_UNUSABLE;
+ return RX_DROP_U_REPLAY;
memcpy(entry->last_pn, pn, IEEE80211_CCMP_PN_LEN);
} else if (entry->is_protected &&
(!rx->key ||
@@ -2338,11 +2348,11 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
* if for TKIP Michael MIC should protect us, and WEP is a
* lost cause anyway.
*/
- return RX_DROP_UNUSABLE;
+ return RX_DROP_U_EXPECT_DEFRAG_PROT;
} else if (entry->is_protected && rx->key &&
entry->key_color != rx->key->color &&
(status->flag & RX_FLAG_DECRYPTED)) {
- return RX_DROP_UNUSABLE;
+ return RX_DROP_U_BAD_KEY_COLOR;
}
skb_pull(rx->skb, ieee80211_hdrlen(fc));
@@ -2361,7 +2371,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
GFP_ATOMIC))) {
I802_DEBUG_INC(rx->local->rx_handlers_drop_defrag);
__skb_queue_purge(&entry->skb_list);
- return RX_DROP_UNUSABLE;
+ return RX_DROP_U_OOM;
}
}
while ((skb = __skb_dequeue(&entry->skb_list))) {
@@ -2405,7 +2415,8 @@ static int ieee80211_drop_unencrypted(struct ieee80211_rx_data *rx, __le16 fc)
return 0;
}
-static int ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx)
+VISIBLE_IF_MAC80211_KUNIT ieee80211_rx_result
+ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx)
{
struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
struct ieee80211_mgmt *mgmt = (void *)rx->skb->data;
@@ -2416,12 +2427,12 @@ static int ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx)
* decrypted them already.
*/
if (status->flag & RX_FLAG_DECRYPTED)
- return 0;
+ return RX_CONTINUE;
/* drop unicast protected dual (that wasn't protected) */
if (ieee80211_is_action(fc) &&
mgmt->u.action.category == WLAN_CATEGORY_PROTECTED_DUAL_OF_ACTION)
- return -EACCES;
+ return RX_DROP_U_UNPROT_DUAL;
if (rx->sta && test_sta_flag(rx->sta, WLAN_STA_MFP)) {
if (unlikely(!ieee80211_has_protected(fc) &&
@@ -2433,13 +2444,13 @@ static int ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx)
* during 4-way-HS (key is installed after HS).
*/
if (!rx->key)
- return 0;
+ return RX_CONTINUE;
cfg80211_rx_unprot_mlme_mgmt(rx->sdata->dev,
rx->skb->data,
rx->skb->len);
}
- return -EACCES;
+ return RX_DROP_U_UNPROT_UCAST_MGMT;
}
/* BIP does not use Protected field, so need to check MMIE */
if (unlikely(ieee80211_is_multicast_robust_mgmt_frame(rx->skb) &&
@@ -2449,14 +2460,14 @@ static int ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx)
cfg80211_rx_unprot_mlme_mgmt(rx->sdata->dev,
rx->skb->data,
rx->skb->len);
- return -EACCES;
+ return RX_DROP_U_UNPROT_MCAST_MGMT;
}
if (unlikely(ieee80211_is_beacon(fc) && rx->key &&
ieee80211_get_mmie_keyidx(rx->skb) < 0)) {
cfg80211_rx_unprot_mlme_mgmt(rx->sdata->dev,
rx->skb->data,
rx->skb->len);
- return -EACCES;
+ return RX_DROP_U_UNPROT_BEACON;
}
/*
* When using MFP, Action frames are not allowed prior to
@@ -2464,19 +2475,28 @@ static int ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx)
*/
if (unlikely(ieee80211_is_action(fc) && !rx->key &&
ieee80211_is_robust_mgmt_frame(rx->skb)))
- return -EACCES;
+ return RX_DROP_U_UNPROT_ACTION;
/* drop unicast public action frames when using MPF */
if (is_unicast_ether_addr(mgmt->da) &&
- ieee80211_is_public_action((void *)rx->skb->data,
- rx->skb->len))
- return -EACCES;
+ ieee80211_is_protected_dual_of_public_action(rx->skb))
+ return RX_DROP_U_UNPROT_UNICAST_PUB_ACTION;
}
- return 0;
+ /*
+ * Drop robust action frames before assoc regardless of MFP state,
+ * after assoc we also have decided on MFP or not.
+ */
+ if (ieee80211_is_action(fc) &&
+ ieee80211_is_robust_mgmt_frame(rx->skb) &&
+ (!rx->sta || !test_sta_flag(rx->sta, WLAN_STA_ASSOC)))
+ return RX_DROP_U_UNPROT_ROBUST_ACTION;
+
+ return RX_CONTINUE;
}
+EXPORT_SYMBOL_IF_MAC80211_KUNIT(ieee80211_drop_unencrypted_mgmt);
-static int
+static ieee80211_rx_result
__ieee80211_data_to_8023(struct ieee80211_rx_data *rx, bool *port_control)
{
struct ieee80211_sub_if_data *sdata = rx->sdata;
@@ -2488,32 +2508,31 @@ __ieee80211_data_to_8023(struct ieee80211_rx_data *rx, bool *port_control)
*port_control = false;
if (ieee80211_has_a4(hdr->frame_control) &&
sdata->vif.type == NL80211_IFTYPE_AP_VLAN && !sdata->u.vlan.sta)
- return -1;
+ return RX_DROP_U_UNEXPECTED_VLAN_4ADDR;
if (sdata->vif.type == NL80211_IFTYPE_STATION &&
!!sdata->u.mgd.use_4addr != !!ieee80211_has_a4(hdr->frame_control)) {
-
if (!sdata->u.mgd.use_4addr)
- return -1;
+ return RX_DROP_U_UNEXPECTED_STA_4ADDR;
else if (!ether_addr_equal(hdr->addr1, sdata->vif.addr))
check_port_control = true;
}
if (is_multicast_ether_addr(hdr->addr1) &&
sdata->vif.type == NL80211_IFTYPE_AP_VLAN && sdata->u.vlan.sta)
- return -1;
+ return RX_DROP_U_UNEXPECTED_VLAN_MCAST;
ret = ieee80211_data_to_8023(rx->skb, sdata->vif.addr, sdata->vif.type);
if (ret < 0)
- return ret;
+ return RX_DROP_U_INVALID_8023;
ehdr = (struct ethhdr *) rx->skb->data;
if (ehdr->h_proto == rx->sdata->control_port_protocol)
*port_control = true;
else if (check_port_control)
- return -1;
+ return RX_DROP_U_NOT_PORT_CONTROL;
- return 0;
+ return RX_CONTINUE;
}
bool ieee80211_is_our_addr(struct ieee80211_sub_if_data *sdata,
@@ -2904,10 +2923,10 @@ ieee80211_rx_mesh_data(struct ieee80211_sub_if_data *sdata, struct sta_info *sta
skb = NULL;
if (skb_cow_head(fwd_skb, hdrlen - sizeof(struct ethhdr)))
- return RX_DROP_UNUSABLE;
+ return RX_DROP_U_OOM;
if (skb_linearize(fwd_skb))
- return RX_DROP_UNUSABLE;
+ return RX_DROP_U_OOM;
}
fwd_hdr = skb_push(fwd_skb, hdrlen - sizeof(struct ethhdr));
@@ -3003,7 +3022,7 @@ __ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx, u8 data_offset)
rx->sdata->vif.addr,
rx->sdata->vif.type,
data_offset, true))
- return RX_DROP_UNUSABLE;
+ return RX_DROP_U_BAD_AMSDU;
if (rx->sta->amsdu_mesh_control < 0) {
s8 valid = -1;
@@ -3078,21 +3097,21 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
switch (rx->sdata->vif.type) {
case NL80211_IFTYPE_AP_VLAN:
if (!rx->sdata->u.vlan.sta)
- return RX_DROP_UNUSABLE;
+ return RX_DROP_U_BAD_4ADDR;
break;
case NL80211_IFTYPE_STATION:
if (!rx->sdata->u.mgd.use_4addr)
- return RX_DROP_UNUSABLE;
+ return RX_DROP_U_BAD_4ADDR;
break;
case NL80211_IFTYPE_MESH_POINT:
break;
default:
- return RX_DROP_UNUSABLE;
+ return RX_DROP_U_BAD_4ADDR;
}
}
if (is_multicast_ether_addr(hdr->addr1) || !rx->sta)
- return RX_DROP_UNUSABLE;
+ return RX_DROP_U_BAD_AMSDU;
if (rx->key) {
/*
@@ -3105,7 +3124,7 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
case WLAN_CIPHER_SUITE_WEP40:
case WLAN_CIPHER_SUITE_WEP104:
case WLAN_CIPHER_SUITE_TKIP:
- return RX_DROP_UNUSABLE;
+ return RX_DROP_U_BAD_AMSDU_CIPHER;
default:
break;
}
@@ -3124,7 +3143,6 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx)
__le16 fc = hdr->frame_control;
ieee80211_rx_result res;
bool port_control;
- int err;
if (unlikely(!ieee80211_is_data(hdr->frame_control)))
return RX_CONTINUE;
@@ -3145,9 +3163,9 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx)
return RX_DROP_MONITOR;
}
- err = __ieee80211_data_to_8023(rx, &port_control);
- if (unlikely(err))
- return RX_DROP_UNUSABLE;
+ res = __ieee80211_data_to_8023(rx, &port_control);
+ if (unlikely(res != RX_CONTINUE))
+ return res;
res = ieee80211_rx_mesh_data(rx->sdata, rx->sta, rx->skb);
if (res != RX_CONTINUE)
@@ -3379,7 +3397,7 @@ ieee80211_rx_h_mgmt_check(struct ieee80211_rx_data *rx)
/* drop too small action frames */
if (ieee80211_is_action(mgmt->frame_control) &&
rx->skb->len < IEEE80211_MIN_ACTION_SIZE)
- return RX_DROP_UNUSABLE;
+ return RX_DROP_U_RUNT_ACTION;
if (rx->sdata->vif.type == NL80211_IFTYPE_AP &&
ieee80211_is_beacon(mgmt->frame_control) &&
@@ -3400,10 +3418,7 @@ ieee80211_rx_h_mgmt_check(struct ieee80211_rx_data *rx)
rx->flags |= IEEE80211_RX_BEACON_REPORTED;
}
- if (ieee80211_drop_unencrypted_mgmt(rx))
- return RX_DROP_UNUSABLE;
-
- return RX_CONTINUE;
+ return ieee80211_drop_unencrypted_mgmt(rx);
}
static bool
@@ -3473,7 +3488,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
if (!rx->sta && mgmt->u.action.category != WLAN_CATEGORY_PUBLIC &&
mgmt->u.action.category != WLAN_CATEGORY_SELF_PROTECTED &&
mgmt->u.action.category != WLAN_CATEGORY_SPECTRUM_MGMT)
- return RX_DROP_UNUSABLE;
+ return RX_DROP_U_ACTION_UNKNOWN_SRC;
switch (mgmt->u.action.category) {
case WLAN_CATEGORY_HT:
@@ -3878,7 +3893,7 @@ ieee80211_rx_h_action_return(struct ieee80211_rx_data *rx)
/* do not return rejected action frames */
if (mgmt->u.action.category & 0x80)
- return RX_DROP_UNUSABLE;
+ return RX_DROP_U_REJECTED_ACTION_RESPONSE;
nskb = skb_copy_expand(rx->skb, local->hw.extra_tx_headroom, 0,
GFP_ATOMIC);
@@ -4669,7 +4684,7 @@ void __ieee80211_check_fast_rx_iface(struct ieee80211_sub_if_data *sdata)
struct ieee80211_local *local = sdata->local;
struct sta_info *sta;
- lockdep_assert_held(&local->sta_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
list_for_each_entry(sta, &local->sta_list, list) {
if (sdata != sta->sdata &&
@@ -4683,9 +4698,9 @@ void ieee80211_check_fast_rx_iface(struct ieee80211_sub_if_data *sdata)
{
struct ieee80211_local *local = sdata->local;
- mutex_lock(&local->sta_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
+
__ieee80211_check_fast_rx_iface(sdata);
- mutex_unlock(&local->sta_mtx);
}
static void ieee80211_rx_8023(struct ieee80211_rx_data *rx,
diff --git a/net/mac80211/s1g.c b/net/mac80211/s1g.c
index c1f964e9991c..d4ed0c0a335c 100644
--- a/net/mac80211/s1g.c
+++ b/net/mac80211/s1g.c
@@ -2,6 +2,7 @@
/*
* S1G handling
* Copyright(c) 2020 Adapt-IP
+ * Copyright (C) 2023 Intel Corporation
*/
#include <linux/ieee80211.h>
#include <net/mac80211.h>
@@ -153,11 +154,11 @@ void ieee80211_s1g_rx_twt_action(struct ieee80211_sub_if_data *sdata,
struct ieee80211_local *local = sdata->local;
struct sta_info *sta;
- mutex_lock(&local->sta_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
sta = sta_info_get_bss(sdata, mgmt->sa);
if (!sta)
- goto out;
+ return;
switch (mgmt->u.action.u.s1g.action_code) {
case WLAN_S1G_TWT_SETUP:
@@ -169,9 +170,6 @@ void ieee80211_s1g_rx_twt_action(struct ieee80211_sub_if_data *sdata,
default:
break;
}
-
-out:
- mutex_unlock(&local->sta_mtx);
}
void ieee80211_s1g_status_twt_action(struct ieee80211_sub_if_data *sdata,
@@ -181,11 +179,11 @@ void ieee80211_s1g_status_twt_action(struct ieee80211_sub_if_data *sdata,
struct ieee80211_local *local = sdata->local;
struct sta_info *sta;
- mutex_lock(&local->sta_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
sta = sta_info_get_bss(sdata, mgmt->da);
if (!sta)
- goto out;
+ return;
switch (mgmt->u.action.u.s1g.action_code) {
case WLAN_S1G_TWT_SETUP:
@@ -195,7 +193,4 @@ void ieee80211_s1g_status_twt_action(struct ieee80211_sub_if_data *sdata,
default:
break;
}
-
-out:
- mutex_unlock(&local->sta_mtx);
}
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 0805aa8603c6..f9d5842601fa 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -9,7 +9,7 @@
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2013-2015 Intel Mobile Communications GmbH
* Copyright 2016-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2023 Intel Corporation
+ * Copyright (C) 2018-2024 Intel Corporation
*/
#include <linux/if_arp.h>
@@ -187,12 +187,6 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
else if (ieee80211_hw_check(&local->hw, SIGNAL_UNSPEC))
bss_meta.signal = (rx_status->signal * 100) / local->hw.max_signal;
- bss_meta.scan_width = NL80211_BSS_CHAN_WIDTH_20;
- if (rx_status->bw == RATE_INFO_BW_5)
- bss_meta.scan_width = NL80211_BSS_CHAN_WIDTH_5;
- else if (rx_status->bw == RATE_INFO_BW_10)
- bss_meta.scan_width = NL80211_BSS_CHAN_WIDTH_10;
-
bss_meta.chan = channel;
rcu_read_lock();
@@ -200,11 +194,32 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
if (scan_sdata && scan_sdata->vif.type == NL80211_IFTYPE_STATION &&
scan_sdata->vif.cfg.assoc &&
ieee80211_have_rx_timestamp(rx_status)) {
- bss_meta.parent_tsf =
- ieee80211_calculate_rx_timestamp(local, rx_status,
- len + FCS_LEN, 24);
- ether_addr_copy(bss_meta.parent_bssid,
- scan_sdata->vif.bss_conf.bssid);
+ struct ieee80211_bss_conf *link_conf = NULL;
+
+ /* for an MLO connection, set the TSF data only in case we have
+ * an indication on which of the links the frame was received
+ */
+ if (ieee80211_vif_is_mld(&scan_sdata->vif)) {
+ if (rx_status->link_valid) {
+ s8 link_id = rx_status->link_id;
+
+ link_conf =
+ rcu_dereference(scan_sdata->vif.link_conf[link_id]);
+ }
+ } else {
+ link_conf = &scan_sdata->vif.bss_conf;
+ }
+
+ if (link_conf) {
+ bss_meta.parent_tsf =
+ ieee80211_calculate_rx_timestamp(local,
+ rx_status,
+ len + FCS_LEN,
+ 24);
+
+ ether_addr_copy(bss_meta.parent_bssid,
+ link_conf->bssid);
+ }
}
rcu_read_unlock();
@@ -222,14 +237,18 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
}
static bool ieee80211_scan_accept_presp(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_channel *channel,
u32 scan_flags, const u8 *da)
{
if (!sdata)
return false;
- /* accept broadcast for OCE */
- if (scan_flags & NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP &&
- is_broadcast_ether_addr(da))
+
+ /* accept broadcast on 6 GHz and for OCE */
+ if (is_broadcast_ether_addr(da) &&
+ (channel->band == NL80211_BAND_6GHZ ||
+ scan_flags & NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP))
return true;
+
if (scan_flags & NL80211_SCAN_FLAG_RANDOM_ADDR)
return true;
return ether_addr_equal(da, sdata->vif.addr);
@@ -274,10 +293,16 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)
* the beacon/proberesp rx gives us an opportunity to upgrade
* to active scan
*/
- set_bit(SCAN_BEACON_DONE, &local->scanning);
- ieee80211_queue_delayed_work(&local->hw, &local->scan_work, 0);
+ set_bit(SCAN_BEACON_DONE, &local->scanning);
+ wiphy_delayed_work_queue(local->hw.wiphy, &local->scan_work, 0);
}
+ channel = ieee80211_get_channel_khz(local->hw.wiphy,
+ ieee80211_rx_status_to_khz(rx_status));
+
+ if (!channel || channel->flags & IEEE80211_CHAN_DISABLED)
+ return;
+
if (ieee80211_is_probe_resp(mgmt->frame_control)) {
struct cfg80211_scan_request *scan_req;
struct cfg80211_sched_scan_request *sched_scan_req;
@@ -295,19 +320,15 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)
/* ignore ProbeResp to foreign address or non-bcast (OCE)
* unless scanning with randomised address
*/
- if (!ieee80211_scan_accept_presp(sdata1, scan_req_flags,
+ if (!ieee80211_scan_accept_presp(sdata1, channel,
+ scan_req_flags,
mgmt->da) &&
- !ieee80211_scan_accept_presp(sdata2, sched_scan_req_flags,
+ !ieee80211_scan_accept_presp(sdata2, channel,
+ sched_scan_req_flags,
mgmt->da))
return;
}
- channel = ieee80211_get_channel_khz(local->hw.wiphy,
- ieee80211_rx_status_to_khz(rx_status));
-
- if (!channel || channel->flags & IEEE80211_CHAN_DISABLED)
- return;
-
bss = ieee80211_bss_info_update(local, rx_status,
mgmt, skb->len,
channel);
@@ -315,22 +336,11 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)
ieee80211_rx_bss_put(local, bss);
}
-static void
-ieee80211_prepare_scan_chandef(struct cfg80211_chan_def *chandef,
- enum nl80211_bss_scan_width scan_width)
+static void ieee80211_prepare_scan_chandef(struct cfg80211_chan_def *chandef)
{
memset(chandef, 0, sizeof(*chandef));
- switch (scan_width) {
- case NL80211_BSS_CHAN_WIDTH_5:
- chandef->width = NL80211_CHAN_WIDTH_5;
- break;
- case NL80211_BSS_CHAN_WIDTH_10:
- chandef->width = NL80211_CHAN_WIDTH_10;
- break;
- default:
- chandef->width = NL80211_CHAN_WIDTH_20_NOHT;
- break;
- }
+
+ chandef->width = NL80211_CHAN_WIDTH_20_NOHT;
}
/* return false if no more work */
@@ -344,7 +354,7 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_sub_if_data *sdata)
u32 flags = 0;
req = rcu_dereference_protected(local->scan_req,
- lockdep_is_held(&local->mtx));
+ lockdep_is_held(&local->hw.wiphy->mtx));
if (test_bit(SCAN_HW_CANCELLED, &local->scanning))
return false;
@@ -378,7 +388,7 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_sub_if_data *sdata)
}
local->hw_scan_req->req.n_channels = n_chans;
- ieee80211_prepare_scan_chandef(&chandef, req->scan_width);
+ ieee80211_prepare_scan_chandef(&chandef);
if (req->flags & NL80211_SCAN_FLAG_MIN_PREQ_CONTENT)
flags |= IEEE80211_PROBE_FLAG_MIN_CONTENT;
@@ -409,7 +419,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
struct ieee80211_sub_if_data *scan_sdata;
struct ieee80211_sub_if_data *sdata;
- lockdep_assert_held(&local->mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
/*
* It's ok to abort a not-yet-running scan (that
@@ -424,7 +434,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
return;
scan_sdata = rcu_dereference_protected(local->scan_sdata,
- lockdep_is_held(&local->mtx));
+ lockdep_is_held(&local->hw.wiphy->mtx));
if (hw_scan && !aborted &&
!ieee80211_hw_check(&local->hw, SINGLE_SCAN_ON_ALL_BANDS) &&
@@ -433,7 +443,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
rc = drv_hw_scan(local,
rcu_dereference_protected(local->scan_sdata,
- lockdep_is_held(&local->mtx)),
+ lockdep_is_held(&local->hw.wiphy->mtx)),
local->hw_scan_req);
if (rc == 0)
@@ -450,7 +460,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
local->hw_scan_req = NULL;
scan_req = rcu_dereference_protected(local->scan_req,
- lockdep_is_held(&local->mtx));
+ lockdep_is_held(&local->hw.wiphy->mtx));
RCU_INIT_POINTER(local->scan_req, NULL);
RCU_INIT_POINTER(local->scan_sdata, NULL);
@@ -505,7 +515,7 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw,
memcpy(&local->scan_info, info, sizeof(*info));
- ieee80211_queue_delayed_work(&local->hw, &local->scan_work, 0);
+ wiphy_delayed_work_queue(local->hw.wiphy, &local->scan_work, 0);
}
EXPORT_SYMBOL(ieee80211_scan_completed);
@@ -545,8 +555,7 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local,
/* We need to set power level at maximum rate for scanning. */
ieee80211_hw_config(local, 0);
- ieee80211_queue_delayed_work(&local->hw,
- &local->scan_work, 0);
+ wiphy_delayed_work_queue(local->hw.wiphy, &local->scan_work, 0);
return 0;
}
@@ -556,20 +565,18 @@ static bool __ieee80211_can_leave_ch(struct ieee80211_sub_if_data *sdata)
struct ieee80211_local *local = sdata->local;
struct ieee80211_sub_if_data *sdata_iter;
+ lockdep_assert_wiphy(local->hw.wiphy);
+
if (!ieee80211_is_radar_required(local))
return true;
if (!regulatory_pre_cac_allowed(local->hw.wiphy))
return false;
- mutex_lock(&local->iflist_mtx);
list_for_each_entry(sdata_iter, &local->interfaces, list) {
- if (sdata_iter->wdev.cac_started) {
- mutex_unlock(&local->iflist_mtx);
+ if (sdata_iter->wdev.cac_started)
return false;
- }
}
- mutex_unlock(&local->iflist_mtx);
return true;
}
@@ -592,7 +599,7 @@ static bool ieee80211_can_scan(struct ieee80211_local *local,
void ieee80211_run_deferred_scan(struct ieee80211_local *local)
{
- lockdep_assert_held(&local->mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
if (!local->scan_req || local->scanning)
return;
@@ -600,11 +607,11 @@ void ieee80211_run_deferred_scan(struct ieee80211_local *local)
if (!ieee80211_can_scan(local,
rcu_dereference_protected(
local->scan_sdata,
- lockdep_is_held(&local->mtx))))
+ lockdep_is_held(&local->hw.wiphy->mtx))))
return;
- ieee80211_queue_delayed_work(&local->hw, &local->scan_work,
- round_jiffies_relative(0));
+ wiphy_delayed_work_queue(local->hw.wiphy, &local->scan_work,
+ round_jiffies_relative(0));
}
static void ieee80211_send_scan_probe_req(struct ieee80211_sub_if_data *sdata,
@@ -645,7 +652,7 @@ static void ieee80211_scan_state_send_probe(struct ieee80211_local *local,
u32 flags = 0, tx_flags;
scan_req = rcu_dereference_protected(local->scan_req,
- lockdep_is_held(&local->mtx));
+ lockdep_is_held(&local->hw.wiphy->mtx));
tx_flags = IEEE80211_TX_INTFL_OFFCHAN_TX_OK;
if (scan_req->no_cck)
@@ -656,7 +663,7 @@ static void ieee80211_scan_state_send_probe(struct ieee80211_local *local,
flags |= IEEE80211_PROBE_FLAG_RANDOM_SN;
sdata = rcu_dereference_protected(local->scan_sdata,
- lockdep_is_held(&local->mtx));
+ lockdep_is_held(&local->hw.wiphy->mtx));
for (i = 0; i < scan_req->n_ssids; i++)
ieee80211_send_scan_probe_req(
@@ -681,11 +688,26 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,
bool hw_scan = local->ops->hw_scan;
int rc;
- lockdep_assert_held(&local->mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
if (local->scan_req)
return -EBUSY;
+ /* For an MLO connection, if a link ID was specified, validate that it
+ * is indeed active. If no link ID was specified, select one of the
+ * active links.
+ */
+ if (ieee80211_vif_is_mld(&sdata->vif)) {
+ if (req->tsf_report_link_id >= 0) {
+ if (!(sdata->vif.active_links &
+ BIT(req->tsf_report_link_id)))
+ return -EINVAL;
+ } else {
+ req->tsf_report_link_id =
+ __ffs(sdata->vif.active_links);
+ }
+ }
+
if (!__ieee80211_can_leave_ch(sdata))
return -EBUSY;
@@ -734,6 +756,8 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,
local->hw_scan_req->req.duration = req->duration;
local->hw_scan_req->req.duration_mandatory =
req->duration_mandatory;
+ local->hw_scan_req->req.tsf_report_link_id =
+ req->tsf_report_link_id;
local->hw_scan_band = 0;
local->hw_scan_req->req.n_6ghz_params = req->n_6ghz_params;
@@ -795,8 +819,8 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,
}
/* Now, just wait a bit and we are all done! */
- ieee80211_queue_delayed_work(&local->hw, &local->scan_work,
- next_delay);
+ wiphy_delayed_work_queue(local->hw.wiphy, &local->scan_work,
+ next_delay);
return 0;
} else {
/* Do normal software scan */
@@ -861,12 +885,13 @@ static void ieee80211_scan_state_decision(struct ieee80211_local *local,
enum mac80211_scan_state next_scan_state;
struct cfg80211_scan_request *scan_req;
+ lockdep_assert_wiphy(local->hw.wiphy);
+
/*
* check if at least one STA interface is associated,
* check if at least one STA interface has pending tx frames
* and grab the lowest used beacon interval
*/
- mutex_lock(&local->iflist_mtx);
list_for_each_entry(sdata, &local->interfaces, list) {
if (!ieee80211_sdata_running(sdata))
continue;
@@ -882,10 +907,9 @@ static void ieee80211_scan_state_decision(struct ieee80211_local *local,
}
}
}
- mutex_unlock(&local->iflist_mtx);
scan_req = rcu_dereference_protected(local->scan_req,
- lockdep_is_held(&local->mtx));
+ lockdep_is_held(&local->hw.wiphy->mtx));
next_chan = scan_req->channels[local->scan_channel_idx];
@@ -922,11 +946,10 @@ static void ieee80211_scan_state_set_channel(struct ieee80211_local *local,
{
int skip;
struct ieee80211_channel *chan;
- enum nl80211_bss_scan_width oper_scan_width;
struct cfg80211_scan_request *scan_req;
scan_req = rcu_dereference_protected(local->scan_req,
- lockdep_is_held(&local->mtx));
+ lockdep_is_held(&local->hw.wiphy->mtx));
skip = 0;
chan = scan_req->channels[local->scan_channel_idx];
@@ -936,42 +959,21 @@ static void ieee80211_scan_state_set_channel(struct ieee80211_local *local,
local->scan_chandef.freq1_offset = chan->freq_offset;
local->scan_chandef.center_freq2 = 0;
- /* For scanning on the S1G band, ignore scan_width (which is constant
- * across all channels) for now since channel width is specific to each
- * channel. Detect the required channel width here and likely revisit
- * later. Maybe scan_width could be used to build the channel scan list?
+ /* For scanning on the S1G band, detect the channel width according to
+ * the channel being scanned.
*/
if (chan->band == NL80211_BAND_S1GHZ) {
local->scan_chandef.width = ieee80211_s1g_channel_width(chan);
goto set_channel;
}
- switch (scan_req->scan_width) {
- case NL80211_BSS_CHAN_WIDTH_5:
- local->scan_chandef.width = NL80211_CHAN_WIDTH_5;
- break;
- case NL80211_BSS_CHAN_WIDTH_10:
- local->scan_chandef.width = NL80211_CHAN_WIDTH_10;
- break;
- default:
- case NL80211_BSS_CHAN_WIDTH_20:
- /* If scanning on oper channel, use whatever channel-type
- * is currently in use.
- */
- oper_scan_width = cfg80211_chandef_to_scan_width(
- &local->_oper_chandef);
- if (chan == local->_oper_chandef.chan &&
- oper_scan_width == scan_req->scan_width)
- local->scan_chandef = local->_oper_chandef;
- else
- local->scan_chandef.width = NL80211_CHAN_WIDTH_20_NOHT;
- break;
- case NL80211_BSS_CHAN_WIDTH_1:
- case NL80211_BSS_CHAN_WIDTH_2:
- /* shouldn't get here, S1G handled above */
- WARN_ON(1);
- break;
- }
+ /* If scanning on oper channel, use whatever channel-type
+ * is currently in use.
+ */
+ if (chan == local->_oper_chandef.chan)
+ local->scan_chandef = local->_oper_chandef;
+ else
+ local->scan_chandef.width = NL80211_CHAN_WIDTH_20_NOHT;
set_channel:
if (ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL))
@@ -1043,7 +1045,7 @@ static void ieee80211_scan_state_resume(struct ieee80211_local *local,
local->next_scan_state = SCAN_SET_CHANNEL;
}
-void ieee80211_scan_work(struct work_struct *work)
+void ieee80211_scan_work(struct wiphy *wiphy, struct wiphy_work *work)
{
struct ieee80211_local *local =
container_of(work, struct ieee80211_local, scan_work.work);
@@ -1052,7 +1054,7 @@ void ieee80211_scan_work(struct work_struct *work)
unsigned long next_delay = 0;
bool aborted;
- mutex_lock(&local->mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
if (!ieee80211_can_run_worker(local)) {
aborted = true;
@@ -1060,9 +1062,9 @@ void ieee80211_scan_work(struct work_struct *work)
}
sdata = rcu_dereference_protected(local->scan_sdata,
- lockdep_is_held(&local->mtx));
+ lockdep_is_held(&local->hw.wiphy->mtx));
scan_req = rcu_dereference_protected(local->scan_req,
- lockdep_is_held(&local->mtx));
+ lockdep_is_held(&local->hw.wiphy->mtx));
/* When scanning on-channel, the first-callback means completed. */
if (test_bit(SCAN_ONCHANNEL_SCANNING, &local->scanning)) {
@@ -1076,7 +1078,7 @@ void ieee80211_scan_work(struct work_struct *work)
}
if (!sdata || !scan_req)
- goto out;
+ return;
if (!local->scanning) {
int rc;
@@ -1085,13 +1087,12 @@ void ieee80211_scan_work(struct work_struct *work)
RCU_INIT_POINTER(local->scan_sdata, NULL);
rc = __ieee80211_start_scan(sdata, scan_req);
- if (rc) {
- /* need to complete scan in cfg80211 */
- rcu_assign_pointer(local->scan_req, scan_req);
- aborted = true;
- goto out_complete;
- } else
- goto out;
+ if (!rc)
+ return;
+ /* need to complete scan in cfg80211 */
+ rcu_assign_pointer(local->scan_req, scan_req);
+ aborted = true;
+ goto out_complete;
}
clear_bit(SCAN_BEACON_WAIT, &local->scanning);
@@ -1137,38 +1138,32 @@ void ieee80211_scan_work(struct work_struct *work)
}
} while (next_delay == 0);
- ieee80211_queue_delayed_work(&local->hw, &local->scan_work, next_delay);
- goto out;
+ wiphy_delayed_work_queue(local->hw.wiphy, &local->scan_work,
+ next_delay);
+ return;
out_complete:
__ieee80211_scan_completed(&local->hw, aborted);
-out:
- mutex_unlock(&local->mtx);
}
int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata,
struct cfg80211_scan_request *req)
{
- int res;
-
- mutex_lock(&sdata->local->mtx);
- res = __ieee80211_start_scan(sdata, req);
- mutex_unlock(&sdata->local->mtx);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
- return res;
+ return __ieee80211_start_scan(sdata, req);
}
int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata,
const u8 *ssid, u8 ssid_len,
struct ieee80211_channel **channels,
- unsigned int n_channels,
- enum nl80211_bss_scan_width scan_width)
+ unsigned int n_channels)
{
struct ieee80211_local *local = sdata->local;
int ret = -EBUSY, i, n_ch = 0;
enum nl80211_band band;
- mutex_lock(&local->mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
/* busy scanning */
if (local->scan_req)
@@ -1219,13 +1214,11 @@ int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata,
local->int_scan_req->ssids = &local->scan_ssid;
local->int_scan_req->n_ssids = 1;
- local->int_scan_req->scan_width = scan_width;
memcpy(local->int_scan_req->ssids[0].ssid, ssid, IEEE80211_MAX_SSID_LEN);
local->int_scan_req->ssids[0].ssid_len = ssid_len;
ret = __ieee80211_start_scan(sdata, sdata->local->int_scan_req);
unlock:
- mutex_unlock(&local->mtx);
return ret;
}
@@ -1252,9 +1245,8 @@ void ieee80211_scan_cancel(struct ieee80211_local *local)
* after the scan was completed/aborted.
*/
- mutex_lock(&local->mtx);
if (!local->scan_req)
- goto out;
+ return;
/*
* We have a scan running and the driver already reported completion,
@@ -1264,7 +1256,7 @@ void ieee80211_scan_cancel(struct ieee80211_local *local)
if (test_bit(SCAN_HW_SCANNING, &local->scanning) &&
test_bit(SCAN_COMPLETED, &local->scanning)) {
set_bit(SCAN_HW_CANCELLED, &local->scanning);
- goto out;
+ return;
}
if (test_bit(SCAN_HW_SCANNING, &local->scanning)) {
@@ -1276,21 +1268,14 @@ void ieee80211_scan_cancel(struct ieee80211_local *local)
if (local->ops->cancel_hw_scan)
drv_cancel_hw_scan(local,
rcu_dereference_protected(local->scan_sdata,
- lockdep_is_held(&local->mtx)));
- goto out;
+ lockdep_is_held(&local->hw.wiphy->mtx)));
+ return;
}
- /*
- * If the work is currently running, it must be blocked on
- * the mutex, but we'll set scan_sdata = NULL and it'll
- * simply exit once it acquires the mutex.
- */
- cancel_delayed_work(&local->scan_work);
+ wiphy_delayed_work_cancel(local->hw.wiphy, &local->scan_work);
/* and clean up */
memset(&local->scan_info, 0, sizeof(local->scan_info));
__ieee80211_scan_completed(&local->hw, true);
-out:
- mutex_unlock(&local->mtx);
}
int __ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata,
@@ -1305,12 +1290,12 @@ int __ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata,
u8 *ie;
u32 flags = 0;
- iebufsz = local->scan_ies_len + req->ie_len;
+ lockdep_assert_wiphy(local->hw.wiphy);
- lockdep_assert_held(&local->mtx);
+ iebufsz = local->scan_ies_len + req->ie_len;
if (!local->ops->sched_scan_start)
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
for (i = 0; i < NUM_NL80211_BANDS; i++) {
if (local->hw.wiphy->bands[i]) {
@@ -1329,7 +1314,7 @@ int __ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata,
goto out;
}
- ieee80211_prepare_scan_chandef(&chandef, req->scan_width);
+ ieee80211_prepare_scan_chandef(&chandef);
ieee80211_build_preq_ies(sdata, ie, num_bands * iebufsz,
&sched_scan_ies, req->ie,
@@ -1358,19 +1343,13 @@ int ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata,
struct cfg80211_sched_scan_request *req)
{
struct ieee80211_local *local = sdata->local;
- int ret;
- mutex_lock(&local->mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
- if (rcu_access_pointer(local->sched_scan_sdata)) {
- mutex_unlock(&local->mtx);
+ if (rcu_access_pointer(local->sched_scan_sdata))
return -EBUSY;
- }
-
- ret = __ieee80211_request_sched_scan_start(sdata, req);
- mutex_unlock(&local->mtx);
- return ret;
+ return __ieee80211_request_sched_scan_start(sdata, req);
}
int ieee80211_request_sched_scan_stop(struct ieee80211_local *local)
@@ -1378,25 +1357,21 @@ int ieee80211_request_sched_scan_stop(struct ieee80211_local *local)
struct ieee80211_sub_if_data *sched_scan_sdata;
int ret = -ENOENT;
- mutex_lock(&local->mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
- if (!local->ops->sched_scan_stop) {
- ret = -ENOTSUPP;
- goto out;
- }
+ if (!local->ops->sched_scan_stop)
+ return -EOPNOTSUPP;
/* We don't want to restart sched scan anymore. */
RCU_INIT_POINTER(local->sched_scan_req, NULL);
sched_scan_sdata = rcu_dereference_protected(local->sched_scan_sdata,
- lockdep_is_held(&local->mtx));
+ lockdep_is_held(&local->hw.wiphy->mtx));
if (sched_scan_sdata) {
ret = drv_sched_scan_stop(local, sched_scan_sdata);
if (!ret)
RCU_INIT_POINTER(local->sched_scan_sdata, NULL);
}
-out:
- mutex_unlock(&local->mtx);
return ret;
}
@@ -1413,24 +1388,21 @@ EXPORT_SYMBOL(ieee80211_sched_scan_results);
void ieee80211_sched_scan_end(struct ieee80211_local *local)
{
- mutex_lock(&local->mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
- if (!rcu_access_pointer(local->sched_scan_sdata)) {
- mutex_unlock(&local->mtx);
+ if (!rcu_access_pointer(local->sched_scan_sdata))
return;
- }
RCU_INIT_POINTER(local->sched_scan_sdata, NULL);
/* If sched scan was aborted by the driver. */
RCU_INIT_POINTER(local->sched_scan_req, NULL);
- mutex_unlock(&local->mtx);
-
- cfg80211_sched_scan_stopped(local->hw.wiphy, 0);
+ cfg80211_sched_scan_stopped_locked(local->hw.wiphy, 0);
}
-void ieee80211_sched_scan_stopped_work(struct work_struct *work)
+void ieee80211_sched_scan_stopped_work(struct wiphy *wiphy,
+ struct wiphy_work *work)
{
struct ieee80211_local *local =
container_of(work, struct ieee80211_local,
@@ -1453,6 +1425,6 @@ void ieee80211_sched_scan_stopped(struct ieee80211_hw *hw)
if (local->in_reconfig)
return;
- schedule_work(&local->sched_scan_stopped_work);
+ wiphy_work_queue(hw->wiphy, &local->sched_scan_stopped_work);
}
EXPORT_SYMBOL(ieee80211_sched_scan_stopped);
diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c
index 871cdac2d0f4..55959b0b24c5 100644
--- a/net/mac80211/spectmgmt.c
+++ b/net/mac80211/spectmgmt.c
@@ -9,7 +9,7 @@
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2007-2008, Intel Corporation
* Copyright 2008, Johannes Berg <johannes@sipsolutions.net>
- * Copyright (C) 2018, 2020, 2022 Intel Corporation
+ * Copyright (C) 2018, 2020, 2022-2023 Intel Corporation
*/
#include <linux/ieee80211.h>
@@ -33,12 +33,14 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata,
struct cfg80211_chan_def new_vht_chandef = {};
const struct ieee80211_sec_chan_offs_ie *sec_chan_offs;
const struct ieee80211_wide_bw_chansw_ie *wide_bw_chansw_ie;
+ const struct ieee80211_bandwidth_indication *bwi;
int secondary_channel_offset = -1;
memset(csa_ie, 0, sizeof(*csa_ie));
sec_chan_offs = elems->sec_chan_offs;
wide_bw_chansw_ie = elems->wide_bw_chansw_ie;
+ bwi = elems->bandwidth_indication;
if (conn_flags & (IEEE80211_CONN_DISABLE_HT |
IEEE80211_CONN_DISABLE_40MHZ)) {
@@ -132,7 +134,14 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata,
break;
}
- if (wide_bw_chansw_ie) {
+ if (bwi) {
+ /* start with the CSA one */
+ new_vht_chandef = csa_ie->chandef;
+ /* and update the width accordingly */
+ /* FIXME: support 160/320 */
+ ieee80211_chandef_eht_oper(&bwi->info, true, true,
+ &new_vht_chandef);
+ } else if (wide_bw_chansw_ie) {
u8 new_seg1 = wide_bw_chansw_ie->new_center_freq_seg1;
struct ieee80211_vht_operation vht_oper = {
.chan_width =
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 7751f8ba960e..4391d8dd634b 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -40,7 +40,7 @@
* either sta_info_insert() or sta_info_insert_rcu(); only in the latter
* case (which acquires an rcu read section but must not be called from
* within one) will the pointer still be valid after the call. Note that
- * the caller may not do much with the STA info before inserting it, in
+ * the caller may not do much with the STA info before inserting it; in
* particular, it may not start any mesh peer link management or add
* encryption keys.
*
@@ -58,7 +58,7 @@
* In order to remove a STA info structure, various sta_info_destroy_*()
* calls are available.
*
- * There is no concept of ownership on a STA entry, each structure is
+ * There is no concept of ownership on a STA entry; each structure is
* owned by the global hash table/list until it is removed. All users of
* the structure need to be RCU protected so that the structure won't be
* freed before they are done using it.
@@ -88,7 +88,6 @@ static const struct rhashtable_params link_sta_rht_params = {
.max_size = CONFIG_MAC80211_STA_HASH_MAX_SIZE,
};
-/* Caller must hold local->sta_mtx */
static int sta_info_hash_del(struct ieee80211_local *local,
struct sta_info *sta)
{
@@ -99,19 +98,36 @@ static int sta_info_hash_del(struct ieee80211_local *local,
static int link_sta_info_hash_add(struct ieee80211_local *local,
struct link_sta_info *link_sta)
{
- lockdep_assert_held(&local->sta_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
+
return rhltable_insert(&local->link_sta_hash,
- &link_sta->link_hash_node,
- link_sta_rht_params);
+ &link_sta->link_hash_node, link_sta_rht_params);
}
static int link_sta_info_hash_del(struct ieee80211_local *local,
struct link_sta_info *link_sta)
{
- lockdep_assert_held(&local->sta_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
+
return rhltable_remove(&local->link_sta_hash,
- &link_sta->link_hash_node,
- link_sta_rht_params);
+ &link_sta->link_hash_node, link_sta_rht_params);
+}
+
+void ieee80211_purge_sta_txqs(struct sta_info *sta)
+{
+ struct ieee80211_local *local = sta->sdata->local;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) {
+ struct txq_info *txqi;
+
+ if (!sta->sta.txq[i])
+ continue;
+
+ txqi = to_txq_info(sta->sta.txq[i]);
+
+ ieee80211_txq_purge(local, txqi);
+ }
}
static void __cleanup_single_sta(struct sta_info *sta)
@@ -140,16 +156,7 @@ static void __cleanup_single_sta(struct sta_info *sta)
atomic_dec(&ps->num_sta_ps);
}
- for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) {
- struct txq_info *txqi;
-
- if (!sta->sta.txq[i])
- continue;
-
- txqi = to_txq_info(sta->sta.txq[i]);
-
- ieee80211_txq_purge(local, txqi);
- }
+ ieee80211_purge_sta_txqs(sta);
for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
local->total_ps_buffered -= skb_queue_len(&sta->ps_tx_buf[ac]);
@@ -331,7 +338,7 @@ struct sta_info *sta_info_get_by_idx(struct ieee80211_sub_if_data *sdata,
int i = 0;
list_for_each_entry_rcu(sta, &local->sta_list, list,
- lockdep_is_held(&local->sta_mtx)) {
+ lockdep_is_held(&local->hw.wiphy->mtx)) {
if (sdata != sta->sdata)
continue;
if (i < idx) {
@@ -355,10 +362,9 @@ static void sta_remove_link(struct sta_info *sta, unsigned int link_id,
struct sta_link_alloc *alloc = NULL;
struct link_sta_info *link_sta;
- link_sta = rcu_access_pointer(sta->link[link_id]);
- if (link_sta != &sta->deflink)
- lockdep_assert_held(&sta->local->sta_mtx);
+ lockdep_assert_wiphy(sta->local->hw.wiphy);
+ link_sta = rcu_access_pointer(sta->link[link_id]);
if (WARN_ON(!link_sta))
return;
@@ -398,7 +404,10 @@ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta)
int i;
for (i = 0; i < ARRAY_SIZE(sta->link); i++) {
- if (!(sta->sta.valid_links & BIT(i)))
+ struct link_sta_info *link_sta;
+
+ link_sta = rcu_access_pointer(sta->link[i]);
+ if (!link_sta)
continue;
sta_remove_link(sta, i, false);
@@ -437,7 +446,6 @@ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta)
kfree(sta);
}
-/* Caller must hold local->sta_mtx */
static int sta_info_hash_add(struct ieee80211_local *local,
struct sta_info *sta)
{
@@ -556,8 +564,7 @@ __sta_info_alloc(struct ieee80211_sub_if_data *sdata,
spin_lock_init(&sta->lock);
spin_lock_init(&sta->ps_lock);
INIT_WORK(&sta->drv_deliver_wk, sta_deliver_ps_frames);
- INIT_WORK(&sta->ampdu_mlme.work, ieee80211_ba_session_work);
- mutex_init(&sta->ampdu_mlme.mtx);
+ wiphy_work_init(&sta->ampdu_mlme.work, ieee80211_ba_session_work);
#ifdef CONFIG_MAC80211_MESH
if (ieee80211_vif_is_mesh(&sdata->vif)) {
sta->mesh = kzalloc(sizeof(*sta->mesh), gfp);
@@ -717,6 +724,8 @@ static int sta_info_insert_check(struct sta_info *sta)
{
struct ieee80211_sub_if_data *sdata = sta->sdata;
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
+
/*
* Can't be a WARN_ON because it can be triggered through a race:
* something inserts a STA (on one CPU) without holding the RTNL
@@ -734,7 +743,6 @@ static int sta_info_insert_check(struct sta_info *sta)
* for correctness.
*/
rcu_read_lock();
- lockdep_assert_held(&sdata->local->sta_mtx);
if (ieee80211_hw_check(&sdata->local->hw, NEEDS_UNIQUE_STA_ADDR) &&
ieee80211_find_sta_by_ifaddr(&sdata->local->hw, sta->addr, NULL)) {
rcu_read_unlock();
@@ -808,11 +816,6 @@ ieee80211_recalc_p2p_go_ps_allowed(struct ieee80211_sub_if_data *sdata)
}
}
-/*
- * should be called with sta_mtx locked
- * this function replaces the mutex lock
- * with a RCU lock
- */
static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU)
{
struct ieee80211_local *local = sta->local;
@@ -820,7 +823,7 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU)
struct station_info *sinfo = NULL;
int err = 0;
- lockdep_assert_held(&local->sta_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
/* check if STA exists already */
if (sta_info_get_bss(sdata, sta->sta.addr)) {
@@ -884,7 +887,7 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU)
struct link_sta_info *link_sta;
link_sta = rcu_dereference_protected(sta->link[i],
- lockdep_is_held(&local->sta_mtx));
+ lockdep_is_held(&local->hw.wiphy->mtx));
if (!link_sta)
continue;
@@ -906,11 +909,12 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU)
/* move reference to rcu-protected */
rcu_read_lock();
- mutex_unlock(&local->sta_mtx);
if (ieee80211_vif_is_mesh(&sdata->vif))
mesh_accept_plinks_update(sdata);
+ ieee80211_check_fast_xmit(sta);
+
return 0;
out_remove:
if (sta->sta.valid_links)
@@ -922,7 +926,6 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU)
synchronize_net();
out_cleanup:
cleanup_single_sta(sta);
- mutex_unlock(&local->sta_mtx);
kfree(sinfo);
rcu_read_lock();
return err;
@@ -934,13 +937,11 @@ int sta_info_insert_rcu(struct sta_info *sta) __acquires(RCU)
int err;
might_sleep();
-
- mutex_lock(&local->sta_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
err = sta_info_insert_check(sta);
if (err) {
sta_info_free(local, sta);
- mutex_unlock(&local->sta_mtx);
rcu_read_lock();
return err;
}
@@ -1219,7 +1220,7 @@ static int __must_check __sta_info_destroy_part1(struct sta_info *sta)
local = sta->local;
sdata = sta->sdata;
- lockdep_assert_held(&local->sta_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
/*
* Before removing the station from the driver and
@@ -1244,7 +1245,7 @@ static int __must_check __sta_info_destroy_part1(struct sta_info *sta)
continue;
link_sta = rcu_dereference_protected(sta->link[i],
- lockdep_is_held(&local->sta_mtx));
+ lockdep_is_held(&local->hw.wiphy->mtx));
link_sta_info_hash_del(local, link_sta);
}
@@ -1279,6 +1280,8 @@ static int _sta_info_move_state(struct sta_info *sta,
enum ieee80211_sta_state new_state,
bool recalc)
{
+ struct ieee80211_local *local = sta->local;
+
might_sleep();
if (sta->sta_state == new_state)
@@ -1354,6 +1357,24 @@ static int _sta_info_move_state(struct sta_info *sta,
} else if (sta->sta_state == IEEE80211_STA_AUTHORIZED) {
ieee80211_vif_dec_num_mcast(sta->sdata);
clear_bit(WLAN_STA_AUTHORIZED, &sta->_flags);
+
+ /*
+ * If we have encryption offload, flush (station) queues
+ * (after ensuring concurrent TX completed) so we won't
+ * transmit anything later unencrypted if/when keys are
+ * also removed, which might otherwise happen depending
+ * on how the hardware offload works.
+ */
+ if (local->ops->set_key) {
+ synchronize_net();
+ if (local->ops->flush_sta)
+ drv_flush_sta(local, sta->sdata, sta);
+ else
+ ieee80211_flush_queues(local,
+ sta->sdata,
+ false);
+ }
+
ieee80211_clear_fast_xmit(sta);
ieee80211_clear_fast_rx(sta);
}
@@ -1397,26 +1418,28 @@ static void __sta_info_destroy_part2(struct sta_info *sta, bool recalc)
* after _part1 and before _part2!
*/
+ /*
+ * There's a potential race in _part1 where we set WLAN_STA_BLOCK_BA
+ * but someone might have just gotten past a check, and not yet into
+ * queuing the work/creating the data/etc.
+ *
+ * Do another round of destruction so that the worker is certainly
+ * canceled before we later free the station.
+ *
+ * Since this is after synchronize_rcu()/synchronize_net() we're now
+ * certain that nobody can actually hold a reference to the STA and
+ * be calling e.g. ieee80211_start_tx_ba_session().
+ */
+ ieee80211_sta_tear_down_BA_sessions(sta, AGG_STOP_DESTROY_STA);
+
might_sleep();
- lockdep_assert_held(&local->sta_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
if (sta->sta_state == IEEE80211_STA_AUTHORIZED) {
ret = _sta_info_move_state(sta, IEEE80211_STA_ASSOC, recalc);
WARN_ON_ONCE(ret);
}
- /* Flush queues before removing keys, as that might remove them
- * from hardware, and then depending on the offload method, any
- * frames sitting on hardware queues might be sent out without
- * any encryption at all.
- */
- if (local->ops->set_key) {
- if (local->ops->flush_sta)
- drv_flush_sta(local, sta->sdata, sta);
- else
- ieee80211_flush_queues(local, sta->sdata, false);
- }
-
/* now keys can no longer be reached */
ieee80211_free_sta_keys(local, sta);
@@ -1474,28 +1497,22 @@ int __must_check __sta_info_destroy(struct sta_info *sta)
int sta_info_destroy_addr(struct ieee80211_sub_if_data *sdata, const u8 *addr)
{
struct sta_info *sta;
- int ret;
- mutex_lock(&sdata->local->sta_mtx);
- sta = sta_info_get(sdata, addr);
- ret = __sta_info_destroy(sta);
- mutex_unlock(&sdata->local->sta_mtx);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
- return ret;
+ sta = sta_info_get(sdata, addr);
+ return __sta_info_destroy(sta);
}
int sta_info_destroy_addr_bss(struct ieee80211_sub_if_data *sdata,
const u8 *addr)
{
struct sta_info *sta;
- int ret;
- mutex_lock(&sdata->local->sta_mtx);
- sta = sta_info_get_bss(sdata, addr);
- ret = __sta_info_destroy(sta);
- mutex_unlock(&sdata->local->sta_mtx);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
- return ret;
+ sta = sta_info_get_bss(sdata, addr);
+ return __sta_info_destroy(sta);
}
static void sta_info_cleanup(struct timer_list *t)
@@ -1535,7 +1552,6 @@ int sta_info_init(struct ieee80211_local *local)
}
spin_lock_init(&local->tim_lock);
- mutex_init(&local->sta_mtx);
INIT_LIST_HEAD(&local->sta_list);
timer_setup(&local->sta_cleanup, sta_info_cleanup, 0);
@@ -1558,11 +1574,11 @@ int __sta_info_flush(struct ieee80211_sub_if_data *sdata, bool vlans)
int ret = 0;
might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
WARN_ON(vlans && sdata->vif.type != NL80211_IFTYPE_AP);
WARN_ON(vlans && !sdata->bss);
- mutex_lock(&local->sta_mtx);
list_for_each_entry_safe(sta, tmp, &local->sta_list, list) {
if (sdata == sta->sdata ||
(vlans && sdata->bss == sta->sdata->bss)) {
@@ -1586,7 +1602,6 @@ int __sta_info_flush(struct ieee80211_sub_if_data *sdata, bool vlans)
if (!support_p2p_ps)
ieee80211_recalc_p2p_go_ps_allowed(sdata);
}
- mutex_unlock(&local->sta_mtx);
return ret;
}
@@ -1597,7 +1612,7 @@ void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata,
struct ieee80211_local *local = sdata->local;
struct sta_info *sta, *tmp;
- mutex_lock(&local->sta_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
list_for_each_entry_safe(sta, tmp, &local->sta_list, list) {
unsigned long last_active = ieee80211_sta_last_active(sta);
@@ -1616,8 +1631,6 @@ void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata,
WARN_ON(__sta_info_destroy(sta));
}
}
-
- mutex_unlock(&local->sta_mtx);
}
struct ieee80211_sta *ieee80211_find_sta_by_ifaddr(struct ieee80211_hw *hw,
@@ -2260,7 +2273,6 @@ void ieee80211_sta_register_airtime(struct ieee80211_sta *pubsta, u8 tid,
struct ieee80211_local *local = sta->sdata->local;
u8 ac = ieee80211_ac_from_tid(tid);
u32 airtime = 0;
- u32 diff;
if (sta->local->airtime_flags & AIRTIME_USE_TX)
airtime += tx_airtime;
@@ -2271,8 +2283,7 @@ void ieee80211_sta_register_airtime(struct ieee80211_sta *pubsta, u8 tid,
sta->airtime[ac].tx_airtime += tx_airtime;
sta->airtime[ac].rx_airtime += rx_airtime;
- diff = (u32)jiffies - sta->airtime[ac].last_active;
- if (diff <= AIRTIME_ACTIVE_DURATION)
+ if (ieee80211_sta_keep_active(sta, ac))
sta->airtime[ac].deficit -= airtime;
spin_unlock_bh(&local->active_txq_lock[ac]);
@@ -2711,7 +2722,8 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
}
if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_BITRATE)) &&
- !sta->sta.valid_links) {
+ !sta->sta.valid_links &&
+ ieee80211_rate_valid(&sta->deflink.tx_stats.last_rate)) {
sta_set_rate_info_tx(sta, &sta->deflink.tx_stats.last_rate,
&sinfo->txrate);
sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_BITRATE);
@@ -2872,7 +2884,9 @@ int ieee80211_sta_allocate_link(struct sta_info *sta, unsigned int link_id)
struct sta_link_alloc *alloc;
int ret;
- lockdep_assert_held(&sdata->local->sta_mtx);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
+
+ WARN_ON(!test_sta_flag(sta, WLAN_STA_INSERTED));
/* must represent an MLD from the start */
if (WARN_ON(!sta->sta.valid_links))
@@ -2901,7 +2915,9 @@ int ieee80211_sta_allocate_link(struct sta_info *sta, unsigned int link_id)
void ieee80211_sta_free_link(struct sta_info *sta, unsigned int link_id)
{
- lockdep_assert_held(&sta->sdata->local->sta_mtx);
+ lockdep_assert_wiphy(sta->sdata->local->hw.wiphy);
+
+ WARN_ON(!test_sta_flag(sta, WLAN_STA_INSERTED));
sta_remove_link(sta, link_id, false);
}
@@ -2915,7 +2931,7 @@ int ieee80211_sta_activate_link(struct sta_info *sta, unsigned int link_id)
int ret;
link_sta = rcu_dereference_protected(sta->link[link_id],
- lockdep_is_held(&sdata->local->sta_mtx));
+ lockdep_is_held(&sdata->local->hw.wiphy->mtx));
if (WARN_ON(old_links == new_links || !link_sta))
return -EINVAL;
@@ -2930,7 +2946,7 @@ int ieee80211_sta_activate_link(struct sta_info *sta, unsigned int link_id)
sta->sta.valid_links = new_links;
- if (!test_sta_flag(sta, WLAN_STA_INSERTED))
+ if (WARN_ON(!test_sta_flag(sta, WLAN_STA_INSERTED)))
goto hash;
ieee80211_recalc_min_chandef(sdata, link_id);
@@ -2959,11 +2975,11 @@ void ieee80211_sta_remove_link(struct sta_info *sta, unsigned int link_id)
struct ieee80211_sub_if_data *sdata = sta->sdata;
u16 old_links = sta->sta.valid_links;
- lockdep_assert_held(&sdata->local->sta_mtx);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
sta->sta.valid_links &= ~BIT(link_id);
- if (test_sta_flag(sta, WLAN_STA_INSERTED))
+ if (!WARN_ON(!test_sta_flag(sta, WLAN_STA_INSERTED)))
drv_change_sta_links(sdata->local, sdata, &sta->sta,
old_links, sta->sta.valid_links);
@@ -2990,7 +3006,7 @@ void ieee80211_sta_set_max_amsdu_subframes(struct sta_info *sta,
WLAN_EXT_CAPA9_MAX_MSDU_IN_AMSDU_MSB) << 1;
if (val)
- sta->sta.max_amsdu_subframes = 4 << val;
+ sta->sta.max_amsdu_subframes = 4 << (4 - val);
}
#ifdef CONFIG_LOCKDEP
@@ -2998,7 +3014,7 @@ bool lockdep_sta_mutex_held(struct ieee80211_sta *pubsta)
{
struct sta_info *sta = container_of(pubsta, struct sta_info, sta);
- return lockdep_is_held(&sta->local->sta_mtx);
+ return lockdep_is_held(&sta->local->hw.wiphy->mtx);
}
EXPORT_SYMBOL(lockdep_sta_mutex_held);
#endif
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 195b563132d6..5ef1554f991f 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -3,7 +3,7 @@
* Copyright 2002-2005, Devicescape Software, Inc.
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright(c) 2015-2017 Intel Deutschland GmbH
- * Copyright(c) 2020-2022 Intel Corporation
+ * Copyright(c) 2020-2023 Intel Corporation
*/
#ifndef STA_INFO_H
@@ -138,7 +138,7 @@ enum ieee80211_agg_stop_reason {
struct airtime_info {
u64 rx_airtime;
u64 tx_airtime;
- u32 last_active;
+ unsigned long last_active;
s32 deficit;
atomic_t aql_tx_pending; /* Estimated airtime for frames pending */
u32 aql_limit_low;
@@ -259,9 +259,6 @@ struct tid_ampdu_rx {
/**
* struct sta_ampdu_mlme - STA aggregation information.
*
- * @mtx: mutex to protect all TX data (except non-NULL assignments
- * to tid_tx[idx], which are protected by the sta spinlock)
- * tid_start_tx is also protected by sta->lock.
* @tid_rx: aggregation info for Rx per TID -- RCU protected
* @tid_rx_token: dialog tokens for valid aggregation sessions
* @tid_rx_timer_expired: bitmap indicating on which TIDs the
@@ -275,13 +272,13 @@ struct tid_ampdu_rx {
* unexpected aggregation related frames outside a session
* @work: work struct for starting/stopping aggregation
* @tid_tx: aggregation info for Tx per TID
- * @tid_start_tx: sessions where start was requested
+ * @tid_start_tx: sessions where start was requested, not just protected
+ * by wiphy mutex but also sta->lock
* @last_addba_req_time: timestamp of the last addBA request.
* @addba_req_num: number of times addBA request has been sent.
* @dialog_token_allocator: dialog token enumerator for each new session;
*/
struct sta_ampdu_mlme {
- struct mutex mtx;
/* rx */
struct tid_ampdu_rx __rcu *tid_rx[IEEE80211_NUM_TIDS];
u8 tid_rx_token[IEEE80211_NUM_TIDS];
@@ -291,7 +288,7 @@ struct sta_ampdu_mlme {
unsigned long agg_session_valid[BITS_TO_LONGS(IEEE80211_NUM_TIDS)];
unsigned long unexpected_agg[BITS_TO_LONGS(IEEE80211_NUM_TIDS)];
/* tx */
- struct work_struct work;
+ struct wiphy_work work;
struct tid_ampdu_tx __rcu *tid_tx[IEEE80211_NUM_TIDS];
struct tid_ampdu_tx *tid_start_tx[IEEE80211_NUM_TIDS];
unsigned long last_addba_req_time[IEEE80211_NUM_TIDS];
@@ -618,8 +615,6 @@ struct link_sta_info {
* @sta: station information we share with the driver
* @sta_state: duplicates information about station state (for debug)
* @rcu_head: RCU head used for freeing this station struct
- * @cur_max_bandwidth: maximum bandwidth to use for TX to the station,
- * taken from HT/VHT capabilities or VHT operating mode notification
* @cparams: CoDel parameters for this station.
* @reserved_tid: reserved TID (if any, otherwise IEEE80211_TID_UNRESERVED)
* @amsdu_mesh_control: track the mesh A-MSDU format used by the peer:
@@ -796,13 +791,10 @@ static inline void sta_info_pre_move_state(struct sta_info *sta,
void ieee80211_assign_tid_tx(struct sta_info *sta, int tid,
struct tid_ampdu_tx *tid_tx);
-static inline struct tid_ampdu_tx *
-rcu_dereference_protected_tid_tx(struct sta_info *sta, int tid)
-{
- return rcu_dereference_protected(sta->ampdu_mlme.tid_tx[tid],
- lockdep_is_held(&sta->lock) ||
- lockdep_is_held(&sta->ampdu_mlme.mtx));
-}
+#define rcu_dereference_protected_tid_tx(sta, tid) \
+ rcu_dereference_protected((sta)->ampdu_mlme.tid_tx[tid], \
+ lockdep_is_held(&(sta)->lock) || \
+ lockdep_is_held(&(sta)->local->hw.wiphy->mtx));
/* Maximum number of frames to buffer per power saving station per AC */
#define STA_MAX_TX_BUFFER 64
@@ -827,7 +819,7 @@ struct sta_info *sta_info_get(struct ieee80211_sub_if_data *sdata,
struct sta_info *sta_info_get_bss(struct ieee80211_sub_if_data *sdata,
const u8 *addr);
-/* user must hold sta_mtx or be in RCU critical section */
+/* user must hold wiphy mutex or be in RCU critical section */
struct sta_info *sta_info_get_by_addrs(struct ieee80211_local *local,
const u8 *sta_addr, const u8 *vif_addr);
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 44d83da60aee..1708b33cdc5e 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -184,8 +184,6 @@ static void ieee80211_check_pending_bar(struct sta_info *sta, u8 *addr, u8 tid)
static void ieee80211_frame_acked(struct sta_info *sta, struct sk_buff *skb)
{
struct ieee80211_mgmt *mgmt = (void *) skb->data;
- struct ieee80211_local *local = sta->local;
- struct ieee80211_sub_if_data *sdata = sta->sdata;
if (ieee80211_is_data_qos(mgmt->frame_control)) {
struct ieee80211_hdr *hdr = (void *) skb->data;
@@ -194,39 +192,6 @@ static void ieee80211_frame_acked(struct sta_info *sta, struct sk_buff *skb)
ieee80211_check_pending_bar(sta, hdr->addr1, tid);
}
-
- if (ieee80211_is_action(mgmt->frame_control) &&
- !ieee80211_has_protected(mgmt->frame_control) &&
- mgmt->u.action.category == WLAN_CATEGORY_HT &&
- mgmt->u.action.u.ht_smps.action == WLAN_HT_ACTION_SMPS &&
- ieee80211_sdata_running(sdata)) {
- enum ieee80211_smps_mode smps_mode;
-
- switch (mgmt->u.action.u.ht_smps.smps_control) {
- case WLAN_HT_SMPS_CONTROL_DYNAMIC:
- smps_mode = IEEE80211_SMPS_DYNAMIC;
- break;
- case WLAN_HT_SMPS_CONTROL_STATIC:
- smps_mode = IEEE80211_SMPS_STATIC;
- break;
- case WLAN_HT_SMPS_CONTROL_DISABLED:
- default: /* shouldn't happen since we don't send that */
- smps_mode = IEEE80211_SMPS_OFF;
- break;
- }
-
- if (sdata->vif.type == NL80211_IFTYPE_STATION) {
- /*
- * This update looks racy, but isn't -- if we come
- * here we've definitely got a station that we're
- * talking to, and on a managed interface that can
- * only be the AP. And the only other place updating
- * this variable in managed mode is before association.
- */
- sdata->deflink.smps_mode = smps_mode;
- ieee80211_queue_work(&local->hw, &sdata->recalc_smps);
- }
- }
}
static void ieee80211_set_bar_pending(struct sta_info *sta, u8 tid, u16 ssn)
@@ -291,7 +256,7 @@ static int ieee80211_tx_radiotap_len(struct ieee80211_tx_info *info,
static void
ieee80211_add_tx_radiotap_header(struct ieee80211_local *local,
struct sk_buff *skb, int retry_count,
- int rtap_len, int shift,
+ int rtap_len,
struct ieee80211_tx_status *status)
{
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
@@ -342,7 +307,7 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local,
if (legacy_rate) {
rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_RATE));
- *pos = DIV_ROUND_UP(legacy_rate, 5 * (1 << shift));
+ *pos = DIV_ROUND_UP(legacy_rate, 5);
/* padding for tx flags */
pos += 2;
}
@@ -633,7 +598,7 @@ static void ieee80211_report_ack_skb(struct ieee80211_local *local,
unsigned long flags;
spin_lock_irqsave(&local->ack_status_lock, flags);
- skb = idr_remove(&local->ack_status_frames, info->ack_frame_id);
+ skb = idr_remove(&local->ack_status_frames, info->status_data);
spin_unlock_irqrestore(&local->ack_status_lock, flags);
if (!skb)
@@ -695,6 +660,42 @@ static void ieee80211_report_ack_skb(struct ieee80211_local *local,
}
}
+static void ieee80211_handle_smps_status(struct ieee80211_sub_if_data *sdata,
+ bool acked, u16 status_data)
+{
+ u16 sub_data = u16_get_bits(status_data, IEEE80211_STATUS_SUBDATA_MASK);
+ enum ieee80211_smps_mode smps_mode = sub_data & 3;
+ int link_id = (sub_data >> 2);
+ struct ieee80211_link_data *link;
+
+ if (!sdata || !ieee80211_sdata_running(sdata))
+ return;
+
+ if (!acked)
+ return;
+
+ if (sdata->vif.type != NL80211_IFTYPE_STATION)
+ return;
+
+ if (WARN(link_id >= ARRAY_SIZE(sdata->link),
+ "bad SMPS status link: %d\n", link_id))
+ return;
+
+ link = rcu_dereference(sdata->link[link_id]);
+ if (!link)
+ return;
+
+ /*
+ * This update looks racy, but isn't, the only other place
+ * updating this variable is in managed mode before assoc,
+ * and we have to be associated to have a status from the
+ * action frame TX, since we cannot send it while we're not
+ * associated yet.
+ */
+ link->smps_mode = smps_mode;
+ wiphy_work_queue(sdata->local->hw.wiphy, &link->u.mgd.recalc_smps);
+}
+
static void ieee80211_report_used_skb(struct ieee80211_local *local,
struct sk_buff *skb, bool dropped,
ktime_t ack_hwtstamp)
@@ -730,12 +731,9 @@ static void ieee80211_report_used_skb(struct ieee80211_local *local,
if (!sdata) {
skb->dev = NULL;
} else if (!dropped) {
- unsigned int hdr_size =
- ieee80211_hdrlen(hdr->frame_control);
-
/* Check to see if packet is a TDLS teardown packet */
if (ieee80211_is_data(hdr->frame_control) &&
- (ieee80211_get_tdls_action(skb, hdr_size) ==
+ (ieee80211_get_tdls_action(skb) ==
WLAN_TDLS_TEARDOWN)) {
ieee80211_tdls_td_tx_handle(local, sdata, skb,
info->flags);
@@ -759,9 +757,24 @@ static void ieee80211_report_used_skb(struct ieee80211_local *local,
}
rcu_read_unlock();
- } else if (info->ack_frame_id) {
+ } else if (info->status_data_idr) {
ieee80211_report_ack_skb(local, skb, acked, dropped,
ack_hwtstamp);
+ } else if (info->status_data) {
+ struct ieee80211_sub_if_data *sdata;
+
+ rcu_read_lock();
+
+ sdata = ieee80211_sdata_from_skb(local, skb);
+
+ switch (u16_get_bits(info->status_data,
+ IEEE80211_STATUS_TYPE_MASK)) {
+ case IEEE80211_STATUS_TYPE_SMPS:
+ ieee80211_handle_smps_status(sdata, acked,
+ info->status_data);
+ break;
+ }
+ rcu_read_unlock();
}
if (!dropped && skb->destructor) {
@@ -862,7 +875,7 @@ static int ieee80211_tx_get_rates(struct ieee80211_hw *hw,
}
void ieee80211_tx_monitor(struct ieee80211_local *local, struct sk_buff *skb,
- int retry_count, int shift, bool send_to_cooked,
+ int retry_count, bool send_to_cooked,
struct ieee80211_tx_status *status)
{
struct sk_buff *skb2;
@@ -879,7 +892,7 @@ void ieee80211_tx_monitor(struct ieee80211_local *local, struct sk_buff *skb,
return;
}
ieee80211_add_tx_radiotap_header(local, skb, retry_count,
- rtap_len, shift, status);
+ rtap_len, status);
/* XXX: is this sufficient for BPF? */
skb_reset_mac_header(skb);
@@ -932,14 +945,12 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw,
bool acked;
bool noack_success;
struct ieee80211_bar *bar;
- int shift = 0;
int tid = IEEE80211_NUM_TIDS;
fc = hdr->frame_control;
if (status->sta) {
sta = container_of(status->sta, struct sta_info, sta);
- shift = ieee80211_vif_get_shift(&sta->sdata->vif);
if (info->flags & IEEE80211_TX_STATUS_EOSP)
clear_sta_flag(sta, WLAN_STA_SP);
@@ -1077,11 +1088,11 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw,
}
/* send to monitor interfaces */
- ieee80211_tx_monitor(local, skb, retry_count, shift,
+ ieee80211_tx_monitor(local, skb, retry_count,
send_to_cooked, status);
}
-void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
+void ieee80211_tx_status_skb(struct ieee80211_hw *hw, struct sk_buff *skb)
{
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
struct ieee80211_local *local = hw_to_local(hw);
@@ -1100,7 +1111,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
ieee80211_tx_status_ext(hw, &status);
rcu_read_unlock();
}
-EXPORT_SYMBOL(ieee80211_tx_status);
+EXPORT_SYMBOL(ieee80211_tx_status_skb);
void ieee80211_tx_status_ext(struct ieee80211_hw *hw,
struct ieee80211_tx_status *status)
diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c
index a4af3b7675ef..49730b424141 100644
--- a/net/mac80211/tdls.c
+++ b/net/mac80211/tdls.c
@@ -21,7 +21,7 @@
/* give usermode some time for retries in setting up the TDLS session */
#define TDLS_PEER_SETUP_TIMEOUT (15 * HZ)
-void ieee80211_tdls_peer_del_work(struct work_struct *wk)
+void ieee80211_tdls_peer_del_work(struct wiphy *wiphy, struct wiphy_work *wk)
{
struct ieee80211_sub_if_data *sdata;
struct ieee80211_local *local;
@@ -30,13 +30,13 @@ void ieee80211_tdls_peer_del_work(struct work_struct *wk)
u.mgd.tdls_peer_del_work.work);
local = sdata->local;
- mutex_lock(&local->mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
+
if (!is_zero_ether_addr(sdata->u.mgd.tdls_peer)) {
tdls_dbg(sdata, "TDLS del peer %pM\n", sdata->u.mgd.tdls_peer);
sta_info_destroy_addr(sdata, sdata->u.mgd.tdls_peer);
eth_zero_addr(sdata->u.mgd.tdls_peer);
}
- mutex_unlock(&local->mtx);
}
static void ieee80211_tdls_add_ext_capab(struct ieee80211_link_data *link,
@@ -309,7 +309,7 @@ ieee80211_tdls_chandef_vht_upgrade(struct ieee80211_sub_if_data *sdata,
struct sta_info *sta)
{
/* IEEE802.11ac-2013 Table E-4 */
- u16 centers_80mhz[] = { 5210, 5290, 5530, 5610, 5690, 5775 };
+ static const u16 centers_80mhz[] = { 5210, 5290, 5530, 5610, 5690, 5775 };
struct cfg80211_chan_def uc = sta->tdls_chandef;
enum nl80211_chan_width max_width =
ieee80211_sta_cap_chan_bw(&sta->deflink);
@@ -1001,7 +1001,7 @@ ieee80211_tdls_build_mgmt_packet_data(struct ieee80211_sub_if_data *sdata,
skb);
break;
default:
- ret = -ENOTSUPP;
+ ret = -EOPNOTSUPP;
break;
}
@@ -1071,7 +1071,7 @@ ieee80211_tdls_prep_mgmt_packet(struct wiphy *wiphy, struct net_device *dev,
/* any value is ok */
break;
default:
- ret = -ENOTSUPP;
+ ret = -EOPNOTSUPP;
break;
}
@@ -1177,10 +1177,10 @@ ieee80211_tdls_mgmt_setup(struct wiphy *wiphy, struct net_device *dev,
smps_mode != IEEE80211_SMPS_OFF) {
tdls_dbg(sdata, "Aborting TDLS setup due to SMPS mode %d\n",
smps_mode);
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
- mutex_lock(&local->mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
/* we don't support concurrent TDLS peer setups */
if (!is_zero_ether_addr(sdata->u.mgd.tdls_peer) &&
@@ -1208,7 +1208,6 @@ ieee80211_tdls_mgmt_setup(struct wiphy *wiphy, struct net_device *dev,
ieee80211_flush_queues(local, sdata, false);
memcpy(sdata->u.mgd.tdls_peer, peer, ETH_ALEN);
- mutex_unlock(&local->mtx);
/* we cannot take the mutex while preparing the setup packet */
ret = ieee80211_tdls_prep_mgmt_packet(wiphy, dev, peer,
@@ -1218,19 +1217,16 @@ ieee80211_tdls_mgmt_setup(struct wiphy *wiphy, struct net_device *dev,
extra_ies, extra_ies_len, 0,
NULL);
if (ret < 0) {
- mutex_lock(&local->mtx);
eth_zero_addr(sdata->u.mgd.tdls_peer);
- mutex_unlock(&local->mtx);
return ret;
}
- ieee80211_queue_delayed_work(&sdata->local->hw,
- &sdata->u.mgd.tdls_peer_del_work,
- TDLS_PEER_SETUP_TIMEOUT);
+ wiphy_delayed_work_queue(sdata->local->hw.wiphy,
+ &sdata->u.mgd.tdls_peer_del_work,
+ TDLS_PEER_SETUP_TIMEOUT);
return 0;
out_unlock:
- mutex_unlock(&local->mtx);
return ret;
}
@@ -1293,7 +1289,7 @@ int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev,
int ret;
if (!(wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS))
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
/* make sure we are in managed mode, and associated */
if (sdata->vif.type != NL80211_IFTYPE_STATION ||
@@ -1322,7 +1318,7 @@ int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev,
* response frame. It is transmitted directly and not buffered
* by the AP.
*/
- drv_mgd_protect_tdls_discover(sdata->local, sdata);
+ drv_mgd_protect_tdls_discover(sdata->local, sdata, link_id);
fallthrough;
case WLAN_TDLS_SETUP_CONFIRM:
case WLAN_PUB_ACTION_TDLS_DISCOVER_RES:
@@ -1354,9 +1350,10 @@ static void iee80211_tdls_recalc_chanctx(struct ieee80211_sub_if_data *sdata,
enum nl80211_chan_width width;
struct ieee80211_supported_band *sband;
- mutex_lock(&local->chanctx_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
+
conf = rcu_dereference_protected(sdata->vif.bss_conf.chanctx_conf,
- lockdep_is_held(&local->chanctx_mtx));
+ lockdep_is_held(&local->hw.wiphy->mtx));
if (conf) {
width = conf->def.width;
sband = local->hw.wiphy->bands[conf->def.chan->band];
@@ -1384,7 +1381,6 @@ static void iee80211_tdls_recalc_chanctx(struct ieee80211_sub_if_data *sdata,
}
}
- mutex_unlock(&local->chanctx_mtx);
}
static int iee80211_tdls_have_ht_peers(struct ieee80211_sub_if_data *sdata)
@@ -1447,8 +1443,10 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev,
struct ieee80211_local *local = sdata->local;
int ret;
+ lockdep_assert_wiphy(local->hw.wiphy);
+
if (!(wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS))
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
if (sdata->vif.type != NL80211_IFTYPE_STATION)
return -EINVAL;
@@ -1461,41 +1459,32 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev,
case NL80211_TDLS_SETUP:
case NL80211_TDLS_DISCOVERY_REQ:
/* We don't support in-driver setup/teardown/discovery */
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
/* protect possible bss_conf changes and avoid concurrency in
* ieee80211_bss_info_change_notify()
*/
- sdata_lock(sdata);
- mutex_lock(&local->mtx);
tdls_dbg(sdata, "TDLS oper %d peer %pM\n", oper, peer);
switch (oper) {
case NL80211_TDLS_ENABLE_LINK:
if (sdata->vif.bss_conf.csa_active) {
tdls_dbg(sdata, "TDLS: disallow link during CSA\n");
- ret = -EBUSY;
- break;
+ return -EBUSY;
}
- mutex_lock(&local->sta_mtx);
sta = sta_info_get(sdata, peer);
- if (!sta) {
- mutex_unlock(&local->sta_mtx);
- ret = -ENOLINK;
- break;
- }
+ if (!sta)
+ return -ENOLINK;
iee80211_tdls_recalc_chanctx(sdata, sta);
iee80211_tdls_recalc_ht_protection(sdata, sta);
set_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH);
- mutex_unlock(&local->sta_mtx);
WARN_ON_ONCE(is_zero_ether_addr(sdata->u.mgd.tdls_peer) ||
!ether_addr_equal(sdata->u.mgd.tdls_peer, peer));
- ret = 0;
break;
case NL80211_TDLS_DISABLE_LINK:
/*
@@ -1514,29 +1503,26 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev,
ret = sta_info_destroy_addr(sdata, peer);
- mutex_lock(&local->sta_mtx);
iee80211_tdls_recalc_ht_protection(sdata, NULL);
- mutex_unlock(&local->sta_mtx);
iee80211_tdls_recalc_chanctx(sdata, NULL);
+ if (ret)
+ return ret;
break;
default:
- ret = -ENOTSUPP;
- break;
+ return -EOPNOTSUPP;
}
- if (ret == 0 && ether_addr_equal(sdata->u.mgd.tdls_peer, peer)) {
- cancel_delayed_work(&sdata->u.mgd.tdls_peer_del_work);
+ if (ether_addr_equal(sdata->u.mgd.tdls_peer, peer)) {
+ wiphy_delayed_work_cancel(sdata->local->hw.wiphy,
+ &sdata->u.mgd.tdls_peer_del_work);
eth_zero_addr(sdata->u.mgd.tdls_peer);
}
- if (ret == 0)
- wiphy_work_queue(sdata->local->hw.wiphy,
- &sdata->deflink.u.mgd.request_smps_work);
+ wiphy_work_queue(sdata->local->hw.wiphy,
+ &sdata->deflink.u.mgd.request_smps_work);
- mutex_unlock(&local->mtx);
- sdata_unlock(sdata);
- return ret;
+ return 0;
}
void ieee80211_tdls_oper_request(struct ieee80211_vif *vif, const u8 *peer,
@@ -1669,11 +1655,12 @@ ieee80211_tdls_channel_switch(struct wiphy *wiphy, struct net_device *dev,
u32 ch_sw_tm_ie;
int ret;
+ lockdep_assert_wiphy(local->hw.wiphy);
+
if (chandef->chan->freq_offset)
/* this may work, but is untested */
return -EOPNOTSUPP;
- mutex_lock(&local->sta_mtx);
sta = sta_info_get(sdata, addr);
if (!sta) {
tdls_dbg(sdata,
@@ -1686,7 +1673,7 @@ ieee80211_tdls_channel_switch(struct wiphy *wiphy, struct net_device *dev,
if (!test_sta_flag(sta, WLAN_STA_TDLS_CHAN_SWITCH)) {
tdls_dbg(sdata, "TDLS channel switch unsupported by %pM\n",
addr);
- ret = -ENOTSUPP;
+ ret = -EOPNOTSUPP;
goto out;
}
@@ -1703,7 +1690,6 @@ ieee80211_tdls_channel_switch(struct wiphy *wiphy, struct net_device *dev,
set_sta_flag(sta, WLAN_STA_TDLS_OFF_CHANNEL);
out:
- mutex_unlock(&local->sta_mtx);
dev_kfree_skb_any(skb);
return ret;
}
@@ -1717,26 +1703,24 @@ ieee80211_tdls_cancel_channel_switch(struct wiphy *wiphy,
struct ieee80211_local *local = sdata->local;
struct sta_info *sta;
- mutex_lock(&local->sta_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
+
sta = sta_info_get(sdata, addr);
if (!sta) {
tdls_dbg(sdata,
"Invalid TDLS peer %pM for channel switch cancel\n",
addr);
- goto out;
+ return;
}
if (!test_sta_flag(sta, WLAN_STA_TDLS_OFF_CHANNEL)) {
tdls_dbg(sdata, "TDLS channel switch not initiated by %pM\n",
addr);
- goto out;
+ return;
}
drv_tdls_cancel_channel_switch(local, sdata, &sta->sta);
clear_sta_flag(sta, WLAN_STA_TDLS_OFF_CHANNEL);
-
-out:
- mutex_unlock(&local->sta_mtx);
}
static struct sk_buff *
@@ -1798,6 +1782,8 @@ ieee80211_process_tdls_channel_switch_resp(struct ieee80211_sub_if_data *sdata,
struct ieee80211_tdls_ch_sw_params params = {};
int ret;
+ lockdep_assert_wiphy(local->hw.wiphy);
+
params.action_code = WLAN_TDLS_CHANNEL_SWITCH_RESPONSE;
params.timestamp = rx_status->device_timestamp;
@@ -1807,7 +1793,6 @@ ieee80211_process_tdls_channel_switch_resp(struct ieee80211_sub_if_data *sdata,
return -EINVAL;
}
- mutex_lock(&local->sta_mtx);
sta = sta_info_get(sdata, tf->sa);
if (!sta || !test_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH)) {
tdls_dbg(sdata, "TDLS chan switch from non-peer sta %pM\n",
@@ -1870,7 +1855,6 @@ call_drv:
tf->sa, params.status);
out:
- mutex_unlock(&local->sta_mtx);
dev_kfree_skb_any(params.tmpl_skb);
kfree(elems);
return ret;
@@ -1896,6 +1880,8 @@ ieee80211_process_tdls_channel_switch_req(struct ieee80211_sub_if_data *sdata,
struct ieee80211_tdls_ch_sw_params params = {};
int ret = 0;
+ lockdep_assert_wiphy(local->hw.wiphy);
+
params.action_code = WLAN_TDLS_CHANNEL_SWITCH_REQUEST;
params.timestamp = rx_status->device_timestamp;
@@ -1984,7 +1970,6 @@ ieee80211_process_tdls_channel_switch_req(struct ieee80211_sub_if_data *sdata,
goto free;
}
- mutex_lock(&local->sta_mtx);
sta = sta_info_get(sdata, tf->sa);
if (!sta || !test_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH)) {
tdls_dbg(sdata, "TDLS chan switch from non-peer sta %pM\n",
@@ -2008,7 +1993,7 @@ ieee80211_process_tdls_channel_switch_req(struct ieee80211_sub_if_data *sdata,
if (!sta->sta.deflink.ht_cap.ht_supported && elems->sec_chan_offs &&
elems->sec_chan_offs->sec_chan_offs) {
tdls_dbg(sdata, "TDLS chan switch - wide chan unsupported\n");
- ret = -ENOTSUPP;
+ ret = -EOPNOTSUPP;
goto out;
}
@@ -2031,7 +2016,6 @@ ieee80211_process_tdls_channel_switch_req(struct ieee80211_sub_if_data *sdata,
tf->sa, params.chandef->chan->center_freq,
params.chandef->width);
out:
- mutex_unlock(&local->sta_mtx);
dev_kfree_skb_any(params.tmpl_skb);
free:
kfree(elems);
diff --git a/net/mac80211/tests/Makefile b/net/mac80211/tests/Makefile
new file mode 100644
index 000000000000..4fdaf3feaca3
--- /dev/null
+++ b/net/mac80211/tests/Makefile
@@ -0,0 +1,3 @@
+mac80211-tests-y += module.o elems.o mfp.o
+
+obj-$(CONFIG_MAC80211_KUNIT_TEST) += mac80211-tests.o
diff --git a/net/mac80211/tests/elems.c b/net/mac80211/tests/elems.c
new file mode 100644
index 000000000000..997d0cd27b2d
--- /dev/null
+++ b/net/mac80211/tests/elems.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * KUnit tests for element parsing
+ *
+ * Copyright (C) 2023 Intel Corporation
+ */
+#include <kunit/test.h>
+#include "../ieee80211_i.h"
+
+MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
+
+static void mle_defrag(struct kunit *test)
+{
+ struct ieee80211_elems_parse_params parse_params = {
+ .link_id = 12,
+ .from_ap = true,
+ };
+ struct ieee802_11_elems *parsed;
+ struct sk_buff *skb;
+ u8 *len_mle, *len_prof;
+ int i;
+
+ skb = alloc_skb(1024, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_NULL(test, skb);
+
+ if (skb_pad(skb, skb_tailroom(skb))) {
+ KUNIT_FAIL(test, "failed to pad skb");
+ return;
+ }
+
+ /* build a multi-link element */
+ skb_put_u8(skb, WLAN_EID_EXTENSION);
+ len_mle = skb_put(skb, 1);
+ skb_put_u8(skb, WLAN_EID_EXT_EHT_MULTI_LINK);
+
+ put_unaligned_le16(IEEE80211_ML_CONTROL_TYPE_BASIC,
+ skb_put(skb, 2));
+ /* struct ieee80211_mle_basic_common_info */
+ skb_put_u8(skb, 7); /* includes len field */
+ skb_put_data(skb, "\x00\x00\x00\x00\x00\x00", ETH_ALEN); /* MLD addr */
+
+ /* with a STA profile inside */
+ skb_put_u8(skb, IEEE80211_MLE_SUBELEM_PER_STA_PROFILE);
+ len_prof = skb_put(skb, 1);
+ put_unaligned_le16(IEEE80211_MLE_STA_CONTROL_COMPLETE_PROFILE |
+ parse_params.link_id,
+ skb_put(skb, 2));
+ skb_put_u8(skb, 1); /* fake sta_info_len - includes itself */
+ /* put a bunch of useless elements into it */
+ for (i = 0; i < 20; i++) {
+ skb_put_u8(skb, WLAN_EID_SSID);
+ skb_put_u8(skb, 20);
+ skb_put(skb, 20);
+ }
+
+ /* fragment STA profile */
+ ieee80211_fragment_element(skb, len_prof,
+ IEEE80211_MLE_SUBELEM_FRAGMENT);
+ /* fragment MLE */
+ ieee80211_fragment_element(skb, len_mle, WLAN_EID_FRAGMENT);
+
+ parse_params.start = skb->data;
+ parse_params.len = skb->len;
+ parsed = ieee802_11_parse_elems_full(&parse_params);
+ /* should return ERR_PTR or valid, not NULL */
+ KUNIT_EXPECT_NOT_NULL(test, parsed);
+
+ if (IS_ERR_OR_NULL(parsed))
+ goto free_skb;
+
+ KUNIT_EXPECT_NOT_NULL(test, parsed->ml_basic_elem);
+ KUNIT_EXPECT_EQ(test,
+ parsed->ml_basic_len,
+ 2 /* control */ +
+ 7 /* common info */ +
+ 2 /* sta profile element header */ +
+ 3 /* sta profile header */ +
+ 20 * 22 /* sta profile data */ +
+ 2 /* sta profile fragment element */);
+ KUNIT_EXPECT_NOT_NULL(test, parsed->prof);
+ KUNIT_EXPECT_EQ(test,
+ parsed->sta_prof_len,
+ 3 /* sta profile header */ +
+ 20 * 22 /* sta profile data */);
+
+ kfree(parsed);
+free_skb:
+ kfree_skb(skb);
+}
+
+static struct kunit_case element_parsing_test_cases[] = {
+ KUNIT_CASE(mle_defrag),
+ {}
+};
+
+static struct kunit_suite element_parsing = {
+ .name = "mac80211-element-parsing",
+ .test_cases = element_parsing_test_cases,
+};
+
+kunit_test_suite(element_parsing);
diff --git a/net/mac80211/tests/mfp.c b/net/mac80211/tests/mfp.c
new file mode 100644
index 000000000000..a8dc1601da60
--- /dev/null
+++ b/net/mac80211/tests/mfp.c
@@ -0,0 +1,286 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * KUnit tests for management frame acceptance
+ *
+ * Copyright (C) 2023 Intel Corporation
+ */
+#include <kunit/test.h>
+#include <kunit/skbuff.h>
+#include "../ieee80211_i.h"
+#include "../sta_info.h"
+
+MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
+
+static const struct mfp_test_case {
+ const char *desc;
+ bool sta, mfp, decrypted, unicast, assoc;
+ u8 category;
+ u8 stype;
+ u8 action;
+ ieee80211_rx_result result;
+} accept_mfp_cases[] = {
+ /* regular public action */
+ {
+ .desc = "public action: accept unicast from unknown peer",
+ .stype = IEEE80211_STYPE_ACTION,
+ .category = WLAN_CATEGORY_PUBLIC,
+ .action = WLAN_PUB_ACTION_DSE_ENABLEMENT,
+ .unicast = true,
+ .result = RX_CONTINUE,
+ },
+ {
+ .desc = "public action: accept multicast from unknown peer",
+ .stype = IEEE80211_STYPE_ACTION,
+ .category = WLAN_CATEGORY_PUBLIC,
+ .action = WLAN_PUB_ACTION_DSE_ENABLEMENT,
+ .unicast = false,
+ .result = RX_CONTINUE,
+ },
+ {
+ .desc = "public action: accept unicast without MFP",
+ .stype = IEEE80211_STYPE_ACTION,
+ .category = WLAN_CATEGORY_PUBLIC,
+ .action = WLAN_PUB_ACTION_DSE_ENABLEMENT,
+ .unicast = true,
+ .sta = true,
+ .result = RX_CONTINUE,
+ },
+ {
+ .desc = "public action: accept multicast without MFP",
+ .stype = IEEE80211_STYPE_ACTION,
+ .category = WLAN_CATEGORY_PUBLIC,
+ .action = WLAN_PUB_ACTION_DSE_ENABLEMENT,
+ .unicast = false,
+ .sta = true,
+ .result = RX_CONTINUE,
+ },
+ {
+ .desc = "public action: drop unicast with MFP",
+ .stype = IEEE80211_STYPE_ACTION,
+ .category = WLAN_CATEGORY_PUBLIC,
+ .action = WLAN_PUB_ACTION_DSE_ENABLEMENT,
+ .unicast = true,
+ .sta = true,
+ .mfp = true,
+ .result = RX_DROP_U_UNPROT_UNICAST_PUB_ACTION,
+ },
+ {
+ .desc = "public action: accept multicast with MFP",
+ .stype = IEEE80211_STYPE_ACTION,
+ .category = WLAN_CATEGORY_PUBLIC,
+ .action = WLAN_PUB_ACTION_DSE_ENABLEMENT,
+ .unicast = false,
+ .sta = true,
+ .mfp = true,
+ .result = RX_CONTINUE,
+ },
+ /* protected dual of public action */
+ {
+ .desc = "protected dual: drop unicast from unknown peer",
+ .stype = IEEE80211_STYPE_ACTION,
+ .category = WLAN_CATEGORY_PROTECTED_DUAL_OF_ACTION,
+ .action = WLAN_PUB_ACTION_DSE_ENABLEMENT,
+ .unicast = true,
+ .result = RX_DROP_U_UNPROT_DUAL,
+ },
+ {
+ .desc = "protected dual: drop multicast from unknown peer",
+ .stype = IEEE80211_STYPE_ACTION,
+ .category = WLAN_CATEGORY_PROTECTED_DUAL_OF_ACTION,
+ .action = WLAN_PUB_ACTION_DSE_ENABLEMENT,
+ .unicast = false,
+ .result = RX_DROP_U_UNPROT_DUAL,
+ },
+ {
+ .desc = "protected dual: drop unicast without MFP",
+ .stype = IEEE80211_STYPE_ACTION,
+ .category = WLAN_CATEGORY_PROTECTED_DUAL_OF_ACTION,
+ .action = WLAN_PUB_ACTION_DSE_ENABLEMENT,
+ .unicast = true,
+ .sta = true,
+ .result = RX_DROP_U_UNPROT_DUAL,
+ },
+ {
+ .desc = "protected dual: drop multicast without MFP",
+ .stype = IEEE80211_STYPE_ACTION,
+ .category = WLAN_CATEGORY_PROTECTED_DUAL_OF_ACTION,
+ .action = WLAN_PUB_ACTION_DSE_ENABLEMENT,
+ .unicast = false,
+ .sta = true,
+ .result = RX_DROP_U_UNPROT_DUAL,
+ },
+ {
+ .desc = "protected dual: drop undecrypted unicast with MFP",
+ .stype = IEEE80211_STYPE_ACTION,
+ .category = WLAN_CATEGORY_PROTECTED_DUAL_OF_ACTION,
+ .action = WLAN_PUB_ACTION_DSE_ENABLEMENT,
+ .unicast = true,
+ .sta = true,
+ .mfp = true,
+ .result = RX_DROP_U_UNPROT_DUAL,
+ },
+ {
+ .desc = "protected dual: drop undecrypted multicast with MFP",
+ .stype = IEEE80211_STYPE_ACTION,
+ .category = WLAN_CATEGORY_PROTECTED_DUAL_OF_ACTION,
+ .action = WLAN_PUB_ACTION_DSE_ENABLEMENT,
+ .unicast = false,
+ .sta = true,
+ .mfp = true,
+ .result = RX_DROP_U_UNPROT_DUAL,
+ },
+ {
+ .desc = "protected dual: accept unicast with MFP",
+ .stype = IEEE80211_STYPE_ACTION,
+ .category = WLAN_CATEGORY_PROTECTED_DUAL_OF_ACTION,
+ .action = WLAN_PUB_ACTION_DSE_ENABLEMENT,
+ .decrypted = true,
+ .unicast = true,
+ .sta = true,
+ .mfp = true,
+ .result = RX_CONTINUE,
+ },
+ {
+ .desc = "protected dual: accept multicast with MFP",
+ .stype = IEEE80211_STYPE_ACTION,
+ .category = WLAN_CATEGORY_PROTECTED_DUAL_OF_ACTION,
+ .action = WLAN_PUB_ACTION_DSE_ENABLEMENT,
+ .decrypted = true,
+ .unicast = false,
+ .sta = true,
+ .mfp = true,
+ .result = RX_CONTINUE,
+ },
+ /* deauth/disassoc before keys are set */
+ {
+ .desc = "deauth: accept unicast with MFP but w/o key",
+ .stype = IEEE80211_STYPE_DEAUTH,
+ .sta = true,
+ .mfp = true,
+ .unicast = true,
+ .result = RX_CONTINUE,
+ },
+ {
+ .desc = "disassoc: accept unicast with MFP but w/o key",
+ .stype = IEEE80211_STYPE_DEAUTH,
+ .sta = true,
+ .mfp = true,
+ .unicast = true,
+ .result = RX_CONTINUE,
+ },
+ /* non-public robust action frame ... */
+ {
+ .desc = "BA action: drop unicast before assoc",
+ .stype = IEEE80211_STYPE_ACTION,
+ .category = WLAN_CATEGORY_BACK,
+ .unicast = true,
+ .sta = true,
+ .result = RX_DROP_U_UNPROT_ROBUST_ACTION,
+ },
+ {
+ .desc = "BA action: drop unprotected after assoc",
+ .stype = IEEE80211_STYPE_ACTION,
+ .category = WLAN_CATEGORY_BACK,
+ .unicast = true,
+ .sta = true,
+ .mfp = true,
+ .result = RX_DROP_U_UNPROT_UCAST_MGMT,
+ },
+ {
+ .desc = "BA action: accept unprotected without MFP",
+ .stype = IEEE80211_STYPE_ACTION,
+ .category = WLAN_CATEGORY_BACK,
+ .unicast = true,
+ .sta = true,
+ .assoc = true,
+ .mfp = false,
+ .result = RX_CONTINUE,
+ },
+ {
+ .desc = "BA action: drop unprotected with MFP",
+ .stype = IEEE80211_STYPE_ACTION,
+ .category = WLAN_CATEGORY_BACK,
+ .unicast = true,
+ .sta = true,
+ .mfp = true,
+ .result = RX_DROP_U_UNPROT_UCAST_MGMT,
+ },
+};
+
+KUNIT_ARRAY_PARAM_DESC(accept_mfp, accept_mfp_cases, desc);
+
+static void accept_mfp(struct kunit *test)
+{
+ static struct sta_info sta;
+ const struct mfp_test_case *params = test->param_value;
+ struct ieee80211_rx_data rx = {
+ .sta = params->sta ? &sta : NULL,
+ };
+ struct ieee80211_rx_status *status;
+ struct ieee80211_hdr_3addr hdr = {
+ .frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
+ params->stype),
+ .addr1 = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+ .addr2 = { 0x12, 0x22, 0x33, 0x44, 0x55, 0x66 },
+ /* A3/BSSID doesn't matter here */
+ };
+
+ memset(&sta, 0, sizeof(sta));
+
+ if (!params->sta) {
+ KUNIT_ASSERT_FALSE(test, params->mfp);
+ KUNIT_ASSERT_FALSE(test, params->decrypted);
+ }
+
+ if (params->mfp)
+ set_sta_flag(&sta, WLAN_STA_MFP);
+
+ if (params->assoc)
+ set_bit(WLAN_STA_ASSOC, &sta._flags);
+
+ rx.skb = kunit_zalloc_skb(test, 128, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_NULL(test, rx.skb);
+ status = IEEE80211_SKB_RXCB(rx.skb);
+
+ if (params->decrypted) {
+ status->flag |= RX_FLAG_DECRYPTED;
+ if (params->unicast)
+ hdr.frame_control |=
+ cpu_to_le16(IEEE80211_FCTL_PROTECTED);
+ }
+
+ if (params->unicast)
+ hdr.addr1[0] = 0x02;
+
+ skb_put_data(rx.skb, &hdr, sizeof(hdr));
+
+ switch (params->stype) {
+ case IEEE80211_STYPE_ACTION:
+ skb_put_u8(rx.skb, params->category);
+ skb_put_u8(rx.skb, params->action);
+ break;
+ case IEEE80211_STYPE_DEAUTH:
+ case IEEE80211_STYPE_DISASSOC: {
+ __le16 reason = cpu_to_le16(WLAN_REASON_UNSPECIFIED);
+
+ skb_put_data(rx.skb, &reason, sizeof(reason));
+ }
+ break;
+ }
+
+ KUNIT_EXPECT_EQ(test,
+ (__force u32)ieee80211_drop_unencrypted_mgmt(&rx),
+ (__force u32)params->result);
+}
+
+static struct kunit_case mfp_test_cases[] = {
+ KUNIT_CASE_PARAM(accept_mfp, accept_mfp_gen_params),
+ {}
+};
+
+static struct kunit_suite mfp = {
+ .name = "mac80211-mfp",
+ .test_cases = mfp_test_cases,
+};
+
+kunit_test_suite(mfp);
diff --git a/net/mac80211/tests/module.c b/net/mac80211/tests/module.c
new file mode 100644
index 000000000000..9d05f2943935
--- /dev/null
+++ b/net/mac80211/tests/module.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * This is just module boilerplate for the mac80211 kunit module.
+ *
+ * Copyright (C) 2023 Intel Corporation
+ */
+#include <linux/module.h>
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("tests for mac80211");
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index b8c53b4a710b..06835ed4c44f 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -2512,6 +2512,31 @@ TRACE_EVENT(drv_net_setup_tc,
)
);
+TRACE_EVENT(drv_can_activate_links,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ u16 active_links),
+
+ TP_ARGS(local, sdata, active_links),
+
+ TP_STRUCT__entry(
+ LOCAL_ENTRY
+ VIF_ENTRY
+ __field(u16, active_links)
+ ),
+
+ TP_fast_assign(
+ LOCAL_ASSIGN;
+ VIF_ASSIGN;
+ __entry->active_links = active_links;
+ ),
+
+ TP_printk(
+ LOCAL_PR_FMT VIF_PR_FMT " requested active_links:0x%04x\n",
+ LOCAL_PR_ARG, VIF_PR_ARG, __entry->active_links
+ )
+);
+
TRACE_EVENT(drv_change_vif_links,
TP_PROTO(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
@@ -2839,23 +2864,26 @@ TRACE_EVENT(api_sta_block_awake,
);
TRACE_EVENT(api_chswitch_done,
- TP_PROTO(struct ieee80211_sub_if_data *sdata, bool success),
+ TP_PROTO(struct ieee80211_sub_if_data *sdata, bool success,
+ unsigned int link_id),
- TP_ARGS(sdata, success),
+ TP_ARGS(sdata, success, link_id),
TP_STRUCT__entry(
VIF_ENTRY
__field(bool, success)
+ __field(unsigned int, link_id)
),
TP_fast_assign(
VIF_ASSIGN;
__entry->success = success;
+ __entry->link_id = link_id;
),
TP_printk(
- VIF_PR_FMT " success=%d",
- VIF_PR_ARG, __entry->success
+ VIF_PR_FMT " success=%d link_id=%d",
+ VIF_PR_ARG, __entry->success, __entry->link_id
)
);
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 7fe7280e8437..e448ab338448 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -43,7 +43,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx,
struct sk_buff *skb, int group_addr,
int next_frag_len)
{
- int rate, mrate, erp, dur, i, shift = 0;
+ int rate, mrate, erp, dur, i;
struct ieee80211_rate *txrate;
struct ieee80211_local *local = tx->local;
struct ieee80211_supported_band *sband;
@@ -58,10 +58,8 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx,
rcu_read_lock();
chanctx_conf = rcu_dereference(tx->sdata->vif.bss_conf.chanctx_conf);
- if (chanctx_conf) {
- shift = ieee80211_chandef_get_shift(&chanctx_conf->def);
+ if (chanctx_conf)
rate_flags = ieee80211_chandef_rate_flags(&chanctx_conf->def);
- }
rcu_read_unlock();
/* uh huh? */
@@ -143,7 +141,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx,
continue;
if (tx->sdata->vif.bss_conf.basic_rates & BIT(i))
- rate = DIV_ROUND_UP(r->bitrate, 1 << shift);
+ rate = r->bitrate;
switch (sband->band) {
case NL80211_BAND_2GHZ:
@@ -173,7 +171,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx,
if (rate == -1) {
/* No matching basic rate found; use highest suitable mandatory
* PHY rate */
- rate = DIV_ROUND_UP(mrate, 1 << shift);
+ rate = mrate;
}
/* Don't calculate ACKs for QoS Frames with NoAck Policy set */
@@ -185,8 +183,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx,
* (10 bytes + 4-byte FCS = 112 bits) plus SIFS; rounded up
* to closest integer */
dur = ieee80211_frame_duration(sband->band, 10, rate, erp,
- tx->sdata->vif.bss_conf.use_short_preamble,
- shift);
+ tx->sdata->vif.bss_conf.use_short_preamble);
if (next_frag_len) {
/* Frame is fragmented: duration increases with time needed to
@@ -195,8 +192,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx,
/* next fragment */
dur += ieee80211_frame_duration(sband->band, next_frag_len,
txrate->bitrate, erp,
- tx->sdata->vif.bss_conf.use_short_preamble,
- shift);
+ tx->sdata->vif.bss_conf.use_short_preamble);
}
return cpu_to_le16(dur);
@@ -266,8 +262,8 @@ ieee80211_tx_h_dynamic_ps(struct ieee80211_tx_data *tx)
IEEE80211_QUEUE_STOP_REASON_PS,
false);
ifmgd->flags &= ~IEEE80211_STA_NULLFUNC_ACKED;
- ieee80211_queue_work(&local->hw,
- &local->dynamic_ps_disable_work);
+ wiphy_work_queue(local->hw.wiphy,
+ &local->dynamic_ps_disable_work);
}
/* Don't restart the timer if we're not disassociated */
@@ -665,7 +661,8 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
}
if (unlikely(tx->key && tx->key->flags & KEY_FLAG_TAINTED &&
- !ieee80211_is_deauth(hdr->frame_control)))
+ !ieee80211_is_deauth(hdr->frame_control)) &&
+ tx->skb->protocol != tx->sdata->control_port_protocol)
return TX_DROP;
if (!skip_hw && tx->key &&
@@ -2166,6 +2163,11 @@ bool ieee80211_parse_tx_radiotap(struct sk_buff *skb,
rate_found = true;
break;
+ case IEEE80211_RADIOTAP_ANTENNA:
+ /* this can appear multiple times, keep a bitmap */
+ info->control.antennas |= BIT(*iterator.this_arg);
+ break;
+
case IEEE80211_RADIOTAP_DATA_RETRIES:
rate_retries = *iterator.this_arg;
break;
@@ -2260,8 +2262,17 @@ bool ieee80211_parse_tx_radiotap(struct sk_buff *skb,
}
if (rate_flags & IEEE80211_TX_RC_MCS) {
+ /* reset antennas if not enough */
+ if (IEEE80211_HT_MCS_CHAINS(rate) >
+ hweight8(info->control.antennas))
+ info->control.antennas = 0;
+
info->control.rates[0].idx = rate;
} else if (rate_flags & IEEE80211_TX_RC_VHT_MCS) {
+ /* reset antennas if not enough */
+ if (vht_nss > hweight8(info->control.antennas))
+ info->control.antennas = 0;
+
ieee80211_rate_set_vht(info->control.rates, vht_mcs,
vht_nss);
} else if (sband) {
@@ -2855,9 +2866,10 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata,
goto free;
}
- if (unlikely(!multicast && ((skb->sk &&
- skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS) ||
- ctrl_flags & IEEE80211_TX_CTL_REQ_TX_STATUS)))
+ if (unlikely(!multicast &&
+ ((skb->sk &&
+ skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS) ||
+ ctrl_flags & IEEE80211_TX_CTL_REQ_TX_STATUS)))
info_id = ieee80211_store_ack_skb(local, skb, &info_flags,
cookie);
@@ -2941,7 +2953,10 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata,
memset(info, 0, sizeof(*info));
info->flags = info_flags;
- info->ack_frame_id = info_id;
+ if (info_id) {
+ info->status_data = info_id;
+ info->status_data_idr = 1;
+ }
info->band = band;
if (likely(!cookie)) {
@@ -3033,7 +3048,7 @@ void ieee80211_check_fast_xmit(struct sta_info *sta)
sdata->vif.type == NL80211_IFTYPE_STATION)
goto out;
- if (!test_sta_flag(sta, WLAN_STA_AUTHORIZED))
+ if (!test_sta_flag(sta, WLAN_STA_AUTHORIZED) || !sta->uploaded)
goto out;
if (test_sta_flag(sta, WLAN_STA_PS_STA) ||
@@ -3085,10 +3100,11 @@ void ieee80211_check_fast_xmit(struct sta_info *sta)
/* DA SA BSSID */
build.da_offs = offsetof(struct ieee80211_hdr, addr1);
build.sa_offs = offsetof(struct ieee80211_hdr, addr2);
+ rcu_read_lock();
link = rcu_dereference(sdata->link[tdls_link_id]);
- if (WARN_ON_ONCE(!link))
- break;
- memcpy(hdr->addr3, link->u.mgd.bssid, ETH_ALEN);
+ if (!WARN_ON_ONCE(!link))
+ memcpy(hdr->addr3, link->u.mgd.bssid, ETH_ALEN);
+ rcu_read_unlock();
build.hdr_len = 24;
break;
}
@@ -3998,14 +4014,13 @@ ieee80211_txq_set_active(struct txq_info *txqi)
return;
sta = container_of(txqi->txq.sta, struct sta_info, sta);
- sta->airtime[txqi->txq.ac].last_active = (u32)jiffies;
+ sta->airtime[txqi->txq.ac].last_active = jiffies;
}
static bool
ieee80211_txq_keep_active(struct txq_info *txqi)
{
struct sta_info *sta;
- u32 diff;
if (!txqi->txq.sta)
return false;
@@ -4014,9 +4029,7 @@ ieee80211_txq_keep_active(struct txq_info *txqi)
if (ieee80211_sta_deficit(sta, txqi->txq.ac) >= 0)
return false;
- diff = (u32)jiffies - sta->airtime[txqi->txq.ac].last_active;
-
- return diff <= AIRTIME_ACTIVE_DURATION;
+ return ieee80211_sta_keep_active(sta, txqi->txq.ac);
}
struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 ac)
@@ -4474,6 +4487,8 @@ static void ieee80211_mlo_multicast_tx(struct net_device *dev,
* @dev: incoming interface
*
* On failure skb will be freed.
+ *
+ * Returns: the netdev TX status (but really only %NETDEV_TX_OK)
*/
netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
struct net_device *dev)
@@ -4638,9 +4653,12 @@ static void ieee80211_8023_xmit(struct ieee80211_sub_if_data *sdata,
}
if (unlikely(skb->sk &&
- skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS))
- info->ack_frame_id = ieee80211_store_ack_skb(local, skb,
- &info->flags, NULL);
+ skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS)) {
+ info->status_data = ieee80211_store_ack_skb(local, skb,
+ &info->flags, NULL);
+ if (info->status_data)
+ info->status_data_idr = 1;
+ }
dev_sw_netstats_tx_add(dev, skbs, len);
sta->deflink.tx_stats.packets[queue] += skbs;
@@ -5549,7 +5567,6 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
IEEE80211_INCLUDE_ALL_MBSSID_ELEMS,
NULL);
struct sk_buff *copy;
- int shift;
if (!bcn)
return bcn;
@@ -5569,8 +5586,7 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
if (!copy)
return bcn;
- shift = ieee80211_vif_get_shift(vif);
- ieee80211_tx_monitor(hw_to_local(hw), copy, 1, shift, false, NULL);
+ ieee80211_tx_monitor(hw_to_local(hw), copy, 1, false, NULL);
return bcn;
}
@@ -5920,7 +5936,7 @@ int ieee80211_reserve_tid(struct ieee80211_sta *pubsta, u8 tid)
int ret;
u32 queues;
- lockdep_assert_held(&local->sta_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
/* only some cases are supported right now */
switch (sdata->vif.type) {
@@ -5981,7 +5997,7 @@ void ieee80211_unreserve_tid(struct ieee80211_sta *pubsta, u8 tid)
struct sta_info *sta = container_of(pubsta, struct sta_info, sta);
struct ieee80211_sub_if_data *sdata = sta->sdata;
- lockdep_assert_held(&sdata->local->sta_mtx);
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
/* only some cases are supported right now */
switch (sdata->vif.type) {
@@ -6102,6 +6118,9 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev,
u32 flags = 0;
int err;
+ /* mutex lock is only needed for incrementing the cookie counter */
+ lockdep_assert_wiphy(local->hw.wiphy);
+
/* Only accept CONTROL_PORT_PROTOCOL configured in CONNECT/ASSOCIATE
* or Pre-Authentication
*/
@@ -6192,15 +6211,10 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev,
rcu_read_unlock();
start_xmit:
- /* mutex lock is only needed for incrementing the cookie counter */
- mutex_lock(&local->mtx);
-
local_bh_disable();
__ieee80211_subif_start_xmit(skb, skb->dev, flags, ctrl_flags, cookie);
local_bh_enable();
- mutex_unlock(&local->mtx);
-
return 0;
}
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 8a6917cf63cf..643c54855be6 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -24,6 +24,7 @@
#include <net/net_namespace.h>
#include <net/cfg80211.h>
#include <net/rtnetlink.h>
+#include <kunit/visibility.h>
#include "ieee80211_i.h"
#include "driver-ops.h"
@@ -109,8 +110,7 @@ void ieee80211_tx_set_protected(struct ieee80211_tx_data *tx)
}
int ieee80211_frame_duration(enum nl80211_band band, size_t len,
- int rate, int erp, int short_preamble,
- int shift)
+ int rate, int erp, int short_preamble)
{
int dur;
@@ -121,9 +121,6 @@ int ieee80211_frame_duration(enum nl80211_band band, size_t len,
*
* rate is in 100 kbps, so divident is multiplied by 10 in the
* DIV_ROUND_UP() operations.
- *
- * shift may be 2 for 5 MHz channels or 1 for 10 MHz channels, and
- * is assumed to be 0 otherwise.
*/
if (band == NL80211_BAND_5GHZ || erp) {
@@ -144,12 +141,6 @@ int ieee80211_frame_duration(enum nl80211_band band, size_t len,
dur += 16; /* IEEE 802.11-2012 18.3.2.4: T_PREAMBLE = 16 usec */
dur += 4; /* IEEE 802.11-2012 18.3.2.4: T_SIGNAL = 4 usec */
- /* IEEE 802.11-2012 18.3.2.4: all values above are:
- * * times 4 for 5 MHz
- * * times 2 for 10 MHz
- */
- dur *= 1 << shift;
-
/* rates should already consider the channel bandwidth,
* don't apply divisor again.
*/
@@ -184,7 +175,7 @@ __le16 ieee80211_generic_frame_duration(struct ieee80211_hw *hw,
{
struct ieee80211_sub_if_data *sdata;
u16 dur;
- int erp, shift = 0;
+ int erp;
bool short_preamble = false;
erp = 0;
@@ -193,11 +184,10 @@ __le16 ieee80211_generic_frame_duration(struct ieee80211_hw *hw,
short_preamble = sdata->vif.bss_conf.use_short_preamble;
if (sdata->deflink.operating_11g_mode)
erp = rate->flags & IEEE80211_RATE_ERP_G;
- shift = ieee80211_vif_get_shift(vif);
}
dur = ieee80211_frame_duration(band, frame_len, rate->bitrate, erp,
- short_preamble, shift);
+ short_preamble);
return cpu_to_le16(dur);
}
@@ -211,7 +201,7 @@ __le16 ieee80211_rts_duration(struct ieee80211_hw *hw,
struct ieee80211_rate *rate;
struct ieee80211_sub_if_data *sdata;
bool short_preamble;
- int erp, shift = 0, bitrate;
+ int erp, bitrate;
u16 dur;
struct ieee80211_supported_band *sband;
@@ -227,20 +217,19 @@ __le16 ieee80211_rts_duration(struct ieee80211_hw *hw,
short_preamble = sdata->vif.bss_conf.use_short_preamble;
if (sdata->deflink.operating_11g_mode)
erp = rate->flags & IEEE80211_RATE_ERP_G;
- shift = ieee80211_vif_get_shift(vif);
}
- bitrate = DIV_ROUND_UP(rate->bitrate, 1 << shift);
+ bitrate = rate->bitrate;
/* CTS duration */
dur = ieee80211_frame_duration(sband->band, 10, bitrate,
- erp, short_preamble, shift);
+ erp, short_preamble);
/* Data frame duration */
dur += ieee80211_frame_duration(sband->band, frame_len, bitrate,
- erp, short_preamble, shift);
+ erp, short_preamble);
/* ACK duration */
dur += ieee80211_frame_duration(sband->band, 10, bitrate,
- erp, short_preamble, shift);
+ erp, short_preamble);
return cpu_to_le16(dur);
}
@@ -255,7 +244,7 @@ __le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw,
struct ieee80211_rate *rate;
struct ieee80211_sub_if_data *sdata;
bool short_preamble;
- int erp, shift = 0, bitrate;
+ int erp, bitrate;
u16 dur;
struct ieee80211_supported_band *sband;
@@ -270,18 +259,17 @@ __le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw,
short_preamble = sdata->vif.bss_conf.use_short_preamble;
if (sdata->deflink.operating_11g_mode)
erp = rate->flags & IEEE80211_RATE_ERP_G;
- shift = ieee80211_vif_get_shift(vif);
}
- bitrate = DIV_ROUND_UP(rate->bitrate, 1 << shift);
+ bitrate = rate->bitrate;
/* Data frame duration */
dur = ieee80211_frame_duration(sband->band, frame_len, bitrate,
- erp, short_preamble, shift);
+ erp, short_preamble);
if (!(frame_txctl->flags & IEEE80211_TX_CTL_NO_ACK)) {
/* ACK duration */
dur += ieee80211_frame_duration(sband->band, 10, bitrate,
- erp, short_preamble, shift);
+ erp, short_preamble);
}
return cpu_to_le16(dur);
@@ -705,6 +693,19 @@ void __ieee80211_flush_queues(struct ieee80211_local *local,
IEEE80211_QUEUE_STOP_REASON_FLUSH,
false);
+ if (drop) {
+ struct sta_info *sta;
+
+ /* Purge the queues, so the frames on them won't be
+ * sent during __ieee80211_wake_queue()
+ */
+ list_for_each_entry(sta, &local->sta_list, list) {
+ if (sdata != sta->sdata)
+ continue;
+ ieee80211_purge_sta_txqs(sta);
+ }
+ }
+
drv_flush(local, sdata, queues, drop);
ieee80211_wake_queues_by_reason(&local->hw, queues,
@@ -1002,6 +1003,19 @@ ieee80211_parse_extension_element(u32 *crc,
}
}
break;
+ case WLAN_EID_EXT_BANDWIDTH_INDICATION:
+ if (ieee80211_bandwidth_indication_size_ok(data, len))
+ elems->bandwidth_indication = data;
+ calc_crc = true;
+ break;
+ case WLAN_EID_EXT_TID_TO_LINK_MAPPING:
+ calc_crc = true;
+ if (ieee80211_tid_to_link_map_size_ok(data, len) &&
+ elems->ttlm_num < ARRAY_SIZE(elems->ttlm)) {
+ elems->ttlm[elems->ttlm_num] = (void *)data;
+ elems->ttlm_num++;
+ }
+ break;
}
if (crc && calc_crc)
@@ -1017,11 +1031,11 @@ _ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params,
bool calc_crc = params->filter != 0;
DECLARE_BITMAP(seen_elems, 256);
u32 crc = params->crc;
- const u8 *ie;
bitmap_zero(seen_elems, 256);
for_each_element(elem, params->start, params->len) {
+ const struct element *subelem;
bool elem_parse_failed;
u8 id = elem->id;
u8 elen = elem->datalen;
@@ -1279,15 +1293,27 @@ _ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params,
}
/*
* This is a bit tricky, but as we only care about
- * the wide bandwidth channel switch element, so
- * just parse it out manually.
+ * a few elements, parse them out manually.
*/
- ie = cfg80211_find_ie(WLAN_EID_WIDE_BW_CHANNEL_SWITCH,
- pos, elen);
- if (ie) {
- if (ie[1] >= sizeof(*elems->wide_bw_chansw_ie))
+ subelem = cfg80211_find_elem(WLAN_EID_WIDE_BW_CHANNEL_SWITCH,
+ pos, elen);
+ if (subelem) {
+ if (subelem->datalen >= sizeof(*elems->wide_bw_chansw_ie))
elems->wide_bw_chansw_ie =
- (void *)(ie + 2);
+ (void *)subelem->data;
+ else
+ elem_parse_failed = true;
+ }
+
+ subelem = cfg80211_find_ext_elem(WLAN_EID_EXT_BANDWIDTH_INDICATION,
+ pos, elen);
+ if (subelem) {
+ const void *edata = subelem->data + 1;
+ u8 edatalen = subelem->datalen - 1;
+
+ if (ieee80211_bandwidth_indication_size_ok(edata,
+ edatalen))
+ elems->bandwidth_indication = edata;
else
elem_parse_failed = true;
}
@@ -1599,7 +1625,7 @@ ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params)
int nontransmitted_profile_len = 0;
size_t scratch_len = 3 * params->len;
- elems = kzalloc(sizeof(*elems) + scratch_len, GFP_ATOMIC);
+ elems = kzalloc(struct_size(elems, scratch, scratch_len), GFP_ATOMIC);
if (!elems)
return NULL;
elems->ie_start = params->start;
@@ -1654,6 +1680,7 @@ ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params)
return elems;
}
+EXPORT_SYMBOL_IF_KUNIT(ieee802_11_parse_elems_full);
void ieee80211_regulatory_limit_wmm_params(struct ieee80211_sub_if_data *sdata,
struct ieee80211_tx_queue_params
@@ -1942,7 +1969,6 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_sub_if_data *sdata,
u8 rates[32];
int num_rates;
int ext_rates_len;
- int shift;
u32 rate_flags;
bool have_80mhz = false;
@@ -1953,7 +1979,6 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_sub_if_data *sdata,
return 0;
rate_flags = ieee80211_chandef_rate_flags(chandef);
- shift = ieee80211_chandef_get_shift(chandef);
/* For direct scan add S1G IE and consider its override bits */
if (band == NL80211_BAND_S1GHZ) {
@@ -1971,8 +1996,7 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_sub_if_data *sdata,
continue;
rates[num_rates++] =
- (u8) DIV_ROUND_UP(sband->bitrates[i].bitrate,
- (1 << shift) * 5);
+ (u8) DIV_ROUND_UP(sband->bitrates[i].bitrate, 5);
}
supp_rates_len = min_t(int, num_rates, 8);
@@ -2265,14 +2289,13 @@ u32 ieee80211_sta_get_rates(struct ieee80211_sub_if_data *sdata,
struct ieee80211_supported_band *sband;
size_t num_rates;
u32 supp_rates, rate_flags;
- int i, j, shift;
+ int i, j;
sband = sdata->local->hw.wiphy->bands[band];
if (WARN_ON(!sband))
return 1;
rate_flags = ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chandef);
- shift = ieee80211_vif_get_shift(&sdata->vif);
num_rates = sband->n_bitrates;
supp_rates = 0;
@@ -2298,8 +2321,7 @@ u32 ieee80211_sta_get_rates(struct ieee80211_sub_if_data *sdata,
!= rate_flags)
continue;
- brate = DIV_ROUND_UP(sband->bitrates[j].bitrate,
- 1 << shift);
+ brate = sband->bitrates[j].bitrate;
if (brate == own_rate) {
supp_rates |= BIT(j);
@@ -2316,9 +2338,10 @@ void ieee80211_stop_device(struct ieee80211_local *local)
ieee80211_led_radio(local, false);
ieee80211_mod_tpt_led_trig(local, 0, IEEE80211_TPT_LEDTRIG_FL_RADIO);
- cancel_work_sync(&local->reconfig_filter);
+ wiphy_work_cancel(local->hw.wiphy, &local->reconfig_filter);
flush_workqueue(local->workqueue);
+ wiphy_work_flush(local->hw.wiphy, NULL);
drv_stop(local);
}
@@ -2340,8 +2363,8 @@ static void ieee80211_flush_completed_scan(struct ieee80211_local *local,
*/
if (aborted)
set_bit(SCAN_ABORTED, &local->scanning);
- ieee80211_queue_delayed_work(&local->hw, &local->scan_work, 0);
- flush_delayed_work(&local->scan_work);
+ wiphy_delayed_work_queue(local->hw.wiphy, &local->scan_work, 0);
+ wiphy_delayed_work_flush(local->hw.wiphy, &local->scan_work);
}
}
@@ -2350,6 +2373,8 @@ static void ieee80211_handle_reconfig_failure(struct ieee80211_local *local)
struct ieee80211_sub_if_data *sdata;
struct ieee80211_chanctx *ctx;
+ lockdep_assert_wiphy(local->hw.wiphy);
+
/*
* We get here if during resume the device can't be restarted properly.
* We might also get here if this happens during HW reset, which is a
@@ -2378,10 +2403,8 @@ static void ieee80211_handle_reconfig_failure(struct ieee80211_local *local)
/* Mark channel contexts as not being in the driver any more to avoid
* removing them from the driver during the shutdown process...
*/
- mutex_lock(&local->chanctx_mtx);
list_for_each_entry(ctx, &local->chanctx_list, list)
ctx->driver_present = false;
- mutex_unlock(&local->chanctx_mtx);
}
static void ieee80211_assign_chanctx(struct ieee80211_local *local,
@@ -2391,17 +2414,17 @@ static void ieee80211_assign_chanctx(struct ieee80211_local *local,
struct ieee80211_chanctx_conf *conf;
struct ieee80211_chanctx *ctx;
+ lockdep_assert_wiphy(local->hw.wiphy);
+
if (!local->use_chanctx)
return;
- mutex_lock(&local->chanctx_mtx);
conf = rcu_dereference_protected(link->conf->chanctx_conf,
- lockdep_is_held(&local->chanctx_mtx));
+ lockdep_is_held(&local->hw.wiphy->mtx));
if (conf) {
ctx = container_of(conf, struct ieee80211_chanctx, conf);
drv_assign_vif_chanctx(local, sdata, link->conf, ctx);
}
- mutex_unlock(&local->chanctx_mtx);
}
static void ieee80211_reconfig_stations(struct ieee80211_sub_if_data *sdata)
@@ -2409,8 +2432,9 @@ static void ieee80211_reconfig_stations(struct ieee80211_sub_if_data *sdata)
struct ieee80211_local *local = sdata->local;
struct sta_info *sta;
+ lockdep_assert_wiphy(local->hw.wiphy);
+
/* add STAs back */
- mutex_lock(&local->sta_mtx);
list_for_each_entry(sta, &local->sta_list, list) {
enum ieee80211_sta_state state;
@@ -2422,7 +2446,6 @@ static void ieee80211_reconfig_stations(struct ieee80211_sub_if_data *sdata)
WARN_ON(drv_sta_state(local, sta->sdata, sta, state,
state + 1));
}
- mutex_unlock(&local->sta_mtx);
}
static int ieee80211_reconfig_nan(struct ieee80211_sub_if_data *sdata)
@@ -2509,6 +2532,8 @@ int ieee80211_reconfig(struct ieee80211_local *local)
bool suspended = local->suspended;
bool in_reconfig = false;
+ lockdep_assert_wiphy(local->hw.wiphy);
+
/* nothing to do if HW shouldn't run */
if (!local->open_count)
goto wake_up;
@@ -2624,12 +2649,10 @@ int ieee80211_reconfig(struct ieee80211_local *local)
/* add channel contexts */
if (local->use_chanctx) {
- mutex_lock(&local->chanctx_mtx);
list_for_each_entry(ctx, &local->chanctx_list, list)
if (ctx->replace_state !=
IEEE80211_CHANCTX_REPLACES_OTHER)
WARN_ON(drv_add_chanctx(local, ctx));
- mutex_unlock(&local->chanctx_mtx);
sdata = wiphy_dereference(local->hw.wiphy,
local->monitor_sdata);
@@ -2663,7 +2686,6 @@ int ieee80211_reconfig(struct ieee80211_local *local)
if (!ieee80211_sdata_running(sdata))
continue;
- sdata_lock(sdata);
if (ieee80211_vif_is_mld(&sdata->vif)) {
struct ieee80211_bss_conf *old[IEEE80211_MLD_MAX_NUM_LINKS] = {
[0] = &sdata->vif.bss_conf,
@@ -2795,7 +2817,6 @@ int ieee80211_reconfig(struct ieee80211_local *local)
case NL80211_IFTYPE_NAN:
res = ieee80211_reconfig_nan(sdata);
if (res < 0) {
- sdata_unlock(sdata);
ieee80211_handle_reconfig_failure(local);
return res;
}
@@ -2813,7 +2834,6 @@ int ieee80211_reconfig(struct ieee80211_local *local)
WARN_ON(1);
break;
}
- sdata_unlock(sdata);
if (active_links)
ieee80211_set_active_links(&sdata->vif, active_links);
@@ -2843,7 +2863,6 @@ int ieee80211_reconfig(struct ieee80211_local *local)
if (!ieee80211_sdata_running(sdata))
continue;
- sdata_lock(sdata);
switch (sdata->vif.type) {
case NL80211_IFTYPE_AP_VLAN:
case NL80211_IFTYPE_AP:
@@ -2852,7 +2871,6 @@ int ieee80211_reconfig(struct ieee80211_local *local)
default:
break;
}
- sdata_unlock(sdata);
}
/* add back keys */
@@ -2860,11 +2878,10 @@ int ieee80211_reconfig(struct ieee80211_local *local)
ieee80211_reenable_keys(sdata);
/* Reconfigure sched scan if it was interrupted by FW restart */
- mutex_lock(&local->mtx);
sched_scan_sdata = rcu_dereference_protected(local->sched_scan_sdata,
- lockdep_is_held(&local->mtx));
+ lockdep_is_held(&local->hw.wiphy->mtx));
sched_scan_req = rcu_dereference_protected(local->sched_scan_req,
- lockdep_is_held(&local->mtx));
+ lockdep_is_held(&local->hw.wiphy->mtx));
if (sched_scan_sdata && sched_scan_req)
/*
* Sched scan stopped, but we don't want to report it. Instead,
@@ -2880,7 +2897,6 @@ int ieee80211_reconfig(struct ieee80211_local *local)
RCU_INIT_POINTER(local->sched_scan_req, NULL);
sched_scan_stopped = true;
}
- mutex_unlock(&local->mtx);
if (sched_scan_stopped)
cfg80211_sched_scan_stopped_locked(local->hw.wiphy, 0);
@@ -2901,16 +2917,12 @@ int ieee80211_reconfig(struct ieee80211_local *local)
* are active. This is really a workaround though.
*/
if (ieee80211_hw_check(hw, AMPDU_AGGREGATION)) {
- mutex_lock(&local->sta_mtx);
-
list_for_each_entry(sta, &local->sta_list, list) {
if (!local->resuming)
ieee80211_sta_tear_down_BA_sessions(
sta, AGG_STOP_LOCAL_REQUEST);
clear_sta_flag(sta, WLAN_STA_BLOCK_BA);
}
-
- mutex_unlock(&local->sta_mtx);
}
/*
@@ -2926,9 +2938,7 @@ int ieee80211_reconfig(struct ieee80211_local *local)
barrier();
/* Restart deferred ROCs */
- mutex_lock(&local->mtx);
ieee80211_start_next_roc(local);
- mutex_unlock(&local->mtx);
/* Requeue all works */
list_for_each_entry(sdata, &local->interfaces, list)
@@ -2989,6 +2999,8 @@ static void ieee80211_reconfig_disconnect(struct ieee80211_vif *vif, u8 flag)
sdata = vif_to_sdata(vif);
local = sdata->local;
+ lockdep_assert_wiphy(local->hw.wiphy);
+
if (WARN_ON(flag & IEEE80211_SDATA_DISCONNECT_RESUME &&
!local->resuming))
return;
@@ -3002,10 +3014,8 @@ static void ieee80211_reconfig_disconnect(struct ieee80211_vif *vif, u8 flag)
sdata->flags |= flag;
- mutex_lock(&local->key_mtx);
list_for_each_entry(key, &sdata->key_list, list)
key->flags |= KEY_FLAG_TAINTED;
- mutex_unlock(&local->key_mtx);
}
void ieee80211_hw_restart_disconnect(struct ieee80211_vif *vif)
@@ -3027,10 +3037,10 @@ void ieee80211_recalc_smps(struct ieee80211_sub_if_data *sdata,
struct ieee80211_chanctx_conf *chanctx_conf;
struct ieee80211_chanctx *chanctx;
- mutex_lock(&local->chanctx_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
chanctx_conf = rcu_dereference_protected(link->conf->chanctx_conf,
- lockdep_is_held(&local->chanctx_mtx));
+ lockdep_is_held(&local->hw.wiphy->mtx));
/*
* This function can be called from a work, thus it may be possible
@@ -3039,12 +3049,10 @@ void ieee80211_recalc_smps(struct ieee80211_sub_if_data *sdata,
* So nothing should be done in such case.
*/
if (!chanctx_conf)
- goto unlock;
+ return;
chanctx = container_of(chanctx_conf, struct ieee80211_chanctx, conf);
ieee80211_recalc_smps_chanctx(local, chanctx);
- unlock:
- mutex_unlock(&local->chanctx_mtx);
}
void ieee80211_recalc_min_chandef(struct ieee80211_sub_if_data *sdata,
@@ -3055,7 +3063,7 @@ void ieee80211_recalc_min_chandef(struct ieee80211_sub_if_data *sdata,
struct ieee80211_chanctx *chanctx;
int i;
- mutex_lock(&local->chanctx_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
for (i = 0; i < ARRAY_SIZE(sdata->vif.link_conf); i++) {
struct ieee80211_bss_conf *bss_conf;
@@ -3071,9 +3079,9 @@ void ieee80211_recalc_min_chandef(struct ieee80211_sub_if_data *sdata,
}
chanctx_conf = rcu_dereference_protected(bss_conf->chanctx_conf,
- lockdep_is_held(&local->chanctx_mtx));
+ lockdep_is_held(&local->hw.wiphy->mtx));
/*
- * Since we hold the chanctx_mtx (checked above)
+ * Since we hold the wiphy mutex (checked above)
* we can take the chanctx_conf pointer out of the
* RCU critical section, it cannot go away without
* the mutex. Just the way we reached it could - in
@@ -3083,14 +3091,12 @@ void ieee80211_recalc_min_chandef(struct ieee80211_sub_if_data *sdata,
rcu_read_unlock();
if (!chanctx_conf)
- goto unlock;
+ return;
chanctx = container_of(chanctx_conf, struct ieee80211_chanctx,
conf);
ieee80211_recalc_chanctx_min_def(local, chanctx, NULL);
}
- unlock:
- mutex_unlock(&local->chanctx_mtx);
}
size_t ieee80211_ie_split_vendor(const u8 *ies, size_t ielen, size_t offset)
@@ -3778,12 +3784,10 @@ bool ieee80211_chandef_vht_oper(struct ieee80211_hw *hw, u32 vht_cap_info,
return true;
}
-void ieee80211_chandef_eht_oper(const struct ieee80211_eht_operation *eht_oper,
+void ieee80211_chandef_eht_oper(const struct ieee80211_eht_operation_info *info,
bool support_160, bool support_320,
struct cfg80211_chan_def *chandef)
{
- struct ieee80211_eht_operation_info *info = (void *)eht_oper->optional;
-
chandef->center_freq1 =
ieee80211_channel_to_frequency(info->ccfs0,
chandef->chan->band);
@@ -3952,8 +3956,9 @@ bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_sub_if_data *sdata,
support_320 =
eht_phy_cap & IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ;
- ieee80211_chandef_eht_oper(eht_oper, support_160,
- support_320, &he_chandef);
+ ieee80211_chandef_eht_oper((const void *)eht_oper->optional,
+ support_160, support_320,
+ &he_chandef);
}
if (!cfg80211_chandef_valid(&he_chandef)) {
@@ -4012,7 +4017,6 @@ int ieee80211_parse_bitrates(enum nl80211_chan_width width,
const u8 *srates, int srates_len, u32 *rates)
{
u32 rate_flags = ieee80211_chanwidth_rate_flags(width);
- int shift = ieee80211_chanwidth_get_shift(width);
struct ieee80211_rate *br;
int brate, rate, i, j, count = 0;
@@ -4026,7 +4030,7 @@ int ieee80211_parse_bitrates(enum nl80211_chan_width width,
if ((rate_flags & br->flags) != rate_flags)
continue;
- brate = DIV_ROUND_UP(br->bitrate, (1 << shift) * 5);
+ brate = DIV_ROUND_UP(br->bitrate, 5);
if (brate == rate) {
*rates |= BIT(j);
count++;
@@ -4043,12 +4047,11 @@ int ieee80211_add_srates_ie(struct ieee80211_sub_if_data *sdata,
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_supported_band *sband;
- int rate, shift;
+ int rate;
u8 i, rates, *pos;
u32 basic_rates = sdata->vif.bss_conf.basic_rates;
u32 rate_flags;
- shift = ieee80211_vif_get_shift(&sdata->vif);
rate_flags = ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chandef);
sband = local->hw.wiphy->bands[band];
rates = 0;
@@ -4073,8 +4076,7 @@ int ieee80211_add_srates_ie(struct ieee80211_sub_if_data *sdata,
if (need_basic && basic_rates & BIT(i))
basic = 0x80;
- rate = DIV_ROUND_UP(sband->bitrates[i].bitrate,
- 5 * (1 << shift));
+ rate = DIV_ROUND_UP(sband->bitrates[i].bitrate, 5);
*pos++ = basic | (u8) rate;
}
@@ -4087,13 +4089,12 @@ int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata,
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_supported_band *sband;
- int rate, shift;
+ int rate;
u8 i, exrates, *pos;
u32 basic_rates = sdata->vif.bss_conf.basic_rates;
u32 rate_flags;
rate_flags = ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chandef);
- shift = ieee80211_vif_get_shift(&sdata->vif);
sband = local->hw.wiphy->bands[band];
exrates = 0;
@@ -4122,8 +4123,7 @@ int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata,
continue;
if (need_basic && basic_rates & BIT(i))
basic = 0x80;
- rate = DIV_ROUND_UP(sband->bitrates[i].bitrate,
- 5 * (1 << shift));
+ rate = DIV_ROUND_UP(sband->bitrates[i].bitrate, 5);
*pos++ = basic | (u8) rate;
}
}
@@ -4167,6 +4167,8 @@ u8 ieee80211_mcs_to_chains(const struct ieee80211_mcs_info *mcs)
* This function calculates the RX timestamp at the given MPDU offset, taking
* into account what the RX timestamp was. An offset of 0 will just normalize
* the timestamp to TSF at beginning of MPDU reception.
+ *
+ * Returns: the calculated timestamp
*/
u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local,
struct ieee80211_rx_status *status,
@@ -4174,6 +4176,7 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local,
unsigned int mpdu_offset)
{
u64 ts = status->mactime;
+ bool mactime_plcp_start;
struct rate_info ri;
u16 rate;
u8 n_ltf;
@@ -4181,6 +4184,9 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local,
if (WARN_ON(!ieee80211_have_rx_timestamp(status)))
return 0;
+ mactime_plcp_start = (status->flag & RX_FLAG_MACTIME) ==
+ RX_FLAG_MACTIME_PLCP_START;
+
memset(&ri, 0, sizeof(ri));
ri.bw = status->bw;
@@ -4195,7 +4201,7 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local,
if (status->enc_flags & RX_ENC_FLAG_SHORT_GI)
ri.flags |= RATE_INFO_FLAGS_SHORT_GI;
/* TODO/FIXME: is this right? handle other PPDUs */
- if (status->flag & RX_FLAG_MACTIME_PLCP_START) {
+ if (mactime_plcp_start) {
mpdu_offset += 2;
ts += 36;
}
@@ -4212,7 +4218,7 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local,
* See P802.11ax_D6.0, section 27.3.4 for
* VHT PPDU format.
*/
- if (status->flag & RX_FLAG_MACTIME_PLCP_START) {
+ if (mactime_plcp_start) {
mpdu_offset += 2;
ts += 36;
@@ -4236,7 +4242,7 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local,
* See P802.11REVmd_D3.0, section 19.3.2 for
* HT PPDU format.
*/
- if (status->flag & RX_FLAG_MACTIME_PLCP_START) {
+ if (mactime_plcp_start) {
mpdu_offset += 2;
if (status->enc_flags & RX_ENC_FLAG_HT_GF)
ts += 24;
@@ -4264,7 +4270,7 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local,
* See P802.11REVmd_D3.0, section 21.3.2 for
* VHT PPDU format.
*/
- if (status->flag & RX_FLAG_MACTIME_PLCP_START) {
+ if (mactime_plcp_start) {
mpdu_offset += 2;
ts += 36;
@@ -4282,25 +4288,13 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local,
fallthrough;
case RX_ENC_LEGACY: {
struct ieee80211_supported_band *sband;
- int shift = 0;
- int bitrate;
-
- switch (status->bw) {
- case RATE_INFO_BW_10:
- shift = 1;
- break;
- case RATE_INFO_BW_5:
- shift = 2;
- break;
- }
sband = local->hw.wiphy->bands[status->band];
- bitrate = sband->bitrates[status->rate_idx].bitrate;
- ri.legacy = DIV_ROUND_UP(bitrate, (1 << shift));
+ ri.legacy = sband->bitrates[status->rate_idx].bitrate;
- if (status->flag & RX_FLAG_MACTIME_PLCP_START) {
+ if (mactime_plcp_start) {
if (status->band == NL80211_BAND_5GHZ) {
- ts += 20 << shift;
+ ts += 20;
mpdu_offset += 2;
} else if (status->enc_flags & RX_ENC_FLAG_SHORTPRE) {
ts += 96;
@@ -4320,7 +4314,7 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local,
return 0;
/* rewind from end of MPDU */
- if (status->flag & RX_FLAG_MACTIME_END)
+ if ((status->flag & RX_FLAG_MACTIME) == RX_FLAG_MACTIME_END)
ts -= mpdu_len * 8 * 10 / rate;
ts += mpdu_offset * 8 * 10 / rate;
@@ -4333,16 +4327,15 @@ void ieee80211_dfs_cac_cancel(struct ieee80211_local *local)
struct ieee80211_sub_if_data *sdata;
struct cfg80211_chan_def chandef;
- /* for interface list, to avoid linking iflist_mtx and chanctx_mtx */
lockdep_assert_wiphy(local->hw.wiphy);
- mutex_lock(&local->mtx);
list_for_each_entry(sdata, &local->interfaces, list) {
/* it might be waiting for the local->mtx, but then
* by the time it gets it, sdata->wdev.cac_started
* will no longer be true
*/
- cancel_delayed_work(&sdata->deflink.dfs_cac_timer_work);
+ wiphy_delayed_work_cancel(local->hw.wiphy,
+ &sdata->deflink.dfs_cac_timer_work);
if (sdata->wdev.cac_started) {
chandef = sdata->vif.bss_conf.chandef;
@@ -4353,10 +4346,10 @@ void ieee80211_dfs_cac_cancel(struct ieee80211_local *local)
GFP_KERNEL);
}
}
- mutex_unlock(&local->mtx);
}
-void ieee80211_dfs_radar_detected_work(struct work_struct *work)
+void ieee80211_dfs_radar_detected_work(struct wiphy *wiphy,
+ struct wiphy_work *work)
{
struct ieee80211_local *local =
container_of(work, struct ieee80211_local, radar_detected_work);
@@ -4364,7 +4357,8 @@ void ieee80211_dfs_radar_detected_work(struct work_struct *work)
struct ieee80211_chanctx *ctx;
int num_chanctx = 0;
- mutex_lock(&local->chanctx_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
+
list_for_each_entry(ctx, &local->chanctx_list, list) {
if (ctx->replace_state == IEEE80211_CHANCTX_REPLACES_OTHER)
continue;
@@ -4372,11 +4366,8 @@ void ieee80211_dfs_radar_detected_work(struct work_struct *work)
num_chanctx++;
chandef = ctx->conf.def;
}
- mutex_unlock(&local->chanctx_mtx);
- wiphy_lock(local->hw.wiphy);
ieee80211_dfs_cac_cancel(local);
- wiphy_unlock(local->hw.wiphy);
if (num_chanctx > 1)
/* XXX: multi-channel is not supported yet */
@@ -4391,7 +4382,7 @@ void ieee80211_radar_detected(struct ieee80211_hw *hw)
trace_api_radar_detected(local);
- schedule_work(&local->radar_detected_work);
+ wiphy_work_queue(hw->wiphy, &local->radar_detected_work);
}
EXPORT_SYMBOL(ieee80211_radar_detected);
@@ -4775,7 +4766,7 @@ static u8 ieee80211_chanctx_radar_detect(struct ieee80211_local *local,
struct ieee80211_link_data *link;
u8 radar_detect = 0;
- lockdep_assert_held(&local->chanctx_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
if (WARN_ON(ctx->replace_state == IEEE80211_CHANCTX_WILL_BE_REPLACED))
return 0;
@@ -4816,7 +4807,7 @@ int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata,
.radar_detect = radar_detect,
};
- lockdep_assert_held(&local->chanctx_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
if (WARN_ON(hweight32(radar_detect) > 1))
return -EINVAL;
@@ -4906,7 +4897,7 @@ int ieee80211_max_num_channels(struct ieee80211_local *local)
int err;
struct iface_combination_params params = {0};
- lockdep_assert_held(&local->chanctx_mtx);
+ lockdep_assert_wiphy(local->hw.wiphy);
list_for_each_entry(ctx, &local->chanctx_list, list) {
if (ctx->replace_state == IEEE80211_CHANCTX_WILL_BE_REPLACED)
@@ -5118,31 +5109,3 @@ u8 *ieee80211_ie_build_eht_cap(u8 *pos,
return pos;
}
-
-void ieee80211_fragment_element(struct sk_buff *skb, u8 *len_pos, u8 frag_id)
-{
- unsigned int elem_len;
-
- if (!len_pos)
- return;
-
- elem_len = skb->data + skb->len - len_pos - 1;
-
- while (elem_len > 255) {
- /* this one is 255 */
- *len_pos = 255;
- /* remaining data gets smaller */
- elem_len -= 255;
- /* make space for the fragment ID/len in SKB */
- skb_put(skb, 2);
- /* shift back the remaining data to place fragment ID/len */
- memmove(len_pos + 255 + 3, len_pos + 255 + 1, elem_len);
- /* place the fragment ID */
- len_pos += 255 + 1;
- *len_pos = frag_id;
- /* and point to fragment length to update later */
- len_pos++;
- }
-
- *len_pos = elem_len;
-}
diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
index c1250aa47808..b3a5c3e96a72 100644
--- a/net/mac80211/vht.c
+++ b/net/mac80211/vht.c
@@ -4,7 +4,7 @@
*
* Portions of this file
* Copyright(c) 2015 - 2016 Intel Deutschland GmbH
- * Copyright (C) 2018 - 2022 Intel Corporation
+ * Copyright (C) 2018 - 2023 Intel Corporation
*/
#include <linux/ieee80211.h>
@@ -116,12 +116,14 @@ void
ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
struct ieee80211_supported_band *sband,
const struct ieee80211_vht_cap *vht_cap_ie,
+ const struct ieee80211_vht_cap *vht_cap_ie2,
struct link_sta_info *link_sta)
{
struct ieee80211_sta_vht_cap *vht_cap = &link_sta->pub->vht_cap;
struct ieee80211_sta_vht_cap own_cap;
u32 cap_info, i;
bool have_80mhz;
+ u32 mpdu_len;
memset(vht_cap, 0, sizeof(*vht_cap));
@@ -318,10 +320,20 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
link_sta->pub->bandwidth = ieee80211_sta_cur_vht_bw(link_sta);
/*
+ * Work around the Cisco 9115 FW 17.3 bug by taking the min of
+ * both reported MPDU lengths.
+ */
+ mpdu_len = vht_cap->cap & IEEE80211_VHT_CAP_MAX_MPDU_MASK;
+ if (vht_cap_ie2)
+ mpdu_len = min_t(u32, mpdu_len,
+ le32_get_bits(vht_cap_ie2->vht_cap_info,
+ IEEE80211_VHT_CAP_MAX_MPDU_MASK));
+
+ /*
* FIXME - should the amsdu len be per link? store per link
* and maintain a minimum?
*/
- switch (vht_cap->cap & IEEE80211_VHT_CAP_MAX_MPDU_MASK) {
+ switch (mpdu_len) {
case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454:
link_sta->pub->agg.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_11454;
break;
diff --git a/net/mac80211/wbrf.c b/net/mac80211/wbrf.c
new file mode 100644
index 000000000000..3a8612309137
--- /dev/null
+++ b/net/mac80211/wbrf.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Wifi Band Exclusion Interface for WLAN
+ * Copyright (C) 2023 Advanced Micro Devices
+ *
+ */
+
+#include <linux/acpi_amd_wbrf.h>
+#include <linux/units.h>
+#include <net/cfg80211.h>
+#include "ieee80211_i.h"
+
+void ieee80211_check_wbrf_support(struct ieee80211_local *local)
+{
+ struct wiphy *wiphy = local->hw.wiphy;
+ struct device *dev;
+
+ if (!wiphy)
+ return;
+
+ dev = wiphy->dev.parent;
+ if (!dev)
+ return;
+
+ local->wbrf_supported = acpi_amd_wbrf_supported_producer(dev);
+}
+
+static void get_chan_freq_boundary(u32 center_freq, u32 bandwidth, u64 *start, u64 *end)
+{
+ bandwidth *= KHZ_PER_MHZ;
+ center_freq *= KHZ_PER_MHZ;
+
+ *start = center_freq - bandwidth / 2;
+ *end = center_freq + bandwidth / 2;
+
+ /* Frequency in Hz is expected */
+ *start = *start * HZ_PER_KHZ;
+ *end = *end * HZ_PER_KHZ;
+}
+
+static void get_ranges_from_chandef(struct cfg80211_chan_def *chandef,
+ struct wbrf_ranges_in_out *ranges_in)
+{
+ u64 start_freq1, end_freq1;
+ u64 start_freq2, end_freq2;
+ int bandwidth;
+
+ bandwidth = nl80211_chan_width_to_mhz(chandef->width);
+
+ get_chan_freq_boundary(chandef->center_freq1, bandwidth, &start_freq1, &end_freq1);
+
+ ranges_in->band_list[0].start = start_freq1;
+ ranges_in->band_list[0].end = end_freq1;
+ ranges_in->num_of_ranges = 1;
+
+ if (chandef->width == NL80211_CHAN_WIDTH_80P80) {
+ get_chan_freq_boundary(chandef->center_freq2, bandwidth, &start_freq2, &end_freq2);
+
+ ranges_in->band_list[1].start = start_freq2;
+ ranges_in->band_list[1].end = end_freq2;
+ ranges_in->num_of_ranges++;
+ }
+}
+
+void ieee80211_add_wbrf(struct ieee80211_local *local, struct cfg80211_chan_def *chandef)
+{
+ struct wbrf_ranges_in_out ranges_in = {0};
+ struct device *dev;
+
+ if (!local->wbrf_supported)
+ return;
+
+ dev = local->hw.wiphy->dev.parent;
+
+ get_ranges_from_chandef(chandef, &ranges_in);
+
+ acpi_amd_wbrf_add_remove(dev, WBRF_RECORD_ADD, &ranges_in);
+}
+
+void ieee80211_remove_wbrf(struct ieee80211_local *local, struct cfg80211_chan_def *chandef)
+{
+ struct wbrf_ranges_in_out ranges_in = {0};
+ struct device *dev;
+
+ if (!local->wbrf_supported)
+ return;
+
+ dev = local->hw.wiphy->dev.parent;
+
+ get_ranges_from_chandef(chandef, &ranges_in);
+
+ acpi_amd_wbrf_add_remove(dev, WBRF_RECORD_REMOVE, &ranges_in);
+}
diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c
index 9a6e11d7b4db..5c01e121481a 100644
--- a/net/mac80211/wep.c
+++ b/net/mac80211/wep.c
@@ -3,6 +3,7 @@
* Software WEP encryption implementation
* Copyright 2002, Jouni Malinen <jkmaline@cc.hut.fi>
* Copyright 2003, Instant802 Networks, Inc.
+ * Copyright (C) 2023 Intel Corporation
*/
#include <linux/netdevice.h>
@@ -250,18 +251,18 @@ ieee80211_crypto_wep_decrypt(struct ieee80211_rx_data *rx)
if (!(status->flag & RX_FLAG_DECRYPTED)) {
if (skb_linearize(rx->skb))
- return RX_DROP_UNUSABLE;
+ return RX_DROP_U_OOM;
if (ieee80211_wep_decrypt(rx->local, rx->skb, rx->key))
- return RX_DROP_UNUSABLE;
+ return RX_DROP_U_WEP_DEC_FAIL;
} else if (!(status->flag & RX_FLAG_IV_STRIPPED)) {
if (!pskb_may_pull(rx->skb, ieee80211_hdrlen(fc) +
IEEE80211_WEP_IV_LEN))
- return RX_DROP_UNUSABLE;
+ return RX_DROP_U_NO_IV;
ieee80211_wep_remove_iv(rx->local, rx->skb, rx->key);
/* remove ICV */
if (!(status->flag & RX_FLAG_ICV_STRIPPED) &&
pskb_trim(rx->skb, rx->skb->len - IEEE80211_WEP_ICV_LEN))
- return RX_DROP_UNUSABLE;
+ return RX_DROP_U_NO_ICV;
}
return RX_CONTINUE;
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 2d8e38b3bcb5..94dae7cb6dbd 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -3,7 +3,7 @@
* Copyright 2002-2004, Instant802 Networks, Inc.
* Copyright 2008, Jouni Malinen <j@w1.fi>
* Copyright (C) 2016-2017 Intel Deutschland GmbH
- * Copyright (C) 2020-2022 Intel Corporation
+ * Copyright (C) 2020-2023 Intel Corporation
*/
#include <linux/netdevice.h>
@@ -142,7 +142,7 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx)
* group keys and only the AP is sending real multicast
* frames in the BSS.
*/
- return RX_DROP_UNUSABLE;
+ return RX_DROP_U_AP_RX_GROUPCAST;
}
if (status->flag & RX_FLAG_MMIC_ERROR)
@@ -150,10 +150,10 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx)
hdrlen = ieee80211_hdrlen(hdr->frame_control);
if (skb->len < hdrlen + MICHAEL_MIC_LEN)
- return RX_DROP_UNUSABLE;
+ return RX_DROP_U_SHORT_MMIC;
if (skb_linearize(rx->skb))
- return RX_DROP_UNUSABLE;
+ return RX_DROP_U_OOM;
hdr = (void *)skb->data;
data = skb->data + hdrlen;
@@ -188,7 +188,7 @@ mic_fail_no_key:
NL80211_KEYTYPE_PAIRWISE,
rx->key ? rx->key->conf.keyidx : -1,
NULL, GFP_ATOMIC);
- return RX_DROP_UNUSABLE;
+ return RX_DROP_U_MMIC_FAIL;
}
static int tkip_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb)
@@ -276,11 +276,11 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx)
return RX_CONTINUE;
if (!rx->sta || skb->len - hdrlen < 12)
- return RX_DROP_UNUSABLE;
+ return RX_DROP_U_SHORT_TKIP;
/* it may be possible to optimize this a bit more */
if (skb_linearize(rx->skb))
- return RX_DROP_UNUSABLE;
+ return RX_DROP_U_OOM;
hdr = (void *)skb->data;
/*
@@ -298,7 +298,7 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx)
&rx->tkip.iv32,
&rx->tkip.iv16);
if (res != TKIP_DECRYPT_OK)
- return RX_DROP_UNUSABLE;
+ return RX_DROP_U_TKIP_FAIL;
/* Trim ICV */
if (!(status->flag & RX_FLAG_ICV_STRIPPED))
@@ -523,12 +523,12 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx,
if (status->flag & RX_FLAG_DECRYPTED) {
if (!pskb_may_pull(rx->skb, hdrlen + IEEE80211_CCMP_HDR_LEN))
- return RX_DROP_UNUSABLE;
+ return RX_DROP_U_SHORT_CCMP;
if (status->flag & RX_FLAG_MIC_STRIPPED)
mic_len = 0;
} else {
if (skb_linearize(rx->skb))
- return RX_DROP_UNUSABLE;
+ return RX_DROP_U_OOM;
}
/* reload hdr - skb might have been reallocated */
@@ -536,7 +536,7 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx,
data_len = skb->len - hdrlen - IEEE80211_CCMP_HDR_LEN - mic_len;
if (!rx->sta || data_len < 0)
- return RX_DROP_UNUSABLE;
+ return RX_DROP_U_SHORT_CCMP;
if (!(status->flag & RX_FLAG_PN_VALIDATED)) {
int res;
@@ -574,7 +574,7 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx,
/* Remove CCMP header and MIC */
if (pskb_trim(skb, skb->len - mic_len))
- return RX_DROP_UNUSABLE;
+ return RX_DROP_U_SHORT_CCMP_MIC;
memmove(skb->data + IEEE80211_CCMP_HDR_LEN, skb->data, hdrlen);
skb_pull(skb, IEEE80211_CCMP_HDR_LEN);
@@ -719,12 +719,12 @@ ieee80211_crypto_gcmp_decrypt(struct ieee80211_rx_data *rx)
if (status->flag & RX_FLAG_DECRYPTED) {
if (!pskb_may_pull(rx->skb, hdrlen + IEEE80211_GCMP_HDR_LEN))
- return RX_DROP_UNUSABLE;
+ return RX_DROP_U_SHORT_GCMP;
if (status->flag & RX_FLAG_MIC_STRIPPED)
mic_len = 0;
} else {
if (skb_linearize(rx->skb))
- return RX_DROP_UNUSABLE;
+ return RX_DROP_U_OOM;
}
/* reload hdr - skb might have been reallocated */
@@ -732,7 +732,7 @@ ieee80211_crypto_gcmp_decrypt(struct ieee80211_rx_data *rx)
data_len = skb->len - hdrlen - IEEE80211_GCMP_HDR_LEN - mic_len;
if (!rx->sta || data_len < 0)
- return RX_DROP_UNUSABLE;
+ return RX_DROP_U_SHORT_GCMP;
if (!(status->flag & RX_FLAG_PN_VALIDATED)) {
int res;
@@ -771,7 +771,7 @@ ieee80211_crypto_gcmp_decrypt(struct ieee80211_rx_data *rx)
/* Remove GCMP header and MIC */
if (pskb_trim(skb, skb->len - mic_len))
- return RX_DROP_UNUSABLE;
+ return RX_DROP_U_SHORT_GCMP_MIC;
memmove(skb->data + IEEE80211_GCMP_HDR_LEN, skb->data, hdrlen);
skb_pull(skb, IEEE80211_GCMP_HDR_LEN);
@@ -924,7 +924,7 @@ ieee80211_crypto_aes_cmac_decrypt(struct ieee80211_rx_data *rx)
/* management frames are already linear */
if (skb->len < 24 + sizeof(*mmie))
- return RX_DROP_UNUSABLE;
+ return RX_DROP_U_SHORT_CMAC;
mmie = (struct ieee80211_mmie *)
(skb->data + skb->len - sizeof(*mmie));
@@ -974,13 +974,13 @@ ieee80211_crypto_aes_cmac_256_decrypt(struct ieee80211_rx_data *rx)
/* management frames are already linear */
if (skb->len < 24 + sizeof(*mmie))
- return RX_DROP_UNUSABLE;
+ return RX_DROP_U_SHORT_CMAC256;
mmie = (struct ieee80211_mmie_16 *)
(skb->data + skb->len - sizeof(*mmie));
if (mmie->element_id != WLAN_EID_MMIE ||
mmie->length != sizeof(*mmie) - 2)
- return RX_DROP_UNUSABLE; /* Invalid MMIE */
+ return RX_DROP_U_BAD_MMIE; /* Invalid MMIE */
bip_ipn_swap(ipn, mmie->sequence_number);
@@ -1073,7 +1073,7 @@ ieee80211_crypto_aes_gmac_decrypt(struct ieee80211_rx_data *rx)
/* management frames are already linear */
if (skb->len < 24 + sizeof(*mmie))
- return RX_DROP_UNUSABLE;
+ return RX_DROP_U_SHORT_GMAC;
mmie = (struct ieee80211_mmie_16 *)
(skb->data + skb->len - sizeof(*mmie));
@@ -1097,7 +1097,7 @@ ieee80211_crypto_aes_gmac_decrypt(struct ieee80211_rx_data *rx)
mic = kmalloc(GMAC_MIC_LEN, GFP_ATOMIC);
if (!mic)
- return RX_DROP_UNUSABLE;
+ return RX_DROP_U_OOM;
if (ieee80211_aes_gmac(key->u.aes_gmac.tfm, aad, nonce,
skb->data + 24, skb->len - 24,
mic) < 0 ||
diff --git a/net/mac802154/cfg.c b/net/mac802154/cfg.c
index 5c3cb019f751..ef7f23af043f 100644
--- a/net/mac802154/cfg.c
+++ b/net/mac802154/cfg.c
@@ -315,6 +315,179 @@ static int mac802154_stop_beacons(struct wpan_phy *wpan_phy,
return mac802154_stop_beacons_locked(local, sdata);
}
+static int mac802154_associate(struct wpan_phy *wpan_phy,
+ struct wpan_dev *wpan_dev,
+ struct ieee802154_addr *coord)
+{
+ struct ieee802154_local *local = wpan_phy_priv(wpan_phy);
+ u64 ceaddr = swab64((__force u64)coord->extended_addr);
+ struct ieee802154_sub_if_data *sdata;
+ struct ieee802154_pan_device *parent;
+ __le16 short_addr;
+ int ret;
+
+ ASSERT_RTNL();
+
+ sdata = IEEE802154_WPAN_DEV_TO_SUB_IF(wpan_dev);
+
+ if (wpan_dev->parent) {
+ dev_err(&sdata->dev->dev,
+ "Device %8phC is already associated\n", &ceaddr);
+ return -EPERM;
+ }
+
+ if (coord->mode == IEEE802154_SHORT_ADDRESSING)
+ return -EINVAL;
+
+ parent = kzalloc(sizeof(*parent), GFP_KERNEL);
+ if (!parent)
+ return -ENOMEM;
+
+ parent->pan_id = coord->pan_id;
+ parent->mode = coord->mode;
+ parent->extended_addr = coord->extended_addr;
+ parent->short_addr = cpu_to_le16(IEEE802154_ADDR_SHORT_BROADCAST);
+
+ /* Set the PAN ID hardware address filter beforehand to avoid dropping
+ * the association response with a destination PAN ID field set to the
+ * "new" PAN ID.
+ */
+ if (local->hw.flags & IEEE802154_HW_AFILT) {
+ ret = drv_set_pan_id(local, coord->pan_id);
+ if (ret < 0)
+ goto free_parent;
+ }
+
+ ret = mac802154_perform_association(sdata, parent, &short_addr);
+ if (ret)
+ goto reset_panid;
+
+ if (local->hw.flags & IEEE802154_HW_AFILT) {
+ ret = drv_set_short_addr(local, short_addr);
+ if (ret < 0)
+ goto reset_panid;
+ }
+
+ wpan_dev->pan_id = coord->pan_id;
+ wpan_dev->short_addr = short_addr;
+ wpan_dev->parent = parent;
+
+ return 0;
+
+reset_panid:
+ if (local->hw.flags & IEEE802154_HW_AFILT)
+ drv_set_pan_id(local, cpu_to_le16(IEEE802154_PAN_ID_BROADCAST));
+
+free_parent:
+ kfree(parent);
+ return ret;
+}
+
+static int mac802154_disassociate_from_parent(struct wpan_phy *wpan_phy,
+ struct wpan_dev *wpan_dev)
+{
+ struct ieee802154_local *local = wpan_phy_priv(wpan_phy);
+ struct ieee802154_pan_device *child, *tmp;
+ struct ieee802154_sub_if_data *sdata;
+ unsigned int max_assoc;
+ u64 eaddr;
+ int ret;
+
+ sdata = IEEE802154_WPAN_DEV_TO_SUB_IF(wpan_dev);
+
+ /* Start by disassociating all the children and preventing new ones to
+ * attempt associations.
+ */
+ max_assoc = cfg802154_set_max_associations(wpan_dev, 0);
+ list_for_each_entry_safe(child, tmp, &wpan_dev->children, node) {
+ ret = mac802154_send_disassociation_notif(sdata, child,
+ IEEE802154_COORD_WISHES_DEVICE_TO_LEAVE);
+ if (ret) {
+ eaddr = swab64((__force u64)child->extended_addr);
+ dev_err(&sdata->dev->dev,
+ "Disassociation with %8phC may have failed (%d)\n",
+ &eaddr, ret);
+ }
+
+ list_del(&child->node);
+ }
+
+ ret = mac802154_send_disassociation_notif(sdata, wpan_dev->parent,
+ IEEE802154_DEVICE_WISHES_TO_LEAVE);
+ if (ret) {
+ eaddr = swab64((__force u64)wpan_dev->parent->extended_addr);
+ dev_err(&sdata->dev->dev,
+ "Disassociation from %8phC may have failed (%d)\n",
+ &eaddr, ret);
+ }
+
+ ret = 0;
+
+ kfree(wpan_dev->parent);
+ wpan_dev->parent = NULL;
+ wpan_dev->pan_id = cpu_to_le16(IEEE802154_PAN_ID_BROADCAST);
+ wpan_dev->short_addr = cpu_to_le16(IEEE802154_ADDR_SHORT_BROADCAST);
+
+ if (local->hw.flags & IEEE802154_HW_AFILT) {
+ ret = drv_set_pan_id(local, wpan_dev->pan_id);
+ if (ret < 0)
+ goto reset_mac_assoc;
+
+ ret = drv_set_short_addr(local, wpan_dev->short_addr);
+ if (ret < 0)
+ goto reset_mac_assoc;
+ }
+
+reset_mac_assoc:
+ cfg802154_set_max_associations(wpan_dev, max_assoc);
+
+ return ret;
+}
+
+static int mac802154_disassociate_child(struct wpan_phy *wpan_phy,
+ struct wpan_dev *wpan_dev,
+ struct ieee802154_pan_device *child)
+{
+ struct ieee802154_sub_if_data *sdata;
+ int ret;
+
+ sdata = IEEE802154_WPAN_DEV_TO_SUB_IF(wpan_dev);
+
+ ret = mac802154_send_disassociation_notif(sdata, child,
+ IEEE802154_COORD_WISHES_DEVICE_TO_LEAVE);
+ if (ret)
+ return ret;
+
+ list_del(&child->node);
+ wpan_dev->nchildren--;
+ kfree(child);
+
+ return 0;
+}
+
+static int mac802154_disassociate(struct wpan_phy *wpan_phy,
+ struct wpan_dev *wpan_dev,
+ struct ieee802154_addr *target)
+{
+ u64 teaddr = swab64((__force u64)target->extended_addr);
+ struct ieee802154_pan_device *pan_device;
+
+ ASSERT_RTNL();
+
+ if (cfg802154_device_is_parent(wpan_dev, target))
+ return mac802154_disassociate_from_parent(wpan_phy, wpan_dev);
+
+ pan_device = cfg802154_device_is_child(wpan_dev, target);
+ if (pan_device)
+ return mac802154_disassociate_child(wpan_phy, wpan_dev,
+ pan_device);
+
+ dev_err(&wpan_dev->netdev->dev,
+ "Device %8phC is not associated with us\n", &teaddr);
+
+ return -EINVAL;
+}
+
#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
static void
ieee802154_get_llsec_table(struct wpan_phy *wpan_phy,
@@ -526,6 +699,8 @@ const struct cfg802154_ops mac802154_config_ops = {
.abort_scan = mac802154_abort_scan,
.send_beacons = mac802154_send_beacons,
.stop_beacons = mac802154_stop_beacons,
+ .associate = mac802154_associate,
+ .disassociate = mac802154_disassociate,
#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
.get_llsec_table = ieee802154_get_llsec_table,
.lock_llsec_table = ieee802154_lock_llsec_table,
diff --git a/net/mac802154/ieee802154_i.h b/net/mac802154/ieee802154_i.h
index c347ec9ff8c9..08dd521a51a5 100644
--- a/net/mac802154/ieee802154_i.h
+++ b/net/mac802154/ieee802154_i.h
@@ -24,6 +24,7 @@
enum ieee802154_ongoing {
IEEE802154_IS_SCANNING = BIT(0),
IEEE802154_IS_BEACONING = BIT(1),
+ IEEE802154_IS_ASSOCIATING = BIT(2),
};
/* mac802154 device private data */
@@ -74,6 +75,13 @@ struct ieee802154_local {
struct list_head rx_mac_cmd_list;
struct work_struct rx_mac_cmd_work;
+ /* Association */
+ struct ieee802154_pan_device *assoc_dev;
+ struct completion assoc_done;
+ __le16 assoc_addr;
+ u8 assoc_status;
+ struct work_struct assoc_work;
+
bool started;
bool suspended;
unsigned long ongoing;
@@ -296,6 +304,25 @@ static inline bool mac802154_is_beaconing(struct ieee802154_local *local)
void mac802154_rx_mac_cmd_worker(struct work_struct *work);
+int mac802154_perform_association(struct ieee802154_sub_if_data *sdata,
+ struct ieee802154_pan_device *coord,
+ __le16 *short_addr);
+int mac802154_process_association_resp(struct ieee802154_sub_if_data *sdata,
+ struct sk_buff *skb);
+
+static inline bool mac802154_is_associating(struct ieee802154_local *local)
+{
+ return test_bit(IEEE802154_IS_ASSOCIATING, &local->ongoing);
+}
+
+int mac802154_send_disassociation_notif(struct ieee802154_sub_if_data *sdata,
+ struct ieee802154_pan_device *target,
+ u8 reason);
+int mac802154_process_disassociation_notif(struct ieee802154_sub_if_data *sdata,
+ struct sk_buff *skb);
+int mac802154_process_association_req(struct ieee802154_sub_if_data *sdata,
+ struct sk_buff *skb);
+
/* interface handling */
int ieee802154_iface_init(void);
void ieee802154_iface_exit(void);
diff --git a/net/mac802154/main.c b/net/mac802154/main.c
index 357ece67432b..9ab7396668d2 100644
--- a/net/mac802154/main.c
+++ b/net/mac802154/main.c
@@ -103,6 +103,8 @@ ieee802154_alloc_hw(size_t priv_data_len, const struct ieee802154_ops *ops)
INIT_DELAYED_WORK(&local->beacon_work, mac802154_beacon_worker);
INIT_WORK(&local->rx_mac_cmd_work, mac802154_rx_mac_cmd_worker);
+ init_completion(&local->assoc_done);
+
/* init supported flags with 802.15.4 default ranges */
phy->supported.max_minbe = 8;
phy->supported.min_maxbe = 3;
diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c
index e2434b4fe514..e40a988d6c80 100644
--- a/net/mac802154/rx.c
+++ b/net/mac802154/rx.c
@@ -93,6 +93,31 @@ void mac802154_rx_mac_cmd_worker(struct work_struct *work)
queue_delayed_work(local->mac_wq, &local->beacon_work, 0);
break;
+
+ case IEEE802154_CMD_ASSOCIATION_RESP:
+ dev_dbg(&mac_pkt->sdata->dev->dev, "processing ASSOC RESP\n");
+ if (!mac802154_is_associating(local))
+ break;
+
+ mac802154_process_association_resp(mac_pkt->sdata, mac_pkt->skb);
+ break;
+
+ case IEEE802154_CMD_ASSOCIATION_REQ:
+ dev_dbg(&mac_pkt->sdata->dev->dev, "processing ASSOC REQ\n");
+ if (mac_pkt->sdata->wpan_dev.iftype != NL802154_IFTYPE_COORD)
+ break;
+
+ mac802154_process_association_req(mac_pkt->sdata, mac_pkt->skb);
+ break;
+
+ case IEEE802154_CMD_DISASSOCIATION_NOTIFY:
+ dev_dbg(&mac_pkt->sdata->dev->dev, "processing DISASSOC NOTIF\n");
+ if (mac_pkt->sdata->wpan_dev.iftype != NL802154_IFTYPE_COORD)
+ break;
+
+ mac802154_process_disassociation_notif(mac_pkt->sdata, mac_pkt->skb);
+ break;
+
default:
break;
}
@@ -131,12 +156,15 @@ ieee802154_subif_frame(struct ieee802154_sub_if_data *sdata,
switch (mac_cb(skb)->dest.mode) {
case IEEE802154_ADDR_NONE:
- if (hdr->source.mode != IEEE802154_ADDR_NONE)
- /* FIXME: check if we are PAN coordinator */
- skb->pkt_type = PACKET_OTHERHOST;
- else
+ if (hdr->source.mode == IEEE802154_ADDR_NONE)
/* ACK comes with both addresses empty */
skb->pkt_type = PACKET_HOST;
+ else if (!wpan_dev->parent)
+ /* No dest means PAN coordinator is the recipient */
+ skb->pkt_type = PACKET_HOST;
+ else
+ /* We are not the PAN coordinator, just relaying */
+ skb->pkt_type = PACKET_OTHERHOST;
break;
case IEEE802154_ADDR_LONG:
if (mac_cb(skb)->dest.pan_id != span &&
diff --git a/net/mac802154/scan.c b/net/mac802154/scan.c
index d9658f2c4ae6..1c0eeaa76560 100644
--- a/net/mac802154/scan.c
+++ b/net/mac802154/scan.c
@@ -466,6 +466,7 @@ int mac802154_send_beacons_locked(struct ieee802154_sub_if_data *sdata,
struct cfg802154_beacon_request *request)
{
struct ieee802154_local *local = sdata->local;
+ struct wpan_dev *wpan_dev = &sdata->wpan_dev;
ASSERT_RTNL();
@@ -495,8 +496,7 @@ int mac802154_send_beacons_locked(struct ieee802154_sub_if_data *sdata,
local->beacon.mac_pl.superframe_order = request->interval;
local->beacon.mac_pl.final_cap_slot = 0xf;
local->beacon.mac_pl.battery_life_ext = 0;
- /* TODO: Fill this field with the coordinator situation in the network */
- local->beacon.mac_pl.pan_coordinator = 1;
+ local->beacon.mac_pl.pan_coordinator = !wpan_dev->parent;
local->beacon.mac_pl.assoc_permit = 1;
if (request->interval == IEEE802154_ACTIVE_SCAN_DURATION)
@@ -510,3 +510,406 @@ int mac802154_send_beacons_locked(struct ieee802154_sub_if_data *sdata,
return 0;
}
+
+int mac802154_perform_association(struct ieee802154_sub_if_data *sdata,
+ struct ieee802154_pan_device *coord,
+ __le16 *short_addr)
+{
+ u64 ceaddr = swab64((__force u64)coord->extended_addr);
+ struct ieee802154_association_req_frame frame = {};
+ struct ieee802154_local *local = sdata->local;
+ struct wpan_dev *wpan_dev = &sdata->wpan_dev;
+ struct sk_buff *skb;
+ int ret;
+
+ frame.mhr.fc.type = IEEE802154_FC_TYPE_MAC_CMD;
+ frame.mhr.fc.security_enabled = 0;
+ frame.mhr.fc.frame_pending = 0;
+ frame.mhr.fc.ack_request = 1; /* We always expect an ack here */
+ frame.mhr.fc.intra_pan = 0;
+ frame.mhr.fc.dest_addr_mode = (coord->mode == IEEE802154_ADDR_LONG) ?
+ IEEE802154_EXTENDED_ADDRESSING : IEEE802154_SHORT_ADDRESSING;
+ frame.mhr.fc.version = IEEE802154_2003_STD;
+ frame.mhr.fc.source_addr_mode = IEEE802154_EXTENDED_ADDRESSING;
+ frame.mhr.source.mode = IEEE802154_ADDR_LONG;
+ frame.mhr.source.pan_id = cpu_to_le16(IEEE802154_PANID_BROADCAST);
+ frame.mhr.source.extended_addr = wpan_dev->extended_addr;
+ frame.mhr.dest.mode = coord->mode;
+ frame.mhr.dest.pan_id = coord->pan_id;
+ if (coord->mode == IEEE802154_ADDR_LONG)
+ frame.mhr.dest.extended_addr = coord->extended_addr;
+ else
+ frame.mhr.dest.short_addr = coord->short_addr;
+ frame.mhr.seq = atomic_inc_return(&wpan_dev->dsn) & 0xFF;
+ frame.mac_pl.cmd_id = IEEE802154_CMD_ASSOCIATION_REQ;
+ frame.assoc_req_pl.device_type = 1;
+ frame.assoc_req_pl.power_source = 1;
+ frame.assoc_req_pl.rx_on_when_idle = 1;
+ frame.assoc_req_pl.alloc_addr = 1;
+
+ skb = alloc_skb(IEEE802154_MAC_CMD_SKB_SZ + sizeof(frame.assoc_req_pl),
+ GFP_KERNEL);
+ if (!skb)
+ return -ENOBUFS;
+
+ skb->dev = sdata->dev;
+
+ ret = ieee802154_mac_cmd_push(skb, &frame, &frame.assoc_req_pl,
+ sizeof(frame.assoc_req_pl));
+ if (ret) {
+ kfree_skb(skb);
+ return ret;
+ }
+
+ local->assoc_dev = coord;
+ reinit_completion(&local->assoc_done);
+ set_bit(IEEE802154_IS_ASSOCIATING, &local->ongoing);
+
+ ret = ieee802154_mlme_tx_one_locked(local, sdata, skb);
+ if (ret) {
+ if (ret > 0)
+ ret = (ret == IEEE802154_NO_ACK) ? -EREMOTEIO : -EIO;
+ dev_warn(&sdata->dev->dev,
+ "No ASSOC REQ ACK received from %8phC\n", &ceaddr);
+ goto clear_assoc;
+ }
+
+ ret = wait_for_completion_killable_timeout(&local->assoc_done, 10 * HZ);
+ if (ret <= 0) {
+ dev_warn(&sdata->dev->dev,
+ "No ASSOC RESP received from %8phC\n", &ceaddr);
+ ret = -ETIMEDOUT;
+ goto clear_assoc;
+ }
+
+ if (local->assoc_status != IEEE802154_ASSOCIATION_SUCCESSFUL) {
+ if (local->assoc_status == IEEE802154_PAN_AT_CAPACITY)
+ ret = -ERANGE;
+ else
+ ret = -EPERM;
+
+ dev_warn(&sdata->dev->dev,
+ "Negative ASSOC RESP received from %8phC: %s\n", &ceaddr,
+ local->assoc_status == IEEE802154_PAN_AT_CAPACITY ?
+ "PAN at capacity" : "access denied");
+ }
+
+ ret = 0;
+ *short_addr = local->assoc_addr;
+
+clear_assoc:
+ clear_bit(IEEE802154_IS_ASSOCIATING, &local->ongoing);
+ local->assoc_dev = NULL;
+
+ return ret;
+}
+
+int mac802154_process_association_resp(struct ieee802154_sub_if_data *sdata,
+ struct sk_buff *skb)
+{
+ struct ieee802154_addr *src = &mac_cb(skb)->source;
+ struct ieee802154_addr *dest = &mac_cb(skb)->dest;
+ u64 deaddr = swab64((__force u64)dest->extended_addr);
+ struct ieee802154_local *local = sdata->local;
+ struct wpan_dev *wpan_dev = &sdata->wpan_dev;
+ struct ieee802154_assoc_resp_pl resp_pl = {};
+
+ if (skb->len != sizeof(resp_pl))
+ return -EINVAL;
+
+ if (unlikely(src->mode != IEEE802154_EXTENDED_ADDRESSING ||
+ dest->mode != IEEE802154_EXTENDED_ADDRESSING))
+ return -EINVAL;
+
+ if (unlikely(dest->extended_addr != wpan_dev->extended_addr ||
+ src->extended_addr != local->assoc_dev->extended_addr))
+ return -ENODEV;
+
+ memcpy(&resp_pl, skb->data, sizeof(resp_pl));
+ local->assoc_addr = resp_pl.short_addr;
+ local->assoc_status = resp_pl.status;
+
+ dev_dbg(&skb->dev->dev,
+ "ASSOC RESP 0x%x received from %8phC, getting short address %04x\n",
+ local->assoc_status, &deaddr, local->assoc_addr);
+
+ complete(&local->assoc_done);
+
+ return 0;
+}
+
+int mac802154_send_disassociation_notif(struct ieee802154_sub_if_data *sdata,
+ struct ieee802154_pan_device *target,
+ u8 reason)
+{
+ struct ieee802154_disassociation_notif_frame frame = {};
+ u64 teaddr = swab64((__force u64)target->extended_addr);
+ struct ieee802154_local *local = sdata->local;
+ struct wpan_dev *wpan_dev = &sdata->wpan_dev;
+ struct sk_buff *skb;
+ int ret;
+
+ frame.mhr.fc.type = IEEE802154_FC_TYPE_MAC_CMD;
+ frame.mhr.fc.security_enabled = 0;
+ frame.mhr.fc.frame_pending = 0;
+ frame.mhr.fc.ack_request = 1;
+ frame.mhr.fc.intra_pan = 1;
+ frame.mhr.fc.dest_addr_mode = (target->mode == IEEE802154_ADDR_LONG) ?
+ IEEE802154_EXTENDED_ADDRESSING : IEEE802154_SHORT_ADDRESSING;
+ frame.mhr.fc.version = IEEE802154_2003_STD;
+ frame.mhr.fc.source_addr_mode = IEEE802154_EXTENDED_ADDRESSING;
+ frame.mhr.source.mode = IEEE802154_ADDR_LONG;
+ frame.mhr.source.pan_id = wpan_dev->pan_id;
+ frame.mhr.source.extended_addr = wpan_dev->extended_addr;
+ frame.mhr.dest.mode = target->mode;
+ frame.mhr.dest.pan_id = wpan_dev->pan_id;
+ if (target->mode == IEEE802154_ADDR_LONG)
+ frame.mhr.dest.extended_addr = target->extended_addr;
+ else
+ frame.mhr.dest.short_addr = target->short_addr;
+ frame.mhr.seq = atomic_inc_return(&wpan_dev->dsn) & 0xFF;
+ frame.mac_pl.cmd_id = IEEE802154_CMD_DISASSOCIATION_NOTIFY;
+ frame.disassoc_pl = reason;
+
+ skb = alloc_skb(IEEE802154_MAC_CMD_SKB_SZ + sizeof(frame.disassoc_pl),
+ GFP_KERNEL);
+ if (!skb)
+ return -ENOBUFS;
+
+ skb->dev = sdata->dev;
+
+ ret = ieee802154_mac_cmd_push(skb, &frame, &frame.disassoc_pl,
+ sizeof(frame.disassoc_pl));
+ if (ret) {
+ kfree_skb(skb);
+ return ret;
+ }
+
+ ret = ieee802154_mlme_tx_one_locked(local, sdata, skb);
+ if (ret) {
+ dev_warn(&sdata->dev->dev,
+ "No DISASSOC ACK received from %8phC\n", &teaddr);
+ if (ret > 0)
+ ret = (ret == IEEE802154_NO_ACK) ? -EREMOTEIO : -EIO;
+ return ret;
+ }
+
+ dev_dbg(&sdata->dev->dev, "DISASSOC ACK received from %8phC\n", &teaddr);
+ return 0;
+}
+
+static int
+mac802154_send_association_resp_locked(struct ieee802154_sub_if_data *sdata,
+ struct ieee802154_pan_device *target,
+ struct ieee802154_assoc_resp_pl *assoc_resp_pl)
+{
+ u64 teaddr = swab64((__force u64)target->extended_addr);
+ struct ieee802154_association_resp_frame frame = {};
+ struct ieee802154_local *local = sdata->local;
+ struct wpan_dev *wpan_dev = &sdata->wpan_dev;
+ struct sk_buff *skb;
+ int ret;
+
+ frame.mhr.fc.type = IEEE802154_FC_TYPE_MAC_CMD;
+ frame.mhr.fc.security_enabled = 0;
+ frame.mhr.fc.frame_pending = 0;
+ frame.mhr.fc.ack_request = 1; /* We always expect an ack here */
+ frame.mhr.fc.intra_pan = 1;
+ frame.mhr.fc.dest_addr_mode = IEEE802154_EXTENDED_ADDRESSING;
+ frame.mhr.fc.version = IEEE802154_2003_STD;
+ frame.mhr.fc.source_addr_mode = IEEE802154_EXTENDED_ADDRESSING;
+ frame.mhr.source.mode = IEEE802154_ADDR_LONG;
+ frame.mhr.source.extended_addr = wpan_dev->extended_addr;
+ frame.mhr.dest.mode = IEEE802154_ADDR_LONG;
+ frame.mhr.dest.pan_id = wpan_dev->pan_id;
+ frame.mhr.dest.extended_addr = target->extended_addr;
+ frame.mhr.seq = atomic_inc_return(&wpan_dev->dsn) & 0xFF;
+ frame.mac_pl.cmd_id = IEEE802154_CMD_ASSOCIATION_RESP;
+
+ skb = alloc_skb(IEEE802154_MAC_CMD_SKB_SZ + sizeof(*assoc_resp_pl),
+ GFP_KERNEL);
+ if (!skb)
+ return -ENOBUFS;
+
+ skb->dev = sdata->dev;
+
+ ret = ieee802154_mac_cmd_push(skb, &frame, assoc_resp_pl,
+ sizeof(*assoc_resp_pl));
+ if (ret) {
+ kfree_skb(skb);
+ return ret;
+ }
+
+ ret = ieee802154_mlme_tx_locked(local, sdata, skb);
+ if (ret) {
+ dev_warn(&sdata->dev->dev,
+ "No ASSOC RESP ACK received from %8phC\n", &teaddr);
+ if (ret > 0)
+ ret = (ret == IEEE802154_NO_ACK) ? -EREMOTEIO : -EIO;
+ return ret;
+ }
+
+ return 0;
+}
+
+int mac802154_process_association_req(struct ieee802154_sub_if_data *sdata,
+ struct sk_buff *skb)
+{
+ struct wpan_dev *wpan_dev = &sdata->wpan_dev;
+ struct ieee802154_addr *src = &mac_cb(skb)->source;
+ struct ieee802154_addr *dest = &mac_cb(skb)->dest;
+ struct ieee802154_assoc_resp_pl assoc_resp_pl = {};
+ struct ieee802154_assoc_req_pl assoc_req_pl;
+ struct ieee802154_pan_device *child, *exchild;
+ struct ieee802154_addr tmp = {};
+ u64 ceaddr;
+ int ret;
+
+ if (skb->len != sizeof(assoc_req_pl))
+ return -EINVAL;
+
+ if (unlikely(src->mode != IEEE802154_EXTENDED_ADDRESSING))
+ return -EINVAL;
+
+ if (unlikely(dest->pan_id != wpan_dev->pan_id))
+ return -ENODEV;
+
+ if (dest->mode == IEEE802154_EXTENDED_ADDRESSING &&
+ unlikely(dest->extended_addr != wpan_dev->extended_addr))
+ return -ENODEV;
+ else if (dest->mode == IEEE802154_SHORT_ADDRESSING &&
+ unlikely(dest->short_addr != wpan_dev->short_addr))
+ return -ENODEV;
+
+ if (wpan_dev->parent) {
+ dev_dbg(&sdata->dev->dev,
+ "Ignoring ASSOC REQ, not the PAN coordinator\n");
+ return -ENODEV;
+ }
+
+ mutex_lock(&wpan_dev->association_lock);
+
+ memcpy(&assoc_req_pl, skb->data, sizeof(assoc_req_pl));
+ if (assoc_req_pl.assoc_type) {
+ dev_err(&skb->dev->dev, "Fast associations not supported yet\n");
+ ret = -EOPNOTSUPP;
+ goto unlock;
+ }
+
+ child = kzalloc(sizeof(*child), GFP_KERNEL);
+ if (!child) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
+
+ child->extended_addr = src->extended_addr;
+ child->mode = IEEE802154_EXTENDED_ADDRESSING;
+ ceaddr = swab64((__force u64)child->extended_addr);
+
+ if (wpan_dev->nchildren >= wpan_dev->max_associations) {
+ if (!wpan_dev->max_associations)
+ assoc_resp_pl.status = IEEE802154_PAN_ACCESS_DENIED;
+ else
+ assoc_resp_pl.status = IEEE802154_PAN_AT_CAPACITY;
+ assoc_resp_pl.short_addr = cpu_to_le16(IEEE802154_ADDR_SHORT_BROADCAST);
+ dev_dbg(&sdata->dev->dev,
+ "Refusing ASSOC REQ from child %8phC, %s\n", &ceaddr,
+ assoc_resp_pl.status == IEEE802154_PAN_ACCESS_DENIED ?
+ "access denied" : "too many children");
+ } else {
+ assoc_resp_pl.status = IEEE802154_ASSOCIATION_SUCCESSFUL;
+ if (assoc_req_pl.alloc_addr) {
+ assoc_resp_pl.short_addr = cfg802154_get_free_short_addr(wpan_dev);
+ child->mode = IEEE802154_SHORT_ADDRESSING;
+ } else {
+ assoc_resp_pl.short_addr = cpu_to_le16(IEEE802154_ADDR_SHORT_UNSPEC);
+ }
+ child->short_addr = assoc_resp_pl.short_addr;
+ dev_dbg(&sdata->dev->dev,
+ "Accepting ASSOC REQ from child %8phC, providing short address 0x%04x\n",
+ &ceaddr, le16_to_cpu(child->short_addr));
+ }
+
+ ret = mac802154_send_association_resp_locked(sdata, child, &assoc_resp_pl);
+ if (ret || assoc_resp_pl.status != IEEE802154_ASSOCIATION_SUCCESSFUL) {
+ kfree(child);
+ goto unlock;
+ }
+
+ dev_dbg(&sdata->dev->dev,
+ "Successful association with new child %8phC\n", &ceaddr);
+
+ /* Ensure this child is not already associated (might happen due to
+ * retransmissions), in this case drop the ex structure.
+ */
+ tmp.mode = child->mode;
+ tmp.extended_addr = child->extended_addr;
+ exchild = cfg802154_device_is_child(wpan_dev, &tmp);
+ if (exchild) {
+ dev_dbg(&sdata->dev->dev,
+ "Child %8phC was already known\n", &ceaddr);
+ list_del(&exchild->node);
+ }
+
+ list_add(&child->node, &wpan_dev->children);
+ wpan_dev->nchildren++;
+
+unlock:
+ mutex_unlock(&wpan_dev->association_lock);
+ return ret;
+}
+
+int mac802154_process_disassociation_notif(struct ieee802154_sub_if_data *sdata,
+ struct sk_buff *skb)
+{
+ struct ieee802154_addr *src = &mac_cb(skb)->source;
+ struct ieee802154_addr *dest = &mac_cb(skb)->dest;
+ struct wpan_dev *wpan_dev = &sdata->wpan_dev;
+ struct ieee802154_pan_device *child;
+ struct ieee802154_addr target;
+ bool parent;
+ u64 teaddr;
+
+ if (skb->len != sizeof(u8))
+ return -EINVAL;
+
+ if (unlikely(src->mode != IEEE802154_EXTENDED_ADDRESSING))
+ return -EINVAL;
+
+ if (dest->mode == IEEE802154_EXTENDED_ADDRESSING &&
+ unlikely(dest->extended_addr != wpan_dev->extended_addr))
+ return -ENODEV;
+ else if (dest->mode == IEEE802154_SHORT_ADDRESSING &&
+ unlikely(dest->short_addr != wpan_dev->short_addr))
+ return -ENODEV;
+
+ if (dest->pan_id != wpan_dev->pan_id)
+ return -ENODEV;
+
+ target.mode = IEEE802154_EXTENDED_ADDRESSING;
+ target.extended_addr = src->extended_addr;
+ teaddr = swab64((__force u64)target.extended_addr);
+ dev_dbg(&skb->dev->dev, "Processing DISASSOC NOTIF from %8phC\n", &teaddr);
+
+ mutex_lock(&wpan_dev->association_lock);
+ parent = cfg802154_device_is_parent(wpan_dev, &target);
+ if (!parent)
+ child = cfg802154_device_is_child(wpan_dev, &target);
+ if (!parent && !child) {
+ mutex_unlock(&wpan_dev->association_lock);
+ return -EINVAL;
+ }
+
+ if (parent) {
+ kfree(wpan_dev->parent);
+ wpan_dev->parent = NULL;
+ } else {
+ list_del(&child->node);
+ kfree(child);
+ wpan_dev->nchildren--;
+ }
+
+ mutex_unlock(&wpan_dev->association_lock);
+
+ return 0;
+}
diff --git a/net/mctp/route.c b/net/mctp/route.c
index ab62fe447038..7a47a58aa54b 100644
--- a/net/mctp/route.c
+++ b/net/mctp/route.c
@@ -737,6 +737,8 @@ struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet,
{
struct mctp_route *tmp, *rt = NULL;
+ rcu_read_lock();
+
list_for_each_entry_rcu(tmp, &net->mctp.routes, list) {
/* TODO: add metrics */
if (mctp_rt_match_eid(tmp, dnet, daddr)) {
@@ -747,21 +749,29 @@ struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet,
}
}
+ rcu_read_unlock();
+
return rt;
}
static struct mctp_route *mctp_route_lookup_null(struct net *net,
struct net_device *dev)
{
- struct mctp_route *rt;
+ struct mctp_route *tmp, *rt = NULL;
- list_for_each_entry_rcu(rt, &net->mctp.routes, list) {
- if (rt->dev->dev == dev && rt->type == RTN_LOCAL &&
- refcount_inc_not_zero(&rt->refs))
- return rt;
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(tmp, &net->mctp.routes, list) {
+ if (tmp->dev->dev == dev && tmp->type == RTN_LOCAL &&
+ refcount_inc_not_zero(&tmp->refs)) {
+ rt = tmp;
+ break;
+ }
}
- return NULL;
+ rcu_read_unlock();
+
+ return rt;
}
static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb,
diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile
index 84e531f86b82..bcf1dbf3a432 100644
--- a/net/mptcp/Makefile
+++ b/net/mptcp/Makefile
@@ -2,7 +2,8 @@
obj-$(CONFIG_MPTCP) += mptcp.o
mptcp-y := protocol.o subflow.o options.o token.o crypto.o ctrl.o pm.o diag.o \
- mib.o pm_netlink.o sockopt.o pm_userspace.o fastopen.o sched.o
+ mib.o pm_netlink.o sockopt.o pm_userspace.o fastopen.o sched.o \
+ mptcp_pm_gen.o
obj-$(CONFIG_SYN_COOKIES) += syncookies.o
obj-$(CONFIG_INET_MPTCP_DIAG) += mptcp_diag.o
diff --git a/net/mptcp/crypto_test.c b/net/mptcp/crypto_test.c
index 017248dea038..220414e5c850 100644
--- a/net/mptcp/crypto_test.c
+++ b/net/mptcp/crypto_test.c
@@ -70,3 +70,4 @@ static struct kunit_suite mptcp_crypto_suite = {
kunit_test_suite(mptcp_crypto_suite);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("KUnit tests for MPTCP Crypto");
diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c
index e72b518c5d02..13fe0748dde8 100644
--- a/net/mptcp/ctrl.c
+++ b/net/mptcp/ctrl.c
@@ -27,6 +27,7 @@ struct mptcp_pernet {
#endif
unsigned int add_addr_timeout;
+ unsigned int close_timeout;
unsigned int stale_loss_cnt;
u8 mptcp_enabled;
u8 checksum_enabled;
@@ -65,6 +66,13 @@ unsigned int mptcp_stale_loss_cnt(const struct net *net)
return mptcp_get_pernet(net)->stale_loss_cnt;
}
+unsigned int mptcp_close_timeout(const struct sock *sk)
+{
+ if (sock_flag(sk, SOCK_DEAD))
+ return TCP_TIMEWAIT_LEN;
+ return mptcp_get_pernet(sock_net(sk))->close_timeout;
+}
+
int mptcp_get_pm_type(const struct net *net)
{
return mptcp_get_pernet(net)->pm_type;
@@ -79,6 +87,7 @@ static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
{
pernet->mptcp_enabled = 1;
pernet->add_addr_timeout = TCP_RTO_MAX;
+ pernet->close_timeout = TCP_TIMEWAIT_LEN;
pernet->checksum_enabled = 0;
pernet->allow_join_initial_addr_port = 1;
pernet->stale_loss_cnt = 4;
@@ -141,6 +150,12 @@ static struct ctl_table mptcp_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_dostring,
},
+ {
+ .procname = "close_timeout",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
{}
};
@@ -163,6 +178,7 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
table[4].data = &pernet->stale_loss_cnt;
table[5].data = &pernet->pm_type;
table[6].data = &pernet->scheduler;
+ table[7].data = &pernet->close_timeout;
hdr = register_net_sysctl_sz(net, MPTCP_SYSCTL_PATH, table,
ARRAY_SIZE(mptcp_sysctl_table));
diff --git a/net/mptcp/fastopen.c b/net/mptcp/fastopen.c
index bceaab8dd8e4..74698582a285 100644
--- a/net/mptcp/fastopen.c
+++ b/net/mptcp/fastopen.c
@@ -52,6 +52,7 @@ void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subf
mptcp_set_owner_r(skb, sk);
__skb_queue_tail(&sk->sk_receive_queue, skb);
+ mptcp_sk(sk)->bytes_received += skb->len;
sk->sk_data_ready(sk);
diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c
index a0990c365a2e..c30405e76833 100644
--- a/net/mptcp/mib.c
+++ b/net/mptcp/mib.c
@@ -66,6 +66,7 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("RcvWndShared", MPTCP_MIB_RCVWNDSHARED),
SNMP_MIB_ITEM("RcvWndConflictUpdate", MPTCP_MIB_RCVWNDCONFLICTUPDATE),
SNMP_MIB_ITEM("RcvWndConflict", MPTCP_MIB_RCVWNDCONFLICT),
+ SNMP_MIB_ITEM("MPCurrEstab", MPTCP_MIB_CURRESTAB),
SNMP_MIB_SENTINEL
};
diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h
index cae71d947252..dd7fd1f246b5 100644
--- a/net/mptcp/mib.h
+++ b/net/mptcp/mib.h
@@ -65,6 +65,7 @@ enum linux_mptcp_mib_field {
* conflict with another subflow while updating msk rcv wnd
*/
MPTCP_MIB_RCVWNDCONFLICT, /* Conflict with while updating msk rcv wnd */
+ MPTCP_MIB_CURRESTAB, /* Current established MPTCP connections */
__MPTCP_MIB_MAX
};
@@ -95,4 +96,11 @@ static inline void __MPTCP_INC_STATS(struct net *net,
__SNMP_INC_STATS(net->mib.mptcp_statistics, field);
}
+static inline void MPTCP_DEC_STATS(struct net *net,
+ enum linux_mptcp_mib_field field)
+{
+ if (likely(net->mib.mptcp_statistics))
+ SNMP_DEC_STATS(net->mib.mptcp_statistics, field);
+}
+
bool mptcp_mib_alloc(struct net *net);
diff --git a/net/mptcp/mptcp_diag.c b/net/mptcp/mptcp_diag.c
index 8df1bdb647e2..5409c2ea3f57 100644
--- a/net/mptcp/mptcp_diag.c
+++ b/net/mptcp/mptcp_diag.c
@@ -245,4 +245,5 @@ static void __exit mptcp_diag_exit(void)
module_init(mptcp_diag_init);
module_exit(mptcp_diag_exit);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("MPTCP socket monitoring via SOCK_DIAG");
MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-262 /* AF_INET - IPPROTO_MPTCP */);
diff --git a/net/mptcp/mptcp_pm_gen.c b/net/mptcp/mptcp_pm_gen.c
new file mode 100644
index 000000000000..670da7822e6c
--- /dev/null
+++ b/net/mptcp/mptcp_pm_gen.c
@@ -0,0 +1,179 @@
+// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
+/* Do not edit directly, auto-generated from: */
+/* Documentation/netlink/specs/mptcp_pm.yaml */
+/* YNL-GEN kernel source */
+
+#include <net/netlink.h>
+#include <net/genetlink.h>
+
+#include "mptcp_pm_gen.h"
+
+#include <uapi/linux/mptcp_pm.h>
+
+/* Common nested types */
+const struct nla_policy mptcp_pm_address_nl_policy[MPTCP_PM_ADDR_ATTR_IF_IDX + 1] = {
+ [MPTCP_PM_ADDR_ATTR_FAMILY] = { .type = NLA_U16, },
+ [MPTCP_PM_ADDR_ATTR_ID] = { .type = NLA_U8, },
+ [MPTCP_PM_ADDR_ATTR_ADDR4] = { .type = NLA_U32, },
+ [MPTCP_PM_ADDR_ATTR_ADDR6] = NLA_POLICY_EXACT_LEN(16),
+ [MPTCP_PM_ADDR_ATTR_PORT] = { .type = NLA_U16, },
+ [MPTCP_PM_ADDR_ATTR_FLAGS] = { .type = NLA_U32, },
+ [MPTCP_PM_ADDR_ATTR_IF_IDX] = { .type = NLA_S32, },
+};
+
+/* MPTCP_PM_CMD_ADD_ADDR - do */
+const struct nla_policy mptcp_pm_add_addr_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1] = {
+ [MPTCP_PM_ENDPOINT_ADDR] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy),
+};
+
+/* MPTCP_PM_CMD_DEL_ADDR - do */
+const struct nla_policy mptcp_pm_del_addr_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1] = {
+ [MPTCP_PM_ENDPOINT_ADDR] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy),
+};
+
+/* MPTCP_PM_CMD_GET_ADDR - do */
+const struct nla_policy mptcp_pm_get_addr_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1] = {
+ [MPTCP_PM_ENDPOINT_ADDR] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy),
+};
+
+/* MPTCP_PM_CMD_FLUSH_ADDRS - do */
+const struct nla_policy mptcp_pm_flush_addrs_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1] = {
+ [MPTCP_PM_ENDPOINT_ADDR] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy),
+};
+
+/* MPTCP_PM_CMD_SET_LIMITS - do */
+const struct nla_policy mptcp_pm_set_limits_nl_policy[MPTCP_PM_ATTR_SUBFLOWS + 1] = {
+ [MPTCP_PM_ATTR_RCV_ADD_ADDRS] = { .type = NLA_U32, },
+ [MPTCP_PM_ATTR_SUBFLOWS] = { .type = NLA_U32, },
+};
+
+/* MPTCP_PM_CMD_GET_LIMITS - do */
+const struct nla_policy mptcp_pm_get_limits_nl_policy[MPTCP_PM_ATTR_SUBFLOWS + 1] = {
+ [MPTCP_PM_ATTR_RCV_ADD_ADDRS] = { .type = NLA_U32, },
+ [MPTCP_PM_ATTR_SUBFLOWS] = { .type = NLA_U32, },
+};
+
+/* MPTCP_PM_CMD_SET_FLAGS - do */
+const struct nla_policy mptcp_pm_set_flags_nl_policy[MPTCP_PM_ATTR_ADDR_REMOTE + 1] = {
+ [MPTCP_PM_ATTR_ADDR] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy),
+ [MPTCP_PM_ATTR_TOKEN] = { .type = NLA_U32, },
+ [MPTCP_PM_ATTR_ADDR_REMOTE] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy),
+};
+
+/* MPTCP_PM_CMD_ANNOUNCE - do */
+const struct nla_policy mptcp_pm_announce_nl_policy[MPTCP_PM_ATTR_TOKEN + 1] = {
+ [MPTCP_PM_ATTR_ADDR] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy),
+ [MPTCP_PM_ATTR_TOKEN] = { .type = NLA_U32, },
+};
+
+/* MPTCP_PM_CMD_REMOVE - do */
+const struct nla_policy mptcp_pm_remove_nl_policy[MPTCP_PM_ATTR_LOC_ID + 1] = {
+ [MPTCP_PM_ATTR_TOKEN] = { .type = NLA_U32, },
+ [MPTCP_PM_ATTR_LOC_ID] = { .type = NLA_U8, },
+};
+
+/* MPTCP_PM_CMD_SUBFLOW_CREATE - do */
+const struct nla_policy mptcp_pm_subflow_create_nl_policy[MPTCP_PM_ATTR_ADDR_REMOTE + 1] = {
+ [MPTCP_PM_ATTR_ADDR] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy),
+ [MPTCP_PM_ATTR_TOKEN] = { .type = NLA_U32, },
+ [MPTCP_PM_ATTR_ADDR_REMOTE] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy),
+};
+
+/* MPTCP_PM_CMD_SUBFLOW_DESTROY - do */
+const struct nla_policy mptcp_pm_subflow_destroy_nl_policy[MPTCP_PM_ATTR_ADDR_REMOTE + 1] = {
+ [MPTCP_PM_ATTR_ADDR] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy),
+ [MPTCP_PM_ATTR_TOKEN] = { .type = NLA_U32, },
+ [MPTCP_PM_ATTR_ADDR_REMOTE] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy),
+};
+
+/* Ops table for mptcp_pm */
+const struct genl_ops mptcp_pm_nl_ops[11] = {
+ {
+ .cmd = MPTCP_PM_CMD_ADD_ADDR,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .doit = mptcp_pm_nl_add_addr_doit,
+ .policy = mptcp_pm_add_addr_nl_policy,
+ .maxattr = MPTCP_PM_ENDPOINT_ADDR,
+ .flags = GENL_UNS_ADMIN_PERM,
+ },
+ {
+ .cmd = MPTCP_PM_CMD_DEL_ADDR,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .doit = mptcp_pm_nl_del_addr_doit,
+ .policy = mptcp_pm_del_addr_nl_policy,
+ .maxattr = MPTCP_PM_ENDPOINT_ADDR,
+ .flags = GENL_UNS_ADMIN_PERM,
+ },
+ {
+ .cmd = MPTCP_PM_CMD_GET_ADDR,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .doit = mptcp_pm_nl_get_addr_doit,
+ .dumpit = mptcp_pm_nl_get_addr_dumpit,
+ .policy = mptcp_pm_get_addr_nl_policy,
+ .maxattr = MPTCP_PM_ENDPOINT_ADDR,
+ .flags = GENL_UNS_ADMIN_PERM,
+ },
+ {
+ .cmd = MPTCP_PM_CMD_FLUSH_ADDRS,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .doit = mptcp_pm_nl_flush_addrs_doit,
+ .policy = mptcp_pm_flush_addrs_nl_policy,
+ .maxattr = MPTCP_PM_ENDPOINT_ADDR,
+ .flags = GENL_UNS_ADMIN_PERM,
+ },
+ {
+ .cmd = MPTCP_PM_CMD_SET_LIMITS,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .doit = mptcp_pm_nl_set_limits_doit,
+ .policy = mptcp_pm_set_limits_nl_policy,
+ .maxattr = MPTCP_PM_ATTR_SUBFLOWS,
+ .flags = GENL_UNS_ADMIN_PERM,
+ },
+ {
+ .cmd = MPTCP_PM_CMD_GET_LIMITS,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .doit = mptcp_pm_nl_get_limits_doit,
+ .policy = mptcp_pm_get_limits_nl_policy,
+ .maxattr = MPTCP_PM_ATTR_SUBFLOWS,
+ },
+ {
+ .cmd = MPTCP_PM_CMD_SET_FLAGS,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .doit = mptcp_pm_nl_set_flags_doit,
+ .policy = mptcp_pm_set_flags_nl_policy,
+ .maxattr = MPTCP_PM_ATTR_ADDR_REMOTE,
+ .flags = GENL_UNS_ADMIN_PERM,
+ },
+ {
+ .cmd = MPTCP_PM_CMD_ANNOUNCE,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .doit = mptcp_pm_nl_announce_doit,
+ .policy = mptcp_pm_announce_nl_policy,
+ .maxattr = MPTCP_PM_ATTR_TOKEN,
+ .flags = GENL_UNS_ADMIN_PERM,
+ },
+ {
+ .cmd = MPTCP_PM_CMD_REMOVE,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .doit = mptcp_pm_nl_remove_doit,
+ .policy = mptcp_pm_remove_nl_policy,
+ .maxattr = MPTCP_PM_ATTR_LOC_ID,
+ .flags = GENL_UNS_ADMIN_PERM,
+ },
+ {
+ .cmd = MPTCP_PM_CMD_SUBFLOW_CREATE,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .doit = mptcp_pm_nl_subflow_create_doit,
+ .policy = mptcp_pm_subflow_create_nl_policy,
+ .maxattr = MPTCP_PM_ATTR_ADDR_REMOTE,
+ .flags = GENL_UNS_ADMIN_PERM,
+ },
+ {
+ .cmd = MPTCP_PM_CMD_SUBFLOW_DESTROY,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .doit = mptcp_pm_nl_subflow_destroy_doit,
+ .policy = mptcp_pm_subflow_destroy_nl_policy,
+ .maxattr = MPTCP_PM_ATTR_ADDR_REMOTE,
+ .flags = GENL_UNS_ADMIN_PERM,
+ },
+};
diff --git a/net/mptcp/mptcp_pm_gen.h b/net/mptcp/mptcp_pm_gen.h
new file mode 100644
index 000000000000..ac9fc7225b6a
--- /dev/null
+++ b/net/mptcp/mptcp_pm_gen.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* Do not edit directly, auto-generated from: */
+/* Documentation/netlink/specs/mptcp_pm.yaml */
+/* YNL-GEN kernel header */
+
+#ifndef _LINUX_MPTCP_PM_GEN_H
+#define _LINUX_MPTCP_PM_GEN_H
+
+#include <net/netlink.h>
+#include <net/genetlink.h>
+
+#include <uapi/linux/mptcp_pm.h>
+
+/* Common nested types */
+extern const struct nla_policy mptcp_pm_address_nl_policy[MPTCP_PM_ADDR_ATTR_IF_IDX + 1];
+
+extern const struct nla_policy mptcp_pm_add_addr_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1];
+
+extern const struct nla_policy mptcp_pm_del_addr_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1];
+
+extern const struct nla_policy mptcp_pm_get_addr_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1];
+
+extern const struct nla_policy mptcp_pm_flush_addrs_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1];
+
+extern const struct nla_policy mptcp_pm_set_limits_nl_policy[MPTCP_PM_ATTR_SUBFLOWS + 1];
+
+extern const struct nla_policy mptcp_pm_get_limits_nl_policy[MPTCP_PM_ATTR_SUBFLOWS + 1];
+
+extern const struct nla_policy mptcp_pm_set_flags_nl_policy[MPTCP_PM_ATTR_ADDR_REMOTE + 1];
+
+extern const struct nla_policy mptcp_pm_announce_nl_policy[MPTCP_PM_ATTR_TOKEN + 1];
+
+extern const struct nla_policy mptcp_pm_remove_nl_policy[MPTCP_PM_ATTR_LOC_ID + 1];
+
+extern const struct nla_policy mptcp_pm_subflow_create_nl_policy[MPTCP_PM_ATTR_ADDR_REMOTE + 1];
+
+extern const struct nla_policy mptcp_pm_subflow_destroy_nl_policy[MPTCP_PM_ATTR_ADDR_REMOTE + 1];
+
+/* Ops table for mptcp_pm */
+extern const struct genl_ops mptcp_pm_nl_ops[11];
+
+int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info);
+int mptcp_pm_nl_del_addr_doit(struct sk_buff *skb, struct genl_info *info);
+int mptcp_pm_nl_get_addr_doit(struct sk_buff *skb, struct genl_info *info);
+int mptcp_pm_nl_get_addr_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int mptcp_pm_nl_flush_addrs_doit(struct sk_buff *skb, struct genl_info *info);
+int mptcp_pm_nl_set_limits_doit(struct sk_buff *skb, struct genl_info *info);
+int mptcp_pm_nl_get_limits_doit(struct sk_buff *skb, struct genl_info *info);
+int mptcp_pm_nl_set_flags_doit(struct sk_buff *skb, struct genl_info *info);
+int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info);
+int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info);
+int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int mptcp_pm_nl_subflow_destroy_doit(struct sk_buff *skb,
+ struct genl_info *info);
+
+#endif /* _LINUX_MPTCP_PM_GEN_H */
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index c254accb14de..d2527d189a79 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -108,6 +108,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
mp_opt->suboptions |= OPTION_MPTCP_DSS;
mp_opt->use_map = 1;
mp_opt->mpc_map = 1;
+ mp_opt->use_ack = 0;
mp_opt->data_len = get_unaligned_be16(ptr);
ptr += 2;
}
@@ -122,8 +123,8 @@ static void mptcp_parse_option(const struct sk_buff *skb,
break;
case MPTCPOPT_MP_JOIN:
- mp_opt->suboptions |= OPTIONS_MPTCP_MPJ;
if (opsize == TCPOLEN_MPTCP_MPJ_SYN) {
+ mp_opt->suboptions |= OPTION_MPTCP_MPJ_SYN;
mp_opt->backup = *ptr++ & MPTCPOPT_BACKUP;
mp_opt->join_id = *ptr++;
mp_opt->token = get_unaligned_be32(ptr);
@@ -134,6 +135,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
mp_opt->backup, mp_opt->join_id,
mp_opt->token, mp_opt->nonce);
} else if (opsize == TCPOLEN_MPTCP_MPJ_SYNACK) {
+ mp_opt->suboptions |= OPTION_MPTCP_MPJ_SYNACK;
mp_opt->backup = *ptr++ & MPTCPOPT_BACKUP;
mp_opt->join_id = *ptr++;
mp_opt->thmac = get_unaligned_be64(ptr);
@@ -144,11 +146,10 @@ static void mptcp_parse_option(const struct sk_buff *skb,
mp_opt->backup, mp_opt->join_id,
mp_opt->thmac, mp_opt->nonce);
} else if (opsize == TCPOLEN_MPTCP_MPJ_ACK) {
+ mp_opt->suboptions |= OPTION_MPTCP_MPJ_ACK;
ptr += 2;
memcpy(mp_opt->hmac, ptr, MPTCPOPT_HMAC_LEN);
pr_debug("MP_JOIN hmac");
- } else {
- mp_opt->suboptions &= ~OPTIONS_MPTCP_MPJ;
}
break;
@@ -1269,12 +1270,13 @@ static void mptcp_set_rwin(struct tcp_sock *tp, struct tcphdr *th)
if (rcv_wnd == rcv_wnd_old)
break;
- if (before64(rcv_wnd_new, rcv_wnd)) {
+
+ rcv_wnd_old = rcv_wnd;
+ if (before64(rcv_wnd_new, rcv_wnd_old)) {
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDCONFLICTUPDATE);
goto raise_win;
}
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDCONFLICT);
- rcv_wnd_old = rcv_wnd;
}
return;
}
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index d8da5374d9e1..4ae19113b8eb 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -184,7 +184,7 @@ void mptcp_pm_subflow_established(struct mptcp_sock *msk)
spin_unlock_bh(&pm->lock);
}
-void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk,
+void mptcp_pm_subflow_check_next(struct mptcp_sock *msk,
const struct mptcp_subflow_context *subflow)
{
struct mptcp_pm_data *pm = &msk->pm;
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 9661f3812682..287a60381eae 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -1048,6 +1048,11 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
if (err)
return err;
+ /* We don't use mptcp_set_state() here because it needs to be called
+ * under the msk socket lock. For the moment, that will not bring
+ * anything more than only calling inet_sk_state_store(), because the
+ * old status is known (TCP_CLOSE).
+ */
inet_sk_state_store(newsk, TCP_LISTEN);
lock_sock(ssk);
err = __inet_listen_sk(ssk, backlog);
@@ -1100,33 +1105,10 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc
static const struct genl_multicast_group mptcp_pm_mcgrps[] = {
[MPTCP_PM_CMD_GRP_OFFSET] = { .name = MPTCP_PM_CMD_GRP_NAME, },
[MPTCP_PM_EV_GRP_OFFSET] = { .name = MPTCP_PM_EV_GRP_NAME,
- .flags = GENL_UNS_ADMIN_PERM,
+ .flags = GENL_MCAST_CAP_NET_ADMIN,
},
};
-static const struct nla_policy
-mptcp_pm_addr_policy[MPTCP_PM_ADDR_ATTR_MAX + 1] = {
- [MPTCP_PM_ADDR_ATTR_FAMILY] = { .type = NLA_U16, },
- [MPTCP_PM_ADDR_ATTR_ID] = { .type = NLA_U8, },
- [MPTCP_PM_ADDR_ATTR_ADDR4] = { .type = NLA_U32, },
- [MPTCP_PM_ADDR_ATTR_ADDR6] =
- NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
- [MPTCP_PM_ADDR_ATTR_PORT] = { .type = NLA_U16 },
- [MPTCP_PM_ADDR_ATTR_FLAGS] = { .type = NLA_U32 },
- [MPTCP_PM_ADDR_ATTR_IF_IDX] = { .type = NLA_S32 },
-};
-
-static const struct nla_policy mptcp_pm_policy[MPTCP_PM_ATTR_MAX + 1] = {
- [MPTCP_PM_ATTR_ADDR] =
- NLA_POLICY_NESTED(mptcp_pm_addr_policy),
- [MPTCP_PM_ATTR_RCV_ADD_ADDRS] = { .type = NLA_U32, },
- [MPTCP_PM_ATTR_SUBFLOWS] = { .type = NLA_U32, },
- [MPTCP_PM_ATTR_TOKEN] = { .type = NLA_U32, },
- [MPTCP_PM_ATTR_LOC_ID] = { .type = NLA_U8, },
- [MPTCP_PM_ATTR_ADDR_REMOTE] =
- NLA_POLICY_NESTED(mptcp_pm_addr_policy),
-};
-
void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk)
{
struct mptcp_subflow_context *iter, *subflow = mptcp_subflow_ctx(ssk);
@@ -1188,7 +1170,7 @@ static int mptcp_pm_parse_pm_addr_attr(struct nlattr *tb[],
/* no validation needed - was already done via nested policy */
err = nla_parse_nested_deprecated(tb, MPTCP_PM_ADDR_ATTR_MAX, attr,
- mptcp_pm_addr_policy, info->extack);
+ mptcp_pm_address_nl_policy, info->extack);
if (err)
return err;
@@ -1303,9 +1285,9 @@ next:
return 0;
}
-static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info)
+int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info)
{
- struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR];
+ struct nlattr *attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR];
struct pm_nl_pernet *pernet = genl_info_pm_nl(info);
struct mptcp_pm_addr_entry addr, *entry;
int ret;
@@ -1484,9 +1466,9 @@ next:
return 0;
}
-static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info)
+int mptcp_pm_nl_del_addr_doit(struct sk_buff *skb, struct genl_info *info)
{
- struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR];
+ struct nlattr *attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR];
struct pm_nl_pernet *pernet = genl_info_pm_nl(info);
struct mptcp_pm_addr_entry addr, *entry;
unsigned int addr_max;
@@ -1538,8 +1520,9 @@ void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list)
struct mptcp_pm_addr_entry *entry;
list_for_each_entry(entry, rm_list, list) {
- remove_anno_list_by_saddr(msk, &entry->addr);
- if (alist.nr < MPTCP_RM_IDS_MAX)
+ if ((remove_anno_list_by_saddr(msk, &entry->addr) ||
+ lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) &&
+ alist.nr < MPTCP_RM_IDS_MAX)
alist.ids[alist.nr++] = entry->addr.id;
}
@@ -1619,7 +1602,7 @@ static void __reset_counters(struct pm_nl_pernet *pernet)
pernet->addrs = 0;
}
-static int mptcp_nl_cmd_flush_addrs(struct sk_buff *skb, struct genl_info *info)
+int mptcp_pm_nl_flush_addrs_doit(struct sk_buff *skb, struct genl_info *info)
{
struct pm_nl_pernet *pernet = genl_info_pm_nl(info);
LIST_HEAD(free_list);
@@ -1675,9 +1658,9 @@ nla_put_failure:
return -EMSGSIZE;
}
-static int mptcp_nl_cmd_get_addr(struct sk_buff *skb, struct genl_info *info)
+int mptcp_pm_nl_get_addr_doit(struct sk_buff *skb, struct genl_info *info)
{
- struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR];
+ struct nlattr *attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR];
struct pm_nl_pernet *pernet = genl_info_pm_nl(info);
struct mptcp_pm_addr_entry addr, *entry;
struct sk_buff *msg;
@@ -1725,8 +1708,8 @@ fail:
return ret;
}
-static int mptcp_nl_cmd_dump_addrs(struct sk_buff *msg,
- struct netlink_callback *cb)
+int mptcp_pm_nl_get_addr_dumpit(struct sk_buff *msg,
+ struct netlink_callback *cb)
{
struct net *net = sock_net(msg->sk);
struct mptcp_pm_addr_entry *entry;
@@ -1783,8 +1766,7 @@ static int parse_limit(struct genl_info *info, int id, unsigned int *limit)
return 0;
}
-static int
-mptcp_nl_cmd_set_limits(struct sk_buff *skb, struct genl_info *info)
+int mptcp_pm_nl_set_limits_doit(struct sk_buff *skb, struct genl_info *info)
{
struct pm_nl_pernet *pernet = genl_info_pm_nl(info);
unsigned int rcv_addrs, subflows;
@@ -1809,8 +1791,7 @@ unlock:
return ret;
}
-static int
-mptcp_nl_cmd_get_limits(struct sk_buff *skb, struct genl_info *info)
+int mptcp_pm_nl_get_limits_doit(struct sk_buff *skb, struct genl_info *info)
{
struct pm_nl_pernet *pernet = genl_info_pm_nl(info);
struct sk_buff *msg;
@@ -1919,7 +1900,7 @@ int mptcp_pm_nl_set_flags(struct net *net, struct mptcp_pm_addr_entry *addr, u8
return 0;
}
-static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info)
+int mptcp_pm_nl_set_flags_doit(struct sk_buff *skb, struct genl_info *info)
{
struct mptcp_pm_addr_entry remote = { .addr = { .family = AF_UNSPEC }, };
struct mptcp_pm_addr_entry addr = { .addr = { .family = AF_UNSPEC }, };
@@ -2283,72 +2264,13 @@ nla_put_failure:
nlmsg_free(skb);
}
-static const struct genl_small_ops mptcp_pm_ops[] = {
- {
- .cmd = MPTCP_PM_CMD_ADD_ADDR,
- .doit = mptcp_nl_cmd_add_addr,
- .flags = GENL_UNS_ADMIN_PERM,
- },
- {
- .cmd = MPTCP_PM_CMD_DEL_ADDR,
- .doit = mptcp_nl_cmd_del_addr,
- .flags = GENL_UNS_ADMIN_PERM,
- },
- {
- .cmd = MPTCP_PM_CMD_FLUSH_ADDRS,
- .doit = mptcp_nl_cmd_flush_addrs,
- .flags = GENL_UNS_ADMIN_PERM,
- },
- {
- .cmd = MPTCP_PM_CMD_GET_ADDR,
- .doit = mptcp_nl_cmd_get_addr,
- .dumpit = mptcp_nl_cmd_dump_addrs,
- },
- {
- .cmd = MPTCP_PM_CMD_SET_LIMITS,
- .doit = mptcp_nl_cmd_set_limits,
- .flags = GENL_UNS_ADMIN_PERM,
- },
- {
- .cmd = MPTCP_PM_CMD_GET_LIMITS,
- .doit = mptcp_nl_cmd_get_limits,
- },
- {
- .cmd = MPTCP_PM_CMD_SET_FLAGS,
- .doit = mptcp_nl_cmd_set_flags,
- .flags = GENL_UNS_ADMIN_PERM,
- },
- {
- .cmd = MPTCP_PM_CMD_ANNOUNCE,
- .doit = mptcp_nl_cmd_announce,
- .flags = GENL_UNS_ADMIN_PERM,
- },
- {
- .cmd = MPTCP_PM_CMD_REMOVE,
- .doit = mptcp_nl_cmd_remove,
- .flags = GENL_UNS_ADMIN_PERM,
- },
- {
- .cmd = MPTCP_PM_CMD_SUBFLOW_CREATE,
- .doit = mptcp_nl_cmd_sf_create,
- .flags = GENL_UNS_ADMIN_PERM,
- },
- {
- .cmd = MPTCP_PM_CMD_SUBFLOW_DESTROY,
- .doit = mptcp_nl_cmd_sf_destroy,
- .flags = GENL_UNS_ADMIN_PERM,
- },
-};
-
static struct genl_family mptcp_genl_family __ro_after_init = {
.name = MPTCP_PM_NAME,
.version = MPTCP_PM_VER,
- .maxattr = MPTCP_PM_ATTR_MAX,
- .policy = mptcp_pm_policy,
.netnsok = true,
.module = THIS_MODULE,
- .small_ops = mptcp_pm_ops,
- .n_small_ops = ARRAY_SIZE(mptcp_pm_ops),
+ .ops = mptcp_pm_nl_ops,
+ .n_ops = ARRAY_SIZE(mptcp_pm_nl_ops),
.resv_start_op = MPTCP_PM_CMD_SUBFLOW_DESTROY + 1,
.mcgrps = mptcp_pm_mcgrps,
.n_mcgrps = ARRAY_SIZE(mptcp_pm_mcgrps),
diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c
index b5a8aa4c1ebd..efecbe3cf415 100644
--- a/net/mptcp/pm_userspace.c
+++ b/net/mptcp/pm_userspace.c
@@ -145,13 +145,14 @@ int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk,
return mptcp_userspace_pm_append_new_local_addr(msk, &new_entry);
}
-int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info)
+int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN];
struct nlattr *addr = info->attrs[MPTCP_PM_ATTR_ADDR];
struct mptcp_pm_addr_entry addr_val;
struct mptcp_sock *msk;
int err = -EINVAL;
+ struct sock *sk;
u32 token_val;
if (!addr || !token) {
@@ -167,6 +168,8 @@ int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info)
return err;
}
+ sk = (struct sock *)msk;
+
if (!mptcp_pm_is_userspace(msk)) {
GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected");
goto announce_err;
@@ -190,7 +193,7 @@ int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info)
goto announce_err;
}
- lock_sock((struct sock *)msk);
+ lock_sock(sk);
spin_lock_bh(&msk->pm.lock);
if (mptcp_pm_alloc_anno_list(msk, &addr_val.addr)) {
@@ -200,15 +203,49 @@ int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info)
}
spin_unlock_bh(&msk->pm.lock);
- release_sock((struct sock *)msk);
+ release_sock(sk);
err = 0;
announce_err:
- sock_put((struct sock *)msk);
+ sock_put(sk);
return err;
}
-int mptcp_nl_cmd_remove(struct sk_buff *skb, struct genl_info *info)
+static int mptcp_userspace_pm_remove_id_zero_address(struct mptcp_sock *msk,
+ struct genl_info *info)
+{
+ struct mptcp_rm_list list = { .nr = 0 };
+ struct mptcp_subflow_context *subflow;
+ struct sock *sk = (struct sock *)msk;
+ bool has_id_0 = false;
+ int err = -EINVAL;
+
+ lock_sock(sk);
+ mptcp_for_each_subflow(msk, subflow) {
+ if (subflow->local_id == 0) {
+ has_id_0 = true;
+ break;
+ }
+ }
+ if (!has_id_0) {
+ GENL_SET_ERR_MSG(info, "address with id 0 not found");
+ goto remove_err;
+ }
+
+ list.ids[list.nr++] = 0;
+
+ spin_lock_bh(&msk->pm.lock);
+ mptcp_pm_remove_addr(msk, &list);
+ spin_unlock_bh(&msk->pm.lock);
+
+ err = 0;
+
+remove_err:
+ release_sock(sk);
+ return err;
+}
+
+int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN];
struct nlattr *id = info->attrs[MPTCP_PM_ATTR_LOC_ID];
@@ -217,6 +254,7 @@ int mptcp_nl_cmd_remove(struct sk_buff *skb, struct genl_info *info)
struct mptcp_sock *msk;
LIST_HEAD(free_list);
int err = -EINVAL;
+ struct sock *sk;
u32 token_val;
u8 id_val;
@@ -234,12 +272,19 @@ int mptcp_nl_cmd_remove(struct sk_buff *skb, struct genl_info *info)
return err;
}
+ sk = (struct sock *)msk;
+
if (!mptcp_pm_is_userspace(msk)) {
GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected");
- goto remove_err;
+ goto out;
}
- lock_sock((struct sock *)msk);
+ if (id_val == 0) {
+ err = mptcp_userspace_pm_remove_id_zero_address(msk, info);
+ goto out;
+ }
+
+ lock_sock(sk);
list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) {
if (entry->addr.id == id_val) {
@@ -250,27 +295,27 @@ int mptcp_nl_cmd_remove(struct sk_buff *skb, struct genl_info *info)
if (!match) {
GENL_SET_ERR_MSG(info, "address with specified id not found");
- release_sock((struct sock *)msk);
- goto remove_err;
+ release_sock(sk);
+ goto out;
}
list_move(&match->list, &free_list);
mptcp_pm_remove_addrs(msk, &free_list);
- release_sock((struct sock *)msk);
+ release_sock(sk);
list_for_each_entry_safe(match, entry, &free_list, list) {
- sock_kfree_s((struct sock *)msk, match, sizeof(*match));
+ sock_kfree_s(sk, match, sizeof(*match));
}
err = 0;
- remove_err:
- sock_put((struct sock *)msk);
+out:
+ sock_put(sk);
return err;
}
-int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info)
+int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr *raddr = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE];
struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN];
@@ -296,6 +341,8 @@ int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info)
return err;
}
+ sk = (struct sock *)msk;
+
if (!mptcp_pm_is_userspace(msk)) {
GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected");
goto create_err;
@@ -307,20 +354,12 @@ int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info)
goto create_err;
}
- if (addr_l.id == 0) {
- NL_SET_ERR_MSG_ATTR(info->extack, laddr, "missing local addr id");
- err = -EINVAL;
- goto create_err;
- }
-
err = mptcp_pm_parse_addr(raddr, info, &addr_r);
if (err < 0) {
NL_SET_ERR_MSG_ATTR(info->extack, raddr, "error parsing remote addr");
goto create_err;
}
- sk = (struct sock *)msk;
-
if (!mptcp_pm_addr_families_match(sk, &addr_l, &addr_r)) {
GENL_SET_ERR_MSG(info, "families mismatch");
err = -EINVAL;
@@ -348,7 +387,7 @@ int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info)
spin_unlock_bh(&msk->pm.lock);
create_err:
- sock_put((struct sock *)msk);
+ sock_put(sk);
return err;
}
@@ -400,7 +439,7 @@ static struct sock *mptcp_nl_find_ssk(struct mptcp_sock *msk,
return NULL;
}
-int mptcp_nl_cmd_sf_destroy(struct sk_buff *skb, struct genl_info *info)
+int mptcp_pm_nl_subflow_destroy_doit(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr *raddr = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE];
struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN];
@@ -425,6 +464,8 @@ int mptcp_nl_cmd_sf_destroy(struct sk_buff *skb, struct genl_info *info)
return err;
}
+ sk = (struct sock *)msk;
+
if (!mptcp_pm_is_userspace(msk)) {
GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected");
goto destroy_err;
@@ -454,7 +495,6 @@ int mptcp_nl_cmd_sf_destroy(struct sk_buff *skb, struct genl_info *info)
goto destroy_err;
}
- sk = (struct sock *)msk;
lock_sock(sk);
ssk = mptcp_nl_find_ssk(msk, &addr_l, &addr_r);
if (ssk) {
@@ -474,7 +514,7 @@ int mptcp_nl_cmd_sf_destroy(struct sk_buff *skb, struct genl_info *info)
release_sock(sk);
destroy_err:
- sock_put((struct sock *)msk);
+ sock_put(sk);
return err;
}
@@ -484,6 +524,7 @@ int mptcp_userspace_pm_set_flags(struct net *net, struct nlattr *token,
{
struct mptcp_sock *msk;
int ret = -EINVAL;
+ struct sock *sk;
u32 token_val;
token_val = nla_get_u32(token);
@@ -492,6 +533,8 @@ int mptcp_userspace_pm_set_flags(struct net *net, struct nlattr *token,
if (!msk)
return ret;
+ sk = (struct sock *)msk;
+
if (!mptcp_pm_is_userspace(msk))
goto set_flags_err;
@@ -499,11 +542,11 @@ int mptcp_userspace_pm_set_flags(struct net *net, struct nlattr *token,
rem->addr.family == AF_UNSPEC)
goto set_flags_err;
- lock_sock((struct sock *)msk);
+ lock_sock(sk);
ret = mptcp_pm_nl_mp_prio_send_ack(msk, &loc->addr, &rem->addr, bkup);
- release_sock((struct sock *)msk);
+ release_sock(sk);
set_flags_err:
- sock_put((struct sock *)msk);
+ sock_put(sk);
return ret;
}
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 933b257eee02..028e8b473626 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -55,28 +55,14 @@ static u64 mptcp_wnd_end(const struct mptcp_sock *msk)
return READ_ONCE(msk->wnd_end);
}
-static bool mptcp_is_tcpsk(struct sock *sk)
+static const struct proto_ops *mptcp_fallback_tcp_ops(const struct sock *sk)
{
- struct socket *sock = sk->sk_socket;
-
- if (unlikely(sk->sk_prot == &tcp_prot)) {
- /* we are being invoked after mptcp_accept() has
- * accepted a non-mp-capable flow: sk is a tcp_sk,
- * not an mptcp one.
- *
- * Hand the socket over to tcp so all further socket ops
- * bypass mptcp.
- */
- WRITE_ONCE(sock->ops, &inet_stream_ops);
- return true;
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
- } else if (unlikely(sk->sk_prot == &tcpv6_prot)) {
- WRITE_ONCE(sock->ops, &inet6_stream_ops);
- return true;
+ if (sk->sk_prot == &tcpv6_prot)
+ return &inet6_stream_ops;
#endif
- }
-
- return false;
+ WARN_ON_ONCE(sk->sk_prot != &tcp_prot);
+ return &inet_stream_ops;
}
static int __mptcp_socket_create(struct mptcp_sock *msk)
@@ -121,8 +107,6 @@ struct sock *__mptcp_nmpc_sk(struct mptcp_sock *msk)
ret = __mptcp_socket_create(msk);
if (ret)
return ERR_PTR(ret);
-
- mptcp_sockopt_sync(msk, msk->first);
}
return msk->first;
@@ -134,9 +118,15 @@ static void mptcp_drop(struct sock *sk, struct sk_buff *skb)
__kfree_skb(skb);
}
+static void mptcp_rmem_fwd_alloc_add(struct sock *sk, int size)
+{
+ WRITE_ONCE(mptcp_sk(sk)->rmem_fwd_alloc,
+ mptcp_sk(sk)->rmem_fwd_alloc + size);
+}
+
static void mptcp_rmem_charge(struct sock *sk, int size)
{
- mptcp_sk(sk)->rmem_fwd_alloc -= size;
+ mptcp_rmem_fwd_alloc_add(sk, -size);
}
static bool mptcp_try_coalesce(struct sock *sk, struct sk_buff *to,
@@ -177,7 +167,7 @@ static bool mptcp_ooo_try_coalesce(struct mptcp_sock *msk, struct sk_buff *to,
static void __mptcp_rmem_reclaim(struct sock *sk, int amount)
{
amount >>= PAGE_SHIFT;
- mptcp_sk(sk)->rmem_fwd_alloc -= amount << PAGE_SHIFT;
+ mptcp_rmem_charge(sk, amount << PAGE_SHIFT);
__sk_mem_reduce_allocated(sk, amount);
}
@@ -186,7 +176,7 @@ static void mptcp_rmem_uncharge(struct sock *sk, int size)
struct mptcp_sock *msk = mptcp_sk(sk);
int reclaimable;
- msk->rmem_fwd_alloc += size;
+ mptcp_rmem_fwd_alloc_add(sk, size);
reclaimable = msk->rmem_fwd_alloc - sk_unused_reserved_mem(sk);
/* see sk_mem_uncharge() for the rationale behind the following schema */
@@ -341,7 +331,7 @@ static bool mptcp_rmem_schedule(struct sock *sk, struct sock *ssk, int size)
if (!__sk_mem_raise_allocated(sk, size, amt, SK_MEM_RECV))
return false;
- msk->rmem_fwd_alloc += amount;
+ mptcp_rmem_fwd_alloc_add(sk, amount);
return true;
}
@@ -399,7 +389,7 @@ drop:
return false;
}
-static void mptcp_stop_timer(struct sock *sk)
+static void mptcp_stop_rtx_timer(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
@@ -439,11 +429,11 @@ static void mptcp_check_data_fin_ack(struct sock *sk)
switch (sk->sk_state) {
case TCP_FIN_WAIT1:
- inet_sk_state_store(sk, TCP_FIN_WAIT2);
+ mptcp_set_state(sk, TCP_FIN_WAIT2);
break;
case TCP_CLOSING:
case TCP_LAST_ACK:
- inet_sk_state_store(sk, TCP_CLOSE);
+ mptcp_set_state(sk, TCP_CLOSE);
break;
}
@@ -604,13 +594,13 @@ static bool mptcp_check_data_fin(struct sock *sk)
switch (sk->sk_state) {
case TCP_ESTABLISHED:
- inet_sk_state_store(sk, TCP_CLOSE_WAIT);
+ mptcp_set_state(sk, TCP_CLOSE_WAIT);
break;
case TCP_FIN_WAIT1:
- inet_sk_state_store(sk, TCP_CLOSING);
+ mptcp_set_state(sk, TCP_CLOSING);
break;
case TCP_FIN_WAIT2:
- inet_sk_state_store(sk, TCP_CLOSE);
+ mptcp_set_state(sk, TCP_CLOSE);
break;
default:
/* Other states not expected */
@@ -764,6 +754,46 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk)
return moved;
}
+static bool __mptcp_subflow_error_report(struct sock *sk, struct sock *ssk)
+{
+ int err = sock_error(ssk);
+ int ssk_state;
+
+ if (!err)
+ return false;
+
+ /* only propagate errors on fallen-back sockets or
+ * on MPC connect
+ */
+ if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(mptcp_sk(sk)))
+ return false;
+
+ /* We need to propagate only transition to CLOSE state.
+ * Orphaned socket will see such state change via
+ * subflow_sched_work_if_closed() and that path will properly
+ * destroy the msk as needed.
+ */
+ ssk_state = inet_sk_state_load(ssk);
+ if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD))
+ mptcp_set_state(sk, ssk_state);
+ WRITE_ONCE(sk->sk_err, -err);
+
+ /* This barrier is coupled with smp_rmb() in mptcp_poll() */
+ smp_wmb();
+ sk_error_report(sk);
+ return true;
+}
+
+void __mptcp_error_report(struct sock *sk)
+{
+ struct mptcp_subflow_context *subflow;
+ struct mptcp_sock *msk = mptcp_sk(sk);
+
+ mptcp_for_each_subflow(msk, subflow)
+ if (__mptcp_subflow_error_report(sk, mptcp_subflow_tcp_sock(subflow)))
+ break;
+}
+
/* In most cases we will be able to lock the mptcp socket. If its already
* owned, we need to defer to the work queue to avoid ABBA deadlock.
*/
@@ -817,9 +847,8 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
/* Wake-up the reader only for in-sequence data */
mptcp_data_lock(sk);
- if (move_skbs_to_msk(msk, ssk))
+ if (move_skbs_to_msk(msk, ssk) && mptcp_epollin_ready(sk))
sk->sk_data_ready(sk);
-
mptcp_data_unlock(sk);
}
@@ -846,6 +875,8 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
mptcp_subflow_ctx(ssk)->subflow_id = msk->subflow_id++;
mptcp_sockopt_sync_locked(msk, ssk);
mptcp_subflow_joined(msk, ssk);
+ mptcp_stop_tout_timer(sk);
+ __mptcp_propagate_sndbuf(sk, ssk);
return true;
}
@@ -865,12 +896,12 @@ static void __mptcp_flush_join_list(struct sock *sk, struct list_head *join_list
}
}
-static bool mptcp_timer_pending(struct sock *sk)
+static bool mptcp_rtx_timer_pending(struct sock *sk)
{
return timer_pending(&inet_csk(sk)->icsk_retransmit_timer);
}
-static void mptcp_reset_timer(struct sock *sk)
+static void mptcp_reset_rtx_timer(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
unsigned long tout;
@@ -1004,10 +1035,10 @@ static void __mptcp_clean_una(struct sock *sk)
out:
if (snd_una == READ_ONCE(msk->snd_nxt) &&
snd_una == READ_ONCE(msk->write_seq)) {
- if (mptcp_timer_pending(sk) && !mptcp_data_fin_enabled(msk))
- mptcp_stop_timer(sk);
+ if (mptcp_rtx_timer_pending(sk) && !mptcp_data_fin_enabled(msk))
+ mptcp_stop_rtx_timer(sk);
} else {
- mptcp_reset_timer(sk);
+ mptcp_reset_rtx_timer(sk);
}
}
@@ -1032,15 +1063,16 @@ static void mptcp_enter_memory_pressure(struct sock *sk)
struct mptcp_sock *msk = mptcp_sk(sk);
bool first = true;
- sk_stream_moderate_sndbuf(sk);
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
if (first)
tcp_enter_memory_pressure(ssk);
sk_stream_moderate_sndbuf(ssk);
+
first = false;
}
+ __mptcp_sync_sndbuf(sk);
}
/* ensure we get enough memory for the frag hdr, beyond some minimal amount of
@@ -1184,6 +1216,8 @@ static void mptcp_update_infinite_map(struct mptcp_sock *msk,
mptcp_do_fallback(ssk);
}
+#define MPTCP_MAX_GSO_SIZE (GSO_LEGACY_MAX_SIZE - (MAX_TCP_HEADER + 1))
+
static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
struct mptcp_data_frag *dfrag,
struct mptcp_sendmsg_info *info)
@@ -1210,6 +1244,8 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
return -EAGAIN;
/* compute send limit */
+ if (unlikely(ssk->sk_gso_max_size > MPTCP_MAX_GSO_SIZE))
+ ssk->sk_gso_max_size = MPTCP_MAX_GSO_SIZE;
info->mss_now = tcp_send_mss(ssk, &info->size_goal, info->flags);
copy = info->size_goal;
@@ -1221,7 +1257,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
* queue management operation, to avoid breaking the ext <->
* SSN association set here
*/
- mpext = skb_ext_find(skb, SKB_EXT_MPTCP);
+ mpext = mptcp_get_ext(skb);
if (!mptcp_skb_can_collapse_to(data_seq, skb, mpext)) {
TCP_SKB_CB(skb)->eor = 1;
goto alloc_skb;
@@ -1243,7 +1279,7 @@ alloc_skb:
i = skb_shinfo(skb)->nr_frags;
reuse_skb = false;
- mpext = skb_ext_find(skb, SKB_EXT_MPTCP);
+ mpext = mptcp_get_ext(skb);
}
/* Zero window and all data acked? Probe. */
@@ -1251,7 +1287,7 @@ alloc_skb:
if (copy == 0) {
u64 snd_una = READ_ONCE(msk->snd_una);
- if (snd_una != msk->snd_nxt) {
+ if (snd_una != msk->snd_nxt || tcp_write_queue_tail(ssk)) {
tcp_remove_empty_skb(ssk);
return 0;
}
@@ -1259,11 +1295,6 @@ alloc_skb:
zero_window_probe = true;
data_seq = snd_una - 1;
copy = 1;
-
- /* all mptcp-level data is acked, no skbs should be present into the
- * ssk write queue
- */
- WARN_ON_ONCE(reuse_skb);
}
copy = min_t(size_t, copy, info->limit - info->sent);
@@ -1292,7 +1323,6 @@ alloc_skb:
if (reuse_skb) {
TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
mpext->data_len += copy;
- WARN_ON_ONCE(zero_window_probe);
goto out;
}
@@ -1580,8 +1610,8 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
mptcp_push_release(ssk, &info);
/* ensure the rtx timer is running */
- if (!mptcp_timer_pending(sk))
- mptcp_reset_timer(sk);
+ if (!mptcp_rtx_timer_pending(sk))
+ mptcp_reset_rtx_timer(sk);
if (do_check_data_fin)
mptcp_check_send_data_fin(sk);
}
@@ -1644,8 +1674,8 @@ out:
if (copied) {
tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
info.size_goal);
- if (!mptcp_timer_pending(sk))
- mptcp_reset_timer(sk);
+ if (!mptcp_rtx_timer_pending(sk))
+ mptcp_reset_rtx_timer(sk);
if (msk->snd_data_fin_enable &&
msk->snd_nxt + 1 == msk->write_seq)
@@ -1720,6 +1750,18 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
return ret;
}
+static int do_copy_data_nocache(struct sock *sk, int copy,
+ struct iov_iter *from, char *to)
+{
+ if (sk->sk_route_caps & NETIF_F_NOCACHE_COPY) {
+ if (!copy_from_iter_full_nocache(to, copy, from))
+ return -EFAULT;
+ } else if (!copy_from_iter_full(to, copy, from)) {
+ return -EFAULT;
+ }
+ return 0;
+}
+
static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
struct mptcp_sock *msk = mptcp_sk(sk);
@@ -1793,14 +1835,13 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (!sk_wmem_schedule(sk, total_ts))
goto wait_for_memory;
- if (copy_page_from_iter(dfrag->page, offset, psize,
- &msg->msg_iter) != psize) {
- ret = -EFAULT;
+ ret = do_copy_data_nocache(sk, psize, &msg->msg_iter,
+ page_address(dfrag->page) + offset);
+ if (ret)
goto do_error;
- }
/* data successfully copied into the write queue */
- sk->sk_forward_alloc -= total_ts;
+ sk_forward_alloc_add(sk, -total_ts);
copied += psize;
dfrag->data_len += psize;
frag_truesize += psize;
@@ -1881,6 +1922,7 @@ static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk,
if (!(flags & MSG_PEEK)) {
MPTCP_SKB_CB(skb)->offset += count;
MPTCP_SKB_CB(skb)->map_seq += count;
+ msk->bytes_consumed += count;
}
break;
}
@@ -1891,6 +1933,7 @@ static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk,
WRITE_ONCE(msk->rmem_released, msk->rmem_released + skb->truesize);
__skb_unlink(skb, &msk->receive_queue);
__kfree_skb(skb);
+ msk->bytes_consumed += count;
}
if (copied >= len)
@@ -2214,7 +2257,7 @@ static void mptcp_retransmit_timer(struct timer_list *t)
sock_put(sk);
}
-static void mptcp_timeout_timer(struct timer_list *t)
+static void mptcp_tout_timer(struct timer_list *t)
{
struct sock *sk = from_timer(sk, t, sk_timer);
@@ -2271,9 +2314,6 @@ bool __mptcp_retransmit_pending_data(struct sock *sk)
if (__mptcp_check_fallback(msk))
return false;
- if (tcp_rtx_and_write_queues_empty(sk))
- return false;
-
/* the closing socket has some data untransmitted and/or unacked:
* some data in the mptcp rtx queue has not really xmitted yet.
* keep it simple and re-inject the whole mptcp level rtx queue
@@ -2307,6 +2347,26 @@ bool __mptcp_retransmit_pending_data(struct sock *sk)
#define MPTCP_CF_PUSH BIT(1)
#define MPTCP_CF_FASTCLOSE BIT(2)
+/* be sure to send a reset only if the caller asked for it, also
+ * clean completely the subflow status when the subflow reaches
+ * TCP_CLOSE state
+ */
+static void __mptcp_subflow_disconnect(struct sock *ssk,
+ struct mptcp_subflow_context *subflow,
+ unsigned int flags)
+{
+ if (((1 << ssk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) ||
+ (flags & MPTCP_CF_FASTCLOSE)) {
+ /* The MPTCP code never wait on the subflow sockets, TCP-level
+ * disconnect should never fail
+ */
+ WARN_ON_ONCE(tcp_disconnect(ssk, 0));
+ mptcp_subflow_ctx_reset(subflow);
+ } else {
+ tcp_shutdown(ssk, SEND_SHUTDOWN);
+ }
+}
+
/* subflow sockets can be either outgoing (connect) or incoming
* (accept).
*
@@ -2323,19 +2383,15 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
bool dispose_it, need_push = false;
/* If the first subflow moved to a close state before accept, e.g. due
- * to an incoming reset, mptcp either:
- * - if either the subflow or the msk are dead, destroy the context
- * (the subflow socket is deleted by inet_child_forget) and the msk
- * - otherwise do nothing at the moment and take action at accept and/or
- * listener shutdown - user-space must be able to accept() the closed
- * socket.
+ * to an incoming reset or listener shutdown, the subflow socket is
+ * already deleted by inet_child_forget() and the mptcp socket can't
+ * survive too.
*/
- if (msk->in_accept_queue && msk->first == ssk) {
- if (!sock_flag(sk, SOCK_DEAD) && !sock_flag(ssk, SOCK_DEAD))
- return;
-
+ if (msk->in_accept_queue && msk->first == ssk &&
+ (sock_flag(sk, SOCK_DEAD) || sock_flag(ssk, SOCK_DEAD))) {
/* ensure later check in mptcp_worker() will dispose the msk */
sock_set_flag(sk, SOCK_DEAD);
+ mptcp_set_close_tout(sk, tcp_jiffies32 - (mptcp_close_timeout(sk) + 1));
lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
mptcp_subflow_drop_ctx(ssk);
goto out_release;
@@ -2348,7 +2404,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
if ((flags & MPTCP_CF_FASTCLOSE) && !__mptcp_check_fallback(msk)) {
- /* be sure to force the tcp_disconnect() path,
+ /* be sure to force the tcp_close path
* to generate the egress reset
*/
ssk->sk_lingertime = 0;
@@ -2358,11 +2414,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
need_push = (flags & MPTCP_CF_PUSH) && __mptcp_retransmit_pending_data(sk);
if (!dispose_it) {
- /* The MPTCP code never wait on the subflow sockets, TCP-level
- * disconnect should never fail
- */
- WARN_ON_ONCE(tcp_disconnect(ssk, 0));
- mptcp_subflow_ctx_reset(subflow);
+ __mptcp_subflow_disconnect(ssk, subflow, flags);
release_sock(ssk);
goto out;
@@ -2386,6 +2438,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
}
out_release:
+ __mptcp_subflow_error_report(sk, ssk);
release_sock(ssk);
sock_put(ssk);
@@ -2394,8 +2447,25 @@ out_release:
WRITE_ONCE(msk->first, NULL);
out:
+ __mptcp_sync_sndbuf(sk);
if (need_push)
__mptcp_push_pending(sk, 0);
+
+ /* Catch every 'all subflows closed' scenario, including peers silently
+ * closing them, e.g. due to timeout.
+ * For established sockets, allow an additional timeout before closing,
+ * as the protocol can still create more subflows.
+ */
+ if (list_is_singular(&msk->conn_list) && msk->first &&
+ inet_sk_state_load(msk->first) == TCP_CLOSE) {
+ if (sk->sk_state != TCP_ESTABLISHED ||
+ msk->in_accept_queue || sock_flag(sk, SOCK_DEAD)) {
+ mptcp_set_state(sk, TCP_CLOSE);
+ mptcp_close_wake_up(sk);
+ } else {
+ mptcp_start_tout_timer(sk);
+ }
+ }
}
void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
@@ -2407,7 +2477,7 @@ void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
/* subflow aborted before reaching the fully_established status
* attempt the creation of the next subflow
*/
- mptcp_pm_subflow_check_next(mptcp_sk(sk), ssk, subflow);
+ mptcp_pm_subflow_check_next(mptcp_sk(sk), subflow);
__mptcp_close_ssk(sk, ssk, subflow, MPTCP_CF_PUSH);
}
@@ -2439,23 +2509,14 @@ static void __mptcp_close_subflow(struct sock *sk)
}
-static bool mptcp_should_close(const struct sock *sk)
+static bool mptcp_close_tout_expired(const struct sock *sk)
{
- s32 delta = tcp_jiffies32 - inet_csk(sk)->icsk_mtup.probe_timestamp;
- struct mptcp_subflow_context *subflow;
-
- if (delta >= TCP_TIMEWAIT_LEN || mptcp_sk(sk)->in_accept_queue)
- return true;
+ if (!inet_csk(sk)->icsk_mtup.probe_timestamp ||
+ sk->sk_state == TCP_CLOSE)
+ return false;
- /* if all subflows are in closed status don't bother with additional
- * timeout
- */
- mptcp_for_each_subflow(mptcp_sk(sk), subflow) {
- if (inet_sk_state_load(mptcp_subflow_tcp_sock(subflow)) !=
- TCP_CLOSE)
- return false;
- }
- return true;
+ return time_after32(tcp_jiffies32,
+ inet_csk(sk)->icsk_mtup.probe_timestamp + mptcp_close_timeout(sk));
}
static void mptcp_check_fastclose(struct mptcp_sock *msk)
@@ -2494,7 +2555,7 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk)
WRITE_ONCE(sk->sk_err, ECONNRESET);
}
- inet_sk_state_store(sk, TCP_CLOSE);
+ mptcp_set_state(sk, TCP_CLOSE);
WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
smp_mb__before_atomic(); /* SHUTDOWN must be visible first */
set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags);
@@ -2582,27 +2643,28 @@ static void __mptcp_retrans(struct sock *sk)
reset_timer:
mptcp_check_and_set_pending(sk);
- if (!mptcp_timer_pending(sk))
- mptcp_reset_timer(sk);
+ if (!mptcp_rtx_timer_pending(sk))
+ mptcp_reset_rtx_timer(sk);
}
/* schedule the timeout timer for the relevant event: either close timeout
* or mp_fail timeout. The close timeout takes precedence on the mp_fail one
*/
-void mptcp_reset_timeout(struct mptcp_sock *msk, unsigned long fail_tout)
+void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout)
{
struct sock *sk = (struct sock *)msk;
unsigned long timeout, close_timeout;
- if (!fail_tout && !sock_flag(sk, SOCK_DEAD))
+ if (!fail_tout && !inet_csk(sk)->icsk_mtup.probe_timestamp)
return;
- close_timeout = inet_csk(sk)->icsk_mtup.probe_timestamp - tcp_jiffies32 + jiffies + TCP_TIMEWAIT_LEN;
+ close_timeout = inet_csk(sk)->icsk_mtup.probe_timestamp - tcp_jiffies32 + jiffies +
+ mptcp_close_timeout(sk);
/* the close timeout takes precedence on the fail one, and here at least one of
* them is active
*/
- timeout = sock_flag(sk, SOCK_DEAD) ? close_timeout : fail_tout;
+ timeout = inet_csk(sk)->icsk_mtup.probe_timestamp ? close_timeout : fail_tout;
sk_reset_timer(sk, &sk->sk_timer, timeout);
}
@@ -2621,8 +2683,6 @@ static void mptcp_mp_fail_no_response(struct mptcp_sock *msk)
mptcp_subflow_reset(ssk);
WRITE_ONCE(mptcp_subflow_ctx(ssk)->fail_tout, 0);
unlock_sock_fast(ssk, slow);
-
- mptcp_reset_timeout(msk, 0);
}
static void mptcp_do_fastclose(struct sock *sk)
@@ -2630,7 +2690,7 @@ static void mptcp_do_fastclose(struct sock *sk)
struct mptcp_subflow_context *subflow, *tmp;
struct mptcp_sock *msk = mptcp_sk(sk);
- inet_sk_state_store(sk, TCP_CLOSE);
+ mptcp_set_state(sk, TCP_CLOSE);
mptcp_for_each_subflow_safe(msk, subflow, tmp)
__mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow),
subflow, MPTCP_CF_FASTCLOSE);
@@ -2659,18 +2719,14 @@ static void mptcp_worker(struct work_struct *work)
if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
__mptcp_close_subflow(sk);
- /* There is no point in keeping around an orphaned sk timedout or
- * closed, but we need the msk around to reply to incoming DATA_FIN,
- * even if it is orphaned and in FIN_WAIT2 state
- */
- if (sock_flag(sk, SOCK_DEAD)) {
- if (mptcp_should_close(sk))
- mptcp_do_fastclose(sk);
+ if (mptcp_close_tout_expired(sk)) {
+ mptcp_do_fastclose(sk);
+ mptcp_close_wake_up(sk);
+ }
- if (sk->sk_state == TCP_CLOSE) {
- __mptcp_destroy_sock(sk);
- goto unlock;
- }
+ if (sock_flag(sk, SOCK_DEAD) && sk->sk_state == TCP_CLOSE) {
+ __mptcp_destroy_sock(sk);
+ goto unlock;
}
if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
@@ -2699,6 +2755,7 @@ static void __mptcp_init_sock(struct sock *sk)
msk->rmem_fwd_alloc = 0;
WRITE_ONCE(msk->rmem_released, 0);
msk->timer_ival = TCP_RTO_MIN;
+ msk->scaling_ratio = TCP_DEFAULT_SCALING_RATIO;
WRITE_ONCE(msk->first, NULL);
inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss;
@@ -2711,7 +2768,7 @@ static void __mptcp_init_sock(struct sock *sk)
/* re-use the csk retrans timer for MPTCP-level retrans */
timer_setup(&msk->sk.icsk_retransmit_timer, mptcp_retransmit_timer, 0);
- timer_setup(&sk->sk_timer, mptcp_timeout_timer, 0);
+ timer_setup(&sk->sk_timer, mptcp_tout_timer, 0);
}
static void mptcp_ca_reset(struct sock *sk)
@@ -2802,8 +2859,8 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how)
} else {
pr_debug("Sending DATA_FIN on subflow %p", ssk);
tcp_send_ack(ssk);
- if (!mptcp_timer_pending(sk))
- mptcp_reset_timer(sk);
+ if (!mptcp_rtx_timer_pending(sk))
+ mptcp_reset_rtx_timer(sk);
}
break;
}
@@ -2811,6 +2868,24 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how)
release_sock(ssk);
}
+void mptcp_set_state(struct sock *sk, int state)
+{
+ int oldstate = sk->sk_state;
+
+ switch (state) {
+ case TCP_ESTABLISHED:
+ if (oldstate != TCP_ESTABLISHED)
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_CURRESTAB);
+ break;
+
+ default:
+ if (oldstate == TCP_ESTABLISHED)
+ MPTCP_DEC_STATS(sock_net(sk), MPTCP_MIB_CURRESTAB);
+ }
+
+ inet_sk_state_store(sk, state);
+}
+
static const unsigned char new_state[16] = {
/* current state: new state: action: */
[0 /* (Invalid) */] = TCP_CLOSE,
@@ -2833,7 +2908,7 @@ static int mptcp_close_state(struct sock *sk)
int next = (int)new_state[sk->sk_state];
int ns = next & TCP_STATE_MASK;
- inet_sk_state_store(sk, ns);
+ mptcp_set_state(sk, ns);
return next & TCP_ACTION_FIN;
}
@@ -2886,7 +2961,7 @@ static void __mptcp_destroy_sock(struct sock *sk)
might_sleep();
- mptcp_stop_timer(sk);
+ mptcp_stop_rtx_timer(sk);
sk_stop_timer(sk, &sk->sk_timer);
msk->pm.status = 0;
mptcp_release_sched(msk);
@@ -2908,16 +2983,9 @@ void __mptcp_unaccepted_force_close(struct sock *sk)
__mptcp_destroy_sock(sk);
}
-static __poll_t mptcp_check_readable(struct mptcp_sock *msk)
+static __poll_t mptcp_check_readable(struct sock *sk)
{
- /* Concurrent splices from sk_receive_queue into receive_queue will
- * always show at least one non-empty queue when checked in this order.
- */
- if (skb_queue_empty_lockless(&((struct sock *)msk)->sk_receive_queue) &&
- skb_queue_empty_lockless(&msk->receive_queue))
- return 0;
-
- return EPOLLIN | EPOLLRDNORM;
+ return mptcp_epollin_ready(sk) ? EPOLLIN | EPOLLRDNORM : 0;
}
static void mptcp_check_listen_stop(struct sock *sk)
@@ -2951,11 +3019,11 @@ bool __mptcp_close(struct sock *sk, long timeout)
if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) {
mptcp_check_listen_stop(sk);
- inet_sk_state_store(sk, TCP_CLOSE);
+ mptcp_set_state(sk, TCP_CLOSE);
goto cleanup;
}
- if (mptcp_check_readable(msk) || timeout < 0) {
+ if (mptcp_data_avail(msk) || timeout < 0) {
/* If the msk has read data, or the caller explicitly ask it,
* do the MPTCP equivalent of TCP reset, aka MPTCP fastclose
*/
@@ -2969,7 +3037,6 @@ bool __mptcp_close(struct sock *sk, long timeout)
cleanup:
/* orphan all the subflows */
- inet_csk(sk)->icsk_mtup.probe_timestamp = tcp_jiffies32;
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
bool slow = lock_sock_fast_nested(ssk);
@@ -2995,7 +3062,7 @@ cleanup:
* state, let's not keep resources busy for no reasons
*/
if (subflows_alive == 0)
- inet_sk_state_store(sk, TCP_CLOSE);
+ mptcp_set_state(sk, TCP_CLOSE);
sock_hold(sk);
pr_debug("msk=%p state=%d", sk, sk->sk_state);
@@ -3006,7 +3073,7 @@ cleanup:
__mptcp_destroy_sock(sk);
do_cancel_work = true;
} else {
- mptcp_reset_timeout(msk, 0);
+ mptcp_start_tout_timer(sk);
}
return do_cancel_work;
@@ -3053,12 +3120,6 @@ static int mptcp_disconnect(struct sock *sk, int flags)
{
struct mptcp_sock *msk = mptcp_sk(sk);
- /* Deny disconnect if other threads are blocked in sk_wait_event()
- * or inet_wait_for_connect().
- */
- if (sk->sk_wait_pending)
- return -EBUSY;
-
/* We are on the fastopen error path. We can't call straight into the
* subflows cleanup code due to lock nesting (we are already under
* msk->firstsocket lock).
@@ -3067,10 +3128,10 @@ static int mptcp_disconnect(struct sock *sk, int flags)
return -EBUSY;
mptcp_check_listen_stop(sk);
- inet_sk_state_store(sk, TCP_CLOSE);
+ mptcp_set_state(sk, TCP_CLOSE);
- mptcp_stop_timer(sk);
- sk_stop_timer(sk, &sk->sk_timer);
+ mptcp_stop_rtx_timer(sk);
+ mptcp_stop_tout_timer(sk);
if (msk->token)
mptcp_event(MPTCP_EVENT_CLOSED, msk, NULL, GFP_KERNEL);
@@ -3089,6 +3150,7 @@ static int mptcp_disconnect(struct sock *sk, int flags)
msk->snd_data_fin_enable = false;
msk->rcv_fastclose = false;
msk->use_64bit_ack = false;
+ msk->bytes_consumed = 0;
WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
mptcp_pm_data_reset(msk);
mptcp_ca_reset(sk);
@@ -3128,7 +3190,6 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
inet_sk(nsk)->pinet6 = mptcp_inet6_sk(nsk);
#endif
- nsk->sk_wait_pending = 0;
__mptcp_init_sock(nsk);
msk = mptcp_sk(nsk);
@@ -3155,7 +3216,7 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
/* this can't race with mptcp_close(), as the msk is
* not yet exposted to user-space
*/
- inet_sk_state_store(nsk, TCP_ESTABLISHED);
+ mptcp_set_state(nsk, TCP_ESTABLISHED);
/* The msk maintain a ref to each subflow in the connections list */
WRITE_ONCE(msk->first, ssk);
@@ -3171,7 +3232,7 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
* uses the correct data
*/
mptcp_copy_inaddrs(nsk, ssk);
- mptcp_propagate_sndbuf(nsk, ssk);
+ __mptcp_propagate_sndbuf(nsk, ssk);
mptcp_rcv_space_init(msk, ssk);
bh_unlock_sock(nsk);
@@ -3198,44 +3259,6 @@ void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk)
WRITE_ONCE(msk->wnd_end, msk->snd_nxt + tcp_sk(ssk)->snd_wnd);
}
-static struct sock *mptcp_accept(struct sock *ssk, int flags, int *err,
- bool kern)
-{
- struct sock *newsk;
-
- pr_debug("ssk=%p, listener=%p", ssk, mptcp_subflow_ctx(ssk));
- newsk = inet_csk_accept(ssk, flags, err, kern);
- if (!newsk)
- return NULL;
-
- pr_debug("newsk=%p, subflow is mptcp=%d", newsk, sk_is_mptcp(newsk));
- if (sk_is_mptcp(newsk)) {
- struct mptcp_subflow_context *subflow;
- struct sock *new_mptcp_sock;
-
- subflow = mptcp_subflow_ctx(newsk);
- new_mptcp_sock = subflow->conn;
-
- /* is_mptcp should be false if subflow->conn is missing, see
- * subflow_syn_recv_sock()
- */
- if (WARN_ON_ONCE(!new_mptcp_sock)) {
- tcp_sk(newsk)->is_mptcp = 0;
- goto out;
- }
-
- newsk = new_mptcp_sock;
- MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEPASSIVEACK);
- } else {
- MPTCP_INC_STATS(sock_net(ssk),
- MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK);
- }
-
-out:
- newsk->sk_kern_sock = kern;
- return newsk;
-}
-
void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags)
{
struct mptcp_subflow_context *subflow, *tmp;
@@ -3257,8 +3280,8 @@ void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags)
/* move all the rx fwd alloc into the sk_mem_reclaim_final in
* inet_sock_destruct() will dispose it
*/
- sk->sk_forward_alloc += msk->rmem_fwd_alloc;
- msk->rmem_fwd_alloc = 0;
+ sk_forward_alloc_add(sk, msk->rmem_fwd_alloc);
+ WRITE_ONCE(msk->rmem_fwd_alloc, 0);
mptcp_token_destroy(msk);
mptcp_pm_free_anno_list(msk);
mptcp_free_local_addr_list(msk);
@@ -3342,13 +3365,16 @@ static void mptcp_release_cb(struct sock *sk)
if (__test_and_clear_bit(MPTCP_CLEAN_UNA, &msk->cb_flags))
__mptcp_clean_una_wakeup(sk);
if (unlikely(msk->cb_flags)) {
- /* be sure to set the current sk state before tacking actions
- * depending on sk_state, that is processing MPTCP_ERROR_REPORT
+ /* be sure to sync the msk state before taking actions
+ * depending on sk_state (MPTCP_ERROR_REPORT)
+ * On sk release avoid actions depending on the first subflow
*/
- if (__test_and_clear_bit(MPTCP_CONNECTED, &msk->cb_flags))
- __mptcp_set_connected(sk);
+ if (__test_and_clear_bit(MPTCP_SYNC_STATE, &msk->cb_flags) && msk->first)
+ __mptcp_sync_state(sk, msk->pending_state);
if (__test_and_clear_bit(MPTCP_ERROR_REPORT, &msk->cb_flags))
__mptcp_error_report(sk);
+ if (__test_and_clear_bit(MPTCP_SYNC_SNDBUF, &msk->cb_flags))
+ __mptcp_sync_sndbuf(sk);
}
__mptcp_update_rmem(sk);
@@ -3380,24 +3406,29 @@ static void schedule_3rdack_retransmission(struct sock *ssk)
sk_reset_timer(ssk, &icsk->icsk_delack_timer, timeout);
}
-void mptcp_subflow_process_delegated(struct sock *ssk)
+void mptcp_subflow_process_delegated(struct sock *ssk, long status)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
struct sock *sk = subflow->conn;
- if (test_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status)) {
+ if (status & BIT(MPTCP_DELEGATE_SEND)) {
mptcp_data_lock(sk);
if (!sock_owned_by_user(sk))
__mptcp_subflow_push_pending(sk, ssk, true);
else
__set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->cb_flags);
mptcp_data_unlock(sk);
- mptcp_subflow_delegated_done(subflow, MPTCP_DELEGATE_SEND);
}
- if (test_bit(MPTCP_DELEGATE_ACK, &subflow->delegated_status)) {
- schedule_3rdack_retransmission(ssk);
- mptcp_subflow_delegated_done(subflow, MPTCP_DELEGATE_ACK);
+ if (status & BIT(MPTCP_DELEGATE_SNDBUF)) {
+ mptcp_data_lock(sk);
+ if (!sock_owned_by_user(sk))
+ __mptcp_sync_sndbuf(sk);
+ else
+ __set_bit(MPTCP_SYNC_SNDBUF, &mptcp_sk(sk)->cb_flags);
+ mptcp_data_unlock(sk);
}
+ if (status & BIT(MPTCP_DELEGATE_ACK))
+ schedule_3rdack_retransmission(ssk);
}
static int mptcp_hash(struct sock *sk)
@@ -3480,6 +3511,7 @@ bool mptcp_finish_join(struct sock *ssk)
/* active subflow, already present inside the conn_list */
if (!list_empty(&subflow->node)) {
mptcp_subflow_joined(msk, ssk);
+ mptcp_propagate_sndbuf(parent, ssk);
return true;
}
@@ -3522,7 +3554,8 @@ static void mptcp_shutdown(struct sock *sk, int how)
static int mptcp_forward_alloc_get(const struct sock *sk)
{
- return sk->sk_forward_alloc + mptcp_sk(sk)->rmem_fwd_alloc;
+ return READ_ONCE(sk->sk_forward_alloc) +
+ READ_ONCE(mptcp_sk(sk)->rmem_fwd_alloc);
}
static int mptcp_ioctl_outq(const struct mptcp_sock *msk, u64 v)
@@ -3604,7 +3637,7 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if (IS_ERR(ssk))
return PTR_ERR(ssk);
- inet_sk_state_store(sk, TCP_SYN_SENT);
+ mptcp_set_state(sk, TCP_SYN_SENT);
subflow = mptcp_subflow_ctx(ssk);
#ifdef CONFIG_TCP_MD5SIG
/* no MPTCP if MD5SIG is enabled on this socket or we may run out of
@@ -3654,7 +3687,7 @@ out:
if (unlikely(err)) {
/* avoid leaving a dangling token in an unconnected socket */
mptcp_token_destroy(msk);
- inet_sk_state_store(sk, TCP_CLOSE);
+ mptcp_set_state(sk, TCP_CLOSE);
return err;
}
@@ -3669,7 +3702,6 @@ static struct proto mptcp_prot = {
.connect = mptcp_connect,
.disconnect = mptcp_disconnect,
.close = mptcp_close,
- .accept = mptcp_accept,
.setsockopt = mptcp_setsockopt,
.getsockopt = mptcp_getsockopt,
.shutdown = mptcp_shutdown,
@@ -3744,13 +3776,13 @@ static int mptcp_listen(struct socket *sock, int backlog)
goto unlock;
}
- inet_sk_state_store(sk, TCP_LISTEN);
+ mptcp_set_state(sk, TCP_LISTEN);
sock_set_flag(sk, SOCK_RCU_FREE);
lock_sock(ssk);
err = __inet_listen_sk(ssk, backlog);
release_sock(ssk);
- inet_sk_state_store(sk, inet_sk_state_load(ssk));
+ mptcp_set_state(sk, inet_sk_state_load(ssk));
if (!err) {
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
@@ -3779,18 +3811,36 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
if (!ssk)
return -EINVAL;
- newsk = mptcp_accept(ssk, flags, &err, kern);
+ pr_debug("ssk=%p, listener=%p", ssk, mptcp_subflow_ctx(ssk));
+ newsk = inet_csk_accept(ssk, flags, &err, kern);
if (!newsk)
return err;
- lock_sock(newsk);
-
- __inet_accept(sock, newsock, newsk);
- if (!mptcp_is_tcpsk(newsock->sk)) {
- struct mptcp_sock *msk = mptcp_sk(newsk);
+ pr_debug("newsk=%p, subflow is mptcp=%d", newsk, sk_is_mptcp(newsk));
+ if (sk_is_mptcp(newsk)) {
struct mptcp_subflow_context *subflow;
+ struct sock *new_mptcp_sock;
+
+ subflow = mptcp_subflow_ctx(newsk);
+ new_mptcp_sock = subflow->conn;
+
+ /* is_mptcp should be false if subflow->conn is missing, see
+ * subflow_syn_recv_sock()
+ */
+ if (WARN_ON_ONCE(!new_mptcp_sock)) {
+ tcp_sk(newsk)->is_mptcp = 0;
+ goto tcpfallback;
+ }
+
+ newsk = new_mptcp_sock;
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEPASSIVEACK);
+
+ newsk->sk_kern_sock = kern;
+ lock_sock(newsk);
+ __inet_accept(sock, newsock, newsk);
set_bit(SOCK_CUSTOM_SOCKOPT, &newsock->flags);
+ msk = mptcp_sk(newsk);
msk->in_accept_queue = 0;
/* set ssk->sk_socket of accept()ed flows to mptcp socket.
@@ -3810,8 +3860,23 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
__mptcp_close_ssk(newsk, msk->first,
mptcp_subflow_ctx(msk->first), 0);
if (unlikely(list_is_singular(&msk->conn_list)))
- inet_sk_state_store(newsk, TCP_CLOSE);
+ mptcp_set_state(newsk, TCP_CLOSE);
}
+ } else {
+ MPTCP_INC_STATS(sock_net(ssk),
+ MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK);
+tcpfallback:
+ newsk->sk_kern_sock = kern;
+ lock_sock(newsk);
+ __inet_accept(sock, newsock, newsk);
+ /* we are being invoked after accepting a non-mp-capable
+ * flow: sk is a tcp_sk, not an mptcp one.
+ *
+ * Hand the socket over to tcp so all further socket ops
+ * bypass mptcp.
+ */
+ WRITE_ONCE(newsock->sk->sk_socket->ops,
+ mptcp_fallback_tcp_ops(newsock->sk));
}
release_sock(newsk);
@@ -3863,7 +3928,7 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
if (state != TCP_SYN_SENT && state != TCP_SYN_RECV) {
- mask |= mptcp_check_readable(msk);
+ mask |= mptcp_check_readable(sk);
if (shutdown & SEND_SHUTDOWN)
mask |= EPOLLOUT | EPOLLWRNORM;
else
@@ -3901,6 +3966,7 @@ static const struct proto_ops mptcp_stream_ops = {
.sendmsg = inet_sendmsg,
.recvmsg = inet_recvmsg,
.mmap = sock_no_mmap,
+ .set_rcvlowat = mptcp_set_rcvlowat,
};
static struct inet_protosw mptcp_protosw = {
@@ -3922,14 +3988,17 @@ static int mptcp_napi_poll(struct napi_struct *napi, int budget)
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
bh_lock_sock_nested(ssk);
- if (!sock_owned_by_user(ssk) &&
- mptcp_subflow_has_delegated_action(subflow))
- mptcp_subflow_process_delegated(ssk);
- /* ... elsewhere tcp_release_cb_override already processed
- * the action or will do at next release_sock().
- * In both case must dequeue the subflow here - on the same
- * CPU that scheduled it.
- */
+ if (!sock_owned_by_user(ssk)) {
+ mptcp_subflow_process_delegated(ssk, xchg(&subflow->delegated_status, 0));
+ } else {
+ /* tcp_release_cb_override already processed
+ * the action or will do at next release_sock().
+ * In both case must dequeue the subflow here - on the same
+ * CPU that scheduled it.
+ */
+ smp_wmb();
+ clear_bit(MPTCP_DELEGATE_SCHEDULED, &subflow->delegated_status);
+ }
bh_unlock_sock(ssk);
sock_put(ssk);
@@ -3999,6 +4068,7 @@ static const struct proto_ops mptcp_v6_stream_ops = {
#ifdef CONFIG_COMPAT
.compat_ioctl = inet6_compat_ioctl,
#endif
+ .set_rcvlowat = mptcp_set_rcvlowat,
};
static struct proto mptcp_v6_prot;
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 7254b3562575..3517f2d24a22 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -13,6 +13,8 @@
#include <uapi/linux/mptcp.h>
#include <net/genetlink.h>
+#include "mptcp_pm_gen.h"
+
#define MPTCP_SUPPORTED_VERSION 1
/* MPTCP option bits */
@@ -122,7 +124,8 @@
#define MPTCP_ERROR_REPORT 3
#define MPTCP_RETRANSMIT 4
#define MPTCP_FLUSH_JOIN_LIST 5
-#define MPTCP_CONNECTED 6
+#define MPTCP_SYNC_STATE 6
+#define MPTCP_SYNC_SNDBUF 7
struct mptcp_skb_cb {
u64 map_seq;
@@ -267,6 +270,7 @@ struct mptcp_sock {
atomic64_t rcv_wnd_sent;
u64 rcv_data_fin_seq;
u64 bytes_retrans;
+ u64 bytes_consumed;
int rmem_fwd_alloc;
int snd_burst;
int old_wspace;
@@ -292,6 +296,9 @@ struct mptcp_sock {
bool use_64bit_ack; /* Set when we received a 64-bit DSN */
bool csum_enabled;
bool allow_infinite_fallback;
+ u8 pending_state; /* A subflow asked to set this sk_state,
+ * protected by the msk data lock
+ */
u8 mpc_endpoint_id;
u8 recvmsg_inq:1,
cork:1,
@@ -432,11 +439,6 @@ mptcp_subflow_rsk(const struct request_sock *rsk)
return (struct mptcp_subflow_request_sock *)rsk;
}
-enum mptcp_data_avail {
- MPTCP_SUBFLOW_NODATA,
- MPTCP_SUBFLOW_DATA_AVAIL,
-};
-
struct mptcp_delegated_action {
struct napi_struct napi;
struct list_head head;
@@ -444,9 +446,12 @@ struct mptcp_delegated_action {
DECLARE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions);
-#define MPTCP_DELEGATE_SEND 0
-#define MPTCP_DELEGATE_ACK 1
+#define MPTCP_DELEGATE_SCHEDULED 0
+#define MPTCP_DELEGATE_SEND 1
+#define MPTCP_DELEGATE_ACK 2
+#define MPTCP_DELEGATE_SNDBUF 3
+#define MPTCP_DELEGATE_ACTIONS_MASK (~BIT(MPTCP_DELEGATE_SCHEDULED))
/* MPTCP subflow context */
struct mptcp_subflow_context {
struct list_head node;/* conn_list of subflows */
@@ -490,7 +495,7 @@ struct mptcp_subflow_context {
valid_csum_seen : 1, /* at least one csum validated */
is_mptfo : 1, /* subflow is doing TFO */
__unused : 9;
- enum mptcp_data_avail data_avail;
+ bool data_avail;
bool scheduled;
u32 remote_nonce;
u64 thmac;
@@ -518,6 +523,9 @@ struct mptcp_subflow_context {
u32 setsockopt_seq;
u32 stale_rcv_tstamp;
+ int cached_sndbuf; /* sndbuf size when last synced with the msk sndbuf,
+ * protected by the msk socket lock
+ */
struct sock *tcp_sock; /* tcp sk backpointer */
struct sock *conn; /* parent mptcp_sock */
@@ -564,23 +572,24 @@ mptcp_subflow_get_mapped_dsn(const struct mptcp_subflow_context *subflow)
return subflow->map_seq + mptcp_subflow_get_map_offset(subflow);
}
-void mptcp_subflow_process_delegated(struct sock *ssk);
+void mptcp_subflow_process_delegated(struct sock *ssk, long actions);
static inline void mptcp_subflow_delegate(struct mptcp_subflow_context *subflow, int action)
{
+ long old, set_bits = BIT(MPTCP_DELEGATE_SCHEDULED) | BIT(action);
struct mptcp_delegated_action *delegated;
bool schedule;
/* the caller held the subflow bh socket lock */
lockdep_assert_in_softirq();
- /* The implied barrier pairs with mptcp_subflow_delegated_done(), and
- * ensures the below list check sees list updates done prior to status
- * bit changes
+ /* The implied barrier pairs with tcp_release_cb_override()
+ * mptcp_napi_poll(), and ensures the below list check sees list
+ * updates done prior to delegated status bits changes
*/
- if (!test_and_set_bit(action, &subflow->delegated_status)) {
- /* still on delegated list from previous scheduling */
- if (!list_empty(&subflow->delegated_node))
+ old = set_mask_bits(&subflow->delegated_status, 0, set_bits);
+ if (!(old & BIT(MPTCP_DELEGATE_SCHEDULED))) {
+ if (WARN_ON_ONCE(!list_empty(&subflow->delegated_node)))
return;
delegated = this_cpu_ptr(&mptcp_delegated_actions);
@@ -605,25 +614,12 @@ mptcp_subflow_delegated_next(struct mptcp_delegated_action *delegated)
return ret;
}
-static inline bool mptcp_subflow_has_delegated_action(const struct mptcp_subflow_context *subflow)
-{
- return !!READ_ONCE(subflow->delegated_status);
-}
-
-static inline void mptcp_subflow_delegated_done(struct mptcp_subflow_context *subflow, int action)
-{
- /* pairs with mptcp_subflow_delegate, ensures delegate_node is updated before
- * touching the status bit
- */
- smp_wmb();
- clear_bit(action, &subflow->delegated_status);
-}
-
int mptcp_is_enabled(const struct net *net);
unsigned int mptcp_get_add_addr_timeout(const struct net *net);
int mptcp_is_checksum_enabled(const struct net *net);
int mptcp_allow_join_id0(const struct net *net);
unsigned int mptcp_stale_loss_cnt(const struct net *net);
+unsigned int mptcp_close_timeout(const struct sock *sk);
int mptcp_get_pm_type(const struct net *net);
const char *mptcp_get_scheduler(const struct net *net);
void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
@@ -645,6 +641,7 @@ bool __mptcp_close(struct sock *sk, long timeout);
void mptcp_cancel_work(struct sock *sk);
void __mptcp_unaccepted_force_close(struct sock *sk);
void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk);
+void mptcp_set_state(struct sock *sk, int state);
bool mptcp_addresses_equal(const struct mptcp_addr_info *a,
const struct mptcp_addr_info *b, bool use_port);
@@ -672,6 +669,24 @@ struct sock *mptcp_subflow_get_retrans(struct mptcp_sock *msk);
int mptcp_sched_get_send(struct mptcp_sock *msk);
int mptcp_sched_get_retrans(struct mptcp_sock *msk);
+static inline u64 mptcp_data_avail(const struct mptcp_sock *msk)
+{
+ return READ_ONCE(msk->bytes_received) - READ_ONCE(msk->bytes_consumed);
+}
+
+static inline bool mptcp_epollin_ready(const struct sock *sk)
+{
+ /* mptcp doesn't have to deal with small skbs in the receive queue,
+ * at it can always coalesce them
+ */
+ return (mptcp_data_avail(mptcp_sk(sk)) >= sk->sk_rcvlowat) ||
+ (mem_cgroup_sockets_enabled && sk->sk_memcg &&
+ mem_cgroup_under_socket_pressure(sk->sk_memcg)) ||
+ READ_ONCE(tcp_memory_pressure);
+}
+
+int mptcp_set_rcvlowat(struct sock *sk, int val);
+
static inline bool __tcp_can_send(const struct sock *ssk)
{
/* only send if our side has not closed yet */
@@ -717,13 +732,36 @@ void mptcp_get_options(const struct sk_buff *skb,
struct mptcp_options_received *mp_opt);
void mptcp_finish_connect(struct sock *sk);
-void __mptcp_set_connected(struct sock *sk);
-void mptcp_reset_timeout(struct mptcp_sock *msk, unsigned long fail_tout);
+void __mptcp_sync_state(struct sock *sk, int state);
+void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout);
+
+static inline void mptcp_stop_tout_timer(struct sock *sk)
+{
+ if (!inet_csk(sk)->icsk_mtup.probe_timestamp)
+ return;
+
+ sk_stop_timer(sk, &sk->sk_timer);
+ inet_csk(sk)->icsk_mtup.probe_timestamp = 0;
+}
+
+static inline void mptcp_set_close_tout(struct sock *sk, unsigned long tout)
+{
+ /* avoid 0 timestamp, as that means no close timeout */
+ inet_csk(sk)->icsk_mtup.probe_timestamp = tout ? : 1;
+}
+
+static inline void mptcp_start_tout_timer(struct sock *sk)
+{
+ mptcp_set_close_tout(sk, tcp_jiffies32);
+ mptcp_reset_tout_timer(mptcp_sk(sk), 0);
+}
+
static inline bool mptcp_is_fully_established(struct sock *sk)
{
return inet_sk_state_load(sk) == TCP_ESTABLISHED &&
READ_ONCE(mptcp_sk(sk)->fully_established);
}
+
void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk);
void mptcp_data_ready(struct sock *sk, struct sock *ssk);
bool mptcp_finish_join(struct sock *sk);
@@ -751,13 +789,52 @@ static inline bool mptcp_data_fin_enabled(const struct mptcp_sock *msk)
READ_ONCE(msk->write_seq) == READ_ONCE(msk->snd_nxt);
}
-static inline bool mptcp_propagate_sndbuf(struct sock *sk, struct sock *ssk)
+static inline void __mptcp_sync_sndbuf(struct sock *sk)
{
- if ((sk->sk_userlocks & SOCK_SNDBUF_LOCK) || ssk->sk_sndbuf <= READ_ONCE(sk->sk_sndbuf))
- return false;
+ struct mptcp_subflow_context *subflow;
+ int ssk_sndbuf, new_sndbuf;
+
+ if (sk->sk_userlocks & SOCK_SNDBUF_LOCK)
+ return;
+
+ new_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[0];
+ mptcp_for_each_subflow(mptcp_sk(sk), subflow) {
+ ssk_sndbuf = READ_ONCE(mptcp_subflow_tcp_sock(subflow)->sk_sndbuf);
- WRITE_ONCE(sk->sk_sndbuf, ssk->sk_sndbuf);
- return true;
+ subflow->cached_sndbuf = ssk_sndbuf;
+ new_sndbuf += ssk_sndbuf;
+ }
+
+ /* the msk max wmem limit is <nr_subflows> * tcp wmem[2] */
+ WRITE_ONCE(sk->sk_sndbuf, new_sndbuf);
+}
+
+/* The called held both the msk socket and the subflow socket locks,
+ * possibly under BH
+ */
+static inline void __mptcp_propagate_sndbuf(struct sock *sk, struct sock *ssk)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+
+ if (READ_ONCE(ssk->sk_sndbuf) != subflow->cached_sndbuf)
+ __mptcp_sync_sndbuf(sk);
+}
+
+/* the caller held only the subflow socket lock, either in process or
+ * BH context. Additionally this can be called under the msk data lock,
+ * so we can't acquire such lock here: let the delegate action acquires
+ * the needed locks in suitable order.
+ */
+static inline void mptcp_propagate_sndbuf(struct sock *sk, struct sock *ssk)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+
+ if (likely(READ_ONCE(ssk->sk_sndbuf) == subflow->cached_sndbuf))
+ return;
+
+ local_bh_disable();
+ mptcp_subflow_delegate(subflow, MPTCP_DELEGATE_SNDBUF);
+ local_bh_enable();
}
static inline void mptcp_write_space(struct sock *sk)
@@ -815,7 +892,7 @@ bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk);
void mptcp_pm_connection_closed(struct mptcp_sock *msk);
void mptcp_pm_subflow_established(struct mptcp_sock *msk);
bool mptcp_pm_nl_check_work_pending(struct mptcp_sock *msk);
-void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk,
+void mptcp_pm_subflow_check_next(struct mptcp_sock *msk,
const struct mptcp_subflow_context *subflow);
void mptcp_pm_add_addr_received(const struct sock *ssk,
const struct mptcp_addr_info *addr);
@@ -866,10 +943,6 @@ void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk,
struct list_head *rm_list);
void mptcp_free_local_addr_list(struct mptcp_sock *msk);
-int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info);
-int mptcp_nl_cmd_remove(struct sk_buff *skb, struct genl_info *info);
-int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info);
-int mptcp_nl_cmd_sf_destroy(struct sk_buff *skb, struct genl_info *info);
void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk,
const struct sock *ssk, gfp_t gfp);
@@ -996,13 +1069,22 @@ static inline bool mptcp_check_fallback(const struct sock *sk)
static inline void __mptcp_do_fallback(struct mptcp_sock *msk)
{
- if (test_bit(MPTCP_FALLBACK_DONE, &msk->flags)) {
+ if (__mptcp_check_fallback(msk)) {
pr_debug("TCP fallback already done (msk=%p)", msk);
return;
}
set_bit(MPTCP_FALLBACK_DONE, &msk->flags);
}
+static inline bool __mptcp_has_initial_subflow(const struct mptcp_sock *msk)
+{
+ struct sock *ssk = READ_ONCE(msk->first);
+
+ return ssk && ((1 << inet_sk_state_load(ssk)) &
+ (TCPF_ESTABLISHED | TCPF_SYN_SENT |
+ TCPF_SYN_RECV | TCPF_LISTEN));
+}
+
static inline void mptcp_do_fallback(struct sock *ssk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
@@ -1046,7 +1128,7 @@ static inline bool subflow_simultaneous_connect(struct sock *sk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
- return sk->sk_state == TCP_ESTABLISHED &&
+ return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_FIN_WAIT1) &&
is_active_ssk(subflow) &&
!subflow->conn_finished;
}
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index 8260202c0066..c40f1428e602 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -89,12 +89,13 @@ static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, in
sock_valbool_flag(ssk, SOCK_KEEPOPEN, !!val);
break;
case SO_PRIORITY:
- ssk->sk_priority = val;
+ WRITE_ONCE(ssk->sk_priority, val);
break;
case SO_SNDBUF:
case SO_SNDBUFFORCE:
ssk->sk_userlocks |= SOCK_SNDBUF_LOCK;
WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf);
+ mptcp_subflow_ctx(ssk)->cached_sndbuf = sk->sk_sndbuf;
break;
case SO_RCVBUF:
case SO_RCVBUFFORCE:
@@ -439,6 +440,8 @@ static bool mptcp_supported_sockopt(int level, int optname)
/* should work fine */
case IP_FREEBIND:
case IP_TRANSPARENT:
+ case IP_BIND_ADDRESS_NO_PORT:
+ case IP_LOCAL_PORT_RANGE:
/* the following are control cmsg related */
case IP_PKTINFO:
@@ -454,7 +457,6 @@ static bool mptcp_supported_sockopt(int level, int optname)
/* common stuff that need some love */
case IP_TOS:
case IP_TTL:
- case IP_BIND_ADDRESS_NO_PORT:
case IP_MTU_DISCOVER:
case IP_RECVERR:
@@ -682,8 +684,8 @@ static int mptcp_setsockopt_sol_tcp_nodelay(struct mptcp_sock *msk, sockptr_t op
return 0;
}
-static int mptcp_setsockopt_sol_ip_set_transparent(struct mptcp_sock *msk, int optname,
- sockptr_t optval, unsigned int optlen)
+static int mptcp_setsockopt_sol_ip_set(struct mptcp_sock *msk, int optname,
+ sockptr_t optval, unsigned int optlen)
{
struct sock *sk = (struct sock *)msk;
struct sock *ssk;
@@ -709,6 +711,14 @@ static int mptcp_setsockopt_sol_ip_set_transparent(struct mptcp_sock *msk, int o
inet_assign_bit(TRANSPARENT, ssk,
inet_test_bit(TRANSPARENT, sk));
break;
+ case IP_BIND_ADDRESS_NO_PORT:
+ inet_assign_bit(BIND_ADDRESS_NO_PORT, ssk,
+ inet_test_bit(BIND_ADDRESS_NO_PORT, sk));
+ break;
+ case IP_LOCAL_PORT_RANGE:
+ WRITE_ONCE(inet_sk(ssk)->local_port_range,
+ READ_ONCE(inet_sk(sk)->local_port_range));
+ break;
default:
release_sock(sk);
WARN_ON_ONCE(1);
@@ -734,11 +744,14 @@ static int mptcp_setsockopt_v4_set_tos(struct mptcp_sock *msk, int optname,
lock_sock(sk);
sockopt_seq_inc(msk);
- val = inet_sk(sk)->tos;
+ val = READ_ONCE(inet_sk(sk)->tos);
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+ bool slow;
+ slow = lock_sock_fast(ssk);
__ip_sock_set_tos(ssk, val);
+ unlock_sock_fast(ssk, slow);
}
release_sock(sk);
@@ -751,7 +764,9 @@ static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname,
switch (optname) {
case IP_FREEBIND:
case IP_TRANSPARENT:
- return mptcp_setsockopt_sol_ip_set_transparent(msk, optname, optval, optlen);
+ case IP_BIND_ADDRESS_NO_PORT:
+ case IP_LOCAL_PORT_RANGE:
+ return mptcp_setsockopt_sol_ip_set(msk, optname, optval, optlen);
case IP_TOS:
return mptcp_setsockopt_v4_set_tos(msk, optname, optval, optlen);
}
@@ -915,7 +930,7 @@ void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info)
mptcp_pm_get_local_addr_max(msk);
}
- if (test_bit(MPTCP_FALLBACK_DONE, &msk->flags))
+ if (__mptcp_check_fallback(msk))
flags |= MPTCP_INFO_FLAG_FALLBACK;
if (READ_ONCE(msk->can_ack))
flags |= MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED;
@@ -934,6 +949,8 @@ void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info)
info->mptcpi_bytes_sent = msk->bytes_sent;
info->mptcpi_bytes_received = msk->bytes_received;
info->mptcpi_bytes_retrans = msk->bytes_retrans;
+ info->mptcpi_subflows_total = info->mptcpi_subflows +
+ __mptcp_has_initial_subflow(msk);
unlock_sock_fast(sk, slow);
}
EXPORT_SYMBOL_GPL(mptcp_diag_fill_info);
@@ -1343,7 +1360,13 @@ static int mptcp_getsockopt_v4(struct mptcp_sock *msk, int optname,
switch (optname) {
case IP_TOS:
- return mptcp_put_int_option(msk, optval, optlen, inet_sk(sk)->tos);
+ return mptcp_put_int_option(msk, optval, optlen, READ_ONCE(inet_sk(sk)->tos));
+ case IP_BIND_ADDRESS_NO_PORT:
+ return mptcp_put_int_option(msk, optval, optlen,
+ inet_test_bit(BIND_ADDRESS_NO_PORT, sk));
+ case IP_LOCAL_PORT_RANGE:
+ return mptcp_put_int_option(msk, optval, optlen,
+ READ_ONCE(inet_sk(sk)->local_port_range));
}
return -EOPNOTSUPP;
@@ -1415,8 +1438,10 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk)
if (sk->sk_userlocks & tx_rx_locks) {
ssk->sk_userlocks |= sk->sk_userlocks & tx_rx_locks;
- if (sk->sk_userlocks & SOCK_SNDBUF_LOCK)
+ if (sk->sk_userlocks & SOCK_SNDBUF_LOCK) {
WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf);
+ mptcp_subflow_ctx(ssk)->cached_sndbuf = sk->sk_sndbuf;
+ }
if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf);
}
@@ -1442,39 +1467,67 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk)
inet_assign_bit(TRANSPARENT, ssk, inet_test_bit(TRANSPARENT, sk));
inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk));
+ inet_assign_bit(BIND_ADDRESS_NO_PORT, ssk, inet_test_bit(BIND_ADDRESS_NO_PORT, sk));
+ WRITE_ONCE(inet_sk(ssk)->local_port_range, READ_ONCE(inet_sk(sk)->local_port_range));
}
-static void __mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk)
-{
- bool slow = lock_sock_fast(ssk);
-
- sync_socket_options(msk, ssk);
-
- unlock_sock_fast(ssk, slow);
-}
-
-void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk)
+void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
msk_owned_by_me(msk);
+ ssk->sk_rcvlowat = 0;
+
+ /* subflows must ignore any latency-related settings: will not affect
+ * the user-space - only the msk is relevant - but will foul the
+ * mptcp scheduler
+ */
+ tcp_sk(ssk)->notsent_lowat = UINT_MAX;
+
if (READ_ONCE(subflow->setsockopt_seq) != msk->setsockopt_seq) {
- __mptcp_sockopt_sync(msk, ssk);
+ sync_socket_options(msk, ssk);
subflow->setsockopt_seq = msk->setsockopt_seq;
}
}
-void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk)
+/* unfortunately this is different enough from the tcp version so
+ * that we can't factor it out
+ */
+int mptcp_set_rcvlowat(struct sock *sk, int val)
{
- struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+ struct mptcp_subflow_context *subflow;
+ int space, cap;
- msk_owned_by_me(msk);
+ if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
+ cap = sk->sk_rcvbuf >> 1;
+ else
+ cap = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;
+ val = min(val, cap);
+ WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
- if (READ_ONCE(subflow->setsockopt_seq) != msk->setsockopt_seq) {
- sync_socket_options(msk, ssk);
+ /* Check if we need to signal EPOLLIN right now */
+ if (mptcp_epollin_ready(sk))
+ sk->sk_data_ready(sk);
- subflow->setsockopt_seq = msk->setsockopt_seq;
+ if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
+ return 0;
+
+ space = __tcp_space_from_win(mptcp_sk(sk)->scaling_ratio, val);
+ if (space <= sk->sk_rcvbuf)
+ return 0;
+
+ /* propagate the rcvbuf changes to all the subflows */
+ WRITE_ONCE(sk->sk_rcvbuf, space);
+ mptcp_for_each_subflow(mptcp_sk(sk), subflow) {
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+ bool slow;
+
+ slow = lock_sock_fast(ssk);
+ WRITE_ONCE(ssk->sk_rcvbuf, space);
+ tcp_sk(ssk)->window_clamp = val;
+ unlock_sock_fast(ssk, slow);
}
+ return 0;
}
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 9bf3c7bc1762..0dcb721c89d1 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -9,8 +9,8 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/netdevice.h>
-#include <crypto/algapi.h>
#include <crypto/sha2.h>
+#include <crypto/utils.h>
#include <net/sock.h>
#include <net/inet_common.h>
#include <net/inet_hashtables.h>
@@ -157,8 +157,8 @@ static int subflow_check_req(struct request_sock *req,
mptcp_get_options(skb, &mp_opt);
- opt_mp_capable = !!(mp_opt.suboptions & OPTIONS_MPTCP_MPC);
- opt_mp_join = !!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ);
+ opt_mp_capable = !!(mp_opt.suboptions & OPTION_MPTCP_MPC_SYN);
+ opt_mp_join = !!(mp_opt.suboptions & OPTION_MPTCP_MPJ_SYN);
if (opt_mp_capable) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE);
@@ -254,8 +254,8 @@ int mptcp_subflow_init_cookie_req(struct request_sock *req,
subflow_init_req(req, sk_listener);
mptcp_get_options(skb, &mp_opt);
- opt_mp_capable = !!(mp_opt.suboptions & OPTIONS_MPTCP_MPC);
- opt_mp_join = !!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ);
+ opt_mp_capable = !!(mp_opt.suboptions & OPTION_MPTCP_MPC_ACK);
+ opt_mp_join = !!(mp_opt.suboptions & OPTION_MPTCP_MPJ_ACK);
if (opt_mp_capable && opt_mp_join)
return -EINVAL;
@@ -419,21 +419,28 @@ static bool subflow_use_different_dport(struct mptcp_sock *msk, const struct soc
return inet_sk(sk)->inet_dport != inet_sk((struct sock *)msk)->inet_dport;
}
-void __mptcp_set_connected(struct sock *sk)
+void __mptcp_sync_state(struct sock *sk, int state)
{
+ struct mptcp_sock *msk = mptcp_sk(sk);
+
+ __mptcp_propagate_sndbuf(sk, msk->first);
if (sk->sk_state == TCP_SYN_SENT) {
- inet_sk_state_store(sk, TCP_ESTABLISHED);
+ mptcp_set_state(sk, state);
sk->sk_state_change(sk);
}
}
-static void mptcp_set_connected(struct sock *sk)
+static void mptcp_propagate_state(struct sock *sk, struct sock *ssk)
{
+ struct mptcp_sock *msk = mptcp_sk(sk);
+
mptcp_data_lock(sk);
- if (!sock_owned_by_user(sk))
- __mptcp_set_connected(sk);
- else
- __set_bit(MPTCP_CONNECTED, &mptcp_sk(sk)->cb_flags);
+ if (!sock_owned_by_user(sk)) {
+ __mptcp_sync_state(sk, ssk->sk_state);
+ } else {
+ msk->pending_state = ssk->sk_state;
+ __set_bit(MPTCP_SYNC_STATE, &msk->cb_flags);
+ }
mptcp_data_unlock(sk);
}
@@ -472,7 +479,6 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
return;
msk = mptcp_sk(parent);
- mptcp_propagate_sndbuf(parent, sk);
subflow->rel_write_seq = 1;
subflow->conn_finished = 1;
subflow->ssn_offset = TCP_SKB_CB(skb)->seq;
@@ -480,7 +486,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
mptcp_get_options(skb, &mp_opt);
if (subflow->request_mptcp) {
- if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPC)) {
+ if (!(mp_opt.suboptions & OPTION_MPTCP_MPC_SYNACK)) {
MPTCP_INC_STATS(sock_net(sk),
MPTCP_MIB_MPCAPABLEACTIVEFALLBACK);
mptcp_do_fallback(sk);
@@ -496,11 +502,11 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
subflow_set_remote_key(msk, subflow, &mp_opt);
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVEACK);
mptcp_finish_connect(sk);
- mptcp_set_connected(parent);
+ mptcp_propagate_state(parent, sk);
} else if (subflow->request_join) {
u8 hmac[SHA256_DIGEST_SIZE];
- if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ)) {
+ if (!(mp_opt.suboptions & OPTION_MPTCP_MPJ_SYNACK)) {
subflow->reset_reason = MPTCP_RST_EMPTCP;
goto do_reset;
}
@@ -540,7 +546,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
} else if (mptcp_check_fallback(sk)) {
fallback:
mptcp_rcv_space_init(msk, sk);
- mptcp_set_connected(parent);
+ mptcp_propagate_state(parent, sk);
}
return;
@@ -777,12 +783,13 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
* options.
*/
mptcp_get_options(skb, &mp_opt);
- if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPC))
+ if (!(mp_opt.suboptions &
+ (OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_ACK)))
fallback = true;
} else if (subflow_req->mp_join) {
mptcp_get_options(skb, &mp_opt);
- if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ) ||
+ if (!(mp_opt.suboptions & OPTION_MPTCP_MPJ_ACK) ||
!subflow_hmac_valid(req, &mp_opt) ||
!mptcp_can_accept_new_subflow(subflow_req->msk)) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC);
@@ -1226,7 +1233,7 @@ static void mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk)
WRITE_ONCE(subflow->fail_tout, fail_tout);
tcp_send_ack(ssk);
- mptcp_reset_timeout(msk, subflow->fail_tout);
+ mptcp_reset_tout_timer(msk, subflow->fail_tout);
}
static bool subflow_check_data_avail(struct sock *ssk)
@@ -1237,7 +1244,7 @@ static bool subflow_check_data_avail(struct sock *ssk)
struct sk_buff *skb;
if (!skb_peek(&ssk->sk_receive_queue))
- WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA);
+ WRITE_ONCE(subflow->data_avail, false);
if (subflow->data_avail)
return true;
@@ -1271,7 +1278,7 @@ static bool subflow_check_data_avail(struct sock *ssk)
continue;
}
- WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL);
+ WRITE_ONCE(subflow->data_avail, true);
break;
}
return true;
@@ -1293,7 +1300,7 @@ fallback:
goto reset;
}
mptcp_subflow_fail(msk, ssk);
- WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL);
+ WRITE_ONCE(subflow->data_avail, true);
return true;
}
@@ -1310,7 +1317,7 @@ reset:
while ((skb = skb_peek(&ssk->sk_receive_queue)))
sk_eat_skb(ssk, skb);
tcp_send_active_reset(ssk, GFP_ATOMIC);
- WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA);
+ WRITE_ONCE(subflow->data_avail, false);
return false;
}
@@ -1322,7 +1329,7 @@ reset:
subflow->map_seq = READ_ONCE(msk->ack_seq);
subflow->map_data_len = skb->len;
subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq - subflow->ssn_offset;
- WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL);
+ WRITE_ONCE(subflow->data_avail, true);
return true;
}
@@ -1334,7 +1341,7 @@ bool mptcp_subflow_data_available(struct sock *sk)
if (subflow->map_valid &&
mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len) {
subflow->map_valid = 0;
- WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA);
+ WRITE_ONCE(subflow->data_avail, false);
pr_debug("Done with mapping: seq=%u data_len=%u",
subflow->map_subflow_seq,
@@ -1362,42 +1369,6 @@ void mptcp_space(const struct sock *ssk, int *space, int *full_space)
*full_space = mptcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf));
}
-void __mptcp_error_report(struct sock *sk)
-{
- struct mptcp_subflow_context *subflow;
- struct mptcp_sock *msk = mptcp_sk(sk);
-
- mptcp_for_each_subflow(msk, subflow) {
- struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
- int err = sock_error(ssk);
- int ssk_state;
-
- if (!err)
- continue;
-
- /* only propagate errors on fallen-back sockets or
- * on MPC connect
- */
- if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(msk))
- continue;
-
- /* We need to propagate only transition to CLOSE state.
- * Orphaned socket will see such state change via
- * subflow_sched_work_if_closed() and that path will properly
- * destroy the msk as needed.
- */
- ssk_state = inet_sk_state_load(ssk);
- if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD))
- inet_sk_state_store(sk, ssk_state);
- WRITE_ONCE(sk->sk_err, -err);
-
- /* This barrier is coupled with smp_rmb() in mptcp_poll() */
- smp_wmb();
- sk_error_report(sk);
- break;
- }
-}
-
static void subflow_error_report(struct sock *ssk)
{
struct sock *sk = mptcp_subflow_ctx(ssk)->conn;
@@ -1441,10 +1412,18 @@ static void subflow_data_ready(struct sock *sk)
WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable &&
!subflow->mp_join && !(state & TCPF_CLOSE));
- if (mptcp_subflow_data_available(sk))
+ if (mptcp_subflow_data_available(sk)) {
mptcp_data_ready(parent, sk);
- else if (unlikely(sk->sk_err))
+
+ /* subflow-level lowat test are not relevant.
+ * respect the msk-level threshold eventually mandating an immediate ack
+ */
+ if (mptcp_data_avail(msk) < parent->sk_rcvlowat &&
+ (tcp_sk(sk)->rcv_nxt - tcp_sk(sk)->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss)
+ inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
+ } else if (unlikely(sk->sk_err)) {
subflow_error_report(sk);
+ }
}
static void subflow_write_space(struct sock *ssk)
@@ -1561,8 +1540,6 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
if (addr.ss_family == AF_INET6)
addrlen = sizeof(struct sockaddr_in6);
#endif
- mptcp_sockopt_sync(msk, ssk);
-
ssk->sk_bound_dev_if = ifindex;
err = kernel_bind(sf, (struct sockaddr *)&addr, addrlen);
if (err)
@@ -1588,6 +1565,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
mptcp_sock_graft(ssk, sk->sk_socket);
iput(SOCK_INODE(sf));
WRITE_ONCE(msk->allow_infinite_fallback, false);
+ mptcp_stop_tout_timer(sk);
return 0;
failed_unlink:
@@ -1672,7 +1650,7 @@ int mptcp_subflow_create_socket(struct sock *sk, unsigned short family,
err = security_mptcp_add_subflow(sk, sf->sk);
if (err)
- goto release_ssk;
+ goto err_free;
/* the newly created socket has to be in the same cgroup as its parent */
mptcp_attach_cgroup(sk, sf->sk);
@@ -1686,15 +1664,12 @@ int mptcp_subflow_create_socket(struct sock *sk, unsigned short family,
get_net_track(net, &sf->sk->ns_tracker, GFP_KERNEL);
sock_inuse_add(net, 1);
err = tcp_set_ulp(sf->sk, "mptcp");
+ if (err)
+ goto err_free;
-release_ssk:
+ mptcp_sockopt_sync_locked(mptcp_sk(sk), sf->sk);
release_sock(sf->sk);
- if (err) {
- sock_release(sf);
- return err;
- }
-
/* the newly created socket really belongs to the owning MPTCP master
* socket, even if for additional subflows the allocation is performed
* by a kernel workqueue. Adjust inode references, so that the
@@ -1714,6 +1689,11 @@ release_ssk:
mptcp_subflow_ops_override(sf->sk);
return 0;
+
+err_free:
+ release_sock(sf->sk);
+ sock_release(sf);
+ return err;
}
static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk,
@@ -1763,12 +1743,11 @@ static void subflow_state_change(struct sock *sk)
msk = mptcp_sk(parent);
if (subflow_simultaneous_connect(sk)) {
- mptcp_propagate_sndbuf(parent, sk);
mptcp_do_fallback(sk);
mptcp_rcv_space_init(msk, sk);
pr_fallback(msk);
subflow->conn_finished = 1;
- mptcp_set_connected(parent);
+ mptcp_propagate_state(parent, sk);
}
/* as recvmsg() does not acquire the subflow socket for ssk selection
@@ -1991,13 +1970,30 @@ static void subflow_ulp_clone(const struct request_sock *req,
static void tcp_release_cb_override(struct sock *ssk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+ long status;
- if (mptcp_subflow_has_delegated_action(subflow))
- mptcp_subflow_process_delegated(ssk);
+ /* process and clear all the pending actions, but leave the subflow into
+ * the napi queue. To respect locking, only the same CPU that originated
+ * the action can touch the list. mptcp_napi_poll will take care of it.
+ */
+ status = set_mask_bits(&subflow->delegated_status, MPTCP_DELEGATE_ACTIONS_MASK, 0);
+ if (status)
+ mptcp_subflow_process_delegated(ssk, status);
tcp_release_cb(ssk);
}
+static int tcp_abort_override(struct sock *ssk, int err)
+{
+ /* closing a listener subflow requires a great deal of care.
+ * keep it simple and just prevent such operation
+ */
+ if (inet_sk_state_load(ssk) == TCP_LISTEN)
+ return -EINVAL;
+
+ return tcp_abort(ssk, err);
+}
+
static struct tcp_ulp_ops subflow_ulp_ops __read_mostly = {
.name = "mptcp",
.owner = THIS_MODULE,
@@ -2042,6 +2038,7 @@ void __init mptcp_subflow_init(void)
tcp_prot_override = tcp_prot;
tcp_prot_override.release_cb = tcp_release_cb_override;
+ tcp_prot_override.diag_destroy = tcp_abort_override;
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
/* In struct mptcp_subflow_request_sock, we assume the TCP request sock
@@ -2073,11 +2070,11 @@ void __init mptcp_subflow_init(void)
subflow_v6m_specific.send_check = ipv4_specific.send_check;
subflow_v6m_specific.net_header_len = ipv4_specific.net_header_len;
subflow_v6m_specific.mtu_reduced = ipv4_specific.mtu_reduced;
- subflow_v6m_specific.net_frag_header_len = 0;
subflow_v6m_specific.rebuild_header = subflow_rebuild_header;
tcpv6_prot_override = tcpv6_prot;
tcpv6_prot_override.release_cb = tcp_release_cb_override;
+ tcpv6_prot_override.diag_destroy = tcp_abort_override;
#endif
mptcp_diag_subflow_init(&subflow_ulp_ops);
diff --git a/net/mptcp/token_test.c b/net/mptcp/token_test.c
index 0758865ab658..bfff53e668da 100644
--- a/net/mptcp/token_test.c
+++ b/net/mptcp/token_test.c
@@ -143,3 +143,4 @@ static struct kunit_suite mptcp_token_suite = {
kunit_test_suite(mptcp_token_suite);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("KUnit tests for MPTCP Token");
diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h
index 03757e76bb6b..374412ed780b 100644
--- a/net/ncsi/internal.h
+++ b/net/ncsi/internal.h
@@ -105,8 +105,11 @@ enum {
struct ncsi_channel_version {
- u32 version; /* Supported BCD encoded NCSI version */
- u32 alpha2; /* Supported BCD encoded NCSI version */
+ u8 major; /* NCSI version major */
+ u8 minor; /* NCSI version minor */
+ u8 update; /* NCSI version update */
+ char alpha1; /* NCSI version alpha1 */
+ char alpha2; /* NCSI version alpha2 */
u8 fw_name[12]; /* Firmware name string */
u32 fw_version; /* Firmware version */
u16 pci_ids[4]; /* PCI identification */
diff --git a/net/ncsi/ncsi-cmd.c b/net/ncsi/ncsi-cmd.c
index fd2236ee9a79..b3ff37a181d7 100644
--- a/net/ncsi/ncsi-cmd.c
+++ b/net/ncsi/ncsi-cmd.c
@@ -270,7 +270,8 @@ static struct ncsi_cmd_handler {
{ NCSI_PKT_CMD_GPS, 0, ncsi_cmd_handler_default },
{ NCSI_PKT_CMD_OEM, -1, ncsi_cmd_handler_oem },
{ NCSI_PKT_CMD_PLDM, 0, NULL },
- { NCSI_PKT_CMD_GPUUID, 0, ncsi_cmd_handler_default }
+ { NCSI_PKT_CMD_GPUUID, 0, ncsi_cmd_handler_default },
+ { NCSI_PKT_CMD_GMCMA, 0, ncsi_cmd_handler_default }
};
static struct ncsi_request *ncsi_alloc_command(struct ncsi_cmd_arg *nca)
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index d9da942ad53d..745c788f1d1d 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -689,8 +689,6 @@ static int set_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc,
return 0;
}
-#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_KEEP_PHY)
-
static int ncsi_oem_keep_phy_intel(struct ncsi_cmd_arg *nca)
{
unsigned char data[NCSI_OEM_INTEL_CMD_KEEP_PHY_LEN];
@@ -716,10 +714,6 @@ static int ncsi_oem_keep_phy_intel(struct ncsi_cmd_arg *nca)
return ret;
}
-#endif
-
-#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_GET_MAC)
-
/* NCSI OEM Command APIs */
static int ncsi_oem_gma_handler_bcm(struct ncsi_cmd_arg *nca)
{
@@ -856,8 +850,6 @@ static int ncsi_gma_handler(struct ncsi_cmd_arg *nca, unsigned int mf_id)
return nch->handler(nca);
}
-#endif /* CONFIG_NCSI_OEM_CMD_GET_MAC */
-
/* Determine if a given channel from the channel_queue should be used for Tx */
static bool ncsi_channel_is_tx(struct ncsi_dev_priv *ndp,
struct ncsi_channel *nc)
@@ -1039,20 +1031,23 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
goto error;
}
- nd->state = ncsi_dev_state_config_oem_gma;
+ nd->state = IS_ENABLED(CONFIG_NCSI_OEM_CMD_GET_MAC)
+ ? ncsi_dev_state_config_oem_gma
+ : ncsi_dev_state_config_clear_vids;
break;
case ncsi_dev_state_config_oem_gma:
nd->state = ncsi_dev_state_config_clear_vids;
- ret = -1;
-#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_GET_MAC)
- nca.type = NCSI_PKT_CMD_OEM;
nca.package = np->id;
nca.channel = nc->id;
ndp->pending_req_num = 1;
- ret = ncsi_gma_handler(&nca, nc->version.mf_id);
-#endif /* CONFIG_NCSI_OEM_CMD_GET_MAC */
-
+ if (nc->version.major >= 1 && nc->version.minor >= 2) {
+ nca.type = NCSI_PKT_CMD_GMCMA;
+ ret = ncsi_xmit_cmd(&nca);
+ } else {
+ nca.type = NCSI_PKT_CMD_OEM;
+ ret = ncsi_gma_handler(&nca, nc->version.mf_id);
+ }
if (ret < 0)
schedule_work(&ndp->work);
@@ -1404,7 +1399,6 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
schedule_work(&ndp->work);
break;
-#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_GET_MAC)
case ncsi_dev_state_probe_mlx_gma:
ndp->pending_req_num = 1;
@@ -1429,7 +1423,6 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
nd->state = ncsi_dev_state_probe_cis;
break;
-#endif /* CONFIG_NCSI_OEM_CMD_GET_MAC */
case ncsi_dev_state_probe_cis:
ndp->pending_req_num = NCSI_RESERVED_CHANNEL;
@@ -1447,7 +1440,6 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
if (IS_ENABLED(CONFIG_NCSI_OEM_CMD_KEEP_PHY))
nd->state = ncsi_dev_state_probe_keep_phy;
break;
-#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_KEEP_PHY)
case ncsi_dev_state_probe_keep_phy:
ndp->pending_req_num = 1;
@@ -1460,7 +1452,6 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
nd->state = ncsi_dev_state_probe_gvi;
break;
-#endif /* CONFIG_NCSI_OEM_CMD_KEEP_PHY */
case ncsi_dev_state_probe_gvi:
case ncsi_dev_state_probe_gc:
case ncsi_dev_state_probe_gls:
diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c
index a3a6753a1db7..2f872d064396 100644
--- a/net/ncsi/ncsi-netlink.c
+++ b/net/ncsi/ncsi-netlink.c
@@ -71,8 +71,8 @@ static int ncsi_write_channel_info(struct sk_buff *skb,
if (nc == nc->package->preferred_channel)
nla_put_flag(skb, NCSI_CHANNEL_ATTR_FORCED);
- nla_put_u32(skb, NCSI_CHANNEL_ATTR_VERSION_MAJOR, nc->version.version);
- nla_put_u32(skb, NCSI_CHANNEL_ATTR_VERSION_MINOR, nc->version.alpha2);
+ nla_put_u32(skb, NCSI_CHANNEL_ATTR_VERSION_MAJOR, nc->version.major);
+ nla_put_u32(skb, NCSI_CHANNEL_ATTR_VERSION_MINOR, nc->version.minor);
nla_put_string(skb, NCSI_CHANNEL_ATTR_VERSION_STR, nc->version.fw_name);
vid_nest = nla_nest_start_noflag(skb, NCSI_CHANNEL_ATTR_VLAN_LIST);
diff --git a/net/ncsi/ncsi-pkt.h b/net/ncsi/ncsi-pkt.h
index ba66c7dc3a21..f2f3b5c1b941 100644
--- a/net/ncsi/ncsi-pkt.h
+++ b/net/ncsi/ncsi-pkt.h
@@ -197,9 +197,12 @@ struct ncsi_rsp_gls_pkt {
/* Get Version ID */
struct ncsi_rsp_gvi_pkt {
struct ncsi_rsp_pkt_hdr rsp; /* Response header */
- __be32 ncsi_version; /* NCSI version */
+ unsigned char major; /* NCSI version major */
+ unsigned char minor; /* NCSI version minor */
+ unsigned char update; /* NCSI version update */
+ unsigned char alpha1; /* NCSI version alpha1 */
unsigned char reserved[3]; /* Reserved */
- unsigned char alpha2; /* NCSI version */
+ unsigned char alpha2; /* NCSI version alpha2 */
unsigned char fw_name[12]; /* f/w name string */
__be32 fw_version; /* f/w version */
__be16 pci_ids[4]; /* PCI IDs */
@@ -335,6 +338,14 @@ struct ncsi_rsp_gpuuid_pkt {
__be32 checksum;
};
+/* Get MC MAC Address */
+struct ncsi_rsp_gmcma_pkt {
+ struct ncsi_rsp_pkt_hdr rsp;
+ unsigned char address_count;
+ unsigned char reserved[3];
+ unsigned char addresses[][ETH_ALEN];
+};
+
/* AEN: Link State Change */
struct ncsi_aen_lsc_pkt {
struct ncsi_aen_pkt_hdr aen; /* AEN header */
@@ -395,6 +406,7 @@ struct ncsi_aen_hncdsc_pkt {
#define NCSI_PKT_CMD_GPUUID 0x52 /* Get package UUID */
#define NCSI_PKT_CMD_QPNPR 0x56 /* Query Pending NC PLDM request */
#define NCSI_PKT_CMD_SNPR 0x57 /* Send NC PLDM Reply */
+#define NCSI_PKT_CMD_GMCMA 0x58 /* Get MC MAC Address */
/* NCSI packet responses */
@@ -430,6 +442,7 @@ struct ncsi_aen_hncdsc_pkt {
#define NCSI_PKT_RSP_GPUUID (NCSI_PKT_CMD_GPUUID + 0x80)
#define NCSI_PKT_RSP_QPNPR (NCSI_PKT_CMD_QPNPR + 0x80)
#define NCSI_PKT_RSP_SNPR (NCSI_PKT_CMD_SNPR + 0x80)
+#define NCSI_PKT_RSP_GMCMA (NCSI_PKT_CMD_GMCMA + 0x80)
/* NCSI response code/reason */
#define NCSI_PKT_RSP_C_COMPLETED 0x0000 /* Command Completed */
diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c
index 069c2659074b..bee290d0f48b 100644
--- a/net/ncsi/ncsi-rsp.c
+++ b/net/ncsi/ncsi-rsp.c
@@ -19,6 +19,19 @@
#include "ncsi-pkt.h"
#include "ncsi-netlink.h"
+/* Nibbles within [0xA, 0xF] add zero "0" to the returned value.
+ * Optional fields (encoded as 0xFF) will default to zero.
+ */
+static u8 decode_bcd_u8(u8 x)
+{
+ int lo = x & 0xF;
+ int hi = x >> 4;
+
+ lo = lo < 0xA ? lo : 0;
+ hi = hi < 0xA ? hi : 0;
+ return lo + hi * 10;
+}
+
static int ncsi_validate_rsp_pkt(struct ncsi_request *nr,
unsigned short payload)
{
@@ -755,9 +768,18 @@ static int ncsi_rsp_handler_gvi(struct ncsi_request *nr)
if (!nc)
return -ENODEV;
- /* Update to channel's version info */
+ /* Update channel's version info
+ *
+ * Major, minor, and update fields are supposed to be
+ * unsigned integers encoded as packed BCD.
+ *
+ * Alpha1 and alpha2 are ISO/IEC 8859-1 characters.
+ */
ncv = &nc->version;
- ncv->version = ntohl(rsp->ncsi_version);
+ ncv->major = decode_bcd_u8(rsp->major);
+ ncv->minor = decode_bcd_u8(rsp->minor);
+ ncv->update = decode_bcd_u8(rsp->update);
+ ncv->alpha1 = rsp->alpha1;
ncv->alpha2 = rsp->alpha2;
memcpy(ncv->fw_name, rsp->fw_name, 12);
ncv->fw_version = ntohl(rsp->fw_version);
@@ -1069,6 +1091,44 @@ static int ncsi_rsp_handler_netlink(struct ncsi_request *nr)
return ret;
}
+static int ncsi_rsp_handler_gmcma(struct ncsi_request *nr)
+{
+ struct ncsi_dev_priv *ndp = nr->ndp;
+ struct net_device *ndev = ndp->ndev.dev;
+ struct ncsi_rsp_gmcma_pkt *rsp;
+ struct sockaddr saddr;
+ int ret = -1;
+ int i;
+
+ rsp = (struct ncsi_rsp_gmcma_pkt *)skb_network_header(nr->rsp);
+ saddr.sa_family = ndev->type;
+ ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+
+ netdev_info(ndev, "NCSI: Received %d provisioned MAC addresses\n",
+ rsp->address_count);
+ for (i = 0; i < rsp->address_count; i++) {
+ netdev_info(ndev, "NCSI: MAC address %d: %02x:%02x:%02x:%02x:%02x:%02x\n",
+ i, rsp->addresses[i][0], rsp->addresses[i][1],
+ rsp->addresses[i][2], rsp->addresses[i][3],
+ rsp->addresses[i][4], rsp->addresses[i][5]);
+ }
+
+ for (i = 0; i < rsp->address_count; i++) {
+ memcpy(saddr.sa_data, &rsp->addresses[i], ETH_ALEN);
+ ret = ndev->netdev_ops->ndo_set_mac_address(ndev, &saddr);
+ if (ret < 0) {
+ netdev_warn(ndev, "NCSI: Unable to assign %pM to device\n",
+ saddr.sa_data);
+ continue;
+ }
+ netdev_warn(ndev, "NCSI: Set MAC address to %pM\n", saddr.sa_data);
+ break;
+ }
+
+ ndp->gma_flag = ret == 0;
+ return ret;
+}
+
static struct ncsi_rsp_handler {
unsigned char type;
int payload;
@@ -1105,7 +1165,8 @@ static struct ncsi_rsp_handler {
{ NCSI_PKT_RSP_PLDM, -1, ncsi_rsp_handler_pldm },
{ NCSI_PKT_RSP_GPUUID, 20, ncsi_rsp_handler_gpuuid },
{ NCSI_PKT_RSP_QPNPR, -1, ncsi_rsp_handler_pldm },
- { NCSI_PKT_RSP_SNPR, -1, ncsi_rsp_handler_pldm }
+ { NCSI_PKT_RSP_SNPR, -1, ncsi_rsp_handler_pldm },
+ { NCSI_PKT_RSP_GMCMA, -1, ncsi_rsp_handler_gmcma },
};
int ncsi_rcv_rsp(struct sk_buff *skb, struct net_device *dev,
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index ef4e76e5aef9..3126911f5042 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -639,10 +639,10 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
if (ret == 1)
continue;
return ret;
+ case NF_STOLEN:
+ return NF_DROP_GETERR(verdict);
default:
- /* Implicit handling for NF_STOLEN, as well as any other
- * non conventional verdicts.
- */
+ WARN_ON_ONCE(1);
return 0;
}
}
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index 26ab0e9612d8..cb48a2b9cb9f 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -4,6 +4,8 @@
#ifndef __IP_SET_BITMAP_IP_GEN_H
#define __IP_SET_BITMAP_IP_GEN_H
+#include <linux/rcupdate_wait.h>
+
#define mtype_do_test IPSET_TOKEN(MTYPE, _do_test)
#define mtype_gc_test IPSET_TOKEN(MTYPE, _gc_test)
#define mtype_is_filled IPSET_TOKEN(MTYPE, _is_filled)
@@ -28,6 +30,7 @@
#define mtype_del IPSET_TOKEN(MTYPE, _del)
#define mtype_list IPSET_TOKEN(MTYPE, _list)
#define mtype_gc IPSET_TOKEN(MTYPE, _gc)
+#define mtype_cancel_gc IPSET_TOKEN(MTYPE, _cancel_gc)
#define mtype MTYPE
#define get_ext(set, map, id) ((map)->extensions + ((set)->dsize * (id)))
@@ -57,9 +60,6 @@ mtype_destroy(struct ip_set *set)
{
struct mtype *map = set->data;
- if (SET_WITH_TIMEOUT(set))
- del_timer_sync(&map->gc);
-
if (set->dsize && set->extensions & IPSET_EXT_DESTROY)
mtype_ext_cleanup(set);
ip_set_free(map->members);
@@ -288,6 +288,15 @@ mtype_gc(struct timer_list *t)
add_timer(&map->gc);
}
+static void
+mtype_cancel_gc(struct ip_set *set)
+{
+ struct mtype *map = set->data;
+
+ if (SET_WITH_TIMEOUT(set))
+ del_timer_sync(&map->gc);
+}
+
static const struct ip_set_type_variant mtype = {
.kadt = mtype_kadt,
.uadt = mtype_uadt,
@@ -301,6 +310,7 @@ static const struct ip_set_type_variant mtype = {
.head = mtype_head,
.list = mtype_list,
.same_set = mtype_same_set,
+ .cancel_gc = mtype_cancel_gc,
};
#endif /* __IP_SET_BITMAP_IP_GEN_H */
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index e564b5174261..3184cc6be4c9 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -61,6 +61,8 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);
ip_set_dereference((inst)->ip_set_list)[id]
#define ip_set_ref_netlink(inst,id) \
rcu_dereference_raw((inst)->ip_set_list)[id]
+#define ip_set_dereference_nfnl(p) \
+ rcu_dereference_check(p, lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET))
/* The set types are implemented in modules and registered set types
* can be found in ip_set_type_list. Adding/deleting types is
@@ -683,6 +685,14 @@ __ip_set_put(struct ip_set *set)
* a separate reference counter
*/
static void
+__ip_set_get_netlink(struct ip_set *set)
+{
+ write_lock_bh(&ip_set_ref_lock);
+ set->ref_netlink++;
+ write_unlock_bh(&ip_set_ref_lock);
+}
+
+static void
__ip_set_put_netlink(struct ip_set *set)
{
write_lock_bh(&ip_set_ref_lock);
@@ -700,15 +710,10 @@ __ip_set_put_netlink(struct ip_set *set)
static struct ip_set *
ip_set_rcu_get(struct net *net, ip_set_id_t index)
{
- struct ip_set *set;
struct ip_set_net *inst = ip_set_pernet(net);
- rcu_read_lock();
- /* ip_set_list itself needs to be protected */
- set = rcu_dereference(inst->ip_set_list)[index];
- rcu_read_unlock();
-
- return set;
+ /* ip_set_list and the set pointer need to be protected */
+ return ip_set_dereference_nfnl(inst->ip_set_list)[index];
}
static inline void
@@ -1149,6 +1154,7 @@ static int ip_set_create(struct sk_buff *skb, const struct nfnl_info *info,
return ret;
cleanup:
+ set->variant->cancel_gc(set);
set->variant->destroy(set);
put_out:
module_put(set->type->me);
@@ -1177,6 +1183,14 @@ ip_set_destroy_set(struct ip_set *set)
kfree(set);
}
+static void
+ip_set_destroy_set_rcu(struct rcu_head *head)
+{
+ struct ip_set *set = container_of(head, struct ip_set, rcu);
+
+ ip_set_destroy_set(set);
+}
+
static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
const struct nlattr * const attr[])
{
@@ -1188,8 +1202,6 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
if (unlikely(protocol_min_failed(attr)))
return -IPSET_ERR_PROTOCOL;
- /* Must wait for flush to be really finished in list:set */
- rcu_barrier();
/* Commands are serialized and references are
* protected by the ip_set_ref_lock.
@@ -1201,8 +1213,10 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
* counter, so if it's already zero, we can proceed
* without holding the lock.
*/
- read_lock_bh(&ip_set_ref_lock);
if (!attr[IPSET_ATTR_SETNAME]) {
+ /* Must wait for flush to be really finished in list:set */
+ rcu_barrier();
+ read_lock_bh(&ip_set_ref_lock);
for (i = 0; i < inst->ip_set_max; i++) {
s = ip_set(inst, i);
if (s && (s->ref || s->ref_netlink)) {
@@ -1216,6 +1230,8 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
s = ip_set(inst, i);
if (s) {
ip_set(inst, i) = NULL;
+ /* Must cancel garbage collectors */
+ s->variant->cancel_gc(s);
ip_set_destroy_set(s);
}
}
@@ -1223,6 +1239,9 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
inst->is_destroyed = false;
} else {
u32 flags = flag_exist(info->nlh);
+ u16 features = 0;
+
+ read_lock_bh(&ip_set_ref_lock);
s = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]),
&i);
if (!s) {
@@ -1233,10 +1252,16 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
ret = -IPSET_ERR_BUSY;
goto out;
}
+ features = s->type->features;
ip_set(inst, i) = NULL;
read_unlock_bh(&ip_set_ref_lock);
-
- ip_set_destroy_set(s);
+ if (features & IPSET_TYPE_NAME) {
+ /* Must wait for flush to be really finished */
+ rcu_barrier();
+ }
+ /* Must cancel garbage collectors */
+ s->variant->cancel_gc(s);
+ call_rcu(&s->rcu, ip_set_destroy_set_rcu);
}
return 0;
out:
@@ -1693,11 +1718,11 @@ call_ad(struct net *net, struct sock *ctnl, struct sk_buff *skb,
do {
if (retried) {
- __ip_set_get(set);
+ __ip_set_get_netlink(set);
nfnl_unlock(NFNL_SUBSYS_IPSET);
cond_resched();
nfnl_lock(NFNL_SUBSYS_IPSET);
- __ip_set_put(set);
+ __ip_set_put_netlink(set);
}
ip_set_lock(set);
@@ -2354,6 +2379,7 @@ ip_set_net_exit(struct net *net)
set = ip_set(inst, i);
if (set) {
ip_set(inst, i) = NULL;
+ set->variant->cancel_gc(set);
ip_set_destroy_set(set);
}
}
@@ -2401,8 +2427,11 @@ ip_set_fini(void)
{
nf_unregister_sockopt(&so_set);
nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
-
unregister_pernet_subsys(&ip_set_net_ops);
+
+ /* Wait for call_rcu() in destroy */
+ rcu_barrier();
+
pr_debug("these are the famous last words\n");
}
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 7c2399541771..cf3ce72c3de6 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -5,6 +5,7 @@
#define _IP_SET_HASH_GEN_H
#include <linux/rcupdate.h>
+#include <linux/rcupdate_wait.h>
#include <linux/jhash.h>
#include <linux/types.h>
#include <linux/netfilter/nfnetlink.h>
@@ -221,6 +222,7 @@ static const union nf_inet_addr zeromask = {};
#undef mtype_gc_do
#undef mtype_gc
#undef mtype_gc_init
+#undef mtype_cancel_gc
#undef mtype_variant
#undef mtype_data_match
@@ -265,6 +267,7 @@ static const union nf_inet_addr zeromask = {};
#define mtype_gc_do IPSET_TOKEN(MTYPE, _gc_do)
#define mtype_gc IPSET_TOKEN(MTYPE, _gc)
#define mtype_gc_init IPSET_TOKEN(MTYPE, _gc_init)
+#define mtype_cancel_gc IPSET_TOKEN(MTYPE, _cancel_gc)
#define mtype_variant IPSET_TOKEN(MTYPE, _variant)
#define mtype_data_match IPSET_TOKEN(MTYPE, _data_match)
@@ -429,7 +432,7 @@ mtype_ahash_destroy(struct ip_set *set, struct htable *t, bool ext_destroy)
u32 i;
for (i = 0; i < jhash_size(t->htable_bits); i++) {
- n = __ipset_dereference(hbucket(t, i));
+ n = (__force struct hbucket *)hbucket(t, i);
if (!n)
continue;
if (set->extensions & IPSET_EXT_DESTROY && ext_destroy)
@@ -449,10 +452,7 @@ mtype_destroy(struct ip_set *set)
struct htype *h = set->data;
struct list_head *l, *lt;
- if (SET_WITH_TIMEOUT(set))
- cancel_delayed_work_sync(&h->gc.dwork);
-
- mtype_ahash_destroy(set, ipset_dereference_nfnl(h->table), true);
+ mtype_ahash_destroy(set, (__force struct htable *)h->table, true);
list_for_each_safe(l, lt, &h->ad) {
list_del(l);
kfree(l);
@@ -598,6 +598,15 @@ mtype_gc_init(struct htable_gc *gc)
queue_delayed_work(system_power_efficient_wq, &gc->dwork, HZ);
}
+static void
+mtype_cancel_gc(struct ip_set *set)
+{
+ struct htype *h = set->data;
+
+ if (SET_WITH_TIMEOUT(set))
+ cancel_delayed_work_sync(&h->gc.dwork);
+}
+
static int
mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
struct ip_set_ext *mext, u32 flags);
@@ -1440,6 +1449,7 @@ static const struct ip_set_type_variant mtype_variant = {
.uref = mtype_uref,
.resize = mtype_resize,
.same_set = mtype_same_set,
+ .cancel_gc = mtype_cancel_gc,
.region_lock = true,
};
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index 95aeb31c60e0..30a655e5c4fd 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -138,9 +138,9 @@ hash_netiface4_data_next(struct hash_netiface4_elem *next,
#include "ip_set_hash_gen.h"
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
-static const char *get_physindev_name(const struct sk_buff *skb)
+static const char *get_physindev_name(const struct sk_buff *skb, struct net *net)
{
- struct net_device *dev = nf_bridge_get_physindev(skb);
+ struct net_device *dev = nf_bridge_get_physindev(skb, net);
return dev ? dev->name : NULL;
}
@@ -177,7 +177,7 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb,
if (opt->cmdflags & IPSET_FLAG_PHYSDEV) {
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
- const char *eiface = SRCDIR ? get_physindev_name(skb) :
+ const char *eiface = SRCDIR ? get_physindev_name(skb, xt_net(par)) :
get_physoutdev_name(skb);
if (!eiface)
@@ -395,7 +395,7 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb,
if (opt->cmdflags & IPSET_FLAG_PHYSDEV) {
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
- const char *eiface = SRCDIR ? get_physindev_name(skb) :
+ const char *eiface = SRCDIR ? get_physindev_name(skb, xt_net(par)) :
get_physoutdev_name(skb);
if (!eiface)
diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c
index 005a7ce87217..bf4f91b78e1d 100644
--- a/net/netfilter/ipset/ip_set_hash_netportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netportnet.c
@@ -36,6 +36,7 @@ MODULE_ALIAS("ip_set_hash:net,port,net");
#define IP_SET_HASH_WITH_PROTO
#define IP_SET_HASH_WITH_NETS
#define IPSET_NET_COUNT 2
+#define IP_SET_HASH_WITH_NET0
/* IPv4 variant */
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index e162636525cf..6c3f28bc59b3 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -426,9 +426,6 @@ list_set_destroy(struct ip_set *set)
struct list_set *map = set->data;
struct set_elem *e, *n;
- if (SET_WITH_TIMEOUT(set))
- timer_shutdown_sync(&map->gc);
-
list_for_each_entry_safe(e, n, &map->members, list) {
list_del(&e->list);
ip_set_put_byindex(map->net, e->id);
@@ -545,6 +542,15 @@ list_set_same_set(const struct ip_set *a, const struct ip_set *b)
a->extensions == b->extensions;
}
+static void
+list_set_cancel_gc(struct ip_set *set)
+{
+ struct list_set *map = set->data;
+
+ if (SET_WITH_TIMEOUT(set))
+ timer_shutdown_sync(&map->gc);
+}
+
static const struct ip_set_type_variant set_variant = {
.kadt = list_set_kadt,
.uadt = list_set_uadt,
@@ -558,6 +564,7 @@ static const struct ip_set_type_variant set_variant = {
.head = list_set_head,
.list = list_set_list,
.same_set = list_set_same_set,
+ .cancel_gc = list_set_cancel_gc,
};
static void
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 9065da3cdd12..a743db073887 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -31,6 +31,7 @@
#include <linux/seq_file.h>
#include <linux/jhash.h>
#include <linux/random.h>
+#include <linux/rcupdate_wait.h>
#include <net/net_namespace.h>
#include <net/ip_vs.h>
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 3230506ae3ff..a2c16b501087 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -2450,3 +2450,4 @@ static void __exit ip_vs_cleanup(void)
module_init(ip_vs_init);
module_exit(ip_vs_cleanup);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("IP Virtual Server");
diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c
index 5e6ec32aff2b..75f4c231f4a0 100644
--- a/net/netfilter/ipvs/ip_vs_dh.c
+++ b/net/netfilter/ipvs/ip_vs_dh.c
@@ -270,3 +270,4 @@ static void __exit ip_vs_dh_cleanup(void)
module_init(ip_vs_dh_init);
module_exit(ip_vs_dh_cleanup);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ipvs destination hashing scheduler");
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
index c5970ba416ae..f821ad2e19b3 100644
--- a/net/netfilter/ipvs/ip_vs_est.c
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -21,6 +21,7 @@
#include <linux/interrupt.h>
#include <linux/sysctl.h>
#include <linux/list.h>
+#include <linux/rcupdate_wait.h>
#include <net/ip_vs.h>
diff --git a/net/netfilter/ipvs/ip_vs_fo.c b/net/netfilter/ipvs/ip_vs_fo.c
index b846cc385279..ab117e5bc34e 100644
--- a/net/netfilter/ipvs/ip_vs_fo.c
+++ b/net/netfilter/ipvs/ip_vs_fo.c
@@ -72,3 +72,4 @@ static void __exit ip_vs_fo_cleanup(void)
module_init(ip_vs_fo_init);
module_exit(ip_vs_fo_cleanup);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ipvs weighted failover scheduler");
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index ef1f45e43b63..f53899d12416 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -635,3 +635,4 @@ static void __exit ip_vs_ftp_exit(void)
module_init(ip_vs_ftp_init);
module_exit(ip_vs_ftp_exit);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ipvs ftp helper");
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index cf78ba4ce5ff..8ceec7a2fa8f 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -632,3 +632,4 @@ static void __exit ip_vs_lblc_cleanup(void)
module_init(ip_vs_lblc_init);
module_exit(ip_vs_lblc_cleanup);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ipvs locality-based least-connection scheduler");
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index 9eddf118b40e..0fb64707213f 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -817,3 +817,4 @@ static void __exit ip_vs_lblcr_cleanup(void)
module_init(ip_vs_lblcr_init);
module_exit(ip_vs_lblcr_cleanup);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ipvs locality-based least-connection with replication scheduler");
diff --git a/net/netfilter/ipvs/ip_vs_lc.c b/net/netfilter/ipvs/ip_vs_lc.c
index 9d34d81fc6f1..c2764505e380 100644
--- a/net/netfilter/ipvs/ip_vs_lc.c
+++ b/net/netfilter/ipvs/ip_vs_lc.c
@@ -86,3 +86,4 @@ static void __exit ip_vs_lc_cleanup(void)
module_init(ip_vs_lc_init);
module_exit(ip_vs_lc_cleanup);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ipvs least connection scheduler");
diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c
index f56862a87518..ed7f5c889b41 100644
--- a/net/netfilter/ipvs/ip_vs_nq.c
+++ b/net/netfilter/ipvs/ip_vs_nq.c
@@ -136,3 +136,4 @@ static void __exit ip_vs_nq_cleanup(void)
module_init(ip_vs_nq_init);
module_exit(ip_vs_nq_cleanup);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ipvs never queue scheduler");
diff --git a/net/netfilter/ipvs/ip_vs_ovf.c b/net/netfilter/ipvs/ip_vs_ovf.c
index c03066fdd5ca..c7708b809700 100644
--- a/net/netfilter/ipvs/ip_vs_ovf.c
+++ b/net/netfilter/ipvs/ip_vs_ovf.c
@@ -79,3 +79,4 @@ static void __exit ip_vs_ovf_cleanup(void)
module_init(ip_vs_ovf_init);
module_exit(ip_vs_ovf_cleanup);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ipvs overflow connection scheduler");
diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c
index 0ac6705a61d3..e4ce1d9a63f9 100644
--- a/net/netfilter/ipvs/ip_vs_pe_sip.c
+++ b/net/netfilter/ipvs/ip_vs_pe_sip.c
@@ -185,3 +185,4 @@ static void __exit ip_vs_sip_cleanup(void)
module_init(ip_vs_sip_init);
module_exit(ip_vs_sip_cleanup);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ipvs sip helper");
diff --git a/net/netfilter/ipvs/ip_vs_rr.c b/net/netfilter/ipvs/ip_vs_rr.c
index 38495c6f6c7c..6baa34dff9f0 100644
--- a/net/netfilter/ipvs/ip_vs_rr.c
+++ b/net/netfilter/ipvs/ip_vs_rr.c
@@ -122,4 +122,5 @@ static void __exit ip_vs_rr_cleanup(void)
module_init(ip_vs_rr_init);
module_exit(ip_vs_rr_cleanup);
+MODULE_DESCRIPTION("ipvs round-robin scheduler");
MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c
index 7663288e5358..a46f99a56618 100644
--- a/net/netfilter/ipvs/ip_vs_sed.c
+++ b/net/netfilter/ipvs/ip_vs_sed.c
@@ -137,3 +137,4 @@ static void __exit ip_vs_sed_cleanup(void)
module_init(ip_vs_sed_init);
module_exit(ip_vs_sed_cleanup);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ipvs shortest expected delay scheduler");
diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c
index c2028e412092..92e77d7a6b50 100644
--- a/net/netfilter/ipvs/ip_vs_sh.c
+++ b/net/netfilter/ipvs/ip_vs_sh.c
@@ -376,3 +376,4 @@ static void __exit ip_vs_sh_cleanup(void)
module_init(ip_vs_sh_init);
module_exit(ip_vs_sh_cleanup);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ipvs source hashing scheduler");
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index da5af28ff57b..be74c0906dda 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -1298,17 +1298,13 @@ static void set_sock_size(struct sock *sk, int mode, int val)
static void set_mcast_loop(struct sock *sk, u_char loop)
{
/* setsockopt(sock, SOL_IP, IP_MULTICAST_LOOP, &loop, sizeof(loop)); */
- lock_sock(sk);
inet_assign_bit(MC_LOOP, sk, loop);
#ifdef CONFIG_IP_VS_IPV6
- if (sk->sk_family == AF_INET6) {
- struct ipv6_pinfo *np = inet6_sk(sk);
-
+ if (READ_ONCE(sk->sk_family) == AF_INET6) {
/* IPV6_MULTICAST_LOOP */
- np->mc_loop = loop ? 1 : 0;
+ inet6_assign_bit(MC6_LOOP, sk, loop);
}
#endif
- release_sock(sk);
}
/*
@@ -1320,13 +1316,13 @@ static void set_mcast_ttl(struct sock *sk, u_char ttl)
/* setsockopt(sock, SOL_IP, IP_MULTICAST_TTL, &ttl, sizeof(ttl)); */
lock_sock(sk);
- inet->mc_ttl = ttl;
+ WRITE_ONCE(inet->mc_ttl, ttl);
#ifdef CONFIG_IP_VS_IPV6
if (sk->sk_family == AF_INET6) {
struct ipv6_pinfo *np = inet6_sk(sk);
/* IPV6_MULTICAST_HOPS */
- np->mcast_hops = ttl;
+ WRITE_ONCE(np->mcast_hops, ttl);
}
#endif
release_sock(sk);
@@ -1339,13 +1335,13 @@ static void set_mcast_pmtudisc(struct sock *sk, int val)
/* setsockopt(sock, SOL_IP, IP_MTU_DISCOVER, &val, sizeof(val)); */
lock_sock(sk);
- inet->pmtudisc = val;
+ WRITE_ONCE(inet->pmtudisc, val);
#ifdef CONFIG_IP_VS_IPV6
if (sk->sk_family == AF_INET6) {
struct ipv6_pinfo *np = inet6_sk(sk);
/* IPV6_MTU_DISCOVER */
- np->pmtudisc = val;
+ WRITE_ONCE(np->pmtudisc, val);
}
#endif
release_sock(sk);
@@ -1369,7 +1365,7 @@ static int set_mcast_if(struct sock *sk, struct net_device *dev)
struct ipv6_pinfo *np = inet6_sk(sk);
/* IPV6_MULTICAST_IF */
- np->mcast_oif = dev->ifindex;
+ WRITE_ONCE(np->mcast_oif, dev->ifindex);
}
#endif
release_sock(sk);
@@ -1439,7 +1435,7 @@ static int bind_mcastif_addr(struct socket *sock, struct net_device *dev)
sin.sin_addr.s_addr = addr;
sin.sin_port = 0;
- return sock->ops->bind(sock, (struct sockaddr*)&sin, sizeof(sin));
+ return kernel_bind(sock, (struct sockaddr *)&sin, sizeof(sin));
}
static void get_mcast_sockaddr(union ipvs_sockaddr *sa, int *salen,
@@ -1505,8 +1501,8 @@ static int make_send_sock(struct netns_ipvs *ipvs, int id,
}
get_mcast_sockaddr(&mcast_addr, &salen, &ipvs->mcfg, id);
- result = sock->ops->connect(sock, (struct sockaddr *) &mcast_addr,
- salen, 0);
+ result = kernel_connect(sock, (struct sockaddr *)&mcast_addr,
+ salen, 0);
if (result < 0) {
pr_err("Error connecting to the multicast addr\n");
goto error;
@@ -1546,7 +1542,7 @@ static int make_receive_sock(struct netns_ipvs *ipvs, int id,
get_mcast_sockaddr(&mcast_addr, &salen, &ipvs->bcfg, id);
sock->sk->sk_bound_dev_if = dev->ifindex;
- result = sock->ops->bind(sock, (struct sockaddr *)&mcast_addr, salen);
+ result = kernel_bind(sock, (struct sockaddr *)&mcast_addr, salen);
if (result < 0) {
pr_err("Error binding to the multicast addr\n");
goto error;
diff --git a/net/netfilter/ipvs/ip_vs_twos.c b/net/netfilter/ipvs/ip_vs_twos.c
index 3308e4cc740a..8d5419edde50 100644
--- a/net/netfilter/ipvs/ip_vs_twos.c
+++ b/net/netfilter/ipvs/ip_vs_twos.c
@@ -137,3 +137,4 @@ static void __exit ip_vs_twos_cleanup(void)
module_init(ip_vs_twos_init);
module_exit(ip_vs_twos_cleanup);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ipvs power of twos choice scheduler");
diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c
index 09f584b564a0..9fa500927c0a 100644
--- a/net/netfilter/ipvs/ip_vs_wlc.c
+++ b/net/netfilter/ipvs/ip_vs_wlc.c
@@ -109,3 +109,4 @@ static void __exit ip_vs_wlc_cleanup(void)
module_init(ip_vs_wlc_init);
module_exit(ip_vs_wlc_cleanup);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ipvs weighted least connection scheduler");
diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c
index 1bc7a0789d85..85ce0d04afac 100644
--- a/net/netfilter/ipvs/ip_vs_wrr.c
+++ b/net/netfilter/ipvs/ip_vs_wrr.c
@@ -263,3 +263,4 @@ static void __exit ip_vs_wrr_cleanup(void)
module_init(ip_vs_wrr_init);
module_exit(ip_vs_wrr_cleanup);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ipvs weighted round-robin scheduler");
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 9193e109e6b3..65e0259178da 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -271,7 +271,7 @@ static inline bool decrement_ttl(struct netns_ipvs *ipvs,
skb->dev = dst->dev;
icmpv6_send(skb, ICMPV6_TIME_EXCEED,
ICMPV6_EXC_HOPLIMIT, 0);
- __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
+ IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
return false;
}
@@ -286,7 +286,7 @@ static inline bool decrement_ttl(struct netns_ipvs *ipvs,
{
if (ip_hdr(skb)->ttl <= 1) {
/* Tell the sender its packet died... */
- __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS);
+ IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS);
icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0);
return false;
}
diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c
index e502ec00b2fe..0e4beae421f8 100644
--- a/net/netfilter/nf_bpf_link.c
+++ b/net/netfilter/nf_bpf_link.c
@@ -31,7 +31,7 @@ struct bpf_nf_link {
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
static const struct nf_defrag_hook *
get_proto_defrag_hook(struct bpf_nf_link *link,
- const struct nf_defrag_hook __rcu *global_hook,
+ const struct nf_defrag_hook __rcu **ptr_global_hook,
const char *mod)
{
const struct nf_defrag_hook *hook;
@@ -39,7 +39,7 @@ get_proto_defrag_hook(struct bpf_nf_link *link,
/* RCU protects us from races against module unloading */
rcu_read_lock();
- hook = rcu_dereference(global_hook);
+ hook = rcu_dereference(*ptr_global_hook);
if (!hook) {
rcu_read_unlock();
err = request_module(mod);
@@ -47,7 +47,7 @@ get_proto_defrag_hook(struct bpf_nf_link *link,
return ERR_PTR(err < 0 ? err : -EINVAL);
rcu_read_lock();
- hook = rcu_dereference(global_hook);
+ hook = rcu_dereference(*ptr_global_hook);
}
if (hook && try_module_get(hook->owner)) {
@@ -78,7 +78,7 @@ static int bpf_nf_enable_defrag(struct bpf_nf_link *link)
switch (link->hook_ops.pf) {
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
case NFPROTO_IPV4:
- hook = get_proto_defrag_hook(link, nf_defrag_v4_hook, "nf_defrag_ipv4");
+ hook = get_proto_defrag_hook(link, &nf_defrag_v4_hook, "nf_defrag_ipv4");
if (IS_ERR(hook))
return PTR_ERR(hook);
@@ -87,7 +87,7 @@ static int bpf_nf_enable_defrag(struct bpf_nf_link *link)
#endif
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
case NFPROTO_IPV6:
- hook = get_proto_defrag_hook(link, nf_defrag_v6_hook, "nf_defrag_ipv6");
+ hook = get_proto_defrag_hook(link, &nf_defrag_v6_hook, "nf_defrag_ipv6");
if (IS_ERR(hook))
return PTR_ERR(hook);
diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c
index c7a6114091ae..475358ec8212 100644
--- a/net/netfilter/nf_conntrack_bpf.c
+++ b/net/netfilter/nf_conntrack_bpf.c
@@ -230,9 +230,7 @@ static int _nf_conntrack_btf_struct_access(struct bpf_verifier_log *log,
return 0;
}
-__diag_push();
-__diag_ignore_all("-Wmissing-prototypes",
- "Global functions as their definitions will be in nf_conntrack BTF");
+__bpf_kfunc_start_defs();
/* bpf_xdp_ct_alloc - Allocate a new CT entry
*
@@ -381,6 +379,8 @@ __bpf_kfunc struct nf_conn *bpf_ct_insert_entry(struct nf_conn___init *nfct_i)
struct nf_conn *nfct = (struct nf_conn *)nfct_i;
int err;
+ if (!nf_ct_is_confirmed(nfct))
+ nfct->timeout += nfct_time_stamp;
nfct->status |= IPS_CONFIRMED;
err = nf_conntrack_hash_check_insert(nfct);
if (err < 0) {
@@ -465,7 +465,7 @@ __bpf_kfunc int bpf_ct_change_status(struct nf_conn *nfct, u32 status)
return nf_ct_change_status_common(nfct, status);
}
-__diag_pop()
+__bpf_kfunc_end_defs();
BTF_SET8_START(nf_ct_kfunc_set)
BTF_ID_FLAGS(func, bpf_xdp_ct_alloc, KF_ACQUIRE | KF_RET_NULL)
diff --git a/net/netfilter/nf_conntrack_broadcast.c b/net/netfilter/nf_conntrack_broadcast.c
index 9fb9b8031298..cfa0fe0356de 100644
--- a/net/netfilter/nf_conntrack_broadcast.c
+++ b/net/netfilter/nf_conntrack_broadcast.c
@@ -82,3 +82,4 @@ out:
EXPORT_SYMBOL_GPL(nf_conntrack_broadcast_help);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Broadcast connection tracking helper");
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 9f6f2e643575..2e5f3864d353 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -2042,24 +2042,6 @@ out:
}
EXPORT_SYMBOL_GPL(nf_conntrack_in);
-/* Alter reply tuple (maybe alter helper). This is for NAT, and is
- implicitly racy: see __nf_conntrack_confirm */
-void nf_conntrack_alter_reply(struct nf_conn *ct,
- const struct nf_conntrack_tuple *newreply)
-{
- struct nf_conn_help *help = nfct_help(ct);
-
- /* Should be unconfirmed, so not in hash table yet */
- WARN_ON(nf_ct_is_confirmed(ct));
-
- nf_ct_dump_tuple(newreply);
-
- ct->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
- if (ct->master || (help && !hlist_empty(&help->expectations)))
- return;
-}
-EXPORT_SYMBOL_GPL(nf_conntrack_alter_reply);
-
/* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */
void __nf_ct_refresh_acct(struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
@@ -2187,11 +2169,11 @@ static int __nf_conntrack_update(struct net *net, struct sk_buff *skb,
dataoff = get_l4proto(skb, skb_network_offset(skb), l3num, &l4num);
if (dataoff <= 0)
- return -1;
+ return NF_DROP;
if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num,
l4num, net, &tuple))
- return -1;
+ return NF_DROP;
if (ct->status & IPS_SRC_NAT) {
memcpy(tuple.src.u3.all,
@@ -2211,7 +2193,7 @@ static int __nf_conntrack_update(struct net *net, struct sk_buff *skb,
h = nf_conntrack_find_get(net, nf_ct_zone(ct), &tuple);
if (!h)
- return 0;
+ return NF_ACCEPT;
/* Store status bits of the conntrack that is clashing to re-do NAT
* mangling according to what it has been done already to this packet.
@@ -2224,19 +2206,25 @@ static int __nf_conntrack_update(struct net *net, struct sk_buff *skb,
nat_hook = rcu_dereference(nf_nat_hook);
if (!nat_hook)
- return 0;
+ return NF_ACCEPT;
- if (status & IPS_SRC_NAT &&
- nat_hook->manip_pkt(skb, ct, NF_NAT_MANIP_SRC,
- IP_CT_DIR_ORIGINAL) == NF_DROP)
- return -1;
+ if (status & IPS_SRC_NAT) {
+ unsigned int verdict = nat_hook->manip_pkt(skb, ct,
+ NF_NAT_MANIP_SRC,
+ IP_CT_DIR_ORIGINAL);
+ if (verdict != NF_ACCEPT)
+ return verdict;
+ }
- if (status & IPS_DST_NAT &&
- nat_hook->manip_pkt(skb, ct, NF_NAT_MANIP_DST,
- IP_CT_DIR_ORIGINAL) == NF_DROP)
- return -1;
+ if (status & IPS_DST_NAT) {
+ unsigned int verdict = nat_hook->manip_pkt(skb, ct,
+ NF_NAT_MANIP_DST,
+ IP_CT_DIR_ORIGINAL);
+ if (verdict != NF_ACCEPT)
+ return verdict;
+ }
- return 0;
+ return NF_ACCEPT;
}
/* This packet is coming from userspace via nf_queue, complete the packet
@@ -2251,14 +2239,14 @@ static int nf_confirm_cthelper(struct sk_buff *skb, struct nf_conn *ct,
help = nfct_help(ct);
if (!help)
- return 0;
+ return NF_ACCEPT;
helper = rcu_dereference(help->helper);
if (!helper)
- return 0;
+ return NF_ACCEPT;
if (!(helper->flags & NF_CT_HELPER_F_USERSPACE))
- return 0;
+ return NF_ACCEPT;
switch (nf_ct_l3num(ct)) {
case NFPROTO_IPV4:
@@ -2273,42 +2261,44 @@ static int nf_confirm_cthelper(struct sk_buff *skb, struct nf_conn *ct,
protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum,
&frag_off);
if (protoff < 0 || (frag_off & htons(~0x7)) != 0)
- return 0;
+ return NF_ACCEPT;
break;
}
#endif
default:
- return 0;
+ return NF_ACCEPT;
}
if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
!nf_is_loopback_packet(skb)) {
if (!nf_ct_seq_adjust(skb, ct, ctinfo, protoff)) {
NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
- return -1;
+ return NF_DROP;
}
}
/* We've seen it coming out the other side: confirm it */
- return nf_conntrack_confirm(skb) == NF_DROP ? - 1 : 0;
+ return nf_conntrack_confirm(skb);
}
static int nf_conntrack_update(struct net *net, struct sk_buff *skb)
{
enum ip_conntrack_info ctinfo;
struct nf_conn *ct;
- int err;
ct = nf_ct_get(skb, &ctinfo);
if (!ct)
- return 0;
+ return NF_ACCEPT;
if (!nf_ct_is_confirmed(ct)) {
- err = __nf_conntrack_update(net, skb, ct, ctinfo);
- if (err < 0)
- return err;
+ int ret = __nf_conntrack_update(net, skb, ct, ctinfo);
+
+ if (ret != NF_ACCEPT)
+ return ret;
ct = nf_ct_get(skb, &ctinfo);
+ if (!ct)
+ return NF_ACCEPT;
}
return nf_confirm_cthelper(skb, ct, ctinfo);
diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c
index 0b513f7bf9f3..dd62cc12e775 100644
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -40,10 +40,10 @@ static const u8 nf_ct_ext_type_len[NF_CT_EXT_NUM] = {
[NF_CT_EXT_ECACHE] = sizeof(struct nf_conntrack_ecache),
#endif
#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
- [NF_CT_EXT_TSTAMP] = sizeof(struct nf_conn_acct),
+ [NF_CT_EXT_TSTAMP] = sizeof(struct nf_conn_tstamp),
#endif
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
- [NF_CT_EXT_TIMEOUT] = sizeof(struct nf_conn_tstamp),
+ [NF_CT_EXT_TIMEOUT] = sizeof(struct nf_conn_timeout),
#endif
#ifdef CONFIG_NF_CONNTRACK_LABELS
[NF_CT_EXT_LABELS] = sizeof(struct nf_conn_labels),
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index f22691f83853..4ed5878cb25b 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -194,12 +194,7 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
struct nf_conntrack_helper *helper = NULL;
struct nf_conn_help *help;
- /* We already got a helper explicitly attached. The function
- * nf_conntrack_alter_reply - in case NAT is in use - asks for looking
- * the helper up again. Since now the user is in full control of
- * making consistent helper configurations, skip this automatic
- * re-lookup, otherwise we'll lose the helper.
- */
+ /* We already got a helper explicitly attached (e.g. nft_ct) */
if (test_bit(IPS_HELPER_BIT, &ct->status))
return 0;
diff --git a/net/netfilter/nf_conntrack_labels.c b/net/netfilter/nf_conntrack_labels.c
index 6e70e137a0a6..6c46aad23313 100644
--- a/net/netfilter/nf_conntrack_labels.c
+++ b/net/netfilter/nf_conntrack_labels.c
@@ -11,8 +11,6 @@
#include <net/netfilter/nf_conntrack_ecache.h>
#include <net/netfilter/nf_conntrack_labels.h>
-static DEFINE_SPINLOCK(nf_connlabels_lock);
-
static int replace_u32(u32 *address, u32 mask, u32 new)
{
u32 old, tmp;
@@ -60,23 +58,24 @@ EXPORT_SYMBOL_GPL(nf_connlabels_replace);
int nf_connlabels_get(struct net *net, unsigned int bits)
{
+ int v;
+
if (BIT_WORD(bits) >= NF_CT_LABELS_MAX_SIZE / sizeof(long))
return -ERANGE;
- spin_lock(&nf_connlabels_lock);
- net->ct.labels_used++;
- spin_unlock(&nf_connlabels_lock);
-
BUILD_BUG_ON(NF_CT_LABELS_MAX_SIZE / sizeof(long) >= U8_MAX);
+ v = atomic_inc_return_relaxed(&net->ct.labels_used);
+ WARN_ON_ONCE(v <= 0);
+
return 0;
}
EXPORT_SYMBOL_GPL(nf_connlabels_get);
void nf_connlabels_put(struct net *net)
{
- spin_lock(&nf_connlabels_lock);
- net->ct.labels_used--;
- spin_unlock(&nf_connlabels_lock);
+ int v = atomic_dec_return_relaxed(&net->ct.labels_used);
+
+ WARN_ON_ONCE(v < 0);
}
EXPORT_SYMBOL_GPL(nf_connlabels_put);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 334db22199c1..3b846cbdc050 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -57,6 +57,7 @@
#include "nf_internals.h"
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("List and change connection tracking table");
struct ctnetlink_list_dump_ctx {
struct nf_conn *last;
@@ -875,6 +876,7 @@ struct ctnetlink_filter_u32 {
struct ctnetlink_filter {
u8 family;
+ bool zone_filter;
u_int32_t orig_flags;
u_int32_t reply_flags;
@@ -991,13 +993,16 @@ ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family)
if (err)
goto err_filter;
+ if (cda[CTA_ZONE]) {
+ err = ctnetlink_parse_zone(cda[CTA_ZONE], &filter->zone);
+ if (err < 0)
+ goto err_filter;
+ filter->zone_filter = true;
+ }
+
if (!cda[CTA_FILTER])
return filter;
- err = ctnetlink_parse_zone(cda[CTA_ZONE], &filter->zone);
- if (err < 0)
- goto err_filter;
-
err = ctnetlink_parse_filter(cda[CTA_FILTER], filter);
if (err < 0)
goto err_filter;
@@ -1042,7 +1047,7 @@ err_filter:
static bool ctnetlink_needs_filter(u8 family, const struct nlattr * const *cda)
{
- return family || cda[CTA_MARK] || cda[CTA_FILTER] || cda[CTA_STATUS];
+ return family || cda[CTA_MARK] || cda[CTA_FILTER] || cda[CTA_STATUS] || cda[CTA_ZONE];
}
static int ctnetlink_start(struct netlink_callback *cb)
@@ -1147,6 +1152,10 @@ static int ctnetlink_filter_match(struct nf_conn *ct, void *data)
if (filter->family && nf_ct_l3num(ct) != filter->family)
goto ignore_entry;
+ if (filter->zone_filter &&
+ !nf_ct_zone_equal_any(ct, &filter->zone))
+ goto ignore_entry;
+
if (filter->orig_flags) {
tuple = nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL);
if (!ctnetlink_filter_match_tuple(&filter->orig, tuple,
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index c928ff63b10e..f36727ed91e1 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -699,3 +699,4 @@ MODULE_ALIAS("ip_conntrack");
MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET));
MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET6));
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("IPv4 and IPv6 connection tracking");
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index b6bcc8f2f46b..4cc97f971264 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -112,7 +112,7 @@ static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = {
/* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA, sSA},
/* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL},/* Can't have Stale cookie*/
/* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA, sCL},/* 5.2.4 - Big TODO */
-/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL},/* Can't come in orig dir */
+/* cookie_ack */ {sCL, sCL, sCW, sES, sES, sSS, sSR, sSA, sCL},/* Can't come in orig dir */
/* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL, sCL},
/* heartbeat */ {sHS, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS},
/* heartbeat_ack*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS},
@@ -126,7 +126,7 @@ static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = {
/* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA, sIV},
/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA, sIV},
/* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA, sIV},
-/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV},/* Can't come in reply dir */
+/* cookie_echo */ {sIV, sCL, sCE, sCE, sES, sSS, sSR, sSA, sIV},/* Can't come in reply dir */
/* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA, sIV},
/* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL, sIV},
/* heartbeat */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS},
@@ -283,7 +283,7 @@ sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
pr_debug("Setting vtag %x for secondary conntrack\n",
sh->vtag);
ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = sh->vtag;
- } else {
+ } else if (sch->type == SCTP_CID_SHUTDOWN_ACK) {
/* If it is a shutdown ack OOTB packet, we expect a return
shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */
pr_debug("Setting vtag %x for new conn OOTB\n",
@@ -412,6 +412,9 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct,
/* (D) vtag must be same as init_vtag as found in INIT_ACK */
if (sh->vtag != ct->proto.sctp.vtag[dir])
goto out_unlock;
+ } else if (sch->type == SCTP_CID_COOKIE_ACK) {
+ ct->proto.sctp.init[dir] = 0;
+ ct->proto.sctp.init[!dir] = 0;
} else if (sch->type == SCTP_CID_HEARTBEAT) {
if (ct->proto.sctp.vtag[dir] == 0) {
pr_debug("Setting %d vtag %x for dir %d\n", sch->type, sh->vtag, dir);
@@ -461,16 +464,18 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct,
}
/* If it is an INIT or an INIT ACK note down the vtag */
- if (sch->type == SCTP_CID_INIT ||
- sch->type == SCTP_CID_INIT_ACK) {
- struct sctp_inithdr _inithdr, *ih;
+ if (sch->type == SCTP_CID_INIT) {
+ struct sctp_inithdr _ih, *ih;
- ih = skb_header_pointer(skb, offset + sizeof(_sch),
- sizeof(_inithdr), &_inithdr);
- if (ih == NULL)
+ ih = skb_header_pointer(skb, offset + sizeof(_sch), sizeof(*ih), &_ih);
+ if (!ih)
goto out_unlock;
- pr_debug("Setting vtag %x for dir %d\n",
- ih->init_tag, !dir);
+
+ if (ct->proto.sctp.init[dir] && ct->proto.sctp.init[!dir])
+ ct->proto.sctp.init[!dir] = 0;
+ ct->proto.sctp.init[dir] = 1;
+
+ pr_debug("Setting vtag %x for dir %d\n", ih->init_tag, !dir);
ct->proto.sctp.vtag[!dir] = ih->init_tag;
/* don't renew timeout on init retransmit so
@@ -481,6 +486,24 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct,
old_state == SCTP_CONNTRACK_CLOSED &&
nf_ct_is_confirmed(ct))
ignore = true;
+ } else if (sch->type == SCTP_CID_INIT_ACK) {
+ struct sctp_inithdr _ih, *ih;
+ __be32 vtag;
+
+ ih = skb_header_pointer(skb, offset + sizeof(_sch), sizeof(*ih), &_ih);
+ if (!ih)
+ goto out_unlock;
+
+ vtag = ct->proto.sctp.vtag[!dir];
+ if (!ct->proto.sctp.init[!dir] && vtag && vtag != ih->init_tag)
+ goto out_unlock;
+ /* collision */
+ if (ct->proto.sctp.init[dir] && ct->proto.sctp.init[!dir] &&
+ vtag != ih->init_tag)
+ goto out_unlock;
+
+ pr_debug("Setting vtag %x for dir %d\n", ih->init_tag, !dir);
+ ct->proto.sctp.vtag[!dir] = ih->init_tag;
}
ct->proto.sctp.state = new_state;
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 4018acb1d674..ae493599a3ef 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -457,7 +457,8 @@ static void tcp_init_sender(struct ip_ct_tcp_state *sender,
const struct sk_buff *skb,
unsigned int dataoff,
const struct tcphdr *tcph,
- u32 end, u32 win)
+ u32 end, u32 win,
+ enum ip_conntrack_dir dir)
{
/* SYN-ACK in reply to a SYN
* or SYN from reply direction in simultaneous open.
@@ -471,7 +472,8 @@ static void tcp_init_sender(struct ip_ct_tcp_state *sender,
* Both sides must send the Window Scale option
* to enable window scaling in either direction.
*/
- if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
+ if (dir == IP_CT_DIR_REPLY &&
+ !(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE)) {
sender->td_scale = 0;
receiver->td_scale = 0;
@@ -542,7 +544,7 @@ tcp_in_window(struct nf_conn *ct, enum ip_conntrack_dir dir,
if (tcph->syn) {
tcp_init_sender(sender, receiver,
skb, dataoff, tcph,
- end, win);
+ end, win, dir);
if (!tcph->ack)
/* Simultaneous open */
return NFCT_TCP_ACCEPT;
@@ -585,7 +587,7 @@ tcp_in_window(struct nf_conn *ct, enum ip_conntrack_dir dir,
*/
tcp_init_sender(sender, receiver,
skb, dataoff, tcph,
- end, win);
+ end, win, dir);
if (dir == IP_CT_DIR_REPLY && !tcph->ack)
return NFCT_TCP_ACCEPT;
@@ -835,7 +837,8 @@ static bool tcp_error(const struct tcphdr *th,
static noinline bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
unsigned int dataoff,
- const struct tcphdr *th)
+ const struct tcphdr *th,
+ const struct nf_hook_state *state)
{
enum tcp_conntrack new_state;
struct net *net = nf_ct_net(ct);
@@ -846,7 +849,7 @@ static noinline bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
/* Invalid: delete conntrack */
if (new_state >= TCP_CONNTRACK_MAX) {
- pr_debug("nf_ct_tcp: invalid new deleting.\n");
+ tcp_error_log(skb, state, "invalid new");
return false;
}
@@ -980,7 +983,7 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
if (tcp_error(th, skb, dataoff, state))
return -NF_ACCEPT;
- if (!nf_ct_is_confirmed(ct) && !tcp_new(ct, skb, dataoff, th))
+ if (!nf_ct_is_confirmed(ct) && !tcp_new(ct, skb, dataoff, th, state))
return -NF_ACCEPT;
spin_lock_bh(&ct->lock);
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 1d34d700bd09..920a5a29ae1d 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -316,12 +316,6 @@ void flow_offload_refresh(struct nf_flowtable *flow_table,
}
EXPORT_SYMBOL_GPL(flow_offload_refresh);
-static bool nf_flow_is_outdated(const struct flow_offload *flow)
-{
- return test_bit(IPS_SEEN_REPLY_BIT, &flow->ct->status) &&
- !test_bit(NF_FLOW_HW_ESTABLISHED, &flow->flags);
-}
-
static inline bool nf_flow_has_expired(const struct flow_offload *flow)
{
return nf_flow_timeout_delta(flow->timeout) <= 0;
@@ -407,12 +401,18 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table,
return err;
}
+static bool nf_flow_custom_gc(struct nf_flowtable *flow_table,
+ const struct flow_offload *flow)
+{
+ return flow_table->type->gc && flow_table->type->gc(flow);
+}
+
static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table,
struct flow_offload *flow, void *data)
{
if (nf_flow_has_expired(flow) ||
nf_ct_is_dying(flow->ct) ||
- nf_flow_is_outdated(flow))
+ nf_flow_custom_gc(flow_table, flow))
flow_offload_teardown(flow);
if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) {
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 8cc52d2bd31b..e16f158388bb 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -193,11 +193,12 @@ void nf_logger_put(int pf, enum nf_log_type type)
return;
}
- BUG_ON(loggers[pf][type] == NULL);
-
rcu_read_lock();
logger = rcu_dereference(loggers[pf][type]);
- module_put(logger->me);
+ if (!logger)
+ WARN_ON_ONCE(1);
+ else
+ module_put(logger->me);
rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(nf_logger_put);
diff --git a/net/netfilter/nf_log_syslog.c b/net/netfilter/nf_log_syslog.c
index c66689ad2b49..58402226045e 100644
--- a/net/netfilter/nf_log_syslog.c
+++ b/net/netfilter/nf_log_syslog.c
@@ -111,7 +111,8 @@ nf_log_dump_packet_common(struct nf_log_buf *m, u8 pf,
unsigned int hooknum, const struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
- const struct nf_loginfo *loginfo, const char *prefix)
+ const struct nf_loginfo *loginfo, const char *prefix,
+ struct net *net)
{
const struct net_device *physoutdev __maybe_unused;
const struct net_device *physindev __maybe_unused;
@@ -121,7 +122,7 @@ nf_log_dump_packet_common(struct nf_log_buf *m, u8 pf,
in ? in->name : "",
out ? out->name : "");
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
- physindev = nf_bridge_get_physindev(skb);
+ physindev = nf_bridge_get_physindev(skb, net);
if (physindev && in != physindev)
nf_log_buf_add(m, "PHYSIN=%s ", physindev->name);
physoutdev = nf_bridge_get_physoutdev(skb);
@@ -148,7 +149,7 @@ static void nf_log_arp_packet(struct net *net, u_int8_t pf,
loginfo = &default_loginfo;
nf_log_dump_packet_common(m, pf, hooknum, skb, in, out, loginfo,
- prefix);
+ prefix, net);
dump_arp_packet(m, loginfo, skb, skb_network_offset(skb));
nf_log_buf_close(m);
@@ -845,7 +846,7 @@ static void nf_log_ip_packet(struct net *net, u_int8_t pf,
loginfo = &default_loginfo;
nf_log_dump_packet_common(m, pf, hooknum, skb, in,
- out, loginfo, prefix);
+ out, loginfo, prefix, net);
if (in)
dump_mac_header(m, loginfo, skb);
@@ -880,7 +881,7 @@ static void nf_log_ip6_packet(struct net *net, u_int8_t pf,
loginfo = &default_loginfo;
nf_log_dump_packet_common(m, pf, hooknum, skb, in, out,
- loginfo, prefix);
+ loginfo, prefix, net);
if (in)
dump_mac_header(m, loginfo, skb);
@@ -916,7 +917,7 @@ static void nf_log_unknown_packet(struct net *net, u_int8_t pf,
loginfo = &default_loginfo;
nf_log_dump_packet_common(m, pf, hooknum, skb, in, out, loginfo,
- prefix);
+ prefix, net);
dump_mac_header(m, loginfo, skb);
diff --git a/net/netfilter/nf_nat_bpf.c b/net/netfilter/nf_nat_bpf.c
index 141ee7783223..6e3b2f58855f 100644
--- a/net/netfilter/nf_nat_bpf.c
+++ b/net/netfilter/nf_nat_bpf.c
@@ -12,9 +12,7 @@
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_nat.h>
-__diag_push();
-__diag_ignore_all("-Wmissing-prototypes",
- "Global functions as their definitions will be in nf_nat BTF");
+__bpf_kfunc_start_defs();
/* bpf_ct_set_nat_info - Set source or destination nat address
*
@@ -54,7 +52,7 @@ __bpf_kfunc int bpf_ct_set_nat_info(struct nf_conn___init *nfct,
return nf_nat_setup_info(ct, &range, manip) == NF_DROP ? -ENOMEM : 0;
}
-__diag_pop()
+__bpf_kfunc_end_defs();
BTF_SET8_START(nf_nat_kfunc_set)
BTF_ID_FLAGS(func, bpf_ct_set_nat_info, KF_TRUSTED_ARGS)
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index c4e0516a8dfa..c3d7ecbc777c 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -1263,6 +1263,7 @@ static void __exit nf_nat_cleanup(void)
}
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Network address translation core");
module_init(nf_nat_init);
module_exit(nf_nat_cleanup);
diff --git a/net/netfilter/nf_nat_ovs.c b/net/netfilter/nf_nat_ovs.c
index 551abd2da614..0f9a559f6207 100644
--- a/net/netfilter/nf_nat_ovs.c
+++ b/net/netfilter/nf_nat_ovs.c
@@ -75,9 +75,10 @@ static int nf_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct,
}
err = nf_nat_packet(ct, ctinfo, hooknum, skb);
+out:
if (err == NF_ACCEPT)
*action |= BIT(maniptype);
-out:
+
return err;
}
diff --git a/net/netfilter/nf_nat_proto.c b/net/netfilter/nf_nat_proto.c
index 48cc60084d28..dc450cc81222 100644
--- a/net/netfilter/nf_nat_proto.c
+++ b/net/netfilter/nf_nat_proto.c
@@ -668,7 +668,7 @@ static int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int
struct flowi fl;
int err;
- err = xfrm_decode_session(skb, &fl, family);
+ err = xfrm_decode_session(net, skb, &fl, family);
if (err < 0)
return err;
@@ -697,6 +697,31 @@ static int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int
}
#endif
+static bool nf_nat_inet_port_was_mangled(const struct sk_buff *skb, __be16 sport)
+{
+ enum ip_conntrack_info ctinfo;
+ enum ip_conntrack_dir dir;
+ const struct nf_conn *ct;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (!ct)
+ return false;
+
+ switch (nf_ct_protonum(ct)) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ break;
+ default:
+ return false;
+ }
+
+ dir = CTINFO2DIR(ctinfo);
+ if (dir != IP_CT_DIR_ORIGINAL)
+ return false;
+
+ return ct->tuplehash[!dir].tuple.dst.u.all != sport;
+}
+
static unsigned int
nf_nat_ipv4_local_in(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
@@ -707,8 +732,20 @@ nf_nat_ipv4_local_in(void *priv, struct sk_buff *skb,
ret = nf_nat_ipv4_fn(priv, skb, state);
- if (ret == NF_ACCEPT && sk && saddr != ip_hdr(skb)->saddr &&
- !inet_sk_transparent(sk))
+ if (ret != NF_ACCEPT || !sk || inet_sk_transparent(sk))
+ return ret;
+
+ /* skb has a socket assigned via tcp edemux. We need to check
+ * if nf_nat_ipv4_fn() has mangled the packet in a way that
+ * edemux would not have found this socket.
+ *
+ * This includes both changes to the source address and changes
+ * to the source port, which are both handled by the
+ * nf_nat_ipv4_fn() call above -- long after tcp/udp early demux
+ * might have found a socket for the old (pre-snat) address.
+ */
+ if (saddr != ip_hdr(skb)->saddr ||
+ nf_nat_inet_port_was_mangled(skb, sk->sk_dport))
skb_orphan(skb); /* TCP edemux obtained wrong socket */
return ret;
@@ -938,14 +975,36 @@ nf_nat_ipv6_fn(void *priv, struct sk_buff *skb,
}
static unsigned int
+nf_nat_ipv6_local_in(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct in6_addr saddr = ipv6_hdr(skb)->saddr;
+ struct sock *sk = skb->sk;
+ unsigned int ret;
+
+ ret = nf_nat_ipv6_fn(priv, skb, state);
+
+ if (ret != NF_ACCEPT || !sk || inet_sk_transparent(sk))
+ return ret;
+
+ /* see nf_nat_ipv4_local_in */
+ if (ipv6_addr_cmp(&saddr, &ipv6_hdr(skb)->saddr) ||
+ nf_nat_inet_port_was_mangled(skb, sk->sk_dport))
+ skb_orphan(skb);
+
+ return ret;
+}
+
+static unsigned int
nf_nat_ipv6_in(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- unsigned int ret;
+ unsigned int ret, verdict;
struct in6_addr daddr = ipv6_hdr(skb)->daddr;
ret = nf_nat_ipv6_fn(priv, skb, state);
- if (ret != NF_DROP && ret != NF_STOLEN &&
+ verdict = ret & NF_VERDICT_MASK;
+ if (verdict != NF_DROP && verdict != NF_STOLEN &&
ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr))
skb_dst_drop(skb);
@@ -1051,7 +1110,7 @@ static const struct nf_hook_ops nf_nat_ipv6_ops[] = {
},
/* After packet filtering, change source */
{
- .hook = nf_nat_ipv6_fn,
+ .hook = nf_nat_ipv6_local_in,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP6_PRI_NAT_SRC,
diff --git a/net/netfilter/nf_nat_redirect.c b/net/netfilter/nf_nat_redirect.c
index 6616ba5d0b04..5b37487d9d11 100644
--- a/net/netfilter/nf_nat_redirect.c
+++ b/net/netfilter/nf_nat_redirect.c
@@ -80,6 +80,26 @@ EXPORT_SYMBOL_GPL(nf_nat_redirect_ipv4);
static const struct in6_addr loopback_addr = IN6ADDR_LOOPBACK_INIT;
+static bool nf_nat_redirect_ipv6_usable(const struct inet6_ifaddr *ifa, unsigned int scope)
+{
+ unsigned int ifa_addr_type = ipv6_addr_type(&ifa->addr);
+
+ if (ifa_addr_type & IPV6_ADDR_MAPPED)
+ return false;
+
+ if ((ifa->flags & IFA_F_TENTATIVE) && (!(ifa->flags & IFA_F_OPTIMISTIC)))
+ return false;
+
+ if (scope) {
+ unsigned int ifa_scope = ifa_addr_type & IPV6_ADDR_SCOPE_MASK;
+
+ if (!(scope & ifa_scope))
+ return false;
+ }
+
+ return true;
+}
+
unsigned int
nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
unsigned int hooknum)
@@ -89,14 +109,19 @@ nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
if (hooknum == NF_INET_LOCAL_OUT) {
newdst.in6 = loopback_addr;
} else {
+ unsigned int scope = ipv6_addr_scope(&ipv6_hdr(skb)->daddr);
struct inet6_dev *idev;
- struct inet6_ifaddr *ifa;
bool addr = false;
idev = __in6_dev_get(skb->dev);
if (idev != NULL) {
+ const struct inet6_ifaddr *ifa;
+
read_lock_bh(&idev->lock);
list_for_each_entry(ifa, &idev->addr_list, if_list) {
+ if (!nf_nat_redirect_ipv6_usable(ifa, scope))
+ continue;
+
newdst.in6 = ifa->addr;
addr = true;
break;
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 63d1516816b1..e2f334f70281 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -82,11 +82,9 @@ static void __nf_queue_entry_init_physdevs(struct nf_queue_entry *entry)
{
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
const struct sk_buff *skb = entry->skb;
- struct nf_bridge_info *nf_bridge;
- nf_bridge = nf_bridge_info_get(skb);
- if (nf_bridge) {
- entry->physin = nf_bridge_get_physindev(skb);
+ if (nf_bridge_info_exists(skb)) {
+ entry->physin = nf_bridge_get_physindev(skb, entry->state.net);
entry->physout = nf_bridge_get_physoutdev(skb);
} else {
entry->physin = NULL;
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index 16915f8eef2b..fbbc4fd37349 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -153,7 +153,7 @@ void synproxy_init_timestamp_cookie(const struct nf_synproxy_info *info,
struct synproxy_options *opts)
{
opts->tsecr = opts->tsval;
- opts->tsval = tcp_time_stamp_raw() & ~0x3f;
+ opts->tsval = tcp_clock_ms() & ~0x3f;
if (opts->options & NF_SYNPROXY_OPT_WSCALE) {
opts->tsval |= opts->wscale;
@@ -617,7 +617,7 @@ synproxy_recv_client_ack(struct net *net,
struct synproxy_net *snet = synproxy_pernet(net);
int mss;
- mss = __cookie_v4_check(ip_hdr(skb), th, ntohl(th->ack_seq) - 1);
+ mss = __cookie_v4_check(ip_hdr(skb), th);
if (mss == 0) {
this_cpu_inc(snet->stats->cookie_invalid);
return false;
@@ -1034,7 +1034,7 @@ synproxy_recv_client_ack_ipv6(struct net *net,
struct synproxy_net *snet = synproxy_pernet(net);
int mss;
- mss = nf_cookie_v6_check(ipv6_hdr(skb), th, ntohl(th->ack_seq) - 1);
+ mss = nf_cookie_v6_check(ipv6_hdr(skb), th);
if (mss == 0) {
this_cpu_inc(snet->stats->cookie_invalid);
return false;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 41b826dff6f5..f8e3f70c35bd 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -24,6 +24,7 @@
#include <net/sock.h>
#define NFT_MODULE_AUTOLOAD_LIMIT (MODULE_NAME_LEN - sizeof("nft-expr-255-"))
+#define NFT_SET_MAX_ANONLEN 16
unsigned int nf_tables_net_id __read_mostly;
@@ -102,6 +103,7 @@ static const u8 nft2audit_op[NFT_MSG_MAX] = { // enum nf_tables_msg_types
[NFT_MSG_NEWFLOWTABLE] = AUDIT_NFT_OP_FLOWTABLE_REGISTER,
[NFT_MSG_GETFLOWTABLE] = AUDIT_NFT_OP_INVALID,
[NFT_MSG_DELFLOWTABLE] = AUDIT_NFT_OP_FLOWTABLE_UNREGISTER,
+ [NFT_MSG_GETSETELEM_RESET] = AUDIT_NFT_OP_SETELEM_RESET,
};
static void nft_validate_state_update(struct nft_table *table, u8 new_validate_state)
@@ -590,9 +592,9 @@ static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
static int nft_mapelem_deactivate(const struct nft_ctx *ctx,
struct nft_set *set,
const struct nft_set_iter *iter,
- struct nft_set_elem *elem)
+ struct nft_elem_priv *elem_priv)
{
- nft_setelem_data_deactivate(ctx->net, set, elem);
+ nft_setelem_data_deactivate(ctx->net, set, elem_priv);
return 0;
}
@@ -600,7 +602,7 @@ static int nft_mapelem_deactivate(const struct nft_ctx *ctx,
struct nft_set_elem_catchall {
struct list_head list;
struct rcu_head rcu;
- void *elem;
+ struct nft_elem_priv *elem;
};
static void nft_map_catchall_deactivate(const struct nft_ctx *ctx,
@@ -608,7 +610,6 @@ static void nft_map_catchall_deactivate(const struct nft_ctx *ctx,
{
u8 genmask = nft_genmask_next(ctx->net);
struct nft_set_elem_catchall *catchall;
- struct nft_set_elem elem;
struct nft_set_ext *ext;
list_for_each_entry(catchall, &set->catchall_list, list) {
@@ -616,8 +617,7 @@ static void nft_map_catchall_deactivate(const struct nft_ctx *ctx,
if (!nft_set_elem_active(ext, genmask))
continue;
- elem.priv = catchall->elem;
- nft_setelem_data_deactivate(ctx->net, set, &elem);
+ nft_setelem_data_deactivate(ctx->net, set, catchall->elem);
break;
}
}
@@ -804,7 +804,7 @@ static struct nft_table *nft_table_lookup(const struct net *net,
static struct nft_table *nft_table_lookup_byhandle(const struct net *net,
const struct nlattr *nla,
- u8 genmask, u32 nlpid)
+ int family, u8 genmask, u32 nlpid)
{
struct nftables_pernet *nft_net;
struct nft_table *table;
@@ -812,6 +812,7 @@ static struct nft_table *nft_table_lookup_byhandle(const struct net *net,
nft_net = nft_pernet(net);
list_for_each_entry(table, &nft_net->tables, list) {
if (be64_to_cpu(nla_get_be64(nla)) == table->handle &&
+ table->family == family &&
nft_active_genmask(table, genmask)) {
if (nft_table_has_owner(table) &&
nlpid && table->nlpid != nlpid)
@@ -1218,6 +1219,10 @@ static int nf_tables_updtable(struct nft_ctx *ctx)
flags & NFT_TABLE_F_OWNER))
return -EOPNOTSUPP;
+ /* No dormant off/on/off/on games in single transaction */
+ if (ctx->table->flags & __NFT_TABLE_F_UPDATE)
+ return -EINVAL;
+
trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE,
sizeof(struct nft_trans_table));
if (trans == NULL)
@@ -1431,7 +1436,7 @@ static int nft_flush_table(struct nft_ctx *ctx)
if (!nft_is_active_next(ctx->net, chain))
continue;
- if (nft_chain_is_bound(chain))
+ if (nft_chain_binding(chain))
continue;
ctx->chain = chain;
@@ -1445,8 +1450,7 @@ static int nft_flush_table(struct nft_ctx *ctx)
if (!nft_is_active_next(ctx->net, set))
continue;
- if (nft_set_is_anonymous(set) &&
- !list_empty(&set->bindings))
+ if (nft_set_is_anonymous(set))
continue;
err = nft_delset(ctx, set);
@@ -1476,7 +1480,7 @@ static int nft_flush_table(struct nft_ctx *ctx)
if (!nft_is_active_next(ctx->net, chain))
continue;
- if (nft_chain_is_bound(chain))
+ if (nft_chain_binding(chain))
continue;
ctx->chain = chain;
@@ -1542,7 +1546,7 @@ static int nf_tables_deltable(struct sk_buff *skb, const struct nfnl_info *info,
if (nla[NFTA_TABLE_HANDLE]) {
attr = nla[NFTA_TABLE_HANDLE];
- table = nft_table_lookup_byhandle(net, attr, genmask,
+ table = nft_table_lookup_byhandle(net, attr, family, genmask,
NETLINK_CB(skb).portid);
} else {
attr = nla[NFTA_TABLE_NAME];
@@ -2258,7 +2262,16 @@ static int nft_chain_parse_hook(struct net *net,
return -EOPNOTSUPP;
}
- type = basechain->type;
+ if (nla[NFTA_CHAIN_TYPE]) {
+ type = __nf_tables_chain_type_lookup(nla[NFTA_CHAIN_TYPE],
+ family);
+ if (!type) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TYPE]);
+ return -ENOENT;
+ }
+ } else {
+ type = basechain->type;
+ }
}
if (!try_module_get(type->owner)) {
@@ -2909,6 +2922,9 @@ static int nf_tables_delchain(struct sk_buff *skb, const struct nfnl_info *info,
return PTR_ERR(chain);
}
+ if (nft_chain_binding(chain))
+ return -EOPNOTSUPP;
+
nft_ctx_init(&ctx, net, skb, info->nlh, family, table, chain, nla);
if (nla[NFTA_CHAIN_HOOK]) {
@@ -2962,6 +2978,9 @@ static int nf_tables_delchain(struct sk_buff *skb, const struct nfnl_info *info,
*/
int nft_register_expr(struct nft_expr_type *type)
{
+ if (WARN_ON_ONCE(type->maxattr > NFT_EXPR_MAXATTR))
+ return -ENOMEM;
+
nfnl_lock(NFNL_SUBSYS_NFTABLES);
if (type->family == NFPROTO_UNSPEC)
list_add_tail_rcu(&type->list, &nf_tables_expressions);
@@ -3159,7 +3178,7 @@ int nft_expr_inner_parse(const struct nft_ctx *ctx, const struct nlattr *nla,
if (err < 0)
return err;
- if (!tb[NFTA_EXPR_DATA])
+ if (!tb[NFTA_EXPR_DATA] || !tb[NFTA_EXPR_NAME])
return -EINVAL;
type = __nft_expr_type_get(ctx->family, tb[NFTA_EXPR_NAME]);
@@ -3256,14 +3275,13 @@ int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src)
{
int err;
- if (src->ops->clone) {
- dst->ops = src->ops;
- err = src->ops->clone(dst, src);
- if (err < 0)
- return err;
- } else {
- memcpy(dst, src, src->ops->size);
- }
+ if (WARN_ON_ONCE(!src->ops->clone))
+ return -EINVAL;
+
+ dst->ops = src->ops;
+ err = src->ops->clone(dst, src);
+ if (err < 0)
+ return err;
__module_get(src->ops->type->owner);
@@ -3309,7 +3327,7 @@ static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = {
[NFTA_RULE_CHAIN] = { .type = NLA_STRING,
.len = NFT_CHAIN_MAXNAMELEN - 1 },
[NFTA_RULE_HANDLE] = { .type = NLA_U64 },
- [NFTA_RULE_EXPRESSIONS] = { .type = NLA_NESTED },
+ [NFTA_RULE_EXPRESSIONS] = NLA_POLICY_NESTED_ARRAY(nft_expr_policy),
[NFTA_RULE_COMPAT] = { .type = NLA_NESTED },
[NFTA_RULE_POSITION] = { .type = NLA_U64 },
[NFTA_RULE_USERDATA] = { .type = NLA_BINARY,
@@ -3421,33 +3439,44 @@ err:
nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS);
}
+static void audit_log_rule_reset(const struct nft_table *table,
+ unsigned int base_seq,
+ unsigned int nentries)
+{
+ char *buf = kasprintf(GFP_ATOMIC, "%s:%u",
+ table->name, base_seq);
+
+ audit_log_nfcfg(buf, table->family, nentries,
+ AUDIT_NFT_OP_RULE_RESET, GFP_ATOMIC);
+ kfree(buf);
+}
+
struct nft_rule_dump_ctx {
+ unsigned int s_idx;
char *table;
char *chain;
+ bool reset;
};
static int __nf_tables_dump_rules(struct sk_buff *skb,
unsigned int *idx,
struct netlink_callback *cb,
const struct nft_table *table,
- const struct nft_chain *chain,
- bool reset)
+ const struct nft_chain *chain)
{
+ struct nft_rule_dump_ctx *ctx = (void *)cb->ctx;
struct net *net = sock_net(skb->sk);
const struct nft_rule *rule, *prule;
- unsigned int s_idx = cb->args[0];
+ unsigned int entries = 0;
+ int ret = 0;
u64 handle;
prule = NULL;
list_for_each_entry_rcu(rule, &chain->rules, list) {
if (!nft_is_active(net, rule))
goto cont_skip;
- if (*idx < s_idx)
+ if (*idx < ctx->s_idx)
goto cont;
- if (*idx > s_idx) {
- memset(&cb->args[1], 0,
- sizeof(cb->args) - sizeof(cb->args[0]));
- }
if (prule)
handle = prule->handle;
else
@@ -3458,33 +3487,35 @@ static int __nf_tables_dump_rules(struct sk_buff *skb,
NFT_MSG_NEWRULE,
NLM_F_MULTI | NLM_F_APPEND,
table->family,
- table, chain, rule, handle, reset) < 0)
- return 1;
-
+ table, chain, rule, handle, ctx->reset) < 0) {
+ ret = 1;
+ break;
+ }
+ entries++;
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
prule = rule;
cont_skip:
(*idx)++;
}
- return 0;
+
+ if (ctx->reset && entries)
+ audit_log_rule_reset(table, cb->seq, entries);
+
+ return ret;
}
static int nf_tables_dump_rules(struct sk_buff *skb,
struct netlink_callback *cb)
{
const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
- const struct nft_rule_dump_ctx *ctx = cb->data;
+ struct nft_rule_dump_ctx *ctx = (void *)cb->ctx;
struct nft_table *table;
const struct nft_chain *chain;
unsigned int idx = 0;
struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
struct nftables_pernet *nft_net;
- bool reset = false;
-
- if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETRULE_RESET)
- reset = true;
rcu_read_lock();
nft_net = nft_pernet(net);
@@ -3494,10 +3525,10 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
if (family != NFPROTO_UNSPEC && family != table->family)
continue;
- if (ctx && ctx->table && strcmp(ctx->table, table->name) != 0)
+ if (ctx->table && strcmp(ctx->table, table->name) != 0)
continue;
- if (ctx && ctx->table && ctx->chain) {
+ if (ctx->table && ctx->chain) {
struct rhlist_head *list, *tmp;
list = rhltable_lookup(&table->chains_ht, ctx->chain,
@@ -3509,7 +3540,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
if (!nft_is_active(net, chain))
continue;
__nf_tables_dump_rules(skb, &idx,
- cb, table, chain, reset);
+ cb, table, chain);
break;
}
goto done;
@@ -3517,68 +3548,81 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
list_for_each_entry_rcu(chain, &table->chains, list) {
if (__nf_tables_dump_rules(skb, &idx,
- cb, table, chain, reset))
+ cb, table, chain))
goto done;
}
- if (ctx && ctx->table)
+ if (ctx->table)
break;
}
done:
rcu_read_unlock();
- cb->args[0] = idx;
+ ctx->s_idx = idx;
return skb->len;
}
+static int nf_tables_dumpreset_rules(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ struct nftables_pernet *nft_net = nft_pernet(sock_net(skb->sk));
+ int ret;
+
+ /* Mutex is held is to prevent that two concurrent dump-and-reset calls
+ * do not underrun counters and quotas. The commit_mutex is used for
+ * the lack a better lock, this is not transaction path.
+ */
+ mutex_lock(&nft_net->commit_mutex);
+ ret = nf_tables_dump_rules(skb, cb);
+ mutex_unlock(&nft_net->commit_mutex);
+
+ return ret;
+}
+
static int nf_tables_dump_rules_start(struct netlink_callback *cb)
{
+ struct nft_rule_dump_ctx *ctx = (void *)cb->ctx;
const struct nlattr * const *nla = cb->data;
- struct nft_rule_dump_ctx *ctx = NULL;
- if (nla[NFTA_RULE_TABLE] || nla[NFTA_RULE_CHAIN]) {
- ctx = kzalloc(sizeof(*ctx), GFP_ATOMIC);
- if (!ctx)
- return -ENOMEM;
+ BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx));
- if (nla[NFTA_RULE_TABLE]) {
- ctx->table = nla_strdup(nla[NFTA_RULE_TABLE],
- GFP_ATOMIC);
- if (!ctx->table) {
- kfree(ctx);
- return -ENOMEM;
- }
- }
- if (nla[NFTA_RULE_CHAIN]) {
- ctx->chain = nla_strdup(nla[NFTA_RULE_CHAIN],
- GFP_ATOMIC);
- if (!ctx->chain) {
- kfree(ctx->table);
- kfree(ctx);
- return -ENOMEM;
- }
+ if (nla[NFTA_RULE_TABLE]) {
+ ctx->table = nla_strdup(nla[NFTA_RULE_TABLE], GFP_ATOMIC);
+ if (!ctx->table)
+ return -ENOMEM;
+ }
+ if (nla[NFTA_RULE_CHAIN]) {
+ ctx->chain = nla_strdup(nla[NFTA_RULE_CHAIN], GFP_ATOMIC);
+ if (!ctx->chain) {
+ kfree(ctx->table);
+ return -ENOMEM;
}
}
-
- cb->data = ctx;
return 0;
}
+static int nf_tables_dumpreset_rules_start(struct netlink_callback *cb)
+{
+ struct nft_rule_dump_ctx *ctx = (void *)cb->ctx;
+
+ ctx->reset = true;
+
+ return nf_tables_dump_rules_start(cb);
+}
+
static int nf_tables_dump_rules_done(struct netlink_callback *cb)
{
- struct nft_rule_dump_ctx *ctx = cb->data;
+ struct nft_rule_dump_ctx *ctx = (void *)cb->ctx;
- if (ctx) {
- kfree(ctx->table);
- kfree(ctx->chain);
- kfree(ctx);
- }
+ kfree(ctx->table);
+ kfree(ctx->chain);
return 0;
}
/* called with rcu_read_lock held */
-static int nf_tables_getrule(struct sk_buff *skb, const struct nfnl_info *info,
- const struct nlattr * const nla[])
+static struct sk_buff *
+nf_tables_getrule_single(u32 portid, const struct nfnl_info *info,
+ const struct nlattr * const nla[], bool reset)
{
struct netlink_ext_ack *extack = info->extack;
u8 genmask = nft_genmask_cur(info->net);
@@ -3588,57 +3632,110 @@ static int nf_tables_getrule(struct sk_buff *skb, const struct nfnl_info *info,
struct net *net = info->net;
struct nft_table *table;
struct sk_buff *skb2;
- bool reset = false;
int err;
- if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
- struct netlink_dump_control c = {
- .start= nf_tables_dump_rules_start,
- .dump = nf_tables_dump_rules,
- .done = nf_tables_dump_rules_done,
- .module = THIS_MODULE,
- .data = (void *)nla,
- };
-
- return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
- }
-
table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask, 0);
if (IS_ERR(table)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_TABLE]);
- return PTR_ERR(table);
+ return ERR_CAST(table);
}
chain = nft_chain_lookup(net, table, nla[NFTA_RULE_CHAIN], genmask);
if (IS_ERR(chain)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]);
- return PTR_ERR(chain);
+ return ERR_CAST(chain);
}
rule = nft_rule_lookup(chain, nla[NFTA_RULE_HANDLE]);
if (IS_ERR(rule)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_HANDLE]);
- return PTR_ERR(rule);
+ return ERR_CAST(rule);
}
skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
if (!skb2)
- return -ENOMEM;
-
- if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETRULE_RESET)
- reset = true;
+ return ERR_PTR(-ENOMEM);
- err = nf_tables_fill_rule_info(skb2, net, NETLINK_CB(skb).portid,
+ err = nf_tables_fill_rule_info(skb2, net, portid,
info->nlh->nlmsg_seq, NFT_MSG_NEWRULE, 0,
family, table, chain, rule, 0, reset);
- if (err < 0)
- goto err_fill_rule_info;
+ if (err < 0) {
+ kfree_skb(skb2);
+ return ERR_PTR(err);
+ }
- return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid);
+ return skb2;
+}
-err_fill_rule_info:
- kfree_skb(skb2);
- return err;
+static int nf_tables_getrule(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const nla[])
+{
+ u32 portid = NETLINK_CB(skb).portid;
+ struct net *net = info->net;
+ struct sk_buff *skb2;
+
+ if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
+ struct netlink_dump_control c = {
+ .start= nf_tables_dump_rules_start,
+ .dump = nf_tables_dump_rules,
+ .done = nf_tables_dump_rules_done,
+ .module = THIS_MODULE,
+ .data = (void *)nla,
+ };
+
+ return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
+ }
+
+ skb2 = nf_tables_getrule_single(portid, info, nla, false);
+ if (IS_ERR(skb2))
+ return PTR_ERR(skb2);
+
+ return nfnetlink_unicast(skb2, net, portid);
+}
+
+static int nf_tables_getrule_reset(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const nla[])
+{
+ struct nftables_pernet *nft_net = nft_pernet(info->net);
+ u32 portid = NETLINK_CB(skb).portid;
+ struct net *net = info->net;
+ struct sk_buff *skb2;
+ char *buf;
+
+ if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
+ struct netlink_dump_control c = {
+ .start= nf_tables_dumpreset_rules_start,
+ .dump = nf_tables_dumpreset_rules,
+ .done = nf_tables_dump_rules_done,
+ .module = THIS_MODULE,
+ .data = (void *)nla,
+ };
+
+ return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
+ }
+
+ if (!try_module_get(THIS_MODULE))
+ return -EINVAL;
+ rcu_read_unlock();
+ mutex_lock(&nft_net->commit_mutex);
+ skb2 = nf_tables_getrule_single(portid, info, nla, true);
+ mutex_unlock(&nft_net->commit_mutex);
+ rcu_read_lock();
+ module_put(THIS_MODULE);
+
+ if (IS_ERR(skb2))
+ return PTR_ERR(skb2);
+
+ buf = kasprintf(GFP_ATOMIC, "%.*s:%u",
+ nla_len(nla[NFTA_RULE_TABLE]),
+ (char *)nla_data(nla[NFTA_RULE_TABLE]),
+ nft_net->base_seq);
+ audit_log_nfcfg(buf, info->nfmsg->nfgen_family, 1,
+ AUDIT_NFT_OP_RULE_RESET, GFP_ATOMIC);
+ kfree(buf);
+
+ return nfnetlink_unicast(skb2, net, portid);
}
void nf_tables_rule_destroy(const struct nft_ctx *ctx, struct nft_rule *rule)
@@ -3721,9 +3818,9 @@ static int nft_table_validate(struct net *net, const struct nft_table *table)
int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set,
const struct nft_set_iter *iter,
- struct nft_set_elem *elem)
+ struct nft_elem_priv *elem_priv)
{
- const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
struct nft_ctx *pctx = (struct nft_ctx *)ctx;
const struct nft_data *data;
int err;
@@ -3753,7 +3850,6 @@ int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set)
{
u8 genmask = nft_genmask_next(ctx->net);
struct nft_set_elem_catchall *catchall;
- struct nft_set_elem elem;
struct nft_set_ext *ext;
int ret = 0;
@@ -3762,8 +3858,7 @@ int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set)
if (!nft_set_elem_active(ext, genmask))
continue;
- elem.priv = catchall->elem;
- ret = nft_setelem_validate(ctx, set, NULL, &elem);
+ ret = nft_setelem_validate(ctx, set, NULL, catchall->elem);
if (ret < 0)
return ret;
}
@@ -3951,6 +4046,11 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
}
if (info->nlh->nlmsg_flags & NLM_F_REPLACE) {
+ if (nft_chain_binding(chain)) {
+ err = -EOPNOTSUPP;
+ goto err_destroy_flow_rule;
+ }
+
err = nft_delrule(&ctx, old_rule);
if (err < 0)
goto err_destroy_flow_rule;
@@ -4058,7 +4158,7 @@ static int nf_tables_delrule(struct sk_buff *skb, const struct nfnl_info *info,
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]);
return PTR_ERR(chain);
}
- if (nft_chain_is_bound(chain))
+ if (nft_chain_binding(chain))
return -EOPNOTSUPP;
}
@@ -4092,7 +4192,7 @@ static int nf_tables_delrule(struct sk_buff *skb, const struct nfnl_info *info,
list_for_each_entry(chain, &table->chains, list) {
if (!nft_is_active_next(net, chain))
continue;
- if (nft_chain_is_bound(chain))
+ if (nft_chain_binding(chain))
continue;
ctx.chain = chain;
@@ -4219,12 +4319,16 @@ static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = {
[NFTA_SET_OBJ_TYPE] = { .type = NLA_U32 },
[NFTA_SET_HANDLE] = { .type = NLA_U64 },
[NFTA_SET_EXPR] = { .type = NLA_NESTED },
- [NFTA_SET_EXPRESSIONS] = { .type = NLA_NESTED },
+ [NFTA_SET_EXPRESSIONS] = NLA_POLICY_NESTED_ARRAY(nft_expr_policy),
+};
+
+static const struct nla_policy nft_concat_policy[NFTA_SET_FIELD_MAX + 1] = {
+ [NFTA_SET_FIELD_LEN] = { .type = NLA_U32 },
};
static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = {
[NFTA_SET_DESC_SIZE] = { .type = NLA_U32 },
- [NFTA_SET_DESC_CONCAT] = { .type = NLA_NESTED },
+ [NFTA_SET_DESC_CONCAT] = NLA_POLICY_NESTED_ARRAY(nft_concat_policy),
};
static struct nft_set *nft_set_lookup(const struct nft_table *table,
@@ -4310,6 +4414,9 @@ static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set,
if (p[1] != 'd' || strchr(p + 2, '%'))
return -EINVAL;
+ if (strnlen(name, NFT_SET_MAX_ANONLEN) >= NFT_SET_MAX_ANONLEN)
+ return -EINVAL;
+
inuse = (unsigned long *)get_zeroed_page(GFP_KERNEL);
if (inuse == NULL)
return -ENOMEM;
@@ -4660,8 +4767,10 @@ static int nf_tables_getset(struct sk_buff *skb, const struct nfnl_info *info,
return -EINVAL;
set = nft_set_lookup(table, nla[NFTA_SET_NAME], genmask);
- if (IS_ERR(set))
+ if (IS_ERR(set)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]);
return PTR_ERR(set);
+ }
skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
if (skb2 == NULL)
@@ -4678,10 +4787,6 @@ err_fill_set_info:
return err;
}
-static const struct nla_policy nft_concat_policy[NFTA_SET_FIELD_MAX + 1] = {
- [NFTA_SET_FIELD_LEN] = { .type = NLA_U32 },
-};
-
static int nft_set_desc_concat_parse(const struct nlattr *attr,
struct nft_set_desc *desc)
{
@@ -4712,8 +4817,8 @@ static int nft_set_desc_concat_parse(const struct nlattr *attr,
static int nft_set_desc_concat(struct nft_set_desc *desc,
const struct nlattr *nla)
{
+ u32 num_regs = 0, key_num_regs = 0;
struct nlattr *attr;
- u32 num_regs = 0;
int rem, err, i;
nla_for_each_nested(attr, nla, rem) {
@@ -4728,6 +4833,10 @@ static int nft_set_desc_concat(struct nft_set_desc *desc,
for (i = 0; i < desc->field_count; i++)
num_regs += DIV_ROUND_UP(desc->field_len[i], sizeof(u32));
+ key_num_regs = DIV_ROUND_UP(desc->klen, sizeof(u32));
+ if (key_num_regs != num_regs)
+ return -EINVAL;
+
if (num_regs > NFT_REG32_COUNT)
return -E2BIG;
@@ -4949,16 +5058,28 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
}
desc.policy = NFT_SET_POL_PERFORMANCE;
- if (nla[NFTA_SET_POLICY] != NULL)
+ if (nla[NFTA_SET_POLICY] != NULL) {
desc.policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY]));
+ switch (desc.policy) {
+ case NFT_SET_POL_PERFORMANCE:
+ case NFT_SET_POL_MEMORY:
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+ }
if (nla[NFTA_SET_DESC] != NULL) {
err = nf_tables_set_desc_parse(&desc, nla[NFTA_SET_DESC]);
if (err < 0)
return err;
- if (desc.field_count > 1 && !(flags & NFT_SET_CONCAT))
+ if (desc.field_count > 1) {
+ if (!(flags & NFT_SET_CONCAT))
+ return -EINVAL;
+ } else if (flags & NFT_SET_CONCAT) {
return -EINVAL;
+ }
} else if (flags & NFT_SET_CONCAT) {
return -EINVAL;
}
@@ -5208,9 +5329,9 @@ static int nft_validate_register_store(const struct nft_ctx *ctx,
static int nft_setelem_data_validate(const struct nft_ctx *ctx,
struct nft_set *set,
- struct nft_set_elem *elem)
+ struct nft_elem_priv *elem_priv)
{
- const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
enum nft_registers dreg;
dreg = nft_type_to_reg(set->dtype);
@@ -5223,9 +5344,9 @@ static int nft_setelem_data_validate(const struct nft_ctx *ctx,
static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx,
struct nft_set *set,
const struct nft_set_iter *iter,
- struct nft_set_elem *elem)
+ struct nft_elem_priv *elem_priv)
{
- return nft_setelem_data_validate(ctx, set, elem);
+ return nft_setelem_data_validate(ctx, set, elem_priv);
}
static int nft_set_catchall_bind_check(const struct nft_ctx *ctx,
@@ -5233,7 +5354,6 @@ static int nft_set_catchall_bind_check(const struct nft_ctx *ctx,
{
u8 genmask = nft_genmask_next(ctx->net);
struct nft_set_elem_catchall *catchall;
- struct nft_set_elem elem;
struct nft_set_ext *ext;
int ret = 0;
@@ -5242,8 +5362,7 @@ static int nft_set_catchall_bind_check(const struct nft_ctx *ctx,
if (!nft_set_elem_active(ext, genmask))
continue;
- elem.priv = catchall->elem;
- ret = nft_setelem_data_validate(ctx, set, &elem);
+ ret = nft_setelem_data_validate(ctx, set, catchall->elem);
if (ret < 0)
break;
}
@@ -5310,14 +5429,14 @@ static void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
static void nft_setelem_data_activate(const struct net *net,
const struct nft_set *set,
- struct nft_set_elem *elem);
+ struct nft_elem_priv *elem_priv);
static int nft_mapelem_activate(const struct nft_ctx *ctx,
struct nft_set *set,
const struct nft_set_iter *iter,
- struct nft_set_elem *elem)
+ struct nft_elem_priv *elem_priv)
{
- nft_setelem_data_activate(ctx->net, set, elem);
+ nft_setelem_data_activate(ctx->net, set, elem_priv);
return 0;
}
@@ -5327,7 +5446,6 @@ static void nft_map_catchall_activate(const struct nft_ctx *ctx,
{
u8 genmask = nft_genmask_next(ctx->net);
struct nft_set_elem_catchall *catchall;
- struct nft_set_elem elem;
struct nft_set_ext *ext;
list_for_each_entry(catchall, &set->catchall_list, list) {
@@ -5335,8 +5453,7 @@ static void nft_map_catchall_activate(const struct nft_ctx *ctx,
if (!nft_set_elem_active(ext, genmask))
continue;
- elem.priv = catchall->elem;
- nft_setelem_data_activate(ctx->net, set, &elem);
+ nft_setelem_data_activate(ctx->net, set, catchall->elem);
break;
}
}
@@ -5463,7 +5580,7 @@ static const struct nla_policy nft_set_elem_policy[NFTA_SET_ELEM_MAX + 1] = {
[NFTA_SET_ELEM_OBJREF] = { .type = NLA_STRING,
.len = NFT_OBJ_MAXNAMELEN - 1 },
[NFTA_SET_ELEM_KEY_END] = { .type = NLA_NESTED },
- [NFTA_SET_ELEM_EXPRESSIONS] = { .type = NLA_NESTED },
+ [NFTA_SET_ELEM_EXPRESSIONS] = NLA_POLICY_NESTED_ARRAY(nft_expr_policy),
};
static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + 1] = {
@@ -5471,7 +5588,7 @@ static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX +
.len = NFT_TABLE_MAXNAMELEN - 1 },
[NFTA_SET_ELEM_LIST_SET] = { .type = NLA_STRING,
.len = NFT_SET_MAXNAMELEN - 1 },
- [NFTA_SET_ELEM_LIST_ELEMENTS] = { .type = NLA_NESTED },
+ [NFTA_SET_ELEM_LIST_ELEMENTS] = NLA_POLICY_NESTED_ARRAY(nft_set_elem_policy),
[NFTA_SET_ELEM_LIST_SET_ID] = { .type = NLA_U32 },
};
@@ -5515,13 +5632,12 @@ nla_put_failure:
static int nf_tables_fill_setelem(struct sk_buff *skb,
const struct nft_set *set,
- const struct nft_set_elem *elem,
+ const struct nft_elem_priv *elem_priv,
bool reset)
{
- const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
unsigned char *b = skb_tail_pointer(skb);
struct nlattr *nest;
- u64 timeout = 0;
nest = nla_nest_start_noflag(skb, NFTA_LIST_ELEM);
if (nest == NULL)
@@ -5557,15 +5673,11 @@ static int nf_tables_fill_setelem(struct sk_buff *skb,
htonl(*nft_set_ext_flags(ext))))
goto nla_put_failure;
- if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT)) {
- timeout = *nft_set_ext_timeout(ext);
- if (nla_put_be64(skb, NFTA_SET_ELEM_TIMEOUT,
- nf_jiffies64_to_msecs(timeout),
- NFTA_SET_ELEM_PAD))
- goto nla_put_failure;
- } else if (set->flags & NFT_SET_TIMEOUT) {
- timeout = READ_ONCE(set->timeout);
- }
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT) &&
+ nla_put_be64(skb, NFTA_SET_ELEM_TIMEOUT,
+ nf_jiffies64_to_msecs(*nft_set_ext_timeout(ext)),
+ NFTA_SET_ELEM_PAD))
+ goto nla_put_failure;
if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) {
u64 expires, now = get_jiffies_64();
@@ -5580,9 +5692,6 @@ static int nf_tables_fill_setelem(struct sk_buff *skb,
nf_jiffies64_to_msecs(expires),
NFTA_SET_ELEM_PAD))
goto nla_put_failure;
-
- if (reset)
- *nft_set_ext_expiration(ext) = now + timeout;
}
if (nft_set_ext_exists(ext, NFT_SET_EXT_USERDATA)) {
@@ -5612,29 +5721,41 @@ struct nft_set_dump_args {
static int nf_tables_dump_setelem(const struct nft_ctx *ctx,
struct nft_set *set,
const struct nft_set_iter *iter,
- struct nft_set_elem *elem)
+ struct nft_elem_priv *elem_priv)
{
- const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
struct nft_set_dump_args *args;
- if (nft_set_elem_expired(ext))
+ if (nft_set_elem_expired(ext) || nft_set_elem_is_dead(ext))
return 0;
args = container_of(iter, struct nft_set_dump_args, iter);
- return nf_tables_fill_setelem(args->skb, set, elem, args->reset);
+ return nf_tables_fill_setelem(args->skb, set, elem_priv, args->reset);
+}
+
+static void audit_log_nft_set_reset(const struct nft_table *table,
+ unsigned int base_seq,
+ unsigned int nentries)
+{
+ char *buf = kasprintf(GFP_ATOMIC, "%s:%u", table->name, base_seq);
+
+ audit_log_nfcfg(buf, table->family, nentries,
+ AUDIT_NFT_OP_SETELEM_RESET, GFP_ATOMIC);
+ kfree(buf);
}
struct nft_set_dump_ctx {
const struct nft_set *set;
struct nft_ctx ctx;
+ bool reset;
};
static int nft_set_catchall_dump(struct net *net, struct sk_buff *skb,
- const struct nft_set *set, bool reset)
+ const struct nft_set *set, bool reset,
+ unsigned int base_seq)
{
struct nft_set_elem_catchall *catchall;
u8 genmask = nft_genmask_cur(net);
- struct nft_set_elem elem;
struct nft_set_ext *ext;
int ret = 0;
@@ -5644,8 +5765,9 @@ static int nft_set_catchall_dump(struct net *net, struct sk_buff *skb,
nft_set_elem_expired(ext))
continue;
- elem.priv = catchall->elem;
- ret = nf_tables_fill_setelem(skb, set, &elem, reset);
+ ret = nf_tables_fill_setelem(skb, set, catchall->elem, reset);
+ if (reset && !ret)
+ audit_log_nft_set_reset(set->table, base_seq, 1);
break;
}
@@ -5663,7 +5785,6 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
bool set_found = false;
struct nlmsghdr *nlh;
struct nlattr *nest;
- bool reset = false;
u32 portid, seq;
int event;
@@ -5711,12 +5832,9 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
if (nest == NULL)
goto nla_put_failure;
- if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETSETELEM_RESET)
- reset = true;
-
args.cb = cb;
args.skb = skb;
- args.reset = reset;
+ args.reset = dump_ctx->reset;
args.iter.genmask = nft_genmask_cur(net);
args.iter.skip = cb->args[0];
args.iter.count = 0;
@@ -5725,12 +5843,13 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
set->ops->walk(&dump_ctx->ctx, set, &args.iter);
if (!args.iter.err && args.iter.count == cb->args[0])
- args.iter.err = nft_set_catchall_dump(net, skb, set, reset);
- rcu_read_unlock();
-
+ args.iter.err = nft_set_catchall_dump(net, skb, set,
+ dump_ctx->reset, cb->seq);
nla_nest_end(skb, nest);
nlmsg_end(skb, nlh);
+ rcu_read_unlock();
+
if (args.iter.err && args.iter.err != -EMSGSIZE)
return args.iter.err;
if (args.iter.count == cb->args[0])
@@ -5744,6 +5863,26 @@ nla_put_failure:
return -ENOSPC;
}
+static int nf_tables_dumpreset_set(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ struct nftables_pernet *nft_net = nft_pernet(sock_net(skb->sk));
+ struct nft_set_dump_ctx *dump_ctx = cb->data;
+ int ret, skip = cb->args[0];
+
+ mutex_lock(&nft_net->commit_mutex);
+
+ ret = nf_tables_dump_set(skb, cb);
+
+ if (cb->args[0] > skip)
+ audit_log_nft_set_reset(dump_ctx->ctx.table, cb->seq,
+ cb->args[0] - skip);
+
+ mutex_unlock(&nft_net->commit_mutex);
+
+ return ret;
+}
+
static int nf_tables_dump_set_start(struct netlink_callback *cb)
{
struct nft_set_dump_ctx *dump_ctx = cb->data;
@@ -5763,7 +5902,7 @@ static int nf_tables_fill_setelem_info(struct sk_buff *skb,
const struct nft_ctx *ctx, u32 seq,
u32 portid, int event, u16 flags,
const struct nft_set *set,
- const struct nft_set_elem *elem,
+ const struct nft_elem_priv *elem_priv,
bool reset)
{
struct nlmsghdr *nlh;
@@ -5785,7 +5924,7 @@ static int nf_tables_fill_setelem_info(struct sk_buff *skb,
if (nest == NULL)
goto nla_put_failure;
- err = nf_tables_fill_setelem(skb, set, elem, reset);
+ err = nf_tables_fill_setelem(skb, set, elem_priv, reset);
if (err < 0)
goto nla_put_failure;
@@ -5818,7 +5957,7 @@ static int nft_setelem_parse_flags(const struct nft_set *set,
return 0;
}
-static int nft_setelem_parse_key(struct nft_ctx *ctx, struct nft_set *set,
+static int nft_setelem_parse_key(struct nft_ctx *ctx, const struct nft_set *set,
struct nft_data *key, struct nlattr *attr)
{
struct nft_data_desc desc = {
@@ -5871,7 +6010,7 @@ static void *nft_setelem_catchall_get(const struct net *net,
return priv;
}
-static int nft_setelem_get(struct nft_ctx *ctx, struct nft_set *set,
+static int nft_setelem_get(struct nft_ctx *ctx, const struct nft_set *set,
struct nft_set_elem *elem, u32 flags)
{
void *priv;
@@ -5890,7 +6029,7 @@ static int nft_setelem_get(struct nft_ctx *ctx, struct nft_set *set,
return 0;
}
-static int nft_get_set_elem(struct nft_ctx *ctx, struct nft_set *set,
+static int nft_get_set_elem(struct nft_ctx *ctx, const struct nft_set *set,
const struct nlattr *attr, bool reset)
{
struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
@@ -5935,7 +6074,7 @@ static int nft_get_set_elem(struct nft_ctx *ctx, struct nft_set *set,
return err;
err = nf_tables_fill_setelem_info(skb, ctx, ctx->seq, ctx->portid,
- NFT_MSG_NEWSETELEM, 0, set, &elem,
+ NFT_MSG_NEWSETELEM, 0, set, elem.priv,
reset);
if (err < 0)
goto err_fill_setelem;
@@ -5947,10 +6086,11 @@ err_fill_setelem:
return err;
}
-/* called with rcu_read_lock held */
-static int nf_tables_getsetelem(struct sk_buff *skb,
- const struct nfnl_info *info,
- const struct nlattr * const nla[])
+static int nft_set_dump_ctx_init(struct nft_set_dump_ctx *dump_ctx,
+ const struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const nla[],
+ bool reset)
{
struct netlink_ext_ack *extack = info->extack;
u8 genmask = nft_genmask_cur(info->net);
@@ -5958,10 +6098,6 @@ static int nf_tables_getsetelem(struct sk_buff *skb,
struct net *net = info->net;
struct nft_table *table;
struct nft_set *set;
- struct nlattr *attr;
- struct nft_ctx ctx;
- bool reset = false;
- int rem, err = 0;
table = nft_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE], family,
genmask, 0);
@@ -5971,10 +6107,27 @@ static int nf_tables_getsetelem(struct sk_buff *skb,
}
set = nft_set_lookup(table, nla[NFTA_SET_ELEM_LIST_SET], genmask);
- if (IS_ERR(set))
+ if (IS_ERR(set)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_SET]);
return PTR_ERR(set);
+ }
- nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
+ nft_ctx_init(&dump_ctx->ctx, net, skb,
+ info->nlh, family, table, NULL, nla);
+ dump_ctx->set = set;
+ dump_ctx->reset = reset;
+ return 0;
+}
+
+/* called with rcu_read_lock held */
+static int nf_tables_getsetelem(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const nla[])
+{
+ struct netlink_ext_ack *extack = info->extack;
+ struct nft_set_dump_ctx dump_ctx;
+ struct nlattr *attr;
+ int rem, err = 0;
if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
@@ -5983,11 +6136,55 @@ static int nf_tables_getsetelem(struct sk_buff *skb,
.done = nf_tables_dump_set_done,
.module = THIS_MODULE,
};
- struct nft_set_dump_ctx dump_ctx = {
- .set = set,
- .ctx = ctx,
+
+ err = nft_set_dump_ctx_init(&dump_ctx, skb, info, nla, false);
+ if (err)
+ return err;
+
+ c.data = &dump_ctx;
+ return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
+ }
+
+ if (!nla[NFTA_SET_ELEM_LIST_ELEMENTS])
+ return -EINVAL;
+
+ err = nft_set_dump_ctx_init(&dump_ctx, skb, info, nla, false);
+ if (err)
+ return err;
+
+ nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
+ err = nft_get_set_elem(&dump_ctx.ctx, dump_ctx.set, attr, false);
+ if (err < 0) {
+ NL_SET_BAD_ATTR(extack, attr);
+ break;
+ }
+ }
+
+ return err;
+}
+
+static int nf_tables_getsetelem_reset(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const nla[])
+{
+ struct nftables_pernet *nft_net = nft_pernet(info->net);
+ struct netlink_ext_ack *extack = info->extack;
+ struct nft_set_dump_ctx dump_ctx;
+ int rem, err = 0, nelems = 0;
+ struct nlattr *attr;
+
+ if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
+ struct netlink_dump_control c = {
+ .start = nf_tables_dump_set_start,
+ .dump = nf_tables_dumpreset_set,
+ .done = nf_tables_dump_set_done,
+ .module = THIS_MODULE,
};
+ err = nft_set_dump_ctx_init(&dump_ctx, skb, info, nla, true);
+ if (err)
+ return err;
+
c.data = &dump_ctx;
return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
}
@@ -5995,23 +6192,38 @@ static int nf_tables_getsetelem(struct sk_buff *skb,
if (!nla[NFTA_SET_ELEM_LIST_ELEMENTS])
return -EINVAL;
- if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETSETELEM_RESET)
- reset = true;
+ if (!try_module_get(THIS_MODULE))
+ return -EINVAL;
+ rcu_read_unlock();
+ mutex_lock(&nft_net->commit_mutex);
+ rcu_read_lock();
+
+ err = nft_set_dump_ctx_init(&dump_ctx, skb, info, nla, true);
+ if (err)
+ goto out_unlock;
nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
- err = nft_get_set_elem(&ctx, set, attr, reset);
+ err = nft_get_set_elem(&dump_ctx.ctx, dump_ctx.set, attr, true);
if (err < 0) {
NL_SET_BAD_ATTR(extack, attr);
break;
}
+ nelems++;
}
+ audit_log_nft_set_reset(dump_ctx.ctx.table, nft_net->base_seq, nelems);
+
+out_unlock:
+ rcu_read_unlock();
+ mutex_unlock(&nft_net->commit_mutex);
+ rcu_read_lock();
+ module_put(THIS_MODULE);
return err;
}
static void nf_tables_setelem_notify(const struct nft_ctx *ctx,
const struct nft_set *set,
- const struct nft_set_elem *elem,
+ const struct nft_elem_priv *elem_priv,
int event)
{
struct nftables_pernet *nft_net;
@@ -6032,7 +6244,7 @@ static void nf_tables_setelem_notify(const struct nft_ctx *ctx,
flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL);
err = nf_tables_fill_setelem_info(skb, ctx, 0, portid, event, flags,
- set, elem, false);
+ set, elem_priv, false);
if (err < 0) {
kfree_skb(skb);
goto err;
@@ -6107,10 +6319,11 @@ static int nft_set_ext_memcpy(const struct nft_set_ext_tmpl *tmpl, u8 id,
return 0;
}
-void *nft_set_elem_init(const struct nft_set *set,
- const struct nft_set_ext_tmpl *tmpl,
- const u32 *key, const u32 *key_end,
- const u32 *data, u64 timeout, u64 expiration, gfp_t gfp)
+struct nft_elem_priv *nft_set_elem_init(const struct nft_set *set,
+ const struct nft_set_ext_tmpl *tmpl,
+ const u32 *key, const u32 *key_end,
+ const u32 *data,
+ u64 timeout, u64 expiration, gfp_t gfp)
{
struct nft_set_ext *ext;
void *elem;
@@ -6175,10 +6388,11 @@ static void nft_set_elem_expr_destroy(const struct nft_ctx *ctx,
}
/* Drop references and destroy. Called from gc, dynset and abort path. */
-void nft_set_elem_destroy(const struct nft_set *set, void *elem,
+void nft_set_elem_destroy(const struct nft_set *set,
+ const struct nft_elem_priv *elem_priv,
bool destroy_expr)
{
- struct nft_set_ext *ext = nft_set_elem_ext(set, elem);
+ struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
struct nft_ctx ctx = {
.net = read_pnet(&set->net),
.family = set->table->family,
@@ -6189,10 +6403,10 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem,
nft_data_release(nft_set_ext_data(ext), set->dtype);
if (destroy_expr && nft_set_ext_exists(ext, NFT_SET_EXT_EXPRESSIONS))
nft_set_elem_expr_destroy(&ctx, nft_set_ext_expr(ext));
-
if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
nft_use_dec(&(*nft_set_ext_obj(ext))->use);
- kfree(elem);
+
+ kfree(elem_priv);
}
EXPORT_SYMBOL_GPL(nft_set_elem_destroy);
@@ -6200,14 +6414,15 @@ EXPORT_SYMBOL_GPL(nft_set_elem_destroy);
* path via nft_setelem_data_deactivate().
*/
void nf_tables_set_elem_destroy(const struct nft_ctx *ctx,
- const struct nft_set *set, void *elem)
+ const struct nft_set *set,
+ const struct nft_elem_priv *elem_priv)
{
- struct nft_set_ext *ext = nft_set_elem_ext(set, elem);
+ struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPRESSIONS))
nft_set_elem_expr_destroy(ctx, nft_set_ext_expr(ext));
- kfree(elem);
+ kfree(elem_priv);
}
int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set,
@@ -6302,7 +6517,7 @@ EXPORT_SYMBOL_GPL(nft_set_catchall_lookup);
static int nft_setelem_catchall_insert(const struct net *net,
struct nft_set *set,
const struct nft_set_elem *elem,
- struct nft_set_ext **pext)
+ struct nft_elem_priv **priv)
{
struct nft_set_elem_catchall *catchall;
u8 genmask = nft_genmask_next(net);
@@ -6311,7 +6526,7 @@ static int nft_setelem_catchall_insert(const struct net *net,
list_for_each_entry(catchall, &set->catchall_list, list) {
ext = nft_set_elem_ext(set, catchall->elem);
if (nft_set_elem_active(ext, genmask)) {
- *pext = ext;
+ *priv = catchall->elem;
return -EEXIST;
}
}
@@ -6329,22 +6544,23 @@ static int nft_setelem_catchall_insert(const struct net *net,
static int nft_setelem_insert(const struct net *net,
struct nft_set *set,
const struct nft_set_elem *elem,
- struct nft_set_ext **ext, unsigned int flags)
+ struct nft_elem_priv **elem_priv,
+ unsigned int flags)
{
int ret;
if (flags & NFT_SET_ELEM_CATCHALL)
- ret = nft_setelem_catchall_insert(net, set, elem, ext);
+ ret = nft_setelem_catchall_insert(net, set, elem, elem_priv);
else
- ret = set->ops->insert(net, set, elem, ext);
+ ret = set->ops->insert(net, set, elem, elem_priv);
return ret;
}
static bool nft_setelem_is_catchall(const struct nft_set *set,
- const struct nft_set_elem *elem)
+ const struct nft_elem_priv *elem_priv)
{
- struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+ struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) &&
*nft_set_ext_flags(ext) & NFT_SET_ELEM_CATCHALL)
@@ -6354,14 +6570,14 @@ static bool nft_setelem_is_catchall(const struct nft_set *set,
}
static void nft_setelem_activate(struct net *net, struct nft_set *set,
- struct nft_set_elem *elem)
+ struct nft_elem_priv *elem_priv)
{
- struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+ struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
- if (nft_setelem_is_catchall(set, elem)) {
+ if (nft_setelem_is_catchall(set, elem_priv)) {
nft_set_elem_change_active(net, set, ext);
} else {
- set->ops->activate(net, set, elem);
+ set->ops->activate(net, set, elem_priv);
}
}
@@ -6374,7 +6590,7 @@ static int nft_setelem_catchall_deactivate(const struct net *net,
list_for_each_entry(catchall, &set->catchall_list, list) {
ext = nft_set_elem_ext(set, catchall->elem);
- if (!nft_is_active(net, ext))
+ if (!nft_is_active_next(net, ext))
continue;
kfree(elem->priv);
@@ -6417,16 +6633,21 @@ static int nft_setelem_deactivate(const struct net *net,
return ret;
}
+static void nft_setelem_catchall_destroy(struct nft_set_elem_catchall *catchall)
+{
+ list_del_rcu(&catchall->list);
+ kfree_rcu(catchall, rcu);
+}
+
static void nft_setelem_catchall_remove(const struct net *net,
const struct nft_set *set,
- const struct nft_set_elem *elem)
+ struct nft_elem_priv *elem_priv)
{
struct nft_set_elem_catchall *catchall, *next;
list_for_each_entry_safe(catchall, next, &set->catchall_list, list) {
- if (catchall->elem == elem->priv) {
- list_del_rcu(&catchall->list);
- kfree_rcu(catchall, rcu);
+ if (catchall->elem == elem_priv) {
+ nft_setelem_catchall_destroy(catchall);
break;
}
}
@@ -6434,12 +6655,12 @@ static void nft_setelem_catchall_remove(const struct net *net,
static void nft_setelem_remove(const struct net *net,
const struct nft_set *set,
- const struct nft_set_elem *elem)
+ struct nft_elem_priv *elem_priv)
{
- if (nft_setelem_is_catchall(set, elem))
- nft_setelem_catchall_remove(net, set, elem);
+ if (nft_setelem_is_catchall(set, elem_priv))
+ nft_setelem_catchall_remove(net, set, elem_priv);
else
- set->ops->remove(net, set, elem);
+ set->ops->remove(net, set, elem_priv);
}
static bool nft_setelem_valid_key_end(const struct nft_set *set,
@@ -6472,13 +6693,14 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
struct nft_set_ext *ext, *ext2;
struct nft_set_elem elem;
struct nft_set_binding *binding;
+ struct nft_elem_priv *elem_priv;
struct nft_object *obj = NULL;
struct nft_userdata *udata;
struct nft_data_desc desc;
enum nft_registers dreg;
struct nft_trans *trans;
- u64 timeout;
u64 expiration;
+ u64 timeout;
int err, i;
u8 ulen;
@@ -6771,9 +6993,10 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
ext->genmask = nft_genmask_cur(ctx->net);
- err = nft_setelem_insert(ctx->net, set, &elem, &ext2, flags);
+ err = nft_setelem_insert(ctx->net, set, &elem, &elem_priv, flags);
if (err) {
if (err == -EEXIST) {
+ ext2 = nft_set_elem_ext(set, elem_priv);
if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) ^
nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) ||
nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF) ^
@@ -6807,12 +7030,12 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
}
}
- nft_trans_elem(trans) = elem;
+ nft_trans_elem_priv(trans) = elem.priv;
nft_trans_commit_list_add_tail(ctx->net, trans);
return 0;
err_set_full:
- nft_setelem_remove(ctx->net, set, &elem);
+ nft_setelem_remove(ctx->net, set, elem.priv);
err_element_clash:
kfree(trans);
err_elem_free:
@@ -6860,8 +7083,10 @@ static int nf_tables_newsetelem(struct sk_buff *skb,
set = nft_set_lookup_global(net, table, nla[NFTA_SET_ELEM_LIST_SET],
nla[NFTA_SET_ELEM_LIST_SET_ID], genmask);
- if (IS_ERR(set))
+ if (IS_ERR(set)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_SET]);
return PTR_ERR(set);
+ }
if (!list_empty(&set->bindings) &&
(set->flags & (NFT_SET_CONSTANT | NFT_SET_ANONYMOUS)))
@@ -6911,9 +7136,9 @@ void nft_data_hold(const struct nft_data *data, enum nft_data_types type)
static void nft_setelem_data_activate(const struct net *net,
const struct nft_set *set,
- struct nft_set_elem *elem)
+ struct nft_elem_priv *elem_priv)
{
- const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
nft_data_hold(nft_set_ext_data(ext), set->dtype);
@@ -6923,9 +7148,9 @@ static void nft_setelem_data_activate(const struct net *net,
void nft_setelem_data_deactivate(const struct net *net,
const struct nft_set *set,
- struct nft_set_elem *elem)
+ struct nft_elem_priv *elem_priv)
{
- const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
nft_data_release(nft_set_ext_data(ext), set->dtype);
@@ -7010,9 +7235,9 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
if (err < 0)
goto fail_ops;
- nft_setelem_data_deactivate(ctx->net, set, &elem);
+ nft_setelem_data_deactivate(ctx->net, set, elem.priv);
- nft_trans_elem(trans) = elem;
+ nft_trans_elem_priv(trans) = elem.priv;
nft_trans_commit_list_add_tail(ctx->net, trans);
return 0;
@@ -7030,36 +7255,29 @@ fail_elem:
static int nft_setelem_flush(const struct nft_ctx *ctx,
struct nft_set *set,
const struct nft_set_iter *iter,
- struct nft_set_elem *elem)
+ struct nft_elem_priv *elem_priv)
{
struct nft_trans *trans;
- int err;
trans = nft_trans_alloc_gfp(ctx, NFT_MSG_DELSETELEM,
sizeof(struct nft_trans_elem), GFP_ATOMIC);
if (!trans)
return -ENOMEM;
- if (!set->ops->flush(ctx->net, set, elem->priv)) {
- err = -ENOENT;
- goto err1;
- }
+ set->ops->flush(ctx->net, set, elem_priv);
set->ndeact++;
- nft_setelem_data_deactivate(ctx->net, set, elem);
+ nft_setelem_data_deactivate(ctx->net, set, elem_priv);
nft_trans_elem_set(trans) = set;
- nft_trans_elem(trans) = *elem;
+ nft_trans_elem_priv(trans) = elem_priv;
nft_trans_commit_list_add_tail(ctx->net, trans);
return 0;
-err1:
- kfree(trans);
- return err;
}
static int __nft_set_catchall_flush(const struct nft_ctx *ctx,
struct nft_set *set,
- struct nft_set_elem *elem)
+ struct nft_elem_priv *elem_priv)
{
struct nft_trans *trans;
@@ -7068,9 +7286,9 @@ static int __nft_set_catchall_flush(const struct nft_ctx *ctx,
if (!trans)
return -ENOMEM;
- nft_setelem_data_deactivate(ctx->net, set, elem);
+ nft_setelem_data_deactivate(ctx->net, set, elem_priv);
nft_trans_elem_set(trans) = set;
- nft_trans_elem(trans) = *elem;
+ nft_trans_elem_priv(trans) = elem_priv;
nft_trans_commit_list_add_tail(ctx->net, trans);
return 0;
@@ -7081,7 +7299,6 @@ static int nft_set_catchall_flush(const struct nft_ctx *ctx,
{
u8 genmask = nft_genmask_next(ctx->net);
struct nft_set_elem_catchall *catchall;
- struct nft_set_elem elem;
struct nft_set_ext *ext;
int ret = 0;
@@ -7090,8 +7307,7 @@ static int nft_set_catchall_flush(const struct nft_ctx *ctx,
if (!nft_set_elem_active(ext, genmask))
continue;
- elem.priv = catchall->elem;
- ret = __nft_set_catchall_flush(ctx, set, &elem);
+ ret = __nft_set_catchall_flush(ctx, set, catchall->elem);
if (ret < 0)
break;
nft_set_elem_change_active(ctx->net, set, ext);
@@ -7136,11 +7352,15 @@ static int nf_tables_delsetelem(struct sk_buff *skb,
}
set = nft_set_lookup(table, nla[NFTA_SET_ELEM_LIST_SET], genmask);
- if (IS_ERR(set))
+ if (IS_ERR(set)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_SET]);
return PTR_ERR(set);
+ }
- if (!list_empty(&set->bindings) &&
- (set->flags & (NFT_SET_CONSTANT | NFT_SET_ANONYMOUS)))
+ if (nft_set_is_anonymous(set))
+ return -EOPNOTSUPP;
+
+ if (!list_empty(&set->bindings) && (set->flags & NFT_SET_CONSTANT))
return -EBUSY;
nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
@@ -7156,10 +7376,11 @@ static int nf_tables_delsetelem(struct sk_buff *skb,
if (err < 0) {
NL_SET_BAD_ATTR(extack, attr);
- break;
+ return err;
}
}
- return err;
+
+ return 0;
}
/*
@@ -7330,11 +7551,15 @@ nla_put_failure:
return -1;
}
-static const struct nft_object_type *__nft_obj_type_get(u32 objtype)
+static const struct nft_object_type *__nft_obj_type_get(u32 objtype, u8 family)
{
const struct nft_object_type *type;
list_for_each_entry(type, &nf_tables_objects, list) {
+ if (type->family != NFPROTO_UNSPEC &&
+ type->family != family)
+ continue;
+
if (objtype == type->type)
return type;
}
@@ -7342,11 +7567,11 @@ static const struct nft_object_type *__nft_obj_type_get(u32 objtype)
}
static const struct nft_object_type *
-nft_obj_type_get(struct net *net, u32 objtype)
+nft_obj_type_get(struct net *net, u32 objtype, u8 family)
{
const struct nft_object_type *type;
- type = __nft_obj_type_get(objtype);
+ type = __nft_obj_type_get(objtype, family);
if (type != NULL && try_module_get(type->owner))
return type;
@@ -7439,7 +7664,7 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info,
if (info->nlh->nlmsg_flags & NLM_F_REPLACE)
return -EOPNOTSUPP;
- type = __nft_obj_type_get(objtype);
+ type = __nft_obj_type_get(objtype, family);
if (WARN_ON_ONCE(!type))
return -ENOENT;
@@ -7453,7 +7678,7 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info,
if (!nft_use_inc(&table->use))
return -EMFILE;
- type = nft_obj_type_get(net, objtype);
+ type = nft_obj_type_get(net, objtype, family);
if (IS_ERR(type)) {
err = PTR_ERR(type);
goto err_type;
@@ -7554,25 +7779,35 @@ nla_put_failure:
return -1;
}
-struct nft_obj_filter {
+static void audit_log_obj_reset(const struct nft_table *table,
+ unsigned int base_seq, unsigned int nentries)
+{
+ char *buf = kasprintf(GFP_ATOMIC, "%s:%u", table->name, base_seq);
+
+ audit_log_nfcfg(buf, table->family, nentries,
+ AUDIT_NFT_OP_OBJ_RESET, GFP_ATOMIC);
+ kfree(buf);
+}
+
+struct nft_obj_dump_ctx {
+ unsigned int s_idx;
char *table;
u32 type;
+ bool reset;
};
static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
{
const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
- const struct nft_table *table;
- unsigned int idx = 0, s_idx = cb->args[0];
- struct nft_obj_filter *filter = cb->data;
+ struct nft_obj_dump_ctx *ctx = (void *)cb->ctx;
struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
struct nftables_pernet *nft_net;
+ const struct nft_table *table;
+ unsigned int entries = 0;
struct nft_object *obj;
- bool reset = false;
-
- if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET)
- reset = true;
+ unsigned int idx = 0;
+ int rc = 0;
rcu_read_lock();
nft_net = nft_pernet(net);
@@ -7582,89 +7817,71 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
if (family != NFPROTO_UNSPEC && family != table->family)
continue;
+ entries = 0;
list_for_each_entry_rcu(obj, &table->objects, list) {
if (!nft_is_active(net, obj))
goto cont;
- if (idx < s_idx)
+ if (idx < ctx->s_idx)
goto cont;
- if (idx > s_idx)
- memset(&cb->args[1], 0,
- sizeof(cb->args) - sizeof(cb->args[0]));
- if (filter && filter->table &&
- strcmp(filter->table, table->name))
+ if (ctx->table && strcmp(ctx->table, table->name))
goto cont;
- if (filter &&
- filter->type != NFT_OBJECT_UNSPEC &&
- obj->ops->type->type != filter->type)
+ if (ctx->type != NFT_OBJECT_UNSPEC &&
+ obj->ops->type->type != ctx->type)
goto cont;
- if (reset) {
- char *buf = kasprintf(GFP_ATOMIC,
- "%s:%u",
- table->name,
- nft_net->base_seq);
-
- audit_log_nfcfg(buf,
- family,
- obj->handle,
- AUDIT_NFT_OP_OBJ_RESET,
- GFP_ATOMIC);
- kfree(buf);
- }
- if (nf_tables_fill_obj_info(skb, net, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NFT_MSG_NEWOBJ,
- NLM_F_MULTI | NLM_F_APPEND,
- table->family, table,
- obj, reset) < 0)
- goto done;
+ rc = nf_tables_fill_obj_info(skb, net,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NFT_MSG_NEWOBJ,
+ NLM_F_MULTI | NLM_F_APPEND,
+ table->family, table,
+ obj, ctx->reset);
+ if (rc < 0)
+ break;
+ entries++;
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
idx++;
}
+ if (ctx->reset && entries)
+ audit_log_obj_reset(table, nft_net->base_seq, entries);
+ if (rc < 0)
+ break;
}
-done:
rcu_read_unlock();
- cb->args[0] = idx;
+ ctx->s_idx = idx;
return skb->len;
}
static int nf_tables_dump_obj_start(struct netlink_callback *cb)
{
+ struct nft_obj_dump_ctx *ctx = (void *)cb->ctx;
const struct nlattr * const *nla = cb->data;
- struct nft_obj_filter *filter = NULL;
- if (nla[NFTA_OBJ_TABLE] || nla[NFTA_OBJ_TYPE]) {
- filter = kzalloc(sizeof(*filter), GFP_ATOMIC);
- if (!filter)
+ BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx));
+
+ if (nla[NFTA_OBJ_TABLE]) {
+ ctx->table = nla_strdup(nla[NFTA_OBJ_TABLE], GFP_ATOMIC);
+ if (!ctx->table)
return -ENOMEM;
+ }
- if (nla[NFTA_OBJ_TABLE]) {
- filter->table = nla_strdup(nla[NFTA_OBJ_TABLE], GFP_ATOMIC);
- if (!filter->table) {
- kfree(filter);
- return -ENOMEM;
- }
- }
+ if (nla[NFTA_OBJ_TYPE])
+ ctx->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
- if (nla[NFTA_OBJ_TYPE])
- filter->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
- }
+ if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET)
+ ctx->reset = true;
- cb->data = filter;
return 0;
}
static int nf_tables_dump_obj_done(struct netlink_callback *cb)
{
- struct nft_obj_filter *filter = cb->data;
+ struct nft_obj_dump_ctx *ctx = (void *)cb->ctx;
- if (filter) {
- kfree(filter->table);
- kfree(filter);
- }
+ kfree(ctx->table);
return 0;
}
@@ -7729,7 +7946,7 @@ static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info,
audit_log_nfcfg(buf,
family,
- obj->handle,
+ 1,
AUDIT_NFT_OP_OBJ_RESET,
GFP_ATOMIC);
kfree(buf);
@@ -7810,24 +8027,14 @@ static int nf_tables_delobj(struct sk_buff *skb, const struct nfnl_info *info,
return nft_delobj(&ctx, obj);
}
-void nft_obj_notify(struct net *net, const struct nft_table *table,
- struct nft_object *obj, u32 portid, u32 seq, int event,
- u16 flags, int family, int report, gfp_t gfp)
+static void
+__nft_obj_notify(struct net *net, const struct nft_table *table,
+ struct nft_object *obj, u32 portid, u32 seq, int event,
+ u16 flags, int family, int report, gfp_t gfp)
{
struct nftables_pernet *nft_net = nft_pernet(net);
struct sk_buff *skb;
int err;
- char *buf = kasprintf(gfp, "%s:%u",
- table->name, nft_net->base_seq);
-
- audit_log_nfcfg(buf,
- family,
- obj->handle,
- event == NFT_MSG_NEWOBJ ?
- AUDIT_NFT_OP_OBJ_REGISTER :
- AUDIT_NFT_OP_OBJ_UNREGISTER,
- gfp);
- kfree(buf);
if (!report &&
!nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
@@ -7850,13 +8057,35 @@ void nft_obj_notify(struct net *net, const struct nft_table *table,
err:
nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, -ENOBUFS);
}
+
+void nft_obj_notify(struct net *net, const struct nft_table *table,
+ struct nft_object *obj, u32 portid, u32 seq, int event,
+ u16 flags, int family, int report, gfp_t gfp)
+{
+ struct nftables_pernet *nft_net = nft_pernet(net);
+ char *buf = kasprintf(gfp, "%s:%u",
+ table->name, nft_net->base_seq);
+
+ audit_log_nfcfg(buf,
+ family,
+ obj->handle,
+ event == NFT_MSG_NEWOBJ ?
+ AUDIT_NFT_OP_OBJ_REGISTER :
+ AUDIT_NFT_OP_OBJ_UNREGISTER,
+ gfp);
+ kfree(buf);
+
+ __nft_obj_notify(net, table, obj, portid, seq, event,
+ flags, family, report, gfp);
+}
EXPORT_SYMBOL_GPL(nft_obj_notify);
static void nf_tables_obj_notify(const struct nft_ctx *ctx,
struct nft_object *obj, int event)
{
- nft_obj_notify(ctx->net, ctx->table, obj, ctx->portid, ctx->seq, event,
- ctx->flags, ctx->family, ctx->report, GFP_KERNEL);
+ __nft_obj_notify(ctx->net, ctx->table, obj, ctx->portid,
+ ctx->seq, event, ctx->flags, ctx->family,
+ ctx->report, GFP_KERNEL);
}
/*
@@ -8619,6 +8848,7 @@ static int nf_tables_getflowtable(struct sk_buff *skb,
const struct nfnl_info *info,
const struct nlattr * const nla[])
{
+ struct netlink_ext_ack *extack = info->extack;
u8 genmask = nft_genmask_cur(info->net);
u8 family = info->nfmsg->nfgen_family;
struct nft_flowtable *flowtable;
@@ -8644,13 +8874,17 @@ static int nf_tables_getflowtable(struct sk_buff *skb,
table = nft_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE], family,
genmask, 0);
- if (IS_ERR(table))
+ if (IS_ERR(table)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_FLOWTABLE_TABLE]);
return PTR_ERR(table);
+ }
flowtable = nft_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME],
genmask);
- if (IS_ERR(flowtable))
+ if (IS_ERR(flowtable)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_FLOWTABLE_NAME]);
return PTR_ERR(flowtable);
+ }
skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
if (!skb2)
@@ -8906,7 +9140,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
.policy = nft_rule_policy,
},
[NFT_MSG_GETRULE_RESET] = {
- .call = nf_tables_getrule,
+ .call = nf_tables_getrule_reset,
.type = NFNL_CB_RCU,
.attr_count = NFTA_RULE_MAX,
.policy = nft_rule_policy,
@@ -8960,7 +9194,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
.policy = nft_set_elem_list_policy,
},
[NFT_MSG_GETSETELEM_RESET] = {
- .call = nf_tables_getsetelem,
+ .call = nf_tables_getsetelem_reset,
.type = NFNL_CB_RCU,
.attr_count = NFTA_SET_ELEM_LIST_MAX,
.policy = nft_set_elem_list_policy,
@@ -9156,7 +9390,7 @@ static void nft_commit_release(struct nft_trans *trans)
case NFT_MSG_DESTROYSETELEM:
nf_tables_set_elem_destroy(&trans->ctx,
nft_trans_elem_set(trans),
- nft_trans_elem(trans).priv);
+ nft_trans_elem_priv(trans));
break;
case NFT_MSG_DELOBJ:
case NFT_MSG_DESTROYOBJ:
@@ -9385,16 +9619,12 @@ void nft_chain_del(struct nft_chain *chain)
static void nft_trans_gc_setelem_remove(struct nft_ctx *ctx,
struct nft_trans_gc *trans)
{
- void **priv = trans->priv;
+ struct nft_elem_priv **priv = trans->priv;
unsigned int i;
for (i = 0; i < trans->count; i++) {
- struct nft_set_elem elem = {
- .priv = priv[i],
- };
-
- nft_setelem_data_deactivate(ctx->net, trans->set, &elem);
- nft_setelem_remove(ctx->net, trans->set, &elem);
+ nft_setelem_data_deactivate(ctx->net, trans->set, priv[i]);
+ nft_setelem_remove(ctx->net, trans->set, priv[i]);
}
}
@@ -9407,7 +9637,7 @@ void nft_trans_gc_destroy(struct nft_trans_gc *trans)
static void nft_trans_gc_trans_free(struct rcu_head *rcu)
{
- struct nft_set_elem elem = {};
+ struct nft_elem_priv *elem_priv;
struct nft_trans_gc *trans;
struct nft_ctx ctx = {};
unsigned int i;
@@ -9416,11 +9646,11 @@ static void nft_trans_gc_trans_free(struct rcu_head *rcu)
ctx.net = read_pnet(&trans->set->net);
for (i = 0; i < trans->count; i++) {
- elem.priv = trans->priv[i];
- if (!nft_setelem_is_catchall(trans->set, &elem))
+ elem_priv = trans->priv[i];
+ if (!nft_setelem_is_catchall(trans->set, elem_priv))
atomic_dec(&trans->set->nelems);
- nf_tables_set_elem_destroy(&ctx, trans->set, elem.priv);
+ nf_tables_set_elem_destroy(&ctx, trans->set, elem_priv);
}
nft_trans_gc_destroy(trans);
@@ -9518,12 +9748,15 @@ static int nft_trans_gc_space(struct nft_trans_gc *trans)
struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc,
unsigned int gc_seq, gfp_t gfp)
{
+ struct nft_set *set;
+
if (nft_trans_gc_space(gc))
return gc;
+ set = gc->set;
nft_trans_gc_queue_work(gc);
- return nft_trans_gc_alloc(gc->set, gc_seq, gfp);
+ return nft_trans_gc_alloc(set, gc_seq, gfp);
}
void nft_trans_gc_queue_async_done(struct nft_trans_gc *trans)
@@ -9538,15 +9771,18 @@ void nft_trans_gc_queue_async_done(struct nft_trans_gc *trans)
struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp)
{
+ struct nft_set *set;
+
if (WARN_ON_ONCE(!lockdep_commit_lock_is_held(gc->net)))
return NULL;
if (nft_trans_gc_space(gc))
return gc;
+ set = gc->set;
call_rcu(&gc->rcu, nft_trans_gc_trans_free);
- return nft_trans_gc_alloc(gc->set, 0, gfp);
+ return nft_trans_gc_alloc(set, 0, gfp);
}
void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans)
@@ -9561,8 +9797,8 @@ void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans)
call_rcu(&trans->rcu, nft_trans_gc_trans_free);
}
-struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc,
- unsigned int gc_seq)
+struct nft_trans_gc *nft_trans_gc_catchall_async(struct nft_trans_gc *gc,
+ unsigned int gc_seq)
{
struct nft_set_elem_catchall *catchall;
const struct nft_set *set = gc->set;
@@ -9588,6 +9824,35 @@ dead_elem:
return gc;
}
+struct nft_trans_gc *nft_trans_gc_catchall_sync(struct nft_trans_gc *gc)
+{
+ struct nft_set_elem_catchall *catchall, *next;
+ u64 tstamp = nft_net_tstamp(gc->net);
+ const struct nft_set *set = gc->set;
+ struct nft_elem_priv *elem_priv;
+ struct nft_set_ext *ext;
+
+ WARN_ON_ONCE(!lockdep_commit_lock_is_held(gc->net));
+
+ list_for_each_entry_safe(catchall, next, &set->catchall_list, list) {
+ ext = nft_set_elem_ext(set, catchall->elem);
+
+ if (!__nft_set_elem_expired(ext, tstamp))
+ continue;
+
+ gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);
+ if (!gc)
+ return NULL;
+
+ elem_priv = catchall->elem;
+ nft_setelem_data_deactivate(gc->net, gc->set, elem_priv);
+ nft_setelem_catchall_destroy(catchall);
+ nft_trans_gc_elem_add(gc, elem_priv);
+ }
+
+ return gc;
+}
+
static void nf_tables_module_autoload_cleanup(struct net *net)
{
struct nftables_pernet *nft_net = nft_pernet(net);
@@ -9739,7 +10004,7 @@ static void nft_set_commit_update(struct list_head *set_update_list)
list_for_each_entry_safe(set, next, set_update_list, pending_update) {
list_del_init(&set->pending_update);
- if (!set->ops->commit)
+ if (!set->ops->commit || set->dead)
continue;
set->ops->commit(set);
@@ -9966,9 +10231,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
case NFT_MSG_NEWSETELEM:
te = (struct nft_trans_elem *)trans->data;
- nft_setelem_activate(net, te->set, &te->elem);
+ nft_setelem_activate(net, te->set, te->elem_priv);
nf_tables_setelem_notify(&trans->ctx, te->set,
- &te->elem,
+ te->elem_priv,
NFT_MSG_NEWSETELEM);
if (te->set->ops->commit &&
list_empty(&te->set->pending_update)) {
@@ -9982,10 +10247,10 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
te = (struct nft_trans_elem *)trans->data;
nf_tables_setelem_notify(&trans->ctx, te->set,
- &te->elem,
+ te->elem_priv,
trans->msg_type);
- nft_setelem_remove(net, te->set, &te->elem);
- if (!nft_setelem_is_catchall(te->set, &te->elem)) {
+ nft_setelem_remove(net, te->set, te->elem_priv);
+ if (!nft_setelem_is_catchall(te->set, te->elem_priv)) {
atomic_dec(&te->set->nelems);
te->set->ndeact--;
}
@@ -10105,7 +10370,7 @@ static void nf_tables_abort_release(struct nft_trans *trans)
break;
case NFT_MSG_NEWSETELEM:
nft_set_elem_destroy(nft_trans_elem_set(trans),
- nft_trans_elem(trans).priv, true);
+ nft_trans_elem_priv(trans), true);
break;
case NFT_MSG_NEWOBJ:
nft_obj_destroy(&trans->ctx, nft_trans_obj(trans));
@@ -10235,6 +10500,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
nft_trans_destroy(trans);
break;
}
+ nft_trans_set(trans)->dead = 1;
list_del_rcu(&nft_trans_set(trans)->list);
break;
case NFT_MSG_DELSET:
@@ -10252,8 +10518,8 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
break;
}
te = (struct nft_trans_elem *)trans->data;
- nft_setelem_remove(net, te->set, &te->elem);
- if (!nft_setelem_is_catchall(te->set, &te->elem))
+ nft_setelem_remove(net, te->set, te->elem_priv);
+ if (!nft_setelem_is_catchall(te->set, te->elem_priv))
atomic_dec(&te->set->nelems);
if (te->set->ops->abort &&
@@ -10266,9 +10532,9 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
case NFT_MSG_DESTROYSETELEM:
te = (struct nft_trans_elem *)trans->data;
- nft_setelem_data_activate(net, te->set, &te->elem);
- nft_setelem_activate(net, te->set, &te->elem);
- if (!nft_setelem_is_catchall(te->set, &te->elem))
+ nft_setelem_data_activate(net, te->set, te->elem_priv);
+ nft_setelem_activate(net, te->set, te->elem_priv);
+ if (!nft_setelem_is_catchall(te->set, te->elem_priv))
te->set->ndeact--;
if (te->set->ops->abort &&
@@ -10357,6 +10623,7 @@ static bool nf_tables_valid_genid(struct net *net, u32 genid)
bool genid_ok;
mutex_lock(&nft_net->commit_mutex);
+ nft_net->tstamp = get_jiffies_64();
genid_ok = genid == 0 || nft_net->base_seq == genid;
if (!genid_ok)
@@ -10444,9 +10711,9 @@ static int nft_check_loops(const struct nft_ctx *ctx,
static int nf_tables_loop_check_setelem(const struct nft_ctx *ctx,
struct nft_set *set,
const struct nft_set_iter *iter,
- struct nft_set_elem *elem)
+ struct nft_elem_priv *elem_priv)
{
- const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) &&
*nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END)
@@ -10731,16 +10998,10 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
data->verdict.code = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE]));
switch (data->verdict.code) {
- default:
- switch (data->verdict.code & NF_VERDICT_MASK) {
- case NF_ACCEPT:
- case NF_DROP:
- case NF_QUEUE:
- break;
- default:
- return -EINVAL;
- }
- fallthrough;
+ case NF_ACCEPT:
+ case NF_DROP:
+ case NF_QUEUE:
+ break;
case NFT_CONTINUE:
case NFT_BREAK:
case NFT_RETURN:
@@ -10775,6 +11036,8 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
data->verdict.chain = chain;
break;
+ default:
+ return -EINVAL;
}
desc->len = sizeof(data->verdict);
@@ -11010,7 +11273,7 @@ static void __nft_release_table(struct net *net, struct nft_table *table)
ctx.family = table->family;
ctx.table = table;
list_for_each_entry(chain, &table->chains, list) {
- if (nft_chain_is_bound(chain))
+ if (nft_chain_binding(chain))
continue;
ctx.chain = chain;
@@ -11257,4 +11520,5 @@ module_exit(nf_tables_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_DESCRIPTION("Framework for packet filtering and classification");
MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_NFTABLES);
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 4d0ce12221f6..c3e635364701 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -115,7 +115,7 @@ static noinline void __nft_trace_verdict(const struct nft_pktinfo *pkt,
{
enum nft_trace_types type;
- switch (regs->verdict.code) {
+ switch (regs->verdict.code & NF_VERDICT_MASK) {
case NFT_CONTINUE:
case NFT_RETURN:
type = NFT_TRACETYPE_RETURN;
@@ -158,7 +158,7 @@ static bool nft_payload_fast_eval(const struct nft_expr *expr,
else {
if (!(pkt->flags & NFT_PKTINFO_L4PROTO))
return false;
- ptr = skb_network_header(skb) + nft_thoff(pkt);
+ ptr = skb->data + nft_thoff(pkt);
}
ptr += priv->offset;
@@ -308,10 +308,11 @@ next_rule:
switch (regs.verdict.code & NF_VERDICT_MASK) {
case NF_ACCEPT:
- case NF_DROP:
case NF_QUEUE:
case NF_STOLEN:
return regs.verdict.code;
+ case NF_DROP:
+ return NF_DROP_REASON(pkt->skb, SKB_DROP_REASON_NETFILTER_DROP, EPERM);
}
switch (regs.verdict.code) {
@@ -342,6 +343,9 @@ next_rule:
if (static_branch_unlikely(&nft_counters_enabled))
nft_update_chain_stats(basechain, pkt);
+ if (nft_base_chain(basechain)->policy == NF_DROP)
+ return NF_DROP_REASON(pkt->skb, SKB_DROP_REASON_NETFILTER_DROP, EPERM);
+
return nft_base_chain(basechain)->policy;
}
EXPORT_SYMBOL_GPL(nft_do_chain);
diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c
index 6d41c0bd3d78..a83637e3f455 100644
--- a/net/netfilter/nf_tables_trace.c
+++ b/net/netfilter/nf_tables_trace.c
@@ -258,17 +258,21 @@ void nft_trace_notify(const struct nft_pktinfo *pkt,
case __NFT_TRACETYPE_MAX:
break;
case NFT_TRACETYPE_RETURN:
- case NFT_TRACETYPE_RULE:
+ case NFT_TRACETYPE_RULE: {
+ unsigned int v;
+
if (nft_verdict_dump(skb, NFTA_TRACE_VERDICT, verdict))
goto nla_put_failure;
/* pkt->skb undefined iff NF_STOLEN, disable dump */
- if (verdict->code == NF_STOLEN)
+ v = verdict->code & NF_VERDICT_MASK;
+ if (v == NF_STOLEN)
info->packet_dumped = true;
else
mark = pkt->skb->mark;
break;
+ }
case NFT_TRACETYPE_POLICY:
mark = pkt->skb->mark;
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 53c9e76473ba..134e05d31061 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -508,7 +508,7 @@ __build_packet_message(struct nfnl_log_net *log,
htonl(br_port_get_rcu(indev)->br->dev->ifindex)))
goto nla_put_failure;
} else {
- struct net_device *physindev;
+ int physinif;
/* Case 2: indev is bridge group, we need to look for
* physical device (when called from ipv4) */
@@ -516,10 +516,10 @@ __build_packet_message(struct nfnl_log_net *log,
htonl(indev->ifindex)))
goto nla_put_failure;
- physindev = nf_bridge_get_physindev(skb);
- if (physindev &&
+ physinif = nf_bridge_get_physinif(skb);
+ if (physinif &&
nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSINDEV,
- htonl(physindev->ifindex)))
+ htonl(physinif)))
goto nla_put_failure;
}
#endif
@@ -698,8 +698,8 @@ nfulnl_log_packet(struct net *net,
unsigned int plen = 0;
struct nfnl_log_net *log = nfnl_log_pernet(net);
const struct nfnl_ct_hook *nfnl_ct = NULL;
+ enum ip_conntrack_info ctinfo = 0;
struct nf_conn *ct = NULL;
- enum ip_conntrack_info ctinfo;
if (li_user && li_user->type == NF_LOG_TYPE_ULOG)
li = li_user;
diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c
index 8f1bfa6ccc2d..c0fc431991e8 100644
--- a/net/netfilter/nfnetlink_osf.c
+++ b/net/netfilter/nfnetlink_osf.c
@@ -315,6 +315,14 @@ static int nfnl_osf_add_callback(struct sk_buff *skb,
f = nla_data(osf_attrs[OSF_ATTR_FINGER]);
+ if (f->opt_num > ARRAY_SIZE(f->opt))
+ return -EINVAL;
+
+ if (!memchr(f->genre, 0, MAXGENRELEN) ||
+ !memchr(f->subtype, 0, MAXGENRELEN) ||
+ !memchr(f->version, 0, MAXGENRELEN))
+ return -EINVAL;
+
kf = kmalloc(sizeof(struct nf_osf_finger), GFP_KERNEL);
if (!kf)
return -ENOMEM;
@@ -439,4 +447,5 @@ module_init(nfnl_osf_init);
module_exit(nfnl_osf_fini);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Passive OS fingerprint matching");
MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_OSF);
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 556bc902af00..5cf38fc0a366 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -228,19 +228,29 @@ find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id)
static void nfqnl_reinject(struct nf_queue_entry *entry, unsigned int verdict)
{
const struct nf_ct_hook *ct_hook;
- int err;
if (verdict == NF_ACCEPT ||
verdict == NF_REPEAT ||
verdict == NF_STOP) {
+ unsigned int ct_verdict = verdict;
+
rcu_read_lock();
ct_hook = rcu_dereference(nf_ct_hook);
- if (ct_hook) {
- err = ct_hook->update(entry->state.net, entry->skb);
- if (err < 0)
- verdict = NF_DROP;
- }
+ if (ct_hook)
+ ct_verdict = ct_hook->update(entry->state.net, entry->skb);
rcu_read_unlock();
+
+ switch (ct_verdict & NF_VERDICT_MASK) {
+ case NF_ACCEPT:
+ /* follow userspace verdict, could be REPEAT */
+ break;
+ case NF_STOLEN:
+ nf_queue_entry_free(entry);
+ return;
+ default:
+ verdict = ct_verdict & NF_VERDICT_MASK;
+ break;
+ }
}
nf_reinject(entry, verdict);
}
diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c
index e596d1a842f7..f6e791a68101 100644
--- a/net/netfilter/nft_byteorder.c
+++ b/net/netfilter/nft_byteorder.c
@@ -38,13 +38,14 @@ void nft_byteorder_eval(const struct nft_expr *expr,
switch (priv->size) {
case 8: {
+ u64 *dst64 = (void *)dst;
u64 src64;
switch (priv->op) {
case NFT_BYTEORDER_NTOH:
for (i = 0; i < priv->len / 8; i++) {
src64 = nft_reg_load64(&src[i]);
- nft_reg_store64(&dst[i],
+ nft_reg_store64(&dst64[i],
be64_to_cpu((__force __be64)src64));
}
break;
@@ -52,7 +53,7 @@ void nft_byteorder_eval(const struct nft_expr *expr,
for (i = 0; i < priv->len / 8; i++) {
src64 = (__force __u64)
cpu_to_be64(nft_reg_load64(&src[i]));
- nft_reg_store64(&dst[i], src64);
+ nft_reg_store64(&dst64[i], src64);
}
break;
}
diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c
index 680fe557686e..274b6f7e6bb5 100644
--- a/net/netfilter/nft_chain_filter.c
+++ b/net/netfilter/nft_chain_filter.c
@@ -357,9 +357,10 @@ static int nf_tables_netdev_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct nft_base_chain *basechain;
struct nftables_pernet *nft_net;
- struct nft_table *table;
struct nft_chain *chain, *nr;
+ struct nft_table *table;
struct nft_ctx ctx = {
.net = dev_net(dev),
};
@@ -371,7 +372,8 @@ static int nf_tables_netdev_event(struct notifier_block *this,
nft_net = nft_pernet(ctx.net);
mutex_lock(&nft_net->commit_mutex);
list_for_each_entry(table, &nft_net->tables, list) {
- if (table->family != NFPROTO_NETDEV)
+ if (table->family != NFPROTO_NETDEV &&
+ table->family != NFPROTO_INET)
continue;
ctx.family = table->family;
@@ -380,6 +382,11 @@ static int nf_tables_netdev_event(struct notifier_block *this,
if (!nft_is_base_chain(chain))
continue;
+ basechain = nft_base_chain(chain);
+ if (table->family == NFPROTO_INET &&
+ basechain->ops.hooknum != NF_INET_INGRESS)
+ continue;
+
ctx.chain = chain;
nft_netdev_event(event, dev, &ctx);
}
diff --git a/net/netfilter/nft_chain_nat.c b/net/netfilter/nft_chain_nat.c
index 98e4946100c5..40e230d8b712 100644
--- a/net/netfilter/nft_chain_nat.c
+++ b/net/netfilter/nft_chain_nat.c
@@ -137,6 +137,7 @@ module_init(nft_chain_nat_init);
module_exit(nft_chain_nat_exit);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("nftables network address translation support");
#ifdef CONFIG_NF_TABLES_IPV4
MODULE_ALIAS_NFT_CHAIN(AF_INET, "nat");
#endif
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 5284cd2ad532..1f9474fefe84 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -135,7 +135,7 @@ static void nft_target_eval_bridge(const struct nft_expr *expr,
static const struct nla_policy nft_target_policy[NFTA_TARGET_MAX + 1] = {
[NFTA_TARGET_NAME] = { .type = NLA_NUL_STRING },
- [NFTA_TARGET_REV] = { .type = NLA_U32 },
+ [NFTA_TARGET_REV] = NLA_POLICY_MAX(NLA_BE32, 255),
[NFTA_TARGET_INFO] = { .type = NLA_BINARY },
};
@@ -200,6 +200,7 @@ static const struct nla_policy nft_rule_compat_policy[NFTA_RULE_COMPAT_MAX + 1]
static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv)
{
struct nlattr *tb[NFTA_RULE_COMPAT_MAX+1];
+ u32 l4proto;
u32 flags;
int err;
@@ -212,12 +213,18 @@ static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv)
return -EINVAL;
flags = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_FLAGS]));
- if (flags & ~NFT_RULE_COMPAT_F_MASK)
+ if (flags & NFT_RULE_COMPAT_F_UNUSED ||
+ flags & ~NFT_RULE_COMPAT_F_MASK)
return -EINVAL;
if (flags & NFT_RULE_COMPAT_F_INV)
*inv = true;
- *proto = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_PROTO]));
+ l4proto = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_PROTO]));
+ if (l4proto > U16_MAX)
+ return -EINVAL;
+
+ *proto = l4proto;
+
return 0;
}
@@ -350,6 +357,12 @@ static int nft_target_validate(const struct nft_ctx *ctx,
unsigned int hook_mask = 0;
int ret;
+ if (ctx->family != NFPROTO_IPV4 &&
+ ctx->family != NFPROTO_IPV6 &&
+ ctx->family != NFPROTO_BRIDGE &&
+ ctx->family != NFPROTO_ARP)
+ return -EOPNOTSUPP;
+
if (nft_is_base_chain(ctx->chain)) {
const struct nft_base_chain *basechain =
nft_base_chain(ctx->chain);
@@ -413,7 +426,7 @@ static void nft_match_eval(const struct nft_expr *expr,
static const struct nla_policy nft_match_policy[NFTA_MATCH_MAX + 1] = {
[NFTA_MATCH_NAME] = { .type = NLA_NUL_STRING },
- [NFTA_MATCH_REV] = { .type = NLA_U32 },
+ [NFTA_MATCH_REV] = NLA_POLICY_MAX(NLA_BE32, 255),
[NFTA_MATCH_INFO] = { .type = NLA_BINARY },
};
@@ -595,6 +608,12 @@ static int nft_match_validate(const struct nft_ctx *ctx,
unsigned int hook_mask = 0;
int ret;
+ if (ctx->family != NFPROTO_IPV4 &&
+ ctx->family != NFPROTO_IPV6 &&
+ ctx->family != NFPROTO_BRIDGE &&
+ ctx->family != NFPROTO_ARP)
+ return -EOPNOTSUPP;
+
if (nft_is_base_chain(ctx->chain)) {
const struct nft_base_chain *basechain =
nft_base_chain(ctx->chain);
@@ -712,7 +731,7 @@ out_put:
static const struct nla_policy nfnl_compat_policy_get[NFTA_COMPAT_MAX+1] = {
[NFTA_COMPAT_NAME] = { .type = NLA_NUL_STRING,
.len = NFT_COMPAT_NAME_MAX-1 },
- [NFTA_COMPAT_REV] = { .type = NLA_U32 },
+ [NFTA_COMPAT_REV] = NLA_POLICY_MAX(NLA_BE32, 255),
[NFTA_COMPAT_TYPE] = { .type = NLA_U32 },
};
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 86bb9d7797d9..bfd3e5a14dab 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -476,6 +476,9 @@ static int nft_ct_get_init(const struct nft_ctx *ctx,
break;
#endif
case NFT_CT_ID:
+ if (tb[NFTA_CT_DIRECTION])
+ return -EINVAL;
+
len = sizeof(u32);
break;
default:
@@ -1250,7 +1253,31 @@ static int nft_ct_expect_obj_init(const struct nft_ctx *ctx,
if (tb[NFTA_CT_EXPECT_L3PROTO])
priv->l3num = ntohs(nla_get_be16(tb[NFTA_CT_EXPECT_L3PROTO]));
+ switch (priv->l3num) {
+ case NFPROTO_IPV4:
+ case NFPROTO_IPV6:
+ if (priv->l3num != ctx->family)
+ return -EINVAL;
+
+ fallthrough;
+ case NFPROTO_INET:
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
priv->l4proto = nla_get_u8(tb[NFTA_CT_EXPECT_L4PROTO]);
+ switch (priv->l4proto) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ case IPPROTO_UDPLITE:
+ case IPPROTO_DCCP:
+ case IPPROTO_SCTP:
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
priv->dport = nla_get_be16(tb[NFTA_CT_EXPECT_DPORT]);
priv->timeout = nla_get_u32(tb[NFTA_CT_EXPECT_TIMEOUT]);
priv->size = nla_get_u8(tb[NFTA_CT_EXPECT_SIZE]);
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 5c5cc01c73c5..c09dba57354c 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -44,33 +44,34 @@ static int nft_dynset_expr_setup(const struct nft_dynset *priv,
return 0;
}
-static void *nft_dynset_new(struct nft_set *set, const struct nft_expr *expr,
- struct nft_regs *regs)
+static struct nft_elem_priv *nft_dynset_new(struct nft_set *set,
+ const struct nft_expr *expr,
+ struct nft_regs *regs)
{
const struct nft_dynset *priv = nft_expr_priv(expr);
struct nft_set_ext *ext;
+ void *elem_priv;
u64 timeout;
- void *elem;
if (!atomic_add_unless(&set->nelems, 1, set->size))
return NULL;
timeout = priv->timeout ? : set->timeout;
- elem = nft_set_elem_init(set, &priv->tmpl,
- &regs->data[priv->sreg_key], NULL,
- &regs->data[priv->sreg_data],
- timeout, 0, GFP_ATOMIC);
- if (IS_ERR(elem))
+ elem_priv = nft_set_elem_init(set, &priv->tmpl,
+ &regs->data[priv->sreg_key], NULL,
+ &regs->data[priv->sreg_data],
+ timeout, 0, GFP_ATOMIC);
+ if (IS_ERR(elem_priv))
goto err1;
- ext = nft_set_elem_ext(set, elem);
+ ext = nft_set_elem_ext(set, elem_priv);
if (priv->num_exprs && nft_dynset_expr_setup(priv, ext) < 0)
goto err2;
- return elem;
+ return elem_priv;
err2:
- nft_set_elem_destroy(set, elem, false);
+ nft_set_elem_destroy(set, elem_priv, false);
err1:
if (set->size)
atomic_dec(&set->nelems);
@@ -279,10 +280,15 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
priv->expr_array[i] = dynset_expr;
priv->num_exprs++;
- if (set->num_exprs &&
- dynset_expr->ops != set->exprs[i]->ops) {
- err = -EOPNOTSUPP;
- goto err_expr_free;
+ if (set->num_exprs) {
+ if (i >= set->num_exprs) {
+ err = -EINVAL;
+ goto err_expr_free;
+ }
+ if (dynset_expr->ops != set->exprs[i]->ops) {
+ err = -EOPNOTSUPP;
+ goto err_expr_free;
+ }
}
i++;
}
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index 7f856ceb3a66..6eb571d0c3fd 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -35,6 +35,14 @@ static unsigned int optlen(const u8 *opt, unsigned int offset)
return opt[offset + 1];
}
+static int nft_skb_copy_to_reg(const struct sk_buff *skb, int offset, u32 *dest, unsigned int len)
+{
+ if (len % NFT_REG32_SIZE)
+ dest[len / NFT_REG32_SIZE] = 0;
+
+ return skb_copy_bits(skb, offset, dest, len);
+}
+
static void nft_exthdr_ipv6_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -56,8 +64,7 @@ static void nft_exthdr_ipv6_eval(const struct nft_expr *expr,
}
offset += priv->offset;
- dest[priv->len / NFT_REG32_SIZE] = 0;
- if (skb_copy_bits(pkt->skb, offset, dest, priv->len) < 0)
+ if (nft_skb_copy_to_reg(pkt->skb, offset, dest, priv->len) < 0)
goto err;
return;
err:
@@ -153,8 +160,7 @@ static void nft_exthdr_ipv4_eval(const struct nft_expr *expr,
}
offset += priv->offset;
- dest[priv->len / NFT_REG32_SIZE] = 0;
- if (skb_copy_bits(pkt->skb, offset, dest, priv->len) < 0)
+ if (nft_skb_copy_to_reg(pkt->skb, offset, dest, priv->len) < 0)
goto err;
return;
err:
@@ -208,9 +214,10 @@ static void nft_exthdr_tcp_eval(const struct nft_expr *expr,
offset = i + priv->offset;
if (priv->flags & NFT_EXTHDR_F_PRESENT) {
- *dest = 1;
+ nft_reg_store8(dest, 1);
} else {
- dest[priv->len / NFT_REG32_SIZE] = 0;
+ if (priv->len % NFT_REG32_SIZE)
+ dest[priv->len / NFT_REG32_SIZE] = 0;
memcpy(dest, opt + offset, priv->len);
}
@@ -238,7 +245,12 @@ static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr,
if (!tcph)
goto err;
+ if (skb_ensure_writable(pkt->skb, nft_thoff(pkt) + tcphdr_len))
+ goto err;
+
+ tcph = (struct tcphdr *)(pkt->skb->data + nft_thoff(pkt));
opt = (u8 *)tcph;
+
for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) {
union {
__be16 v16;
@@ -253,15 +265,6 @@ static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr,
if (i + optl > tcphdr_len || priv->len + priv->offset > optl)
goto err;
- if (skb_ensure_writable(pkt->skb,
- nft_thoff(pkt) + i + priv->len))
- goto err;
-
- tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff,
- &tcphdr_len);
- if (!tcph)
- goto err;
-
offset = i + priv->offset;
switch (priv->len) {
@@ -325,9 +328,9 @@ static void nft_exthdr_tcp_strip_eval(const struct nft_expr *expr,
if (skb_ensure_writable(pkt->skb, nft_thoff(pkt) + tcphdr_len))
goto drop;
- opt = (u8 *)nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len);
- if (!opt)
- goto err;
+ tcph = (struct tcphdr *)(pkt->skb->data + nft_thoff(pkt));
+ opt = (u8 *)tcph;
+
for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) {
unsigned int j;
@@ -392,9 +395,8 @@ static void nft_exthdr_sctp_eval(const struct nft_expr *expr,
offset + ntohs(sch->length) > pkt->skb->len)
break;
- dest[priv->len / NFT_REG32_SIZE] = 0;
- if (skb_copy_bits(pkt->skb, offset + priv->offset,
- dest, priv->len) < 0)
+ if (nft_skb_copy_to_reg(pkt->skb, offset + priv->offset,
+ dest, priv->len) < 0)
break;
return;
}
@@ -459,7 +461,7 @@ static void nft_exthdr_dccp_eval(const struct nft_expr *expr,
type = bufp[0];
if (type == priv->type) {
- *dest = 1;
+ nft_reg_store8(dest, 1);
return;
}
diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c
index 04b51f285332..37cfe6dd712d 100644
--- a/net/netfilter/nft_fib.c
+++ b/net/netfilter/nft_fib.c
@@ -145,11 +145,15 @@ void nft_fib_store_result(void *reg, const struct nft_fib *priv,
switch (priv->result) {
case NFT_FIB_RESULT_OIF:
index = dev ? dev->ifindex : 0;
- *dreg = (priv->flags & NFTA_FIB_F_PRESENT) ? !!index : index;
+ if (priv->flags & NFTA_FIB_F_PRESENT)
+ nft_reg_store8(dreg, !!index);
+ else
+ *dreg = index;
+
break;
case NFT_FIB_RESULT_OIFNAME:
if (priv->flags & NFTA_FIB_F_PRESENT)
- *dreg = !!dev;
+ nft_reg_store8(dreg, !!dev);
else
strscpy_pad(reg, dev ? dev->name : "", IFNAMSIZ);
break;
@@ -204,4 +208,5 @@ bool nft_fib_reduce(struct nft_regs_track *track,
EXPORT_SYMBOL_GPL(nft_fib_reduce);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Query routing table from nftables");
MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index ab3362c483b4..397351fa4d5f 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -384,6 +384,11 @@ static int nft_flow_offload_validate(const struct nft_ctx *ctx,
{
unsigned int hook_mask = (1 << NF_INET_FORWARD);
+ if (ctx->family != NFPROTO_IPV4 &&
+ ctx->family != NFPROTO_IPV6 &&
+ ctx->family != NFPROTO_INET)
+ return -EOPNOTSUPP;
+
return nft_chain_validate_hooks(ctx->chain, hook_mask);
}
diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c
index a5268e6dd32f..358e742afad7 100644
--- a/net/netfilter/nft_fwd_netdev.c
+++ b/net/netfilter/nft_fwd_netdev.c
@@ -270,4 +270,5 @@ module_exit(nft_fwd_netdev_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_DESCRIPTION("nftables netdev packet forwarding support");
MODULE_ALIAS_NFT_AF_EXPR(5, "fwd");
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
index fccb3cf7749c..6475c7abc1fe 100644
--- a/net/netfilter/nft_immediate.c
+++ b/net/netfilter/nft_immediate.c
@@ -78,7 +78,7 @@ static int nft_immediate_init(const struct nft_ctx *ctx,
case NFT_GOTO:
err = nf_tables_bind_chain(ctx, chain);
if (err < 0)
- return err;
+ goto err1;
break;
default:
break;
diff --git a/net/netfilter/nft_inner.c b/net/netfilter/nft_inner.c
index 28e2873ba24e..928312d01eb1 100644
--- a/net/netfilter/nft_inner.c
+++ b/net/netfilter/nft_inner.c
@@ -298,6 +298,7 @@ static int nft_inner_init(const struct nft_ctx *ctx,
int err;
if (!tb[NFTA_INNER_FLAGS] ||
+ !tb[NFTA_INNER_NUM] ||
!tb[NFTA_INNER_HDRSIZE] ||
!tb[NFTA_INNER_TYPE] ||
!tb[NFTA_INNER_EXPR])
diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
index 145dc62c6247..cefa25e0dbb0 100644
--- a/net/netfilter/nft_limit.c
+++ b/net/netfilter/nft_limit.c
@@ -58,16 +58,19 @@ static inline bool nft_limit_eval(struct nft_limit_priv *priv, u64 cost)
static int nft_limit_init(struct nft_limit_priv *priv,
const struct nlattr * const tb[], bool pkts)
{
- u64 unit, tokens;
+ u64 unit, tokens, rate_with_burst;
+ bool invert = false;
if (tb[NFTA_LIMIT_RATE] == NULL ||
tb[NFTA_LIMIT_UNIT] == NULL)
return -EINVAL;
priv->rate = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_RATE]));
+ if (priv->rate == 0)
+ return -EINVAL;
+
unit = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_UNIT]));
- priv->nsecs = unit * NSEC_PER_SEC;
- if (priv->rate == 0 || priv->nsecs < unit)
+ if (check_mul_overflow(unit, NSEC_PER_SEC, &priv->nsecs))
return -EOVERFLOW;
if (tb[NFTA_LIMIT_BURST])
@@ -76,18 +79,35 @@ static int nft_limit_init(struct nft_limit_priv *priv,
if (pkts && priv->burst == 0)
priv->burst = NFT_LIMIT_PKT_BURST_DEFAULT;
- if (priv->rate + priv->burst < priv->rate)
+ if (check_add_overflow(priv->rate, priv->burst, &rate_with_burst))
return -EOVERFLOW;
if (pkts) {
- tokens = div64_u64(priv->nsecs, priv->rate) * priv->burst;
+ u64 tmp = div64_u64(priv->nsecs, priv->rate);
+
+ if (check_mul_overflow(tmp, priv->burst, &tokens))
+ return -EOVERFLOW;
} else {
+ u64 tmp;
+
/* The token bucket size limits the number of tokens can be
* accumulated. tokens_max specifies the bucket size.
* tokens_max = unit * (rate + burst) / rate.
*/
- tokens = div64_u64(priv->nsecs * (priv->rate + priv->burst),
- priv->rate);
+ if (check_mul_overflow(priv->nsecs, rate_with_burst, &tmp))
+ return -EOVERFLOW;
+
+ tokens = div64_u64(tmp, priv->rate);
+ }
+
+ if (tb[NFTA_LIMIT_FLAGS]) {
+ u32 flags = ntohl(nla_get_be32(tb[NFTA_LIMIT_FLAGS]));
+
+ if (flags & ~NFT_LIMIT_F_INV)
+ return -EOPNOTSUPP;
+
+ if (flags & NFT_LIMIT_F_INV)
+ invert = true;
}
priv->limit = kmalloc(sizeof(*priv->limit), GFP_KERNEL_ACCOUNT);
@@ -96,13 +116,7 @@ static int nft_limit_init(struct nft_limit_priv *priv,
priv->limit->tokens = tokens;
priv->tokens_max = priv->limit->tokens;
-
- if (tb[NFTA_LIMIT_FLAGS]) {
- u32 flags = ntohl(nla_get_be32(tb[NFTA_LIMIT_FLAGS]));
-
- if (flags & NFT_LIMIT_F_INV)
- priv->invert = true;
- }
+ priv->invert = invert;
priv->limit->last = ktime_get_ns();
spin_lock_init(&priv->limit->lock);
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index f7da7c43333b..ba0d3683a45d 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -63,7 +63,7 @@ nft_meta_get_eval_time(enum nft_meta_keys key,
{
switch (key) {
case NFT_META_TIME_NS:
- nft_reg_store64(dest, ktime_get_real_ns());
+ nft_reg_store64((u64 *)dest, ktime_get_real_ns());
break;
case NFT_META_TIME_DAY:
nft_reg_store8(dest, nft_meta_weekday());
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index 583885ce7232..808f5802c270 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -143,6 +143,11 @@ static int nft_nat_validate(const struct nft_ctx *ctx,
struct nft_nat *priv = nft_expr_priv(expr);
int err;
+ if (ctx->family != NFPROTO_IPV4 &&
+ ctx->family != NFPROTO_IPV6 &&
+ ctx->family != NFPROTO_INET)
+ return -EOPNOTSUPP;
+
err = nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT);
if (err < 0)
return err;
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index 8cb800989947..0a689c8e0295 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -154,6 +154,17 @@ int nft_payload_inner_offset(const struct nft_pktinfo *pkt)
return pkt->inneroff;
}
+static bool nft_payload_need_vlan_copy(const struct nft_payload *priv)
+{
+ unsigned int len = priv->offset + priv->len;
+
+ /* data past ether src/dst requested, copy needed */
+ if (len > offsetof(struct ethhdr, h_proto))
+ return true;
+
+ return false;
+}
+
void nft_payload_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -168,11 +179,11 @@ void nft_payload_eval(const struct nft_expr *expr,
switch (priv->base) {
case NFT_PAYLOAD_LL_HEADER:
- if (!skb_mac_header_was_set(skb))
+ if (!skb_mac_header_was_set(skb) || skb_mac_header_len(skb) == 0)
goto err;
if (skb_vlan_tag_present(skb) &&
- priv->offset >= offsetof(struct ethhdr, h_proto)) {
+ nft_payload_need_vlan_copy(priv)) {
if (!nft_payload_copy_vlan(dest, skb,
priv->offset, priv->len))
goto err;
diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c
index 35a2c28caa60..24d977138572 100644
--- a/net/netfilter/nft_rt.c
+++ b/net/netfilter/nft_rt.c
@@ -166,6 +166,11 @@ static int nft_rt_validate(const struct nft_ctx *ctx, const struct nft_expr *exp
const struct nft_rt *priv = nft_expr_priv(expr);
unsigned int hooks;
+ if (ctx->family != NFPROTO_IPV4 &&
+ ctx->family != NFPROTO_IPV6 &&
+ ctx->family != NFPROTO_INET)
+ return -EOPNOTSUPP;
+
switch (priv->key) {
case NFT_RT_NEXTHOP4:
case NFT_RT_NEXTHOP6:
diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c
index 1e5e7a181e0b..32df7a16835d 100644
--- a/net/netfilter/nft_set_bitmap.c
+++ b/net/netfilter/nft_set_bitmap.c
@@ -13,6 +13,7 @@
#include <net/netfilter/nf_tables_core.h>
struct nft_bitmap_elem {
+ struct nft_elem_priv priv;
struct list_head head;
struct nft_set_ext ext;
};
@@ -104,8 +105,9 @@ nft_bitmap_elem_find(const struct nft_set *set, struct nft_bitmap_elem *this,
return NULL;
}
-static void *nft_bitmap_get(const struct net *net, const struct nft_set *set,
- const struct nft_set_elem *elem, unsigned int flags)
+static struct nft_elem_priv *
+nft_bitmap_get(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem, unsigned int flags)
{
const struct nft_bitmap *priv = nft_set_priv(set);
u8 genmask = nft_genmask_cur(net);
@@ -116,23 +118,23 @@ static void *nft_bitmap_get(const struct net *net, const struct nft_set *set,
!nft_set_elem_active(&be->ext, genmask))
continue;
- return be;
+ return &be->priv;
}
return ERR_PTR(-ENOENT);
}
static int nft_bitmap_insert(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem,
- struct nft_set_ext **ext)
+ struct nft_elem_priv **elem_priv)
{
+ struct nft_bitmap_elem *new = nft_elem_priv_cast(elem->priv), *be;
struct nft_bitmap *priv = nft_set_priv(set);
- struct nft_bitmap_elem *new = elem->priv, *be;
u8 genmask = nft_genmask_next(net);
u32 idx, off;
be = nft_bitmap_elem_find(set, new, genmask);
if (be) {
- *ext = &be->ext;
+ *elem_priv = &be->priv;
return -EEXIST;
}
@@ -144,12 +146,11 @@ static int nft_bitmap_insert(const struct net *net, const struct nft_set *set,
return 0;
}
-static void nft_bitmap_remove(const struct net *net,
- const struct nft_set *set,
- const struct nft_set_elem *elem)
+static void nft_bitmap_remove(const struct net *net, const struct nft_set *set,
+ struct nft_elem_priv *elem_priv)
{
+ struct nft_bitmap_elem *be = nft_elem_priv_cast(elem_priv);
struct nft_bitmap *priv = nft_set_priv(set);
- struct nft_bitmap_elem *be = elem->priv;
u8 genmask = nft_genmask_next(net);
u32 idx, off;
@@ -161,10 +162,10 @@ static void nft_bitmap_remove(const struct net *net,
static void nft_bitmap_activate(const struct net *net,
const struct nft_set *set,
- const struct nft_set_elem *elem)
+ struct nft_elem_priv *elem_priv)
{
+ struct nft_bitmap_elem *be = nft_elem_priv_cast(elem_priv);
struct nft_bitmap *priv = nft_set_priv(set);
- struct nft_bitmap_elem *be = elem->priv;
u8 genmask = nft_genmask_next(net);
u32 idx, off;
@@ -174,28 +175,27 @@ static void nft_bitmap_activate(const struct net *net,
nft_set_elem_change_active(net, set, &be->ext);
}
-static bool nft_bitmap_flush(const struct net *net,
- const struct nft_set *set, void *_be)
+static void nft_bitmap_flush(const struct net *net,
+ const struct nft_set *set,
+ struct nft_elem_priv *elem_priv)
{
+ struct nft_bitmap_elem *be = nft_elem_priv_cast(elem_priv);
struct nft_bitmap *priv = nft_set_priv(set);
u8 genmask = nft_genmask_next(net);
- struct nft_bitmap_elem *be = _be;
u32 idx, off;
nft_bitmap_location(set, nft_set_ext_key(&be->ext), &idx, &off);
/* Enter 10 state, similar to deactivation. */
priv->bitmap[idx] &= ~(genmask << off);
nft_set_elem_change_active(net, set, &be->ext);
-
- return true;
}
-static void *nft_bitmap_deactivate(const struct net *net,
- const struct nft_set *set,
- const struct nft_set_elem *elem)
+static struct nft_elem_priv *
+nft_bitmap_deactivate(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem)
{
+ struct nft_bitmap_elem *this = nft_elem_priv_cast(elem->priv), *be;
struct nft_bitmap *priv = nft_set_priv(set);
- struct nft_bitmap_elem *this = elem->priv, *be;
u8 genmask = nft_genmask_next(net);
u32 idx, off;
@@ -209,7 +209,7 @@ static void *nft_bitmap_deactivate(const struct net *net,
priv->bitmap[idx] &= ~(genmask << off);
nft_set_elem_change_active(net, set, &be->ext);
- return be;
+ return &be->priv;
}
static void nft_bitmap_walk(const struct nft_ctx *ctx,
@@ -218,7 +218,6 @@ static void nft_bitmap_walk(const struct nft_ctx *ctx,
{
const struct nft_bitmap *priv = nft_set_priv(set);
struct nft_bitmap_elem *be;
- struct nft_set_elem elem;
list_for_each_entry_rcu(be, &priv->list, head) {
if (iter->count < iter->skip)
@@ -226,9 +225,7 @@ static void nft_bitmap_walk(const struct nft_ctx *ctx,
if (!nft_set_elem_active(&be->ext, iter->genmask))
goto cont;
- elem.priv = be;
-
- iter->err = iter->fn(ctx, set, iter, &elem);
+ iter->err = iter->fn(ctx, set, iter, &be->priv);
if (iter->err < 0)
return;
@@ -265,6 +262,8 @@ static int nft_bitmap_init(const struct nft_set *set,
{
struct nft_bitmap *priv = nft_set_priv(set);
+ BUILD_BUG_ON(offsetof(struct nft_bitmap_elem, priv) != 0);
+
INIT_LIST_HEAD(&priv->list);
priv->bitmap_size = nft_bitmap_size(set->klen);
@@ -278,7 +277,7 @@ static void nft_bitmap_destroy(const struct nft_ctx *ctx,
struct nft_bitmap_elem *be, *n;
list_for_each_entry_safe(be, n, &priv->list, head)
- nf_tables_set_elem_destroy(ctx, set, be);
+ nf_tables_set_elem_destroy(ctx, set, &be->priv);
}
static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features,
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 524763659f25..6968a3b34236 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -27,6 +27,7 @@ struct nft_rhash {
};
struct nft_rhash_elem {
+ struct nft_elem_priv priv;
struct rhash_head node;
struct nft_set_ext ext;
};
@@ -35,6 +36,7 @@ struct nft_rhash_cmp_arg {
const struct nft_set *set;
const u32 *key;
u8 genmask;
+ u64 tstamp;
};
static inline u32 nft_rhash_key(const void *data, u32 len, u32 seed)
@@ -61,7 +63,7 @@ static inline int nft_rhash_cmp(struct rhashtable_compare_arg *arg,
return 1;
if (nft_set_elem_is_dead(&he->ext))
return 1;
- if (nft_set_elem_expired(&he->ext))
+ if (__nft_set_elem_expired(&he->ext, x->tstamp))
return 1;
if (!nft_set_elem_active(&he->ext, x->genmask))
return 1;
@@ -86,6 +88,7 @@ bool nft_rhash_lookup(const struct net *net, const struct nft_set *set,
.genmask = nft_genmask_cur(net),
.set = set,
.key = key,
+ .tstamp = get_jiffies_64(),
};
he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
@@ -95,8 +98,9 @@ bool nft_rhash_lookup(const struct net *net, const struct nft_set *set,
return !!he;
}
-static void *nft_rhash_get(const struct net *net, const struct nft_set *set,
- const struct nft_set_elem *elem, unsigned int flags)
+static struct nft_elem_priv *
+nft_rhash_get(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem, unsigned int flags)
{
struct nft_rhash *priv = nft_set_priv(set);
struct nft_rhash_elem *he;
@@ -104,17 +108,19 @@ static void *nft_rhash_get(const struct net *net, const struct nft_set *set,
.genmask = nft_genmask_cur(net),
.set = set,
.key = elem->key.val.data,
+ .tstamp = get_jiffies_64(),
};
he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
if (he != NULL)
- return he;
+ return &he->priv;
return ERR_PTR(-ENOENT);
}
static bool nft_rhash_update(struct nft_set *set, const u32 *key,
- void *(*new)(struct nft_set *,
+ struct nft_elem_priv *
+ (*new)(struct nft_set *,
const struct nft_expr *,
struct nft_regs *regs),
const struct nft_expr *expr,
@@ -123,20 +129,23 @@ static bool nft_rhash_update(struct nft_set *set, const u32 *key,
{
struct nft_rhash *priv = nft_set_priv(set);
struct nft_rhash_elem *he, *prev;
+ struct nft_elem_priv *elem_priv;
struct nft_rhash_cmp_arg arg = {
.genmask = NFT_GENMASK_ANY,
.set = set,
.key = key,
+ .tstamp = get_jiffies_64(),
};
he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
if (he != NULL)
goto out;
- he = new(set, expr, regs);
- if (he == NULL)
+ elem_priv = new(set, expr, regs);
+ if (!elem_priv)
goto err1;
+ he = nft_elem_priv_cast(elem_priv);
prev = rhashtable_lookup_get_insert_key(&priv->ht, &arg, &he->node,
nft_rhash_params);
if (IS_ERR(prev))
@@ -144,7 +153,7 @@ static bool nft_rhash_update(struct nft_set *set, const u32 *key,
/* Another cpu may race to insert the element with the same key */
if (prev) {
- nft_set_elem_destroy(set, he, true);
+ nft_set_elem_destroy(set, &he->priv, true);
atomic_dec(&set->nelems);
he = prev;
}
@@ -154,7 +163,7 @@ out:
return true;
err2:
- nft_set_elem_destroy(set, he, true);
+ nft_set_elem_destroy(set, &he->priv, true);
atomic_dec(&set->nelems);
err1:
return false;
@@ -162,14 +171,15 @@ err1:
static int nft_rhash_insert(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem,
- struct nft_set_ext **ext)
+ struct nft_elem_priv **elem_priv)
{
+ struct nft_rhash_elem *he = nft_elem_priv_cast(elem->priv);
struct nft_rhash *priv = nft_set_priv(set);
- struct nft_rhash_elem *he = elem->priv;
struct nft_rhash_cmp_arg arg = {
.genmask = nft_genmask_next(net),
.set = set,
.key = elem->key.val.data,
+ .tstamp = nft_net_tstamp(net),
};
struct nft_rhash_elem *prev;
@@ -178,33 +188,32 @@ static int nft_rhash_insert(const struct net *net, const struct nft_set *set,
if (IS_ERR(prev))
return PTR_ERR(prev);
if (prev) {
- *ext = &prev->ext;
+ *elem_priv = &prev->priv;
return -EEXIST;
}
return 0;
}
static void nft_rhash_activate(const struct net *net, const struct nft_set *set,
- const struct nft_set_elem *elem)
+ struct nft_elem_priv *elem_priv)
{
- struct nft_rhash_elem *he = elem->priv;
+ struct nft_rhash_elem *he = nft_elem_priv_cast(elem_priv);
nft_set_elem_change_active(net, set, &he->ext);
}
-static bool nft_rhash_flush(const struct net *net,
- const struct nft_set *set, void *priv)
+static void nft_rhash_flush(const struct net *net,
+ const struct nft_set *set,
+ struct nft_elem_priv *elem_priv)
{
- struct nft_rhash_elem *he = priv;
+ struct nft_rhash_elem *he = nft_elem_priv_cast(elem_priv);
nft_set_elem_change_active(net, set, &he->ext);
-
- return true;
}
-static void *nft_rhash_deactivate(const struct net *net,
- const struct nft_set *set,
- const struct nft_set_elem *elem)
+static struct nft_elem_priv *
+nft_rhash_deactivate(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem)
{
struct nft_rhash *priv = nft_set_priv(set);
struct nft_rhash_elem *he;
@@ -212,6 +221,7 @@ static void *nft_rhash_deactivate(const struct net *net,
.genmask = nft_genmask_next(net),
.set = set,
.key = elem->key.val.data,
+ .tstamp = nft_net_tstamp(net),
};
rcu_read_lock();
@@ -221,15 +231,15 @@ static void *nft_rhash_deactivate(const struct net *net,
rcu_read_unlock();
- return he;
+ return &he->priv;
}
static void nft_rhash_remove(const struct net *net,
const struct nft_set *set,
- const struct nft_set_elem *elem)
+ struct nft_elem_priv *elem_priv)
{
+ struct nft_rhash_elem *he = nft_elem_priv_cast(elem_priv);
struct nft_rhash *priv = nft_set_priv(set);
- struct nft_rhash_elem *he = elem->priv;
rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params);
}
@@ -260,7 +270,6 @@ static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_rhash *priv = nft_set_priv(set);
struct nft_rhash_elem *he;
struct rhashtable_iter hti;
- struct nft_set_elem elem;
rhashtable_walk_enter(&priv->ht, &hti);
rhashtable_walk_start(&hti);
@@ -280,9 +289,7 @@ static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set,
if (!nft_set_elem_active(&he->ext, iter->genmask))
goto cont;
- elem.priv = he;
-
- iter->err = iter->fn(ctx, set, iter, &elem);
+ iter->err = iter->fn(ctx, set, iter, &he->priv);
if (iter->err < 0)
break;
@@ -338,12 +345,9 @@ static void nft_rhash_gc(struct work_struct *work)
while ((he = rhashtable_walk_next(&hti))) {
if (IS_ERR(he)) {
- if (PTR_ERR(he) != -EAGAIN) {
- nft_trans_gc_destroy(gc);
- gc = NULL;
- goto try_later;
- }
- continue;
+ nft_trans_gc_destroy(gc);
+ gc = NULL;
+ goto try_later;
}
/* Ruleset has been updated, try later. */
@@ -372,7 +376,7 @@ dead_elem:
nft_trans_gc_elem_add(gc, he);
}
- gc = nft_trans_gc_catchall(gc, gc_seq);
+ gc = nft_trans_gc_catchall_async(gc, gc_seq);
try_later:
/* catchall list iteration requires rcu read side lock. */
@@ -409,6 +413,8 @@ static int nft_rhash_init(const struct nft_set *set,
struct rhashtable_params params = nft_rhash_params;
int err;
+ BUILD_BUG_ON(offsetof(struct nft_rhash_elem, priv) != 0);
+
params.nelem_hint = desc->size ?: NFT_RHASH_ELEMENT_HINT;
params.key_len = set->klen;
@@ -431,8 +437,9 @@ struct nft_rhash_ctx {
static void nft_rhash_elem_destroy(void *ptr, void *arg)
{
struct nft_rhash_ctx *rhash_ctx = arg;
+ struct nft_rhash_elem *he = ptr;
- nf_tables_set_elem_destroy(&rhash_ctx->ctx, rhash_ctx->set, ptr);
+ nf_tables_set_elem_destroy(&rhash_ctx->ctx, rhash_ctx->set, &he->priv);
}
static void nft_rhash_destroy(const struct nft_ctx *ctx,
@@ -479,6 +486,7 @@ struct nft_hash {
};
struct nft_hash_elem {
+ struct nft_elem_priv priv;
struct hlist_node node;
struct nft_set_ext ext;
};
@@ -504,8 +512,9 @@ bool nft_hash_lookup(const struct net *net, const struct nft_set *set,
return false;
}
-static void *nft_hash_get(const struct net *net, const struct nft_set *set,
- const struct nft_set_elem *elem, unsigned int flags)
+static struct nft_elem_priv *
+nft_hash_get(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem, unsigned int flags)
{
struct nft_hash *priv = nft_set_priv(set);
u8 genmask = nft_genmask_cur(net);
@@ -517,7 +526,7 @@ static void *nft_hash_get(const struct net *net, const struct nft_set *set,
hlist_for_each_entry_rcu(he, &priv->table[hash], node) {
if (!memcmp(nft_set_ext_key(&he->ext), elem->key.val.data, set->klen) &&
nft_set_elem_active(&he->ext, genmask))
- return he;
+ return &he->priv;
}
return ERR_PTR(-ENOENT);
}
@@ -565,9 +574,9 @@ static u32 nft_jhash(const struct nft_set *set, const struct nft_hash *priv,
static int nft_hash_insert(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem,
- struct nft_set_ext **ext)
+ struct nft_elem_priv **elem_priv)
{
- struct nft_hash_elem *this = elem->priv, *he;
+ struct nft_hash_elem *this = nft_elem_priv_cast(elem->priv), *he;
struct nft_hash *priv = nft_set_priv(set);
u8 genmask = nft_genmask_next(net);
u32 hash;
@@ -577,7 +586,7 @@ static int nft_hash_insert(const struct net *net, const struct nft_set *set,
if (!memcmp(nft_set_ext_key(&this->ext),
nft_set_ext_key(&he->ext), set->klen) &&
nft_set_elem_active(&he->ext, genmask)) {
- *ext = &he->ext;
+ *elem_priv = &he->priv;
return -EEXIST;
}
}
@@ -586,28 +595,28 @@ static int nft_hash_insert(const struct net *net, const struct nft_set *set,
}
static void nft_hash_activate(const struct net *net, const struct nft_set *set,
- const struct nft_set_elem *elem)
+ struct nft_elem_priv *elem_priv)
{
- struct nft_hash_elem *he = elem->priv;
+ struct nft_hash_elem *he = nft_elem_priv_cast(elem_priv);
nft_set_elem_change_active(net, set, &he->ext);
}
-static bool nft_hash_flush(const struct net *net,
- const struct nft_set *set, void *priv)
+static void nft_hash_flush(const struct net *net,
+ const struct nft_set *set,
+ struct nft_elem_priv *elem_priv)
{
- struct nft_hash_elem *he = priv;
+ struct nft_hash_elem *he = nft_elem_priv_cast(elem_priv);
nft_set_elem_change_active(net, set, &he->ext);
- return true;
}
-static void *nft_hash_deactivate(const struct net *net,
- const struct nft_set *set,
- const struct nft_set_elem *elem)
+static struct nft_elem_priv *
+nft_hash_deactivate(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem)
{
+ struct nft_hash_elem *this = nft_elem_priv_cast(elem->priv), *he;
struct nft_hash *priv = nft_set_priv(set);
- struct nft_hash_elem *this = elem->priv, *he;
u8 genmask = nft_genmask_next(net);
u32 hash;
@@ -617,7 +626,7 @@ static void *nft_hash_deactivate(const struct net *net,
set->klen) &&
nft_set_elem_active(&he->ext, genmask)) {
nft_set_elem_change_active(net, set, &he->ext);
- return he;
+ return &he->priv;
}
}
return NULL;
@@ -625,9 +634,9 @@ static void *nft_hash_deactivate(const struct net *net,
static void nft_hash_remove(const struct net *net,
const struct nft_set *set,
- const struct nft_set_elem *elem)
+ struct nft_elem_priv *elem_priv)
{
- struct nft_hash_elem *he = elem->priv;
+ struct nft_hash_elem *he = nft_elem_priv_cast(elem_priv);
hlist_del_rcu(&he->node);
}
@@ -637,7 +646,6 @@ static void nft_hash_walk(const struct nft_ctx *ctx, struct nft_set *set,
{
struct nft_hash *priv = nft_set_priv(set);
struct nft_hash_elem *he;
- struct nft_set_elem elem;
int i;
for (i = 0; i < priv->buckets; i++) {
@@ -647,9 +655,7 @@ static void nft_hash_walk(const struct nft_ctx *ctx, struct nft_set *set,
if (!nft_set_elem_active(&he->ext, iter->genmask))
goto cont;
- elem.priv = he;
-
- iter->err = iter->fn(ctx, set, iter, &elem);
+ iter->err = iter->fn(ctx, set, iter, &he->priv);
if (iter->err < 0)
return;
cont:
@@ -688,7 +694,7 @@ static void nft_hash_destroy(const struct nft_ctx *ctx,
for (i = 0; i < priv->buckets; i++) {
hlist_for_each_entry_safe(he, next, &priv->table[i], node) {
hlist_del_rcu(&he->node);
- nf_tables_set_elem_destroy(ctx, set, he);
+ nf_tables_set_elem_destroy(ctx, set, &he->priv);
}
}
}
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index 6af9c9ed4b5c..aa1d9e93a9a0 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -342,9 +342,6 @@
#include "nft_set_pipapo_avx2.h"
#include "nft_set_pipapo.h"
-/* Current working bitmap index, toggled between field matches */
-static DEFINE_PER_CPU(bool, nft_pipapo_scratch_index);
-
/**
* pipapo_refill() - For each set bit, set bits from selected mapping table item
* @map: Bitmap to be scanned for set bits
@@ -412,6 +409,7 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
const u32 *key, const struct nft_set_ext **ext)
{
struct nft_pipapo *priv = nft_set_priv(set);
+ struct nft_pipapo_scratch *scratch;
unsigned long *res_map, *fill_map;
u8 genmask = nft_genmask_cur(net);
const u8 *rp = (const u8 *)key;
@@ -422,15 +420,17 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
local_bh_disable();
- map_index = raw_cpu_read(nft_pipapo_scratch_index);
-
m = rcu_dereference(priv->match);
if (unlikely(!m || !*raw_cpu_ptr(m->scratch)))
goto out;
- res_map = *raw_cpu_ptr(m->scratch) + (map_index ? m->bsize_max : 0);
- fill_map = *raw_cpu_ptr(m->scratch) + (map_index ? 0 : m->bsize_max);
+ scratch = *raw_cpu_ptr(m->scratch);
+
+ map_index = scratch->map_index;
+
+ res_map = scratch->map + (map_index ? m->bsize_max : 0);
+ fill_map = scratch->map + (map_index ? 0 : m->bsize_max);
memset(res_map, 0xff, m->bsize_max * sizeof(*res_map));
@@ -460,7 +460,7 @@ next_match:
b = pipapo_refill(res_map, f->bsize, f->rules, fill_map, f->mt,
last);
if (b < 0) {
- raw_cpu_write(nft_pipapo_scratch_index, map_index);
+ scratch->map_index = map_index;
local_bh_enable();
return false;
@@ -477,7 +477,7 @@ next_match:
* current inactive bitmap is clean and can be reused as
* *next* bitmap (not initial) for the next packet.
*/
- raw_cpu_write(nft_pipapo_scratch_index, map_index);
+ scratch->map_index = map_index;
local_bh_enable();
return true;
@@ -504,6 +504,7 @@ out:
* @set: nftables API set representation
* @data: Key data to be matched against existing elements
* @genmask: If set, check that element is active in given genmask
+ * @tstamp: timestamp to check for expired elements
*
* This is essentially the same as the lookup function, except that it matches
* key data against the uncommitted copy and doesn't use preallocated maps for
@@ -513,7 +514,8 @@ out:
*/
static struct nft_pipapo_elem *pipapo_get(const struct net *net,
const struct nft_set *set,
- const u8 *data, u8 genmask)
+ const u8 *data, u8 genmask,
+ u64 tstamp)
{
struct nft_pipapo_elem *ret = ERR_PTR(-ENOENT);
struct nft_pipapo *priv = nft_set_priv(set);
@@ -566,7 +568,7 @@ next_match:
goto out;
if (last) {
- if (nft_set_elem_expired(&f->mt[b].e->ext))
+ if (__nft_set_elem_expired(&f->mt[b].e->ext, tstamp))
goto next_match;
if ((genmask &&
!nft_set_elem_active(&f->mt[b].e->ext, genmask)))
@@ -599,11 +601,18 @@ out:
* @elem: nftables API element representation containing key data
* @flags: Unused
*/
-static void *nft_pipapo_get(const struct net *net, const struct nft_set *set,
- const struct nft_set_elem *elem, unsigned int flags)
+static struct nft_elem_priv *
+nft_pipapo_get(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem, unsigned int flags)
{
- return pipapo_get(net, set, (const u8 *)elem->key.val.data,
- nft_genmask_cur(net));
+ struct nft_pipapo_elem *e;
+
+ e = pipapo_get(net, set, (const u8 *)elem->key.val.data,
+ nft_genmask_cur(net), get_jiffies_64());
+ if (IS_ERR(e))
+ return ERR_CAST(e);
+
+ return &e->priv;
}
/**
@@ -1102,6 +1111,25 @@ static void pipapo_map(struct nft_pipapo_match *m,
}
/**
+ * pipapo_free_scratch() - Free per-CPU map at original (not aligned) address
+ * @m: Matching data
+ * @cpu: CPU number
+ */
+static void pipapo_free_scratch(const struct nft_pipapo_match *m, unsigned int cpu)
+{
+ struct nft_pipapo_scratch *s;
+ void *mem;
+
+ s = *per_cpu_ptr(m->scratch, cpu);
+ if (!s)
+ return;
+
+ mem = s;
+ mem -= s->align_off;
+ kfree(mem);
+}
+
+/**
* pipapo_realloc_scratch() - Reallocate scratch maps for partial match results
* @clone: Copy of matching data with pending insertions and deletions
* @bsize_max: Maximum bucket size, scratch maps cover two buckets
@@ -1114,12 +1142,13 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone,
int i;
for_each_possible_cpu(i) {
- unsigned long *scratch;
+ struct nft_pipapo_scratch *scratch;
#ifdef NFT_PIPAPO_ALIGN
- unsigned long *scratch_aligned;
+ void *scratch_aligned;
+ u32 align_off;
#endif
-
- scratch = kzalloc_node(bsize_max * sizeof(*scratch) * 2 +
+ scratch = kzalloc_node(struct_size(scratch, map,
+ bsize_max * 2) +
NFT_PIPAPO_ALIGN_HEADROOM,
GFP_KERNEL, cpu_to_node(i));
if (!scratch) {
@@ -1133,14 +1162,25 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone,
return -ENOMEM;
}
- kfree(*per_cpu_ptr(clone->scratch, i));
-
- *per_cpu_ptr(clone->scratch, i) = scratch;
+ pipapo_free_scratch(clone, i);
#ifdef NFT_PIPAPO_ALIGN
- scratch_aligned = NFT_PIPAPO_LT_ALIGN(scratch);
- *per_cpu_ptr(clone->scratch_aligned, i) = scratch_aligned;
+ /* Align &scratch->map (not the struct itself): the extra
+ * %NFT_PIPAPO_ALIGN_HEADROOM bytes passed to kzalloc_node()
+ * above guarantee we can waste up to those bytes in order
+ * to align the map field regardless of its offset within
+ * the struct.
+ */
+ BUILD_BUG_ON(offsetof(struct nft_pipapo_scratch, map) > NFT_PIPAPO_ALIGN_HEADROOM);
+
+ scratch_aligned = NFT_PIPAPO_LT_ALIGN(&scratch->map);
+ scratch_aligned -= offsetof(struct nft_pipapo_scratch, map);
+ align_off = scratch_aligned - (void *)scratch;
+
+ scratch = scratch_aligned;
+ scratch->align_off = align_off;
#endif
+ *per_cpu_ptr(clone->scratch, i) = scratch;
}
return 0;
@@ -1151,21 +1191,22 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone,
* @net: Network namespace
* @set: nftables API set representation
* @elem: nftables API element representation containing key data
- * @ext2: Filled with pointer to &struct nft_set_ext in inserted element
+ * @elem_priv: Filled with pointer to &struct nft_set_ext in inserted element
*
* Return: 0 on success, error pointer on failure.
*/
static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem,
- struct nft_set_ext **ext2)
+ struct nft_elem_priv **elem_priv)
{
const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS];
const u8 *start = (const u8 *)elem->key.val.data, *end;
- struct nft_pipapo_elem *e = elem->priv, *dup;
struct nft_pipapo *priv = nft_set_priv(set);
struct nft_pipapo_match *m = priv->clone;
u8 genmask = nft_genmask_next(net);
+ struct nft_pipapo_elem *e, *dup;
+ u64 tstamp = nft_net_tstamp(net);
struct nft_pipapo_field *f;
const u8 *start_p, *end_p;
int i, bsize_max, err = 0;
@@ -1175,7 +1216,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
else
end = start;
- dup = pipapo_get(net, set, start, genmask);
+ dup = pipapo_get(net, set, start, genmask, tstamp);
if (!IS_ERR(dup)) {
/* Check if we already have the same exact entry */
const struct nft_data *dup_key, *dup_end;
@@ -1188,7 +1229,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
if (!memcmp(start, dup_key->data, sizeof(*dup_key->data)) &&
!memcmp(end, dup_end->data, sizeof(*dup_end->data))) {
- *ext2 = &dup->ext;
+ *elem_priv = &dup->priv;
return -EEXIST;
}
@@ -1197,13 +1238,13 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
if (PTR_ERR(dup) == -ENOENT) {
/* Look for partially overlapping entries */
- dup = pipapo_get(net, set, end, nft_genmask_next(net));
+ dup = pipapo_get(net, set, end, nft_genmask_next(net), tstamp);
}
if (PTR_ERR(dup) != -ENOENT) {
if (IS_ERR(dup))
return PTR_ERR(dup);
- *ext2 = &dup->ext;
+ *elem_priv = &dup->priv;
return -ENOTEMPTY;
}
@@ -1263,7 +1304,8 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
put_cpu_ptr(m->scratch);
}
- *ext2 = &e->ext;
+ e = nft_elem_priv_cast(elem->priv);
+ *elem_priv = &e->priv;
pipapo_map(m, rulemap, e);
@@ -1293,11 +1335,6 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old)
if (!new->scratch)
goto out_scratch;
-#ifdef NFT_PIPAPO_ALIGN
- new->scratch_aligned = alloc_percpu(*new->scratch_aligned);
- if (!new->scratch_aligned)
- goto out_scratch;
-#endif
for_each_possible_cpu(i)
*per_cpu_ptr(new->scratch, i) = NULL;
@@ -1349,10 +1386,7 @@ out_lt:
}
out_scratch_realloc:
for_each_possible_cpu(i)
- kfree(*per_cpu_ptr(new->scratch, i));
-#ifdef NFT_PIPAPO_ALIGN
- free_percpu(new->scratch_aligned);
-#endif
+ pipapo_free_scratch(new, i);
out_scratch:
free_percpu(new->scratch);
kfree(new);
@@ -1540,23 +1574,19 @@ static void nft_pipapo_gc_deactivate(struct net *net, struct nft_set *set,
struct nft_pipapo_elem *e)
{
- struct nft_set_elem elem = {
- .priv = e,
- };
-
- nft_setelem_data_deactivate(net, set, &elem);
+ nft_setelem_data_deactivate(net, set, &e->priv);
}
/**
* pipapo_gc() - Drop expired entries from set, destroy start and end elements
- * @_set: nftables API set representation
+ * @set: nftables API set representation
* @m: Matching data
*/
-static void pipapo_gc(const struct nft_set *_set, struct nft_pipapo_match *m)
+static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m)
{
- struct nft_set *set = (struct nft_set *) _set;
struct nft_pipapo *priv = nft_set_priv(set);
struct net *net = read_pnet(&set->net);
+ u64 tstamp = nft_net_tstamp(net);
int rules_f0, first_rule = 0;
struct nft_pipapo_elem *e;
struct nft_trans_gc *gc;
@@ -1591,12 +1621,12 @@ static void pipapo_gc(const struct nft_set *_set, struct nft_pipapo_match *m)
/* synchronous gc never fails, there is no need to set on
* NFT_SET_ELEM_DEAD_BIT.
*/
- if (nft_set_elem_expired(&e->ext)) {
+ if (__nft_set_elem_expired(&e->ext, tstamp)) {
priv->dirty = true;
- gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
+ gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);
if (!gc)
- break;
+ return;
nft_pipapo_gc_deactivate(net, set, e);
pipapo_drop(m, rulemap);
@@ -1610,7 +1640,7 @@ static void pipapo_gc(const struct nft_set *_set, struct nft_pipapo_match *m)
}
}
- gc = nft_trans_gc_catchall(gc, 0);
+ gc = nft_trans_gc_catchall_sync(gc);
if (gc) {
nft_trans_gc_queue_sync_done(gc);
priv->last_gc = jiffies;
@@ -1637,13 +1667,9 @@ static void pipapo_free_match(struct nft_pipapo_match *m)
int i;
for_each_possible_cpu(i)
- kfree(*per_cpu_ptr(m->scratch, i));
+ pipapo_free_scratch(m, i);
-#ifdef NFT_PIPAPO_ALIGN
- free_percpu(m->scratch_aligned);
-#endif
free_percpu(m->scratch);
-
pipapo_free_fields(m);
kfree(m);
@@ -1672,7 +1698,7 @@ static void pipapo_reclaim_match(struct rcu_head *rcu)
* We also need to create a new working copy for subsequent insertions and
* deletions.
*/
-static void nft_pipapo_commit(const struct nft_set *set)
+static void nft_pipapo_commit(struct nft_set *set)
{
struct nft_pipapo *priv = nft_set_priv(set);
struct nft_pipapo_match *new_clone, *old;
@@ -1732,7 +1758,7 @@ static void nft_pipapo_abort(const struct nft_set *set)
* nft_pipapo_activate() - Mark element reference as active given key, commit
* @net: Network namespace
* @set: nftables API set representation
- * @elem: nftables API element representation containing key data
+ * @elem_priv: nftables API element representation containing key data
*
* On insertion, elements are added to a copy of the matching data currently
* in use for lookups, and not directly inserted into current lookup data. Both
@@ -1741,9 +1767,9 @@ static void nft_pipapo_abort(const struct nft_set *set)
*/
static void nft_pipapo_activate(const struct net *net,
const struct nft_set *set,
- const struct nft_set_elem *elem)
+ struct nft_elem_priv *elem_priv)
{
- struct nft_pipapo_elem *e = elem->priv;
+ struct nft_pipapo_elem *e = nft_elem_priv_cast(elem_priv);
nft_set_elem_change_active(net, set, &e->ext);
}
@@ -1766,7 +1792,7 @@ static void *pipapo_deactivate(const struct net *net, const struct nft_set *set,
{
struct nft_pipapo_elem *e;
- e = pipapo_get(net, set, data, nft_genmask_next(net));
+ e = pipapo_get(net, set, data, nft_genmask_next(net), nft_net_tstamp(net));
if (IS_ERR(e))
return NULL;
@@ -1783,9 +1809,9 @@ static void *pipapo_deactivate(const struct net *net, const struct nft_set *set,
*
* Return: deactivated element if found, NULL otherwise.
*/
-static void *nft_pipapo_deactivate(const struct net *net,
- const struct nft_set *set,
- const struct nft_set_elem *elem)
+static struct nft_elem_priv *
+nft_pipapo_deactivate(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem)
{
const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
@@ -1796,7 +1822,7 @@ static void *nft_pipapo_deactivate(const struct net *net,
* nft_pipapo_flush() - Call pipapo_deactivate() to make element inactive
* @net: Network namespace
* @set: nftables API set representation
- * @elem: nftables API element representation containing key data
+ * @elem_priv: nftables API element representation containing key data
*
* This is functionally the same as nft_pipapo_deactivate(), with a slightly
* different interface, and it's also called once for each element in a set
@@ -1810,13 +1836,12 @@ static void *nft_pipapo_deactivate(const struct net *net,
*
* Return: true if element was found and deactivated.
*/
-static bool nft_pipapo_flush(const struct net *net, const struct nft_set *set,
- void *elem)
+static void nft_pipapo_flush(const struct net *net, const struct nft_set *set,
+ struct nft_elem_priv *elem_priv)
{
- struct nft_pipapo_elem *e = elem;
+ struct nft_pipapo_elem *e = nft_elem_priv_cast(elem_priv);
- return pipapo_deactivate(net, set, (const u8 *)nft_set_ext_key(&e->ext),
- &e->ext);
+ nft_set_elem_change_active(net, set, &e->ext);
}
/**
@@ -1939,7 +1964,7 @@ static bool pipapo_match_field(struct nft_pipapo_field *f,
* nft_pipapo_remove() - Remove element given key, commit
* @net: Network namespace
* @set: nftables API set representation
- * @elem: nftables API element representation containing key data
+ * @elem_priv: nftables API element representation containing key data
*
* Similarly to nft_pipapo_activate(), this is used as commit operation by the
* API, but it's called once per element in the pending transaction, so we can't
@@ -1947,14 +1972,15 @@ static bool pipapo_match_field(struct nft_pipapo_field *f,
* the matched element here, if any, and commit the updated matching data.
*/
static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,
- const struct nft_set_elem *elem)
+ struct nft_elem_priv *elem_priv)
{
struct nft_pipapo *priv = nft_set_priv(set);
struct nft_pipapo_match *m = priv->clone;
- struct nft_pipapo_elem *e = elem->priv;
int rules_f0, first_rule = 0;
+ struct nft_pipapo_elem *e;
const u8 *data;
+ e = nft_elem_priv_cast(elem_priv);
data = (const u8 *)nft_set_ext_key(&e->ext);
while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) {
@@ -2031,7 +2057,6 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set,
for (r = 0; r < f->rules; r++) {
struct nft_pipapo_elem *e;
- struct nft_set_elem elem;
if (r < f->rules - 1 && f->mt[r + 1].e == f->mt[r].e)
continue;
@@ -2041,9 +2066,10 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set,
e = f->mt[r].e;
- elem.priv = e;
+ if (!nft_set_elem_active(&e->ext, iter->genmask))
+ goto cont;
- iter->err = iter->fn(ctx, set, iter, &elem);
+ iter->err = iter->fn(ctx, set, iter, &e->priv);
if (iter->err < 0)
goto out;
@@ -2115,6 +2141,8 @@ static int nft_pipapo_init(const struct nft_set *set,
struct nft_pipapo_field *f;
int err, i, field_count;
+ BUILD_BUG_ON(offsetof(struct nft_pipapo_elem, priv) != 0);
+
field_count = desc->field_count ? : 1;
if (field_count > NFT_PIPAPO_MAX_FIELDS)
@@ -2127,7 +2155,7 @@ static int nft_pipapo_init(const struct nft_set *set,
m->field_count = field_count;
m->bsize_max = 0;
- m->scratch = alloc_percpu(unsigned long *);
+ m->scratch = alloc_percpu(struct nft_pipapo_scratch *);
if (!m->scratch) {
err = -ENOMEM;
goto out_scratch;
@@ -2135,16 +2163,6 @@ static int nft_pipapo_init(const struct nft_set *set,
for_each_possible_cpu(i)
*per_cpu_ptr(m->scratch, i) = NULL;
-#ifdef NFT_PIPAPO_ALIGN
- m->scratch_aligned = alloc_percpu(unsigned long *);
- if (!m->scratch_aligned) {
- err = -ENOMEM;
- goto out_free;
- }
- for_each_possible_cpu(i)
- *per_cpu_ptr(m->scratch_aligned, i) = NULL;
-#endif
-
rcu_head_init(&m->rcu);
nft_pipapo_for_each_field(f, i, m) {
@@ -2175,9 +2193,6 @@ static int nft_pipapo_init(const struct nft_set *set,
return 0;
out_free:
-#ifdef NFT_PIPAPO_ALIGN
- free_percpu(m->scratch_aligned);
-#endif
free_percpu(m->scratch);
out_scratch:
kfree(m);
@@ -2209,7 +2224,7 @@ static void nft_set_pipapo_match_destroy(const struct nft_ctx *ctx,
e = f->mt[r].e;
- nf_tables_set_elem_destroy(ctx, set, e);
+ nf_tables_set_elem_destroy(ctx, set, &e->priv);
}
}
@@ -2231,11 +2246,8 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx,
nft_set_pipapo_match_destroy(ctx, set, m);
-#ifdef NFT_PIPAPO_ALIGN
- free_percpu(m->scratch_aligned);
-#endif
for_each_possible_cpu(cpu)
- kfree(*per_cpu_ptr(m->scratch, cpu));
+ pipapo_free_scratch(m, cpu);
free_percpu(m->scratch);
pipapo_free_fields(m);
kfree(m);
@@ -2248,11 +2260,8 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx,
if (priv->dirty)
nft_set_pipapo_match_destroy(ctx, set, m);
-#ifdef NFT_PIPAPO_ALIGN
- free_percpu(priv->clone->scratch_aligned);
-#endif
for_each_possible_cpu(cpu)
- kfree(*per_cpu_ptr(priv->clone->scratch, cpu));
+ pipapo_free_scratch(priv->clone, cpu);
free_percpu(priv->clone->scratch);
pipapo_free_fields(priv->clone);
diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h
index 25a75591583e..f59a0cd81105 100644
--- a/net/netfilter/nft_set_pipapo.h
+++ b/net/netfilter/nft_set_pipapo.h
@@ -131,23 +131,31 @@ struct nft_pipapo_field {
};
/**
+ * struct nft_pipapo_scratch - percpu data used for lookup and matching
+ * @map_index: Current working bitmap index, toggled between field matches
+ * @align_off: Offset to get the originally allocated address
+ * @map: store partial matching results during lookup
+ */
+struct nft_pipapo_scratch {
+ u8 map_index;
+ u32 align_off;
+ unsigned long map[];
+};
+
+/**
* struct nft_pipapo_match - Data used for lookup and matching
* @field_count Amount of fields in set
* @scratch: Preallocated per-CPU maps for partial matching results
- * @scratch_aligned: Version of @scratch aligned to NFT_PIPAPO_ALIGN bytes
* @bsize_max: Maximum lookup table bucket size of all fields, in longs
* @rcu Matching data is swapped on commits
* @f: Fields, with lookup and mapping tables
*/
struct nft_pipapo_match {
int field_count;
-#ifdef NFT_PIPAPO_ALIGN
- unsigned long * __percpu *scratch_aligned;
-#endif
- unsigned long * __percpu *scratch;
+ struct nft_pipapo_scratch * __percpu *scratch;
size_t bsize_max;
struct rcu_head rcu;
- struct nft_pipapo_field f[];
+ struct nft_pipapo_field f[] __counted_by(field_count);
};
/**
@@ -170,10 +178,12 @@ struct nft_pipapo_elem;
/**
* struct nft_pipapo_elem - API-facing representation of single set element
+ * @priv: element placeholder
* @ext: nftables API extensions
*/
struct nft_pipapo_elem {
- struct nft_set_ext ext;
+ struct nft_elem_priv priv;
+ struct nft_set_ext ext;
};
int pipapo_refill(unsigned long *map, int len, int rules, unsigned long *dst,
diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c
index 52e0d026d30a..a3a8ddca9918 100644
--- a/net/netfilter/nft_set_pipapo_avx2.c
+++ b/net/netfilter/nft_set_pipapo_avx2.c
@@ -57,7 +57,7 @@
/* Jump to label if @reg is zero */
#define NFT_PIPAPO_AVX2_NOMATCH_GOTO(reg, label) \
- asm_volatile_goto("vptest %%ymm" #reg ", %%ymm" #reg ";" \
+ asm goto("vptest %%ymm" #reg ", %%ymm" #reg ";" \
"je %l[" #label "]" : : : : label)
/* Store 256 bits from YMM register into memory. Contrary to bucket load
@@ -71,9 +71,6 @@
#define NFT_PIPAPO_AVX2_ZERO(reg) \
asm volatile("vpxor %ymm" #reg ", %ymm" #reg ", %ymm" #reg)
-/* Current working bitmap index, toggled between field matches */
-static DEFINE_PER_CPU(bool, nft_pipapo_avx2_scratch_index);
-
/**
* nft_pipapo_avx2_prepare() - Prepare before main algorithm body
*
@@ -1120,11 +1117,12 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
const u32 *key, const struct nft_set_ext **ext)
{
struct nft_pipapo *priv = nft_set_priv(set);
- unsigned long *res, *fill, *scratch;
+ struct nft_pipapo_scratch *scratch;
u8 genmask = nft_genmask_cur(net);
const u8 *rp = (const u8 *)key;
struct nft_pipapo_match *m;
struct nft_pipapo_field *f;
+ unsigned long *res, *fill;
bool map_index;
int i, ret = 0;
@@ -1141,15 +1139,16 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
*/
kernel_fpu_begin_mask(0);
- scratch = *raw_cpu_ptr(m->scratch_aligned);
+ scratch = *raw_cpu_ptr(m->scratch);
if (unlikely(!scratch)) {
kernel_fpu_end();
return false;
}
- map_index = raw_cpu_read(nft_pipapo_avx2_scratch_index);
- res = scratch + (map_index ? m->bsize_max : 0);
- fill = scratch + (map_index ? 0 : m->bsize_max);
+ map_index = scratch->map_index;
+
+ res = scratch->map + (map_index ? m->bsize_max : 0);
+ fill = scratch->map + (map_index ? 0 : m->bsize_max);
/* Starting map doesn't need to be set for this implementation */
@@ -1221,7 +1220,7 @@ next_match:
out:
if (i % 2)
- raw_cpu_write(nft_pipapo_avx2_scratch_index, !map_index);
+ scratch->map_index = !map_index;
kernel_fpu_end();
return ret >= 0;
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index c6435e709231..9944fe479e53 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -19,10 +19,11 @@ struct nft_rbtree {
struct rb_root root;
rwlock_t lock;
seqcount_rwlock_t count;
- struct delayed_work gc_work;
+ unsigned long last_gc;
};
struct nft_rbtree_elem {
+ struct nft_elem_priv priv;
struct rb_node node;
struct nft_set_ext ext;
};
@@ -48,8 +49,7 @@ static int nft_rbtree_cmp(const struct nft_set *set,
static bool nft_rbtree_elem_expired(const struct nft_rbtree_elem *rbe)
{
- return nft_set_elem_expired(&rbe->ext) ||
- nft_set_elem_is_dead(&rbe->ext);
+ return nft_set_elem_expired(&rbe->ext);
}
static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
@@ -197,8 +197,9 @@ static bool __nft_rbtree_get(const struct net *net, const struct nft_set *set,
return false;
}
-static void *nft_rbtree_get(const struct net *net, const struct nft_set *set,
- const struct nft_set_elem *elem, unsigned int flags)
+static struct nft_elem_priv *
+nft_rbtree_get(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem, unsigned int flags)
{
struct nft_rbtree *priv = nft_set_priv(set);
unsigned int seq = read_seqcount_begin(&priv->count);
@@ -209,34 +210,31 @@ static void *nft_rbtree_get(const struct net *net, const struct nft_set *set,
ret = __nft_rbtree_get(net, set, key, &rbe, seq, flags, genmask);
if (ret || !read_seqcount_retry(&priv->count, seq))
- return rbe;
+ return &rbe->priv;
read_lock_bh(&priv->lock);
seq = read_seqcount_begin(&priv->count);
ret = __nft_rbtree_get(net, set, key, &rbe, seq, flags, genmask);
- if (!ret)
- rbe = ERR_PTR(-ENOENT);
read_unlock_bh(&priv->lock);
- return rbe;
+ if (!ret)
+ return ERR_PTR(-ENOENT);
+
+ return &rbe->priv;
}
-static void nft_rbtree_gc_remove(struct net *net, struct nft_set *set,
- struct nft_rbtree *priv,
- struct nft_rbtree_elem *rbe)
+static void nft_rbtree_gc_elem_remove(struct net *net, struct nft_set *set,
+ struct nft_rbtree *priv,
+ struct nft_rbtree_elem *rbe)
{
- struct nft_set_elem elem = {
- .priv = rbe,
- };
-
- nft_setelem_data_deactivate(net, set, &elem);
+ lockdep_assert_held_write(&priv->lock);
+ nft_setelem_data_deactivate(net, set, &rbe->priv);
rb_erase(&rbe->node, &priv->root);
}
-static int nft_rbtree_gc_elem(const struct nft_set *__set,
- struct nft_rbtree *priv,
- struct nft_rbtree_elem *rbe,
- u8 genmask)
+static const struct nft_rbtree_elem *
+nft_rbtree_gc_elem(const struct nft_set *__set, struct nft_rbtree *priv,
+ struct nft_rbtree_elem *rbe)
{
struct nft_set *set = (struct nft_set *)__set;
struct rb_node *prev = rb_prev(&rbe->node);
@@ -246,7 +244,7 @@ static int nft_rbtree_gc_elem(const struct nft_set *__set,
gc = nft_trans_gc_alloc(set, 0, GFP_ATOMIC);
if (!gc)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
/* search for end interval coming before this element.
* end intervals don't carry a timeout extension, they
@@ -255,15 +253,16 @@ static int nft_rbtree_gc_elem(const struct nft_set *__set,
while (prev) {
rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node);
if (nft_rbtree_interval_end(rbe_prev) &&
- nft_set_elem_active(&rbe_prev->ext, genmask))
+ nft_set_elem_active(&rbe_prev->ext, NFT_GENMASK_ANY))
break;
prev = rb_prev(prev);
}
+ rbe_prev = NULL;
if (prev) {
rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node);
- nft_rbtree_gc_remove(net, set, priv, rbe_prev);
+ nft_rbtree_gc_elem_remove(net, set, priv, rbe_prev);
/* There is always room in this trans gc for this element,
* memory allocation never actually happens, hence, the warning
@@ -272,21 +271,21 @@ static int nft_rbtree_gc_elem(const struct nft_set *__set,
*/
gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
if (WARN_ON_ONCE(!gc))
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
nft_trans_gc_elem_add(gc, rbe_prev);
}
- nft_rbtree_gc_remove(net, set, priv, rbe);
+ nft_rbtree_gc_elem_remove(net, set, priv, rbe);
gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
if (WARN_ON_ONCE(!gc))
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
nft_trans_gc_elem_add(gc, rbe);
nft_trans_gc_queue_sync_done(gc);
- return 0;
+ return rbe_prev;
}
static bool nft_rbtree_update_first(const struct nft_set *set,
@@ -307,13 +306,15 @@ static bool nft_rbtree_update_first(const struct nft_set *set,
static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
struct nft_rbtree_elem *new,
- struct nft_set_ext **ext)
+ struct nft_elem_priv **elem_priv)
{
struct nft_rbtree_elem *rbe, *rbe_le = NULL, *rbe_ge = NULL;
struct rb_node *node, *next, *parent, **p, *first = NULL;
struct nft_rbtree *priv = nft_set_priv(set);
+ u8 cur_genmask = nft_genmask_cur(net);
u8 genmask = nft_genmask_next(net);
- int d, err;
+ u64 tstamp = nft_net_tstamp(net);
+ int d;
/* Descend the tree to search for an existing element greater than the
* key value to insert that is greater than the new element. This is the
@@ -357,11 +358,19 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
if (!nft_set_elem_active(&rbe->ext, genmask))
continue;
- /* perform garbage collection to avoid bogus overlap reports. */
- if (nft_set_elem_expired(&rbe->ext)) {
- err = nft_rbtree_gc_elem(set, priv, rbe, genmask);
- if (err < 0)
- return err;
+ /* perform garbage collection to avoid bogus overlap reports
+ * but skip new elements in this transaction.
+ */
+ if (__nft_set_elem_expired(&rbe->ext, tstamp) &&
+ nft_set_elem_active(&rbe->ext, cur_genmask)) {
+ const struct nft_rbtree_elem *removed_end;
+
+ removed_end = nft_rbtree_gc_elem(set, priv, rbe);
+ if (IS_ERR(removed_end))
+ return PTR_ERR(removed_end);
+
+ if (removed_end == rbe_le || removed_end == rbe_ge)
+ return -EAGAIN;
continue;
}
@@ -415,7 +424,7 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
*/
if (rbe_ge && !nft_rbtree_cmp(set, new, rbe_ge) &&
nft_rbtree_interval_start(rbe_ge) == nft_rbtree_interval_start(new)) {
- *ext = &rbe_ge->ext;
+ *elem_priv = &rbe_ge->priv;
return -EEXIST;
}
@@ -424,7 +433,7 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
*/
if (rbe_le && !nft_rbtree_cmp(set, new, rbe_le) &&
nft_rbtree_interval_end(rbe_le) == nft_rbtree_interval_end(new)) {
- *ext = &rbe_le->ext;
+ *elem_priv = &rbe_le->priv;
return -EEXIST;
}
@@ -476,62 +485,74 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem,
- struct nft_set_ext **ext)
+ struct nft_elem_priv **elem_priv)
{
+ struct nft_rbtree_elem *rbe = nft_elem_priv_cast(elem->priv);
struct nft_rbtree *priv = nft_set_priv(set);
- struct nft_rbtree_elem *rbe = elem->priv;
int err;
+ do {
+ if (fatal_signal_pending(current))
+ return -EINTR;
+
+ cond_resched();
+
+ write_lock_bh(&priv->lock);
+ write_seqcount_begin(&priv->count);
+ err = __nft_rbtree_insert(net, set, rbe, elem_priv);
+ write_seqcount_end(&priv->count);
+ write_unlock_bh(&priv->lock);
+ } while (err == -EAGAIN);
+
+ return err;
+}
+
+static void nft_rbtree_erase(struct nft_rbtree *priv, struct nft_rbtree_elem *rbe)
+{
write_lock_bh(&priv->lock);
write_seqcount_begin(&priv->count);
- err = __nft_rbtree_insert(net, set, rbe, ext);
+ rb_erase(&rbe->node, &priv->root);
write_seqcount_end(&priv->count);
write_unlock_bh(&priv->lock);
-
- return err;
}
static void nft_rbtree_remove(const struct net *net,
const struct nft_set *set,
- const struct nft_set_elem *elem)
+ struct nft_elem_priv *elem_priv)
{
+ struct nft_rbtree_elem *rbe = nft_elem_priv_cast(elem_priv);
struct nft_rbtree *priv = nft_set_priv(set);
- struct nft_rbtree_elem *rbe = elem->priv;
- write_lock_bh(&priv->lock);
- write_seqcount_begin(&priv->count);
- rb_erase(&rbe->node, &priv->root);
- write_seqcount_end(&priv->count);
- write_unlock_bh(&priv->lock);
+ nft_rbtree_erase(priv, rbe);
}
static void nft_rbtree_activate(const struct net *net,
const struct nft_set *set,
- const struct nft_set_elem *elem)
+ struct nft_elem_priv *elem_priv)
{
- struct nft_rbtree_elem *rbe = elem->priv;
+ struct nft_rbtree_elem *rbe = nft_elem_priv_cast(elem_priv);
nft_set_elem_change_active(net, set, &rbe->ext);
}
-static bool nft_rbtree_flush(const struct net *net,
- const struct nft_set *set, void *priv)
+static void nft_rbtree_flush(const struct net *net,
+ const struct nft_set *set,
+ struct nft_elem_priv *elem_priv)
{
- struct nft_rbtree_elem *rbe = priv;
+ struct nft_rbtree_elem *rbe = nft_elem_priv_cast(elem_priv);
nft_set_elem_change_active(net, set, &rbe->ext);
-
- return true;
}
-static void *nft_rbtree_deactivate(const struct net *net,
- const struct nft_set *set,
- const struct nft_set_elem *elem)
+static struct nft_elem_priv *
+nft_rbtree_deactivate(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem)
{
+ struct nft_rbtree_elem *rbe, *this = nft_elem_priv_cast(elem->priv);
const struct nft_rbtree *priv = nft_set_priv(set);
const struct rb_node *parent = priv->root.rb_node;
- struct nft_rbtree_elem *rbe, *this = elem->priv;
u8 genmask = nft_genmask_next(net);
+ u64 tstamp = nft_net_tstamp(net);
int d;
while (parent != NULL) {
@@ -552,12 +573,14 @@ static void *nft_rbtree_deactivate(const struct net *net,
nft_rbtree_interval_end(this)) {
parent = parent->rb_right;
continue;
+ } else if (__nft_set_elem_expired(&rbe->ext, tstamp)) {
+ break;
} else if (!nft_set_elem_active(&rbe->ext, genmask)) {
parent = parent->rb_left;
continue;
}
- nft_rbtree_flush(net, set, rbe);
- return rbe;
+ nft_rbtree_flush(net, set, &rbe->priv);
+ return &rbe->priv;
}
}
return NULL;
@@ -569,7 +592,6 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
{
struct nft_rbtree *priv = nft_set_priv(set);
struct nft_rbtree_elem *rbe;
- struct nft_set_elem elem;
struct rb_node *node;
read_lock_bh(&priv->lock);
@@ -581,9 +603,7 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
if (!nft_set_elem_active(&rbe->ext, iter->genmask))
goto cont;
- elem.priv = rbe;
-
- iter->err = iter->fn(ctx, set, iter, &elem);
+ iter->err = iter->fn(ctx, set, iter, &rbe->priv);
if (iter->err < 0) {
read_unlock_bh(&priv->lock);
return;
@@ -594,46 +614,35 @@ cont:
read_unlock_bh(&priv->lock);
}
-static void nft_rbtree_gc(struct work_struct *work)
+static void nft_rbtree_gc_remove(struct net *net, struct nft_set *set,
+ struct nft_rbtree *priv,
+ struct nft_rbtree_elem *rbe)
+{
+ nft_setelem_data_deactivate(net, set, &rbe->priv);
+ nft_rbtree_erase(priv, rbe);
+}
+
+static void nft_rbtree_gc(struct nft_set *set)
{
+ struct nft_rbtree *priv = nft_set_priv(set);
struct nft_rbtree_elem *rbe, *rbe_end = NULL;
- struct nftables_pernet *nft_net;
- struct nft_rbtree *priv;
+ struct net *net = read_pnet(&set->net);
+ u64 tstamp = nft_net_tstamp(net);
+ struct rb_node *node, *next;
struct nft_trans_gc *gc;
- struct rb_node *node;
- struct nft_set *set;
- unsigned int gc_seq;
- struct net *net;
- priv = container_of(work, struct nft_rbtree, gc_work.work);
set = nft_set_container_of(priv);
net = read_pnet(&set->net);
- nft_net = nft_pernet(net);
- gc_seq = READ_ONCE(nft_net->gc_seq);
-
- if (nft_set_gc_is_pending(set))
- goto done;
- gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL);
+ gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL);
if (!gc)
- goto done;
-
- write_lock_bh(&priv->lock);
- write_seqcount_begin(&priv->count);
- for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
+ return;
- /* Ruleset has been updated, try later. */
- if (READ_ONCE(nft_net->gc_seq) != gc_seq) {
- nft_trans_gc_destroy(gc);
- gc = NULL;
- goto try_later;
- }
+ for (node = rb_first(&priv->root); node ; node = next) {
+ next = rb_next(node);
rbe = rb_entry(node, struct nft_rbtree_elem, node);
- if (nft_set_elem_is_dead(&rbe->ext))
- goto dead_elem;
-
/* elements are reversed in the rbtree for historical reasons,
* from highest to lowest value, that is why end element is
* always visited before the start element.
@@ -642,41 +651,37 @@ static void nft_rbtree_gc(struct work_struct *work)
rbe_end = rbe;
continue;
}
- if (!nft_set_elem_expired(&rbe->ext))
+ if (!__nft_set_elem_expired(&rbe->ext, tstamp))
continue;
- nft_set_elem_dead(&rbe->ext);
-
- if (!rbe_end)
- continue;
-
- nft_set_elem_dead(&rbe_end->ext);
-
- gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
+ gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);
if (!gc)
goto try_later;
- nft_trans_gc_elem_add(gc, rbe_end);
- rbe_end = NULL;
-dead_elem:
- gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
+ /* end element needs to be removed first, it has
+ * no timeout extension.
+ */
+ if (rbe_end) {
+ nft_rbtree_gc_remove(net, set, priv, rbe_end);
+ nft_trans_gc_elem_add(gc, rbe_end);
+ rbe_end = NULL;
+ }
+
+ gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);
if (!gc)
goto try_later;
+ nft_rbtree_gc_remove(net, set, priv, rbe);
nft_trans_gc_elem_add(gc, rbe);
}
- gc = nft_trans_gc_catchall(gc, gc_seq);
-
try_later:
- write_seqcount_end(&priv->count);
- write_unlock_bh(&priv->lock);
- if (gc)
- nft_trans_gc_queue_async_done(gc);
-done:
- queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
- nft_set_gc_interval(set));
+ if (gc) {
+ gc = nft_trans_gc_catchall_sync(gc);
+ nft_trans_gc_queue_sync_done(gc);
+ priv->last_gc = jiffies;
+ }
}
static u64 nft_rbtree_privsize(const struct nlattr * const nla[],
@@ -691,15 +696,12 @@ static int nft_rbtree_init(const struct nft_set *set,
{
struct nft_rbtree *priv = nft_set_priv(set);
+ BUILD_BUG_ON(offsetof(struct nft_rbtree_elem, priv) != 0);
+
rwlock_init(&priv->lock);
seqcount_rwlock_init(&priv->count, &priv->lock);
priv->root = RB_ROOT;
- INIT_DEFERRABLE_WORK(&priv->gc_work, nft_rbtree_gc);
- if (set->flags & NFT_SET_TIMEOUT)
- queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
- nft_set_gc_interval(set));
-
return 0;
}
@@ -710,12 +712,10 @@ static void nft_rbtree_destroy(const struct nft_ctx *ctx,
struct nft_rbtree_elem *rbe;
struct rb_node *node;
- cancel_delayed_work_sync(&priv->gc_work);
- rcu_barrier();
while ((node = priv->root.rb_node) != NULL) {
rb_erase(node, &priv->root);
rbe = rb_entry(node, struct nft_rbtree_elem, node);
- nf_tables_set_elem_destroy(ctx, set, rbe);
+ nf_tables_set_elem_destroy(ctx, set, &rbe->priv);
}
}
@@ -737,6 +737,21 @@ static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
return true;
}
+static void nft_rbtree_commit(struct nft_set *set)
+{
+ struct nft_rbtree *priv = nft_set_priv(set);
+
+ if (time_after_eq(jiffies, priv->last_gc + nft_set_gc_interval(set)))
+ nft_rbtree_gc(set);
+}
+
+static void nft_rbtree_gc_init(const struct nft_set *set)
+{
+ struct nft_rbtree *priv = nft_set_priv(set);
+
+ priv->last_gc = jiffies;
+}
+
const struct nft_set_type nft_set_rbtree_type = {
.features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT,
.ops = {
@@ -750,6 +765,8 @@ const struct nft_set_type nft_set_rbtree_type = {
.deactivate = nft_rbtree_deactivate,
.flush = nft_rbtree_flush,
.activate = nft_rbtree_activate,
+ .commit = nft_rbtree_commit,
+ .gc_init = nft_rbtree_gc_init,
.lookup = nft_rbtree_lookup,
.walk = nft_rbtree_walk,
.get = nft_rbtree_get,
diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c
index 9ed85be79452..f30163e2ca62 100644
--- a/net/netfilter/nft_socket.c
+++ b/net/netfilter/nft_socket.c
@@ -242,6 +242,11 @@ static int nft_socket_validate(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nft_data **data)
{
+ if (ctx->family != NFPROTO_IPV4 &&
+ ctx->family != NFPROTO_IPV6 &&
+ ctx->family != NFPROTO_INET)
+ return -EOPNOTSUPP;
+
return nft_chain_validate_hooks(ctx->chain,
(1 << NF_INET_PRE_ROUTING) |
(1 << NF_INET_LOCAL_IN) |
diff --git a/net/netfilter/nft_synproxy.c b/net/netfilter/nft_synproxy.c
index 13da882669a4..1d737f89dfc1 100644
--- a/net/netfilter/nft_synproxy.c
+++ b/net/netfilter/nft_synproxy.c
@@ -186,7 +186,6 @@ static int nft_synproxy_do_init(const struct nft_ctx *ctx,
break;
#endif
case NFPROTO_INET:
- case NFPROTO_BRIDGE:
err = nf_synproxy_ipv4_init(snet, ctx->net);
if (err)
goto nf_ct_failure;
@@ -219,7 +218,6 @@ static void nft_synproxy_do_destroy(const struct nft_ctx *ctx)
break;
#endif
case NFPROTO_INET:
- case NFPROTO_BRIDGE:
nf_synproxy_ipv4_fini(snet, ctx->net);
nf_synproxy_ipv6_fini(snet, ctx->net);
break;
@@ -253,6 +251,11 @@ static int nft_synproxy_validate(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nft_data **data)
{
+ if (ctx->family != NFPROTO_IPV4 &&
+ ctx->family != NFPROTO_IPV6 &&
+ ctx->family != NFPROTO_INET)
+ return -EOPNOTSUPP;
+
return nft_chain_validate_hooks(ctx->chain, (1 << NF_INET_LOCAL_IN) |
(1 << NF_INET_FORWARD));
}
diff --git a/net/netfilter/nft_tproxy.c b/net/netfilter/nft_tproxy.c
index ae15cd693f0e..71412adb73d4 100644
--- a/net/netfilter/nft_tproxy.c
+++ b/net/netfilter/nft_tproxy.c
@@ -316,6 +316,11 @@ static int nft_tproxy_validate(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nft_data **data)
{
+ if (ctx->family != NFPROTO_IPV4 &&
+ ctx->family != NFPROTO_IPV6 &&
+ ctx->family != NFPROTO_INET)
+ return -EOPNOTSUPP;
+
return nft_chain_validate_hooks(ctx->chain, 1 << NF_INET_PRE_ROUTING);
}
diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c
index 9f21953c7433..f735d79d8be5 100644
--- a/net/netfilter/nft_tunnel.c
+++ b/net/netfilter/nft_tunnel.c
@@ -713,6 +713,7 @@ static const struct nft_object_ops nft_tunnel_obj_ops = {
static struct nft_object_type nft_tunnel_obj_type __read_mostly = {
.type = NFT_OBJECT_TUNNEL,
+ .family = NFPROTO_NETDEV,
.ops = &nft_tunnel_obj_ops,
.maxattr = NFTA_TUNNEL_KEY_MAX,
.policy = nft_tunnel_key_policy,
diff --git a/net/netfilter/nft_xfrm.c b/net/netfilter/nft_xfrm.c
index 452f8587adda..1c866757db55 100644
--- a/net/netfilter/nft_xfrm.c
+++ b/net/netfilter/nft_xfrm.c
@@ -235,6 +235,11 @@ static int nft_xfrm_validate(const struct nft_ctx *ctx, const struct nft_expr *e
const struct nft_xfrm *priv = nft_expr_priv(expr);
unsigned int hooks;
+ if (ctx->family != NFPROTO_IPV4 &&
+ ctx->family != NFPROTO_IPV6 &&
+ ctx->family != NFPROTO_INET)
+ return -EOPNOTSUPP;
+
switch (priv->dir) {
case XFRM_POLICY_IN:
hooks = (1 << NF_INET_FORWARD) |
diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c
index e85ce69924ae..50332888c8d2 100644
--- a/net/netfilter/xt_owner.c
+++ b/net/netfilter/xt_owner.c
@@ -76,18 +76,23 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par)
*/
return false;
- filp = sk->sk_socket->file;
- if (filp == NULL)
+ read_lock_bh(&sk->sk_callback_lock);
+ filp = sk->sk_socket ? sk->sk_socket->file : NULL;
+ if (filp == NULL) {
+ read_unlock_bh(&sk->sk_callback_lock);
return ((info->match ^ info->invert) &
(XT_OWNER_UID | XT_OWNER_GID)) == 0;
+ }
if (info->match & XT_OWNER_UID) {
kuid_t uid_min = make_kuid(net->user_ns, info->uid_min);
kuid_t uid_max = make_kuid(net->user_ns, info->uid_max);
if ((uid_gte(filp->f_cred->fsuid, uid_min) &&
uid_lte(filp->f_cred->fsuid, uid_max)) ^
- !(info->invert & XT_OWNER_UID))
+ !(info->invert & XT_OWNER_UID)) {
+ read_unlock_bh(&sk->sk_callback_lock);
return false;
+ }
}
if (info->match & XT_OWNER_GID) {
@@ -112,10 +117,13 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par)
}
}
- if (match ^ !(info->invert & XT_OWNER_GID))
+ if (match ^ !(info->invert & XT_OWNER_GID)) {
+ read_unlock_bh(&sk->sk_callback_lock);
return false;
+ }
}
+ read_unlock_bh(&sk->sk_callback_lock);
return true;
}
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
index ec6ed6fda96c..343e65f377d4 100644
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -59,7 +59,7 @@ physdev_mt(const struct sk_buff *skb, struct xt_action_param *par)
(!!outdev ^ !(info->invert & XT_PHYSDEV_OP_BRIDGED)))
return false;
- physdev = nf_bridge_get_physindev(skb);
+ physdev = nf_bridge_get_physindev(skb, xt_net(par));
indev = physdev ? physdev->name : NULL;
if ((info->bitmask & XT_PHYSDEV_OP_ISIN &&
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 7ddb9a78e3fc..ef93e0d3bee0 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -561,7 +561,7 @@ recent_mt_proc_write(struct file *file, const char __user *input,
{
struct recent_table *t = pde_data(file_inode(file));
struct recent_entry *e;
- char buf[sizeof("+b335:1d35:1e55:dead:c0de:1715:5afe:c0de")];
+ char buf[sizeof("+b335:1d35:1e55:dead:c0de:1715:255.255.255.255")];
const char *c = buf;
union nf_inet_addr addr = {};
u_int16_t family;
diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c
index e8961094a282..b46a6a512058 100644
--- a/net/netfilter/xt_sctp.c
+++ b/net/netfilter/xt_sctp.c
@@ -149,6 +149,8 @@ static int sctp_mt_check(const struct xt_mtchk_param *par)
{
const struct xt_sctp_info *info = par->matchinfo;
+ if (info->flag_count > ARRAY_SIZE(info->flag_info))
+ return -EINVAL;
if (info->flags & ~XT_SCTP_VALID_FLAGS)
return -EINVAL;
if (info->invflags & ~XT_SCTP_VALID_FLAGS)
diff --git a/net/netfilter/xt_u32.c b/net/netfilter/xt_u32.c
index 177b40d08098..117d4615d668 100644
--- a/net/netfilter/xt_u32.c
+++ b/net/netfilter/xt_u32.c
@@ -96,11 +96,32 @@ static bool u32_mt(const struct sk_buff *skb, struct xt_action_param *par)
return ret ^ data->invert;
}
+static int u32_mt_checkentry(const struct xt_mtchk_param *par)
+{
+ const struct xt_u32 *data = par->matchinfo;
+ const struct xt_u32_test *ct;
+ unsigned int i;
+
+ if (data->ntests > ARRAY_SIZE(data->tests))
+ return -EINVAL;
+
+ for (i = 0; i < data->ntests; ++i) {
+ ct = &data->tests[i];
+
+ if (ct->nnums > ARRAY_SIZE(ct->location) ||
+ ct->nvalues > ARRAY_SIZE(ct->value))
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static struct xt_match xt_u32_mt_reg __read_mostly = {
.name = "u32",
.revision = 0,
.family = NFPROTO_UNSPEC,
.match = u32_mt,
+ .checkentry = u32_mt_checkentry,
.matchsize = sizeof(struct xt_u32),
.me = THIS_MODULE,
};
diff --git a/net/netlabel/netlabel_calipso.c b/net/netlabel/netlabel_calipso.c
index f1d5b8465217..a07c2216d28b 100644
--- a/net/netlabel/netlabel_calipso.c
+++ b/net/netlabel/netlabel_calipso.c
@@ -54,6 +54,28 @@ static const struct nla_policy calipso_genl_policy[NLBL_CALIPSO_A_MAX + 1] = {
[NLBL_CALIPSO_A_MTYPE] = { .type = NLA_U32 },
};
+static const struct netlbl_calipso_ops *calipso_ops;
+
+/**
+ * netlbl_calipso_ops_register - Register the CALIPSO operations
+ * @ops: ops to register
+ *
+ * Description:
+ * Register the CALIPSO packet engine operations.
+ *
+ */
+const struct netlbl_calipso_ops *
+netlbl_calipso_ops_register(const struct netlbl_calipso_ops *ops)
+{
+ return xchg(&calipso_ops, ops);
+}
+EXPORT_SYMBOL(netlbl_calipso_ops_register);
+
+static const struct netlbl_calipso_ops *netlbl_calipso_ops_get(void)
+{
+ return READ_ONCE(calipso_ops);
+}
+
/* NetLabel Command Handlers
*/
/**
@@ -96,15 +118,18 @@ static int netlbl_calipso_add_pass(struct genl_info *info,
*
*/
static int netlbl_calipso_add(struct sk_buff *skb, struct genl_info *info)
-
{
int ret_val = -EINVAL;
struct netlbl_audit audit_info;
+ const struct netlbl_calipso_ops *ops = netlbl_calipso_ops_get();
if (!info->attrs[NLBL_CALIPSO_A_DOI] ||
!info->attrs[NLBL_CALIPSO_A_MTYPE])
return -EINVAL;
+ if (!ops)
+ return -EOPNOTSUPP;
+
netlbl_netlink_auditinfo(&audit_info);
switch (nla_get_u32(info->attrs[NLBL_CALIPSO_A_MTYPE])) {
case CALIPSO_MAP_PASS:
@@ -363,28 +388,6 @@ int __init netlbl_calipso_genl_init(void)
return genl_register_family(&netlbl_calipso_gnl_family);
}
-static const struct netlbl_calipso_ops *calipso_ops;
-
-/**
- * netlbl_calipso_ops_register - Register the CALIPSO operations
- * @ops: ops to register
- *
- * Description:
- * Register the CALIPSO packet engine operations.
- *
- */
-const struct netlbl_calipso_ops *
-netlbl_calipso_ops_register(const struct netlbl_calipso_ops *ops)
-{
- return xchg(&calipso_ops, ops);
-}
-EXPORT_SYMBOL(netlbl_calipso_ops_register);
-
-static const struct netlbl_calipso_ops *netlbl_calipso_ops_get(void)
-{
- return READ_ONCE(calipso_ops);
-}
-
/**
* calipso_doi_add - Add a new DOI to the CALIPSO protocol engine
* @doi_def: the DOI structure
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 642b9d382fb4..9c962347cf85 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -352,7 +352,7 @@ static void netlink_overrun(struct sock *sk)
if (!nlk_test_bit(RECV_NO_ENOBUFS, sk)) {
if (!test_and_set_bit(NETLINK_S_CONGESTED,
&nlk_sk(sk)->state)) {
- sk->sk_err = ENOBUFS;
+ WRITE_ONCE(sk->sk_err, ENOBUFS);
sk_error_report(sk);
}
}
@@ -374,7 +374,7 @@ static void netlink_skb_destructor(struct sk_buff *skb)
if (is_vmalloc_addr(skb->head)) {
if (!skb->cloned ||
!atomic_dec_return(&(skb_shinfo(skb)->dataref)))
- vfree(skb->head);
+ vfree_atomic(skb->head);
skb->head = NULL;
}
@@ -1204,8 +1204,7 @@ struct sock *netlink_getsockbyfilp(struct file *filp)
return sock;
}
-static struct sk_buff *netlink_alloc_large_skb(unsigned int size,
- int broadcast)
+struct sk_buff *netlink_alloc_large_skb(unsigned int size, int broadcast)
{
struct sk_buff *skb;
void *data;
@@ -1520,8 +1519,7 @@ out:
int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb,
u32 portid,
u32 group, gfp_t allocation,
- int (*filter)(struct sock *dsk,
- struct sk_buff *skb, void *data),
+ netlink_filter_fn filter,
void *filter_data)
{
struct net *net = sock_net(ssk);
@@ -1605,7 +1603,7 @@ static int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p)
goto out;
}
- sk->sk_err = p->code;
+ WRITE_ONCE(sk->sk_err, p->code);
sk_error_report(sk);
out:
return ret;
@@ -1991,7 +1989,7 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) {
ret = netlink_dump(sk);
if (ret) {
- sk->sk_err = -ret;
+ WRITE_ONCE(sk->sk_err, -ret);
sk_error_report(sk);
}
}
@@ -2511,7 +2509,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
err_bad_put:
nlmsg_free(skb);
err_skb:
- NETLINK_CB(in_skb).sk->sk_err = ENOBUFS;
+ WRITE_ONCE(NETLINK_CB(in_skb).sk->sk_err, ENOBUFS);
sk_error_report(NETLINK_CB(in_skb).sk);
}
EXPORT_SYMBOL(netlink_ack);
diff --git a/net/netlink/diag.c b/net/netlink/diag.c
index 9c4f231be275..1eeff9422856 100644
--- a/net/netlink/diag.c
+++ b/net/netlink/diag.c
@@ -257,5 +257,6 @@ static void __exit netlink_diag_exit(void)
module_init(netlink_diag_init);
module_exit(netlink_diag_exit);
+MODULE_DESCRIPTION("Netlink-based socket monitoring/diagnostic interface (sock_diag)");
MODULE_LICENSE("GPL");
MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 16 /* AF_NETLINK */);
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 8315d31b53db..8c7af02f8454 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -225,7 +225,8 @@ static void genl_op_from_split(struct genl_op_iter *iter)
}
if (i + cnt < family->n_split_ops &&
- family->split_ops[i + cnt].flags & GENL_CMD_CAP_DUMP) {
+ family->split_ops[i + cnt].flags & GENL_CMD_CAP_DUMP &&
+ (!cnt || family->split_ops[i + cnt].cmd == iter->doit.cmd)) {
iter->dumpit = family->split_ops[i + cnt];
genl_op_fill_in_reject_policy_split(family, &iter->dumpit);
cnt++;
@@ -630,6 +631,138 @@ static int genl_validate_ops(const struct genl_family *family)
return 0;
}
+static void *genl_sk_priv_alloc(struct genl_family *family)
+{
+ void *priv;
+
+ priv = kzalloc(family->sock_priv_size, GFP_KERNEL);
+ if (!priv)
+ return ERR_PTR(-ENOMEM);
+
+ if (family->sock_priv_init)
+ family->sock_priv_init(priv);
+
+ return priv;
+}
+
+static void genl_sk_priv_free(const struct genl_family *family, void *priv)
+{
+ if (family->sock_priv_destroy)
+ family->sock_priv_destroy(priv);
+ kfree(priv);
+}
+
+static int genl_sk_privs_alloc(struct genl_family *family)
+{
+ if (!family->sock_priv_size)
+ return 0;
+
+ family->sock_privs = kzalloc(sizeof(*family->sock_privs), GFP_KERNEL);
+ if (!family->sock_privs)
+ return -ENOMEM;
+ xa_init(family->sock_privs);
+ return 0;
+}
+
+static void genl_sk_privs_free(const struct genl_family *family)
+{
+ unsigned long id;
+ void *priv;
+
+ if (!family->sock_priv_size)
+ return;
+
+ xa_for_each(family->sock_privs, id, priv)
+ genl_sk_priv_free(family, priv);
+
+ xa_destroy(family->sock_privs);
+ kfree(family->sock_privs);
+}
+
+static void genl_sk_priv_free_by_sock(struct genl_family *family,
+ struct sock *sk)
+{
+ void *priv;
+
+ if (!family->sock_priv_size)
+ return;
+ priv = xa_erase(family->sock_privs, (unsigned long) sk);
+ if (!priv)
+ return;
+ genl_sk_priv_free(family, priv);
+}
+
+static void genl_release(struct sock *sk, unsigned long *groups)
+{
+ struct genl_family *family;
+ unsigned int id;
+
+ down_read(&cb_lock);
+
+ idr_for_each_entry(&genl_fam_idr, family, id)
+ genl_sk_priv_free_by_sock(family, sk);
+
+ up_read(&cb_lock);
+}
+
+/**
+ * __genl_sk_priv_get - Get family private pointer for socket, if exists
+ *
+ * @family: family
+ * @sk: socket
+ *
+ * Lookup a private memory for a Generic netlink family and specified socket.
+ *
+ * Caller should make sure this is called in RCU read locked section.
+ *
+ * Return: valid pointer on success, otherwise negative error value
+ * encoded by ERR_PTR(), NULL in case priv does not exist.
+ */
+void *__genl_sk_priv_get(struct genl_family *family, struct sock *sk)
+{
+ if (WARN_ON_ONCE(!family->sock_privs))
+ return ERR_PTR(-EINVAL);
+ return xa_load(family->sock_privs, (unsigned long) sk);
+}
+
+/**
+ * genl_sk_priv_get - Get family private pointer for socket
+ *
+ * @family: family
+ * @sk: socket
+ *
+ * Lookup a private memory for a Generic netlink family and specified socket.
+ * Allocate the private memory in case it was not already done.
+ *
+ * Return: valid pointer on success, otherwise negative error value
+ * encoded by ERR_PTR().
+ */
+void *genl_sk_priv_get(struct genl_family *family, struct sock *sk)
+{
+ void *priv, *old_priv;
+
+ priv = __genl_sk_priv_get(family, sk);
+ if (priv)
+ return priv;
+
+ /* priv for the family does not exist so far, create it. */
+
+ priv = genl_sk_priv_alloc(family);
+ if (IS_ERR(priv))
+ return ERR_CAST(priv);
+
+ old_priv = xa_cmpxchg(family->sock_privs, (unsigned long) sk, NULL,
+ priv, GFP_KERNEL);
+ if (old_priv) {
+ genl_sk_priv_free(family, priv);
+ if (xa_is_err(old_priv))
+ return ERR_PTR(xa_err(old_priv));
+ /* Race happened, priv for the socket was already inserted. */
+ return old_priv;
+ }
+ return priv;
+}
+
/**
* genl_register_family - register a generic netlink family
* @family: generic netlink family
@@ -658,6 +791,10 @@ int genl_register_family(struct genl_family *family)
goto errout_locked;
}
+ err = genl_sk_privs_alloc(family);
+ if (err)
+ goto errout_locked;
+
/*
* Sadly, a few cases need to be special-cased
* due to them having previously abused the API
@@ -678,7 +815,7 @@ int genl_register_family(struct genl_family *family)
start, end + 1, GFP_KERNEL);
if (family->id < 0) {
err = family->id;
- goto errout_locked;
+ goto errout_sk_privs_free;
}
err = genl_validate_assign_mc_groups(family);
@@ -697,6 +834,8 @@ int genl_register_family(struct genl_family *family)
errout_remove:
idr_remove(&genl_fam_idr, family->id);
+errout_sk_privs_free:
+ genl_sk_privs_free(family);
errout_locked:
genl_unlock_all();
return err;
@@ -727,6 +866,9 @@ int genl_unregister_family(const struct genl_family *family)
up_write(&cb_lock);
wait_event(genl_sk_destructing_waitq,
atomic_read(&genl_sk_destructing_cnt) == 0);
+
+ genl_sk_privs_free(family);
+
genl_unlock();
genl_ctrl_event(CTRL_CMD_DELFAMILY, family, NULL, 0);
@@ -1687,9 +1829,12 @@ static int genl_bind(struct net *net, int group)
continue;
grp = &family->mcgrps[i];
- if ((grp->flags & GENL_UNS_ADMIN_PERM) &&
+ if ((grp->flags & GENL_MCAST_CAP_NET_ADMIN) &&
!ns_capable(net->user_ns, CAP_NET_ADMIN))
ret = -EPERM;
+ if ((grp->flags & GENL_MCAST_CAP_SYS_ADMIN) &&
+ !ns_capable(net->user_ns, CAP_SYS_ADMIN))
+ ret = -EPERM;
break;
}
@@ -1704,6 +1849,7 @@ static int __net_init genl_pernet_init(struct net *net)
.input = genl_rcv,
.flags = NL_CFG_F_NONROOT_RECV,
.bind = genl_bind,
+ .release = genl_release,
};
/* we'll bump the group number right afterwards */
diff --git a/net/netlink/policy.c b/net/netlink/policy.c
index 87e3de0fde89..1f8909c16f14 100644
--- a/net/netlink/policy.c
+++ b/net/netlink/policy.c
@@ -21,7 +21,7 @@ struct netlink_policy_dump_state {
struct {
const struct nla_policy *policy;
unsigned int maxtype;
- } policies[];
+ } policies[] __counted_by(n_alloc);
};
static int add_policy(struct netlink_policy_dump_state **statep,
@@ -29,7 +29,7 @@ static int add_policy(struct netlink_policy_dump_state **statep,
unsigned int maxtype)
{
struct netlink_policy_dump_state *state = *statep;
- unsigned int n_alloc, i;
+ unsigned int old_n_alloc, n_alloc, i;
if (!policy || !maxtype)
return 0;
@@ -52,12 +52,13 @@ static int add_policy(struct netlink_policy_dump_state **statep,
if (!state)
return -ENOMEM;
- memset(&state->policies[state->n_alloc], 0,
- flex_array_size(state, policies, n_alloc - state->n_alloc));
-
- state->policies[state->n_alloc].policy = policy;
- state->policies[state->n_alloc].maxtype = maxtype;
+ old_n_alloc = state->n_alloc;
state->n_alloc = n_alloc;
+ memset(&state->policies[old_n_alloc], 0,
+ flex_array_size(state, policies, n_alloc - old_n_alloc));
+
+ state->policies[old_n_alloc].policy = policy;
+ state->policies[old_n_alloc].maxtype = maxtype;
*statep = state;
return 0;
@@ -229,6 +230,8 @@ int netlink_policy_dump_attr_size_estimate(const struct nla_policy *pt)
case NLA_S16:
case NLA_S32:
case NLA_S64:
+ case NLA_SINT:
+ case NLA_UINT:
/* maximum is common, u64 min/max with padding */
return common +
2 * (nla_attr_size(0) + nla_attr_size(sizeof(u64)));
@@ -287,6 +290,7 @@ __netlink_policy_dump_write_attr(struct netlink_policy_dump_state *state,
case NLA_U16:
case NLA_U32:
case NLA_U64:
+ case NLA_UINT:
case NLA_MSECS: {
struct netlink_range_validation range;
@@ -296,8 +300,10 @@ __netlink_policy_dump_write_attr(struct netlink_policy_dump_state *state,
type = NL_ATTR_TYPE_U16;
else if (pt->type == NLA_U32)
type = NL_ATTR_TYPE_U32;
- else
+ else if (pt->type == NLA_U64)
type = NL_ATTR_TYPE_U64;
+ else
+ type = NL_ATTR_TYPE_UINT;
if (pt->validation_type == NLA_VALIDATE_MASK) {
if (nla_put_u64_64bit(skb, NL_POLICY_TYPE_ATTR_MASK,
@@ -319,7 +325,8 @@ __netlink_policy_dump_write_attr(struct netlink_policy_dump_state *state,
case NLA_S8:
case NLA_S16:
case NLA_S32:
- case NLA_S64: {
+ case NLA_S64:
+ case NLA_SINT: {
struct netlink_range_validation_signed range;
if (pt->type == NLA_S8)
@@ -328,8 +335,10 @@ __netlink_policy_dump_write_attr(struct netlink_policy_dump_state *state,
type = NL_ATTR_TYPE_S16;
else if (pt->type == NLA_S32)
type = NL_ATTR_TYPE_S32;
- else
+ else if (pt->type == NLA_S64)
type = NL_ATTR_TYPE_S64;
+ else
+ type = NL_ATTR_TYPE_SINT;
nla_get_range_signed(pt, &range);
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 96e91ab71573..0eed00184adf 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -487,7 +487,7 @@ static struct sock *nr_make_new(struct sock *osk)
sock_init_data(NULL, sk);
sk->sk_type = osk->sk_type;
- sk->sk_priority = osk->sk_priority;
+ sk->sk_priority = READ_ONCE(osk->sk_priority);
sk->sk_protocol = osk->sk_protocol;
sk->sk_rcvbuf = osk->sk_rcvbuf;
sk->sk_sndbuf = osk->sk_sndbuf;
diff --git a/net/nfc/digital_core.c b/net/nfc/digital_core.c
index d63d2e5dc60c..dae378f1d52b 100644
--- a/net/nfc/digital_core.c
+++ b/net/nfc/digital_core.c
@@ -858,4 +858,5 @@ void nfc_digital_unregister_device(struct nfc_digital_dev *ddev)
}
EXPORT_SYMBOL(nfc_digital_unregister_device);
+MODULE_DESCRIPTION("NFC Digital protocol stack");
MODULE_LICENSE("GPL");
diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c
index f60e424e0607..18be13fb9b75 100644
--- a/net/nfc/llcp_core.c
+++ b/net/nfc/llcp_core.c
@@ -145,6 +145,13 @@ static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool device,
static struct nfc_llcp_local *nfc_llcp_local_get(struct nfc_llcp_local *local)
{
+ /* Since using nfc_llcp_local may result in usage of nfc_dev, whenever
+ * we hold a reference to local, we also need to hold a reference to
+ * the device to avoid UAF.
+ */
+ if (!nfc_get_device(local->dev->idx))
+ return NULL;
+
kref_get(&local->ref);
return local;
@@ -177,10 +184,18 @@ static void local_release(struct kref *ref)
int nfc_llcp_local_put(struct nfc_llcp_local *local)
{
+ struct nfc_dev *dev;
+ int ret;
+
if (local == NULL)
return 0;
- return kref_put(&local->ref, local_release);
+ dev = local->dev;
+
+ ret = kref_put(&local->ref, local_release);
+ nfc_put_device(dev);
+
+ return ret;
}
static struct nfc_llcp_sock *nfc_llcp_sock_get(struct nfc_llcp_local *local,
@@ -203,17 +218,13 @@ static struct nfc_llcp_sock *nfc_llcp_sock_get(struct nfc_llcp_local *local,
if (tmp_sock->ssap == ssap && tmp_sock->dsap == dsap) {
llcp_sock = tmp_sock;
+ sock_hold(&llcp_sock->sk);
break;
}
}
read_unlock(&local->sockets.lock);
- if (llcp_sock == NULL)
- return NULL;
-
- sock_hold(&llcp_sock->sk);
-
return llcp_sock;
}
@@ -346,7 +357,8 @@ static int nfc_llcp_wks_sap(const char *service_name, size_t service_name_len)
static
struct nfc_llcp_sock *nfc_llcp_sock_from_sn(struct nfc_llcp_local *local,
- const u8 *sn, size_t sn_len)
+ const u8 *sn, size_t sn_len,
+ bool needref)
{
struct sock *sk;
struct nfc_llcp_sock *llcp_sock, *tmp_sock;
@@ -382,6 +394,8 @@ struct nfc_llcp_sock *nfc_llcp_sock_from_sn(struct nfc_llcp_local *local,
if (memcmp(sn, tmp_sock->service_name, sn_len) == 0) {
llcp_sock = tmp_sock;
+ if (needref)
+ sock_hold(&llcp_sock->sk);
break;
}
}
@@ -423,7 +437,8 @@ u8 nfc_llcp_get_sdp_ssap(struct nfc_llcp_local *local,
* to this service name.
*/
if (nfc_llcp_sock_from_sn(local, sock->service_name,
- sock->service_name_len) != NULL) {
+ sock->service_name_len,
+ false) != NULL) {
mutex_unlock(&local->sdp_lock);
return LLCP_SAP_MAX;
@@ -824,16 +839,7 @@ out:
static struct nfc_llcp_sock *nfc_llcp_sock_get_sn(struct nfc_llcp_local *local,
const u8 *sn, size_t sn_len)
{
- struct nfc_llcp_sock *llcp_sock;
-
- llcp_sock = nfc_llcp_sock_from_sn(local, sn, sn_len);
-
- if (llcp_sock == NULL)
- return NULL;
-
- sock_hold(&llcp_sock->sk);
-
- return llcp_sock;
+ return nfc_llcp_sock_from_sn(local, sn, sn_len, true);
}
static const u8 *nfc_llcp_connect_sn(const struct sk_buff *skb, size_t *sn_len)
@@ -968,8 +974,17 @@ static void nfc_llcp_recv_connect(struct nfc_llcp_local *local,
}
new_sock = nfc_llcp_sock(new_sk);
- new_sock->dev = local->dev;
+
new_sock->local = nfc_llcp_local_get(local);
+ if (!new_sock->local) {
+ reason = LLCP_DM_REJ;
+ sock_put(&new_sock->sk);
+ release_sock(&sock->sk);
+ sock_put(&sock->sk);
+ goto fail;
+ }
+
+ new_sock->dev = local->dev;
new_sock->rw = sock->rw;
new_sock->miux = sock->miux;
new_sock->nfc_protocol = sock->nfc_protocol;
@@ -1298,7 +1313,8 @@ static void nfc_llcp_recv_snl(struct nfc_llcp_local *local,
}
llcp_sock = nfc_llcp_sock_from_sn(local, service_name,
- service_name_len);
+ service_name_len,
+ true);
if (!llcp_sock) {
sap = 0;
goto add_snl;
@@ -1318,6 +1334,7 @@ static void nfc_llcp_recv_snl(struct nfc_llcp_local *local,
if (sap == LLCP_SAP_MAX) {
sap = 0;
+ nfc_llcp_sock_put(llcp_sock);
goto add_snl;
}
@@ -1335,6 +1352,7 @@ static void nfc_llcp_recv_snl(struct nfc_llcp_local *local,
pr_debug("%p %d\n", llcp_sock, sap);
+ nfc_llcp_sock_put(llcp_sock);
add_snl:
sdp = nfc_llcp_build_sdres_tlv(tid, sap);
if (sdp == NULL)
@@ -1603,7 +1621,16 @@ int nfc_llcp_register_device(struct nfc_dev *ndev)
if (local == NULL)
return -ENOMEM;
- local->dev = ndev;
+ /* As we are going to initialize local's refcount, we need to get the
+ * nfc_dev to avoid UAF, otherwise there is no point in continuing.
+ * See nfc_llcp_local_get().
+ */
+ local->dev = nfc_get_device(ndev->idx);
+ if (!local->dev) {
+ kfree(local);
+ return -ENODEV;
+ }
+
INIT_LIST_HEAD(&local->list);
kref_init(&local->ref);
mutex_init(&local->sdp_lock);
@@ -1636,7 +1663,9 @@ int nfc_llcp_register_device(struct nfc_dev *ndev)
timer_setup(&local->sdreq_timer, nfc_llcp_sdreq_timer, 0);
INIT_WORK(&local->sdreq_timeout_work, nfc_llcp_sdreq_timeout_work);
+ spin_lock(&llcp_devices_lock);
list_add(&local->list, &llcp_devices);
+ spin_unlock(&llcp_devices_lock);
return 0;
}
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index 645677f84dba..819157bbb5a2 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -796,6 +796,11 @@ static int llcp_sock_sendmsg(struct socket *sock, struct msghdr *msg,
}
if (sk->sk_type == SOCK_DGRAM) {
+ if (sk->sk_state != LLCP_BOUND) {
+ release_sock(sk);
+ return -ENOTCONN;
+ }
+
DECLARE_SOCKADDR(struct sockaddr_nfc_llcp *, addr,
msg->msg_name);
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index fff755dde30d..cdad47b140fa 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -909,6 +909,11 @@ static int nci_activate_target(struct nfc_dev *nfc_dev,
return -EINVAL;
}
+ if (protocol >= NFC_PROTO_MAX) {
+ pr_err("the requested nfc protocol is invalid\n");
+ return -EINVAL;
+ }
+
if (!(nci_target->supported_protocols & (1 << protocol))) {
pr_err("target does not support the requested protocol 0x%x\n",
protocol);
@@ -1203,6 +1208,10 @@ void nci_free_device(struct nci_dev *ndev)
{
nfc_free_device(ndev->nfc_dev);
nci_hci_deallocate(ndev);
+
+ /* drop partial rx data packet if present */
+ if (ndev->rx_data_reassembly)
+ kfree_skb(ndev->rx_data_reassembly);
kfree(ndev);
}
EXPORT_SYMBOL(nci_free_device);
@@ -1572,4 +1581,5 @@ static void nci_cmd_work(struct work_struct *work)
}
}
+MODULE_DESCRIPTION("NFC Controller Interface");
MODULE_LICENSE("GPL");
diff --git a/net/nfc/nci/spi.c b/net/nfc/nci/spi.c
index 0935527d1d12..6a93533c480e 100644
--- a/net/nfc/nci/spi.c
+++ b/net/nfc/nci/spi.c
@@ -151,6 +151,8 @@ static int send_acknowledge(struct nci_spi *nspi, u8 acknowledge)
int ret;
skb = nci_skb_alloc(nspi->ndev, 0, GFP_KERNEL);
+ if (!skb)
+ return -ENOMEM;
/* add the NCI SPI header to the start of the buffer */
hdr = skb_push(skb, NCI_SPI_HDR_LEN);
@@ -317,4 +319,5 @@ done:
}
EXPORT_SYMBOL_GPL(nci_spi_read);
+MODULE_DESCRIPTION("NFC Controller Interface (NCI) SPI link layer");
MODULE_LICENSE("GPL");
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index fd66014d8a76..6fcd7e2ca81f 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -311,11 +311,18 @@ static int push_eth(struct sk_buff *skb, struct sw_flow_key *key,
return 0;
}
-static int push_nsh(struct sk_buff *skb, struct sw_flow_key *key,
- const struct nshhdr *nh)
+static noinline_for_stack int push_nsh(struct sk_buff *skb,
+ struct sw_flow_key *key,
+ const struct nlattr *a)
{
+ u8 buffer[NSH_HDR_MAX_LEN];
+ struct nshhdr *nh = (struct nshhdr *)buffer;
int err;
+ err = nsh_hdr_from_nlattr(a, nh, NSH_HDR_MAX_LEN);
+ if (err)
+ return err;
+
err = nsh_push(skb, nh);
if (err)
return err;
@@ -873,7 +880,7 @@ static void ovs_fragment(struct net *net, struct vport *vport,
prepare_frag(vport, skb, orig_network_offset,
ovs_key_mac_proto(key));
- dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, 1,
+ dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL,
DST_OBSOLETE_NONE, DST_NOCOUNT);
ovs_rt.dst.dev = vport->dev;
@@ -890,7 +897,7 @@ static void ovs_fragment(struct net *net, struct vport *vport,
prepare_frag(vport, skb, orig_network_offset,
ovs_key_mac_proto(key));
memset(&ovs_rt, 0, sizeof(ovs_rt));
- dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, 1,
+ dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL,
DST_OBSOLETE_NONE, DST_NOCOUNT);
ovs_rt.dst.dev = vport->dev;
@@ -1439,17 +1446,9 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
err = pop_eth(skb, key);
break;
- case OVS_ACTION_ATTR_PUSH_NSH: {
- u8 buffer[NSH_HDR_MAX_LEN];
- struct nshhdr *nh = (struct nshhdr *)buffer;
-
- err = nsh_hdr_from_nlattr(nla_data(a), nh,
- NSH_HDR_MAX_LEN);
- if (unlikely(err))
- break;
- err = push_nsh(skb, key, nh);
+ case OVS_ACTION_ATTR_PUSH_NSH:
+ err = push_nsh(skb, key, nla_data(a));
break;
- }
case OVS_ACTION_ATTR_POP_NSH:
err = pop_nsh(skb, key);
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 0b9a785dea45..3019a4406ca4 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -985,7 +985,7 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
if (err)
return err;
- nf_conn_act_ct_ext_add(ct);
+ nf_conn_act_ct_ext_add(skb, ct, ctinfo);
} else if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
labels_nonzero(&info->labels.mask)) {
err = ovs_ct_set_labels(ct, key, &info->labels.value,
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index 4f3b1798e0b2..d108ae0bd0ee 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -220,16 +220,13 @@ static struct mask_array *tbl_mask_array_alloc(int size)
struct mask_array *new;
size = max(MASK_ARRAY_SIZE_MIN, size);
- new = kzalloc(sizeof(struct mask_array) +
- sizeof(struct sw_flow_mask *) * size +
+ new = kzalloc(struct_size(new, masks, size) +
sizeof(u64) * size, GFP_KERNEL);
if (!new)
return NULL;
new->masks_usage_zero_cntr = (u64 *)((u8 *)new +
- sizeof(struct mask_array) +
- sizeof(struct sw_flow_mask *) *
- size);
+ struct_size(new, masks, size));
new->masks_usage_stats = __alloc_percpu(sizeof(struct mask_array_stats) +
sizeof(u64) * size,
diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h
index 9e659db78c05..f524dc3e4862 100644
--- a/net/openvswitch/flow_table.h
+++ b/net/openvswitch/flow_table.h
@@ -48,7 +48,7 @@ struct mask_array {
int count, max;
struct mask_array_stats __percpu *masks_usage_stats;
u64 *masks_usage_zero_cntr;
- struct sw_flow_mask __rcu *masks[];
+ struct sw_flow_mask __rcu *masks[] __counted_by(max);
};
struct table_instance {
diff --git a/net/openvswitch/meter.h b/net/openvswitch/meter.h
index 0c33889a8515..ed11cd12b512 100644
--- a/net/openvswitch/meter.h
+++ b/net/openvswitch/meter.h
@@ -39,13 +39,13 @@ struct dp_meter {
u32 max_delta_t;
u64 used;
struct ovs_flow_stats stats;
- struct dp_meter_band bands[];
+ struct dp_meter_band bands[] __counted_by(n_bands);
};
struct dp_meter_instance {
struct rcu_head rcu;
u32 n_meters;
- struct dp_meter __rcu *dp_meters[];
+ struct dp_meter __rcu *dp_meters[] __counted_by(n_meters);
};
struct dp_meter_table {
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 8f97648d652f..c9bbc2686690 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2121,13 +2121,13 @@ static int packet_rcv_vnet(struct msghdr *msg, const struct sk_buff *skb,
static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt, struct net_device *orig_dev)
{
+ enum skb_drop_reason drop_reason = SKB_CONSUMED;
struct sock *sk;
struct sockaddr_ll *sll;
struct packet_sock *po;
u8 *skb_head = skb->data;
int skb_len = skb->len;
unsigned int snaplen, res;
- bool is_drop_n_account = false;
if (skb->pkt_type == PACKET_LOOPBACK)
goto drop;
@@ -2217,9 +2217,9 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
return 0;
drop_n_acct:
- is_drop_n_account = true;
atomic_inc(&po->tp_drops);
atomic_inc(&sk->sk_drops);
+ drop_reason = SKB_DROP_REASON_PACKET_SOCK_ERROR;
drop_n_restore:
if (skb_head != skb->data && skb_shared(skb)) {
@@ -2227,16 +2227,14 @@ drop_n_restore:
skb->len = skb_len;
}
drop:
- if (!is_drop_n_account)
- consume_skb(skb);
- else
- kfree_skb(skb);
+ kfree_skb_reason(skb, drop_reason);
return 0;
}
static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt, struct net_device *orig_dev)
{
+ enum skb_drop_reason drop_reason = SKB_CONSUMED;
struct sock *sk;
struct packet_sock *po;
struct sockaddr_ll *sll;
@@ -2250,7 +2248,6 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
struct sk_buff *copy_skb = NULL;
struct timespec64 ts;
__u32 ts_status;
- bool is_drop_n_account = false;
unsigned int slot_id = 0;
int vnet_hdr_sz = 0;
@@ -2498,19 +2495,16 @@ drop_n_restore:
skb->len = skb_len;
}
drop:
- if (!is_drop_n_account)
- consume_skb(skb);
- else
- kfree_skb(skb);
+ kfree_skb_reason(skb, drop_reason);
return 0;
drop_n_account:
spin_unlock(&sk->sk_receive_queue.lock);
atomic_inc(&po->tp_drops);
- is_drop_n_account = true;
+ drop_reason = SKB_DROP_REASON_PACKET_SOCK_ERROR;
sk->sk_data_ready(sk);
- kfree_skb(copy_skb);
+ kfree_skb_reason(copy_skb, drop_reason);
goto drop_n_restore;
}
@@ -3607,7 +3601,12 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
if (dev) {
sll->sll_hatype = dev->type;
sll->sll_halen = dev->addr_len;
- memcpy(sll->sll_addr_flex, dev->dev_addr, dev->addr_len);
+
+ /* Let __fortify_memcpy_chk() know the actual buffer size. */
+ memcpy(((struct sockaddr_storage *)sll)->__data +
+ offsetof(struct sockaddr_ll, sll_addr) -
+ offsetofend(struct sockaddr_ll, sll_family),
+ dev->dev_addr, dev->addr_len);
} else {
sll->sll_hatype = 0; /* Bad: we have no ARPHRD_UNSPEC */
sll->sll_halen = 0;
@@ -4295,7 +4294,7 @@ static void packet_mm_open(struct vm_area_struct *vma)
struct sock *sk = sock->sk;
if (sk)
- atomic_inc(&pkt_sk(sk)->mapped);
+ atomic_long_inc(&pkt_sk(sk)->mapped);
}
static void packet_mm_close(struct vm_area_struct *vma)
@@ -4305,7 +4304,7 @@ static void packet_mm_close(struct vm_area_struct *vma)
struct sock *sk = sock->sk;
if (sk)
- atomic_dec(&pkt_sk(sk)->mapped);
+ atomic_long_dec(&pkt_sk(sk)->mapped);
}
static const struct vm_operations_struct packet_mmap_ops = {
@@ -4400,7 +4399,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
err = -EBUSY;
if (!closing) {
- if (atomic_read(&po->mapped))
+ if (atomic_long_read(&po->mapped))
goto out;
if (packet_read_pending(rb))
goto out;
@@ -4503,7 +4502,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
err = -EBUSY;
mutex_lock(&po->pg_vec_lock);
- if (closing || atomic_read(&po->mapped) == 0) {
+ if (closing || atomic_long_read(&po->mapped) == 0) {
err = 0;
spin_lock_bh(&rb_queue->lock);
swap(rb->pg_vec, pg_vec);
@@ -4521,9 +4520,9 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
po->prot_hook.func = (po->rx_ring.pg_vec) ?
tpacket_rcv : packet_rcv;
skb_queue_purge(rb_queue);
- if (atomic_read(&po->mapped))
- pr_err("packet_mmap: vma is busy: %d\n",
- atomic_read(&po->mapped));
+ if (atomic_long_read(&po->mapped))
+ pr_err("packet_mmap: vma is busy: %ld\n",
+ atomic_long_read(&po->mapped));
}
mutex_unlock(&po->pg_vec_lock);
@@ -4601,7 +4600,7 @@ static int packet_mmap(struct file *file, struct socket *sock,
}
}
- atomic_inc(&po->mapped);
+ atomic_long_inc(&po->mapped);
vma->vm_ops = &packet_mmap_ops;
err = 0;
@@ -4782,5 +4781,6 @@ out:
module_init(packet_init);
module_exit(packet_exit);
+MODULE_DESCRIPTION("Packet socket support (AF_PACKET)");
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_PACKET);
diff --git a/net/packet/diag.c b/net/packet/diag.c
index f6b200cb3c06..9a7980e3309d 100644
--- a/net/packet/diag.c
+++ b/net/packet/diag.c
@@ -262,4 +262,5 @@ static void __exit packet_diag_exit(void)
module_init(packet_diag_init);
module_exit(packet_diag_exit);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("PACKET socket monitoring via SOCK_DIAG");
MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 17 /* AF_PACKET */);
diff --git a/net/packet/internal.h b/net/packet/internal.h
index 63f4865202c1..d5d70712007a 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -94,7 +94,7 @@ struct packet_fanout {
spinlock_t lock;
refcount_t sk_ref;
struct packet_type prot_hook ____cacheline_aligned_in_smp;
- struct sock __rcu *arr[];
+ struct sock __rcu *arr[] __counted_by(max_num_members);
};
struct packet_rollover {
@@ -122,7 +122,7 @@ struct packet_sock {
__be16 num;
struct packet_rollover *rollover;
struct packet_mclist *mclist;
- atomic_t mapped;
+ atomic_long_t mapped;
enum tpacket_versions tp_version;
unsigned int tp_hdrlen;
unsigned int tp_reserve;
diff --git a/net/psample/psample.c b/net/psample/psample.c
index 81a794e36f53..ddd211a151d0 100644
--- a/net/psample/psample.c
+++ b/net/psample/psample.c
@@ -31,7 +31,8 @@ enum psample_nl_multicast_groups {
static const struct genl_multicast_group psample_nl_mcgrps[] = {
[PSAMPLE_NL_MCGRP_CONFIG] = { .name = PSAMPLE_NL_MCGRP_CONFIG_NAME },
- [PSAMPLE_NL_MCGRP_SAMPLE] = { .name = PSAMPLE_NL_MCGRP_SAMPLE_NAME },
+ [PSAMPLE_NL_MCGRP_SAMPLE] = { .name = PSAMPLE_NL_MCGRP_SAMPLE_NAME,
+ .flags = GENL_MCAST_CAP_NET_ADMIN, },
};
static struct genl_family psample_nl_family __ro_after_init;
diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c
index b1db0b519179..abb0c70ffc8b 100644
--- a/net/qrtr/ns.c
+++ b/net/qrtr/ns.c
@@ -512,7 +512,9 @@ static int ctrl_cmd_del_server(struct sockaddr_qrtr *from,
if (!node)
return -ENOENT;
- return server_del(node, port, true);
+ server_del(node, port, true);
+
+ return 0;
}
static int ctrl_cmd_new_lookup(struct sockaddr_qrtr *from,
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index 01c4cdfef45d..8435a20968ef 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -419,7 +419,7 @@ static int rds_recv_track_latency(struct rds_sock *rs, sockptr_t optval,
rs->rs_rx_traces = trace.rx_traces;
for (i = 0; i < rs->rs_rx_traces; i++) {
- if (trace.rx_trace_pos[i] > RDS_MSG_RX_DGRAM_TRACE_MAX) {
+ if (trace.rx_trace_pos[i] >= RDS_MSG_RX_DGRAM_TRACE_MAX) {
rs->rs_rx_traces = 0;
return -EFAULT;
}
diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c
index d36f3f6b4351..b15cf316b23a 100644
--- a/net/rds/rdma_transport.c
+++ b/net/rds/rdma_transport.c
@@ -86,11 +86,13 @@ static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
break;
case RDMA_CM_EVENT_ADDR_RESOLVED:
- rdma_set_service_type(cm_id, conn->c_tos);
- rdma_set_min_rnr_timer(cm_id, IB_RNR_TIMER_000_32);
- /* XXX do we need to clean up if this fails? */
- ret = rdma_resolve_route(cm_id,
- RDS_RDMA_RESOLVE_TIMEOUT_MS);
+ if (conn) {
+ rdma_set_service_type(cm_id, conn->c_tos);
+ rdma_set_min_rnr_timer(cm_id, IB_RNR_TIMER_000_32);
+ /* XXX do we need to clean up if this fails? */
+ ret = rdma_resolve_route(cm_id,
+ RDS_RDMA_RESOLVE_TIMEOUT_MS);
+ }
break;
case RDMA_CM_EVENT_ROUTE_RESOLVED:
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c
index f0c477c5d1db..a0046e99d6df 100644
--- a/net/rds/tcp_connect.c
+++ b/net/rds/tcp_connect.c
@@ -145,7 +145,7 @@ int rds_tcp_conn_path_connect(struct rds_conn_path *cp)
addrlen = sizeof(sin);
}
- ret = sock->ops->bind(sock, addr, addrlen);
+ ret = kernel_bind(sock, addr, addrlen);
if (ret) {
rdsdebug("bind failed with %d at address %pI6c\n",
ret, &conn->c_laddr);
@@ -173,7 +173,7 @@ int rds_tcp_conn_path_connect(struct rds_conn_path *cp)
* own the socket
*/
rds_tcp_set_callbacks(sock, cp);
- ret = sock->ops->connect(sock, addr, addrlen, O_NONBLOCK);
+ ret = kernel_connect(sock, addr, addrlen, O_NONBLOCK);
rdsdebug("connect to address %pI6c returned %d\n", &conn->c_faddr, ret);
if (ret == -EINPROGRESS)
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 014fa24418c1..05008ce5c421 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -165,7 +165,7 @@ int rds_tcp_accept_one(struct socket *sock)
struct ipv6_pinfo *inet6;
inet6 = inet6_sk(new_sock->sk);
- dev_if = inet6->mcast_oif;
+ dev_if = READ_ONCE(inet6->mcast_oif);
} else {
dev_if = new_sock->sk->sk_bound_dev_if;
}
@@ -306,7 +306,7 @@ struct socket *rds_tcp_listen_init(struct net *net, bool isv6)
addr_len = sizeof(*sin);
}
- ret = sock->ops->bind(sock, (struct sockaddr *)&ss, addr_len);
+ ret = kernel_bind(sock, (struct sockaddr *)&ss, addr_len);
if (ret < 0) {
rdsdebug("could not bind %s listener socket: %d\n",
isv6 ? "IPv6" : "IPv4", ret);
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index 01fca7a10b4b..c3feb4f49d09 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -48,6 +48,7 @@ struct rfkill {
bool persistent;
bool polling_paused;
bool suspended;
+ bool need_sync;
const struct rfkill_ops *ops;
void *data;
@@ -368,6 +369,17 @@ static void rfkill_set_block(struct rfkill *rfkill, bool blocked)
rfkill_event(rfkill);
}
+static void rfkill_sync(struct rfkill *rfkill)
+{
+ lockdep_assert_held(&rfkill_global_mutex);
+
+ if (!rfkill->need_sync)
+ return;
+
+ rfkill_set_block(rfkill, rfkill_global_states[rfkill->type].cur);
+ rfkill->need_sync = false;
+}
+
static void rfkill_update_global_state(enum rfkill_type type, bool blocked)
{
int i;
@@ -730,6 +742,10 @@ static ssize_t soft_show(struct device *dev, struct device_attribute *attr,
{
struct rfkill *rfkill = to_rfkill(dev);
+ mutex_lock(&rfkill_global_mutex);
+ rfkill_sync(rfkill);
+ mutex_unlock(&rfkill_global_mutex);
+
return sysfs_emit(buf, "%d\n", (rfkill->state & RFKILL_BLOCK_SW) ? 1 : 0);
}
@@ -751,6 +767,7 @@ static ssize_t soft_store(struct device *dev, struct device_attribute *attr,
return -EINVAL;
mutex_lock(&rfkill_global_mutex);
+ rfkill_sync(rfkill);
rfkill_set_block(rfkill, state);
mutex_unlock(&rfkill_global_mutex);
@@ -783,6 +800,10 @@ static ssize_t state_show(struct device *dev, struct device_attribute *attr,
{
struct rfkill *rfkill = to_rfkill(dev);
+ mutex_lock(&rfkill_global_mutex);
+ rfkill_sync(rfkill);
+ mutex_unlock(&rfkill_global_mutex);
+
return sysfs_emit(buf, "%d\n", user_state_from_blocked(rfkill->state));
}
@@ -805,6 +826,7 @@ static ssize_t state_store(struct device *dev, struct device_attribute *attr,
return -EINVAL;
mutex_lock(&rfkill_global_mutex);
+ rfkill_sync(rfkill);
rfkill_set_block(rfkill, state == RFKILL_USER_STATE_SOFT_BLOCKED);
mutex_unlock(&rfkill_global_mutex);
@@ -1032,14 +1054,10 @@ static void rfkill_uevent_work(struct work_struct *work)
static void rfkill_sync_work(struct work_struct *work)
{
- struct rfkill *rfkill;
- bool cur;
-
- rfkill = container_of(work, struct rfkill, sync_work);
+ struct rfkill *rfkill = container_of(work, struct rfkill, sync_work);
mutex_lock(&rfkill_global_mutex);
- cur = rfkill_global_states[rfkill->type].cur;
- rfkill_set_block(rfkill, cur);
+ rfkill_sync(rfkill);
mutex_unlock(&rfkill_global_mutex);
}
@@ -1087,6 +1105,7 @@ int __must_check rfkill_register(struct rfkill *rfkill)
round_jiffies_relative(POLL_INTERVAL));
if (!rfkill->persistent || rfkill_epo_lock_active) {
+ rfkill->need_sync = true;
schedule_work(&rfkill->sync_work);
} else {
#ifdef CONFIG_RFKILL_INPUT
@@ -1161,7 +1180,6 @@ static int rfkill_fop_open(struct inode *inode, struct file *file)
init_waitqueue_head(&data->read_wait);
mutex_lock(&rfkill_global_mutex);
- mutex_lock(&data->mtx);
/*
* start getting events from elsewhere but hold mtx to get
* startup events added first
@@ -1171,11 +1189,13 @@ static int rfkill_fop_open(struct inode *inode, struct file *file)
ev = kzalloc(sizeof(*ev), GFP_KERNEL);
if (!ev)
goto free;
+ rfkill_sync(rfkill);
rfkill_fill_event(&ev->ev, rfkill, RFKILL_OP_ADD);
+ mutex_lock(&data->mtx);
list_add_tail(&ev->list, &data->events);
+ mutex_unlock(&data->mtx);
}
list_add(&data->list, &rfkill_fds);
- mutex_unlock(&data->mtx);
mutex_unlock(&rfkill_global_mutex);
file->private_data = data;
@@ -1183,7 +1203,6 @@ static int rfkill_fop_open(struct inode *inode, struct file *file)
return stream_open(inode, file);
free:
- mutex_unlock(&data->mtx);
mutex_unlock(&rfkill_global_mutex);
mutex_destroy(&data->mtx);
list_for_each_entry_safe(ev, tmp, &data->events, list)
@@ -1332,11 +1351,11 @@ static long rfkill_fop_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
struct rfkill_data *data = file->private_data;
- int ret = -ENOSYS;
+ int ret = -ENOTTY;
u32 size;
if (_IOC_TYPE(cmd) != RFKILL_IOC_MAGIC)
- return -ENOSYS;
+ return -ENOTTY;
mutex_lock(&data->mtx);
switch (_IOC_NR(cmd)) {
diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c
index e9d1b2f2ff0a..4e32d659524e 100644
--- a/net/rfkill/rfkill-gpio.c
+++ b/net/rfkill/rfkill-gpio.c
@@ -108,13 +108,13 @@ static int rfkill_gpio_probe(struct platform_device *pdev)
rfkill->clk = devm_clk_get(&pdev->dev, NULL);
- gpio = devm_gpiod_get_optional(&pdev->dev, "reset", GPIOD_OUT_LOW);
+ gpio = devm_gpiod_get_optional(&pdev->dev, "reset", GPIOD_ASIS);
if (IS_ERR(gpio))
return PTR_ERR(gpio);
rfkill->reset_gpio = gpio;
- gpio = devm_gpiod_get_optional(&pdev->dev, "shutdown", GPIOD_OUT_LOW);
+ gpio = devm_gpiod_get_optional(&pdev->dev, "shutdown", GPIOD_ASIS);
if (IS_ERR(gpio))
return PTR_ERR(gpio);
@@ -126,6 +126,14 @@ static int rfkill_gpio_probe(struct platform_device *pdev)
return -EINVAL;
}
+ ret = gpiod_direction_output(rfkill->reset_gpio, true);
+ if (ret)
+ return ret;
+
+ ret = gpiod_direction_output(rfkill->shutdown_gpio, true);
+ if (ret)
+ return ret;
+
rfkill->rfkill_dev = rfkill_alloc(rfkill->name, &pdev->dev,
rfkill->type, &rfkill_gpio_ops,
rfkill);
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 49dafe9ac72f..ef81d019b20f 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -182,21 +182,47 @@ void rose_kill_by_neigh(struct rose_neigh *neigh)
*/
static void rose_kill_by_device(struct net_device *dev)
{
- struct sock *s;
+ struct sock *sk, *array[16];
+ struct rose_sock *rose;
+ bool rescan;
+ int i, cnt;
+start:
+ rescan = false;
+ cnt = 0;
spin_lock_bh(&rose_list_lock);
- sk_for_each(s, &rose_list) {
- struct rose_sock *rose = rose_sk(s);
+ sk_for_each(sk, &rose_list) {
+ rose = rose_sk(sk);
+ if (rose->device == dev) {
+ if (cnt == ARRAY_SIZE(array)) {
+ rescan = true;
+ break;
+ }
+ sock_hold(sk);
+ array[cnt++] = sk;
+ }
+ }
+ spin_unlock_bh(&rose_list_lock);
+ for (i = 0; i < cnt; i++) {
+ sk = array[cnt];
+ rose = rose_sk(sk);
+ lock_sock(sk);
+ spin_lock_bh(&rose_list_lock);
if (rose->device == dev) {
- rose_disconnect(s, ENETUNREACH, ROSE_OUT_OF_ORDER, 0);
+ rose_disconnect(sk, ENETUNREACH, ROSE_OUT_OF_ORDER, 0);
if (rose->neighbour)
rose->neighbour->use--;
netdev_put(rose->device, &rose->dev_tracker);
rose->device = NULL;
}
+ spin_unlock_bh(&rose_list_lock);
+ release_sock(sk);
+ sock_put(sk);
+ cond_resched();
}
- spin_unlock_bh(&rose_list_lock);
+ if (rescan)
+ goto start;
}
/*
@@ -583,7 +609,7 @@ static struct sock *rose_make_new(struct sock *osk)
#endif
sk->sk_type = osk->sk_type;
- sk->sk_priority = osk->sk_priority;
+ sk->sk_priority = READ_ONCE(osk->sk_priority);
sk->sk_protocol = osk->sk_protocol;
sk->sk_rcvbuf = osk->sk_rcvbuf;
sk->sk_sndbuf = osk->sk_sndbuf;
@@ -656,7 +682,10 @@ static int rose_release(struct socket *sock)
break;
}
+ spin_lock_bh(&rose_list_lock);
netdev_put(rose->device, &rose->dev_tracker);
+ rose->device = NULL;
+ spin_unlock_bh(&rose_list_lock);
sock->sk = NULL;
release_sock(sk);
sock_put(sk);
@@ -1315,9 +1344,11 @@ static int rose_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
case TIOCINQ: {
struct sk_buff *skb;
long amount = 0L;
- /* These two are safe on a single CPU system as only user tasks fiddle here */
+
+ spin_lock_irq(&sk->sk_receive_queue.lock);
if ((skb = skb_peek(&sk->sk_receive_queue)) != NULL)
amount = skb->len;
+ spin_unlock_irq(&sk->sk_receive_queue.lock);
return put_user(amount, (unsigned int __user *) argp);
}
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index fa8aec78f63d..465bfe5eb061 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -259,15 +259,61 @@ static int rxrpc_listen(struct socket *sock, int backlog)
}
/**
+ * rxrpc_kernel_lookup_peer - Obtain remote transport endpoint for an address
+ * @sock: The socket through which it will be accessed
+ * @srx: The network address
+ * @gfp: Allocation flags
+ *
+ * Lookup or create a remote transport endpoint record for the specified
+ * address and return it with a ref held.
+ */
+struct rxrpc_peer *rxrpc_kernel_lookup_peer(struct socket *sock,
+ struct sockaddr_rxrpc *srx, gfp_t gfp)
+{
+ struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
+ int ret;
+
+ ret = rxrpc_validate_address(rx, srx, sizeof(*srx));
+ if (ret < 0)
+ return ERR_PTR(ret);
+
+ return rxrpc_lookup_peer(rx->local, srx, gfp);
+}
+EXPORT_SYMBOL(rxrpc_kernel_lookup_peer);
+
+/**
+ * rxrpc_kernel_get_peer - Get a reference on a peer
+ * @peer: The peer to get a reference on.
+ *
+ * Get a record for the remote peer in a call.
+ */
+struct rxrpc_peer *rxrpc_kernel_get_peer(struct rxrpc_peer *peer)
+{
+ return peer ? rxrpc_get_peer(peer, rxrpc_peer_get_application) : NULL;
+}
+EXPORT_SYMBOL(rxrpc_kernel_get_peer);
+
+/**
+ * rxrpc_kernel_put_peer - Allow a kernel app to drop a peer reference
+ * @peer: The peer to drop a ref on
+ */
+void rxrpc_kernel_put_peer(struct rxrpc_peer *peer)
+{
+ rxrpc_put_peer(peer, rxrpc_peer_put_application);
+}
+EXPORT_SYMBOL(rxrpc_kernel_put_peer);
+
+/**
* rxrpc_kernel_begin_call - Allow a kernel service to begin a call
* @sock: The socket on which to make the call
- * @srx: The address of the peer to contact
+ * @peer: The peer to contact
* @key: The security context to use (defaults to socket setting)
* @user_call_ID: The ID to use
* @tx_total_len: Total length of data to transmit during the call (or -1)
* @hard_timeout: The maximum lifespan of the call in sec
* @gfp: The allocation constraints
* @notify_rx: Where to send notifications instead of socket queue
+ * @service_id: The ID of the service to contact
* @upgrade: Request service upgrade for call
* @interruptibility: The call is interruptible, or can be canceled.
* @debug_id: The debug ID for tracing to be assigned to the call
@@ -280,13 +326,14 @@ static int rxrpc_listen(struct socket *sock, int backlog)
* supplying @srx and @key.
*/
struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
- struct sockaddr_rxrpc *srx,
+ struct rxrpc_peer *peer,
struct key *key,
unsigned long user_call_ID,
s64 tx_total_len,
u32 hard_timeout,
gfp_t gfp,
rxrpc_notify_rx_t notify_rx,
+ u16 service_id,
bool upgrade,
enum rxrpc_interruptibility interruptibility,
unsigned int debug_id)
@@ -295,13 +342,11 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
struct rxrpc_call_params p;
struct rxrpc_call *call;
struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
- int ret;
_enter(",,%x,%lx", key_serial(key), user_call_ID);
- ret = rxrpc_validate_address(rx, srx, sizeof(*srx));
- if (ret < 0)
- return ERR_PTR(ret);
+ if (WARN_ON_ONCE(peer->local != rx->local))
+ return ERR_PTR(-EIO);
lock_sock(&rx->sk);
@@ -319,12 +364,13 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
memset(&cp, 0, sizeof(cp));
cp.local = rx->local;
+ cp.peer = peer;
cp.key = key;
cp.security_level = rx->min_sec_level;
cp.exclusive = false;
cp.upgrade = upgrade;
- cp.service_id = srx->srx_service;
- call = rxrpc_new_client_call(rx, &cp, srx, &p, gfp, debug_id);
+ cp.service_id = service_id;
+ call = rxrpc_new_client_call(rx, &cp, &p, gfp, debug_id);
/* The socket has been unlocked. */
if (!IS_ERR(call)) {
call->notify_rx = notify_rx;
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index e8e14c6f904d..7818aae1be8e 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -68,6 +68,7 @@ struct rxrpc_net {
atomic_t nr_calls; /* Count of allocated calls */
atomic_t nr_conns;
+ struct list_head bundle_proc_list; /* List of bundles for proc */
struct list_head conn_proc_list; /* List of conns in this namespace for proc */
struct list_head service_conns; /* Service conns in this namespace */
rwlock_t conn_lock; /* Lock for ->conn_proc_list, ->service_conns */
@@ -198,11 +199,19 @@ struct rxrpc_host_header {
*/
struct rxrpc_skb_priv {
struct rxrpc_connection *conn; /* Connection referred to (poke packet) */
- u16 offset; /* Offset of data */
- u16 len; /* Length of data */
- u8 flags;
+ union {
+ struct {
+ u16 offset; /* Offset of data */
+ u16 len; /* Length of data */
+ u8 flags;
#define RXRPC_RX_VERIFIED 0x01
-
+ };
+ struct {
+ rxrpc_seq_t first_ack; /* First packet in acks table */
+ u8 nr_acks; /* Number of acks+nacks */
+ u8 nr_nacks; /* Number of nacks */
+ };
+ };
struct rxrpc_host_header hdr; /* RxRPC packet header from this packet */
};
@@ -364,6 +373,7 @@ struct rxrpc_conn_proto {
struct rxrpc_conn_parameters {
struct rxrpc_local *local; /* Representation of local endpoint */
+ struct rxrpc_peer *peer; /* Representation of remote endpoint */
struct key *key; /* Security details */
bool exclusive; /* T if conn is exclusive */
bool upgrade; /* T if service ID can be upgraded */
@@ -431,6 +441,7 @@ struct rxrpc_bundle {
struct rxrpc_local *local; /* Representation of local endpoint */
struct rxrpc_peer *peer; /* Remote endpoint */
struct key *key; /* Security details */
+ struct list_head proc_link; /* Link in net->bundle_proc_list */
const struct rxrpc_security *security; /* applied security module */
refcount_t ref;
atomic_t active; /* Number of active users */
@@ -444,6 +455,7 @@ struct rxrpc_bundle {
struct rb_node local_node; /* Node in local->client_conns */
struct list_head waiting_calls; /* Calls waiting for channels */
unsigned long avail_chans; /* Mask of available channels */
+ unsigned int conn_ids[4]; /* Connection IDs. */
struct rxrpc_connection *conns[4]; /* The connections in the bundle (max 4) */
};
@@ -506,7 +518,7 @@ struct rxrpc_connection {
enum rxrpc_call_completion completion; /* Completion condition */
s32 abort_code; /* Abort code of connection abort */
int debug_id; /* debug ID for printks */
- atomic_t serial; /* packet serial number counter */
+ rxrpc_serial_t tx_serial; /* Outgoing packet serial number counter */
unsigned int hi_serial; /* highest serial number received */
u32 service_id; /* Service ID, possibly upgraded */
u32 security_level; /* Security level selected */
@@ -688,11 +700,11 @@ struct rxrpc_call {
u8 cong_dup_acks; /* Count of ACKs showing missing packets */
u8 cong_cumul_acks; /* Cumulative ACK count */
ktime_t cong_tstamp; /* Last time cwnd was changed */
+ struct sk_buff *cong_last_nack; /* Last ACK with nacks received */
/* Receive-phase ACK management (ACKs we send). */
u8 ackr_reason; /* reason to ACK */
u16 ackr_sack_base; /* Starting slot in SACK table ring */
- rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */
rxrpc_seq_t ackr_window; /* Base of SACK window */
rxrpc_seq_t ackr_wtop; /* Base of SACK window */
unsigned int ackr_nr_unacked; /* Number of unacked packets */
@@ -726,7 +738,8 @@ struct rxrpc_call {
struct rxrpc_ack_summary {
u16 nr_acks; /* Number of ACKs in packet */
u16 nr_new_acks; /* Number of new ACKs in packet */
- u16 nr_rot_new_acks; /* Number of rotated new ACKs */
+ u16 nr_new_nacks; /* Number of new nacks in packet */
+ u16 nr_retained_nacks; /* Number of nacks retained between ACKs */
u8 ack_reason;
bool saw_nacks; /* Saw NACKs in packet */
bool new_low_nack; /* T if new low NACK found */
@@ -819,6 +832,20 @@ static inline bool rxrpc_sending_to_client(const struct rxrpc_txbuf *txb)
#include <trace/events/rxrpc.h>
/*
+ * Allocate the next serial number on a connection. 0 must be skipped.
+ */
+static inline rxrpc_serial_t rxrpc_get_next_serial(struct rxrpc_connection *conn)
+{
+ rxrpc_serial_t serial;
+
+ serial = conn->tx_serial;
+ if (serial == 0)
+ serial = 1;
+ conn->tx_serial = serial + 1;
+ return serial;
+}
+
+/*
* af_rxrpc.c
*/
extern atomic_t rxrpc_n_rx_skbs;
@@ -867,7 +894,6 @@ struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *, unsigned long
struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *, gfp_t, unsigned int);
struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *,
struct rxrpc_conn_parameters *,
- struct sockaddr_rxrpc *,
struct rxrpc_call_params *, gfp_t,
unsigned int);
void rxrpc_start_call_timer(struct rxrpc_call *call);
@@ -1076,6 +1102,7 @@ void rxrpc_send_version_request(struct rxrpc_local *local,
/*
* local_object.c
*/
+void rxrpc_local_dont_fragment(const struct rxrpc_local *local, bool set);
struct rxrpc_local *rxrpc_lookup_local(struct net *, const struct sockaddr_rxrpc *);
struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *, enum rxrpc_local_trace);
struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *, enum rxrpc_local_trace);
@@ -1167,6 +1194,7 @@ void rxrpc_put_peer(struct rxrpc_peer *, enum rxrpc_peer_trace);
*/
extern const struct seq_operations rxrpc_call_seq_ops;
extern const struct seq_operations rxrpc_connection_seq_ops;
+extern const struct seq_operations rxrpc_bundle_seq_ops;
extern const struct seq_operations rxrpc_peer_seq_ops;
extern const struct seq_operations rxrpc_local_seq_ops;
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index e363f21a2014..0f78544d043b 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -43,8 +43,6 @@ void rxrpc_propose_delay_ACK(struct rxrpc_call *call, rxrpc_serial_t serial,
unsigned long expiry = rxrpc_soft_ack_delay;
unsigned long now = jiffies, ack_at;
- call->ackr_serial = serial;
-
if (rxrpc_soft_ack_delay < expiry)
expiry = rxrpc_soft_ack_delay;
if (call->peer->srtt_us != 0)
@@ -114,6 +112,7 @@ static void rxrpc_congestion_timeout(struct rxrpc_call *call)
void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb)
{
struct rxrpc_ackpacket *ack = NULL;
+ struct rxrpc_skb_priv *sp;
struct rxrpc_txbuf *txb;
unsigned long resend_at;
rxrpc_seq_t transmitted = READ_ONCE(call->tx_transmitted);
@@ -141,14 +140,15 @@ void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb)
* explicitly NAK'd packets.
*/
if (ack_skb) {
+ sp = rxrpc_skb(ack_skb);
ack = (void *)ack_skb->data + sizeof(struct rxrpc_wire_header);
- for (i = 0; i < ack->nAcks; i++) {
+ for (i = 0; i < sp->nr_acks; i++) {
rxrpc_seq_t seq;
if (ack->acks[i] & 1)
continue;
- seq = ntohl(ack->firstPacket) + i;
+ seq = sp->first_ack + i;
if (after(txb->seq, transmitted))
break;
if (after(txb->seq, seq))
@@ -373,7 +373,6 @@ static void rxrpc_send_initial_ping(struct rxrpc_call *call)
bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
{
unsigned long now, next, t;
- rxrpc_serial_t ackr_serial;
bool resend = false, expired = false;
s32 abort_code;
@@ -423,8 +422,7 @@ bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
if (time_after_eq(now, t)) {
trace_rxrpc_timer(call, rxrpc_timer_exp_ack, now);
cmpxchg(&call->delay_ack_at, t, now + MAX_JIFFY_OFFSET);
- ackr_serial = xchg(&call->ackr_serial, 0);
- rxrpc_send_ACK(call, RXRPC_ACK_DELAY, ackr_serial,
+ rxrpc_send_ACK(call, RXRPC_ACK_DELAY, 0,
rxrpc_propose_ack_ping_for_lost_ack);
}
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 773eecd1e979..9fc9a6c3f685 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -193,7 +193,6 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp,
* Allocate a new client call.
*/
static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx,
- struct sockaddr_rxrpc *srx,
struct rxrpc_conn_parameters *cp,
struct rxrpc_call_params *p,
gfp_t gfp,
@@ -211,10 +210,12 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx,
now = ktime_get_real();
call->acks_latest_ts = now;
call->cong_tstamp = now;
- call->dest_srx = *srx;
+ call->dest_srx = cp->peer->srx;
+ call->dest_srx.srx_service = cp->service_id;
call->interruptibility = p->interruptibility;
call->tx_total_len = p->tx_total_len;
call->key = key_get(cp->key);
+ call->peer = rxrpc_get_peer(cp->peer, rxrpc_peer_get_call);
call->local = rxrpc_get_local(cp->local, rxrpc_local_get_call);
call->security_level = cp->security_level;
if (p->kernel)
@@ -306,10 +307,6 @@ static int rxrpc_connect_call(struct rxrpc_call *call, gfp_t gfp)
_enter("{%d,%lx},", call->debug_id, call->user_call_ID);
- call->peer = rxrpc_lookup_peer(local, &call->dest_srx, gfp);
- if (!call->peer)
- goto error;
-
ret = rxrpc_look_up_bundle(call, gfp);
if (ret < 0)
goto error;
@@ -334,7 +331,6 @@ error:
*/
struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
struct rxrpc_conn_parameters *cp,
- struct sockaddr_rxrpc *srx,
struct rxrpc_call_params *p,
gfp_t gfp,
unsigned int debug_id)
@@ -349,13 +345,18 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
_enter("%p,%lx", rx, p->user_call_ID);
+ if (WARN_ON_ONCE(!cp->peer)) {
+ release_sock(&rx->sk);
+ return ERR_PTR(-EIO);
+ }
+
limiter = rxrpc_get_call_slot(p, gfp);
if (!limiter) {
release_sock(&rx->sk);
return ERR_PTR(-ERESTARTSYS);
}
- call = rxrpc_alloc_client_call(rx, srx, cp, p, gfp, debug_id);
+ call = rxrpc_alloc_client_call(rx, cp, p, gfp, debug_id);
if (IS_ERR(call)) {
release_sock(&rx->sk);
up(limiter);
@@ -545,8 +546,8 @@ void rxrpc_get_call(struct rxrpc_call *call, enum rxrpc_call_trace why)
*/
static void rxrpc_cleanup_ring(struct rxrpc_call *call)
{
- skb_queue_purge(&call->recvmsg_queue);
- skb_queue_purge(&call->rx_oos_queue);
+ rxrpc_purge_queue(&call->recvmsg_queue);
+ rxrpc_purge_queue(&call->rx_oos_queue);
}
/*
@@ -685,6 +686,7 @@ static void rxrpc_destroy_call(struct work_struct *work)
del_timer_sync(&call->timer);
+ rxrpc_free_skb(call->cong_last_nack, rxrpc_skb_put_last_nack);
rxrpc_cleanup_ring(call);
while ((txb = list_first_entry_or_null(&call->tx_sendmsg,
struct rxrpc_txbuf, call_link))) {
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index 981ca5b98bcb..3b9b267a4431 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -73,6 +73,7 @@ static void rxrpc_destroy_client_conn_ids(struct rxrpc_local *local)
static struct rxrpc_bundle *rxrpc_alloc_bundle(struct rxrpc_call *call,
gfp_t gfp)
{
+ static atomic_t rxrpc_bundle_id;
struct rxrpc_bundle *bundle;
bundle = kzalloc(sizeof(*bundle), gfp);
@@ -85,10 +86,15 @@ static struct rxrpc_bundle *rxrpc_alloc_bundle(struct rxrpc_call *call,
bundle->upgrade = test_bit(RXRPC_CALL_UPGRADE, &call->flags);
bundle->service_id = call->dest_srx.srx_service;
bundle->security_level = call->security_level;
+ bundle->debug_id = atomic_inc_return(&rxrpc_bundle_id);
refcount_set(&bundle->ref, 1);
atomic_set(&bundle->active, 1);
INIT_LIST_HEAD(&bundle->waiting_calls);
trace_rxrpc_bundle(bundle->debug_id, 1, rxrpc_bundle_new);
+
+ write_lock(&bundle->local->rxnet->conn_lock);
+ list_add_tail(&bundle->proc_link, &bundle->local->rxnet->bundle_proc_list);
+ write_unlock(&bundle->local->rxnet->conn_lock);
}
return bundle;
}
@@ -105,7 +111,11 @@ struct rxrpc_bundle *rxrpc_get_bundle(struct rxrpc_bundle *bundle,
static void rxrpc_free_bundle(struct rxrpc_bundle *bundle)
{
- trace_rxrpc_bundle(bundle->debug_id, 1, rxrpc_bundle_free);
+ trace_rxrpc_bundle(bundle->debug_id, refcount_read(&bundle->ref),
+ rxrpc_bundle_free);
+ write_lock(&bundle->local->rxnet->conn_lock);
+ list_del(&bundle->proc_link);
+ write_unlock(&bundle->local->rxnet->conn_lock);
rxrpc_put_peer(bundle->peer, rxrpc_peer_put_bundle);
key_put(bundle->key);
kfree(bundle);
@@ -239,7 +249,6 @@ dont_reuse:
*/
int rxrpc_look_up_bundle(struct rxrpc_call *call, gfp_t gfp)
{
- static atomic_t rxrpc_bundle_id;
struct rxrpc_bundle *bundle, *candidate;
struct rxrpc_local *local = call->local;
struct rb_node *p, **pp, *parent;
@@ -306,7 +315,6 @@ int rxrpc_look_up_bundle(struct rxrpc_call *call, gfp_t gfp)
}
_debug("new bundle");
- candidate->debug_id = atomic_inc_return(&rxrpc_bundle_id);
rb_link_node(&candidate->local_node, parent, pp);
rb_insert_color(&candidate->local_node, &local->client_bundles);
call->bundle = rxrpc_get_bundle(candidate, rxrpc_bundle_get_client_call);
@@ -337,6 +345,7 @@ static bool rxrpc_add_conn_to_bundle(struct rxrpc_bundle *bundle,
old = bundle->conns[slot];
if (old) {
bundle->conns[slot] = NULL;
+ bundle->conn_ids[slot] = 0;
trace_rxrpc_client(old, -1, rxrpc_client_replace);
rxrpc_put_connection(old, rxrpc_conn_put_noreuse);
}
@@ -350,6 +359,7 @@ static bool rxrpc_add_conn_to_bundle(struct rxrpc_bundle *bundle,
rxrpc_activate_bundle(bundle);
conn->bundle_shift = shift;
bundle->conns[slot] = conn;
+ bundle->conn_ids[slot] = conn->debug_id;
for (i = 0; i < RXRPC_MAXCALLS; i++)
set_bit(shift + i, &bundle->avail_chans);
return true;
@@ -670,6 +680,7 @@ static void rxrpc_unbundle_conn(struct rxrpc_connection *conn)
if (bundle->conns[bindex] == conn) {
_debug("clear slot %u", bindex);
bundle->conns[bindex] = NULL;
+ bundle->conn_ids[bindex] = 0;
for (i = 0; i < RXRPC_MAXCALLS; i++)
clear_bit(conn->bundle_shift + i, &bundle->avail_chans);
rxrpc_put_client_connection_id(bundle->local, conn);
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index 95f4bc206b3d..1f251d758cb9 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -95,6 +95,14 @@ void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
_enter("%d", conn->debug_id);
+ if (sp && sp->hdr.type == RXRPC_PACKET_TYPE_ACK) {
+ if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
+ &pkt.ack, sizeof(pkt.ack)) < 0)
+ return;
+ if (pkt.ack.reason == RXRPC_ACK_PING_RESPONSE)
+ return;
+ }
+
chan = &conn->channels[channel];
/* If the last call got moved on whilst we were waiting to run, just
@@ -117,7 +125,7 @@ void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
iov[2].iov_base = &ack_info;
iov[2].iov_len = sizeof(ack_info);
- serial = atomic_inc_return(&conn->serial);
+ serial = rxrpc_get_next_serial(conn);
pkt.whdr.epoch = htonl(conn->proto.epoch);
pkt.whdr.cid = htonl(conn->proto.cid | channel);
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index ac85d4644a3c..df8a271948a1 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -212,7 +212,7 @@ void rxrpc_disconnect_call(struct rxrpc_call *call)
conn->idle_timestamp = jiffies;
if (atomic_dec_and_test(&conn->active))
rxrpc_set_service_reap_timer(conn->rxnet,
- jiffies + rxrpc_connection_expiry);
+ jiffies + rxrpc_connection_expiry * HZ);
}
rxrpc_put_call(call, rxrpc_call_put_io_thread);
diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c
index 89ac05a711a4..39c908a3ca6e 100644
--- a/net/rxrpc/conn_service.c
+++ b/net/rxrpc/conn_service.c
@@ -25,7 +25,7 @@ struct rxrpc_connection *rxrpc_find_service_conn_rcu(struct rxrpc_peer *peer,
struct rxrpc_conn_proto k;
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct rb_node *p;
- unsigned int seq = 0;
+ unsigned int seq = 1;
k.epoch = sp->hdr.epoch;
k.cid = sp->hdr.cid & RXRPC_CIDMASK;
@@ -35,6 +35,7 @@ struct rxrpc_connection *rxrpc_find_service_conn_rcu(struct rxrpc_peer *peer,
* under just the RCU read lock, so we have to check for
* changes.
*/
+ seq++; /* 2 on the 1st/lockless path, otherwise odd */
read_seqbegin_or_lock(&peer->service_conn_lock, &seq);
p = rcu_dereference_raw(peer->service_conns.rb_node);
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 030d64f282f3..9691de00ade7 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -45,11 +45,9 @@ static void rxrpc_congestion_management(struct rxrpc_call *call,
}
cumulative_acks += summary->nr_new_acks;
- cumulative_acks += summary->nr_rot_new_acks;
if (cumulative_acks > 255)
cumulative_acks = 255;
- summary->mode = call->cong_mode;
summary->cwnd = call->cong_cwnd;
summary->ssthresh = call->cong_ssthresh;
summary->cumulative_acks = cumulative_acks;
@@ -151,6 +149,7 @@ out_no_clear_ca:
cwnd = RXRPC_TX_MAX_WINDOW;
call->cong_cwnd = cwnd;
call->cong_cumul_acks = cumulative_acks;
+ summary->mode = call->cong_mode;
trace_rxrpc_congest(call, summary, acked_serial, change);
if (resend)
rxrpc_resend(call, skb);
@@ -213,7 +212,6 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to,
list_for_each_entry_rcu(txb, &call->tx_buffer, call_link, false) {
if (before_eq(txb->seq, call->acks_hard_ack))
continue;
- summary->nr_rot_new_acks++;
if (test_bit(RXRPC_TXBUF_LAST, &txb->flags)) {
set_bit(RXRPC_CALL_TX_LAST, &call->flags);
rot_last = true;
@@ -254,6 +252,11 @@ static void rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun,
{
ASSERT(test_bit(RXRPC_CALL_TX_LAST, &call->flags));
+ if (unlikely(call->cong_last_nack)) {
+ rxrpc_free_skb(call->cong_last_nack, rxrpc_skb_put_last_nack);
+ call->cong_last_nack = NULL;
+ }
+
switch (__rxrpc_call_state(call)) {
case RXRPC_CALL_CLIENT_SEND_REQUEST:
case RXRPC_CALL_CLIENT_AWAIT_REPLY:
@@ -643,12 +646,8 @@ static void rxrpc_complete_rtt_probe(struct rxrpc_call *call,
clear_bit(i + RXRPC_CALL_RTT_PEND_SHIFT, &call->rtt_avail);
smp_mb(); /* Read data before setting avail bit */
set_bit(i, &call->rtt_avail);
- if (type != rxrpc_rtt_rx_cancel)
- rxrpc_peer_add_rtt(call, type, i, acked_serial, ack_serial,
- sent_at, resp_time);
- else
- trace_rxrpc_rtt_rx(call, rxrpc_rtt_rx_cancel, i,
- orig_serial, acked_serial, 0, 0);
+ rxrpc_peer_add_rtt(call, type, i, acked_serial, ack_serial,
+ sent_at, resp_time);
matched = true;
}
@@ -707,6 +706,43 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
}
/*
+ * Determine how many nacks from the previous ACK have now been satisfied.
+ */
+static rxrpc_seq_t rxrpc_input_check_prev_ack(struct rxrpc_call *call,
+ struct rxrpc_ack_summary *summary,
+ rxrpc_seq_t seq)
+{
+ struct sk_buff *skb = call->cong_last_nack;
+ struct rxrpc_ackpacket ack;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ unsigned int i, new_acks = 0, retained_nacks = 0;
+ rxrpc_seq_t old_seq = sp->first_ack;
+ u8 *acks = skb->data + sizeof(struct rxrpc_wire_header) + sizeof(ack);
+
+ if (after_eq(seq, old_seq + sp->nr_acks)) {
+ summary->nr_new_acks += sp->nr_nacks;
+ summary->nr_new_acks += seq - (old_seq + sp->nr_acks);
+ summary->nr_retained_nacks = 0;
+ } else if (seq == old_seq) {
+ summary->nr_retained_nacks = sp->nr_nacks;
+ } else {
+ for (i = 0; i < sp->nr_acks; i++) {
+ if (acks[i] == RXRPC_ACK_TYPE_NACK) {
+ if (before(old_seq + i, seq))
+ new_acks++;
+ else
+ retained_nacks++;
+ }
+ }
+
+ summary->nr_new_acks += new_acks;
+ summary->nr_retained_nacks = retained_nacks;
+ }
+
+ return old_seq + sp->nr_acks;
+}
+
+/*
* Process individual soft ACKs.
*
* Each ACK in the array corresponds to one packet and can be either an ACK or
@@ -715,25 +751,51 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
* the timer on the basis that the peer might just not have processed them at
* the time the ACK was sent.
*/
-static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks,
- rxrpc_seq_t seq, int nr_acks,
- struct rxrpc_ack_summary *summary)
+static void rxrpc_input_soft_acks(struct rxrpc_call *call,
+ struct rxrpc_ack_summary *summary,
+ struct sk_buff *skb,
+ rxrpc_seq_t seq,
+ rxrpc_seq_t since)
{
- unsigned int i;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ unsigned int i, old_nacks = 0;
+ rxrpc_seq_t lowest_nak = seq + sp->nr_acks;
+ u8 *acks = skb->data + sizeof(struct rxrpc_wire_header) + sizeof(struct rxrpc_ackpacket);
- for (i = 0; i < nr_acks; i++) {
+ for (i = 0; i < sp->nr_acks; i++) {
if (acks[i] == RXRPC_ACK_TYPE_ACK) {
summary->nr_acks++;
- summary->nr_new_acks++;
+ if (after_eq(seq, since))
+ summary->nr_new_acks++;
} else {
- if (!summary->saw_nacks &&
- call->acks_lowest_nak != seq + i) {
- call->acks_lowest_nak = seq + i;
- summary->new_low_nack = true;
- }
summary->saw_nacks = true;
+ if (before(seq, since)) {
+ /* Overlap with previous ACK */
+ old_nacks++;
+ } else {
+ summary->nr_new_nacks++;
+ sp->nr_nacks++;
+ }
+
+ if (before(seq, lowest_nak))
+ lowest_nak = seq;
}
+ seq++;
}
+
+ if (lowest_nak != call->acks_lowest_nak) {
+ call->acks_lowest_nak = lowest_nak;
+ summary->new_low_nack = true;
+ }
+
+ /* We *can* have more nacks than we did - the peer is permitted to drop
+ * packets it has soft-acked and re-request them. Further, it is
+ * possible for the nack distribution to change whilst the number of
+ * nacks stays the same or goes down.
+ */
+ if (old_nacks < summary->nr_retained_nacks)
+ summary->nr_new_acks += summary->nr_retained_nacks - old_nacks;
+ summary->nr_retained_nacks = old_nacks;
}
/*
@@ -777,7 +839,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct rxrpc_ackinfo info;
rxrpc_serial_t ack_serial, acked_serial;
- rxrpc_seq_t first_soft_ack, hard_ack, prev_pkt;
+ rxrpc_seq_t first_soft_ack, hard_ack, prev_pkt, since;
int nr_acks, offset, ioffset;
_enter("");
@@ -793,6 +855,8 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
prev_pkt = ntohl(ack.previousPacket);
hard_ack = first_soft_ack - 1;
nr_acks = ack.nAcks;
+ sp->first_ack = first_soft_ack;
+ sp->nr_acks = nr_acks;
summary.ack_reason = (ack.reason < RXRPC_ACK__INVALID ?
ack.reason : RXRPC_ACK__INVALID);
@@ -801,28 +865,21 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
summary.ack_reason, nr_acks);
rxrpc_inc_stat(call->rxnet, stat_rx_acks[ack.reason]);
- switch (ack.reason) {
- case RXRPC_ACK_PING_RESPONSE:
- rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial,
- rxrpc_rtt_rx_ping_response);
- break;
- case RXRPC_ACK_REQUESTED:
- rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial,
- rxrpc_rtt_rx_requested_ack);
- break;
- default:
- if (acked_serial != 0)
+ if (acked_serial != 0) {
+ switch (ack.reason) {
+ case RXRPC_ACK_PING_RESPONSE:
rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial,
- rxrpc_rtt_rx_cancel);
- break;
- }
-
- if (ack.reason == RXRPC_ACK_PING) {
- rxrpc_send_ACK(call, RXRPC_ACK_PING_RESPONSE, ack_serial,
- rxrpc_propose_ack_respond_to_ping);
- } else if (sp->hdr.flags & RXRPC_REQUEST_ACK) {
- rxrpc_send_ACK(call, RXRPC_ACK_REQUESTED, ack_serial,
- rxrpc_propose_ack_respond_to_ack);
+ rxrpc_rtt_rx_ping_response);
+ break;
+ case RXRPC_ACK_REQUESTED:
+ rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial,
+ rxrpc_rtt_rx_requested_ack);
+ break;
+ default:
+ rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial,
+ rxrpc_rtt_rx_other_ack);
+ break;
+ }
}
/* If we get an EXCEEDS_WINDOW ACK from the server, it probably
@@ -835,7 +892,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
rxrpc_is_client_call(call)) {
rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED,
0, -ENETRESET);
- return;
+ goto send_response;
}
/* If we get an OUT_OF_SEQUENCE ACK from the server, that can also
@@ -849,7 +906,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
rxrpc_is_client_call(call)) {
rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED,
0, -ENETRESET);
- return;
+ goto send_response;
}
/* Discard any out-of-order or duplicate ACKs (outside lock). */
@@ -857,7 +914,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
trace_rxrpc_rx_discard_ack(call->debug_id, ack_serial,
first_soft_ack, call->acks_first_seq,
prev_pkt, call->acks_prev_seq);
- return;
+ goto send_response;
}
info.rxMTU = 0;
@@ -869,6 +926,16 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
if (nr_acks > 0)
skb_condense(skb);
+ if (call->cong_last_nack) {
+ since = rxrpc_input_check_prev_ack(call, &summary, first_soft_ack);
+ rxrpc_free_skb(call->cong_last_nack, rxrpc_skb_put_last_nack);
+ call->cong_last_nack = NULL;
+ } else {
+ summary.nr_new_acks = first_soft_ack - call->acks_first_seq;
+ call->acks_lowest_nak = first_soft_ack + nr_acks;
+ since = first_soft_ack;
+ }
+
call->acks_latest_ts = skb->tstamp;
call->acks_first_seq = first_soft_ack;
call->acks_prev_seq = prev_pkt;
@@ -877,7 +944,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
case RXRPC_ACK_PING:
break;
default:
- if (after(acked_serial, call->acks_highest_serial))
+ if (acked_serial && after(acked_serial, call->acks_highest_serial))
call->acks_highest_serial = acked_serial;
break;
}
@@ -897,7 +964,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
case RXRPC_CALL_SERVER_AWAIT_ACK:
break;
default:
- return;
+ goto send_response;
}
if (before(hard_ack, call->acks_hard_ack) ||
@@ -909,15 +976,16 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
if (after(hard_ack, call->acks_hard_ack)) {
if (rxrpc_rotate_tx_window(call, hard_ack, &summary)) {
rxrpc_end_tx_phase(call, false, rxrpc_eproto_unexpected_ack);
- return;
+ goto send_response;
}
}
if (nr_acks > 0) {
if (offset > (int)skb->len - nr_acks)
return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_short_sack);
- rxrpc_input_soft_acks(call, skb->data + offset, first_soft_ack,
- nr_acks, &summary);
+ rxrpc_input_soft_acks(call, &summary, skb, first_soft_ack, since);
+ rxrpc_get_skb(skb, rxrpc_skb_get_last_nack);
+ call->cong_last_nack = skb;
}
if (test_bit(RXRPC_CALL_TX_LAST, &call->flags) &&
@@ -927,6 +995,14 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
rxrpc_propose_ack_ping_for_lost_reply);
rxrpc_congestion_management(call, skb, &summary, acked_serial);
+
+send_response:
+ if (ack.reason == RXRPC_ACK_PING)
+ rxrpc_send_ACK(call, RXRPC_ACK_PING_RESPONSE, ack_serial,
+ rxrpc_propose_ack_respond_to_ping);
+ else if (sp->hdr.flags & RXRPC_REQUEST_ACK)
+ rxrpc_send_ACK(call, RXRPC_ACK_REQUESTED, ack_serial,
+ rxrpc_propose_ack_respond_to_ack);
}
/*
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index 7d910aee4f8c..34d307368135 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -37,6 +37,17 @@ static void rxrpc_encap_err_rcv(struct sock *sk, struct sk_buff *skb, int err,
}
/*
+ * Set or clear the Don't Fragment flag on a socket.
+ */
+void rxrpc_local_dont_fragment(const struct rxrpc_local *local, bool set)
+{
+ if (set)
+ ip_sock_set_mtu_discover(local->socket->sk, IP_PMTUDISC_DO);
+ else
+ ip_sock_set_mtu_discover(local->socket->sk, IP_PMTUDISC_DONT);
+}
+
+/*
* Compare a local to an address. Return -ve, 0 or +ve to indicate less than,
* same or greater than.
*
@@ -87,7 +98,7 @@ static void rxrpc_client_conn_reap_timeout(struct timer_list *timer)
struct rxrpc_local *local =
container_of(timer, struct rxrpc_local, client_conn_reap_timer);
- if (local->kill_all_client_conns &&
+ if (!local->kill_all_client_conns &&
test_and_set_bit(RXRPC_CLIENT_CONN_REAP_TIMER, &local->client_conn_flags))
rxrpc_wake_up_io_thread(local);
}
@@ -203,7 +214,7 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
ip_sock_set_recverr(usk);
/* we want to set the don't fragment bit */
- ip_sock_set_mtu_discover(usk, IP_PMTUDISC_DO);
+ rxrpc_local_dont_fragment(local, true);
/* We want receive timestamps. */
sock_enable_timestamps(usk);
diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c
index a0319c040c25..a4c135d0fbcc 100644
--- a/net/rxrpc/net_ns.c
+++ b/net/rxrpc/net_ns.c
@@ -45,6 +45,7 @@ static __net_init int rxrpc_init_net(struct net *net)
atomic_set(&rxnet->nr_calls, 1);
atomic_set(&rxnet->nr_conns, 1);
+ INIT_LIST_HEAD(&rxnet->bundle_proc_list);
INIT_LIST_HEAD(&rxnet->conn_proc_list);
INIT_LIST_HEAD(&rxnet->service_conns);
rwlock_init(&rxnet->conn_lock);
@@ -78,6 +79,9 @@ static __net_init int rxrpc_init_net(struct net *net)
proc_create_net("conns", 0444, rxnet->proc_net,
&rxrpc_connection_seq_ops,
sizeof(struct seq_net_private));
+ proc_create_net("bundles", 0444, rxnet->proc_net,
+ &rxrpc_bundle_seq_ops,
+ sizeof(struct seq_net_private));
proc_create_net("peers", 0444, rxnet->proc_net,
&rxrpc_peer_seq_ops,
sizeof(struct seq_net_private));
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index 5e53429c6922..4a292f860ae3 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -216,7 +216,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
iov[0].iov_len = sizeof(txb->wire) + sizeof(txb->ack) + n;
len = iov[0].iov_len;
- serial = atomic_inc_return(&conn->serial);
+ serial = rxrpc_get_next_serial(conn);
txb->wire.serial = htonl(serial);
trace_rxrpc_tx_ack(call->debug_id, serial,
ntohl(txb->ack.firstPacket),
@@ -302,7 +302,7 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call)
iov[0].iov_base = &pkt;
iov[0].iov_len = sizeof(pkt);
- serial = atomic_inc_return(&conn->serial);
+ serial = rxrpc_get_next_serial(conn);
pkt.whdr.serial = htonl(serial);
iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, sizeof(pkt));
@@ -334,7 +334,7 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
_enter("%x,{%d}", txb->seq, txb->len);
/* Each transmission of a Tx packet needs a new serial number */
- serial = atomic_inc_return(&conn->serial);
+ serial = rxrpc_get_next_serial(conn);
txb->wire.serial = htonl(serial);
if (test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags) &&
@@ -494,14 +494,12 @@ send_fragmentable:
switch (conn->local->srx.transport.family) {
case AF_INET6:
case AF_INET:
- ip_sock_set_mtu_discover(conn->local->socket->sk,
- IP_PMTUDISC_DONT);
+ rxrpc_local_dont_fragment(conn->local, false);
rxrpc_inc_stat(call->rxnet, stat_tx_data_send_frag);
ret = do_udp_sendmsg(conn->local->socket, &msg, len);
conn->peer->last_tx_at = ktime_get_seconds();
- ip_sock_set_mtu_discover(conn->local->socket->sk,
- IP_PMTUDISC_DO);
+ rxrpc_local_dont_fragment(conn->local, true);
break;
default:
@@ -560,7 +558,7 @@ void rxrpc_send_conn_abort(struct rxrpc_connection *conn)
len = iov[0].iov_len + iov[1].iov_len;
- serial = atomic_inc_return(&conn->serial);
+ serial = rxrpc_get_next_serial(conn);
whdr.serial = htonl(serial);
iov_iter_kvec(&msg.msg_iter, WRITE, iov, 2, len);
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
index 8d7a715a0bb1..49dcda67a0d5 100644
--- a/net/rxrpc/peer_object.c
+++ b/net/rxrpc/peer_object.c
@@ -22,6 +22,8 @@
#include <net/ip6_route.h>
#include "ar-internal.h"
+static const struct sockaddr_rxrpc rxrpc_null_addr;
+
/*
* Hash a peer key.
*/
@@ -457,39 +459,53 @@ void rxrpc_destroy_all_peers(struct rxrpc_net *rxnet)
}
/**
- * rxrpc_kernel_get_peer - Get the peer address of a call
+ * rxrpc_kernel_get_call_peer - Get the peer address of a call
* @sock: The socket on which the call is in progress.
* @call: The call to query
- * @_srx: Where to place the result
*
- * Get the address of the remote peer in a call.
+ * Get a record for the remote peer in a call.
*/
-void rxrpc_kernel_get_peer(struct socket *sock, struct rxrpc_call *call,
- struct sockaddr_rxrpc *_srx)
+struct rxrpc_peer *rxrpc_kernel_get_call_peer(struct socket *sock, struct rxrpc_call *call)
{
- *_srx = call->peer->srx;
+ return call->peer;
}
-EXPORT_SYMBOL(rxrpc_kernel_get_peer);
+EXPORT_SYMBOL(rxrpc_kernel_get_call_peer);
/**
* rxrpc_kernel_get_srtt - Get a call's peer smoothed RTT
- * @sock: The socket on which the call is in progress.
- * @call: The call to query
- * @_srtt: Where to store the SRTT value.
+ * @peer: The peer to query
*
- * Get the call's peer smoothed RTT in uS.
+ * Get the call's peer smoothed RTT in uS or UINT_MAX if we have no samples.
*/
-bool rxrpc_kernel_get_srtt(struct socket *sock, struct rxrpc_call *call,
- u32 *_srtt)
+unsigned int rxrpc_kernel_get_srtt(const struct rxrpc_peer *peer)
{
- struct rxrpc_peer *peer = call->peer;
+ return peer->rtt_count > 0 ? peer->srtt_us >> 3 : UINT_MAX;
+}
+EXPORT_SYMBOL(rxrpc_kernel_get_srtt);
- if (peer->rtt_count == 0) {
- *_srtt = 1000000; /* 1S */
- return false;
- }
+/**
+ * rxrpc_kernel_remote_srx - Get the address of a peer
+ * @peer: The peer to query
+ *
+ * Get a pointer to the address from a peer record. The caller is responsible
+ * for making sure that the address is not deallocated.
+ */
+const struct sockaddr_rxrpc *rxrpc_kernel_remote_srx(const struct rxrpc_peer *peer)
+{
+ return peer ? &peer->srx : &rxrpc_null_addr;
+}
+EXPORT_SYMBOL(rxrpc_kernel_remote_srx);
- *_srtt = call->peer->srtt_us >> 3;
- return true;
+/**
+ * rxrpc_kernel_remote_addr - Get the peer transport address of a call
+ * @peer: The peer to query
+ *
+ * Get a pointer to the transport address from a peer record. The caller is
+ * responsible for making sure that the address is not deallocated.
+ */
+const struct sockaddr *rxrpc_kernel_remote_addr(const struct rxrpc_peer *peer)
+{
+ return (const struct sockaddr *)
+ (peer ? &peer->srx.transport : &rxrpc_null_addr.transport);
}
-EXPORT_SYMBOL(rxrpc_kernel_get_srtt);
+EXPORT_SYMBOL(rxrpc_kernel_remote_addr);
diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c
index 682636d3b060..26dc2f26d92d 100644
--- a/net/rxrpc/proc.c
+++ b/net/rxrpc/proc.c
@@ -181,7 +181,7 @@ print:
atomic_read(&conn->active),
state,
key_serial(conn->key),
- atomic_read(&conn->serial),
+ conn->tx_serial,
conn->hi_serial,
conn->channels[0].call_id,
conn->channels[1].call_id,
@@ -199,6 +199,82 @@ const struct seq_operations rxrpc_connection_seq_ops = {
};
/*
+ * generate a list of extant virtual bundles in /proc/net/rxrpc/bundles
+ */
+static void *rxrpc_bundle_seq_start(struct seq_file *seq, loff_t *_pos)
+ __acquires(rxnet->conn_lock)
+{
+ struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
+
+ read_lock(&rxnet->conn_lock);
+ return seq_list_start_head(&rxnet->bundle_proc_list, *_pos);
+}
+
+static void *rxrpc_bundle_seq_next(struct seq_file *seq, void *v,
+ loff_t *pos)
+{
+ struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
+
+ return seq_list_next(v, &rxnet->bundle_proc_list, pos);
+}
+
+static void rxrpc_bundle_seq_stop(struct seq_file *seq, void *v)
+ __releases(rxnet->conn_lock)
+{
+ struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
+
+ read_unlock(&rxnet->conn_lock);
+}
+
+static int rxrpc_bundle_seq_show(struct seq_file *seq, void *v)
+{
+ struct rxrpc_bundle *bundle;
+ struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
+ char lbuff[50], rbuff[50];
+
+ if (v == &rxnet->bundle_proc_list) {
+ seq_puts(seq,
+ "Proto Local "
+ " Remote "
+ " SvID Ref Act Flg Key |"
+ " Bundle Conn_0 Conn_1 Conn_2 Conn_3\n"
+ );
+ return 0;
+ }
+
+ bundle = list_entry(v, struct rxrpc_bundle, proc_link);
+
+ sprintf(lbuff, "%pISpc", &bundle->local->srx.transport);
+ sprintf(rbuff, "%pISpc", &bundle->peer->srx.transport);
+ seq_printf(seq,
+ "UDP %-47.47s %-47.47s %4x %3u %3d"
+ " %c%c%c %08x | %08x %08x %08x %08x %08x\n",
+ lbuff,
+ rbuff,
+ bundle->service_id,
+ refcount_read(&bundle->ref),
+ atomic_read(&bundle->active),
+ bundle->try_upgrade ? 'U' : '-',
+ bundle->exclusive ? 'e' : '-',
+ bundle->upgrade ? 'u' : '-',
+ key_serial(bundle->key),
+ bundle->debug_id,
+ bundle->conn_ids[0],
+ bundle->conn_ids[1],
+ bundle->conn_ids[2],
+ bundle->conn_ids[3]);
+
+ return 0;
+}
+
+const struct seq_operations rxrpc_bundle_seq_ops = {
+ .start = rxrpc_bundle_seq_start,
+ .next = rxrpc_bundle_seq_next,
+ .stop = rxrpc_bundle_seq_stop,
+ .show = rxrpc_bundle_seq_show,
+};
+
+/*
* generate a list of extant virtual peers in /proc/net/rxrpc/peers
*/
static int rxrpc_peer_seq_show(struct seq_file *seq, void *v)
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index 1bf571a66e02..6b32d61d4cdc 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -664,7 +664,7 @@ static int rxkad_issue_challenge(struct rxrpc_connection *conn)
len = iov[0].iov_len + iov[1].iov_len;
- serial = atomic_inc_return(&conn->serial);
+ serial = rxrpc_get_next_serial(conn);
whdr.serial = htonl(serial);
ret = kernel_sendmsg(conn->local->socket, &msg, iov, 2, len);
@@ -721,10 +721,12 @@ static int rxkad_send_response(struct rxrpc_connection *conn,
len = iov[0].iov_len + iov[1].iov_len + iov[2].iov_len;
- serial = atomic_inc_return(&conn->serial);
+ serial = rxrpc_get_next_serial(conn);
whdr.serial = htonl(serial);
+ rxrpc_local_dont_fragment(conn->local, false);
ret = kernel_sendmsg(conn->local->socket, &msg, iov, 3, len);
+ rxrpc_local_dont_fragment(conn->local, true);
if (ret < 0) {
trace_rxrpc_tx_fail(conn->debug_id, serial, ret,
rxrpc_tx_point_rxkad_response);
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 8e0b94714e84..5677d5690a02 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -572,6 +572,7 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
__acquires(&call->user_mutex)
{
struct rxrpc_conn_parameters cp;
+ struct rxrpc_peer *peer;
struct rxrpc_call *call;
struct key *key;
@@ -584,21 +585,29 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
return ERR_PTR(-EDESTADDRREQ);
}
+ peer = rxrpc_lookup_peer(rx->local, srx, GFP_KERNEL);
+ if (!peer) {
+ release_sock(&rx->sk);
+ return ERR_PTR(-ENOMEM);
+ }
+
key = rx->key;
if (key && !rx->key->payload.data[0])
key = NULL;
memset(&cp, 0, sizeof(cp));
cp.local = rx->local;
+ cp.peer = peer;
cp.key = rx->key;
cp.security_level = rx->min_sec_level;
cp.exclusive = rx->exclusive | p->exclusive;
cp.upgrade = p->upgrade;
cp.service_id = srx->srx_service;
- call = rxrpc_new_client_call(rx, &cp, srx, &p->call, GFP_KERNEL,
+ call = rxrpc_new_client_call(rx, &cp, &p->call, GFP_KERNEL,
atomic_inc_return(&rxrpc_debug_id));
/* The socket is now unlocked */
+ rxrpc_put_peer(peer, rxrpc_peer_put_application);
_leave(" = %p\n", call);
return call;
}
diff --git a/net/sched/Makefile b/net/sched/Makefile
index b5fd49641d91..82c3f78ca486 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -13,7 +13,6 @@ obj-$(CONFIG_NET_ACT_POLICE) += act_police.o
obj-$(CONFIG_NET_ACT_GACT) += act_gact.o
obj-$(CONFIG_NET_ACT_MIRRED) += act_mirred.o
obj-$(CONFIG_NET_ACT_SAMPLE) += act_sample.o
-obj-$(CONFIG_NET_ACT_IPT) += act_ipt.o
obj-$(CONFIG_NET_ACT_NAT) += act_nat.o
obj-$(CONFIG_NET_ACT_PEDIT) += act_pedit.o
obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 9d3f26bf0440..3e30d7260493 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -816,6 +816,9 @@ EXPORT_SYMBOL(tcf_idr_cleanup);
* its reference and bind counters, and return 1. Otherwise insert temporary
* error pointer (to prevent concurrent users from inserting actions with same
* index) and return 0.
+ *
+ * May return -EAGAIN for binding actions in case of a parallel add/delete on
+ * the requested index.
*/
int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index,
@@ -824,43 +827,61 @@ int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index,
struct tcf_idrinfo *idrinfo = tn->idrinfo;
struct tc_action *p;
int ret;
+ u32 max;
-again:
- mutex_lock(&idrinfo->lock);
if (*index) {
+again:
+ rcu_read_lock();
p = idr_find(&idrinfo->action_idr, *index);
+
if (IS_ERR(p)) {
/* This means that another process allocated
* index but did not assign the pointer yet.
*/
- mutex_unlock(&idrinfo->lock);
+ rcu_read_unlock();
goto again;
}
- if (p) {
- refcount_inc(&p->tcfa_refcnt);
- if (bind)
- atomic_inc(&p->tcfa_bindcnt);
- *a = p;
- ret = 1;
- } else {
- *a = NULL;
- ret = idr_alloc_u32(&idrinfo->action_idr, NULL, index,
- *index, GFP_KERNEL);
- if (!ret)
- idr_replace(&idrinfo->action_idr,
- ERR_PTR(-EBUSY), *index);
+ if (!p) {
+ /* Empty slot, try to allocate it */
+ max = *index;
+ rcu_read_unlock();
+ goto new;
+ }
+
+ if (!refcount_inc_not_zero(&p->tcfa_refcnt)) {
+ /* Action was deleted in parallel */
+ rcu_read_unlock();
+ return -EAGAIN;
}
+
+ if (bind)
+ atomic_inc(&p->tcfa_bindcnt);
+ *a = p;
+
+ rcu_read_unlock();
+
+ return 1;
} else {
+ /* Find a slot */
*index = 1;
- *a = NULL;
- ret = idr_alloc_u32(&idrinfo->action_idr, NULL, index,
- UINT_MAX, GFP_KERNEL);
- if (!ret)
- idr_replace(&idrinfo->action_idr, ERR_PTR(-EBUSY),
- *index);
+ max = UINT_MAX;
}
+
+new:
+ *a = NULL;
+
+ mutex_lock(&idrinfo->lock);
+ ret = idr_alloc_u32(&idrinfo->action_idr, ERR_PTR(-EBUSY), index, max,
+ GFP_KERNEL);
mutex_unlock(&idrinfo->lock);
+
+ /* N binds raced for action allocation,
+ * retry for all the ones that failed.
+ */
+ if (ret == -ENOSPC && *index == max)
+ ret = -EAGAIN;
+
return ret;
}
EXPORT_SYMBOL(tcf_idr_check_alloc);
@@ -1098,7 +1119,8 @@ repeat:
}
} else if (TC_ACT_EXT_CMP(ret, TC_ACT_GOTO_CHAIN)) {
if (unlikely(!rcu_access_pointer(a->goto_chain))) {
- net_warn_ratelimited("can't go to NULL chain!\n");
+ tcf_set_drop_reason(skb,
+ SKB_DROP_REASON_TC_CHAIN_NOTFOUND);
return TC_ACT_SHOT;
}
tcf_action_goto_chain_exec(a, res);
@@ -1118,8 +1140,7 @@ int tcf_action_destroy(struct tc_action *actions[], int bind)
struct tc_action *a;
int ret = 0, i;
- for (i = 0; i < TCA_ACT_MAX_PRIO && actions[i]; i++) {
- a = actions[i];
+ tcf_act_for_each_action(i, a, actions) {
actions[i] = NULL;
ops = a->ops;
ret = __tcf_idr_release(a, bind, true);
@@ -1136,18 +1157,29 @@ static int tcf_action_put(struct tc_action *p)
return __tcf_action_put(p, false);
}
-/* Put all actions in this array, skip those NULL's. */
static void tcf_action_put_many(struct tc_action *actions[])
{
+ struct tc_action *a;
+ int i;
+
+ tcf_act_for_each_action(i, a, actions) {
+ const struct tc_action_ops *ops = a->ops;
+ if (tcf_action_put(a))
+ module_put(ops->owner);
+ }
+}
+
+static void tca_put_bound_many(struct tc_action *actions[], int init_res[])
+{
+ struct tc_action *a;
int i;
- for (i = 0; i < TCA_ACT_MAX_PRIO; i++) {
- struct tc_action *a = actions[i];
- const struct tc_action_ops *ops;
+ tcf_act_for_each_action(i, a, actions) {
+ const struct tc_action_ops *ops = a->ops;
- if (!a)
+ if (init_res[i] == ACT_P_CREATED)
continue;
- ops = a->ops;
+
if (tcf_action_put(a))
module_put(ops->owner);
}
@@ -1211,8 +1243,7 @@ int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[],
int err = -EINVAL, i;
struct nlattr *nest;
- for (i = 0; i < TCA_ACT_MAX_PRIO && actions[i]; i++) {
- a = actions[i];
+ tcf_act_for_each_action(i, a, actions) {
nest = nla_nest_start_noflag(skb, i + 1);
if (nest == NULL)
goto nla_put_failure;
@@ -1274,30 +1305,29 @@ static const struct nla_policy tcf_action_policy[TCA_ACT_MAX + 1] = {
[TCA_ACT_HW_STATS] = NLA_POLICY_BITFIELD32(TCA_ACT_HW_STATS_ANY),
};
-void tcf_idr_insert_many(struct tc_action *actions[])
+void tcf_idr_insert_many(struct tc_action *actions[], int init_res[])
{
+ struct tc_action *a;
int i;
- for (i = 0; i < TCA_ACT_MAX_PRIO; i++) {
- struct tc_action *a = actions[i];
+ tcf_act_for_each_action(i, a, actions) {
struct tcf_idrinfo *idrinfo;
- if (!a)
+ if (init_res[i] == ACT_P_BOUND)
continue;
+
idrinfo = a->idrinfo;
mutex_lock(&idrinfo->lock);
- /* Replace ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc if
- * it is just created, otherwise this is just a nop.
- */
+ /* Replace ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc */
idr_replace(&idrinfo->action_idr, a, a->tcfa_index);
mutex_unlock(&idrinfo->lock);
}
}
-struct tc_action_ops *tc_action_load_ops(struct nlattr *nla, bool police,
- bool rtnl_held,
+struct tc_action_ops *tc_action_load_ops(struct nlattr *nla, u32 flags,
struct netlink_ext_ack *extack)
{
+ bool police = flags & TCA_ACT_FLAGS_POLICE;
struct nlattr *tb[TCA_ACT_MAX + 1];
struct tc_action_ops *a_o;
char act_name[IFNAMSIZ];
@@ -1329,6 +1359,8 @@ struct tc_action_ops *tc_action_load_ops(struct nlattr *nla, bool police,
a_o = tc_lookup_action_n(act_name);
if (a_o == NULL) {
#ifdef CONFIG_MODULES
+ bool rtnl_held = !(flags & TCA_ACT_FLAGS_NO_RTNL);
+
if (rtnl_held)
rtnl_unlock();
request_module("act_%s", act_name);
@@ -1445,9 +1477,7 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
struct tc_action_ops *a_o;
- a_o = tc_action_load_ops(tb[i], flags & TCA_ACT_FLAGS_POLICE,
- !(flags & TCA_ACT_FLAGS_NO_RTNL),
- extack);
+ a_o = tc_action_load_ops(tb[i], flags, extack);
if (IS_ERR(a_o)) {
err = PTR_ERR(a_o);
goto err_mod;
@@ -1488,7 +1518,7 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
/* We have to commit them all together, because if any error happened in
* between, we could not handle the failure gracefully.
*/
- tcf_idr_insert_many(actions);
+ tcf_idr_insert_many(actions, init_res);
*attr_size = tcf_action_full_attrs_size(sz);
err = i - 1;
@@ -1497,10 +1527,8 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
err:
tcf_action_destroy(actions, flags & TCA_ACT_FLAGS_BIND);
err_mod:
- for (i = 0; i < TCA_ACT_MAX_PRIO; i++) {
- if (ops[i])
- module_put(ops[i]->owner);
- }
+ for (i = 0; i < TCA_ACT_MAX_PRIO && ops[i]; i++)
+ module_put(ops[i]->owner);
return err;
}
@@ -1753,10 +1781,10 @@ err_out:
static int tcf_action_delete(struct net *net, struct tc_action *actions[])
{
+ struct tc_action *a;
int i;
- for (i = 0; i < TCA_ACT_MAX_PRIO && actions[i]; i++) {
- struct tc_action *a = actions[i];
+ tcf_act_for_each_action(i, a, actions) {
const struct tc_action_ops *ops = a->ops;
/* Actions can be deleted concurrently so we must save their
* type and id to search again after reference is released.
@@ -1768,7 +1796,7 @@ static int tcf_action_delete(struct net *net, struct tc_action *actions[])
if (tcf_action_put(a)) {
/* last reference, action was deleted concurrently */
module_put(ops->owner);
- } else {
+ } else {
int ret;
/* now do the delete */
@@ -1780,31 +1808,45 @@ static int tcf_action_delete(struct net *net, struct tc_action *actions[])
return 0;
}
-static int
-tcf_reoffload_del_notify(struct net *net, struct tc_action *action)
+static struct sk_buff *tcf_reoffload_del_notify_msg(struct net *net,
+ struct tc_action *action)
{
size_t attr_size = tcf_action_fill_size(action);
struct tc_action *actions[TCA_ACT_MAX_PRIO] = {
[0] = action,
};
- const struct tc_action_ops *ops = action->ops;
struct sk_buff *skb;
- int ret;
- skb = alloc_skb(attr_size <= NLMSG_GOODSIZE ? NLMSG_GOODSIZE : attr_size,
- GFP_KERNEL);
+ skb = alloc_skb(max(attr_size, NLMSG_GOODSIZE), GFP_KERNEL);
if (!skb)
- return -ENOBUFS;
+ return ERR_PTR(-ENOBUFS);
if (tca_get_fill(skb, actions, 0, 0, 0, RTM_DELACTION, 0, 1, NULL) <= 0) {
kfree_skb(skb);
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
+ }
+
+ return skb;
+}
+
+static int tcf_reoffload_del_notify(struct net *net, struct tc_action *action)
+{
+ const struct tc_action_ops *ops = action->ops;
+ struct sk_buff *skb;
+ int ret;
+
+ if (!rtnl_notify_needed(net, 0, RTNLGRP_TC)) {
+ skb = NULL;
+ } else {
+ skb = tcf_reoffload_del_notify_msg(net, action);
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
}
ret = tcf_idr_release_unsafe(action);
if (ret == ACT_P_DELETED) {
module_put(ops->owner);
- ret = rtnetlink_send(skb, net, 0, RTNLGRP_TC, 0);
+ ret = rtnetlink_maybe_send(skb, net, 0, RTNLGRP_TC, 0);
} else {
kfree_skb(skb);
}
@@ -1870,23 +1912,41 @@ int tcf_action_reoffload_cb(flow_indr_block_bind_cb_t *cb,
return 0;
}
-static int
-tcf_del_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[],
- u32 portid, size_t attr_size, struct netlink_ext_ack *extack)
+static struct sk_buff *tcf_del_notify_msg(struct net *net, struct nlmsghdr *n,
+ struct tc_action *actions[],
+ u32 portid, size_t attr_size,
+ struct netlink_ext_ack *extack)
{
- int ret;
struct sk_buff *skb;
- skb = alloc_skb(attr_size <= NLMSG_GOODSIZE ? NLMSG_GOODSIZE : attr_size,
- GFP_KERNEL);
+ skb = alloc_skb(max(attr_size, NLMSG_GOODSIZE), GFP_KERNEL);
if (!skb)
- return -ENOBUFS;
+ return ERR_PTR(-ENOBUFS);
if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, RTM_DELACTION,
0, 2, extack) <= 0) {
NL_SET_ERR_MSG(extack, "Failed to fill netlink TC action attributes");
kfree_skb(skb);
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
+ }
+
+ return skb;
+}
+
+static int tcf_del_notify(struct net *net, struct nlmsghdr *n,
+ struct tc_action *actions[], u32 portid,
+ size_t attr_size, struct netlink_ext_ack *extack)
+{
+ struct sk_buff *skb;
+ int ret;
+
+ if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC)) {
+ skb = NULL;
+ } else {
+ skb = tcf_del_notify_msg(net, n, actions, portid, attr_size,
+ extack);
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
}
/* now do the delete */
@@ -1897,9 +1957,8 @@ tcf_del_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[],
return ret;
}
- ret = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
- n->nlmsg_flags & NLM_F_ECHO);
- return ret;
+ return rtnetlink_maybe_send(skb, net, portid, RTNLGRP_TC,
+ n->nlmsg_flags & NLM_F_ECHO);
}
static int
@@ -1950,26 +2009,44 @@ err:
return ret;
}
-static int
-tcf_add_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[],
- u32 portid, size_t attr_size, struct netlink_ext_ack *extack)
+static struct sk_buff *tcf_add_notify_msg(struct net *net, struct nlmsghdr *n,
+ struct tc_action *actions[],
+ u32 portid, size_t attr_size,
+ struct netlink_ext_ack *extack)
{
struct sk_buff *skb;
- skb = alloc_skb(attr_size <= NLMSG_GOODSIZE ? NLMSG_GOODSIZE : attr_size,
- GFP_KERNEL);
+ skb = alloc_skb(max(attr_size, NLMSG_GOODSIZE), GFP_KERNEL);
if (!skb)
- return -ENOBUFS;
+ return ERR_PTR(-ENOBUFS);
if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, n->nlmsg_flags,
RTM_NEWACTION, 0, 0, extack) <= 0) {
NL_SET_ERR_MSG(extack, "Failed to fill netlink attributes while adding TC action");
kfree_skb(skb);
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
}
- return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
- n->nlmsg_flags & NLM_F_ECHO);
+ return skb;
+}
+
+static int tcf_add_notify(struct net *net, struct nlmsghdr *n,
+ struct tc_action *actions[], u32 portid,
+ size_t attr_size, struct netlink_ext_ack *extack)
+{
+ struct sk_buff *skb;
+
+ if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC)) {
+ skb = NULL;
+ } else {
+ skb = tcf_add_notify_msg(net, n, actions, portid, attr_size,
+ extack);
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
+ }
+
+ return rtnetlink_maybe_send(skb, net, portid, RTNLGRP_TC,
+ n->nlmsg_flags & NLM_F_ECHO);
}
static int tcf_action_add(struct net *net, struct nlattr *nla,
@@ -1977,7 +2054,7 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
struct netlink_ext_ack *extack)
{
size_t attr_size = 0;
- int loop, ret, i;
+ int loop, ret;
struct tc_action *actions[TCA_ACT_MAX_PRIO] = {};
int init_res[TCA_ACT_MAX_PRIO] = {};
@@ -1990,13 +2067,11 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
if (ret < 0)
return ret;
+
ret = tcf_add_notify(net, n, actions, portid, attr_size, extack);
- /* only put existing actions */
- for (i = 0; i < TCA_ACT_MAX_PRIO; i++)
- if (init_res[i] == ACT_P_CREATED)
- actions[i] = NULL;
- tcf_action_put_many(actions);
+ /* only put bound actions */
+ tca_put_bound_many(actions, init_res);
return ret;
}
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index b0455fda7d0b..6cfee6658103 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -318,7 +318,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
} else if (ret > 0) {
/* Don't override defaults. */
if (bind)
- return 0;
+ return ACT_P_BOUND;
if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*act, bind);
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 0d7aee8933c5..f8762756657d 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -146,7 +146,7 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
} else if (ret > 0) {
ci = to_connmark(*a);
if (bind) {
- err = 0;
+ err = ACT_P_BOUND;
goto out_free;
}
if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 8ed285023a40..7f8b1f2f2ed9 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -77,8 +77,8 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
}
ret = ACT_P_CREATED;
} else if (err > 0) {
- if (bind)/* dont override defaults */
- return 0;
+ if (bind) /* dont override defaults */
+ return ACT_P_BOUND;
if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*a, bind);
return -EEXIST;
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 7c652d14528b..6124d8b128d1 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -278,8 +278,39 @@ err_nat:
return err;
}
+static bool tcf_ct_flow_is_outdated(const struct flow_offload *flow)
+{
+ return test_bit(IPS_SEEN_REPLY_BIT, &flow->ct->status) &&
+ test_bit(IPS_HW_OFFLOAD_BIT, &flow->ct->status) &&
+ !test_bit(NF_FLOW_HW_PENDING, &flow->flags) &&
+ !test_bit(NF_FLOW_HW_ESTABLISHED, &flow->flags);
+}
+
+static void tcf_ct_flow_table_get_ref(struct tcf_ct_flow_table *ct_ft);
+
+static void tcf_ct_nf_get(struct nf_flowtable *ft)
+{
+ struct tcf_ct_flow_table *ct_ft =
+ container_of(ft, struct tcf_ct_flow_table, nf_ft);
+
+ tcf_ct_flow_table_get_ref(ct_ft);
+}
+
+static void tcf_ct_flow_table_put(struct tcf_ct_flow_table *ct_ft);
+
+static void tcf_ct_nf_put(struct nf_flowtable *ft)
+{
+ struct tcf_ct_flow_table *ct_ft =
+ container_of(ft, struct tcf_ct_flow_table, nf_ft);
+
+ tcf_ct_flow_table_put(ct_ft);
+}
+
static struct nf_flowtable_type flowtable_ct = {
+ .gc = tcf_ct_flow_is_outdated,
.action = tcf_ct_flow_table_fill_actions,
+ .get = tcf_ct_nf_get,
+ .put = tcf_ct_nf_put,
.owner = THIS_MODULE,
};
@@ -328,9 +359,13 @@ err_alloc:
return err;
}
+static void tcf_ct_flow_table_get_ref(struct tcf_ct_flow_table *ct_ft)
+{
+ refcount_inc(&ct_ft->ref);
+}
+
static void tcf_ct_flow_table_cleanup_work(struct work_struct *work)
{
- struct flow_block_cb *block_cb, *tmp_cb;
struct tcf_ct_flow_table *ct_ft;
struct flow_block *block;
@@ -338,13 +373,9 @@ static void tcf_ct_flow_table_cleanup_work(struct work_struct *work)
rwork);
nf_flow_table_free(&ct_ft->nf_ft);
- /* Remove any remaining callbacks before cleanup */
block = &ct_ft->nf_ft.flow_block;
down_write(&ct_ft->nf_ft.flow_block_lock);
- list_for_each_entry_safe(block_cb, tmp_cb, &block->cb_list, list) {
- list_del(&block_cb->list);
- flow_block_cb_free(block_cb);
- }
+ WARN_ON(!list_empty(&block->cb_list));
up_write(&ct_ft->nf_ft.flow_block_lock);
kfree(ct_ft);
@@ -367,6 +398,17 @@ static void tcf_ct_flow_tc_ifidx(struct flow_offload *entry,
entry->tuplehash[dir].tuple.tc.iifidx = act_ct_ext->ifindex[dir];
}
+static void tcf_ct_flow_ct_ext_ifidx_update(struct flow_offload *entry)
+{
+ struct nf_conn_act_ct_ext *act_ct_ext;
+
+ act_ct_ext = nf_conn_act_ct_ext_find(entry->ct);
+ if (act_ct_ext) {
+ tcf_ct_flow_tc_ifidx(entry, act_ct_ext, FLOW_OFFLOAD_DIR_ORIGINAL);
+ tcf_ct_flow_tc_ifidx(entry, act_ct_ext, FLOW_OFFLOAD_DIR_REPLY);
+ }
+}
+
static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft,
struct nf_conn *ct,
bool tcp, bool bidirectional)
@@ -662,6 +704,8 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p,
else
ctinfo = IP_CT_ESTABLISHED_REPLY;
+ nf_conn_act_ct_ext_fill(skb, ct, ctinfo);
+ tcf_ct_flow_ct_ext_ifidx_update(flow);
flow_offload_refresh(nf_ft, flow, force_refresh);
if (!test_bit(IPS_ASSURED_BIT, &ct->status)) {
/* Process this flow in SW to allow promoting to ASSURED */
@@ -690,7 +734,6 @@ static struct tc_action_ops act_ct_ops;
struct tc_ct_action_net {
struct tc_action_net tn; /* Must be first */
- bool labels;
};
/* Determine whether skb->_nfct is equal to the result of conntrack lookup. */
@@ -807,7 +850,6 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
if (err || !frag)
return err;
- skb_get(skb);
err = nf_ct_handle_fragments(net, skb, zone, family, &proto, &mru);
if (err)
return err;
@@ -829,8 +871,13 @@ static void tcf_ct_params_free(struct tcf_ct_params *params)
}
if (params->ct_ft)
tcf_ct_flow_table_put(params->ct_ft);
- if (params->tmpl)
+ if (params->tmpl) {
+ if (params->put_labels)
+ nf_connlabels_put(nf_ct_net(params->tmpl));
+
nf_ct_put(params->tmpl);
+ }
+
kfree(params);
}
@@ -951,12 +998,8 @@ TC_INDIRECT_SCOPE int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
nh_ofs = skb_network_offset(skb);
skb_pull_rcsum(skb, nh_ofs);
err = tcf_ct_handle_fragments(net, skb, family, p->zone, &defrag);
- if (err == -EINPROGRESS) {
- retval = TC_ACT_STOLEN;
- goto out_clear;
- }
if (err)
- goto drop;
+ goto out_frag;
err = nf_ct_skb_network_trim(skb, family);
if (err)
@@ -1021,7 +1064,7 @@ do_nat:
tcf_ct_act_set_labels(ct, p->labels, p->labels_mask);
if (!nf_ct_is_confirmed(ct))
- nf_conn_act_ct_ext_add(ct);
+ nf_conn_act_ct_ext_add(skb, ct, ctinfo);
/* This will take care of sending queued events
* even if the connection is already confirmed.
@@ -1043,6 +1086,11 @@ out_clear:
qdisc_skb_cb(skb)->pkt_len = skb->len;
return retval;
+out_frag:
+ if (err != -EINPROGRESS)
+ tcf_action_inc_drop_qstats(&c->common);
+ return TC_ACT_CONSUMED;
+
drop:
tcf_action_inc_drop_qstats(&c->common);
return TC_ACT_SHOT;
@@ -1154,9 +1202,9 @@ static int tcf_ct_fill_params(struct net *net,
struct nlattr **tb,
struct netlink_ext_ack *extack)
{
- struct tc_ct_action_net *tn = net_generic(net, act_ct_ops.net_id);
struct nf_conntrack_zone zone;
int err, family, proto, len;
+ bool put_labels = false;
struct nf_conn *tmpl;
char *name;
@@ -1186,15 +1234,20 @@ static int tcf_ct_fill_params(struct net *net,
}
if (tb[TCA_CT_LABELS]) {
+ unsigned int n_bits = sizeof_field(struct tcf_ct_params, labels) * 8;
+
if (!IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS)) {
NL_SET_ERR_MSG_MOD(extack, "Conntrack labels isn't enabled.");
return -EOPNOTSUPP;
}
- if (!tn->labels) {
+ if (nf_connlabels_get(net, n_bits - 1)) {
NL_SET_ERR_MSG_MOD(extack, "Failed to set connlabel length");
return -EOPNOTSUPP;
+ } else {
+ put_labels = true;
}
+
tcf_ct_set_key_val(tb,
p->labels, TCA_CT_LABELS,
p->labels_mask, TCA_CT_LABELS_MASK,
@@ -1238,10 +1291,15 @@ static int tcf_ct_fill_params(struct net *net,
}
}
+ p->put_labels = put_labels;
+
if (p->ct_action & TCA_CT_ACT_COMMIT)
__set_bit(IPS_CONFIRMED_BIT, &tmpl->status);
return 0;
err:
+ if (put_labels)
+ nf_connlabels_put(net);
+
nf_ct_put(p->tmpl);
p->tmpl = NULL;
return err;
@@ -1291,7 +1349,7 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla,
res = ACT_P_CREATED;
} else {
if (bind)
- return 0;
+ return ACT_P_BOUND;
if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*a, bind);
@@ -1513,6 +1571,9 @@ static int tcf_ct_offload_act_setup(struct tc_action *act, void *entry_data,
if (bind) {
struct flow_action_entry *entry = entry_data;
+ if (tcf_ct_helper(act))
+ return -EOPNOTSUPP;
+
entry->id = FLOW_ACTION_CT;
entry->ct.action = tcf_ct_action(act);
entry->ct.zone = tcf_ct_zone(act);
@@ -1542,32 +1603,13 @@ static struct tc_action_ops act_ct_ops = {
static __net_init int ct_init_net(struct net *net)
{
- unsigned int n_bits = sizeof_field(struct tcf_ct_params, labels) * 8;
struct tc_ct_action_net *tn = net_generic(net, act_ct_ops.net_id);
- if (nf_connlabels_get(net, n_bits - 1)) {
- tn->labels = false;
- pr_err("act_ct: Failed to set connlabels length");
- } else {
- tn->labels = true;
- }
-
return tc_action_net_init(net, &tn->tn, &act_ct_ops);
}
static void __net_exit ct_exit_net(struct list_head *net_list)
{
- struct net *net;
-
- rtnl_lock();
- list_for_each_entry(net, net_list, exit_list) {
- struct tc_ct_action_net *tn = net_generic(net, act_ct_ops.net_id);
-
- if (tn->labels)
- nf_connlabels_put(net);
- }
- rtnl_unlock();
-
tc_action_net_exit(net_list, act_ct_ops.net_id);
}
diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c
index 4d15b6a6169c..e620f9a84afe 100644
--- a/net/sched/act_ctinfo.c
+++ b/net/sched/act_ctinfo.c
@@ -221,7 +221,7 @@ static int tcf_ctinfo_init(struct net *net, struct nlattr *nla,
ret = ACT_P_CREATED;
} else if (err > 0) {
if (bind) /* don't override defaults */
- return 0;
+ return ACT_P_BOUND;
if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*a, bind);
return -EEXIST;
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 904ab3d457ef..4af3b7ec249f 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -108,7 +108,7 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
ret = ACT_P_CREATED;
} else if (err > 0) {
if (bind)/* dont override defaults */
- return 0;
+ return ACT_P_BOUND;
if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*a, bind);
return -EEXIST;
diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c
index c9a811f4c7ee..c681cd011afd 100644
--- a/net/sched/act_gate.c
+++ b/net/sched/act_gate.c
@@ -356,7 +356,7 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
return err;
if (err && bind)
- return 0;
+ return ACT_P_BOUND;
if (!err) {
ret = tcf_idr_create_from_flags(tn, index, est, a,
@@ -677,4 +677,5 @@ static void __exit gate_cleanup_module(void)
module_init(gate_init_module);
module_exit(gate_cleanup_module);
+MODULE_DESCRIPTION("TC gate action");
MODULE_LICENSE("GPL v2");
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index bc7611b0744c..0e867d13beb5 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -548,7 +548,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
exists = err;
if (exists && bind) {
kfree(p);
- return 0;
+ return ACT_P_BOUND;
}
if (!exists) {
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
deleted file mode 100644
index 598d6e299152..000000000000
--- a/net/sched/act_ipt.c
+++ /dev/null
@@ -1,464 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * net/sched/act_ipt.c iptables target interface
- *
- *TODO: Add other tables. For now we only support the ipv4 table targets
- *
- * Copyright: Jamal Hadi Salim (2002-13)
- */
-
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/skbuff.h>
-#include <linux/rtnetlink.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <net/netlink.h>
-#include <net/pkt_sched.h>
-#include <linux/tc_act/tc_ipt.h>
-#include <net/tc_act/tc_ipt.h>
-#include <net/tc_wrapper.h>
-#include <net/ip.h>
-
-#include <linux/netfilter_ipv4/ip_tables.h>
-
-
-static struct tc_action_ops act_ipt_ops;
-static struct tc_action_ops act_xt_ops;
-
-static int ipt_init_target(struct net *net, struct xt_entry_target *t,
- char *table, unsigned int hook)
-{
- struct xt_tgchk_param par;
- struct xt_target *target;
- struct ipt_entry e = {};
- int ret = 0;
-
- target = xt_request_find_target(AF_INET, t->u.user.name,
- t->u.user.revision);
- if (IS_ERR(target))
- return PTR_ERR(target);
-
- t->u.kernel.target = target;
- memset(&par, 0, sizeof(par));
- par.net = net;
- par.table = table;
- par.entryinfo = &e;
- par.target = target;
- par.targinfo = t->data;
- par.hook_mask = 1 << hook;
- par.family = NFPROTO_IPV4;
-
- ret = xt_check_target(&par, t->u.target_size - sizeof(*t), 0, false);
- if (ret < 0) {
- module_put(t->u.kernel.target->me);
- return ret;
- }
- return 0;
-}
-
-static void ipt_destroy_target(struct xt_entry_target *t, struct net *net)
-{
- struct xt_tgdtor_param par = {
- .target = t->u.kernel.target,
- .targinfo = t->data,
- .family = NFPROTO_IPV4,
- .net = net,
- };
- if (par.target->destroy != NULL)
- par.target->destroy(&par);
- module_put(par.target->me);
-}
-
-static void tcf_ipt_release(struct tc_action *a)
-{
- struct tcf_ipt *ipt = to_ipt(a);
-
- if (ipt->tcfi_t) {
- ipt_destroy_target(ipt->tcfi_t, a->idrinfo->net);
- kfree(ipt->tcfi_t);
- }
- kfree(ipt->tcfi_tname);
-}
-
-static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = {
- [TCA_IPT_TABLE] = { .type = NLA_STRING, .len = IFNAMSIZ },
- [TCA_IPT_HOOK] = NLA_POLICY_RANGE(NLA_U32, NF_INET_PRE_ROUTING,
- NF_INET_NUMHOOKS),
- [TCA_IPT_INDEX] = { .type = NLA_U32 },
- [TCA_IPT_TARG] = { .len = sizeof(struct xt_entry_target) },
-};
-
-static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
- struct nlattr *est, struct tc_action **a,
- const struct tc_action_ops *ops,
- struct tcf_proto *tp, u32 flags)
-{
- struct tc_action_net *tn = net_generic(net, id);
- bool bind = flags & TCA_ACT_FLAGS_BIND;
- struct nlattr *tb[TCA_IPT_MAX + 1];
- struct tcf_ipt *ipt;
- struct xt_entry_target *td, *t;
- char *tname;
- bool exists = false;
- int ret = 0, err;
- u32 hook = 0;
- u32 index = 0;
-
- if (nla == NULL)
- return -EINVAL;
-
- err = nla_parse_nested_deprecated(tb, TCA_IPT_MAX, nla, ipt_policy,
- NULL);
- if (err < 0)
- return err;
-
- if (tb[TCA_IPT_INDEX] != NULL)
- index = nla_get_u32(tb[TCA_IPT_INDEX]);
-
- err = tcf_idr_check_alloc(tn, &index, a, bind);
- if (err < 0)
- return err;
- exists = err;
- if (exists && bind)
- return 0;
-
- if (tb[TCA_IPT_HOOK] == NULL || tb[TCA_IPT_TARG] == NULL) {
- if (exists)
- tcf_idr_release(*a, bind);
- else
- tcf_idr_cleanup(tn, index);
- return -EINVAL;
- }
-
- td = (struct xt_entry_target *)nla_data(tb[TCA_IPT_TARG]);
- if (nla_len(tb[TCA_IPT_TARG]) != td->u.target_size) {
- if (exists)
- tcf_idr_release(*a, bind);
- else
- tcf_idr_cleanup(tn, index);
- return -EINVAL;
- }
-
- if (!exists) {
- ret = tcf_idr_create(tn, index, est, a, ops, bind,
- false, flags);
- if (ret) {
- tcf_idr_cleanup(tn, index);
- return ret;
- }
- ret = ACT_P_CREATED;
- } else {
- if (bind)/* dont override defaults */
- return 0;
-
- if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
- tcf_idr_release(*a, bind);
- return -EEXIST;
- }
- }
-
- err = -EINVAL;
- hook = nla_get_u32(tb[TCA_IPT_HOOK]);
- switch (hook) {
- case NF_INET_PRE_ROUTING:
- break;
- case NF_INET_POST_ROUTING:
- break;
- default:
- goto err1;
- }
-
- if (tb[TCA_IPT_TABLE]) {
- /* mangle only for now */
- if (nla_strcmp(tb[TCA_IPT_TABLE], "mangle"))
- goto err1;
- }
-
- tname = kstrdup("mangle", GFP_KERNEL);
- if (unlikely(!tname))
- goto err1;
-
- t = kmemdup(td, td->u.target_size, GFP_KERNEL);
- if (unlikely(!t))
- goto err2;
-
- err = ipt_init_target(net, t, tname, hook);
- if (err < 0)
- goto err3;
-
- ipt = to_ipt(*a);
-
- spin_lock_bh(&ipt->tcf_lock);
- if (ret != ACT_P_CREATED) {
- ipt_destroy_target(ipt->tcfi_t, net);
- kfree(ipt->tcfi_tname);
- kfree(ipt->tcfi_t);
- }
- ipt->tcfi_tname = tname;
- ipt->tcfi_t = t;
- ipt->tcfi_hook = hook;
- spin_unlock_bh(&ipt->tcf_lock);
- return ret;
-
-err3:
- kfree(t);
-err2:
- kfree(tname);
-err1:
- tcf_idr_release(*a, bind);
- return err;
-}
-
-static int tcf_ipt_init(struct net *net, struct nlattr *nla,
- struct nlattr *est, struct tc_action **a,
- struct tcf_proto *tp,
- u32 flags, struct netlink_ext_ack *extack)
-{
- return __tcf_ipt_init(net, act_ipt_ops.net_id, nla, est,
- a, &act_ipt_ops, tp, flags);
-}
-
-static int tcf_xt_init(struct net *net, struct nlattr *nla,
- struct nlattr *est, struct tc_action **a,
- struct tcf_proto *tp,
- u32 flags, struct netlink_ext_ack *extack)
-{
- return __tcf_ipt_init(net, act_xt_ops.net_id, nla, est,
- a, &act_xt_ops, tp, flags);
-}
-
-static bool tcf_ipt_act_check(struct sk_buff *skb)
-{
- const struct iphdr *iph;
- unsigned int nhoff, len;
-
- if (!pskb_may_pull(skb, sizeof(struct iphdr)))
- return false;
-
- nhoff = skb_network_offset(skb);
- iph = ip_hdr(skb);
- if (iph->ihl < 5 || iph->version != 4)
- return false;
-
- len = skb_ip_totlen(skb);
- if (skb->len < nhoff + len || len < (iph->ihl * 4u))
- return false;
-
- return pskb_may_pull(skb, iph->ihl * 4u);
-}
-
-TC_INDIRECT_SCOPE int tcf_ipt_act(struct sk_buff *skb,
- const struct tc_action *a,
- struct tcf_result *res)
-{
- char saved_cb[sizeof_field(struct sk_buff, cb)];
- int ret = 0, result = 0;
- struct tcf_ipt *ipt = to_ipt(a);
- struct xt_action_param par;
- struct nf_hook_state state = {
- .net = dev_net(skb->dev),
- .in = skb->dev,
- .hook = ipt->tcfi_hook,
- .pf = NFPROTO_IPV4,
- };
-
- if (skb_protocol(skb, false) != htons(ETH_P_IP))
- return TC_ACT_UNSPEC;
-
- if (skb_unclone(skb, GFP_ATOMIC))
- return TC_ACT_UNSPEC;
-
- if (!tcf_ipt_act_check(skb))
- return TC_ACT_UNSPEC;
-
- if (state.hook == NF_INET_POST_ROUTING) {
- if (!skb_dst(skb))
- return TC_ACT_UNSPEC;
-
- state.out = skb->dev;
- }
-
- memcpy(saved_cb, skb->cb, sizeof(saved_cb));
-
- spin_lock(&ipt->tcf_lock);
-
- tcf_lastuse_update(&ipt->tcf_tm);
- bstats_update(&ipt->tcf_bstats, skb);
-
- /* yes, we have to worry about both in and out dev
- * worry later - danger - this API seems to have changed
- * from earlier kernels
- */
- par.state = &state;
- par.target = ipt->tcfi_t->u.kernel.target;
- par.targinfo = ipt->tcfi_t->data;
-
- memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
-
- ret = par.target->target(skb, &par);
-
- switch (ret) {
- case NF_ACCEPT:
- result = TC_ACT_OK;
- break;
- case NF_DROP:
- result = TC_ACT_SHOT;
- ipt->tcf_qstats.drops++;
- break;
- case XT_CONTINUE:
- result = TC_ACT_PIPE;
- break;
- default:
- net_notice_ratelimited("tc filter: Bogus netfilter code %d assume ACCEPT\n",
- ret);
- result = TC_ACT_OK;
- break;
- }
- spin_unlock(&ipt->tcf_lock);
-
- memcpy(skb->cb, saved_cb, sizeof(skb->cb));
-
- return result;
-
-}
-
-static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind,
- int ref)
-{
- unsigned char *b = skb_tail_pointer(skb);
- struct tcf_ipt *ipt = to_ipt(a);
- struct xt_entry_target *t;
- struct tcf_t tm;
- struct tc_cnt c;
-
- /* for simple targets kernel size == user size
- * user name = target name
- * for foolproof you need to not assume this
- */
-
- spin_lock_bh(&ipt->tcf_lock);
- t = kmemdup(ipt->tcfi_t, ipt->tcfi_t->u.user.target_size, GFP_ATOMIC);
- if (unlikely(!t))
- goto nla_put_failure;
-
- c.bindcnt = atomic_read(&ipt->tcf_bindcnt) - bind;
- c.refcnt = refcount_read(&ipt->tcf_refcnt) - ref;
- strcpy(t->u.user.name, ipt->tcfi_t->u.kernel.target->name);
-
- if (nla_put(skb, TCA_IPT_TARG, ipt->tcfi_t->u.user.target_size, t) ||
- nla_put_u32(skb, TCA_IPT_INDEX, ipt->tcf_index) ||
- nla_put_u32(skb, TCA_IPT_HOOK, ipt->tcfi_hook) ||
- nla_put(skb, TCA_IPT_CNT, sizeof(struct tc_cnt), &c) ||
- nla_put_string(skb, TCA_IPT_TABLE, ipt->tcfi_tname))
- goto nla_put_failure;
-
- tcf_tm_dump(&tm, &ipt->tcf_tm);
- if (nla_put_64bit(skb, TCA_IPT_TM, sizeof(tm), &tm, TCA_IPT_PAD))
- goto nla_put_failure;
-
- spin_unlock_bh(&ipt->tcf_lock);
- kfree(t);
- return skb->len;
-
-nla_put_failure:
- spin_unlock_bh(&ipt->tcf_lock);
- nlmsg_trim(skb, b);
- kfree(t);
- return -1;
-}
-
-static struct tc_action_ops act_ipt_ops = {
- .kind = "ipt",
- .id = TCA_ID_IPT,
- .owner = THIS_MODULE,
- .act = tcf_ipt_act,
- .dump = tcf_ipt_dump,
- .cleanup = tcf_ipt_release,
- .init = tcf_ipt_init,
- .size = sizeof(struct tcf_ipt),
-};
-
-static __net_init int ipt_init_net(struct net *net)
-{
- struct tc_action_net *tn = net_generic(net, act_ipt_ops.net_id);
-
- return tc_action_net_init(net, tn, &act_ipt_ops);
-}
-
-static void __net_exit ipt_exit_net(struct list_head *net_list)
-{
- tc_action_net_exit(net_list, act_ipt_ops.net_id);
-}
-
-static struct pernet_operations ipt_net_ops = {
- .init = ipt_init_net,
- .exit_batch = ipt_exit_net,
- .id = &act_ipt_ops.net_id,
- .size = sizeof(struct tc_action_net),
-};
-
-static struct tc_action_ops act_xt_ops = {
- .kind = "xt",
- .id = TCA_ID_XT,
- .owner = THIS_MODULE,
- .act = tcf_ipt_act,
- .dump = tcf_ipt_dump,
- .cleanup = tcf_ipt_release,
- .init = tcf_xt_init,
- .size = sizeof(struct tcf_ipt),
-};
-
-static __net_init int xt_init_net(struct net *net)
-{
- struct tc_action_net *tn = net_generic(net, act_xt_ops.net_id);
-
- return tc_action_net_init(net, tn, &act_xt_ops);
-}
-
-static void __net_exit xt_exit_net(struct list_head *net_list)
-{
- tc_action_net_exit(net_list, act_xt_ops.net_id);
-}
-
-static struct pernet_operations xt_net_ops = {
- .init = xt_init_net,
- .exit_batch = xt_exit_net,
- .id = &act_xt_ops.net_id,
- .size = sizeof(struct tc_action_net),
-};
-
-MODULE_AUTHOR("Jamal Hadi Salim(2002-13)");
-MODULE_DESCRIPTION("Iptables target actions");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("act_xt");
-
-static int __init ipt_init_module(void)
-{
- int ret1, ret2;
-
- ret1 = tcf_register_action(&act_xt_ops, &xt_net_ops);
- if (ret1 < 0)
- pr_err("Failed to load xt action\n");
-
- ret2 = tcf_register_action(&act_ipt_ops, &ipt_net_ops);
- if (ret2 < 0)
- pr_err("Failed to load ipt action\n");
-
- if (ret1 < 0 && ret2 < 0) {
- return ret1;
- } else
- return 0;
-}
-
-static void __exit ipt_cleanup_module(void)
-{
- tcf_unregister_action(&act_ipt_ops, &ipt_net_ops);
- tcf_unregister_action(&act_xt_ops, &xt_net_ops);
-}
-
-module_init(ipt_init_module);
-module_exit(ipt_cleanup_module);
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 0a711c184c29..12386f590b0f 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -85,10 +85,21 @@ static void tcf_mirred_release(struct tc_action *a)
static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = {
[TCA_MIRRED_PARMS] = { .len = sizeof(struct tc_mirred) },
+ [TCA_MIRRED_BLOCKID] = NLA_POLICY_MIN(NLA_U32, 1),
};
static struct tc_action_ops act_mirred_ops;
+static void tcf_mirred_replace_dev(struct tcf_mirred *m,
+ struct net_device *ndev)
+{
+ struct net_device *odev;
+
+ odev = rcu_replace_pointer(m->tcfm_dev, ndev,
+ lockdep_is_held(&m->tcf_lock));
+ netdev_put(odev, &m->tcfm_dev_tracker);
+}
+
static int tcf_mirred_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
struct tcf_proto *tp,
@@ -124,7 +135,18 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
return err;
exists = err;
if (exists && bind)
- return 0;
+ return ACT_P_BOUND;
+
+ if (tb[TCA_MIRRED_BLOCKID] && parm->ifindex) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Cannot specify Block ID and dev simultaneously");
+ if (exists)
+ tcf_idr_release(*a, bind);
+ else
+ tcf_idr_cleanup(tn, index);
+
+ return -EINVAL;
+ }
switch (parm->eaction) {
case TCA_EGRESS_MIRROR:
@@ -142,9 +164,10 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
}
if (!exists) {
- if (!parm->ifindex) {
+ if (!parm->ifindex && !tb[TCA_MIRRED_BLOCKID]) {
tcf_idr_cleanup(tn, index);
- NL_SET_ERR_MSG_MOD(extack, "Specified device does not exist");
+ NL_SET_ERR_MSG_MOD(extack,
+ "Must specify device or block");
return -EINVAL;
}
ret = tcf_idr_create_from_flags(tn, index, est, a,
@@ -170,7 +193,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
spin_lock_bh(&m->tcf_lock);
if (parm->ifindex) {
- struct net_device *odev, *ndev;
+ struct net_device *ndev;
ndev = dev_get_by_index(net, parm->ifindex);
if (!ndev) {
@@ -179,11 +202,14 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
goto put_chain;
}
mac_header_xmit = dev_is_mac_header_xmit(ndev);
- odev = rcu_replace_pointer(m->tcfm_dev, ndev,
- lockdep_is_held(&m->tcf_lock));
- netdev_put(odev, &m->tcfm_dev_tracker);
+ tcf_mirred_replace_dev(m, ndev);
netdev_tracker_alloc(ndev, &m->tcfm_dev_tracker, GFP_ATOMIC);
m->tcfm_mac_header_xmit = mac_header_xmit;
+ m->tcfm_blockid = 0;
+ } else if (tb[TCA_MIRRED_BLOCKID]) {
+ tcf_mirred_replace_dev(m, NULL);
+ m->tcfm_mac_header_xmit = false;
+ m->tcfm_blockid = nla_get_u32(tb[TCA_MIRRED_BLOCKID]);
}
goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
m->tcfm_eaction = parm->eaction;
@@ -225,48 +251,26 @@ static int tcf_mirred_forward(bool want_ingress, struct sk_buff *skb)
return err;
}
-TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb,
- const struct tc_action *a,
- struct tcf_result *res)
+static int tcf_mirred_to_dev(struct sk_buff *skb, struct tcf_mirred *m,
+ struct net_device *dev,
+ const bool m_mac_header_xmit, int m_eaction,
+ int retval)
{
- struct tcf_mirred *m = to_mirred(a);
- struct sk_buff *skb2 = skb;
- bool m_mac_header_xmit;
- struct net_device *dev;
- unsigned int nest_level;
- int retval, err = 0;
- bool use_reinsert;
+ struct sk_buff *skb_to_send = skb;
bool want_ingress;
bool is_redirect;
bool expects_nh;
bool at_ingress;
- int m_eaction;
+ bool dont_clone;
int mac_len;
bool at_nh;
+ int err;
- nest_level = __this_cpu_inc_return(mirred_nest_level);
- if (unlikely(nest_level > MIRRED_NEST_LIMIT)) {
- net_warn_ratelimited("Packet exceeded mirred recursion limit on dev %s\n",
- netdev_name(skb->dev));
- __this_cpu_dec(mirred_nest_level);
- return TC_ACT_SHOT;
- }
-
- tcf_lastuse_update(&m->tcf_tm);
- tcf_action_update_bstats(&m->common, skb);
-
- m_mac_header_xmit = READ_ONCE(m->tcfm_mac_header_xmit);
- m_eaction = READ_ONCE(m->tcfm_eaction);
- retval = READ_ONCE(m->tcf_action);
- dev = rcu_dereference_bh(m->tcfm_dev);
- if (unlikely(!dev)) {
- pr_notice_once("tc mirred: target device is gone\n");
- goto out;
- }
-
+ is_redirect = tcf_mirred_is_act_redirect(m_eaction);
if (unlikely(!(dev->flags & IFF_UP)) || !netif_carrier_ok(dev)) {
net_notice_ratelimited("tc mirred to Houston: device %s is down\n",
dev->name);
+ err = -ENODEV;
goto out;
}
@@ -274,61 +278,188 @@ TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb,
* since we can't easily detect the clsact caller, skip clone only for
* ingress - that covers the TC S/W datapath.
*/
- is_redirect = tcf_mirred_is_act_redirect(m_eaction);
at_ingress = skb_at_tc_ingress(skb);
- use_reinsert = at_ingress && is_redirect &&
- tcf_mirred_can_reinsert(retval);
- if (!use_reinsert) {
- skb2 = skb_clone(skb, GFP_ATOMIC);
- if (!skb2)
+ dont_clone = skb_at_tc_ingress(skb) && is_redirect &&
+ tcf_mirred_can_reinsert(retval);
+ if (!dont_clone) {
+ skb_to_send = skb_clone(skb, GFP_ATOMIC);
+ if (!skb_to_send) {
+ err = -ENOMEM;
goto out;
+ }
}
want_ingress = tcf_mirred_act_wants_ingress(m_eaction);
/* All mirred/redirected skbs should clear previous ct info */
- nf_reset_ct(skb2);
+ nf_reset_ct(skb_to_send);
if (want_ingress && !at_ingress) /* drop dst for egress -> ingress */
- skb_dst_drop(skb2);
+ skb_dst_drop(skb_to_send);
expects_nh = want_ingress || !m_mac_header_xmit;
at_nh = skb->data == skb_network_header(skb);
if (at_nh != expects_nh) {
- mac_len = skb_at_tc_ingress(skb) ? skb->mac_len :
+ mac_len = at_ingress ? skb->mac_len :
skb_network_offset(skb);
if (expects_nh) {
/* target device/action expect data at nh */
- skb_pull_rcsum(skb2, mac_len);
+ skb_pull_rcsum(skb_to_send, mac_len);
} else {
/* target device/action expect data at mac */
- skb_push_rcsum(skb2, mac_len);
+ skb_push_rcsum(skb_to_send, mac_len);
}
}
- skb2->skb_iif = skb->dev->ifindex;
- skb2->dev = dev;
+ skb_to_send->skb_iif = skb->dev->ifindex;
+ skb_to_send->dev = dev;
- /* mirror is always swallowed */
if (is_redirect) {
- skb_set_redirected(skb2, skb2->tc_at_ingress);
-
- /* let's the caller reinsert the packet, if possible */
- if (use_reinsert) {
- err = tcf_mirred_forward(want_ingress, skb);
- if (err)
- tcf_action_inc_overlimit_qstats(&m->common);
- __this_cpu_dec(mirred_nest_level);
- return TC_ACT_CONSUMED;
- }
+ if (skb == skb_to_send)
+ retval = TC_ACT_CONSUMED;
+
+ skb_set_redirected(skb_to_send, skb_to_send->tc_at_ingress);
+
+ err = tcf_mirred_forward(want_ingress, skb_to_send);
+ } else {
+ err = tcf_mirred_forward(want_ingress, skb_to_send);
}
- err = tcf_mirred_forward(want_ingress, skb2);
if (err) {
out:
tcf_action_inc_overlimit_qstats(&m->common);
- if (tcf_mirred_is_act_redirect(m_eaction))
+ if (is_redirect)
retval = TC_ACT_SHOT;
}
+
+ return retval;
+}
+
+static int tcf_blockcast_redir(struct sk_buff *skb, struct tcf_mirred *m,
+ struct tcf_block *block, int m_eaction,
+ const u32 exception_ifindex, int retval)
+{
+ struct net_device *dev_prev = NULL;
+ struct net_device *dev = NULL;
+ unsigned long index;
+ int mirred_eaction;
+
+ mirred_eaction = tcf_mirred_act_wants_ingress(m_eaction) ?
+ TCA_INGRESS_MIRROR : TCA_EGRESS_MIRROR;
+
+ xa_for_each(&block->ports, index, dev) {
+ if (index == exception_ifindex)
+ continue;
+
+ if (!dev_prev)
+ goto assign_prev;
+
+ tcf_mirred_to_dev(skb, m, dev_prev,
+ dev_is_mac_header_xmit(dev),
+ mirred_eaction, retval);
+assign_prev:
+ dev_prev = dev;
+ }
+
+ if (dev_prev)
+ return tcf_mirred_to_dev(skb, m, dev_prev,
+ dev_is_mac_header_xmit(dev_prev),
+ m_eaction, retval);
+
+ return retval;
+}
+
+static int tcf_blockcast_mirror(struct sk_buff *skb, struct tcf_mirred *m,
+ struct tcf_block *block, int m_eaction,
+ const u32 exception_ifindex, int retval)
+{
+ struct net_device *dev = NULL;
+ unsigned long index;
+
+ xa_for_each(&block->ports, index, dev) {
+ if (index == exception_ifindex)
+ continue;
+
+ tcf_mirred_to_dev(skb, m, dev,
+ dev_is_mac_header_xmit(dev),
+ m_eaction, retval);
+ }
+
+ return retval;
+}
+
+static int tcf_blockcast(struct sk_buff *skb, struct tcf_mirred *m,
+ const u32 blockid, struct tcf_result *res,
+ int retval)
+{
+ const u32 exception_ifindex = skb->dev->ifindex;
+ struct tcf_block *block;
+ bool is_redirect;
+ int m_eaction;
+
+ m_eaction = READ_ONCE(m->tcfm_eaction);
+ is_redirect = tcf_mirred_is_act_redirect(m_eaction);
+
+ /* we are already under rcu protection, so can call block lookup
+ * directly.
+ */
+ block = tcf_block_lookup(dev_net(skb->dev), blockid);
+ if (!block || xa_empty(&block->ports)) {
+ tcf_action_inc_overlimit_qstats(&m->common);
+ return retval;
+ }
+
+ if (is_redirect)
+ return tcf_blockcast_redir(skb, m, block, m_eaction,
+ exception_ifindex, retval);
+
+ /* If it's not redirect, it is mirror */
+ return tcf_blockcast_mirror(skb, m, block, m_eaction, exception_ifindex,
+ retval);
+}
+
+TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb,
+ const struct tc_action *a,
+ struct tcf_result *res)
+{
+ struct tcf_mirred *m = to_mirred(a);
+ int retval = READ_ONCE(m->tcf_action);
+ unsigned int nest_level;
+ bool m_mac_header_xmit;
+ struct net_device *dev;
+ int m_eaction;
+ u32 blockid;
+
+ nest_level = __this_cpu_inc_return(mirred_nest_level);
+ if (unlikely(nest_level > MIRRED_NEST_LIMIT)) {
+ net_warn_ratelimited("Packet exceeded mirred recursion limit on dev %s\n",
+ netdev_name(skb->dev));
+ retval = TC_ACT_SHOT;
+ goto dec_nest_level;
+ }
+
+ tcf_lastuse_update(&m->tcf_tm);
+ tcf_action_update_bstats(&m->common, skb);
+
+ blockid = READ_ONCE(m->tcfm_blockid);
+ if (blockid) {
+ retval = tcf_blockcast(skb, m, blockid, res, retval);
+ goto dec_nest_level;
+ }
+
+ dev = rcu_dereference_bh(m->tcfm_dev);
+ if (unlikely(!dev)) {
+ pr_notice_once("tc mirred: target device is gone\n");
+ tcf_action_inc_overlimit_qstats(&m->common);
+ goto dec_nest_level;
+ }
+
+ m_mac_header_xmit = READ_ONCE(m->tcfm_mac_header_xmit);
+ m_eaction = READ_ONCE(m->tcfm_eaction);
+
+ retval = tcf_mirred_to_dev(skb, m, dev, m_mac_header_xmit, m_eaction,
+ retval);
+
+dec_nest_level:
__this_cpu_dec(mirred_nest_level);
return retval;
@@ -356,6 +487,7 @@ static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind,
};
struct net_device *dev;
struct tcf_t t;
+ u32 blockid;
spin_lock_bh(&m->tcf_lock);
opt.action = m->tcf_action;
@@ -367,6 +499,10 @@ static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind,
if (nla_put(skb, TCA_MIRRED_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
+ blockid = m->tcfm_blockid;
+ if (blockid && nla_put_u32(skb, TCA_MIRRED_BLOCKID, blockid))
+ goto nla_put_failure;
+
tcf_tm_dump(&t, &m->tcf_tm);
if (nla_put_64bit(skb, TCA_MIRRED_TM, sizeof(t), &t, TCA_MIRRED_PAD))
goto nla_put_failure;
@@ -397,6 +533,8 @@ static int mirred_device_event(struct notifier_block *unused,
* net_device are already rcu protected.
*/
RCU_INIT_POINTER(m->tcfm_dev, NULL);
+ } else if (m->tcfm_blockid) {
+ m->tcfm_blockid = 0;
}
spin_unlock_bh(&m->tcf_lock);
}
diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c
index 1010dc632874..34b8edb6cc77 100644
--- a/net/sched/act_mpls.c
+++ b/net/sched/act_mpls.c
@@ -195,7 +195,7 @@ static int tcf_mpls_init(struct net *net, struct nlattr *nla,
return err;
exists = err;
if (exists && bind)
- return 0;
+ return ACT_P_BOUND;
if (!exists) {
ret = tcf_idr_create(tn, index, est, a, &act_mpls_ops, bind,
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 4184af5abbf3..a180e724634e 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -69,7 +69,7 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
ret = ACT_P_CREATED;
} else if (err > 0) {
if (bind)
- return 0;
+ return ACT_P_BOUND;
if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*a, bind);
return -EEXIST;
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 1ef8fcfa9997..2ef22969f274 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -202,7 +202,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
ret = ACT_P_CREATED;
} else if (err > 0) {
if (bind)
- return 0;
+ return ACT_P_BOUND;
if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
ret = -EEXIST;
goto out_release;
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index f3121c5a85e9..e119b4a3db9f 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -77,7 +77,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
return err;
exists = err;
if (exists && bind)
- return 0;
+ return ACT_P_BOUND;
if (!exists) {
ret = tcf_idr_create(tn, index, NULL, a,
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 4c670e7568dc..c5c61efe6db4 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -66,7 +66,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
return err;
exists = err;
if (exists && bind)
- return 0;
+ return ACT_P_BOUND;
if (!exists) {
ret = tcf_idr_create(tn, index, est, a,
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 4b84514534f3..0a3e92888295 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -118,7 +118,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
return err;
exists = err;
if (exists && bind)
- return 0;
+ return ACT_P_BOUND;
if (tb[TCA_DEF_DATA] == NULL) {
if (exists)
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index ce7008cf291c..754f78b35bb8 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -209,7 +209,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
return err;
exists = err;
if (exists && bind)
- return 0;
+ return ACT_P_BOUND;
if (!flags) {
if (exists)
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index dffa990a9629..bcb673ab0008 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -157,7 +157,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
return err;
exists = err;
if (exists && bind)
- return 0;
+ return ACT_P_BOUND;
if (!lflags) {
if (exists)
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 0c8aa7e686ea..300b08aa8283 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -401,7 +401,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
return err;
exists = err;
if (exists && bind)
- return 0;
+ return ACT_P_BOUND;
switch (parm->t_action) {
case TCA_TUNNEL_KEY_ACT_RELEASE:
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 0251442f5f29..836183011a7c 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -151,7 +151,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
return err;
exists = err;
if (exists && bind)
- return 0;
+ return ACT_P_BOUND;
switch (parm->v_action) {
case TCA_VLAN_ACT_POP:
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index a193cc7b3241..ff3d396a65aa 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -531,6 +531,7 @@ static void tcf_block_destroy(struct tcf_block *block)
{
mutex_destroy(&block->lock);
mutex_destroy(&block->proto_destroy_lock);
+ xa_destroy(&block->ports);
kfree_rcu(block, rcu);
}
@@ -650,7 +651,7 @@ static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops,
static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops,
void *tmplt_priv, u32 chain_index,
struct tcf_block *block, struct sk_buff *oskb,
- u32 seq, u16 flags, bool unicast);
+ u32 seq, u16 flags);
static void __tcf_chain_put(struct tcf_chain *chain, bool by_act,
bool explicitly_created)
@@ -685,8 +686,7 @@ static void __tcf_chain_put(struct tcf_chain *chain, bool by_act,
if (non_act_refcnt == chain->explicitly_created && !by_act) {
if (non_act_refcnt == 0)
tc_chain_notify_delete(tmplt_ops, tmplt_priv,
- chain->index, block, NULL, 0, 0,
- false);
+ chain->index, block, NULL, 0, 0);
/* Last reference to chain, no need to lock. */
chain->flushing = false;
}
@@ -1003,6 +1003,7 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
refcount_set(&block->refcnt, 1);
block->net = net;
block->index = block_index;
+ xa_init(&block->ports);
/* Don't store q pointer for blocks which are shared */
if (!tcf_block_shared(block))
@@ -1010,12 +1011,13 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
return block;
}
-static struct tcf_block *tcf_block_lookup(struct net *net, u32 block_index)
+struct tcf_block *tcf_block_lookup(struct net *net, u32 block_index)
{
struct tcf_net *tn = net_generic(net, tcf_net_id);
return idr_find(&tn->idr, block_index);
}
+EXPORT_SYMBOL(tcf_block_lookup);
static struct tcf_block *tcf_block_refcnt_get(struct net *net, u32 block_index)
{
@@ -1422,10 +1424,19 @@ static void tcf_block_owner_del(struct tcf_block *block,
WARN_ON(1);
}
+static bool tcf_block_tracks_dev(struct tcf_block *block,
+ struct tcf_block_ext_info *ei)
+{
+ return tcf_block_shared(block) &&
+ (ei->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS ||
+ ei->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS);
+}
+
int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
struct tcf_block_ext_info *ei,
struct netlink_ext_ack *extack)
{
+ struct net_device *dev = qdisc_dev(q);
struct net *net = qdisc_net(q);
struct tcf_block *block = NULL;
int err;
@@ -1459,9 +1470,18 @@ int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
if (err)
goto err_block_offload_bind;
+ if (tcf_block_tracks_dev(block, ei)) {
+ err = xa_insert(&block->ports, dev->ifindex, dev, GFP_KERNEL);
+ if (err) {
+ NL_SET_ERR_MSG(extack, "block dev insert failed");
+ goto err_dev_insert;
+ }
+ }
+
*p_block = block;
return 0;
+err_dev_insert:
err_block_offload_bind:
tcf_chain0_head_change_cb_del(block, ei);
err_chain0_head_change_cb_add:
@@ -1500,8 +1520,12 @@ EXPORT_SYMBOL(tcf_block_get);
void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
struct tcf_block_ext_info *ei)
{
+ struct net_device *dev = qdisc_dev(q);
+
if (!block)
return;
+ if (tcf_block_tracks_dev(block, ei))
+ xa_erase(&block->ports, dev->ifindex);
tcf_chain0_head_change_cb_del(block, ei);
tcf_block_owner_del(block, q, ei->binder_type);
@@ -1536,6 +1560,9 @@ tcf_block_playback_offloads(struct tcf_block *block, flow_setup_cb_t *cb,
chain_prev = chain,
chain = __tcf_get_next_chain(block, chain),
tcf_chain_put(chain_prev)) {
+ if (chain->tmplt_ops && add)
+ chain->tmplt_ops->tmplt_reoffload(chain, true, cb,
+ cb_priv);
for (tp = __tcf_get_next_proto(chain, NULL); tp;
tp_prev = tp,
tp = __tcf_get_next_proto(chain, tp),
@@ -1551,6 +1578,9 @@ tcf_block_playback_offloads(struct tcf_block *block, flow_setup_cb_t *cb,
goto err_playback_remove;
}
}
+ if (chain->tmplt_ops && !add)
+ chain->tmplt_ops->tmplt_reoffload(chain, false, cb,
+ cb_priv);
}
return 0;
@@ -1681,12 +1711,18 @@ reclassify:
* time we got here with a cookie from hardware.
*/
if (unlikely(n->tp != tp || n->tp->chain != n->chain ||
- !tp->ops->get_exts))
+ !tp->ops->get_exts)) {
+ tcf_set_drop_reason(skb,
+ SKB_DROP_REASON_TC_COOKIE_ERROR);
return TC_ACT_SHOT;
+ }
exts = tp->ops->get_exts(tp, n->handle);
- if (unlikely(!exts || n->exts != exts))
+ if (unlikely(!exts || n->exts != exts)) {
+ tcf_set_drop_reason(skb,
+ SKB_DROP_REASON_TC_COOKIE_ERROR);
return TC_ACT_SHOT;
+ }
n = NULL;
err = tcf_exts_exec_ex(skb, exts, act_index, res);
@@ -1712,8 +1748,11 @@ reclassify:
return err;
}
- if (unlikely(n))
+ if (unlikely(n)) {
+ tcf_set_drop_reason(skb,
+ SKB_DROP_REASON_TC_COOKIE_ERROR);
return TC_ACT_SHOT;
+ }
return TC_ACT_UNSPEC; /* signal: continue lookup */
#ifdef CONFIG_NET_CLS_ACT
@@ -1723,6 +1762,8 @@ reset:
tp->chain->block->index,
tp->prio & 0xffff,
ntohs(tp->protocol));
+ tcf_set_drop_reason(skb,
+ SKB_DROP_REASON_TC_RECLASSIFY_LOOP);
return TC_ACT_SHOT;
}
@@ -1759,8 +1800,11 @@ int tcf_classify(struct sk_buff *skb,
if (ext->act_miss) {
n = tcf_exts_miss_cookie_lookup(ext->act_miss_cookie,
&act_index);
- if (!n)
+ if (!n) {
+ tcf_set_drop_reason(skb,
+ SKB_DROP_REASON_TC_COOKIE_ERROR);
return TC_ACT_SHOT;
+ }
chain = n->chain_index;
} else {
@@ -1768,8 +1812,12 @@ int tcf_classify(struct sk_buff *skb,
}
fchain = tcf_chain_lookup_rcu(block, chain);
- if (!fchain)
+ if (!fchain) {
+ tcf_set_drop_reason(skb,
+ SKB_DROP_REASON_TC_CHAIN_NOTFOUND);
+
return TC_ACT_SHOT;
+ }
/* Consume, so cloned/redirect skbs won't inherit ext */
skb_ext_del(skb, TC_SKB_EXT);
@@ -1788,8 +1836,10 @@ int tcf_classify(struct sk_buff *skb,
struct tc_skb_cb *cb = tc_skb_cb(skb);
ext = tc_skb_ext_alloc(skb);
- if (WARN_ON_ONCE(!ext))
+ if (WARN_ON_ONCE(!ext)) {
+ tcf_set_drop_reason(skb, SKB_DROP_REASON_NOMEM);
return TC_ACT_SHOT;
+ }
ext->chain = last_executed_chain;
ext->mru = cb->mru;
ext->post_ct = cb->post_ct;
@@ -2032,6 +2082,9 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb,
u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
int err = 0;
+ if (!unicast && !rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC))
+ return 0;
+
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb)
return -ENOBUFS;
@@ -2054,13 +2107,16 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb,
static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
struct nlmsghdr *n, struct tcf_proto *tp,
struct tcf_block *block, struct Qdisc *q,
- u32 parent, void *fh, bool unicast, bool *last,
- bool rtnl_held, struct netlink_ext_ack *extack)
+ u32 parent, void *fh, bool *last, bool rtnl_held,
+ struct netlink_ext_ack *extack)
{
struct sk_buff *skb;
u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
int err;
+ if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC))
+ return tp->ops->delete(tp, fh, last, rtnl_held, extack);
+
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb)
return -ENOBUFS;
@@ -2079,11 +2135,8 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
return err;
}
- if (unicast)
- err = rtnl_unicast(skb, net, portid);
- else
- err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
- n->nlmsg_flags & NLM_F_ECHO);
+ err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
+ n->nlmsg_flags & NLM_F_ECHO);
if (err < 0)
NL_SET_ERR_MSG(extack, "Failed to send filter delete notification");
@@ -2478,9 +2531,8 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
} else {
bool last;
- err = tfilter_del_notify(net, skb, n, tp, block,
- q, parent, fh, false, &last,
- rtnl_held, extack);
+ err = tfilter_del_notify(net, skb, n, tp, block, q, parent, fh,
+ &last, rtnl_held, extack);
if (err)
goto errout;
@@ -2711,6 +2763,7 @@ errout:
}
static const struct nla_policy tcf_tfilter_dump_policy[TCA_MAX + 1] = {
+ [TCA_CHAIN] = { .type = NLA_U32 },
[TCA_DUMP_FLAGS] = NLA_POLICY_BITFIELD32(TCA_DUMP_FLAGS_TERSE),
};
@@ -2885,6 +2938,9 @@ static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
struct sk_buff *skb;
int err = 0;
+ if (!unicast && !rtnl_notify_needed(net, flags, RTNLGRP_TC))
+ return 0;
+
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb)
return -ENOBUFS;
@@ -2908,12 +2964,15 @@ static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops,
void *tmplt_priv, u32 chain_index,
struct tcf_block *block, struct sk_buff *oskb,
- u32 seq, u16 flags, bool unicast)
+ u32 seq, u16 flags)
{
u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
struct net *net = block->net;
struct sk_buff *skb;
+ if (!rtnl_notify_needed(net, flags, RTNLGRP_TC))
+ return 0;
+
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb)
return -ENOBUFS;
@@ -2924,9 +2983,6 @@ static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops,
return -EINVAL;
}
- if (unicast)
- return rtnl_unicast(skb, net, portid);
-
return rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO);
}
@@ -2950,7 +3006,8 @@ static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net,
ops = tcf_proto_lookup_ops(name, true, extack);
if (IS_ERR(ops))
return PTR_ERR(ops);
- if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump) {
+ if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump ||
+ !ops->tmplt_reoffload) {
NL_SET_ERR_MSG(extack, "Chain templates are not supported with specified classifier");
module_put(ops->owner);
return -EOPNOTSUPP;
@@ -3272,12 +3329,11 @@ int tcf_exts_validate_ex(struct net *net, struct tcf_proto *tp, struct nlattr **
if (exts->police && tb[exts->police]) {
struct tc_action_ops *a_o;
- a_o = tc_action_load_ops(tb[exts->police], true,
- !(flags & TCA_ACT_FLAGS_NO_RTNL),
+ flags |= TCA_ACT_FLAGS_POLICE | TCA_ACT_FLAGS_BIND;
+ a_o = tc_action_load_ops(tb[exts->police], flags,
extack);
if (IS_ERR(a_o))
return PTR_ERR(a_o);
- flags |= TCA_ACT_FLAGS_POLICE | TCA_ACT_FLAGS_BIND;
act = tcf_action_init_1(net, tp, tb[exts->police],
rate_tlv, a_o, init_res, flags,
extack);
@@ -3288,7 +3344,7 @@ int tcf_exts_validate_ex(struct net *net, struct tcf_proto *tp, struct nlattr **
act->type = exts->type = TCA_OLD_COMPAT;
exts->actions[0] = act;
exts->nr_actions = 1;
- tcf_idr_insert_many(exts->actions);
+ tcf_idr_insert_many(exts->actions, init_res);
} else if (exts->action && tb[exts->action]) {
int err;
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 1b92c33b5f81..a1f56931330c 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -341,4 +341,5 @@ static void __exit exit_basic(void)
module_init(init_basic)
module_exit(exit_basic)
+MODULE_DESCRIPTION("TC basic classifier");
MODULE_LICENSE("GPL");
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index bd9322d71910..7ee8dbf49ed0 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -222,4 +222,5 @@ static void __exit exit_cgroup_cls(void)
module_init(init_cgroup_cls);
module_exit(exit_cgroup_cls);
+MODULE_DESCRIPTION("TC cgroup classifier");
MODULE_LICENSE("GPL");
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index e5314a31f75a..efb9d2811b73 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -2721,6 +2721,28 @@ static void fl_tmplt_destroy(void *tmplt_priv)
kfree(tmplt);
}
+static void fl_tmplt_reoffload(struct tcf_chain *chain, bool add,
+ flow_setup_cb_t *cb, void *cb_priv)
+{
+ struct fl_flow_tmplt *tmplt = chain->tmplt_priv;
+ struct flow_cls_offload cls_flower = {};
+
+ cls_flower.rule = flow_rule_alloc(0);
+ if (!cls_flower.rule)
+ return;
+
+ cls_flower.common.chain_index = chain->index;
+ cls_flower.command = add ? FLOW_CLS_TMPLT_CREATE :
+ FLOW_CLS_TMPLT_DESTROY;
+ cls_flower.cookie = (unsigned long) tmplt;
+ cls_flower.rule->match.dissector = &tmplt->dissector;
+ cls_flower.rule->match.mask = &tmplt->mask;
+ cls_flower.rule->match.key = &tmplt->dummy_key;
+
+ cb(TC_SETUP_CLSFLOWER, &cls_flower, cb_priv);
+ kfree(cls_flower.rule);
+}
+
static int fl_dump_key_val(struct sk_buff *skb,
void *val, int val_type,
void *mask, int mask_type, int len)
@@ -3628,6 +3650,7 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = {
.bind_class = fl_bind_class,
.tmplt_create = fl_tmplt_create,
.tmplt_destroy = fl_tmplt_destroy,
+ .tmplt_reoffload = fl_tmplt_reoffload,
.tmplt_dump = fl_tmplt_dump,
.get_exts = fl_get_exts,
.owner = THIS_MODULE,
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index c49d6af0e048..afc534ee0a18 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -446,4 +446,5 @@ static void __exit exit_fw(void)
module_init(init_fw)
module_exit(exit_fw)
+MODULE_DESCRIPTION("SKB mark based TC classifier");
MODULE_LICENSE("GPL");
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 1e20bbd687f1..12a505db4183 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -375,9 +375,9 @@ out:
static const struct nla_policy route4_policy[TCA_ROUTE4_MAX + 1] = {
[TCA_ROUTE4_CLASSID] = { .type = NLA_U32 },
- [TCA_ROUTE4_TO] = { .type = NLA_U32 },
- [TCA_ROUTE4_FROM] = { .type = NLA_U32 },
- [TCA_ROUTE4_IIF] = { .type = NLA_U32 },
+ [TCA_ROUTE4_TO] = NLA_POLICY_MAX(NLA_U32, 0xFF),
+ [TCA_ROUTE4_FROM] = NLA_POLICY_MAX(NLA_U32, 0xFF),
+ [TCA_ROUTE4_IIF] = NLA_POLICY_MAX(NLA_U32, 0x7FFF),
};
static int route4_set_parms(struct net *net, struct tcf_proto *tp,
@@ -397,33 +397,37 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
return err;
if (tb[TCA_ROUTE4_TO]) {
- if (new && handle & 0x8000)
+ if (new && handle & 0x8000) {
+ NL_SET_ERR_MSG(extack, "Invalid handle");
return -EINVAL;
+ }
to = nla_get_u32(tb[TCA_ROUTE4_TO]);
- if (to > 0xFF)
- return -EINVAL;
nhandle = to;
}
+ if (tb[TCA_ROUTE4_FROM] && tb[TCA_ROUTE4_IIF]) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[TCA_ROUTE4_FROM],
+ "'from' and 'fromif' are mutually exclusive");
+ return -EINVAL;
+ }
+
if (tb[TCA_ROUTE4_FROM]) {
- if (tb[TCA_ROUTE4_IIF])
- return -EINVAL;
id = nla_get_u32(tb[TCA_ROUTE4_FROM]);
- if (id > 0xFF)
- return -EINVAL;
nhandle |= id << 16;
} else if (tb[TCA_ROUTE4_IIF]) {
id = nla_get_u32(tb[TCA_ROUTE4_IIF]);
- if (id > 0x7FFF)
- return -EINVAL;
nhandle |= (id | 0x8000) << 16;
} else
nhandle |= 0xFFFF << 16;
if (handle && new) {
nhandle |= handle & 0x7F00;
- if (nhandle != handle)
+ if (nhandle != handle) {
+ NL_SET_ERR_MSG_FMT(extack,
+ "Handle mismatch constructed: %x (expected: %x)",
+ handle, nhandle);
return -EINVAL;
+ }
}
if (!nhandle) {
@@ -478,7 +482,6 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
struct route4_filter __rcu **fp;
struct route4_filter *fold, *f1, *pfp, *f = NULL;
struct route4_bucket *b;
- struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_ROUTE4_MAX + 1];
unsigned int h, th;
int err;
@@ -489,10 +492,12 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
return -EINVAL;
}
- if (opt == NULL)
+ if (NL_REQ_ATTR_CHECK(extack, NULL, tca, TCA_OPTIONS)) {
+ NL_SET_ERR_MSG_MOD(extack, "Missing options");
return -EINVAL;
+ }
- err = nla_parse_nested_deprecated(tb, TCA_ROUTE4_MAX, opt,
+ err = nla_parse_nested_deprecated(tb, TCA_ROUTE4_MAX, tca[TCA_OPTIONS],
route4_policy, NULL);
if (err < 0)
return err;
@@ -679,4 +684,5 @@ static void __exit exit_route4(void)
module_init(init_route4)
module_exit(exit_route4)
+MODULE_DESCRIPTION("Routing table realm based TC classifier");
MODULE_LICENSE("GPL");
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index da4c179a4d41..289e1755c26b 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -71,7 +71,7 @@ struct tc_u_hnode {
struct tc_u_hnode __rcu *next;
u32 handle;
u32 prio;
- int refcnt;
+ refcount_t refcnt;
unsigned int divisor;
struct idr handle_idr;
bool is_root;
@@ -86,7 +86,7 @@ struct tc_u_hnode {
struct tc_u_common {
struct tc_u_hnode __rcu *hlist;
void *ptr;
- int refcnt;
+ refcount_t refcnt;
struct idr handle_idr;
struct hlist_node hnode;
long knodes;
@@ -359,30 +359,32 @@ static int u32_init(struct tcf_proto *tp)
if (root_ht == NULL)
return -ENOBUFS;
- root_ht->refcnt++;
+ refcount_set(&root_ht->refcnt, 1);
root_ht->handle = tp_c ? gen_new_htid(tp_c, root_ht) : 0x80000000;
root_ht->prio = tp->prio;
root_ht->is_root = true;
idr_init(&root_ht->handle_idr);
if (tp_c == NULL) {
- tp_c = kzalloc(struct_size(tp_c, hlist->ht, 1), GFP_KERNEL);
+ tp_c = kzalloc(sizeof(*tp_c), GFP_KERNEL);
if (tp_c == NULL) {
kfree(root_ht);
return -ENOBUFS;
}
+ refcount_set(&tp_c->refcnt, 1);
tp_c->ptr = key;
INIT_HLIST_NODE(&tp_c->hnode);
idr_init(&tp_c->handle_idr);
hlist_add_head(&tp_c->hnode, tc_u_hash(key));
+ } else {
+ refcount_inc(&tp_c->refcnt);
}
- tp_c->refcnt++;
RCU_INIT_POINTER(root_ht->next, tp_c->hlist);
rcu_assign_pointer(tp_c->hlist, root_ht);
- root_ht->refcnt++;
+ /* root_ht must be destroyed when tcf_proto is destroyed */
rcu_assign_pointer(tp->root, root_ht);
tp->data = tp_c;
return 0;
@@ -393,7 +395,7 @@ static void __u32_destroy_key(struct tc_u_knode *n)
struct tc_u_hnode *ht = rtnl_dereference(n->ht_down);
tcf_exts_destroy(&n->exts);
- if (ht && --ht->refcnt == 0)
+ if (ht && refcount_dec_and_test(&ht->refcnt))
kfree(ht);
kfree(n);
}
@@ -601,8 +603,6 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht,
struct tc_u_hnode __rcu **hn;
struct tc_u_hnode *phn;
- WARN_ON(--ht->refcnt);
-
u32_clear_hnode(tp, ht, extack);
hn = &tp_c->hlist;
@@ -630,10 +630,10 @@ static void u32_destroy(struct tcf_proto *tp, bool rtnl_held,
WARN_ON(root_ht == NULL);
- if (root_ht && --root_ht->refcnt == 1)
+ if (root_ht && refcount_dec_and_test(&root_ht->refcnt))
u32_destroy_hnode(tp, root_ht, extack);
- if (--tp_c->refcnt == 0) {
+ if (refcount_dec_and_test(&tp_c->refcnt)) {
struct tc_u_hnode *ht;
hlist_del(&tp_c->hnode);
@@ -645,7 +645,7 @@ static void u32_destroy(struct tcf_proto *tp, bool rtnl_held,
/* u32_destroy_key() will later free ht for us, if it's
* still referenced by some knode
*/
- if (--ht->refcnt == 0)
+ if (refcount_dec_and_test(&ht->refcnt))
kfree_rcu(ht, rcu);
}
@@ -674,7 +674,7 @@ static int u32_delete(struct tcf_proto *tp, void *arg, bool *last,
return -EINVAL;
}
- if (ht->refcnt == 1) {
+ if (refcount_dec_if_one(&ht->refcnt)) {
u32_destroy_hnode(tp, ht, extack);
} else {
NL_SET_ERR_MSG_MOD(extack, "Can not delete in-use filter");
@@ -682,7 +682,7 @@ static int u32_delete(struct tcf_proto *tp, void *arg, bool *last,
}
out:
- *last = tp_c->refcnt == 1 && tp_c->knodes == 0;
+ *last = refcount_read(&tp_c->refcnt) == 1 && tp_c->knodes == 0;
return ret;
}
@@ -766,14 +766,14 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
NL_SET_ERR_MSG_MOD(extack, "Not linking to root node");
return -EINVAL;
}
- ht_down->refcnt++;
+ refcount_inc(&ht_down->refcnt);
}
ht_old = rtnl_dereference(n->ht_down);
rcu_assign_pointer(n->ht_down, ht_down);
if (ht_old)
- ht_old->refcnt--;
+ refcount_dec(&ht_old->refcnt);
}
if (ifindex >= 0)
@@ -852,7 +852,7 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp,
/* bump reference count as long as we hold pointer to structure */
if (ht)
- ht->refcnt++;
+ refcount_inc(&ht->refcnt);
return new;
}
@@ -932,7 +932,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
ht_old = rtnl_dereference(n->ht_down);
if (ht_old)
- ht_old->refcnt++;
+ refcount_inc(&ht_old->refcnt);
}
__u32_destroy_key(new);
return err;
@@ -980,7 +980,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
return err;
}
}
- ht->refcnt = 1;
+ refcount_set(&ht->refcnt, 1);
ht->divisor = divisor;
ht->handle = handle;
ht->prio = tp->prio;
@@ -1489,4 +1489,5 @@ static void __exit exit_u32(void)
module_init(init_u32)
module_exit(exit_u32)
+MODULE_DESCRIPTION("Universal 32bit based TC Classifier");
MODULE_LICENSE("GPL");
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index da34fd4c9269..09d8afd04a2a 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -546,7 +546,7 @@ META_COLLECTOR(int_sk_prio)
*err = -1;
return;
}
- dst->value = sk->sk_priority;
+ dst->value = READ_ONCE(sk->sk_priority);
}
META_COLLECTOR(int_sk_rcvlowat)
diff --git a/net/sched/em_text.c b/net/sched/em_text.c
index 6f3c1fb2fb44..f176afb70559 100644
--- a/net/sched/em_text.c
+++ b/net/sched/em_text.c
@@ -97,8 +97,10 @@ retry:
static void em_text_destroy(struct tcf_ematch *m)
{
- if (EM_TEXT_PRIV(m) && EM_TEXT_PRIV(m)->config)
+ if (EM_TEXT_PRIV(m) && EM_TEXT_PRIV(m)->config) {
textsearch_destroy(EM_TEXT_PRIV(m)->config);
+ kfree(EM_TEXT_PRIV(m));
+ }
}
static int em_text_dump(struct sk_buff *skb, struct tcf_ematch *m)
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index e9eaf637220e..36b025cc4fd2 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1003,6 +1003,32 @@ static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
return false;
}
+static int qdisc_get_notify(struct net *net, struct sk_buff *oskb,
+ struct nlmsghdr *n, u32 clid, struct Qdisc *q,
+ struct netlink_ext_ack *extack)
+{
+ struct sk_buff *skb;
+ u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+
+ skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!skb)
+ return -ENOBUFS;
+
+ if (!tc_qdisc_dump_ignore(q, false)) {
+ if (tc_fill_qdisc(skb, q, clid, portid, n->nlmsg_seq, 0,
+ RTM_NEWQDISC, extack) < 0)
+ goto err_out;
+ }
+
+ if (skb->len)
+ return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
+ n->nlmsg_flags & NLM_F_ECHO);
+
+err_out:
+ kfree_skb(skb);
+ return -EINVAL;
+}
+
static int qdisc_notify(struct net *net, struct sk_buff *oskb,
struct nlmsghdr *n, u32 clid,
struct Qdisc *old, struct Qdisc *new,
@@ -1011,6 +1037,9 @@ static int qdisc_notify(struct net *net, struct sk_buff *oskb,
struct sk_buff *skb;
u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+ if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC))
+ return 0;
+
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb)
return -ENOBUFS;
@@ -1542,7 +1571,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
if (err != 0)
return err;
} else {
- qdisc_notify(net, skb, n, clid, NULL, q, NULL);
+ qdisc_get_notify(net, skb, n, clid, q, NULL);
}
return 0;
}
@@ -1936,6 +1965,9 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb,
struct sk_buff *skb;
u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+ if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC))
+ return 0;
+
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb)
return -ENOBUFS;
@@ -1949,6 +1981,27 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb,
n->nlmsg_flags & NLM_F_ECHO);
}
+static int tclass_get_notify(struct net *net, struct sk_buff *oskb,
+ struct nlmsghdr *n, struct Qdisc *q,
+ unsigned long cl, struct netlink_ext_ack *extack)
+{
+ struct sk_buff *skb;
+ u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+
+ skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!skb)
+ return -ENOBUFS;
+
+ if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, RTM_NEWTCLASS,
+ extack) < 0) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+
+ return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
+ n->nlmsg_flags & NLM_F_ECHO);
+}
+
static int tclass_del_notify(struct net *net,
const struct Qdisc_class_ops *cops,
struct sk_buff *oskb, struct nlmsghdr *n,
@@ -1962,14 +2015,18 @@ static int tclass_del_notify(struct net *net,
if (!cops->delete)
return -EOPNOTSUPP;
- skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
- if (!skb)
- return -ENOBUFS;
+ if (rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC)) {
+ skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!skb)
+ return -ENOBUFS;
- if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
- RTM_DELTCLASS, extack) < 0) {
- kfree_skb(skb);
- return -EINVAL;
+ if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
+ RTM_DELTCLASS, extack) < 0) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+ } else {
+ skb = NULL;
}
err = cops->delete(q, cl, extack);
@@ -1978,8 +2035,8 @@ static int tclass_del_notify(struct net *net,
return err;
}
- err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
- n->nlmsg_flags & NLM_F_ECHO);
+ err = rtnetlink_maybe_send(skb, net, portid, RTNLGRP_TC,
+ n->nlmsg_flags & NLM_F_ECHO);
return err;
}
@@ -2174,7 +2231,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
tc_bind_tclass(q, portid, clid, 0);
goto out;
case RTM_GETTCLASS:
- err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS, extack);
+ err = tclass_get_notify(net, skb, n, q, cl, extack);
goto out;
default:
err = -EINVAL;
diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c
index cac870eb7897..beece8e82c23 100644
--- a/net/sched/sch_cbs.c
+++ b/net/sched/sch_cbs.c
@@ -57,6 +57,8 @@
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
+#include <linux/units.h>
+
#include <net/netevent.h>
#include <net/netlink.h>
#include <net/sch_generic.h>
@@ -65,8 +67,6 @@
static LIST_HEAD(cbs_list);
static DEFINE_SPINLOCK(cbs_list_lock);
-#define BYTES_PER_KBIT (1000LL / 8)
-
struct cbs_sched_data {
bool offload;
int queue;
@@ -574,3 +574,4 @@ static void __exit cbs_module_exit(void)
module_init(cbs_module_init)
module_exit(cbs_module_exit)
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Credit Based shaper");
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index 19c851125901..ae1da08e268f 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -513,3 +513,4 @@ module_init(choke_module_init)
module_exit(choke_module_exit)
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Choose and keep responsive flows scheduler");
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index 19901e77cd3b..097740a9afea 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -495,3 +495,4 @@ static void __exit drr_exit(void)
module_init(drr_init);
module_exit(drr_exit);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Deficit Round Robin scheduler");
diff --git a/net/sched/sch_etf.c b/net/sched/sch_etf.c
index 61d1f0e32cf3..4808159a5466 100644
--- a/net/sched/sch_etf.c
+++ b/net/sched/sch_etf.c
@@ -513,3 +513,4 @@ static void __exit etf_module_exit(void)
module_init(etf_module_init)
module_exit(etf_module_exit)
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Earliest TxTime First (ETF) qdisc");
diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c
index b10efeaf0629..f7c88495946b 100644
--- a/net/sched/sch_ets.c
+++ b/net/sched/sch_ets.c
@@ -826,3 +826,4 @@ static void __exit ets_exit(void)
module_init(ets_init);
module_exit(ets_exit);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Enhanced Transmission Selection(ETS) scheduler");
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index e1040421b797..450f5c67ac49 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -269,3 +269,4 @@ struct Qdisc *fifo_create_dflt(struct Qdisc *sch, struct Qdisc_ops *ops,
return q ? : ERR_PTR(err);
}
EXPORT_SYMBOL(fifo_create_dflt);
+MODULE_DESCRIPTION("Single queue packet and byte based First In First Out(P/BFIFO) scheduler");
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index f59a2cb2c803..3a31c47fea9b 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -2,7 +2,7 @@
/*
* net/sched/sch_fq.c Fair Queue Packet Scheduler (per flow pacing)
*
- * Copyright (C) 2013-2015 Eric Dumazet <edumazet@google.com>
+ * Copyright (C) 2013-2023 Eric Dumazet <edumazet@google.com>
*
* Meant to be mostly used for locally generated traffic :
* Fast classification depends on skb->sk being set before reaching us.
@@ -51,7 +51,8 @@
#include <net/tcp.h>
struct fq_skb_cb {
- u64 time_to_send;
+ u64 time_to_send;
+ u8 band;
};
static inline struct fq_skb_cb *fq_skb_cb(struct sk_buff *skb)
@@ -73,37 +74,41 @@ struct fq_flow {
struct sk_buff *tail; /* last skb in the list */
unsigned long age; /* (jiffies | 1UL) when flow was emptied, for gc */
};
- struct rb_node fq_node; /* anchor in fq_root[] trees */
+ union {
+ struct rb_node fq_node; /* anchor in fq_root[] trees */
+ /* Following field is only used for q->internal,
+ * because q->internal is not hashed in fq_root[]
+ */
+ u64 stat_fastpath_packets;
+ };
struct sock *sk;
u32 socket_hash; /* sk_hash */
int qlen; /* number of packets in flow queue */
-/* Second cache line, used in fq_dequeue() */
+/* Second cache line */
int credit;
- /* 32bit hole on 64bit arches */
-
+ int band;
struct fq_flow *next; /* next pointer in RR lists */
struct rb_node rate_node; /* anchor in q->delayed tree */
u64 time_next_packet;
-} ____cacheline_aligned_in_smp;
+};
struct fq_flow_head {
struct fq_flow *first;
struct fq_flow *last;
};
-struct fq_sched_data {
+struct fq_perband_flows {
struct fq_flow_head new_flows;
-
struct fq_flow_head old_flows;
+ int credit;
+ int quantum; /* based on band nr : 576KB, 192KB, 64KB */
+};
- struct rb_root delayed; /* for rate limited flows */
- u64 time_next_delayed_flow;
- u64 ktime_cache; /* copy of last ktime_get_ns() */
- unsigned long unthrottle_latency_ns;
+struct fq_sched_data {
+/* Read mostly cache line */
- struct fq_flow internal; /* for non classified or high prio packets */
u32 quantum;
u32 initial_quantum;
u32 flow_refill_delay;
@@ -117,24 +122,46 @@ struct fq_sched_data {
u8 rate_enable;
u8 fq_trees_log;
u8 horizon_drop;
+ u8 prio2band[(TC_PRIO_MAX + 1) >> 2];
+ u32 timer_slack; /* hrtimer slack in ns */
+
+/* Read/Write fields. */
+
+ unsigned int band_nr; /* band being serviced in fq_dequeue() */
+
+ struct fq_perband_flows band_flows[FQ_BANDS];
+
+ struct fq_flow internal; /* fastpath queue. */
+ struct rb_root delayed; /* for rate limited flows */
+ u64 time_next_delayed_flow;
+ unsigned long unthrottle_latency_ns;
+
+ u32 band_pkt_count[FQ_BANDS];
u32 flows;
- u32 inactive_flows;
+ u32 inactive_flows; /* Flows with no packet to send. */
u32 throttled_flows;
- u64 stat_gc_flows;
- u64 stat_internal_packets;
u64 stat_throttled;
+ struct qdisc_watchdog watchdog;
+ u64 stat_gc_flows;
+
+/* Seldom used fields. */
+
+ u64 stat_band_drops[FQ_BANDS];
u64 stat_ce_mark;
u64 stat_horizon_drops;
u64 stat_horizon_caps;
u64 stat_flows_plimit;
u64 stat_pkts_too_long;
u64 stat_allocation_errors;
-
- u32 timer_slack; /* hrtimer slack in ns */
- struct qdisc_watchdog watchdog;
};
+/* return the i-th 2-bit value ("crumb") */
+static u8 fq_prio2band(const u8 *prio2band, unsigned int prio)
+{
+ return (prio2band[prio / 4] >> (2 * (prio & 0x3))) & 0x3;
+}
+
/*
* f->tail and f->age share the same location.
* We can use the low order bit to differentiate if this location points
@@ -159,8 +186,19 @@ static bool fq_flow_is_throttled(const struct fq_flow *f)
return f->next == &throttled;
}
-static void fq_flow_add_tail(struct fq_flow_head *head, struct fq_flow *flow)
+enum new_flow {
+ NEW_FLOW,
+ OLD_FLOW
+};
+
+static void fq_flow_add_tail(struct fq_sched_data *q, struct fq_flow *flow,
+ enum new_flow list_sel)
{
+ struct fq_perband_flows *pband = &q->band_flows[flow->band];
+ struct fq_flow_head *head = (list_sel == NEW_FLOW) ?
+ &pband->new_flows :
+ &pband->old_flows;
+
if (head->first)
head->last->next = flow;
else
@@ -173,7 +211,7 @@ static void fq_flow_unset_throttled(struct fq_sched_data *q, struct fq_flow *f)
{
rb_erase(&f->rate_node, &q->delayed);
q->throttled_flows--;
- fq_flow_add_tail(&q->old_flows, f);
+ fq_flow_add_tail(q, f, OLD_FLOW);
}
static void fq_flow_set_throttled(struct fq_sched_data *q, struct fq_flow *f)
@@ -258,17 +296,61 @@ static void fq_gc(struct fq_sched_data *q,
kmem_cache_free_bulk(fq_flow_cachep, fcnt, tofree);
}
-static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
+/* Fast path can be used if :
+ * 1) Packet tstamp is in the past.
+ * 2) FQ qlen == 0 OR
+ * (no flow is currently eligible for transmit,
+ * AND fast path queue has less than 8 packets)
+ * 3) No SO_MAX_PACING_RATE on the socket (if any).
+ * 4) No @maxrate attribute on this qdisc,
+ *
+ * FQ can not use generic TCQ_F_CAN_BYPASS infrastructure.
+ */
+static bool fq_fastpath_check(const struct Qdisc *sch, struct sk_buff *skb,
+ u64 now)
{
+ const struct fq_sched_data *q = qdisc_priv(sch);
+ const struct sock *sk;
+
+ if (fq_skb_cb(skb)->time_to_send > now)
+ return false;
+
+ if (sch->q.qlen != 0) {
+ /* Even if some packets are stored in this qdisc,
+ * we can still enable fast path if all of them are
+ * scheduled in the future (ie no flows are eligible)
+ * or in the fast path queue.
+ */
+ if (q->flows != q->inactive_flows + q->throttled_flows)
+ return false;
+
+ /* Do not allow fast path queue to explode, we want Fair Queue mode
+ * under pressure.
+ */
+ if (q->internal.qlen >= 8)
+ return false;
+ }
+
+ sk = skb->sk;
+ if (sk && sk_fullsock(sk) && !sk_is_tcp(sk) &&
+ sk->sk_max_pacing_rate != ~0UL)
+ return false;
+
+ if (q->flow_max_rate != ~0UL)
+ return false;
+
+ return true;
+}
+
+static struct fq_flow *fq_classify(struct Qdisc *sch, struct sk_buff *skb,
+ u64 now)
+{
+ struct fq_sched_data *q = qdisc_priv(sch);
struct rb_node **p, *parent;
struct sock *sk = skb->sk;
struct rb_root *root;
struct fq_flow *f;
- /* warning: no starvation prevention... */
- if (unlikely((skb->priority & TC_PRIO_MAX) == TC_PRIO_CONTROL))
- return &q->internal;
-
/* SYNACK messages are attached to a TCP_NEW_SYN_RECV request socket
* or a listener (SYNCOOKIE mode)
* 1) request sockets are not full blown,
@@ -299,11 +381,18 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
sk = (struct sock *)((hash << 1) | 1UL);
}
+ if (fq_fastpath_check(sch, skb, now)) {
+ q->internal.stat_fastpath_packets++;
+ if (skb->sk == sk && q->rate_enable &&
+ READ_ONCE(sk->sk_pacing_status) != SK_PACING_FQ)
+ smp_store_release(&sk->sk_pacing_status,
+ SK_PACING_FQ);
+ return &q->internal;
+ }
+
root = &q->fq_root[hash_ptr(sk, q->fq_trees_log)];
- if (q->flows >= (2U << q->fq_trees_log) &&
- q->inactive_flows > q->flows/2)
- fq_gc(q, root, sk);
+ fq_gc(q, root, sk);
p = &root->rb_node;
parent = NULL;
@@ -396,7 +485,6 @@ static void fq_dequeue_skb(struct Qdisc *sch, struct fq_flow *flow,
{
fq_erase_head(sch, flow, skb);
skb_mark_not_on_list(skb);
- flow->qlen--;
qdisc_qstats_backlog_dec(sch, skb);
sch->q.qlen--;
}
@@ -434,9 +522,9 @@ static void flow_queue_add(struct fq_flow *flow, struct sk_buff *skb)
}
static bool fq_packet_beyond_horizon(const struct sk_buff *skb,
- const struct fq_sched_data *q)
+ const struct fq_sched_data *q, u64 now)
{
- return unlikely((s64)skb->tstamp > (s64)(q->ktime_cache + q->horizon));
+ return unlikely((s64)skb->tstamp > (s64)(now + q->horizon));
}
static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
@@ -444,53 +532,57 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
{
struct fq_sched_data *q = qdisc_priv(sch);
struct fq_flow *f;
+ u64 now;
+ u8 band;
- if (unlikely(sch->q.qlen >= sch->limit))
+ band = fq_prio2band(q->prio2band, skb->priority & TC_PRIO_MAX);
+ if (unlikely(q->band_pkt_count[band] >= sch->limit)) {
+ q->stat_band_drops[band]++;
return qdisc_drop(skb, sch, to_free);
+ }
+ now = ktime_get_ns();
if (!skb->tstamp) {
- fq_skb_cb(skb)->time_to_send = q->ktime_cache = ktime_get_ns();
+ fq_skb_cb(skb)->time_to_send = now;
} else {
- /* Check if packet timestamp is too far in the future.
- * Try first if our cached value, to avoid ktime_get_ns()
- * cost in most cases.
- */
- if (fq_packet_beyond_horizon(skb, q)) {
- /* Refresh our cache and check another time */
- q->ktime_cache = ktime_get_ns();
- if (fq_packet_beyond_horizon(skb, q)) {
- if (q->horizon_drop) {
+ /* Check if packet timestamp is too far in the future. */
+ if (fq_packet_beyond_horizon(skb, q, now)) {
+ if (q->horizon_drop) {
q->stat_horizon_drops++;
return qdisc_drop(skb, sch, to_free);
- }
- q->stat_horizon_caps++;
- skb->tstamp = q->ktime_cache + q->horizon;
}
+ q->stat_horizon_caps++;
+ skb->tstamp = now + q->horizon;
}
fq_skb_cb(skb)->time_to_send = skb->tstamp;
}
- f = fq_classify(skb, q);
- if (unlikely(f->qlen >= q->flow_plimit && f != &q->internal)) {
- q->stat_flows_plimit++;
- return qdisc_drop(skb, sch, to_free);
- }
+ f = fq_classify(sch, skb, now);
- f->qlen++;
- qdisc_qstats_backlog_inc(sch, skb);
- if (fq_flow_is_detached(f)) {
- fq_flow_add_tail(&q->new_flows, f);
- if (time_after(jiffies, f->age + q->flow_refill_delay))
- f->credit = max_t(u32, f->credit, q->quantum);
- q->inactive_flows--;
+ if (f != &q->internal) {
+ if (unlikely(f->qlen >= q->flow_plimit)) {
+ q->stat_flows_plimit++;
+ return qdisc_drop(skb, sch, to_free);
+ }
+
+ if (fq_flow_is_detached(f)) {
+ fq_flow_add_tail(q, f, NEW_FLOW);
+ if (time_after(jiffies, f->age + q->flow_refill_delay))
+ f->credit = max_t(u32, f->credit, q->quantum);
+ }
+
+ f->band = band;
+ q->band_pkt_count[band]++;
+ fq_skb_cb(skb)->band = band;
+ if (f->qlen == 0)
+ q->inactive_flows--;
}
+ f->qlen++;
/* Note: this overwrites f->age */
flow_queue_add(f, skb);
- if (unlikely(f == &q->internal)) {
- q->stat_internal_packets++;
- }
+ qdisc_qstats_backlog_inc(sch, skb);
sch->q.qlen++;
return NET_XMIT_SUCCESS;
@@ -523,13 +615,26 @@ static void fq_check_throttled(struct fq_sched_data *q, u64 now)
}
}
+static struct fq_flow_head *fq_pband_head_select(struct fq_perband_flows *pband)
+{
+ if (pband->credit <= 0)
+ return NULL;
+
+ if (pband->new_flows.first)
+ return &pband->new_flows;
+
+ return pband->old_flows.first ? &pband->old_flows : NULL;
+}
+
static struct sk_buff *fq_dequeue(struct Qdisc *sch)
{
struct fq_sched_data *q = qdisc_priv(sch);
+ struct fq_perband_flows *pband;
struct fq_flow_head *head;
struct sk_buff *skb;
struct fq_flow *f;
unsigned long rate;
+ int retry;
u32 plen;
u64 now;
@@ -538,30 +643,38 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch)
skb = fq_peek(&q->internal);
if (unlikely(skb)) {
+ q->internal.qlen--;
fq_dequeue_skb(sch, &q->internal, skb);
goto out;
}
- q->ktime_cache = now = ktime_get_ns();
+ now = ktime_get_ns();
fq_check_throttled(q, now);
+ retry = 0;
+ pband = &q->band_flows[q->band_nr];
begin:
- head = &q->new_flows;
- if (!head->first) {
- head = &q->old_flows;
- if (!head->first) {
- if (q->time_next_delayed_flow != ~0ULL)
- qdisc_watchdog_schedule_range_ns(&q->watchdog,
+ head = fq_pband_head_select(pband);
+ if (!head) {
+ while (++retry <= FQ_BANDS) {
+ if (++q->band_nr == FQ_BANDS)
+ q->band_nr = 0;
+ pband = &q->band_flows[q->band_nr];
+ pband->credit = min(pband->credit + pband->quantum,
+ pband->quantum);
+ goto begin;
+ }
+ if (q->time_next_delayed_flow != ~0ULL)
+ qdisc_watchdog_schedule_range_ns(&q->watchdog,
q->time_next_delayed_flow,
q->timer_slack);
- return NULL;
- }
+ return NULL;
}
f = head->first;
-
+ retry = 0;
if (f->credit <= 0) {
f->credit += q->quantum;
head->first = f->next;
- fq_flow_add_tail(&q->old_flows, f);
+ fq_flow_add_tail(q, f, OLD_FLOW);
goto begin;
}
@@ -581,20 +694,23 @@ begin:
INET_ECN_set_ce(skb);
q->stat_ce_mark++;
}
+ if (--f->qlen == 0)
+ q->inactive_flows++;
+ q->band_pkt_count[fq_skb_cb(skb)->band]--;
fq_dequeue_skb(sch, f, skb);
} else {
head->first = f->next;
/* force a pass through old_flows to prevent starvation */
- if ((head == &q->new_flows) && q->old_flows.first) {
- fq_flow_add_tail(&q->old_flows, f);
+ if (head == &pband->new_flows) {
+ fq_flow_add_tail(q, f, OLD_FLOW);
} else {
fq_flow_set_detached(f);
- q->inactive_flows++;
}
goto begin;
}
plen = qdisc_pkt_len(skb);
f->credit -= plen;
+ pband->credit -= plen;
if (!q->rate_enable)
goto out;
@@ -607,7 +723,7 @@ begin:
*/
if (!skb->tstamp) {
if (skb->sk)
- rate = min(skb->sk->sk_pacing_rate, rate);
+ rate = min(READ_ONCE(skb->sk->sk_pacing_rate), rate);
if (rate <= q->low_rate_threshold) {
f->credit = 0;
@@ -686,8 +802,10 @@ static void fq_reset(struct Qdisc *sch)
kmem_cache_free(fq_flow_cachep, f);
}
}
- q->new_flows.first = NULL;
- q->old_flows.first = NULL;
+ for (idx = 0; idx < FQ_BANDS; idx++) {
+ q->band_flows[idx].new_flows.first = NULL;
+ q->band_flows[idx].old_flows.first = NULL;
+ }
q->delayed = RB_ROOT;
q->flows = 0;
q->inactive_flows = 0;
@@ -779,7 +897,7 @@ static int fq_resize(struct Qdisc *sch, u32 log)
return 0;
}
-static struct netlink_range_validation iq_range = {
+static const struct netlink_range_validation iq_range = {
.max = INT_MAX,
};
@@ -801,8 +919,71 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = {
[TCA_FQ_TIMER_SLACK] = { .type = NLA_U32 },
[TCA_FQ_HORIZON] = { .type = NLA_U32 },
[TCA_FQ_HORIZON_DROP] = { .type = NLA_U8 },
+ [TCA_FQ_PRIOMAP] = NLA_POLICY_EXACT_LEN(sizeof(struct tc_prio_qopt)),
+ [TCA_FQ_WEIGHTS] = NLA_POLICY_EXACT_LEN(FQ_BANDS * sizeof(s32)),
};
+/* compress a u8 array with all elems <= 3 to an array of 2-bit fields */
+static void fq_prio2band_compress_crumb(const u8 *in, u8 *out)
+{
+ const int num_elems = TC_PRIO_MAX + 1;
+ int i;
+
+ memset(out, 0, num_elems / 4);
+ for (i = 0; i < num_elems; i++)
+ out[i / 4] |= in[i] << (2 * (i & 0x3));
+}
+
+static void fq_prio2band_decompress_crumb(const u8 *in, u8 *out)
+{
+ const int num_elems = TC_PRIO_MAX + 1;
+ int i;
+
+ for (i = 0; i < num_elems; i++)
+ out[i] = fq_prio2band(in, i);
+}
+
+static int fq_load_weights(struct fq_sched_data *q,
+ const struct nlattr *attr,
+ struct netlink_ext_ack *extack)
+{
+ s32 *weights = nla_data(attr);
+ int i;
+
+ for (i = 0; i < FQ_BANDS; i++) {
+ if (weights[i] < FQ_MIN_WEIGHT) {
+ NL_SET_ERR_MSG_FMT_MOD(extack, "Weight %d less that minimum allowed %d",
+ weights[i], FQ_MIN_WEIGHT);
+ return -EINVAL;
+ }
+ }
+ for (i = 0; i < FQ_BANDS; i++)
+ q->band_flows[i].quantum = weights[i];
+ return 0;
+}
+
+static int fq_load_priomap(struct fq_sched_data *q,
+ const struct nlattr *attr,
+ struct netlink_ext_ack *extack)
+{
+ const struct tc_prio_qopt *map = nla_data(attr);
+ int i;
+
+ if (map->bands != FQ_BANDS) {
+ NL_SET_ERR_MSG_MOD(extack, "FQ only supports 3 bands");
+ return -EINVAL;
+ }
+ for (i = 0; i < TC_PRIO_MAX + 1; i++) {
+ if (map->priomap[i] >= FQ_BANDS) {
+ NL_SET_ERR_MSG_FMT_MOD(extack, "FQ priomap field %d maps to a too high band %d",
+ i, map->priomap[i]);
+ return -EINVAL;
+ }
+ }
+ fq_prio2band_compress_crumb(map->priomap, q->prio2band);
+ return 0;
+}
+
static int fq_change(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{
@@ -877,6 +1058,12 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt,
q->flow_refill_delay = usecs_to_jiffies(usecs_delay);
}
+ if (!err && tb[TCA_FQ_PRIOMAP])
+ err = fq_load_priomap(q, tb[TCA_FQ_PRIOMAP], extack);
+
+ if (!err && tb[TCA_FQ_WEIGHTS])
+ err = fq_load_weights(q, tb[TCA_FQ_WEIGHTS], extack);
+
if (tb[TCA_FQ_ORPHAN_MASK])
q->orphan_mask = nla_get_u32(tb[TCA_FQ_ORPHAN_MASK]);
@@ -928,7 +1115,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{
struct fq_sched_data *q = qdisc_priv(sch);
- int err;
+ int i, err;
sch->limit = 10000;
q->flow_plimit = 100;
@@ -938,8 +1125,13 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt,
q->flow_max_rate = ~0UL;
q->time_next_delayed_flow = ~0ULL;
q->rate_enable = 1;
- q->new_flows.first = NULL;
- q->old_flows.first = NULL;
+ for (i = 0; i < FQ_BANDS; i++) {
+ q->band_flows[i].new_flows.first = NULL;
+ q->band_flows[i].old_flows.first = NULL;
+ }
+ q->band_flows[0].quantum = 9 << 16;
+ q->band_flows[1].quantum = 3 << 16;
+ q->band_flows[2].quantum = 1 << 16;
q->delayed = RB_ROOT;
q->fq_root = NULL;
q->fq_trees_log = ilog2(1024);
@@ -954,6 +1146,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt,
/* Default ce_threshold of 4294 seconds */
q->ce_threshold = (u64)NSEC_PER_USEC * ~0U;
+ fq_prio2band_compress_crumb(sch_default_prio2band, q->prio2band);
qdisc_watchdog_init_clockid(&q->watchdog, sch, CLOCK_MONOTONIC);
if (opt)
@@ -968,8 +1161,12 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct fq_sched_data *q = qdisc_priv(sch);
u64 ce_threshold = q->ce_threshold;
+ struct tc_prio_qopt prio = {
+ .bands = FQ_BANDS,
+ };
u64 horizon = q->horizon;
struct nlattr *opts;
+ s32 weights[3];
opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
if (opts == NULL)
@@ -999,6 +1196,16 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
nla_put_u8(skb, TCA_FQ_HORIZON_DROP, q->horizon_drop))
goto nla_put_failure;
+ fq_prio2band_decompress_crumb(q->prio2band, prio.priomap);
+ if (nla_put(skb, TCA_FQ_PRIOMAP, sizeof(prio), &prio))
+ goto nla_put_failure;
+
+ weights[0] = q->band_flows[0].quantum;
+ weights[1] = q->band_flows[1].quantum;
+ weights[2] = q->band_flows[2].quantum;
+ if (nla_put(skb, TCA_FQ_WEIGHTS, sizeof(weights), &weights))
+ goto nla_put_failure;
+
return nla_nest_end(skb, opts);
nla_put_failure:
@@ -1009,11 +1216,15 @@ static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
struct fq_sched_data *q = qdisc_priv(sch);
struct tc_fq_qd_stats st;
+ int i;
+
+ st.pad = 0;
sch_tree_lock(sch);
st.gc_flows = q->stat_gc_flows;
- st.highprio_packets = q->stat_internal_packets;
+ st.highprio_packets = 0;
+ st.fastpath_packets = q->internal.stat_fastpath_packets;
st.tcp_retrans = 0;
st.throttled = q->stat_throttled;
st.flows_plimit = q->stat_flows_plimit;
@@ -1029,6 +1240,10 @@ static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
st.ce_mark = q->stat_ce_mark;
st.horizon_drops = q->stat_horizon_drops;
st.horizon_caps = q->stat_horizon_caps;
+ for (i = 0; i < FQ_BANDS; i++) {
+ st.band_drops[i] = q->stat_band_drops[i];
+ st.band_pkt_count[i] = q->band_pkt_count[i];
+ }
sch_tree_unlock(sch);
return gnet_stats_copy_app(d, &st, sizeof(st));
@@ -1056,7 +1271,7 @@ static int __init fq_module_init(void)
fq_flow_cachep = kmem_cache_create("fq_flow_cache",
sizeof(struct fq_flow),
- 0, 0, NULL);
+ 0, SLAB_HWCACHE_ALIGN, NULL);
if (!fq_flow_cachep)
return -ENOMEM;
diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c
index 591d87d5e5c0..5b595773e59b 100644
--- a/net/sched/sch_fq_pie.c
+++ b/net/sched/sch_fq_pie.c
@@ -61,6 +61,7 @@ struct fq_pie_sched_data {
struct pie_params p_params;
u32 ecn_prob;
u32 flows_cnt;
+ u32 flows_cursor;
u32 quantum;
u32 memory_limit;
u32 new_flow_count;
@@ -201,7 +202,7 @@ out:
return NET_XMIT_CN;
}
-static struct netlink_range_validation fq_pie_q_range = {
+static const struct netlink_range_validation fq_pie_q_range = {
.min = 1,
.max = 1 << 20,
};
@@ -375,22 +376,32 @@ flow_error:
static void fq_pie_timer(struct timer_list *t)
{
struct fq_pie_sched_data *q = from_timer(q, t, adapt_timer);
+ unsigned long next, tupdate;
struct Qdisc *sch = q->sch;
spinlock_t *root_lock; /* to lock qdisc for probability calculations */
- u32 idx;
+ int max_cnt, i;
rcu_read_lock();
root_lock = qdisc_lock(qdisc_root_sleeping(sch));
spin_lock(root_lock);
- for (idx = 0; idx < q->flows_cnt; idx++)
- pie_calculate_probability(&q->p_params, &q->flows[idx].vars,
- q->flows[idx].backlog);
-
- /* reset the timer to fire after 'tupdate' jiffies. */
- if (q->p_params.tupdate)
- mod_timer(&q->adapt_timer, jiffies + q->p_params.tupdate);
+ /* Limit this expensive loop to 2048 flows per round. */
+ max_cnt = min_t(int, q->flows_cnt - q->flows_cursor, 2048);
+ for (i = 0; i < max_cnt; i++) {
+ pie_calculate_probability(&q->p_params,
+ &q->flows[q->flows_cursor].vars,
+ q->flows[q->flows_cursor].backlog);
+ q->flows_cursor++;
+ }
+ tupdate = q->p_params.tupdate;
+ next = 0;
+ if (q->flows_cursor >= q->flows_cnt) {
+ q->flows_cursor = 0;
+ next = tupdate;
+ }
+ if (tupdate)
+ mod_timer(&q->adapt_timer, jiffies + next);
spin_unlock(root_lock);
rcu_read_unlock();
}
diff --git a/net/sched/sch_frag.c b/net/sched/sch_frag.c
index a9bd0a235890..ce63414185fd 100644
--- a/net/sched/sch_frag.c
+++ b/net/sched/sch_frag.c
@@ -96,7 +96,7 @@ static int sch_fragment(struct net *net, struct sk_buff *skb,
unsigned long orig_dst;
sch_frag_prepare_frag(skb, xmit);
- dst_init(&sch_frag_rt.dst, &sch_frag_dst_ops, NULL, 1,
+ dst_init(&sch_frag_rt.dst, &sch_frag_dst_ops, NULL,
DST_OBSOLETE_NONE, DST_NOCOUNT);
sch_frag_rt.dst.dev = skb->dev;
@@ -112,7 +112,7 @@ static int sch_fragment(struct net *net, struct sk_buff *skb,
sch_frag_prepare_frag(skb, xmit);
memset(&sch_frag_rt, 0, sizeof(sch_frag_rt));
- dst_init(&sch_frag_rt.dst, &sch_frag_dst_ops, NULL, 1,
+ dst_init(&sch_frag_rt.dst, &sch_frag_dst_ops, NULL,
DST_OBSOLETE_NONE, DST_NOCOUNT);
sch_frag_rt.dst.dev = skb->dev;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 5d7e23f4cc0e..9b3e9262040b 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -522,8 +522,9 @@ static void dev_watchdog(struct timer_list *t)
if (unlikely(timedout_ms)) {
trace_net_dev_xmit_timeout(dev, i);
- WARN_ONCE(1, "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out %u ms\n",
- dev->name, netdev_drivername(dev), i, timedout_ms);
+ netdev_crit(dev, "NETDEV WATCHDOG: CPU: %d: transmit queue %u timed out %u ms\n",
+ raw_smp_processor_id(),
+ i, timedout_ms);
netif_freeze_queues(dev);
dev->netdev_ops->ndo_tx_timeout(dev, i);
netif_unfreeze_queues(dev);
@@ -694,9 +695,10 @@ struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {
.owner = THIS_MODULE,
};
-static const u8 prio2band[TC_PRIO_MAX + 1] = {
- 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1
+const u8 sch_default_prio2band[TC_PRIO_MAX + 1] = {
+ 1, 2, 2, 2, 1, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1
};
+EXPORT_SYMBOL(sch_default_prio2band);
/* 3-band FIFO queue: old style, but should be a bit faster than
generic prio+fifo combination.
@@ -721,7 +723,7 @@ static inline struct skb_array *band2list(struct pfifo_fast_priv *priv,
static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
struct sk_buff **to_free)
{
- int band = prio2band[skb->priority & TC_PRIO_MAX];
+ int band = sch_default_prio2band[skb->priority & TC_PRIO_MAX];
struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
struct skb_array *q = band2list(priv, band);
unsigned int pkt_len = qdisc_pkt_len(skb);
@@ -830,7 +832,7 @@ static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb)
{
struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS };
- memcpy(&opt.priomap, prio2band, TC_PRIO_MAX + 1);
+ memcpy(&opt.priomap, sch_default_prio2band, TC_PRIO_MAX + 1);
if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
goto nla_put_failure;
return skb->len;
@@ -1049,6 +1051,7 @@ static void qdisc_free_cb(struct rcu_head *head)
static void __qdisc_destroy(struct Qdisc *qdisc)
{
const struct Qdisc_ops *ops = qdisc->ops;
+ struct net_device *dev = qdisc_dev(qdisc);
#ifdef CONFIG_NET_SCHED
qdisc_hash_del(qdisc);
@@ -1059,11 +1062,12 @@ static void __qdisc_destroy(struct Qdisc *qdisc)
qdisc_reset(qdisc);
+
if (ops->destroy)
ops->destroy(qdisc);
module_put(ops->owner);
- netdev_put(qdisc_dev(qdisc), &qdisc->dev_tracker);
+ netdev_put(dev, &qdisc->dev_tracker);
trace_qdisc_destroy(qdisc);
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 872d127c9db4..8c61eb3dc943 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -945,3 +945,4 @@ module_init(gred_module_init)
module_exit(gred_module_exit)
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Generic Random Early Detection qdisc");
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 3554085bc2be..16c45da4036a 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -902,6 +902,14 @@ hfsc_change_usc(struct hfsc_class *cl, struct tc_service_curve *usc,
cl->cl_flags |= HFSC_USC;
}
+static void
+hfsc_upgrade_rt(struct hfsc_class *cl)
+{
+ cl->cl_fsc = cl->cl_rsc;
+ rtsc_init(&cl->cl_virtual, &cl->cl_fsc, cl->cl_vt, cl->cl_total);
+ cl->cl_flags |= HFSC_FSC;
+}
+
static const struct nla_policy hfsc_policy[TCA_HFSC_MAX + 1] = {
[TCA_HFSC_RSC] = { .len = sizeof(struct tc_service_curve) },
[TCA_HFSC_FSC] = { .len = sizeof(struct tc_service_curve) },
@@ -1011,10 +1019,6 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (parent == NULL)
return -ENOENT;
}
- if (!(parent->cl_flags & HFSC_FSC) && parent != &q->root) {
- NL_SET_ERR_MSG(extack, "Invalid parent - parent class must have FSC");
- return -EINVAL;
- }
if (classid == 0 || TC_H_MAJ(classid ^ sch->handle) != 0)
return -EINVAL;
@@ -1065,6 +1069,12 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
cl->cf_tree = RB_ROOT;
sch_tree_lock(sch);
+ /* Check if the inner class is a misconfigured 'rt' */
+ if (!(parent->cl_flags & HFSC_FSC) && parent != &q->root) {
+ NL_SET_ERR_MSG(extack,
+ "Forced curve change on parent 'rt' to 'sc'");
+ hfsc_upgrade_rt(parent);
+ }
qdisc_class_hash_insert(&q->clhash, &cl->cl_common);
list_add_tail(&cl->siblings, &parent->children);
if (parent->level == 0)
@@ -1683,5 +1693,6 @@ hfsc_cleanup(void)
}
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Hierarchical Fair Service Curve scheduler");
module_init(hfsc_init);
module_exit(hfsc_cleanup);
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 0d947414e616..7349233eaa9b 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -2179,3 +2179,4 @@ static void __exit htb_module_exit(void)
module_init(htb_module_init)
module_exit(htb_module_exit)
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Hierarchical Token Bucket scheduler");
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index a463a63192c3..5fa9eaa79bfc 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -370,3 +370,4 @@ module_exit(ingress_module_exit);
MODULE_ALIAS("sch_clsact");
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Ingress and clsact based ingress and egress qdiscs");
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index 793009f445c0..43e53ee00a56 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -789,3 +789,4 @@ module_init(mqprio_module_init);
module_exit(mqprio_module_exit);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Classful multiqueue prio qdisc");
diff --git a/net/sched/sch_mqprio_lib.c b/net/sched/sch_mqprio_lib.c
index 83b3793c4012..b3a5572c167b 100644
--- a/net/sched/sch_mqprio_lib.c
+++ b/net/sched/sch_mqprio_lib.c
@@ -129,3 +129,4 @@ void mqprio_fp_to_offload(u32 fp[TC_QOPT_MAX_QUEUE],
EXPORT_SYMBOL_GPL(mqprio_fp_to_offload);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Shared mqprio qdisc code currently between taprio and mqprio");
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 75c9c860182b..d66d5f0ec080 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -410,3 +410,4 @@ module_init(multiq_module_init)
module_exit(multiq_module_exit)
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Multi queue to hardware queue mapping qdisc");
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 4ad39a4a3cf5..fa678eb88528 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -67,7 +67,7 @@
struct disttable {
u32 size;
- s16 table[];
+ s16 table[] __counted_by(size);
};
struct netem_sched_data {
@@ -1307,3 +1307,4 @@ static void __exit netem_module_exit(void)
module_init(netem_module_init)
module_exit(netem_module_exit)
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Network characteristics emulator qdisc");
diff --git a/net/sched/sch_plug.c b/net/sched/sch_plug.c
index ea8c4a7174bb..992f0c8d7988 100644
--- a/net/sched/sch_plug.c
+++ b/net/sched/sch_plug.c
@@ -207,7 +207,7 @@ static struct Qdisc_ops plug_qdisc_ops __read_mostly = {
.priv_size = sizeof(struct plug_sched_data),
.enqueue = plug_enqueue,
.dequeue = plug_dequeue,
- .peek = qdisc_peek_head,
+ .peek = qdisc_peek_dequeued,
.init = plug_init,
.change = plug_change,
.reset = qdisc_reset_queue,
@@ -226,3 +226,4 @@ static void __exit plug_module_exit(void)
module_init(plug_module_init)
module_exit(plug_module_exit)
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Qdisc to plug and unplug traffic via netlink control");
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index fdc5ef52c3ee..8ecdd3ef6f8e 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -433,3 +433,4 @@ module_init(prio_module_init)
module_exit(prio_module_exit)
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Simple 3-band priority qdisc");
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 1a25752f1a9a..48a604c320c7 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -213,7 +213,7 @@ static struct qfq_class *qfq_find_class(struct Qdisc *sch, u32 classid)
return container_of(clc, struct qfq_class, common);
}
-static struct netlink_range_validation lmax_range = {
+static const struct netlink_range_validation lmax_range = {
.min = QFQ_MIN_LMAX,
.max = QFQ_MAX_LMAX,
};
@@ -974,10 +974,13 @@ static void qfq_update_eligible(struct qfq_sched *q)
}
/* Dequeue head packet of the head class in the DRR queue of the aggregate. */
-static void agg_dequeue(struct qfq_aggregate *agg,
- struct qfq_class *cl, unsigned int len)
+static struct sk_buff *agg_dequeue(struct qfq_aggregate *agg,
+ struct qfq_class *cl, unsigned int len)
{
- qdisc_dequeue_peeked(cl->qdisc);
+ struct sk_buff *skb = qdisc_dequeue_peeked(cl->qdisc);
+
+ if (!skb)
+ return NULL;
cl->deficit -= (int) len;
@@ -987,6 +990,8 @@ static void agg_dequeue(struct qfq_aggregate *agg,
cl->deficit += agg->lmax;
list_move_tail(&cl->alist, &agg->active);
}
+
+ return skb;
}
static inline struct sk_buff *qfq_peek_skb(struct qfq_aggregate *agg,
@@ -998,7 +1003,7 @@ static inline struct sk_buff *qfq_peek_skb(struct qfq_aggregate *agg,
*cl = list_first_entry(&agg->active, struct qfq_class, alist);
skb = (*cl)->qdisc->ops->peek((*cl)->qdisc);
if (skb == NULL)
- WARN_ONCE(1, "qfq_dequeue: non-workconserving leaf\n");
+ qdisc_warn_nonwc("qfq_dequeue", (*cl)->qdisc);
else
*len = qdisc_pkt_len(skb);
@@ -1132,11 +1137,18 @@ static struct sk_buff *qfq_dequeue(struct Qdisc *sch)
if (!skb)
return NULL;
- qdisc_qstats_backlog_dec(sch, skb);
sch->q.qlen--;
+
+ skb = agg_dequeue(in_serv_agg, cl, len);
+
+ if (!skb) {
+ sch->q.qlen++;
+ return NULL;
+ }
+
+ qdisc_qstats_backlog_dec(sch, skb);
qdisc_bstats_update(sch, skb);
- agg_dequeue(in_serv_agg, cl, len);
/* If lmax is lowered, through qfq_change_class, for a class
* owning pending packets with larger size than the new value
* of lmax, then the following condition may hold.
@@ -1523,3 +1535,4 @@ static void __exit qfq_exit(void)
module_init(qfq_init);
module_exit(qfq_exit);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Quick Fair Queueing Plus qdisc");
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 16277b6a0238..607b6c8b3a9b 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -563,3 +563,4 @@ module_init(red_module_init)
module_exit(red_module_exit)
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Random Early Detection qdisc");
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 66dcb18638fe..eb77558fa367 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -937,3 +937,4 @@ static void __exit sfq_module_exit(void)
module_init(sfq_module_init)
module_exit(sfq_module_exit)
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Stochastic Fairness qdisc");
diff --git a/net/sched/sch_skbprio.c b/net/sched/sch_skbprio.c
index 5df2dacb7b1a..28beb11762d8 100644
--- a/net/sched/sch_skbprio.c
+++ b/net/sched/sch_skbprio.c
@@ -307,3 +307,4 @@ module_init(skbprio_module_init)
module_exit(skbprio_module_exit)
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SKB priority based scheduling qdisc");
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 1cb5e41c0ec7..31a8252bd09c 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -1015,7 +1015,7 @@ static const struct nla_policy taprio_tc_policy[TCA_TAPRIO_TC_ENTRY_MAX + 1] = {
TC_FP_PREEMPTIBLE),
};
-static struct netlink_range_validation_signed taprio_cycle_time_range = {
+static const struct netlink_range_validation_signed taprio_cycle_time_range = {
.min = 0,
.max = INT_MAX,
};
@@ -2572,3 +2572,4 @@ static void __exit taprio_module_exit(void)
module_init(taprio_module_init);
module_exit(taprio_module_exit);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Time Aware Priority qdisc");
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 17d2d00ddb18..dd6b1a723bf7 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -621,3 +621,4 @@ static void __exit tbf_module_exit(void)
module_init(tbf_module_init)
module_exit(tbf_module_exit)
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Token Bucket Filter qdisc");
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 7721239c185f..59304611dc00 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -523,3 +523,4 @@ module_init(teql_init);
module_exit(teql_exit);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("True (or trivial) link equalizer qdisc");
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 796529167e8d..c45c192b7878 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1159,8 +1159,7 @@ int sctp_assoc_update(struct sctp_association *asoc,
/* Add any peer addresses from the new association. */
list_for_each_entry(trans, &new->peer.transport_addr_list,
transports)
- if (!sctp_assoc_lookup_paddr(asoc, &trans->ipaddr) &&
- !sctp_assoc_add_peer(asoc, &trans->ipaddr,
+ if (!sctp_assoc_add_peer(asoc, &trans->ipaddr,
GFP_ATOMIC, trans->state))
return -ENOMEM;
diff --git a/net/sctp/diag.c b/net/sctp/diag.c
index c3d6b92dd386..eb05131ff1dd 100644
--- a/net/sctp/diag.c
+++ b/net/sctp/diag.c
@@ -527,4 +527,5 @@ static void __exit sctp_diag_exit(void)
module_init(sctp_diag_init);
module_exit(sctp_diag_exit);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SCTP socket monitoring via SOCK_DIAG");
MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-132);
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 43f2731bf590..24368f755ab1 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -128,7 +128,6 @@ static void sctp_v6_err_handle(struct sctp_transport *t, struct sk_buff *skb,
{
struct sctp_association *asoc = t->asoc;
struct sock *sk = asoc->base.sk;
- struct ipv6_pinfo *np;
int err = 0;
switch (type) {
@@ -149,9 +148,8 @@ static void sctp_v6_err_handle(struct sctp_transport *t, struct sk_buff *skb,
break;
}
- np = inet6_sk(sk);
icmpv6_err_convert(type, code, &err);
- if (!sock_owned_by_user(sk) && np->recverr) {
+ if (!sock_owned_by_user(sk) && inet6_test_bit(RECVERR6, sk)) {
sk->sk_err = err;
sk_error_report(sk);
} else {
@@ -249,7 +247,7 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *t)
rcu_read_lock();
res = ip6_xmit(sk, skb, fl6, sk->sk_mark,
rcu_dereference(np->opt),
- tclass, sk->sk_priority);
+ tclass, READ_ONCE(sk->sk_priority));
rcu_read_unlock();
return res;
}
@@ -298,7 +296,8 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
if (t->flowlabel & SCTP_FLOWLABEL_SET_MASK)
fl6->flowlabel = htonl(t->flowlabel & SCTP_FLOWLABEL_VAL_MASK);
- if (np->sndflow && (fl6->flowlabel & IPV6_FLOWLABEL_MASK)) {
+ if (inet6_test_bit(SNDFLOW, sk) &&
+ (fl6->flowlabel & IPV6_FLOWLABEL_MASK)) {
struct ip6_flowlabel *flowlabel;
flowlabel = fl6_sock_lookup(sk, fl6->flowlabel);
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index f13d6a34f32f..ec00ee75d59a 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -282,7 +282,7 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
assoc->init_retries, assoc->shutdown_retries,
assoc->rtx_data_chunks,
refcount_read(&sk->sk_wmem_alloc),
- sk->sk_wmem_queued,
+ READ_ONCE(sk->sk_wmem_queued),
sk->sk_sndbuf,
sk->sk_rcvbuf);
seq_printf(seq, "\n");
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 2185f44198de..94c6dd53cd62 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -426,7 +426,7 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
struct dst_entry *dst = NULL;
union sctp_addr *daddr = &t->ipaddr;
union sctp_addr dst_saddr;
- __u8 tos = inet_sk(sk)->tos;
+ u8 tos = READ_ONCE(inet_sk(sk)->tos);
if (t->dscp & SCTP_DSCP_SET_MASK)
tos = t->dscp & SCTP_DSCP_VAL_MASK;
@@ -1057,7 +1057,7 @@ static inline int sctp_v4_xmit(struct sk_buff *skb, struct sctp_transport *t)
struct flowi4 *fl4 = &t->fl.u.ip4;
struct sock *sk = skb->sk;
struct inet_sock *inet = inet_sk(sk);
- __u8 dscp = inet->tos;
+ __u8 dscp = READ_ONCE(inet->tos);
__be16 df = 0;
pr_debug("%s: skb:%p, len:%d, src:%pI4, dst:%pI4\n", __func__, skb,
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 08527d882e56..f80208edd6a5 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -3303,7 +3303,7 @@ struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc,
/* Process the TLVs contained within the ASCONF chunk. */
sctp_walk_params(param, addip) {
- /* Skip preceeding address parameters. */
+ /* Skip preceding address parameters. */
if (param.p->type == SCTP_PARAM_IPV4_ADDRESS ||
param.p->type == SCTP_PARAM_IPV6_ADDRESS)
continue;
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index fd0631e70d46..6b9fcdb0952a 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -69,7 +69,7 @@
#include <net/sctp/stream_sched.h>
/* Forward declarations for internal helper functions. */
-static bool sctp_writeable(struct sock *sk);
+static bool sctp_writeable(const struct sock *sk);
static void sctp_wfree(struct sk_buff *skb);
static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
size_t msg_len);
@@ -140,7 +140,7 @@ static inline void sctp_set_owner_w(struct sctp_chunk *chunk)
refcount_add(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc);
asoc->sndbuf_used += chunk->skb->truesize + sizeof(struct sctp_chunk);
- sk->sk_wmem_queued += chunk->skb->truesize + sizeof(struct sctp_chunk);
+ sk_wmem_queued_add(sk, chunk->skb->truesize + sizeof(struct sctp_chunk));
sk_mem_charge(sk, chunk->skb->truesize);
}
@@ -2099,6 +2099,13 @@ static int sctp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
pr_debug("%s: sk:%p, msghdr:%p, len:%zd, flags:0x%x, addr_len:%p)\n",
__func__, sk, msg, len, flags, addr_len);
+ if (unlikely(flags & MSG_ERRQUEUE))
+ return inet_recv_error(sk, msg, len, addr_len);
+
+ if (sk_can_busy_loop(sk) &&
+ skb_queue_empty_lockless(&sk->sk_receive_queue))
+ sk_busy_loop(sk, flags & MSG_DONTWAIT);
+
lock_sock(sk);
if (sctp_style(sk, TCP) && !sctp_sstate(sk, ESTABLISHED) &&
@@ -2450,6 +2457,7 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
if (trans) {
trans->hbinterval =
msecs_to_jiffies(params->spp_hbinterval);
+ sctp_transport_reset_hb_timer(trans);
} else if (asoc) {
asoc->hbinterval =
msecs_to_jiffies(params->spp_hbinterval);
@@ -9042,12 +9050,6 @@ struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags, int *err)
if (sk->sk_shutdown & RCV_SHUTDOWN)
break;
- if (sk_can_busy_loop(sk)) {
- sk_busy_loop(sk, flags & MSG_DONTWAIT);
-
- if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
- continue;
- }
/* User doesn't want to wait. */
error = -EAGAIN;
@@ -9144,7 +9146,7 @@ static void sctp_wfree(struct sk_buff *skb)
struct sock *sk = asoc->base.sk;
sk_mem_uncharge(sk, skb->truesize);
- sk->sk_wmem_queued -= skb->truesize + sizeof(struct sctp_chunk);
+ sk_wmem_queued_add(sk, -(skb->truesize + sizeof(struct sctp_chunk)));
asoc->sndbuf_used -= skb->truesize + sizeof(struct sctp_chunk);
WARN_ON(refcount_sub_and_test(sizeof(struct sctp_chunk),
&sk->sk_wmem_alloc));
@@ -9299,9 +9301,9 @@ void sctp_write_space(struct sock *sk)
* UDP-style sockets or TCP-style sockets, this code should work.
* - Daisy
*/
-static bool sctp_writeable(struct sock *sk)
+static bool sctp_writeable(const struct sock *sk)
{
- return sk->sk_sndbuf > sk->sk_wmem_queued;
+ return READ_ONCE(sk->sk_sndbuf) > READ_ONCE(sk->sk_wmem_queued);
}
/* Wait for an association to go into ESTABLISHED state. If timeout is 0,
diff --git a/net/smc/Kconfig b/net/smc/Kconfig
index 1ab3c5a2c5ad..746be3996768 100644
--- a/net/smc/Kconfig
+++ b/net/smc/Kconfig
@@ -2,6 +2,7 @@
config SMC
tristate "SMC socket protocol family"
depends on INET && INFINIBAND
+ depends on m || ISM != m
help
SMC-R provides a "sockets over RDMA" solution making use of
RDMA over Converged Ethernet (RoCE) technology to upgrade
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index bacdd971615e..a2cb30af46cb 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -275,7 +275,7 @@ static int __smc_release(struct smc_sock *smc)
if (!smc->use_fallback) {
rc = smc_close_active(smc);
- sock_set_flag(sk, SOCK_DEAD);
+ smc_sock_set_flag(sk, SOCK_DEAD);
sk->sk_shutdown |= SHUTDOWN_MASK;
} else {
if (sk->sk_state != SMC_CLOSED) {
@@ -493,7 +493,7 @@ static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
nsk->sk_sndtimeo = osk->sk_sndtimeo;
nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
nsk->sk_mark = READ_ONCE(osk->sk_mark);
- nsk->sk_priority = osk->sk_priority;
+ nsk->sk_priority = READ_ONCE(osk->sk_priority);
nsk->sk_rcvlowat = osk->sk_rcvlowat;
nsk->sk_bound_dev_if = osk->sk_bound_dev_if;
nsk->sk_err = osk->sk_err;
@@ -598,8 +598,12 @@ static int smcr_clnt_conf_first_link(struct smc_sock *smc)
struct smc_llc_qentry *qentry;
int rc;
- /* receive CONFIRM LINK request from server over RoCE fabric */
- qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME,
+ /* Receive CONFIRM LINK request from server over RoCE fabric.
+ * Increasing the client's timeout by twice as much as the server's
+ * timeout by default can temporarily avoid decline messages of
+ * both sides crossing or colliding
+ */
+ qentry = smc_llc_wait(link->lgr, NULL, 2 * SMC_LLC_WAIT_TIME,
SMC_LLC_CONFIRM_LINK);
if (!qentry) {
struct smc_clc_msg_decline dclc;
@@ -673,8 +677,6 @@ static bool smc_isascii(char *hostname)
static void smc_conn_save_peer_info_fce(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm *clc)
{
- struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
- (struct smc_clc_msg_accept_confirm_v2 *)clc;
struct smc_clc_first_contact_ext *fce;
int clc_v2_len;
@@ -683,17 +685,15 @@ static void smc_conn_save_peer_info_fce(struct smc_sock *smc,
return;
if (smc->conn.lgr->is_smcd) {
- memcpy(smc->conn.lgr->negotiated_eid, clc_v2->d1.eid,
+ memcpy(smc->conn.lgr->negotiated_eid, clc->d1.eid,
SMC_MAX_EID_LEN);
- clc_v2_len = offsetofend(struct smc_clc_msg_accept_confirm_v2,
- d1);
+ clc_v2_len = offsetofend(struct smc_clc_msg_accept_confirm, d1);
} else {
- memcpy(smc->conn.lgr->negotiated_eid, clc_v2->r1.eid,
+ memcpy(smc->conn.lgr->negotiated_eid, clc->r1.eid,
SMC_MAX_EID_LEN);
- clc_v2_len = offsetofend(struct smc_clc_msg_accept_confirm_v2,
- r1);
+ clc_v2_len = offsetofend(struct smc_clc_msg_accept_confirm, r1);
}
- fce = (struct smc_clc_first_contact_ext *)(((u8 *)clc_v2) + clc_v2_len);
+ fce = (struct smc_clc_first_contact_ext *)(((u8 *)clc) + clc_v2_len);
smc->conn.lgr->peer_os = fce->os_type;
smc->conn.lgr->peer_smc_release = fce->release;
if (smc_isascii(fce->hostname))
@@ -719,7 +719,7 @@ static void smcd_conn_save_peer_info(struct smc_sock *smc,
int bufsize = smc_uncompress_bufsize(clc->d0.dmbe_size);
smc->conn.peer_rmbe_idx = clc->d0.dmbe_idx;
- smc->conn.peer_token = clc->d0.token;
+ smc->conn.peer_token = ntohll(clc->d0.token);
/* msg header takes up space in the buffer */
smc->conn.peer_rmbe_size = bufsize - sizeof(struct smcd_cdc_msg);
atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
@@ -1044,7 +1044,8 @@ static int smc_find_ism_v2_device_clnt(struct smc_sock *smc,
{
int rc = SMC_CLC_DECL_NOSMCDDEV;
struct smcd_dev *smcd;
- int i = 1;
+ int i = 1, entry = 1;
+ bool is_virtual;
u16 chid;
if (smcd_indicated(ini->smc_type_v1))
@@ -1056,14 +1057,23 @@ static int smc_find_ism_v2_device_clnt(struct smc_sock *smc,
chid = smc_ism_get_chid(smcd);
if (!smc_find_ism_v2_is_unique_chid(chid, ini, i))
continue;
+ is_virtual = __smc_ism_is_virtual(chid);
if (!smc_pnet_is_pnetid_set(smcd->pnetid) ||
smc_pnet_is_ndev_pnetid(sock_net(&smc->sk), smcd->pnetid)) {
+ if (is_virtual && entry == SMCD_CLC_MAX_V2_GID_ENTRIES)
+ /* It's the last GID-CHID entry left in CLC
+ * Proposal SMC-Dv2 extension, but a virtual
+ * ISM device will take two entries. So give
+ * up it and try the next potential ISM device.
+ */
+ continue;
ini->ism_dev[i] = smcd;
ini->ism_chid[i] = chid;
ini->is_smcd = true;
rc = 0;
i++;
- if (i > SMC_MAX_ISM_DEVS)
+ entry = is_virtual ? entry + 2 : entry + 1;
+ if (entry > SMCD_CLC_MAX_V2_GID_ENTRIES)
break;
}
}
@@ -1145,13 +1155,13 @@ static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc,
}
#define SMC_CLC_MAX_ACCEPT_LEN \
- (sizeof(struct smc_clc_msg_accept_confirm_v2) + \
+ (sizeof(struct smc_clc_msg_accept_confirm) + \
sizeof(struct smc_clc_first_contact_ext_v2x) + \
sizeof(struct smc_clc_msg_trail))
/* CLC handshake during connect */
static int smc_connect_clc(struct smc_sock *smc,
- struct smc_clc_msg_accept_confirm_v2 *aclc2,
+ struct smc_clc_msg_accept_confirm *aclc,
struct smc_init_info *ini)
{
int rc = 0;
@@ -1161,7 +1171,7 @@ static int smc_connect_clc(struct smc_sock *smc,
if (rc)
return rc;
/* receive SMC Accept CLC message */
- return smc_clc_wait_msg(smc, aclc2, SMC_CLC_MAX_ACCEPT_LEN,
+ return smc_clc_wait_msg(smc, aclc, SMC_CLC_MAX_ACCEPT_LEN,
SMC_CLC_ACCEPT, CLC_WAIT_TIME);
}
@@ -1197,10 +1207,9 @@ static int smc_connect_rdma_v2_prepare(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm *aclc,
struct smc_init_info *ini)
{
- struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
- (struct smc_clc_msg_accept_confirm_v2 *)aclc;
struct smc_clc_first_contact_ext *fce =
- smc_get_clc_first_contact_ext(clc_v2, false);
+ smc_get_clc_first_contact_ext(aclc, false);
+ struct net *net = sock_net(&smc->sk);
int rc;
if (!ini->first_contact_peer || aclc->hdr.version == SMC_V1)
@@ -1210,7 +1219,7 @@ static int smc_connect_rdma_v2_prepare(struct smc_sock *smc,
memcpy(ini->smcrv2.nexthop_mac, &aclc->r0.lcl.mac, ETH_ALEN);
ini->smcrv2.uses_gateway = false;
} else {
- if (smc_ib_find_route(smc->clcsock->sk->sk_rcv_saddr,
+ if (smc_ib_find_route(net, smc->clcsock->sk->sk_rcv_saddr,
smc_ib_gid_to_ipv4(aclc->r0.lcl.gid),
ini->smcrv2.nexthop_mac,
&ini->smcrv2.uses_gateway))
@@ -1322,10 +1331,7 @@ static int smc_connect_rdma(struct smc_sock *smc,
}
if (aclc->hdr.version > SMC_V1) {
- struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
- (struct smc_clc_msg_accept_confirm_v2 *)aclc;
-
- eid = clc_v2->r1.eid;
+ eid = aclc->r1.eid;
if (ini->first_contact_local)
smc_fill_gid_list(link->lgr, &ini->smcrv2.gidlist,
link->smcibdev, link->gid);
@@ -1366,7 +1372,7 @@ connect_abort:
* Determine from the CHID of the received CLC ACCEPT the ISM device chosen.
*/
static int
-smc_v2_determine_accepted_chid(struct smc_clc_msg_accept_confirm_v2 *aclc,
+smc_v2_determine_accepted_chid(struct smc_clc_msg_accept_confirm *aclc,
struct smc_init_info *ini)
{
int i;
@@ -1393,12 +1399,9 @@ static int smc_connect_ism(struct smc_sock *smc,
ini->first_contact_peer = aclc->hdr.typev2 & SMC_FIRST_CONTACT_MASK;
if (aclc->hdr.version == SMC_V2) {
- struct smc_clc_msg_accept_confirm_v2 *aclc_v2 =
- (struct smc_clc_msg_accept_confirm_v2 *)aclc;
-
if (ini->first_contact_peer) {
struct smc_clc_first_contact_ext *fce =
- smc_get_clc_first_contact_ext(aclc_v2, true);
+ smc_get_clc_first_contact_ext(aclc, true);
ini->release_nr = fce->release;
rc = smc_clc_clnt_v2x_features_validate(fce, ini);
@@ -1406,11 +1409,16 @@ static int smc_connect_ism(struct smc_sock *smc,
return rc;
}
- rc = smc_v2_determine_accepted_chid(aclc_v2, ini);
+ rc = smc_v2_determine_accepted_chid(aclc, ini);
if (rc)
return rc;
+
+ if (__smc_ism_is_virtual(ini->ism_chid[ini->ism_selected]))
+ ini->ism_peer_gid[ini->ism_selected].gid_ext =
+ ntohll(aclc->d1.gid_ext);
+ /* for non-virtual ISM devices, peer gid_ext remains 0. */
}
- ini->ism_peer_gid[ini->ism_selected] = aclc->d0.gid;
+ ini->ism_peer_gid[ini->ism_selected].gid = ntohll(aclc->d0.gid);
/* there is only one lgr role for SMC-D; use server lock */
mutex_lock(&smc_server_lgr_pending);
@@ -1432,12 +1440,8 @@ static int smc_connect_ism(struct smc_sock *smc,
smc_rx_init(smc);
smc_tx_init(smc);
- if (aclc->hdr.version > SMC_V1) {
- struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
- (struct smc_clc_msg_accept_confirm_v2 *)aclc;
-
- eid = clc_v2->d1.eid;
- }
+ if (aclc->hdr.version > SMC_V1)
+ eid = aclc->d1.eid;
rc = smc_clc_send_confirm(smc, ini->first_contact_local,
aclc->hdr.version, eid, ini);
@@ -1488,7 +1492,6 @@ static int smc_connect_check_aclc(struct smc_init_info *ini,
static int __smc_connect(struct smc_sock *smc)
{
u8 version = smc_ism_is_v2_capable() ? SMC_V2 : SMC_V1;
- struct smc_clc_msg_accept_confirm_v2 *aclc2;
struct smc_clc_msg_accept_confirm *aclc;
struct smc_init_info *ini = NULL;
u8 *buf = NULL;
@@ -1536,11 +1539,10 @@ static int __smc_connect(struct smc_sock *smc)
rc = SMC_CLC_DECL_MEM;
goto fallback;
}
- aclc2 = (struct smc_clc_msg_accept_confirm_v2 *)buf;
- aclc = (struct smc_clc_msg_accept_confirm *)aclc2;
+ aclc = (struct smc_clc_msg_accept_confirm *)buf;
/* perform CLC handshake */
- rc = smc_connect_clc(smc, aclc2, ini);
+ rc = smc_connect_clc(smc, aclc, ini);
if (rc) {
/* -EAGAIN on timeout, see tcp_recvmsg() */
if (rc == -EAGAIN) {
@@ -1742,7 +1744,7 @@ static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
if (new_clcsock)
sock_release(new_clcsock);
new_sk->sk_state = SMC_CLOSED;
- sock_set_flag(new_sk, SOCK_DEAD);
+ smc_sock_set_flag(new_sk, SOCK_DEAD);
sock_put(new_sk); /* final */
*new_smc = NULL;
goto out;
@@ -2101,7 +2103,8 @@ static bool smc_is_already_selected(struct smcd_dev *smcd,
/* check for ISM devices matching proposed ISM devices */
static void smc_check_ism_v2_match(struct smc_init_info *ini,
- u16 proposed_chid, u64 proposed_gid,
+ u16 proposed_chid,
+ struct smcd_gid *proposed_gid,
unsigned int *matches)
{
struct smcd_dev *smcd;
@@ -2113,7 +2116,11 @@ static void smc_check_ism_v2_match(struct smc_init_info *ini,
continue;
if (smc_ism_get_chid(smcd) == proposed_chid &&
!smc_ism_cantalk(proposed_gid, ISM_RESERVED_VLANID, smcd)) {
- ini->ism_peer_gid[*matches] = proposed_gid;
+ ini->ism_peer_gid[*matches].gid = proposed_gid->gid;
+ if (__smc_ism_is_virtual(proposed_chid))
+ ini->ism_peer_gid[*matches].gid_ext =
+ proposed_gid->gid_ext;
+ /* non-virtual ISM's peer gid_ext remains 0. */
ini->ism_dev[*matches] = smcd;
(*matches)++;
break;
@@ -2135,9 +2142,11 @@ static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc,
struct smc_clc_v2_extension *smc_v2_ext;
struct smc_clc_msg_smcd *pclc_smcd;
unsigned int matches = 0;
+ struct smcd_gid smcd_gid;
u8 smcd_version;
u8 *eid = NULL;
int i, rc;
+ u16 chid;
if (!(ini->smcd_version & SMC_V2) || !smcd_indicated(ini->smc_type_v2))
goto not_found;
@@ -2147,18 +2156,35 @@ static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc,
smcd_v2_ext = smc_get_clc_smcd_v2_ext(smc_v2_ext);
mutex_lock(&smcd_dev_list.mutex);
- if (pclc_smcd->ism.chid)
+ if (pclc_smcd->ism.chid) {
/* check for ISM device matching proposed native ISM device */
+ smcd_gid.gid = ntohll(pclc_smcd->ism.gid);
+ smcd_gid.gid_ext = 0;
smc_check_ism_v2_match(ini, ntohs(pclc_smcd->ism.chid),
- ntohll(pclc_smcd->ism.gid), &matches);
- for (i = 1; i <= smc_v2_ext->hdr.ism_gid_cnt; i++) {
+ &smcd_gid, &matches);
+ }
+ for (i = 0; i < smc_v2_ext->hdr.ism_gid_cnt; i++) {
/* check for ISM devices matching proposed non-native ISM
* devices
*/
- smc_check_ism_v2_match(ini,
- ntohs(smcd_v2_ext->gidchid[i - 1].chid),
- ntohll(smcd_v2_ext->gidchid[i - 1].gid),
- &matches);
+ smcd_gid.gid = ntohll(smcd_v2_ext->gidchid[i].gid);
+ smcd_gid.gid_ext = 0;
+ chid = ntohs(smcd_v2_ext->gidchid[i].chid);
+ if (__smc_ism_is_virtual(chid)) {
+ if ((i + 1) == smc_v2_ext->hdr.ism_gid_cnt ||
+ chid != ntohs(smcd_v2_ext->gidchid[i + 1].chid))
+ /* each virtual ISM device takes two GID-CHID
+ * entries and CHID of the second entry repeats
+ * that of the first entry.
+ *
+ * So check if the next GID-CHID entry exists
+ * and both two entries' CHIDs are the same.
+ */
+ continue;
+ smcd_gid.gid_ext =
+ ntohll(smcd_v2_ext->gidchid[++i].gid);
+ }
+ smc_check_ism_v2_match(ini, chid, &smcd_gid, &matches);
}
mutex_unlock(&smcd_dev_list.mutex);
@@ -2207,7 +2233,8 @@ static void smc_find_ism_v1_device_serv(struct smc_sock *new_smc,
if (!(ini->smcd_version & SMC_V1) || !smcd_indicated(ini->smc_type_v1))
goto not_found;
ini->is_smcd = true; /* prepare ISM check */
- ini->ism_peer_gid[0] = ntohll(pclc_smcd->ism.gid);
+ ini->ism_peer_gid[0].gid = ntohll(pclc_smcd->ism.gid);
+ ini->ism_peer_gid[0].gid_ext = 0;
rc = smc_find_ism_device(new_smc, ini);
if (rc)
goto not_found;
@@ -2361,7 +2388,7 @@ static int smc_listen_find_device(struct smc_sock *new_smc,
smc_find_ism_store_rc(rc, ini);
return (!rc) ? 0 : ini->rc;
}
- return SMC_CLC_DECL_NOSMCDEV;
+ return prfx_rc;
}
/* listen worker: finish RDMA setup */
@@ -2456,7 +2483,7 @@ static void smc_listen_work(struct work_struct *work)
if (rc)
goto out_decl;
- rc = smc_clc_srv_v2x_features_validate(pclc, ini);
+ rc = smc_clc_srv_v2x_features_validate(new_smc, pclc, ini);
if (rc)
goto out_decl;
diff --git a/net/smc/smc.h b/net/smc/smc.h
index 24745fde4ac2..df64efd2dee8 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -29,9 +29,6 @@
#define SMCPROTO_SMC 0 /* SMC protocol, IPv4 */
#define SMCPROTO_SMC6 1 /* SMC protocol, IPv6 */
-#define SMC_MAX_ISM_DEVS 8 /* max # of proposed non-native ISM
- * devices
- */
#define SMC_AUTOCORKING_DEFAULT_SIZE 0x10000 /* 64K by default */
extern struct proto smc_proto;
@@ -58,6 +55,13 @@ enum smc_state { /* possible states of an SMC socket */
SMC_PROCESSABORT = 27,
};
+enum smc_supplemental_features {
+ SMC_SPF_VIRT_ISM_DEV = 0,
+};
+
+#define SMC_FEATURE_MASK \
+ (BIT(SMC_SPF_VIRT_ISM_DEV))
+
struct smc_link_group;
struct smc_wr_rx_hdr { /* common prefix part of LLC and CDC to demultiplex */
@@ -196,7 +200,6 @@ struct smc_connection {
* - dec on polled tx cqe
*/
wait_queue_head_t cdc_pend_tx_wq; /* wakeup on no cdc_pend_tx_wr*/
- atomic_t tx_pushing; /* nr_threads trying tx push */
struct delayed_work tx_work; /* retry of smc_cdc_msg_send */
u32 tx_off; /* base offset in peer rmb */
@@ -377,4 +380,9 @@ int smc_nl_dump_hs_limitation(struct sk_buff *skb, struct netlink_callback *cb);
int smc_nl_enable_hs_limitation(struct sk_buff *skb, struct genl_info *info);
int smc_nl_disable_hs_limitation(struct sk_buff *skb, struct genl_info *info);
+static inline void smc_sock_set_flag(struct sock *sk, enum sock_flags flag)
+{
+ set_bit(flag, &sk->sk_flags);
+}
+
#endif /* __SMC_H */
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index 89105e95b452..3c06625ceb20 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -28,13 +28,15 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd,
{
struct smc_cdc_tx_pend *cdcpend = (struct smc_cdc_tx_pend *)pnd_snd;
struct smc_connection *conn = cdcpend->conn;
+ struct smc_buf_desc *sndbuf_desc;
struct smc_sock *smc;
int diff;
+ sndbuf_desc = conn->sndbuf_desc;
smc = container_of(conn, struct smc_sock, conn);
bh_lock_sock(&smc->sk);
- if (!wc_status) {
- diff = smc_curs_diff(cdcpend->conn->sndbuf_desc->len,
+ if (!wc_status && sndbuf_desc) {
+ diff = smc_curs_diff(sndbuf_desc->len,
&cdcpend->conn->tx_curs_fin,
&cdcpend->cursor);
/* sndbuf_space is decreased in smc_sendmsg */
@@ -114,9 +116,6 @@ int smc_cdc_msg_send(struct smc_connection *conn,
union smc_host_cursor cfed;
int rc;
- if (unlikely(!READ_ONCE(conn->sndbuf_desc)))
- return -ENOBUFS;
-
smc_cdc_add_pending_send(conn, pend);
conn->tx_cdc_seq++;
@@ -385,7 +384,7 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc,
smc->sk.sk_shutdown |= RCV_SHUTDOWN;
if (smc->clcsock && smc->clcsock->sk)
smc->clcsock->sk->sk_shutdown |= RCV_SHUTDOWN;
- sock_set_flag(&smc->sk, SOCK_DONE);
+ smc_sock_set_flag(&smc->sk, SOCK_DONE);
sock_hold(&smc->sk); /* sock_put in close_work */
if (!queue_work(smc_close_wq, &conn->close_work))
sock_put(&smc->sk);
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index 8deb46c28f1d..9a13709bea1c 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -155,10 +155,12 @@ static int smc_clc_ueid_remove(char *ueid)
rc = 0;
}
}
+#if IS_ENABLED(CONFIG_S390)
if (!rc && !smc_clc_eid_table.ueid_cnt) {
smc_clc_eid_table.seid_enabled = 1;
rc = -EAGAIN; /* indicate success and enabling of seid */
}
+#endif
write_unlock(&smc_clc_eid_table.lock);
return rc;
}
@@ -273,22 +275,30 @@ err:
int smc_nl_enable_seid(struct sk_buff *skb, struct genl_info *info)
{
+#if IS_ENABLED(CONFIG_S390)
write_lock(&smc_clc_eid_table.lock);
smc_clc_eid_table.seid_enabled = 1;
write_unlock(&smc_clc_eid_table.lock);
return 0;
+#else
+ return -EOPNOTSUPP;
+#endif
}
int smc_nl_disable_seid(struct sk_buff *skb, struct genl_info *info)
{
int rc = 0;
+#if IS_ENABLED(CONFIG_S390)
write_lock(&smc_clc_eid_table.lock);
if (!smc_clc_eid_table.ueid_cnt)
rc = -ENOENT;
else
smc_clc_eid_table.seid_enabled = 0;
write_unlock(&smc_clc_eid_table.lock);
+#else
+ rc = -EOPNOTSUPP;
+#endif
return rc;
}
@@ -377,9 +387,9 @@ static bool smc_clc_msg_prop_valid(struct smc_clc_msg_proposal *pclc)
/* check arriving CLC accept or confirm */
static bool
-smc_clc_msg_acc_conf_valid(struct smc_clc_msg_accept_confirm_v2 *clc_v2)
+smc_clc_msg_acc_conf_valid(struct smc_clc_msg_accept_confirm *clc)
{
- struct smc_clc_msg_hdr *hdr = &clc_v2->hdr;
+ struct smc_clc_msg_hdr *hdr = &clc->hdr;
if (hdr->typev1 != SMC_TYPE_R && hdr->typev1 != SMC_TYPE_D)
return false;
@@ -418,15 +428,16 @@ smc_clc_msg_decl_valid(struct smc_clc_msg_decline *dclc)
return true;
}
-static int smc_clc_fill_fce(struct smc_clc_first_contact_ext_v2x *fce,
- struct smc_init_info *ini)
+static int smc_clc_fill_fce_v2x(struct smc_clc_first_contact_ext_v2x *fce_v2x,
+ struct smc_init_info *ini)
{
- int ret = sizeof(*fce);
+ int ret = sizeof(*fce_v2x);
- memset(fce, 0, sizeof(*fce));
- fce->fce_v2_base.os_type = SMC_CLC_OS_LINUX;
- fce->fce_v2_base.release = ini->release_nr;
- memcpy(fce->fce_v2_base.hostname, smc_hostname, sizeof(smc_hostname));
+ memset(fce_v2x, 0, sizeof(*fce_v2x));
+ fce_v2x->fce_v2_base.os_type = SMC_CLC_OS_LINUX;
+ fce_v2x->fce_v2_base.release = ini->release_nr;
+ memcpy(fce_v2x->fce_v2_base.hostname,
+ smc_hostname, sizeof(smc_hostname));
if (ini->is_smcd && ini->release_nr < SMC_RELEASE_1) {
ret = sizeof(struct smc_clc_first_contact_ext);
goto out;
@@ -434,9 +445,10 @@ static int smc_clc_fill_fce(struct smc_clc_first_contact_ext_v2x *fce,
if (ini->release_nr >= SMC_RELEASE_1) {
if (!ini->is_smcd) {
- fce->max_conns = ini->max_conns;
- fce->max_links = ini->max_links;
+ fce_v2x->max_conns = ini->max_conns;
+ fce_v2x->max_links = ini->max_links;
}
+ fce_v2x->feature_mask = htons(ini->feature_mask);
}
out:
@@ -448,7 +460,7 @@ out:
*/
static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm, bool check_trl)
{
- struct smc_clc_msg_accept_confirm_v2 *clc_v2;
+ struct smc_clc_msg_accept_confirm *clc;
struct smc_clc_msg_proposal *pclc;
struct smc_clc_msg_decline *dclc;
struct smc_clc_msg_trail *trl;
@@ -466,12 +478,11 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm, bool check_trl)
break;
case SMC_CLC_ACCEPT:
case SMC_CLC_CONFIRM:
- clc_v2 = (struct smc_clc_msg_accept_confirm_v2 *)clcm;
- if (!smc_clc_msg_acc_conf_valid(clc_v2))
+ clc = (struct smc_clc_msg_accept_confirm *)clcm;
+ if (!smc_clc_msg_acc_conf_valid(clc))
return false;
trl = (struct smc_clc_msg_trail *)
- ((u8 *)clc_v2 + ntohs(clc_v2->hdr.length) -
- sizeof(*trl));
+ ((u8 *)clc + ntohs(clc->hdr.length) - sizeof(*trl));
break;
case SMC_CLC_DECLINE:
dclc = (struct smc_clc_msg_decline *)clcm;
@@ -824,6 +835,7 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini)
struct smc_clc_smcd_gid_chid *gidchids;
struct smc_clc_msg_proposal_area *pclc;
struct smc_clc_ipv6_prefix *ipv6_prfx;
+ struct net *net = sock_net(&smc->sk);
struct smc_clc_v2_extension *v2_ext;
struct smc_clc_msg_smcd *pclc_smcd;
struct smc_clc_msg_trail *trl;
@@ -881,11 +893,13 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini)
ETH_ALEN);
}
if (smcd_indicated(ini->smc_type_v1)) {
+ struct smcd_gid smcd_gid;
+
/* add SMC-D specifics */
if (ini->ism_dev[0]) {
smcd = ini->ism_dev[0];
- pclc_smcd->ism.gid =
- htonll(smcd->ops->get_local_gid(smcd));
+ smcd->ops->get_local_gid(smcd, &smcd_gid);
+ pclc_smcd->ism.gid = htonll(smcd_gid.gid);
pclc_smcd->ism.chid =
htons(smc_ism_get_chid(ini->ism_dev[0]));
}
@@ -906,6 +920,7 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini)
pclc_smcd->v2_ext_offset = htons(v2_ext_offset);
plen += sizeof(*v2_ext);
+ v2_ext->feature_mask = htons(SMC_FEATURE_MASK);
read_lock(&smc_clc_eid_table.lock);
v2_ext->hdr.eid_cnt = smc_clc_eid_table.ueid_cnt;
plen += smc_clc_eid_table.ueid_cnt * SMC_MAX_EID_LEN;
@@ -917,10 +932,11 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini)
read_unlock(&smc_clc_eid_table.lock);
}
if (smcd_indicated(ini->smc_type_v2)) {
+ struct smcd_gid smcd_gid;
u8 *eid = NULL;
+ int entry = 0;
v2_ext->hdr.flag.seid = smc_clc_eid_table.seid_enabled;
- v2_ext->hdr.ism_gid_cnt = ini->ism_offered_cnt;
v2_ext->hdr.smcd_v2_ext_offset = htons(sizeof(*v2_ext) -
offsetofend(struct smc_clnt_opts_area_hdr,
smcd_v2_ext_offset) +
@@ -932,19 +948,31 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini)
if (ini->ism_offered_cnt) {
for (i = 1; i <= ini->ism_offered_cnt; i++) {
smcd = ini->ism_dev[i];
- gidchids[i - 1].gid =
- htonll(smcd->ops->get_local_gid(smcd));
- gidchids[i - 1].chid =
+ smcd->ops->get_local_gid(smcd, &smcd_gid);
+ gidchids[entry].chid =
htons(smc_ism_get_chid(ini->ism_dev[i]));
+ gidchids[entry].gid = htonll(smcd_gid.gid);
+ if (smc_ism_is_virtual(smcd)) {
+ /* a virtual ISM device takes two
+ * entries. CHID of the second entry
+ * repeats that of the first entry.
+ */
+ gidchids[entry + 1].chid =
+ gidchids[entry].chid;
+ gidchids[entry + 1].gid =
+ htonll(smcd_gid.gid_ext);
+ entry++;
+ }
+ entry++;
}
- plen += ini->ism_offered_cnt *
- sizeof(struct smc_clc_smcd_gid_chid);
+ plen += entry * sizeof(struct smc_clc_smcd_gid_chid);
}
+ v2_ext->hdr.ism_gid_cnt = entry;
}
if (smcr_indicated(ini->smc_type_v2)) {
memcpy(v2_ext->roce, ini->smcrv2.ib_gid_v2, SMC_GID_SIZE);
- v2_ext->max_conns = SMC_CONN_PER_LGR_PREFER;
- v2_ext->max_links = SMC_LINKS_PER_LGR_MAX_PREFER;
+ v2_ext->max_conns = net->smc.sysctl_max_conns_per_lgr;
+ v2_ext->max_links = net->smc.sysctl_max_links_per_lgr;
}
pclc_base->hdr.length = htons(plen);
@@ -975,7 +1003,7 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini)
vec[i++].iov_len = sizeof(*smcd_v2_ext);
if (ini->ism_offered_cnt) {
vec[i].iov_base = gidchids;
- vec[i++].iov_len = ini->ism_offered_cnt *
+ vec[i++].iov_len = v2_ext->hdr.ism_gid_cnt *
sizeof(struct smc_clc_smcd_gid_chid);
}
}
@@ -996,110 +1024,143 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini)
return reason_code;
}
+static void
+smcd_clc_prep_confirm_accept(struct smc_connection *conn,
+ struct smc_clc_msg_accept_confirm *clc,
+ int first_contact, u8 version,
+ u8 *eid, struct smc_init_info *ini,
+ int *fce_len,
+ struct smc_clc_first_contact_ext_v2x *fce_v2x,
+ struct smc_clc_msg_trail *trl)
+{
+ struct smcd_dev *smcd = conn->lgr->smcd;
+ struct smcd_gid smcd_gid;
+ u16 chid;
+ int len;
+
+ /* SMC-D specific settings */
+ memcpy(clc->hdr.eyecatcher, SMCD_EYECATCHER,
+ sizeof(SMCD_EYECATCHER));
+ smcd->ops->get_local_gid(smcd, &smcd_gid);
+ clc->hdr.typev1 = SMC_TYPE_D;
+ clc->d0.gid = htonll(smcd_gid.gid);
+ clc->d0.token = htonll(conn->rmb_desc->token);
+ clc->d0.dmbe_size = conn->rmbe_size_comp;
+ clc->d0.dmbe_idx = 0;
+ memcpy(&clc->d0.linkid, conn->lgr->id, SMC_LGR_ID_SIZE);
+ if (version == SMC_V1) {
+ clc->hdr.length = htons(SMCD_CLC_ACCEPT_CONFIRM_LEN);
+ } else {
+ chid = smc_ism_get_chid(smcd);
+ clc->d1.chid = htons(chid);
+ if (eid && eid[0])
+ memcpy(clc->d1.eid, eid, SMC_MAX_EID_LEN);
+ if (__smc_ism_is_virtual(chid))
+ clc->d1.gid_ext = htonll(smcd_gid.gid_ext);
+ len = SMCD_CLC_ACCEPT_CONFIRM_LEN_V2;
+ if (first_contact) {
+ *fce_len = smc_clc_fill_fce_v2x(fce_v2x, ini);
+ len += *fce_len;
+ }
+ clc->hdr.length = htons(len);
+ }
+ memcpy(trl->eyecatcher, SMCD_EYECATCHER,
+ sizeof(SMCD_EYECATCHER));
+}
+
+static void
+smcr_clc_prep_confirm_accept(struct smc_connection *conn,
+ struct smc_clc_msg_accept_confirm *clc,
+ int first_contact, u8 version,
+ u8 *eid, struct smc_init_info *ini,
+ int *fce_len,
+ struct smc_clc_first_contact_ext_v2x *fce_v2x,
+ struct smc_clc_fce_gid_ext *gle,
+ struct smc_clc_msg_trail *trl)
+{
+ struct smc_link *link = conn->lnk;
+ int len;
+
+ /* SMC-R specific settings */
+ memcpy(clc->hdr.eyecatcher, SMC_EYECATCHER,
+ sizeof(SMC_EYECATCHER));
+ clc->hdr.typev1 = SMC_TYPE_R;
+ memcpy(clc->r0.lcl.id_for_peer, local_systemid,
+ sizeof(local_systemid));
+ memcpy(&clc->r0.lcl.gid, link->gid, SMC_GID_SIZE);
+ memcpy(&clc->r0.lcl.mac, &link->smcibdev->mac[link->ibport - 1],
+ ETH_ALEN);
+ hton24(clc->r0.qpn, link->roce_qp->qp_num);
+ clc->r0.rmb_rkey =
+ htonl(conn->rmb_desc->mr[link->link_idx]->rkey);
+ clc->r0.rmbe_idx = 1; /* for now: 1 RMB = 1 RMBE */
+ clc->r0.rmbe_alert_token = htonl(conn->alert_token_local);
+ switch (clc->hdr.type) {
+ case SMC_CLC_ACCEPT:
+ clc->r0.qp_mtu = link->path_mtu;
+ break;
+ case SMC_CLC_CONFIRM:
+ clc->r0.qp_mtu = min(link->path_mtu, link->peer_mtu);
+ break;
+ }
+ clc->r0.rmbe_size = conn->rmbe_size_comp;
+ clc->r0.rmb_dma_addr = conn->rmb_desc->is_vm ?
+ cpu_to_be64((uintptr_t)conn->rmb_desc->cpu_addr) :
+ cpu_to_be64((u64)sg_dma_address
+ (conn->rmb_desc->sgt[link->link_idx].sgl));
+ hton24(clc->r0.psn, link->psn_initial);
+ if (version == SMC_V1) {
+ clc->hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN);
+ } else {
+ if (eid && eid[0])
+ memcpy(clc->r1.eid, eid, SMC_MAX_EID_LEN);
+ len = SMCR_CLC_ACCEPT_CONFIRM_LEN_V2;
+ if (first_contact) {
+ *fce_len = smc_clc_fill_fce_v2x(fce_v2x, ini);
+ len += *fce_len;
+ fce_v2x->fce_v2_base.v2_direct =
+ !link->lgr->uses_gateway;
+ if (clc->hdr.type == SMC_CLC_CONFIRM) {
+ memset(gle, 0, sizeof(*gle));
+ gle->gid_cnt = ini->smcrv2.gidlist.len;
+ len += sizeof(*gle);
+ len += gle->gid_cnt * sizeof(gle->gid[0]);
+ }
+ }
+ clc->hdr.length = htons(len);
+ }
+ memcpy(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
+}
+
/* build and send CLC CONFIRM / ACCEPT message */
static int smc_clc_send_confirm_accept(struct smc_sock *smc,
- struct smc_clc_msg_accept_confirm_v2 *clc_v2,
+ struct smc_clc_msg_accept_confirm *clc,
int first_contact, u8 version,
u8 *eid, struct smc_init_info *ini)
{
+ struct smc_clc_first_contact_ext_v2x fce_v2x;
struct smc_connection *conn = &smc->conn;
- struct smc_clc_first_contact_ext_v2x fce;
- struct smc_clc_msg_accept_confirm *clc;
struct smc_clc_fce_gid_ext gle;
struct smc_clc_msg_trail trl;
- int i, len, fce_len;
+ int i, fce_len;
struct kvec vec[5];
struct msghdr msg;
/* send SMC Confirm CLC msg */
- clc = (struct smc_clc_msg_accept_confirm *)clc_v2;
clc->hdr.version = version; /* SMC version */
if (first_contact)
clc->hdr.typev2 |= SMC_FIRST_CONTACT_MASK;
- if (conn->lgr->is_smcd) {
- /* SMC-D specific settings */
- memcpy(clc->hdr.eyecatcher, SMCD_EYECATCHER,
- sizeof(SMCD_EYECATCHER));
- clc->hdr.typev1 = SMC_TYPE_D;
- clc->d0.gid =
- conn->lgr->smcd->ops->get_local_gid(conn->lgr->smcd);
- clc->d0.token = conn->rmb_desc->token;
- clc->d0.dmbe_size = conn->rmbe_size_comp;
- clc->d0.dmbe_idx = 0;
- memcpy(&clc->d0.linkid, conn->lgr->id, SMC_LGR_ID_SIZE);
- if (version == SMC_V1) {
- clc->hdr.length = htons(SMCD_CLC_ACCEPT_CONFIRM_LEN);
- } else {
- clc_v2->d1.chid =
- htons(smc_ism_get_chid(conn->lgr->smcd));
- if (eid && eid[0])
- memcpy(clc_v2->d1.eid, eid, SMC_MAX_EID_LEN);
- len = SMCD_CLC_ACCEPT_CONFIRM_LEN_V2;
- if (first_contact) {
- fce_len = smc_clc_fill_fce(&fce, ini);
- len += fce_len;
- }
- clc_v2->hdr.length = htons(len);
- }
- memcpy(trl.eyecatcher, SMCD_EYECATCHER,
- sizeof(SMCD_EYECATCHER));
- } else {
- struct smc_link *link = conn->lnk;
-
- /* SMC-R specific settings */
- memcpy(clc->hdr.eyecatcher, SMC_EYECATCHER,
- sizeof(SMC_EYECATCHER));
- clc->hdr.typev1 = SMC_TYPE_R;
- clc->hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN);
- memcpy(clc->r0.lcl.id_for_peer, local_systemid,
- sizeof(local_systemid));
- memcpy(&clc->r0.lcl.gid, link->gid, SMC_GID_SIZE);
- memcpy(&clc->r0.lcl.mac, &link->smcibdev->mac[link->ibport - 1],
- ETH_ALEN);
- hton24(clc->r0.qpn, link->roce_qp->qp_num);
- clc->r0.rmb_rkey =
- htonl(conn->rmb_desc->mr[link->link_idx]->rkey);
- clc->r0.rmbe_idx = 1; /* for now: 1 RMB = 1 RMBE */
- clc->r0.rmbe_alert_token = htonl(conn->alert_token_local);
- switch (clc->hdr.type) {
- case SMC_CLC_ACCEPT:
- clc->r0.qp_mtu = link->path_mtu;
- break;
- case SMC_CLC_CONFIRM:
- clc->r0.qp_mtu = min(link->path_mtu, link->peer_mtu);
- break;
- }
- clc->r0.rmbe_size = conn->rmbe_size_comp;
- clc->r0.rmb_dma_addr = conn->rmb_desc->is_vm ?
- cpu_to_be64((uintptr_t)conn->rmb_desc->cpu_addr) :
- cpu_to_be64((u64)sg_dma_address
- (conn->rmb_desc->sgt[link->link_idx].sgl));
- hton24(clc->r0.psn, link->psn_initial);
- if (version == SMC_V1) {
- clc->hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN);
- } else {
- if (eid && eid[0])
- memcpy(clc_v2->r1.eid, eid, SMC_MAX_EID_LEN);
- len = SMCR_CLC_ACCEPT_CONFIRM_LEN_V2;
- if (first_contact) {
- fce_len = smc_clc_fill_fce(&fce, ini);
- len += fce_len;
- fce.fce_v2_base.v2_direct = !link->lgr->uses_gateway;
- if (clc->hdr.type == SMC_CLC_CONFIRM) {
- memset(&gle, 0, sizeof(gle));
- gle.gid_cnt = ini->smcrv2.gidlist.len;
- len += sizeof(gle);
- len += gle.gid_cnt * sizeof(gle.gid[0]);
- }
- }
- clc_v2->hdr.length = htons(len);
- }
- memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
- }
-
+ if (conn->lgr->is_smcd)
+ smcd_clc_prep_confirm_accept(conn, clc, first_contact,
+ version, eid, ini, &fce_len,
+ &fce_v2x, &trl);
+ else
+ smcr_clc_prep_confirm_accept(conn, clc, first_contact,
+ version, eid, ini, &fce_len,
+ &fce_v2x, &gle, &trl);
memset(&msg, 0, sizeof(msg));
i = 0;
- vec[i].iov_base = clc_v2;
+ vec[i].iov_base = clc;
if (version > SMC_V1)
vec[i++].iov_len = (clc->hdr.typev1 == SMC_TYPE_D ?
SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 :
@@ -1111,7 +1172,7 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
SMCR_CLC_ACCEPT_CONFIRM_LEN) -
sizeof(trl);
if (version > SMC_V1 && first_contact) {
- vec[i].iov_base = &fce;
+ vec[i].iov_base = &fce_v2x;
vec[i++].iov_len = fce_len;
if (!conn->lgr->is_smcd) {
if (clc->hdr.type == SMC_CLC_CONFIRM) {
@@ -1133,16 +1194,16 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact,
u8 version, u8 *eid, struct smc_init_info *ini)
{
- struct smc_clc_msg_accept_confirm_v2 cclc_v2;
+ struct smc_clc_msg_accept_confirm cclc;
int reason_code = 0;
int len;
/* send SMC Confirm CLC msg */
- memset(&cclc_v2, 0, sizeof(cclc_v2));
- cclc_v2.hdr.type = SMC_CLC_CONFIRM;
- len = smc_clc_send_confirm_accept(smc, &cclc_v2, clnt_first_contact,
+ memset(&cclc, 0, sizeof(cclc));
+ cclc.hdr.type = SMC_CLC_CONFIRM;
+ len = smc_clc_send_confirm_accept(smc, &cclc, clnt_first_contact,
version, eid, ini);
- if (len < ntohs(cclc_v2.hdr.length)) {
+ if (len < ntohs(cclc.hdr.length)) {
if (len >= 0) {
reason_code = -ENETUNREACH;
smc->sk.sk_err = -reason_code;
@@ -1158,26 +1219,29 @@ int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact,
int smc_clc_send_accept(struct smc_sock *new_smc, bool srv_first_contact,
u8 version, u8 *negotiated_eid, struct smc_init_info *ini)
{
- struct smc_clc_msg_accept_confirm_v2 aclc_v2;
+ struct smc_clc_msg_accept_confirm aclc;
int len;
- memset(&aclc_v2, 0, sizeof(aclc_v2));
- aclc_v2.hdr.type = SMC_CLC_ACCEPT;
- len = smc_clc_send_confirm_accept(new_smc, &aclc_v2, srv_first_contact,
+ memset(&aclc, 0, sizeof(aclc));
+ aclc.hdr.type = SMC_CLC_ACCEPT;
+ len = smc_clc_send_confirm_accept(new_smc, &aclc, srv_first_contact,
version, negotiated_eid, ini);
- if (len < ntohs(aclc_v2.hdr.length))
+ if (len < ntohs(aclc.hdr.length))
len = len >= 0 ? -EPROTO : -new_smc->clcsock->sk->sk_err;
return len > 0 ? 0 : len;
}
-int smc_clc_srv_v2x_features_validate(struct smc_clc_msg_proposal *pclc,
+int smc_clc_srv_v2x_features_validate(struct smc_sock *smc,
+ struct smc_clc_msg_proposal *pclc,
struct smc_init_info *ini)
{
struct smc_clc_v2_extension *pclc_v2_ext;
+ struct net *net = sock_net(&smc->sk);
ini->max_conns = SMC_CONN_PER_LGR_MAX;
ini->max_links = SMC_LINKS_ADD_LNK_MAX;
+ ini->feature_mask = SMC_FEATURE_MASK;
if ((!(ini->smcd_version & SMC_V2) && !(ini->smcr_version & SMC_V2)) ||
ini->release_nr < SMC_RELEASE_1)
@@ -1188,11 +1252,13 @@ int smc_clc_srv_v2x_features_validate(struct smc_clc_msg_proposal *pclc,
return SMC_CLC_DECL_NOV2EXT;
if (ini->smcr_version & SMC_V2) {
- ini->max_conns = min_t(u8, pclc_v2_ext->max_conns, SMC_CONN_PER_LGR_PREFER);
+ ini->max_conns = min_t(u8, pclc_v2_ext->max_conns,
+ net->smc.sysctl_max_conns_per_lgr);
if (ini->max_conns < SMC_CONN_PER_LGR_MIN)
return SMC_CLC_DECL_MAXCONNERR;
- ini->max_links = min_t(u8, pclc_v2_ext->max_links, SMC_LINKS_PER_LGR_MAX_PREFER);
+ ini->max_links = min_t(u8, pclc_v2_ext->max_links,
+ net->smc.sysctl_max_links_per_lgr);
if (ini->max_links < SMC_LINKS_ADD_LNK_MIN)
return SMC_CLC_DECL_MAXLINKERR;
}
@@ -1219,6 +1285,8 @@ int smc_clc_clnt_v2x_features_validate(struct smc_clc_first_contact_ext *fce,
return SMC_CLC_DECL_MAXLINKERR;
ini->max_links = fce_v2x->max_links;
}
+ /* common supplemental features of server and client */
+ ini->feature_mask = ntohs(fce_v2x->feature_mask) & SMC_FEATURE_MASK;
return 0;
}
@@ -1226,10 +1294,8 @@ int smc_clc_clnt_v2x_features_validate(struct smc_clc_first_contact_ext *fce,
int smc_clc_v2x_features_confirm_check(struct smc_clc_msg_accept_confirm *cclc,
struct smc_init_info *ini)
{
- struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
- (struct smc_clc_msg_accept_confirm_v2 *)cclc;
struct smc_clc_first_contact_ext *fce =
- smc_get_clc_first_contact_ext(clc_v2, ini->is_smcd);
+ smc_get_clc_first_contact_ext(cclc, ini->is_smcd);
struct smc_clc_first_contact_ext_v2x *fce_v2x =
(struct smc_clc_first_contact_ext_v2x *)fce;
@@ -1249,6 +1315,8 @@ int smc_clc_v2x_features_confirm_check(struct smc_clc_msg_accept_confirm *cclc,
if (fce_v2x->max_links != ini->max_links)
return SMC_CLC_DECL_MAXLINKERR;
}
+ /* common supplemental features returned by client */
+ ini->feature_mask = ntohs(fce_v2x->feature_mask);
return 0;
}
@@ -1270,7 +1338,11 @@ void __init smc_clc_init(void)
INIT_LIST_HEAD(&smc_clc_eid_table.list);
rwlock_init(&smc_clc_eid_table.lock);
smc_clc_eid_table.ueid_cnt = 0;
+#if IS_ENABLED(CONFIG_S390)
smc_clc_eid_table.seid_enabled = 1;
+#else
+ smc_clc_eid_table.seid_enabled = 0;
+#endif
}
void smc_clc_exit(void)
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index c5c8e7db775a..a9f9bdd26dcd 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -138,7 +138,8 @@ struct smc_clc_v2_extension {
u8 roce[16]; /* RoCEv2 GID */
u8 max_conns;
u8 max_links;
- u8 reserved[14];
+ __be16 feature_mask;
+ u8 reserved[12];
u8 user_eids[][SMC_MAX_EID_LEN];
};
@@ -171,6 +172,11 @@ struct smc_clc_msg_proposal { /* clc proposal message sent by Linux */
#define SMC_CLC_MAX_V6_PREFIX 8
#define SMC_CLC_MAX_UEID 8
+#define SMCD_CLC_MAX_V2_GID_ENTRIES 8 /* max # of CHID-GID entries in CLC
+ * proposal SMC-Dv2 extension.
+ * each ISM device takes one entry and
+ * each virtual ISM takes two entries.
+ */
struct smc_clc_msg_proposal_area {
struct smc_clc_msg_proposal pclc_base;
@@ -180,7 +186,8 @@ struct smc_clc_msg_proposal_area {
struct smc_clc_v2_extension pclc_v2_ext;
u8 user_eids[SMC_CLC_MAX_UEID][SMC_MAX_EID_LEN];
struct smc_clc_smcd_v2_extension pclc_smcd_v2_ext;
- struct smc_clc_smcd_gid_chid pclc_gidchids[SMC_MAX_ISM_DEVS];
+ struct smc_clc_smcd_gid_chid
+ pclc_gidchids[SMCD_CLC_MAX_V2_GID_ENTRIES];
struct smc_clc_msg_trail pclc_trl;
};
@@ -204,8 +211,8 @@ struct smcr_clc_msg_accept_confirm { /* SMCR accept/confirm */
} __packed;
struct smcd_clc_msg_accept_confirm_common { /* SMCD accept/confirm */
- u64 gid; /* Sender GID */
- u64 token; /* DMB token */
+ __be64 gid; /* Sender GID */
+ __be64 token; /* DMB token */
u8 dmbe_idx; /* DMBE index */
#if defined(__BIG_ENDIAN_BITFIELD)
u8 dmbe_size : 4, /* buf size (compressed) */
@@ -240,9 +247,14 @@ struct smc_clc_first_contact_ext {
struct smc_clc_first_contact_ext_v2x {
struct smc_clc_first_contact_ext fce_v2_base;
- u8 max_conns; /* for SMC-R only */
- u8 max_links; /* for SMC-R only */
- u8 reserved3[2];
+ union {
+ struct {
+ u8 max_conns; /* for SMC-R only */
+ u8 max_links; /* for SMC-R only */
+ };
+ u8 reserved3[2]; /* for SMC-D only */
+ };
+ __be16 feature_mask;
__be32 vendor_exp_options;
u8 reserved4[8];
} __packed; /* format defined in
@@ -259,28 +271,21 @@ struct smc_clc_fce_gid_ext {
struct smc_clc_msg_accept_confirm { /* clc accept / confirm message */
struct smc_clc_msg_hdr hdr;
union {
- struct smcr_clc_msg_accept_confirm r0; /* SMC-R */
- struct { /* SMC-D */
- struct smcd_clc_msg_accept_confirm_common d0;
- u32 reserved5[3];
- };
- };
-} __packed; /* format defined in RFC7609 */
-
-struct smc_clc_msg_accept_confirm_v2 { /* clc accept / confirm message */
- struct smc_clc_msg_hdr hdr;
- union {
struct { /* SMC-R */
struct smcr_clc_msg_accept_confirm r0;
- u8 eid[SMC_MAX_EID_LEN];
- u8 reserved6[8];
- } r1;
+ struct { /* v2 only */
+ u8 eid[SMC_MAX_EID_LEN];
+ u8 reserved6[8];
+ } __packed r1;
+ };
struct { /* SMC-D */
struct smcd_clc_msg_accept_confirm_common d0;
- __be16 chid;
- u8 eid[SMC_MAX_EID_LEN];
- u8 reserved5[8];
- } d1;
+ struct { /* v2 only, but 12 bytes reserved in v1 */
+ __be16 chid;
+ u8 eid[SMC_MAX_EID_LEN];
+ __be64 gid_ext;
+ } __packed d1;
+ };
};
};
@@ -389,24 +394,23 @@ smc_get_clc_smcd_v2_ext(struct smc_clc_v2_extension *prop_v2ext)
}
static inline struct smc_clc_first_contact_ext *
-smc_get_clc_first_contact_ext(struct smc_clc_msg_accept_confirm_v2 *clc_v2,
+smc_get_clc_first_contact_ext(struct smc_clc_msg_accept_confirm *clc,
bool is_smcd)
{
int clc_v2_len;
- if (clc_v2->hdr.version == SMC_V1 ||
- !(clc_v2->hdr.typev2 & SMC_FIRST_CONTACT_MASK))
+ if (clc->hdr.version == SMC_V1 ||
+ !(clc->hdr.typev2 & SMC_FIRST_CONTACT_MASK))
return NULL;
if (is_smcd)
clc_v2_len =
- offsetofend(struct smc_clc_msg_accept_confirm_v2, d1);
+ offsetofend(struct smc_clc_msg_accept_confirm, d1);
else
clc_v2_len =
- offsetofend(struct smc_clc_msg_accept_confirm_v2, r1);
+ offsetofend(struct smc_clc_msg_accept_confirm, r1);
- return (struct smc_clc_first_contact_ext *)(((u8 *)clc_v2) +
- clc_v2_len);
+ return (struct smc_clc_first_contact_ext *)(((u8 *)clc) + clc_v2_len);
}
struct smcd_dev;
@@ -422,7 +426,8 @@ int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact,
u8 version, u8 *eid, struct smc_init_info *ini);
int smc_clc_send_accept(struct smc_sock *smc, bool srv_first_contact,
u8 version, u8 *negotiated_eid, struct smc_init_info *ini);
-int smc_clc_srv_v2x_features_validate(struct smc_clc_msg_proposal *pclc,
+int smc_clc_srv_v2x_features_validate(struct smc_sock *smc,
+ struct smc_clc_msg_proposal *pclc,
struct smc_init_info *ini);
int smc_clc_clnt_v2x_features_validate(struct smc_clc_first_contact_ext *fce,
struct smc_init_info *ini);
diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c
index dbdf03e8aa5b..10219f55aad1 100644
--- a/net/smc/smc_close.c
+++ b/net/smc/smc_close.c
@@ -116,7 +116,8 @@ static void smc_close_cancel_work(struct smc_sock *smc)
struct sock *sk = &smc->sk;
release_sock(sk);
- cancel_work_sync(&smc->conn.close_work);
+ if (cancel_work_sync(&smc->conn.close_work))
+ sock_put(sk);
cancel_delayed_work_sync(&smc->conn.tx_work);
lock_sock(sk);
}
@@ -173,7 +174,7 @@ void smc_close_active_abort(struct smc_sock *smc)
break;
}
- sock_set_flag(sk, SOCK_DEAD);
+ smc_sock_set_flag(sk, SOCK_DEAD);
sk->sk_state_change(sk);
if (release_clcsock) {
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index bd01dd31e4bd..e4c858411207 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -506,6 +506,7 @@ static int smc_nl_fill_smcd_lgr(struct smc_link_group *lgr,
{
char smc_pnet[SMC_MAX_PNETID_LEN + 1];
struct smcd_dev *smcd = lgr->smcd;
+ struct smcd_gid smcd_gid;
struct nlattr *attrs;
void *nlh;
@@ -521,13 +522,19 @@ static int smc_nl_fill_smcd_lgr(struct smc_link_group *lgr,
if (nla_put_u32(skb, SMC_NLA_LGR_D_ID, *((u32 *)&lgr->id)))
goto errattr;
+ smcd->ops->get_local_gid(smcd, &smcd_gid);
if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_GID,
- smcd->ops->get_local_gid(smcd),
- SMC_NLA_LGR_D_PAD))
+ smcd_gid.gid, SMC_NLA_LGR_D_PAD))
goto errattr;
- if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_PEER_GID, lgr->peer_gid,
+ if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_EXT_GID,
+ smcd_gid.gid_ext, SMC_NLA_LGR_D_PAD))
+ goto errattr;
+ if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_PEER_GID, lgr->peer_gid.gid,
SMC_NLA_LGR_D_PAD))
goto errattr;
+ if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_PEER_EXT_GID,
+ lgr->peer_gid.gid_ext, SMC_NLA_LGR_D_PAD))
+ goto errattr;
if (nla_put_u8(skb, SMC_NLA_LGR_D_VLAN_ID, lgr->vlan_id))
goto errattr;
if (nla_put_u32(skb, SMC_NLA_LGR_D_CONNS_NUM, lgr->conns_num))
@@ -876,7 +883,10 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
/* SMC-D specific settings */
smcd = ini->ism_dev[ini->ism_selected];
get_device(smcd->ops->get_dev(smcd));
- lgr->peer_gid = ini->ism_peer_gid[ini->ism_selected];
+ lgr->peer_gid.gid =
+ ini->ism_peer_gid[ini->ism_selected].gid;
+ lgr->peer_gid.gid_ext =
+ ini->ism_peer_gid[ini->ism_selected].gid_ext;
lgr->smcd = ini->ism_dev[ini->ism_selected];
lgr_list = &ini->ism_dev[ini->ism_selected]->lgr_list;
lgr_lock = &lgr->smcd->lgr_lock;
@@ -1514,7 +1524,8 @@ void smc_lgr_terminate_sched(struct smc_link_group *lgr)
}
/* Called when peer lgr shutdown (regularly or abnormally) is received */
-void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
+void smc_smcd_terminate(struct smcd_dev *dev, struct smcd_gid *peer_gid,
+ unsigned short vlan)
{
struct smc_link_group *lgr, *l;
LIST_HEAD(lgr_free_list);
@@ -1522,9 +1533,12 @@ void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
/* run common cleanup function and build free list */
spin_lock_bh(&dev->lgr_lock);
list_for_each_entry_safe(lgr, l, &dev->lgr_list, list) {
- if ((!peer_gid || lgr->peer_gid == peer_gid) &&
+ if ((!peer_gid->gid ||
+ (lgr->peer_gid.gid == peer_gid->gid &&
+ !smc_ism_is_virtual(dev) ? 1 :
+ lgr->peer_gid.gid_ext == peer_gid->gid_ext)) &&
(vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) {
- if (peer_gid) /* peer triggered termination */
+ if (peer_gid->gid) /* peer triggered termination */
lgr->peer_shutdown = 1;
list_move(&lgr->list, &lgr_free_list);
lgr->freeing = 1;
@@ -1662,6 +1676,7 @@ void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport)
{
struct smc_link_group *lgr, *n;
+ spin_lock_bh(&smc_lgr_list.lock);
list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
struct smc_link *link;
@@ -1680,6 +1695,7 @@ void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport)
if (link)
smc_llc_add_link_local(link);
}
+ spin_unlock_bh(&smc_lgr_list.lock);
}
/* link is down - switch connections to alternate link,
@@ -1858,9 +1874,18 @@ static bool smcr_lgr_match(struct smc_link_group *lgr, u8 smcr_version,
}
static bool smcd_lgr_match(struct smc_link_group *lgr,
- struct smcd_dev *smcismdev, u64 peer_gid)
+ struct smcd_dev *smcismdev,
+ struct smcd_gid *peer_gid)
{
- return lgr->peer_gid == peer_gid && lgr->smcd == smcismdev;
+ if (lgr->peer_gid.gid != peer_gid->gid ||
+ lgr->smcd != smcismdev)
+ return false;
+
+ if (smc_ism_is_virtual(smcismdev) &&
+ lgr->peer_gid.gid_ext != peer_gid->gid_ext)
+ return false;
+
+ return true;
}
/* create a new SMC connection (and a new link group if necessary) */
@@ -1890,7 +1915,7 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
write_lock_bh(&lgr->conns_lock);
if ((ini->is_smcd ?
smcd_lgr_match(lgr, ini->ism_dev[ini->ism_selected],
- ini->ism_peer_gid[ini->ism_selected]) :
+ &ini->ism_peer_gid[ini->ism_selected]) :
smcr_lgr_match(lgr, ini->smcr_version,
ini->peer_systemid,
ini->peer_gid, ini->peer_mac, role,
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 120027d40469..1f175376037b 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -17,9 +17,11 @@
#include <linux/pci.h>
#include <rdma/ib_verbs.h>
#include <net/genetlink.h>
+#include <net/smc.h>
#include "smc.h"
#include "smc_ib.h"
+#include "smc_clc.h"
#define SMC_RMBS_PER_LGR_MAX 255 /* max. # of RMBs per link group */
#define SMC_CONN_PER_LGR_MIN 16 /* min. # of connections per link group */
@@ -355,7 +357,7 @@ struct smc_link_group {
/* max links can be added in lgr */
};
struct { /* SMC-D */
- u64 peer_gid;
+ struct smcd_gid peer_gid;
/* Peer GID (remote) */
struct smcd_dev *smcd;
/* ISM device for VLAN reg. */
@@ -392,6 +394,11 @@ struct smc_init_info_smcrv2 {
struct smc_gidlist gidlist;
};
+#define SMC_MAX_V2_ISM_DEVS SMCD_CLC_MAX_V2_GID_ENTRIES
+ /* max # of proposed non-native ISM devices,
+ * which can't exceed the max # of CHID-GID
+ * entries in CLC proposal SMC-Dv2 extension.
+ */
struct smc_init_info {
u8 is_smcd;
u8 smc_type_v1;
@@ -401,6 +408,7 @@ struct smc_init_info {
u8 max_links;
u8 first_contact_peer;
u8 first_contact_local;
+ u16 feature_mask;
unsigned short vlan_id;
u32 rc;
u8 negotiated_eid[SMC_MAX_EID_LEN];
@@ -416,9 +424,9 @@ struct smc_init_info {
u32 ib_clcqpn;
struct smc_init_info_smcrv2 smcrv2;
/* SMC-D */
- u64 ism_peer_gid[SMC_MAX_ISM_DEVS + 1];
- struct smcd_dev *ism_dev[SMC_MAX_ISM_DEVS + 1];
- u16 ism_chid[SMC_MAX_ISM_DEVS + 1];
+ struct smcd_gid ism_peer_gid[SMC_MAX_V2_ISM_DEVS + 1];
+ struct smcd_dev *ism_dev[SMC_MAX_V2_ISM_DEVS + 1];
+ u16 ism_chid[SMC_MAX_V2_ISM_DEVS + 1];
u8 ism_offered_cnt; /* # of ISM devices offered */
u8 ism_selected; /* index of selected ISM dev*/
u8 smcd_version;
@@ -544,7 +552,7 @@ void smc_lgr_hold(struct smc_link_group *lgr);
void smc_lgr_put(struct smc_link_group *lgr);
void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport);
void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport);
-void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid,
+void smc_smcd_terminate(struct smcd_dev *dev, struct smcd_gid *peer_gid,
unsigned short vlan);
void smc_smcd_terminate_all(struct smcd_dev *dev);
void smc_smcr_terminate_all(struct smc_ib_device *smcibdev);
diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c
index 7ff2152971a5..5a33908015f3 100644
--- a/net/smc/smc_diag.c
+++ b/net/smc/smc_diag.c
@@ -21,6 +21,7 @@
#include "smc.h"
#include "smc_core.h"
+#include "smc_ism.h"
struct smc_diag_dump_ctx {
int pos[2];
@@ -153,8 +154,7 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
.lnk[0].link_id = link->link_id,
};
- memcpy(linfo.lnk[0].ibname,
- smc->conn.lgr->lnk[0].smcibdev->ibdev->name,
+ memcpy(linfo.lnk[0].ibname, link->smcibdev->ibdev->name,
sizeof(link->smcibdev->ibdev->name));
smc_gid_be16_convert(linfo.lnk[0].gid, link->gid);
smc_gid_be16_convert(linfo.lnk[0].peer_gid, link->peer_gid);
@@ -164,16 +164,20 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
}
if (smc_conn_lgr_valid(&smc->conn) && smc->conn.lgr->is_smcd &&
(req->diag_ext & (1 << (SMC_DIAG_DMBINFO - 1))) &&
- !list_empty(&smc->conn.lgr->list)) {
+ !list_empty(&smc->conn.lgr->list) && smc->conn.rmb_desc) {
struct smc_connection *conn = &smc->conn;
struct smcd_diag_dmbinfo dinfo;
struct smcd_dev *smcd = conn->lgr->smcd;
+ struct smcd_gid smcd_gid;
memset(&dinfo, 0, sizeof(dinfo));
dinfo.linkid = *((u32 *)conn->lgr->id);
- dinfo.peer_gid = conn->lgr->peer_gid;
- dinfo.my_gid = smcd->ops->get_local_gid(smcd);
+ dinfo.peer_gid = conn->lgr->peer_gid.gid;
+ dinfo.peer_gid_ext = conn->lgr->peer_gid.gid_ext;
+ smcd->ops->get_local_gid(smcd, &smcd_gid);
+ dinfo.my_gid = smcd_gid.gid;
+ dinfo.my_gid_ext = smcd_gid.gid_ext;
dinfo.token = conn->rmb_desc->token;
dinfo.peer_token = conn->peer_token;
@@ -268,5 +272,6 @@ static void __exit smc_diag_exit(void)
module_init(smc_diag_init);
module_exit(smc_diag_exit);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SMC socket monitoring via SOCK_DIAG");
MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 43 /* AF_SMC */);
MODULE_ALIAS_GENL_FAMILY(SMCR_GENL_FAMILY_NAME);
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index 9b66d6aeeb1a..97704a9e84c7 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -193,7 +193,7 @@ bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport)
return smcibdev->pattr[ibport - 1].state == IB_PORT_ACTIVE;
}
-int smc_ib_find_route(__be32 saddr, __be32 daddr,
+int smc_ib_find_route(struct net *net, __be32 saddr, __be32 daddr,
u8 nexthop_mac[], u8 *uses_gateway)
{
struct neighbour *neigh = NULL;
@@ -205,7 +205,7 @@ int smc_ib_find_route(__be32 saddr, __be32 daddr,
if (daddr == cpu_to_be32(INADDR_NONE))
goto out;
- rt = ip_route_output_flow(&init_net, &fl4, NULL);
+ rt = ip_route_output_flow(net, &fl4, NULL);
if (IS_ERR(rt))
goto out;
if (rt->rt_uses_gateway && rt->rt_gw_family != AF_INET)
@@ -235,6 +235,7 @@ static int smc_ib_determine_gid_rcu(const struct net_device *ndev,
if (smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP &&
smc_ib_gid_to_ipv4((u8 *)&attr->gid) != cpu_to_be32(INADDR_NONE)) {
struct in_device *in_dev = __in_dev_get_rcu(ndev);
+ struct net *net = dev_net(ndev);
const struct in_ifaddr *ifa;
bool subnet_match = false;
@@ -248,7 +249,7 @@ static int smc_ib_determine_gid_rcu(const struct net_device *ndev,
}
if (!subnet_match)
goto out;
- if (smcrv2->daddr && smc_ib_find_route(smcrv2->saddr,
+ if (smcrv2->daddr && smc_ib_find_route(net, smcrv2->saddr,
smcrv2->daddr,
smcrv2->nexthop_mac,
&smcrv2->uses_gateway))
@@ -843,7 +844,7 @@ long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev)
goto out;
/* the calculated number of cq entries fits to mlx5 cq allocation */
cqe_size_order = cache_line_size() == 128 ? 7 : 6;
- smc_order = MAX_ORDER - cqe_size_order;
+ smc_order = MAX_PAGE_ORDER - cqe_size_order;
if (SMC_MAX_CQE + 2 > (0x00000001 << smc_order) * PAGE_SIZE)
cqattr.cqe = (0x00000001 << smc_order) * PAGE_SIZE - 2;
smcibdev->roce_cq_send = ib_create_cq(smcibdev->ibdev,
diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h
index 4df5f8c8a0a1..ef8ac2b7546d 100644
--- a/net/smc/smc_ib.h
+++ b/net/smc/smc_ib.h
@@ -112,7 +112,7 @@ void smc_ib_sync_sg_for_device(struct smc_link *lnk,
int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport,
unsigned short vlan_id, u8 gid[], u8 *sgid_index,
struct smc_init_info_smcrv2 *smcrv2);
-int smc_ib_find_route(__be32 saddr, __be32 daddr,
+int smc_ib_find_route(struct net *net, __be32 saddr, __be32 daddr,
u8 nexthop_mac[], u8 *uses_gateway);
bool smc_ib_is_valid_local_systemid(void);
int smcr_nl_get_device(struct sk_buff *skb, struct netlink_callback *cb);
diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c
index fbee2493091f..ac88de2a06a0 100644
--- a/net/smc/smc_ism.c
+++ b/net/smc/smc_ism.c
@@ -43,8 +43,30 @@ static struct ism_client smc_ism_client = {
};
#endif
+static void smc_ism_create_system_eid(void)
+{
+ struct smc_ism_seid *seid =
+ (struct smc_ism_seid *)smc_ism_v2_system_eid;
+#if IS_ENABLED(CONFIG_S390)
+ struct cpuid id;
+ u16 ident_tail;
+ char tmp[5];
+
+ memcpy(seid->seid_string, "IBM-SYSZ-ISMSEID00000000", 24);
+ get_cpu_id(&id);
+ ident_tail = (u16)(id.ident & SMC_ISM_IDENT_MASK);
+ snprintf(tmp, 5, "%04X", ident_tail);
+ memcpy(seid->serial_number, tmp, 4);
+ snprintf(tmp, 5, "%04X", id.machine);
+ memcpy(seid->type, tmp, 4);
+#else
+ memset(seid, 0, SMC_MAX_EID_LEN);
+#endif
+}
+
/* Test if an ISM communication is possible - same CPC */
-int smc_ism_cantalk(u64 peer_gid, unsigned short vlan_id, struct smcd_dev *smcd)
+int smc_ism_cantalk(struct smcd_gid *peer_gid, unsigned short vlan_id,
+ struct smcd_dev *smcd)
{
return smcd->ops->query_remote_gid(smcd, peer_gid, vlan_id ? 1 : 0,
vlan_id);
@@ -208,7 +230,7 @@ int smc_ism_register_dmb(struct smc_link_group *lgr, int dmb_len,
dmb.dmb_len = dmb_len;
dmb.sba_idx = dmb_desc->sba_idx;
dmb.vlan_id = lgr->vlan_id;
- dmb.rgid = lgr->peer_gid;
+ dmb.rgid = lgr->peer_gid.gid;
rc = lgr->smcd->ops->register_dmb(lgr->smcd, &dmb, &smc_ism_client);
if (!rc) {
dmb_desc->sba_idx = dmb.sba_idx;
@@ -340,18 +362,20 @@ union smcd_sw_event_info {
static void smcd_handle_sw_event(struct smc_ism_event_work *wrk)
{
+ struct smcd_gid peer_gid = { .gid = wrk->event.tok,
+ .gid_ext = 0 };
union smcd_sw_event_info ev_info;
ev_info.info = wrk->event.info;
switch (wrk->event.code) {
case ISM_EVENT_CODE_SHUTDOWN: /* Peer shut down DMBs */
- smc_smcd_terminate(wrk->smcd, wrk->event.tok, ev_info.vlan_id);
+ smc_smcd_terminate(wrk->smcd, &peer_gid, ev_info.vlan_id);
break;
case ISM_EVENT_CODE_TESTLINK: /* Activity timer */
if (ev_info.code == ISM_EVENT_REQUEST) {
ev_info.code = ISM_EVENT_RESPONSE;
wrk->smcd->ops->signal_event(wrk->smcd,
- wrk->event.tok,
+ &peer_gid,
ISM_EVENT_REQUEST_IR,
ISM_EVENT_CODE_TESTLINK,
ev_info.info);
@@ -365,10 +389,12 @@ static void smc_ism_event_work(struct work_struct *work)
{
struct smc_ism_event_work *wrk =
container_of(work, struct smc_ism_event_work, work);
+ struct smcd_gid smcd_gid = { .gid = wrk->event.tok,
+ .gid_ext = 0 };
switch (wrk->event.type) {
case ISM_EVENT_GID: /* GID event, token is peer GID */
- smc_smcd_terminate(wrk->smcd, wrk->event.tok, VLAN_VID_MASK);
+ smc_smcd_terminate(wrk->smcd, &smcd_gid, VLAN_VID_MASK);
break;
case ISM_EVENT_DMB:
break;
@@ -426,14 +452,8 @@ static void smcd_register_dev(struct ism_dev *ism)
mutex_lock(&smcd_dev_list.mutex);
if (list_empty(&smcd_dev_list.list)) {
- u8 *system_eid = NULL;
-
- system_eid = smcd->ops->get_system_eid();
- if (smcd->ops->supports_v2()) {
+ if (smcd->ops->supports_v2())
smc_ism_v2_capable = true;
- memcpy(smc_ism_v2_system_eid, system_eid,
- SMC_MAX_EID_LEN);
- }
}
/* sort list: devices without pnetid before devices with pnetid */
if (smcd->pnetid[0])
@@ -525,7 +545,7 @@ int smc_ism_signal_shutdown(struct smc_link_group *lgr)
memcpy(ev_info.uid, lgr->id, SMC_LGR_ID_SIZE);
ev_info.vlan_id = lgr->vlan_id;
ev_info.code = ISM_EVENT_REQUEST;
- rc = lgr->smcd->ops->signal_event(lgr->smcd, lgr->peer_gid,
+ rc = lgr->smcd->ops->signal_event(lgr->smcd, &lgr->peer_gid,
ISM_EVENT_REQUEST_IR,
ISM_EVENT_CODE_SHUTDOWN,
ev_info.info);
@@ -537,10 +557,10 @@ int smc_ism_init(void)
{
int rc = 0;
-#if IS_ENABLED(CONFIG_ISM)
smc_ism_v2_capable = false;
- memset(smc_ism_v2_system_eid, 0, SMC_MAX_EID_LEN);
+ smc_ism_create_system_eid();
+#if IS_ENABLED(CONFIG_ISM)
rc = ism_register_client(&smc_ism_client);
#endif
return rc;
diff --git a/net/smc/smc_ism.h b/net/smc/smc_ism.h
index 832b2f42d79f..ffff40c30a06 100644
--- a/net/smc/smc_ism.h
+++ b/net/smc/smc_ism.h
@@ -15,6 +15,9 @@
#include "smc.h"
+#define SMC_VIRTUAL_ISM_CHID_MASK 0xFF00
+#define SMC_ISM_IDENT_MASK 0x00FFFF
+
struct smcd_dev_list { /* List of SMCD devices */
struct list_head list;
struct mutex mutex; /* Protects list of devices */
@@ -28,9 +31,16 @@ struct smc_ism_vlanid { /* VLAN id set on ISM device */
refcount_t refcnt; /* Reference count */
};
+struct smc_ism_seid {
+ u8 seid_string[24];
+ u8 serial_number[4];
+ u8 type[4];
+};
+
struct smcd_dev;
-int smc_ism_cantalk(u64 peer_gid, unsigned short vlan_id, struct smcd_dev *dev);
+int smc_ism_cantalk(struct smcd_gid *peer_gid, unsigned short vlan_id,
+ struct smcd_dev *dev);
void smc_ism_set_conn(struct smc_connection *conn);
void smc_ism_unset_conn(struct smc_connection *conn);
int smc_ism_get_vlan(struct smcd_dev *dev, unsigned short vlan_id);
@@ -56,4 +66,22 @@ static inline int smc_ism_write(struct smcd_dev *smcd, u64 dmb_tok,
return rc < 0 ? rc : 0;
}
+static inline bool __smc_ism_is_virtual(u16 chid)
+{
+ /* CHIDs in range of 0xFF00 to 0xFFFF are reserved
+ * for virtual ISM device.
+ *
+ * loopback-ism: 0xFFFF
+ * virtio-ism: 0xFF00 ~ 0xFFFE
+ */
+ return ((chid & 0xFF00) == 0xFF00);
+}
+
+static inline bool smc_ism_is_virtual(struct smcd_dev *smcd)
+{
+ u16 chid = smcd->ops->get_chid(smcd);
+
+ return __smc_ism_is_virtual(chid);
+}
+
#endif
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index 11775401df68..9f2c58c5a86b 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -1103,8 +1103,8 @@ static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev,
list_for_each_entry(ismdev, &smcd_dev_list.list, list) {
if (smc_pnet_match(ismdev->pnetid, ndev_pnetid) &&
!ismdev->going_away &&
- (!ini->ism_peer_gid[0] ||
- !smc_ism_cantalk(ini->ism_peer_gid[0], ini->vlan_id,
+ (!ini->ism_peer_gid[0].gid ||
+ !smc_ism_cantalk(&ini->ism_peer_gid[0], ini->vlan_id,
ismdev))) {
ini->ism_dev[0] = ismdev;
break;
diff --git a/net/smc/smc_stats.h b/net/smc/smc_stats.h
index b60fe1eb37ab..9d32058db2b5 100644
--- a/net/smc/smc_stats.h
+++ b/net/smc/smc_stats.h
@@ -92,13 +92,14 @@ do { \
typeof(_smc_stats) stats = (_smc_stats); \
typeof(_tech) t = (_tech); \
typeof(_len) l = (_len); \
- int _pos = fls64((l) >> 13); \
+ int _pos; \
typeof(_rc) r = (_rc); \
int m = SMC_BUF_MAX - 1; \
this_cpu_inc((*stats).smc[t].key ## _cnt); \
- if (r <= 0) \
+ if (r <= 0 || l <= 0) \
break; \
- _pos = (_pos < m) ? ((l == 1 << (_pos + 12)) ? _pos - 1 : _pos) : m; \
+ _pos = fls64((l - 1) >> 13); \
+ _pos = (_pos <= m) ? _pos : m; \
this_cpu_inc((*stats).smc[t].key ## _pd.buf[_pos]); \
this_cpu_add((*stats).smc[t].key ## _bytes, r); \
} \
@@ -138,9 +139,12 @@ while (0)
do { \
typeof(_len) _l = (_len); \
typeof(_tech) t = (_tech); \
- int _pos = fls((_l) >> 13); \
+ int _pos; \
int m = SMC_BUF_MAX - 1; \
- _pos = (_pos < m) ? ((_l == 1 << (_pos + 12)) ? _pos - 1 : _pos) : m; \
+ if (_l <= 0) \
+ break; \
+ _pos = fls((_l - 1) >> 13); \
+ _pos = (_pos <= m) ? _pos : m; \
this_cpu_inc((*(_smc_stats)).smc[t].k ## _rmbsize.buf[_pos]); \
} \
while (0)
@@ -243,8 +247,9 @@ while (0)
#define SMC_STAT_SERV_SUCC_INC(net, _ini) \
do { \
typeof(_ini) i = (_ini); \
- bool is_v2 = (i->smcd_version & SMC_V2); \
bool is_smcd = (i->is_smcd); \
+ u8 version = is_smcd ? i->smcd_version : i->smcr_version; \
+ bool is_v2 = (version & SMC_V2); \
typeof(net->smc.smc_stats) smc_stats = (net)->smc.smc_stats; \
if (is_v2 && is_smcd) \
this_cpu_inc(smc_stats->smc[SMC_TYPE_D].srv_v2_succ_cnt); \
diff --git a/net/smc/smc_sysctl.c b/net/smc/smc_sysctl.c
index 5cbc18c6e62b..a5946d1b9d60 100644
--- a/net/smc/smc_sysctl.c
+++ b/net/smc/smc_sysctl.c
@@ -25,6 +25,10 @@ static int max_sndbuf = INT_MAX / 2;
static int max_rcvbuf = INT_MAX / 2;
static const int net_smc_wmem_init = (64 * 1024);
static const int net_smc_rmem_init = (64 * 1024);
+static int links_per_lgr_min = SMC_LINKS_ADD_LNK_MIN;
+static int links_per_lgr_max = SMC_LINKS_ADD_LNK_MAX;
+static int conns_per_lgr_min = SMC_CONN_PER_LGR_MIN;
+static int conns_per_lgr_max = SMC_CONN_PER_LGR_MAX;
static struct ctl_table smc_table[] = {
{
@@ -68,6 +72,24 @@ static struct ctl_table smc_table[] = {
.extra1 = &min_rcvbuf,
.extra2 = &max_rcvbuf,
},
+ {
+ .procname = "smcr_max_links_per_lgr",
+ .data = &init_net.smc.sysctl_max_links_per_lgr,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &links_per_lgr_min,
+ .extra2 = &links_per_lgr_max,
+ },
+ {
+ .procname = "smcr_max_conns_per_lgr",
+ .data = &init_net.smc.sysctl_max_conns_per_lgr,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &conns_per_lgr_min,
+ .extra2 = &conns_per_lgr_max,
+ },
{ }
};
@@ -97,6 +119,8 @@ int __net_init smc_sysctl_net_init(struct net *net)
net->smc.sysctl_smcr_testlink_time = SMC_LLC_TESTLINK_DEFAULT_TIME;
WRITE_ONCE(net->smc.sysctl_wmem, net_smc_wmem_init);
WRITE_ONCE(net->smc.sysctl_rmem, net_smc_rmem_init);
+ net->smc.sysctl_max_links_per_lgr = SMC_LINKS_PER_LGR_MAX_PREFER;
+ net->smc.sysctl_max_conns_per_lgr = SMC_CONN_PER_LGR_PREFER;
return 0;
diff --git a/net/smc/smc_sysctl.h b/net/smc/smc_sysctl.h
index 0becc11bd2f4..eb2465ae1e15 100644
--- a/net/smc/smc_sysctl.h
+++ b/net/smc/smc_sysctl.h
@@ -23,6 +23,8 @@ void __net_exit smc_sysctl_net_exit(struct net *net);
static inline int smc_sysctl_net_init(struct net *net)
{
net->smc.sysctl_autocorking_size = SMC_AUTOCORKING_DEFAULT_SIZE;
+ net->smc.sysctl_max_links_per_lgr = SMC_LINKS_PER_LGR_MAX_PREFER;
+ net->smc.sysctl_max_conns_per_lgr = SMC_CONN_PER_LGR_PREFER;
return 0;
}
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index 3b0ff3b589c7..214ac3cbcf9a 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -621,7 +621,7 @@ static int smcd_tx_sndbuf_nonempty(struct smc_connection *conn)
return rc;
}
-static int __smc_tx_sndbuf_nonempty(struct smc_connection *conn)
+int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
{
struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
int rc = 0;
@@ -655,34 +655,6 @@ out:
return rc;
}
-int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
-{
- int rc;
-
- /* This make sure only one can send simultaneously to prevent wasting
- * of CPU and CDC slot.
- * Record whether someone has tried to push while we are pushing.
- */
- if (atomic_inc_return(&conn->tx_pushing) > 1)
- return 0;
-
-again:
- atomic_set(&conn->tx_pushing, 1);
- smp_wmb(); /* Make sure tx_pushing is 1 before real send */
- rc = __smc_tx_sndbuf_nonempty(conn);
-
- /* We need to check whether someone else have added some data into
- * the send queue and tried to push but failed after the atomic_set()
- * when we are pushing.
- * If so, we need to push again to prevent those data hang in the send
- * queue.
- */
- if (unlikely(!atomic_dec_and_test(&conn->tx_pushing)))
- goto again;
-
- return rc;
-}
-
/* Wakeup sndbuf consumers from process context
* since there is more data to transmit. The caller
* must hold sock lock.
diff --git a/net/socket.c b/net/socket.c
index 77f28328e387..ed3df2f749bf 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -403,7 +403,7 @@ static const struct xattr_handler sockfs_security_xattr_handler = {
.set = sockfs_security_xattr_set,
};
-static const struct xattr_handler *sockfs_xattr_handlers[] = {
+static const struct xattr_handler * const sockfs_xattr_handlers[] = {
&sockfs_xattr_handler,
&sockfs_security_xattr_handler,
NULL
@@ -737,6 +737,14 @@ static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
return ret;
}
+static int __sock_sendmsg(struct socket *sock, struct msghdr *msg)
+{
+ int err = security_socket_sendmsg(sock, msg,
+ msg_data_left(msg));
+
+ return err ?: sock_sendmsg_nosec(sock, msg);
+}
+
/**
* sock_sendmsg - send a message through @sock
* @sock: socket
@@ -747,10 +755,21 @@ static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
*/
int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
- int err = security_socket_sendmsg(sock, msg,
- msg_data_left(msg));
+ struct sockaddr_storage *save_addr = (struct sockaddr_storage *)msg->msg_name;
+ struct sockaddr_storage address;
+ int save_len = msg->msg_namelen;
+ int ret;
- return err ?: sock_sendmsg_nosec(sock, msg);
+ if (msg->msg_name) {
+ memcpy(&address, msg->msg_name, msg->msg_namelen);
+ msg->msg_name = &address;
+ }
+
+ ret = __sock_sendmsg(sock, msg);
+ msg->msg_name = save_addr;
+ msg->msg_namelen = save_len;
+
+ return ret;
}
EXPORT_SYMBOL(sock_sendmsg);
@@ -827,7 +846,7 @@ static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
static ktime_t get_timestamp(struct sock *sk, struct sk_buff *skb, int *if_index)
{
- bool cycles = sk->sk_tsflags & SOF_TIMESTAMPING_BIND_PHC;
+ bool cycles = READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_BIND_PHC;
struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
struct net_device *orig_dev;
ktime_t hwtstamp;
@@ -879,12 +898,12 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
struct scm_timestamping_internal tss;
-
int empty = 1, false_tstamp = 0;
struct skb_shared_hwtstamps *shhwtstamps =
skb_hwtstamps(skb);
int if_index;
ktime_t hwtstamp;
+ u32 tsflags;
/* Race occurred between timestamp enabling and packet
receiving. Fill in the current time for now. */
@@ -926,11 +945,12 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
}
memset(&tss, 0, sizeof(tss));
- if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
+ tsflags = READ_ONCE(sk->sk_tsflags);
+ if ((tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0))
empty = 0;
if (shhwtstamps &&
- (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
+ (tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
!skb_is_swtx_tstamp(skb, false_tstamp)) {
if_index = 0;
if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP_NETDEV)
@@ -938,14 +958,14 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
else
hwtstamp = shhwtstamps->hwtstamp;
- if (sk->sk_tsflags & SOF_TIMESTAMPING_BIND_PHC)
+ if (tsflags & SOF_TIMESTAMPING_BIND_PHC)
hwtstamp = ptp_convert_timestamp(&hwtstamp,
- sk->sk_bind_phc);
+ READ_ONCE(sk->sk_bind_phc));
if (ktime_to_timespec64_cond(hwtstamp, tss.ts + 2)) {
empty = 0;
- if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
+ if ((tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
!skb_is_err_queue(skb))
put_ts_pktinfo(msg, skb, if_index);
}
@@ -1137,7 +1157,7 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (sock->type == SOCK_SEQPACKET)
msg.msg_flags |= MSG_EOR;
- res = sock_sendmsg(sock, &msg);
+ res = __sock_sendmsg(sock, &msg);
*from = msg.msg_iter;
return res;
}
@@ -1667,20 +1687,16 @@ struct file *__sys_socket_file(int family, int type, int protocol)
* Therefore, __weak is needed to ensure that the call is still
* emitted, by telling the compiler that we don't know what the
* function might eventually be.
- *
- * __diag_* below are needed to dismiss the missing prototype warning.
*/
-__diag_push();
-__diag_ignore_all("-Wmissing-prototypes",
- "A fmod_ret entry point for BPF programs");
+__bpf_hook_start();
__weak noinline int update_socket_protocol(int family, int type, int protocol)
{
return protocol;
}
-__diag_pop();
+__bpf_hook_end();
int __sys_socket(int family, int type, int protocol)
{
@@ -2147,10 +2163,9 @@ int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
struct sockaddr_storage address;
int err;
struct msghdr msg;
- struct iovec iov;
int fput_needed;
- err = import_single_range(ITER_SOURCE, buff, len, &iov, &msg.msg_iter);
+ err = import_ubuf(ITER_SOURCE, buff, len, &msg.msg_iter);
if (unlikely(err))
return err;
sock = sockfd_lookup_light(fd, &err, &fput_needed);
@@ -2173,7 +2188,7 @@ int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
if (sock->file->f_flags & O_NONBLOCK)
flags |= MSG_DONTWAIT;
msg.msg_flags = flags;
- err = sock_sendmsg(sock, &msg);
+ err = __sock_sendmsg(sock, &msg);
out_put:
fput_light(sock->file, fput_needed);
@@ -2212,11 +2227,10 @@ int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags,
.msg_name = addr ? (struct sockaddr *)&address : NULL,
};
struct socket *sock;
- struct iovec iov;
int err, err2;
int fput_needed;
- err = import_single_range(ITER_DEST, ubuf, size, &iov, &msg.msg_iter);
+ err = import_ubuf(ITER_DEST, ubuf, size, &msg.msg_iter);
if (unlikely(err))
return err;
sock = sockfd_lookup_light(fd, &err, &fput_needed);
@@ -2261,33 +2275,23 @@ static bool sock_use_custom_sol_socket(const struct socket *sock)
return test_bit(SOCK_CUSTOM_SOCKOPT, &sock->flags);
}
-/*
- * Set a socket option. Because we don't know the option lengths we have
- * to pass the user mode parameter for the protocols to sort out.
- */
-int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval,
- int optlen)
+int do_sock_setsockopt(struct socket *sock, bool compat, int level,
+ int optname, sockptr_t optval, int optlen)
{
- sockptr_t optval = USER_SOCKPTR(user_optval);
const struct proto_ops *ops;
char *kernel_optval = NULL;
- int err, fput_needed;
- struct socket *sock;
+ int err;
if (optlen < 0)
return -EINVAL;
- sock = sockfd_lookup_light(fd, &err, &fput_needed);
- if (!sock)
- return err;
-
err = security_socket_setsockopt(sock, level, optname);
if (err)
goto out_put;
- if (!in_compat_syscall())
+ if (!compat)
err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level, &optname,
- user_optval, &optlen,
+ optval, &optlen,
&kernel_optval);
if (err < 0)
goto out_put;
@@ -2308,6 +2312,27 @@ int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval,
optlen);
kfree(kernel_optval);
out_put:
+ return err;
+}
+EXPORT_SYMBOL(do_sock_setsockopt);
+
+/* Set a socket option. Because we don't know the option lengths we have
+ * to pass the user mode parameter for the protocols to sort out.
+ */
+int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval,
+ int optlen)
+{
+ sockptr_t optval = USER_SOCKPTR(user_optval);
+ bool compat = in_compat_syscall();
+ int err, fput_needed;
+ struct socket *sock;
+
+ sock = sockfd_lookup_light(fd, &err, &fput_needed);
+ if (!sock)
+ return err;
+
+ err = do_sock_setsockopt(sock, compat, level, optname, optval, optlen);
+
fput_light(sock->file, fput_needed);
return err;
}
@@ -2321,43 +2346,62 @@ SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
INDIRECT_CALLABLE_DECLARE(bool tcp_bpf_bypass_getsockopt(int level,
int optname));
-/*
- * Get a socket option. Because we don't know the option lengths we have
- * to pass a user mode parameter for the protocols to sort out.
- */
-int __sys_getsockopt(int fd, int level, int optname, char __user *optval,
- int __user *optlen)
+int do_sock_getsockopt(struct socket *sock, bool compat, int level,
+ int optname, sockptr_t optval, sockptr_t optlen)
{
int max_optlen __maybe_unused;
const struct proto_ops *ops;
- int err, fput_needed;
- struct socket *sock;
-
- sock = sockfd_lookup_light(fd, &err, &fput_needed);
- if (!sock)
- return err;
+ int err;
err = security_socket_getsockopt(sock, level, optname);
if (err)
- goto out_put;
+ return err;
- if (!in_compat_syscall())
+ if (!compat)
max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen);
ops = READ_ONCE(sock->ops);
- if (level == SOL_SOCKET)
- err = sock_getsockopt(sock, level, optname, optval, optlen);
- else if (unlikely(!ops->getsockopt))
+ if (level == SOL_SOCKET) {
+ err = sk_getsockopt(sock->sk, level, optname, optval, optlen);
+ } else if (unlikely(!ops->getsockopt)) {
err = -EOPNOTSUPP;
- else
- err = ops->getsockopt(sock, level, optname, optval,
- optlen);
+ } else {
+ if (WARN_ONCE(optval.is_kernel || optlen.is_kernel,
+ "Invalid argument type"))
+ return -EOPNOTSUPP;
+
+ err = ops->getsockopt(sock, level, optname, optval.user,
+ optlen.user);
+ }
- if (!in_compat_syscall())
+ if (!compat)
err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname,
optval, optlen, max_optlen,
err);
-out_put:
+
+ return err;
+}
+EXPORT_SYMBOL(do_sock_getsockopt);
+
+/*
+ * Get a socket option. Because we don't know the option lengths we have
+ * to pass a user mode parameter for the protocols to sort out.
+ */
+int __sys_getsockopt(int fd, int level, int optname, char __user *optval,
+ int __user *optlen)
+{
+ int err, fput_needed;
+ struct socket *sock;
+ bool compat;
+
+ sock = sockfd_lookup_light(fd, &err, &fput_needed);
+ if (!sock)
+ return err;
+
+ compat = in_compat_syscall();
+ err = do_sock_getsockopt(sock, compat, level, optname,
+ USER_SOCKPTR(optval), USER_SOCKPTR(optlen));
+
fput_light(sock->file, fput_needed);
return err;
}
@@ -2537,7 +2581,7 @@ static int ____sys_sendmsg(struct socket *sock, struct msghdr *msg_sys,
err = sock_sendmsg_nosec(sock, msg_sys);
goto out_freectl;
}
- err = sock_sendmsg(sock, msg_sys);
+ err = __sock_sendmsg(sock, msg_sys);
/*
* If this is sendmmsg() and sending to current destination address was
* successful, remember it.
@@ -3498,7 +3542,12 @@ static long compat_sock_ioctl(struct file *file, unsigned int cmd,
int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
{
- return READ_ONCE(sock->ops)->bind(sock, addr, addrlen);
+ struct sockaddr_storage address;
+
+ memcpy(&address, addr, addrlen);
+
+ return READ_ONCE(sock->ops)->bind(sock, (struct sockaddr *)&address,
+ addrlen);
}
EXPORT_SYMBOL(kernel_bind);
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 2f16f9d17966..04534ea537c8 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -40,9 +40,6 @@ static unsigned long number_cred_unused;
static struct cred machine_cred = {
.usage = ATOMIC_INIT(1),
-#ifdef CONFIG_DEBUG_CREDENTIALS
- .magic = CRED_MAGIC,
-#endif
};
/*
@@ -769,9 +766,14 @@ int rpcauth_wrap_req(struct rpc_task *task, struct xdr_stream *xdr)
* @task: controlling RPC task
* @xdr: xdr_stream containing RPC Reply header
*
- * On success, @xdr is updated to point past the verifier and
- * zero is returned. Otherwise, @xdr is in an undefined state
- * and a negative errno is returned.
+ * Return values:
+ * %0: Verifier is valid. @xdr now points past the verifier.
+ * %-EIO: Verifier is corrupted or message ended early.
+ * %-EACCES: Verifier is intact but not valid.
+ * %-EPROTONOSUPPORT: Server does not support the requested auth type.
+ *
+ * When a negative errno is returned, @xdr is left in an undefined
+ * state.
*/
int
rpcauth_checkverf(struct rpc_task *task, struct xdr_stream *xdr)
@@ -861,11 +863,7 @@ rpcauth_uptodatecred(struct rpc_task *task)
test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0;
}
-static struct shrinker rpc_cred_shrinker = {
- .count_objects = rpcauth_cache_shrink_count,
- .scan_objects = rpcauth_cache_shrink_scan,
- .seeks = DEFAULT_SEEKS,
-};
+static struct shrinker *rpc_cred_shrinker;
int __init rpcauth_init_module(void)
{
@@ -874,9 +872,17 @@ int __init rpcauth_init_module(void)
err = rpc_init_authunix();
if (err < 0)
goto out1;
- err = register_shrinker(&rpc_cred_shrinker, "sunrpc_cred");
- if (err < 0)
+ rpc_cred_shrinker = shrinker_alloc(0, "sunrpc_cred");
+ if (!rpc_cred_shrinker) {
+ err = -ENOMEM;
goto out2;
+ }
+
+ rpc_cred_shrinker->count_objects = rpcauth_cache_shrink_count;
+ rpc_cred_shrinker->scan_objects = rpcauth_cache_shrink_scan;
+
+ shrinker_register(rpc_cred_shrinker);
+
return 0;
out2:
rpc_destroy_authunix();
@@ -887,5 +893,5 @@ out1:
void rpcauth_remove_module(void)
{
rpc_destroy_authunix();
- unregister_shrinker(&rpc_cred_shrinker);
+ shrinker_free(rpc_cred_shrinker);
}
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 1af71fbb0d80..c7af0220f82f 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -2280,6 +2280,7 @@ static void __exit exit_rpcsec_gss(void)
}
MODULE_ALIAS("rpc-auth-6");
+MODULE_DESCRIPTION("Sun RPC Kerberos RPCSEC_GSS client authentication");
MODULE_LICENSE("GPL");
module_param_named(expired_cred_retry_delay,
gss_expired_cred_retry_delay,
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index 9734e1d9f991..d2b02710ab07 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -34,9 +34,9 @@
* WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*/
-#include <crypto/algapi.h>
#include <crypto/hash.h>
#include <crypto/skcipher.h>
+#include <crypto/utils.h>
#include <linux/err.h>
#include <linux/types.h>
#include <linux/mm.h>
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index e31cfdf7eadc..64cff717c3d9 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -650,6 +650,7 @@ static void __exit cleanup_kerberos_module(void)
gss_mech_unregister(&gss_kerberos_mech);
}
+MODULE_DESCRIPTION("Sun RPC Kerberos 5 module");
MODULE_LICENSE("GPL");
module_init(init_kerberos_module);
module_exit(cleanup_kerberos_module);
diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c
index 4fbc50a0a2c4..ef0e6af9fc95 100644
--- a/net/sunrpc/auth_gss/gss_krb5_unseal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c
@@ -57,11 +57,9 @@
* WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*/
-#include <crypto/algapi.h>
#include <linux/types.h>
#include <linux/jiffies.h>
#include <linux/sunrpc/gss_krb5.h>
-#include <linux/crypto.h>
#include "gss_krb5_internal.h"
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 18734e70c5dd..24de94184700 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -866,14 +866,6 @@ svcauth_gss_unwrap_integ(struct svc_rqst *rqstp, u32 seq, struct gss_ctx *ctx)
struct xdr_buf databody_integ;
struct xdr_netobj checksum;
- /* NFS READ normally uses splice to send data in-place. However
- * the data in cache can change after the reply's MIC is computed
- * but before the RPC reply is sent. To prevent the client from
- * rejecting the server-computed MIC in this somewhat rare case,
- * do not use splice with the GSS integrity service.
- */
- clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
-
/* Did we already verify the signature on the original pass through? */
if (rqstp->rq_deferred)
return 0;
@@ -948,8 +940,6 @@ svcauth_gss_unwrap_priv(struct svc_rqst *rqstp, u32 seq, struct gss_ctx *ctx)
struct xdr_buf *buf = xdr->buf;
unsigned int saved_len;
- clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
-
if (xdr_stream_decode_u32(xdr, &len) < 0)
goto unwrap_failed;
if (rqstp->rq_deferred) {
@@ -2014,6 +2004,11 @@ svcauth_gss_domain_release(struct auth_domain *dom)
call_rcu(&dom->rcu_head, svcauth_gss_domain_release_rcu);
}
+static rpc_authflavor_t svcauth_gss_pseudoflavor(struct svc_rqst *rqstp)
+{
+ return svcauth_gss_flavor(rqstp->rq_gssclient);
+}
+
static struct auth_ops svcauthops_gss = {
.name = "rpcsec_gss",
.owner = THIS_MODULE,
@@ -2022,6 +2017,7 @@ static struct auth_ops svcauthops_gss = {
.release = svcauth_gss_release,
.domain_release = svcauth_gss_domain_release,
.set_client = svcauth_gss_set_client,
+ .pseudoflavor = svcauth_gss_pseudoflavor,
};
static int rsi_cache_create_net(struct net *net)
diff --git a/net/sunrpc/auth_tls.c b/net/sunrpc/auth_tls.c
index de7678f8a23d..87f570fd3b00 100644
--- a/net/sunrpc/auth_tls.c
+++ b/net/sunrpc/auth_tls.c
@@ -129,9 +129,9 @@ static int tls_validate(struct rpc_task *task, struct xdr_stream *xdr)
if (*p != rpc_auth_null)
return -EIO;
if (xdr_stream_decode_opaque_inline(xdr, &str, starttls_len) != starttls_len)
- return -EIO;
+ return -EPROTONOSUPPORT;
if (memcmp(str, starttls_token, starttls_len))
- return -EIO;
+ return -EPROTONOSUPPORT;
return 0;
}
diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c
index 65a6c6429a53..caa94cf57123 100644
--- a/net/sunrpc/backchannel_rqst.c
+++ b/net/sunrpc/backchannel_rqst.c
@@ -83,7 +83,6 @@ static struct rpc_rqst *xprt_alloc_bc_req(struct rpc_xprt *xprt)
return NULL;
req->rq_xprt = xprt;
- INIT_LIST_HEAD(&req->rq_bc_list);
/* Preallocate one XDR receive buffer */
if (xprt_alloc_xdr_buf(&req->rq_rcv_buf, gfp_flags) < 0) {
@@ -349,10 +348,8 @@ found:
}
/*
- * Add callback request to callback list. The callback
- * service sleeps on the sv_cb_waitq waiting for new
- * requests. Wake it up after adding enqueing the
- * request.
+ * Add callback request to callback list. Wake a thread
+ * on the first pool (usually the only pool) to handle it.
*/
void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied)
{
@@ -369,8 +366,6 @@ void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied)
dprintk("RPC: add callback request to list\n");
xprt_get(xprt);
- spin_lock(&bc_serv->sv_cb_lock);
- list_add(&req->rq_bc_list, &bc_serv->sv_cb_list);
- wake_up(&bc_serv->sv_cb_waitq);
- spin_unlock(&bc_serv->sv_cb_lock);
+ lwq_enqueue(&req->rq_bc_list, &bc_serv->sv_cb_list);
+ svc_pool_wake_idle_thread(&bc_serv->sv_pools[0]);
}
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 8d75290f1a31..cda0935a68c9 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -111,7 +111,8 @@ static void rpc_clnt_remove_pipedir(struct rpc_clnt *clnt)
pipefs_sb = rpc_get_sb_net(net);
if (pipefs_sb) {
- __rpc_clnt_remove_pipedir(clnt);
+ if (pipefs_sb == clnt->pipefs_sb)
+ __rpc_clnt_remove_pipedir(clnt);
rpc_put_sb_net(net);
}
}
@@ -151,6 +152,8 @@ rpc_setup_pipedir(struct super_block *pipefs_sb, struct rpc_clnt *clnt)
{
struct dentry *dentry;
+ clnt->pipefs_sb = pipefs_sb;
+
if (clnt->cl_program->pipe_dir_name != NULL) {
dentry = rpc_setup_pipedir_sb(pipefs_sb, clnt);
if (IS_ERR(dentry))
@@ -284,8 +287,14 @@ static struct rpc_xprt *rpc_clnt_set_transport(struct rpc_clnt *clnt,
static void rpc_clnt_set_nodename(struct rpc_clnt *clnt, const char *nodename)
{
- clnt->cl_nodelen = strlcpy(clnt->cl_nodename,
- nodename, sizeof(clnt->cl_nodename));
+ ssize_t copied;
+
+ copied = strscpy(clnt->cl_nodename,
+ nodename, sizeof(clnt->cl_nodename));
+
+ clnt->cl_nodelen = copied < 0
+ ? sizeof(clnt->cl_nodename) - 1
+ : copied;
}
static int rpc_client_register(struct rpc_clnt *clnt,
@@ -794,15 +803,24 @@ out_revert:
}
EXPORT_SYMBOL_GPL(rpc_switch_client_transport);
-static
-int _rpc_clnt_xprt_iter_init(struct rpc_clnt *clnt, struct rpc_xprt_iter *xpi,
- void func(struct rpc_xprt_iter *xpi, struct rpc_xprt_switch *xps))
+static struct rpc_xprt_switch *rpc_clnt_xprt_switch_get(struct rpc_clnt *clnt)
{
struct rpc_xprt_switch *xps;
rcu_read_lock();
xps = xprt_switch_get(rcu_dereference(clnt->cl_xpi.xpi_xpswitch));
rcu_read_unlock();
+
+ return xps;
+}
+
+static
+int _rpc_clnt_xprt_iter_init(struct rpc_clnt *clnt, struct rpc_xprt_iter *xpi,
+ void func(struct rpc_xprt_iter *xpi, struct rpc_xprt_switch *xps))
+{
+ struct rpc_xprt_switch *xps;
+
+ xps = rpc_clnt_xprt_switch_get(clnt);
if (xps == NULL)
return -EAGAIN;
func(xpi, xps);
@@ -1299,8 +1317,10 @@ static void call_bc_encode(struct rpc_task *task);
* rpc_run_bc_task - Allocate a new RPC task for backchannel use, then run
* rpc_execute against it
* @req: RPC request
+ * @timeout: timeout values to use for this task
*/
-struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req)
+struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req,
+ struct rpc_timeout *timeout)
{
struct rpc_task *task;
struct rpc_task_setup task_setup_data = {
@@ -1319,7 +1339,7 @@ struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req)
return task;
}
- xprt_init_bc_request(req, task);
+ xprt_init_bc_request(req, task, timeout);
task->tk_action = call_bc_encode;
atomic_inc(&task->tk_count);
@@ -2171,6 +2191,7 @@ call_connect_status(struct rpc_task *task)
task->tk_status = 0;
switch (status) {
case -ECONNREFUSED:
+ case -ECONNRESET:
/* A positive refusal suggests a rebind is needed. */
if (RPC_IS_SOFTCONN(task))
break;
@@ -2179,7 +2200,6 @@ call_connect_status(struct rpc_task *task)
goto out_retry;
}
fallthrough;
- case -ECONNRESET:
case -ECONNABORTED:
case -ENETDOWN:
case -ENETUNREACH:
@@ -2203,9 +2223,7 @@ call_connect_status(struct rpc_task *task)
struct rpc_xprt *saved = task->tk_xprt;
struct rpc_xprt_switch *xps;
- rcu_read_lock();
- xps = xprt_switch_get(rcu_dereference(clnt->cl_xpi.xpi_xpswitch));
- rcu_read_unlock();
+ xps = rpc_clnt_xprt_switch_get(clnt);
if (xps->xps_nxprts > 1) {
long value;
@@ -2220,7 +2238,7 @@ call_connect_status(struct rpc_task *task)
}
xprt_switch_put(xps);
if (!task->tk_xprt)
- return;
+ goto out;
}
goto out_retry;
case -ENOBUFS:
@@ -2235,6 +2253,7 @@ out_next:
out_retry:
/* Check for timeouts before looping back to call_bind */
task->tk_action = call_bind;
+out:
rpc_check_timeout(task);
}
@@ -2476,8 +2495,7 @@ call_status(struct rpc_task *task)
goto out_exit;
}
task->tk_action = call_encode;
- if (status != -ECONNRESET && status != -ECONNABORTED)
- rpc_check_timeout(task);
+ rpc_check_timeout(task);
return;
out_exit:
rpc_call_rpcerror(task, status);
@@ -2725,7 +2743,15 @@ out_unparsable:
out_verifier:
trace_rpc_bad_verifier(task);
- goto out_err;
+ switch (error) {
+ case -EPROTONOSUPPORT:
+ goto out_err;
+ case -EACCES:
+ /* Re-encode with a fresh cred */
+ fallthrough;
+ default:
+ goto out_garbage;
+ }
out_msg_denied:
error = -EACCES;
@@ -2751,6 +2777,7 @@ out_msg_denied:
case rpc_autherr_rejectedverf:
case rpcsec_gsserr_credproblem:
case rpcsec_gsserr_ctxproblem:
+ rpcauth_invalcred(task);
if (!task->tk_cred_retry)
break;
task->tk_cred_retry--;
@@ -2907,19 +2934,22 @@ static const struct rpc_call_ops rpc_cb_add_xprt_call_ops = {
* @clnt: pointer to struct rpc_clnt
* @xps: pointer to struct rpc_xprt_switch,
* @xprt: pointer struct rpc_xprt
- * @dummy: unused
+ * @in_max_connect: pointer to the max_connect value for the passed in xprt transport
*/
int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt,
struct rpc_xprt_switch *xps, struct rpc_xprt *xprt,
- void *dummy)
+ void *in_max_connect)
{
struct rpc_cb_add_xprt_calldata *data;
struct rpc_task *task;
+ int max_connect = clnt->cl_max_connect;
- if (xps->xps_nunique_destaddr_xprts + 1 > clnt->cl_max_connect) {
+ if (in_max_connect)
+ max_connect = *(int *)in_max_connect;
+ if (xps->xps_nunique_destaddr_xprts + 1 > max_connect) {
rcu_read_lock();
pr_warn("SUNRPC: reached max allowed number (%d) did not add "
- "transport to server: %s\n", clnt->cl_max_connect,
+ "transport to server: %s\n", max_connect,
rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR));
rcu_read_unlock();
return -EINVAL;
@@ -3101,7 +3131,6 @@ static int rpc_xprt_probe_trunked(struct rpc_clnt *clnt,
struct rpc_xprt *xprt,
struct rpc_add_xprt_test *data)
{
- struct rpc_xprt_switch *xps;
struct rpc_xprt *main_xprt;
int status = 0;
@@ -3109,7 +3138,6 @@ static int rpc_xprt_probe_trunked(struct rpc_clnt *clnt,
rcu_read_lock();
main_xprt = xprt_get(rcu_dereference(clnt->cl_xprt));
- xps = xprt_switch_get(rcu_dereference(clnt->cl_xpi.xpi_xpswitch));
status = rpc_cmp_addr_port((struct sockaddr *)&xprt->addr,
(struct sockaddr *)&main_xprt->addr);
rcu_read_unlock();
@@ -3120,7 +3148,6 @@ static int rpc_xprt_probe_trunked(struct rpc_clnt *clnt,
status = rpc_clnt_add_xprt_helper(clnt, xprt, data);
out:
xprt_put(xprt);
- xprt_switch_put(xps);
return status;
}
@@ -3235,34 +3262,27 @@ rpc_set_connect_timeout(struct rpc_clnt *clnt,
}
EXPORT_SYMBOL_GPL(rpc_set_connect_timeout);
-void rpc_clnt_xprt_switch_put(struct rpc_clnt *clnt)
-{
- rcu_read_lock();
- xprt_switch_put(rcu_dereference(clnt->cl_xpi.xpi_xpswitch));
- rcu_read_unlock();
-}
-EXPORT_SYMBOL_GPL(rpc_clnt_xprt_switch_put);
-
void rpc_clnt_xprt_set_online(struct rpc_clnt *clnt, struct rpc_xprt *xprt)
{
struct rpc_xprt_switch *xps;
- rcu_read_lock();
- xps = rcu_dereference(clnt->cl_xpi.xpi_xpswitch);
- rcu_read_unlock();
+ xps = rpc_clnt_xprt_switch_get(clnt);
xprt_set_online_locked(xprt, xps);
+ xprt_switch_put(xps);
}
void rpc_clnt_xprt_switch_add_xprt(struct rpc_clnt *clnt, struct rpc_xprt *xprt)
{
+ struct rpc_xprt_switch *xps;
+
if (rpc_clnt_xprt_switch_has_addr(clnt,
(const struct sockaddr *)&xprt->addr)) {
return rpc_clnt_xprt_set_online(clnt, xprt);
}
- rcu_read_lock();
- rpc_xprt_switch_add_xprt(rcu_dereference(clnt->cl_xpi.xpi_xpswitch),
- xprt);
- rcu_read_unlock();
+
+ xps = rpc_clnt_xprt_switch_get(clnt);
+ rpc_xprt_switch_add_xprt(xps, xprt);
+ xprt_switch_put(xps);
}
EXPORT_SYMBOL_GPL(rpc_clnt_xprt_switch_add_xprt);
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index f420d8457345..dcc2b4f49e77 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -472,7 +472,7 @@ rpc_get_inode(struct super_block *sb, umode_t mode)
return NULL;
inode->i_ino = get_next_ino();
inode->i_mode = mode;
- inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
+ simple_inode_init_ts(inode);
switch (mode & S_IFMT) {
case S_IFDIR:
inode->i_fop = &simple_dir_operations;
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 5988a5c5ff3f..102c3818bc54 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -769,6 +769,10 @@ void rpcb_getport_async(struct rpc_task *task)
child = rpcb_call_async(rpcb_clnt, map, proc);
rpc_release_client(rpcb_clnt);
+ if (IS_ERR(child)) {
+ /* rpcb_map_release() has freed the arguments */
+ return;
+ }
xprt->stat.bind_count++;
rpc_put_task(child);
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index 691c0000e9ea..bab6cab29405 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -148,6 +148,7 @@ cleanup_sunrpc(void)
#endif
rcu_barrier(); /* Wait for completion of call_rcu()'s */
}
+MODULE_DESCRIPTION("Sun RPC core");
MODULE_LICENSE("GPL");
fs_initcall(init_sunrpc); /* Ensure we're initialised before nfs */
module_exit(cleanup_sunrpc);
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 812fda9d45dd..b969e505c7b7 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -438,9 +438,7 @@ EXPORT_SYMBOL_GPL(svc_bind);
static void
__svc_init_bc(struct svc_serv *serv)
{
- INIT_LIST_HEAD(&serv->sv_cb_list);
- spin_lock_init(&serv->sv_cb_lock);
- init_waitqueue_head(&serv->sv_cb_waitq);
+ lwq_init(&serv->sv_cb_list);
}
#else
static void
@@ -465,7 +463,6 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
return NULL;
serv->sv_name = prog->pg_name;
serv->sv_program = prog;
- kref_init(&serv->sv_refcnt);
serv->sv_stats = prog->pg_stats;
if (bufsize > RPCSVC_MAXPAYLOAD)
bufsize = RPCSVC_MAXPAYLOAD;
@@ -509,9 +506,9 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
i, serv->sv_name);
pool->sp_id = i;
- INIT_LIST_HEAD(&pool->sp_sockets);
+ lwq_init(&pool->sp_xprts);
INIT_LIST_HEAD(&pool->sp_all_threads);
- spin_lock_init(&pool->sp_lock);
+ init_llist_head(&pool->sp_idle_threads);
percpu_counter_init(&pool->sp_messages_arrived, 0, GFP_KERNEL);
percpu_counter_init(&pool->sp_sockets_queued, 0, GFP_KERNEL);
@@ -566,20 +563,23 @@ EXPORT_SYMBOL_GPL(svc_create_pooled);
* protect sv_permsocks and sv_tempsocks.
*/
void
-svc_destroy(struct kref *ref)
+svc_destroy(struct svc_serv **servp)
{
- struct svc_serv *serv = container_of(ref, struct svc_serv, sv_refcnt);
+ struct svc_serv *serv = *servp;
unsigned int i;
+ *servp = NULL;
+
dprintk("svc: svc_destroy(%s)\n", serv->sv_program->pg_name);
timer_shutdown_sync(&serv->sv_temptimer);
/*
- * The last user is gone and thus all sockets have to be destroyed to
- * the point. Check this.
+ * Remaining transports at this point are not expected.
*/
- BUG_ON(!list_empty(&serv->sv_permsocks));
- BUG_ON(!list_empty(&serv->sv_tempsocks));
+ WARN_ONCE(!list_empty(&serv->sv_permsocks),
+ "SVC: permsocks remain for %s\n", serv->sv_program->pg_name);
+ WARN_ONCE(!list_empty(&serv->sv_tempsocks),
+ "SVC: tempsocks remain for %s\n", serv->sv_program->pg_name);
cache_clean_deferred(serv);
@@ -642,7 +642,6 @@ svc_rqst_alloc(struct svc_serv *serv, struct svc_pool *pool, int node)
folio_batch_init(&rqstp->rq_fbatch);
- __set_bit(RQ_BUSY, &rqstp->rq_flags);
rqstp->rq_server = serv;
rqstp->rq_pool = pool;
@@ -677,15 +676,17 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
if (!rqstp)
return ERR_PTR(-ENOMEM);
- svc_get(serv);
spin_lock_bh(&serv->sv_lock);
serv->sv_nrthreads += 1;
spin_unlock_bh(&serv->sv_lock);
- spin_lock_bh(&pool->sp_lock);
- pool->sp_nrthreads++;
+ atomic_inc(&pool->sp_nrthreads);
+
+ /* Protected by whatever lock the service uses when calling
+ * svc_set_num_threads()
+ */
list_add_rcu(&rqstp->rq_all, &pool->sp_all_threads);
- spin_unlock_bh(&pool->sp_lock);
+
return rqstp;
}
@@ -701,23 +702,25 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
void svc_pool_wake_idle_thread(struct svc_pool *pool)
{
struct svc_rqst *rqstp;
+ struct llist_node *ln;
rcu_read_lock();
- list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) {
- if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags))
- continue;
-
+ ln = READ_ONCE(pool->sp_idle_threads.first);
+ if (ln) {
+ rqstp = llist_entry(ln, struct svc_rqst, rq_idle);
WRITE_ONCE(rqstp->rq_qtime, ktime_get());
- wake_up_process(rqstp->rq_task);
+ if (!task_is_running(rqstp->rq_task)) {
+ wake_up_process(rqstp->rq_task);
+ trace_svc_wake_up(rqstp->rq_task->pid);
+ percpu_counter_inc(&pool->sp_threads_woken);
+ }
rcu_read_unlock();
- percpu_counter_inc(&pool->sp_threads_woken);
- trace_svc_wake_up(rqstp->rq_task->pid);
return;
}
rcu_read_unlock();
- set_bit(SP_CONGESTED, &pool->sp_flags);
}
+EXPORT_SYMBOL_GPL(svc_pool_wake_idle_thread);
static struct svc_pool *
svc_pool_next(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
@@ -725,36 +728,38 @@ svc_pool_next(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
return pool ? pool : &serv->sv_pools[(*state)++ % serv->sv_nrpools];
}
-static struct task_struct *
-svc_pool_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
+static struct svc_pool *
+svc_pool_victim(struct svc_serv *serv, struct svc_pool *target_pool,
+ unsigned int *state)
{
+ struct svc_pool *pool;
unsigned int i;
- struct task_struct *task = NULL;
+
+retry:
+ pool = target_pool;
if (pool != NULL) {
- spin_lock_bh(&pool->sp_lock);
+ if (atomic_inc_not_zero(&pool->sp_nrthreads))
+ goto found_pool;
+ return NULL;
} else {
for (i = 0; i < serv->sv_nrpools; i++) {
pool = &serv->sv_pools[--(*state) % serv->sv_nrpools];
- spin_lock_bh(&pool->sp_lock);
- if (!list_empty(&pool->sp_all_threads))
+ if (atomic_inc_not_zero(&pool->sp_nrthreads))
goto found_pool;
- spin_unlock_bh(&pool->sp_lock);
}
return NULL;
}
found_pool:
- if (!list_empty(&pool->sp_all_threads)) {
- struct svc_rqst *rqstp;
-
- rqstp = list_entry(pool->sp_all_threads.next, struct svc_rqst, rq_all);
- set_bit(RQ_VICTIM, &rqstp->rq_flags);
- list_del_rcu(&rqstp->rq_all);
- task = rqstp->rq_task;
- }
- spin_unlock_bh(&pool->sp_lock);
- return task;
+ set_bit(SP_VICTIM_REMAINS, &pool->sp_flags);
+ set_bit(SP_NEED_VICTIM, &pool->sp_flags);
+ if (!atomic_dec_and_test(&pool->sp_nrthreads))
+ return pool;
+ /* Nothing left in this pool any more */
+ clear_bit(SP_NEED_VICTIM, &pool->sp_flags);
+ clear_bit(SP_VICTIM_REMAINS, &pool->sp_flags);
+ goto retry;
}
static int
@@ -795,18 +800,16 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
static int
svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
{
- struct svc_rqst *rqstp;
- struct task_struct *task;
unsigned int state = serv->sv_nrthreads-1;
+ struct svc_pool *victim;
do {
- task = svc_pool_victim(serv, pool, &state);
- if (task == NULL)
+ victim = svc_pool_victim(serv, pool, &state);
+ if (!victim)
break;
- rqstp = kthread_data(task);
- /* Did we lose a race to svo_function threadfn? */
- if (kthread_stop(task) == -EINTR)
- svc_exit_thread(rqstp);
+ svc_pool_wake_idle_thread(victim);
+ wait_on_bit(&victim->sp_flags, SP_VICTIM_REMAINS,
+ TASK_IDLE);
nrservs++;
} while (nrservs < 0);
return 0;
@@ -832,13 +835,10 @@ svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
int
svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
{
- if (pool == NULL) {
+ if (!pool)
nrservs -= serv->sv_nrthreads;
- } else {
- spin_lock_bh(&pool->sp_lock);
- nrservs -= pool->sp_nrthreads;
- spin_unlock_bh(&pool->sp_lock);
- }
+ else
+ nrservs -= atomic_read(&pool->sp_nrthreads);
if (nrservs > 0)
return svc_start_kthreads(serv, pool, nrservs);
@@ -924,11 +924,9 @@ svc_exit_thread(struct svc_rqst *rqstp)
struct svc_serv *serv = rqstp->rq_server;
struct svc_pool *pool = rqstp->rq_pool;
- spin_lock_bh(&pool->sp_lock);
- pool->sp_nrthreads--;
- if (!test_and_set_bit(RQ_VICTIM, &rqstp->rq_flags))
- list_del_rcu(&rqstp->rq_all);
- spin_unlock_bh(&pool->sp_lock);
+ list_del_rcu(&rqstp->rq_all);
+
+ atomic_dec(&pool->sp_nrthreads);
spin_lock_bh(&serv->sv_lock);
serv->sv_nrthreads -= 1;
@@ -937,7 +935,7 @@ svc_exit_thread(struct svc_rqst *rqstp)
svc_rqst_free(rqstp);
- svc_put(serv);
+ clear_and_wake_up_bit(SP_VICTIM_REMAINS, &pool->sp_flags);
}
EXPORT_SYMBOL_GPL(svc_exit_thread);
@@ -1302,8 +1300,6 @@ svc_process_common(struct svc_rqst *rqstp)
int rc;
__be32 *p;
- /* Will be turned off by GSS integrity and privacy services */
- set_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
/* Will be turned off only when NFSv4 Sessions are used */
set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
clear_bit(RQ_DROPME, &rqstp->rq_flags);
@@ -1544,24 +1540,21 @@ out_drop:
}
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
-/*
- * Process a backchannel RPC request that arrived over an existing
- * outbound connection
+/**
+ * svc_process_bc - process a reverse-direction RPC request
+ * @req: RPC request to be used for client-side processing
+ * @rqstp: server-side execution context
+ *
*/
-int
-bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
- struct svc_rqst *rqstp)
+void svc_process_bc(struct rpc_rqst *req, struct svc_rqst *rqstp)
{
struct rpc_task *task;
int proc_error;
- int error;
-
- dprintk("svc: %s(%p)\n", __func__, req);
+ struct rpc_timeout timeout;
/* Build the svc_rqst used by the common processing routine */
rqstp->rq_xid = req->rq_xid;
rqstp->rq_prot = req->rq_xprt->prot;
- rqstp->rq_server = serv;
rqstp->rq_bc_net = req->rq_xprt->xprt_net;
rqstp->rq_addrlen = sizeof(req->rq_xprt->addr);
@@ -1590,10 +1583,8 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
* been processed by the caller.
*/
svcxdr_init_decode(rqstp);
- if (!xdr_inline_decode(&rqstp->rq_arg_stream, XDR_UNIT * 2)) {
- error = -EINVAL;
- goto out;
- }
+ if (!xdr_inline_decode(&rqstp->rq_arg_stream, XDR_UNIT * 2))
+ return;
/* Parse and execute the bc call */
proc_error = svc_process_common(rqstp);
@@ -1602,26 +1593,26 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
if (!proc_error) {
/* Processing error: drop the request */
xprt_free_bc_request(req);
- error = -EINVAL;
- goto out;
+ return;
}
/* Finally, send the reply synchronously */
- memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf));
- task = rpc_run_bc_task(req);
- if (IS_ERR(task)) {
- error = PTR_ERR(task);
- goto out;
+ if (rqstp->bc_to_initval > 0) {
+ timeout.to_initval = rqstp->bc_to_initval;
+ timeout.to_retries = rqstp->bc_to_retries;
+ } else {
+ timeout.to_initval = req->rq_xprt->timeout->to_initval;
+ timeout.to_retries = req->rq_xprt->timeout->to_retries;
}
+ memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf));
+ task = rpc_run_bc_task(req, &timeout);
+
+ if (IS_ERR(task))
+ return;
WARN_ON_ONCE(atomic_read(&task->tk_count) != 1);
- error = task->tk_status;
rpc_put_task(task);
-
-out:
- dprintk("svc: %s(), error=%d\n", __func__, error);
- return error;
}
-EXPORT_SYMBOL_GPL(bc_svc_process);
+EXPORT_SYMBOL_GPL(svc_process_bc);
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
/**
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 4cfe9640df48..b4a85a227bd7 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -9,7 +9,6 @@
#include <linux/sched/mm.h>
#include <linux/errno.h>
#include <linux/freezer.h>
-#include <linux/kthread.h>
#include <linux/slab.h>
#include <net/sock.h>
#include <linux/sunrpc/addr.h>
@@ -17,6 +16,7 @@
#include <linux/sunrpc/svc_xprt.h>
#include <linux/sunrpc/svcsock.h>
#include <linux/sunrpc/xprt.h>
+#include <linux/sunrpc/bc_xprt.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <trace/events/sunrpc.h>
@@ -201,7 +201,6 @@ void svc_xprt_init(struct net *net, struct svc_xprt_class *xcl,
kref_init(&xprt->xpt_ref);
xprt->xpt_server = serv;
INIT_LIST_HEAD(&xprt->xpt_list);
- INIT_LIST_HEAD(&xprt->xpt_ready);
INIT_LIST_HEAD(&xprt->xpt_deferred);
INIT_LIST_HEAD(&xprt->xpt_users);
mutex_init(&xprt->xpt_mutex);
@@ -472,9 +471,7 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
pool = svc_pool_for_cpu(xprt->xpt_server);
percpu_counter_inc(&pool->sp_sockets_queued);
- spin_lock_bh(&pool->sp_lock);
- list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
- spin_unlock_bh(&pool->sp_lock);
+ lwq_enqueue(&xprt->xpt_ready, &pool->sp_xprts);
svc_pool_wake_idle_thread(pool);
}
@@ -487,18 +484,9 @@ static struct svc_xprt *svc_xprt_dequeue(struct svc_pool *pool)
{
struct svc_xprt *xprt = NULL;
- if (list_empty(&pool->sp_sockets))
- goto out;
-
- spin_lock_bh(&pool->sp_lock);
- if (likely(!list_empty(&pool->sp_sockets))) {
- xprt = list_first_entry(&pool->sp_sockets,
- struct svc_xprt, xpt_ready);
- list_del_init(&xprt->xpt_ready);
+ xprt = lwq_dequeue(&pool->sp_xprts, struct svc_xprt, xpt_ready);
+ if (xprt)
svc_xprt_get(xprt);
- }
- spin_unlock_bh(&pool->sp_lock);
-out:
return xprt;
}
@@ -666,15 +654,14 @@ static bool svc_alloc_arg(struct svc_rqst *rqstp)
}
for (filled = 0; filled < pages; filled = ret) {
- ret = alloc_pages_bulk_array_node(GFP_KERNEL,
- rqstp->rq_pool->sp_id,
- pages, rqstp->rq_pages);
+ ret = alloc_pages_bulk_array(GFP_KERNEL, pages,
+ rqstp->rq_pages);
if (ret > filled)
/* Made progress, don't sleep yet */
continue;
set_current_state(TASK_IDLE);
- if (kthread_should_stop()) {
+ if (svc_thread_should_stop(rqstp)) {
set_current_state(TASK_RUNNING);
return false;
}
@@ -699,7 +686,7 @@ static bool svc_alloc_arg(struct svc_rqst *rqstp)
}
static bool
-rqst_should_sleep(struct svc_rqst *rqstp)
+svc_thread_should_sleep(struct svc_rqst *rqstp)
{
struct svc_pool *pool = rqstp->rq_pool;
@@ -708,65 +695,51 @@ rqst_should_sleep(struct svc_rqst *rqstp)
return false;
/* was a socket queued? */
- if (!list_empty(&pool->sp_sockets))
+ if (!lwq_empty(&pool->sp_xprts))
return false;
/* are we shutting down? */
- if (kthread_should_stop())
+ if (svc_thread_should_stop(rqstp))
return false;
- /* are we freezing? */
- if (freezing(current))
- return false;
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
+ if (svc_is_backchannel(rqstp)) {
+ if (!lwq_empty(&rqstp->rq_server->sv_cb_list))
+ return false;
+ }
+#endif
return true;
}
-static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp)
+static void svc_thread_wait_for_work(struct svc_rqst *rqstp)
{
- struct svc_pool *pool = rqstp->rq_pool;
-
- /* rq_xprt should be clear on entry */
- WARN_ON_ONCE(rqstp->rq_xprt);
-
- rqstp->rq_xprt = svc_xprt_dequeue(pool);
- if (rqstp->rq_xprt)
- goto out_found;
-
- set_current_state(TASK_IDLE);
- smp_mb__before_atomic();
- clear_bit(SP_CONGESTED, &pool->sp_flags);
- clear_bit(RQ_BUSY, &rqstp->rq_flags);
- smp_mb__after_atomic();
-
- if (likely(rqst_should_sleep(rqstp)))
- schedule();
- else
+ struct svc_pool *pool = rqstp->rq_pool;
+
+ if (svc_thread_should_sleep(rqstp)) {
+ set_current_state(TASK_IDLE | TASK_FREEZABLE);
+ llist_add(&rqstp->rq_idle, &pool->sp_idle_threads);
+ if (likely(svc_thread_should_sleep(rqstp)))
+ schedule();
+
+ while (!llist_del_first_this(&pool->sp_idle_threads,
+ &rqstp->rq_idle)) {
+ /* Work just became available. This thread can only
+ * handle it after removing rqstp from the idle
+ * list. If that attempt failed, some other thread
+ * must have queued itself after finding no
+ * work to do, so that thread has taken responsibly
+ * for this new work. This thread can safely sleep
+ * until woken again.
+ */
+ schedule();
+ set_current_state(TASK_IDLE | TASK_FREEZABLE);
+ }
__set_current_state(TASK_RUNNING);
-
+ } else {
+ cond_resched();
+ }
try_to_freeze();
-
- set_bit(RQ_BUSY, &rqstp->rq_flags);
- smp_mb__after_atomic();
- clear_bit(SP_TASK_PENDING, &pool->sp_flags);
- rqstp->rq_xprt = svc_xprt_dequeue(pool);
- if (rqstp->rq_xprt)
- goto out_found;
-
- if (kthread_should_stop())
- return NULL;
- return NULL;
-out_found:
- clear_bit(SP_TASK_PENDING, &pool->sp_flags);
- /* Normally we will wait up to 5 seconds for any required
- * cache information to be provided.
- */
- if (!test_bit(SP_CONGESTED, &pool->sp_flags))
- rqstp->rq_chandle.thread_wait = 5*HZ;
- else
- rqstp->rq_chandle.thread_wait = 1*HZ;
- trace_svc_xprt_dequeue(rqstp);
- return rqstp->rq_xprt;
}
static void svc_add_new_temp_xprt(struct svc_serv *serv, struct svc_xprt *newxpt)
@@ -785,7 +758,7 @@ static void svc_add_new_temp_xprt(struct svc_serv *serv, struct svc_xprt *newxpt
svc_xprt_received(newxpt);
}
-static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
+static void svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
{
struct svc_serv *serv = rqstp->rq_server;
int len = 0;
@@ -826,11 +799,35 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
len = xprt->xpt_ops->xpo_recvfrom(rqstp);
rqstp->rq_reserved = serv->sv_max_mesg;
atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
+ if (len <= 0)
+ goto out;
+
+ trace_svc_xdr_recvfrom(&rqstp->rq_arg);
+
+ clear_bit(XPT_OLD, &xprt->xpt_flags);
+
+ rqstp->rq_chandle.defer = svc_defer;
+
+ if (serv->sv_stats)
+ serv->sv_stats->netcnt++;
+ percpu_counter_inc(&rqstp->rq_pool->sp_messages_arrived);
+ rqstp->rq_stime = ktime_get();
+ svc_process(rqstp);
} else
svc_xprt_received(xprt);
out:
- return len;
+ rqstp->rq_res.len = 0;
+ svc_xprt_release(rqstp);
+}
+
+static void svc_thread_wake_next(struct svc_rqst *rqstp)
+{
+ if (!svc_thread_should_sleep(rqstp))
+ /* More work pending after I dequeued some,
+ * wake another worker
+ */
+ svc_pool_wake_idle_thread(rqstp->rq_pool);
}
/**
@@ -843,44 +840,51 @@ out:
*/
void svc_recv(struct svc_rqst *rqstp)
{
- struct svc_xprt *xprt = NULL;
- struct svc_serv *serv = rqstp->rq_server;
- int len;
+ struct svc_pool *pool = rqstp->rq_pool;
if (!svc_alloc_arg(rqstp))
- goto out;
+ return;
- try_to_freeze();
- cond_resched();
- if (kthread_should_stop())
- goto out;
+ svc_thread_wait_for_work(rqstp);
- xprt = svc_get_next_xprt(rqstp);
- if (!xprt)
- goto out;
+ clear_bit(SP_TASK_PENDING, &pool->sp_flags);
- len = svc_handle_xprt(rqstp, xprt);
+ if (svc_thread_should_stop(rqstp)) {
+ svc_thread_wake_next(rqstp);
+ return;
+ }
- /* No data, incomplete (TCP) read, or accept() */
- if (len <= 0)
- goto out_release;
+ rqstp->rq_xprt = svc_xprt_dequeue(pool);
+ if (rqstp->rq_xprt) {
+ struct svc_xprt *xprt = rqstp->rq_xprt;
- trace_svc_xdr_recvfrom(&rqstp->rq_arg);
+ svc_thread_wake_next(rqstp);
+ /* Normally we will wait up to 5 seconds for any required
+ * cache information to be provided. When there are no
+ * idle threads, we reduce the wait time.
+ */
+ if (pool->sp_idle_threads.first)
+ rqstp->rq_chandle.thread_wait = 5 * HZ;
+ else
+ rqstp->rq_chandle.thread_wait = 1 * HZ;
- clear_bit(XPT_OLD, &xprt->xpt_flags);
+ trace_svc_xprt_dequeue(rqstp);
+ svc_handle_xprt(rqstp, xprt);
+ }
- rqstp->rq_chandle.defer = svc_defer;
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
+ if (svc_is_backchannel(rqstp)) {
+ struct svc_serv *serv = rqstp->rq_server;
+ struct rpc_rqst *req;
- if (serv->sv_stats)
- serv->sv_stats->netcnt++;
- percpu_counter_inc(&rqstp->rq_pool->sp_messages_arrived);
- rqstp->rq_stime = ktime_get();
- svc_process(rqstp);
-out:
- return;
-out_release:
- rqstp->rq_res.len = 0;
- svc_xprt_release(rqstp);
+ req = lwq_dequeue(&serv->sv_cb_list,
+ struct rpc_rqst, rq_bc_list);
+ if (req) {
+ svc_thread_wake_next(rqstp);
+ svc_process_bc(req, rqstp);
+ }
+ }
+#endif
}
EXPORT_SYMBOL_GPL(svc_recv);
@@ -890,7 +894,6 @@ EXPORT_SYMBOL_GPL(svc_recv);
void svc_drop(struct svc_rqst *rqstp)
{
trace_svc_drop(rqstp);
- svc_xprt_release(rqstp);
}
EXPORT_SYMBOL_GPL(svc_drop);
@@ -906,8 +909,6 @@ void svc_send(struct svc_rqst *rqstp)
int status;
xprt = rqstp->rq_xprt;
- if (!xprt)
- return;
/* calculate over-all length */
xb = &rqstp->rq_res;
@@ -920,7 +921,6 @@ void svc_send(struct svc_rqst *rqstp)
status = xprt->xpt_ops->xpo_sendto(rqstp);
trace_svc_send(rqstp, status);
- svc_xprt_release(rqstp);
}
/*
@@ -1031,7 +1031,6 @@ static void svc_delete_xprt(struct svc_xprt *xprt)
spin_lock_bh(&serv->sv_lock);
list_del_init(&xprt->xpt_list);
- WARN_ON_ONCE(!list_empty(&xprt->xpt_ready));
if (test_bit(XPT_TEMP, &xprt->xpt_flags))
serv->sv_tmpcnt--;
spin_unlock_bh(&serv->sv_lock);
@@ -1082,36 +1081,26 @@ static int svc_close_list(struct svc_serv *serv, struct list_head *xprt_list, st
return ret;
}
-static struct svc_xprt *svc_dequeue_net(struct svc_serv *serv, struct net *net)
+static void svc_clean_up_xprts(struct svc_serv *serv, struct net *net)
{
- struct svc_pool *pool;
struct svc_xprt *xprt;
- struct svc_xprt *tmp;
int i;
for (i = 0; i < serv->sv_nrpools; i++) {
- pool = &serv->sv_pools[i];
-
- spin_lock_bh(&pool->sp_lock);
- list_for_each_entry_safe(xprt, tmp, &pool->sp_sockets, xpt_ready) {
- if (xprt->xpt_net != net)
- continue;
- list_del_init(&xprt->xpt_ready);
- spin_unlock_bh(&pool->sp_lock);
- return xprt;
+ struct svc_pool *pool = &serv->sv_pools[i];
+ struct llist_node *q, **t1, *t2;
+
+ q = lwq_dequeue_all(&pool->sp_xprts);
+ lwq_for_each_safe(xprt, t1, t2, &q, xpt_ready) {
+ if (xprt->xpt_net == net) {
+ set_bit(XPT_CLOSE, &xprt->xpt_flags);
+ svc_delete_xprt(xprt);
+ xprt = NULL;
+ }
}
- spin_unlock_bh(&pool->sp_lock);
- }
- return NULL;
-}
-static void svc_clean_up_xprts(struct svc_serv *serv, struct net *net)
-{
- struct svc_xprt *xprt;
-
- while ((xprt = svc_dequeue_net(serv, net))) {
- set_bit(XPT_CLOSE, &xprt->xpt_flags);
- svc_delete_xprt(xprt);
+ if (q)
+ lwq_enqueue_batch(q, &pool->sp_xprts);
}
}
@@ -1373,29 +1362,36 @@ int svc_xprt_names(struct svc_serv *serv, char *buf, const int buflen)
}
EXPORT_SYMBOL_GPL(svc_xprt_names);
-
/*----------------------------------------------------------------------------*/
static void *svc_pool_stats_start(struct seq_file *m, loff_t *pos)
{
unsigned int pidx = (unsigned int)*pos;
- struct svc_serv *serv = m->private;
+ struct svc_info *si = m->private;
dprintk("svc_pool_stats_start, *pidx=%u\n", pidx);
+ mutex_lock(si->mutex);
+
if (!pidx)
return SEQ_START_TOKEN;
- return (pidx > serv->sv_nrpools ? NULL : &serv->sv_pools[pidx-1]);
+ if (!si->serv)
+ return NULL;
+ return pidx > si->serv->sv_nrpools ? NULL
+ : &si->serv->sv_pools[pidx - 1];
}
static void *svc_pool_stats_next(struct seq_file *m, void *p, loff_t *pos)
{
struct svc_pool *pool = p;
- struct svc_serv *serv = m->private;
+ struct svc_info *si = m->private;
+ struct svc_serv *serv = si->serv;
dprintk("svc_pool_stats_next, *pos=%llu\n", *pos);
- if (p == SEQ_START_TOKEN) {
+ if (!serv) {
+ pool = NULL;
+ } else if (p == SEQ_START_TOKEN) {
pool = &serv->sv_pools[0];
} else {
unsigned int pidx = (pool - &serv->sv_pools[0]);
@@ -1410,6 +1406,9 @@ static void *svc_pool_stats_next(struct seq_file *m, void *p, loff_t *pos)
static void svc_pool_stats_stop(struct seq_file *m, void *p)
{
+ struct svc_info *si = m->private;
+
+ mutex_unlock(si->mutex);
}
static int svc_pool_stats_show(struct seq_file *m, void *p)
@@ -1437,14 +1436,18 @@ static const struct seq_operations svc_pool_stats_seq_ops = {
.show = svc_pool_stats_show,
};
-int svc_pool_stats_open(struct svc_serv *serv, struct file *file)
+int svc_pool_stats_open(struct svc_info *info, struct file *file)
{
+ struct seq_file *seq;
int err;
err = seq_open(file, &svc_pool_stats_seq_ops);
- if (!err)
- ((struct seq_file *) file->private_data)->private = serv;
- return err;
+ if (err)
+ return err;
+ seq = file->private_data;
+ seq->private = info;
+
+ return 0;
}
EXPORT_SYMBOL(svc_pool_stats_open);
diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c
index aa4429d0b810..1619211f0960 100644
--- a/net/sunrpc/svcauth.c
+++ b/net/sunrpc/svcauth.c
@@ -160,6 +160,22 @@ svc_auth_unregister(rpc_authflavor_t flavor)
}
EXPORT_SYMBOL_GPL(svc_auth_unregister);
+/**
+ * svc_auth_flavor - return RPC transaction's RPC_AUTH flavor
+ * @rqstp: RPC transaction context
+ *
+ * Returns an RPC flavor or GSS pseudoflavor.
+ */
+rpc_authflavor_t svc_auth_flavor(struct svc_rqst *rqstp)
+{
+ struct auth_ops *aops = rqstp->rq_authop;
+
+ if (!aops->pseudoflavor)
+ return aops->flavour;
+ return aops->pseudoflavor(rqstp);
+}
+EXPORT_SYMBOL_GPL(svc_auth_flavor);
+
/**************************************************
* 'auth_domains' are stored in a hash table indexed by name.
* When the last reference to an 'auth_domain' is dropped,
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 998687421fa6..545017a3daa4 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -717,12 +717,12 @@ static int svc_udp_sendto(struct svc_rqst *rqstp)
ARRAY_SIZE(rqstp->rq_bvec), xdr);
iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,
- count, 0);
+ count, rqstp->rq_res.len);
err = sock_sendmsg(svsk->sk_sock, &msg);
if (err == -ECONNREFUSED) {
/* ICMP error on earlier request. */
iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,
- count, 0);
+ count, rqstp->rq_res.len);
err = sock_sendmsg(svsk->sk_sock, &msg);
}
@@ -1049,18 +1049,14 @@ static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp)
struct rpc_rqst *req = NULL;
struct kvec *src, *dst;
__be32 *p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
- __be32 xid;
- __be32 calldir;
-
- xid = *p++;
- calldir = *p;
+ __be32 xid = *p;
if (!bc_xprt)
return -EAGAIN;
spin_lock(&bc_xprt->queue_lock);
req = xprt_lookup_rqst(bc_xprt, xid);
if (!req)
- goto unlock_notfound;
+ goto unlock_eagain;
memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(struct xdr_buf));
/*
@@ -1077,12 +1073,6 @@ static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp)
rqstp->rq_arg.len = 0;
spin_unlock(&bc_xprt->queue_lock);
return 0;
-unlock_notfound:
- printk(KERN_NOTICE
- "%s: Got unrecognized reply: "
- "calldir 0x%x xpt_bc_xprt %p xid %08x\n",
- __func__, ntohl(calldir),
- bc_xprt, ntohl(xid));
unlock_eagain:
spin_unlock(&bc_xprt->queue_lock);
return -EAGAIN;
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index ab453ede54f0..af13fdfa6672 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -283,7 +283,7 @@ out_unlock:
xprt_clear_locked(xprt);
out_sleep:
task->tk_status = -EAGAIN;
- if (RPC_IS_SOFT(task))
+ if (RPC_IS_SOFT(task) || RPC_IS_SOFTCONN(task))
rpc_sleep_on_timeout(&xprt->sending, task, NULL,
xprt_request_timeout(req));
else
@@ -349,7 +349,7 @@ out_unlock:
xprt_clear_locked(xprt);
out_sleep:
task->tk_status = -EAGAIN;
- if (RPC_IS_SOFT(task))
+ if (RPC_IS_SOFT(task) || RPC_IS_SOFTCONN(task))
rpc_sleep_on_timeout(&xprt->sending, task, NULL,
xprt_request_timeout(req));
else
@@ -651,9 +651,9 @@ static unsigned long xprt_abs_ktime_to_jiffies(ktime_t abstime)
jiffies + nsecs_to_jiffies(-delta);
}
-static unsigned long xprt_calc_majortimeo(struct rpc_rqst *req)
+static unsigned long xprt_calc_majortimeo(struct rpc_rqst *req,
+ const struct rpc_timeout *to)
{
- const struct rpc_timeout *to = req->rq_task->tk_client->cl_timeout;
unsigned long majortimeo = req->rq_timeout;
if (to->to_exponential)
@@ -665,9 +665,10 @@ static unsigned long xprt_calc_majortimeo(struct rpc_rqst *req)
return majortimeo;
}
-static void xprt_reset_majortimeo(struct rpc_rqst *req)
+static void xprt_reset_majortimeo(struct rpc_rqst *req,
+ const struct rpc_timeout *to)
{
- req->rq_majortimeo += xprt_calc_majortimeo(req);
+ req->rq_majortimeo += xprt_calc_majortimeo(req, to);
}
static void xprt_reset_minortimeo(struct rpc_rqst *req)
@@ -675,7 +676,8 @@ static void xprt_reset_minortimeo(struct rpc_rqst *req)
req->rq_minortimeo += req->rq_timeout;
}
-static void xprt_init_majortimeo(struct rpc_task *task, struct rpc_rqst *req)
+static void xprt_init_majortimeo(struct rpc_task *task, struct rpc_rqst *req,
+ const struct rpc_timeout *to)
{
unsigned long time_init;
struct rpc_xprt *xprt = req->rq_xprt;
@@ -684,8 +686,9 @@ static void xprt_init_majortimeo(struct rpc_task *task, struct rpc_rqst *req)
time_init = jiffies;
else
time_init = xprt_abs_ktime_to_jiffies(task->tk_start);
- req->rq_timeout = task->tk_client->cl_timeout->to_initval;
- req->rq_majortimeo = time_init + xprt_calc_majortimeo(req);
+
+ req->rq_timeout = to->to_initval;
+ req->rq_majortimeo = time_init + xprt_calc_majortimeo(req, to);
req->rq_minortimeo = time_init + req->rq_timeout;
}
@@ -713,7 +716,7 @@ int xprt_adjust_timeout(struct rpc_rqst *req)
} else {
req->rq_timeout = to->to_initval;
req->rq_retries = 0;
- xprt_reset_majortimeo(req);
+ xprt_reset_majortimeo(req, to);
/* Reset the RTT counters == "slow start" */
spin_lock(&xprt->transport_lock);
rpc_init_rtt(req->rq_task->tk_client->cl_rtt, to->to_initval);
@@ -1886,7 +1889,7 @@ xprt_request_init(struct rpc_task *task)
req->rq_snd_buf.bvec = NULL;
req->rq_rcv_buf.bvec = NULL;
req->rq_release_snd_buf = NULL;
- xprt_init_majortimeo(task, req);
+ xprt_init_majortimeo(task, req, task->tk_client->cl_timeout);
trace_xprt_reserve(req);
}
@@ -1983,7 +1986,8 @@ void xprt_release(struct rpc_task *task)
#ifdef CONFIG_SUNRPC_BACKCHANNEL
void
-xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task)
+xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task,
+ const struct rpc_timeout *to)
{
struct xdr_buf *xbufp = &req->rq_snd_buf;
@@ -1996,6 +2000,13 @@ xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task)
*/
xbufp->len = xbufp->head[0].iov_len + xbufp->page_len +
xbufp->tail[0].iov_len;
+ /*
+ * Backchannel Replies are sent with !RPC_TASK_SOFT and
+ * RPC_TASK_NO_RETRANS_TIMEOUT. The major timeout setting
+ * affects only how long each Reply waits to be sent when
+ * a transport connection cannot be established.
+ */
+ xprt_init_majortimeo(task, req, to);
}
#endif
diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c
index 701250b305db..720d3ba742ec 100644
--- a/net/sunrpc/xprtmultipath.c
+++ b/net/sunrpc/xprtmultipath.c
@@ -284,7 +284,7 @@ struct rpc_xprt *_xprt_switch_find_current_entry(struct list_head *head,
if (cur == pos)
found = true;
if (found && ((find_active && xprt_is_active(pos)) ||
- (!find_active && xprt_is_active(pos))))
+ (!find_active && !xprt_is_active(pos))))
return pos;
}
return NULL;
@@ -336,8 +336,9 @@ struct rpc_xprt *xprt_iter_current_entry_offline(struct rpc_xprt_iter *xpi)
xprt_switch_find_current_entry_offline);
}
-bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps,
- const struct sockaddr *sap)
+static
+bool __rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps,
+ const struct sockaddr *sap)
{
struct list_head *head;
struct rpc_xprt *pos;
@@ -356,6 +357,18 @@ bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps,
return false;
}
+bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps,
+ const struct sockaddr *sap)
+{
+ bool res;
+
+ rcu_read_lock();
+ res = __rpc_xprt_switch_has_addr(xps, sap);
+ rcu_read_unlock();
+
+ return res;
+}
+
static
struct rpc_xprt *xprt_switch_find_next_entry(struct list_head *head,
const struct rpc_xprt *cur, bool check_active)
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index e4d84a13c566..8c817e755262 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -263,11 +263,9 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
/* Queue rqst for ULP's callback service */
bc_serv = xprt->bc_serv;
xprt_get(xprt);
- spin_lock(&bc_serv->sv_cb_lock);
- list_add(&rqst->rq_bc_list, &bc_serv->sv_cb_list);
- spin_unlock(&bc_serv->sv_cb_lock);
+ lwq_enqueue(&rqst->rq_bc_list, &bc_serv->sv_cb_list);
- wake_up(&bc_serv->sv_cb_waitq);
+ svc_pool_wake_idle_thread(&bc_serv->sv_pools[0]);
r_xprt->rx_stats.bcall_count++;
return;
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c
index f0d5eeed4c88..f86970733eb0 100644
--- a/net/sunrpc/xprtrdma/svc_rdma.c
+++ b/net/sunrpc/xprtrdma/svc_rdma.c
@@ -256,28 +256,44 @@ out_err:
return rc;
}
+struct workqueue_struct *svcrdma_wq;
+
void svc_rdma_cleanup(void)
{
- dprintk("SVCRDMA Module Removed, deregister RPC RDMA transport\n");
svc_unreg_xprt_class(&svc_rdma_class);
svc_rdma_proc_cleanup();
+ if (svcrdma_wq) {
+ struct workqueue_struct *wq = svcrdma_wq;
+
+ svcrdma_wq = NULL;
+ destroy_workqueue(wq);
+ }
+
+ dprintk("SVCRDMA Module Removed, deregister RPC RDMA transport\n");
}
int svc_rdma_init(void)
{
+ struct workqueue_struct *wq;
int rc;
- dprintk("SVCRDMA Module Init, register RPC RDMA transport\n");
- dprintk("\tsvcrdma_ord : %d\n", svcrdma_ord);
- dprintk("\tmax_requests : %u\n", svcrdma_max_requests);
- dprintk("\tmax_bc_requests : %u\n", svcrdma_max_bc_requests);
- dprintk("\tmax_inline : %d\n", svcrdma_max_req_size);
+ wq = alloc_workqueue("svcrdma", WQ_UNBOUND, 0);
+ if (!wq)
+ return -ENOMEM;
rc = svc_rdma_proc_init();
- if (rc)
+ if (rc) {
+ destroy_workqueue(wq);
return rc;
+ }
- /* Register RDMA with the SVC transport switch */
+ svcrdma_wq = wq;
svc_reg_xprt_class(&svc_rdma_class);
+
+ dprintk("SVCRDMA Module Init, register RPC RDMA transport\n");
+ dprintk("\tsvcrdma_ord : %d\n", svcrdma_ord);
+ dprintk("\tmax_requests : %u\n", svcrdma_max_requests);
+ dprintk("\tmax_bc_requests : %u\n", svcrdma_max_bc_requests);
+ dprintk("\tmax_inline : %d\n", svcrdma_max_req_size);
return 0;
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index 7420a2c990c7..c9be6778643b 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -76,15 +76,12 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
struct rpc_rqst *rqst,
struct svc_rdma_send_ctxt *sctxt)
{
- struct svc_rdma_recv_ctxt *rctxt;
+ struct svc_rdma_pcl empty_pcl;
int ret;
- rctxt = svc_rdma_recv_ctxt_get(rdma);
- if (!rctxt)
- return -EIO;
-
- ret = svc_rdma_map_reply_msg(rdma, sctxt, rctxt, &rqst->rq_snd_buf);
- svc_rdma_recv_ctxt_put(rdma, rctxt);
+ pcl_init(&empty_pcl);
+ ret = svc_rdma_map_reply_msg(rdma, sctxt, &empty_pcl, &empty_pcl,
+ &rqst->rq_snd_buf);
if (ret < 0)
return -EIO;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 85c8bcaebb80..d72953f29258 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -115,13 +115,6 @@ svc_rdma_next_recv_ctxt(struct list_head *list)
rc_list);
}
-static void svc_rdma_recv_cid_init(struct svcxprt_rdma *rdma,
- struct rpc_rdma_cid *cid)
-{
- cid->ci_queue_id = rdma->sc_rq_cq->res.id;
- cid->ci_completion_id = atomic_inc_return(&rdma->sc_completion_ids);
-}
-
static struct svc_rdma_recv_ctxt *
svc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma)
{
@@ -130,7 +123,7 @@ svc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma)
dma_addr_t addr;
void *buffer;
- ctxt = kmalloc_node(sizeof(*ctxt), GFP_KERNEL, node);
+ ctxt = kzalloc_node(sizeof(*ctxt), GFP_KERNEL, node);
if (!ctxt)
goto fail0;
buffer = kmalloc_node(rdma->sc_max_req_size, GFP_KERNEL, node);
@@ -156,6 +149,7 @@ svc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma)
ctxt->rc_recv_sge.length = rdma->sc_max_req_size;
ctxt->rc_recv_sge.lkey = rdma->sc_pd->local_dma_lkey;
ctxt->rc_recv_buf = buffer;
+ svc_rdma_cc_init(rdma, &ctxt->rc_cc);
return ctxt;
fail2:
@@ -204,18 +198,11 @@ struct svc_rdma_recv_ctxt *svc_rdma_recv_ctxt_get(struct svcxprt_rdma *rdma)
node = llist_del_first(&rdma->sc_recv_ctxts);
if (!node)
- goto out_empty;
- ctxt = llist_entry(node, struct svc_rdma_recv_ctxt, rc_node);
+ return NULL;
-out:
+ ctxt = llist_entry(node, struct svc_rdma_recv_ctxt, rc_node);
ctxt->rc_page_count = 0;
return ctxt;
-
-out_empty:
- ctxt = svc_rdma_recv_ctxt_alloc(rdma);
- if (!ctxt)
- return NULL;
- goto out;
}
/**
@@ -227,6 +214,13 @@ out_empty:
void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma,
struct svc_rdma_recv_ctxt *ctxt)
{
+ svc_rdma_cc_release(rdma, &ctxt->rc_cc, DMA_FROM_DEVICE);
+
+ /* @rc_page_count is normally zero here, but error flows
+ * can leave pages in @rc_pages.
+ */
+ release_pages(ctxt->rc_pages, ctxt->rc_page_count);
+
pcl_free(&ctxt->rc_call_pcl);
pcl_free(&ctxt->rc_read_pcl);
pcl_free(&ctxt->rc_write_pcl);
@@ -271,13 +265,13 @@ static bool svc_rdma_refresh_recvs(struct svcxprt_rdma *rdma,
if (!ctxt)
break;
- trace_svcrdma_post_recv(ctxt);
+ trace_svcrdma_post_recv(&ctxt->rc_cid);
ctxt->rc_recv_wr.next = recv_chain;
recv_chain = &ctxt->rc_recv_wr;
rdma->sc_pending_recvs++;
}
if (!recv_chain)
- return false;
+ return true;
ret = ib_post_recv(rdma->sc_qp, recv_chain, &bad_wr);
if (ret)
@@ -301,10 +295,27 @@ err_free:
* svc_rdma_post_recvs - Post initial set of Recv WRs
* @rdma: fresh svcxprt_rdma
*
- * Returns true if successful, otherwise false.
+ * Return values:
+ * %true: Receive Queue initialization successful
+ * %false: memory allocation or DMA error
*/
bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma)
{
+ unsigned int total;
+
+ /* For each credit, allocate enough recv_ctxts for one
+ * posted Receive and one RPC in process.
+ */
+ total = (rdma->sc_max_requests * 2) + rdma->sc_recv_batch;
+ while (total--) {
+ struct svc_rdma_recv_ctxt *ctxt;
+
+ ctxt = svc_rdma_recv_ctxt_alloc(rdma);
+ if (!ctxt)
+ return false;
+ llist_add(&ctxt->rc_node, &rdma->sc_recv_ctxts);
+ }
+
return svc_rdma_refresh_recvs(rdma, rdma->sc_max_requests);
}
@@ -373,6 +384,10 @@ void svc_rdma_flush_recv_queues(struct svcxprt_rdma *rdma)
{
struct svc_rdma_recv_ctxt *ctxt;
+ while ((ctxt = svc_rdma_next_recv_ctxt(&rdma->sc_read_complete_q))) {
+ list_del(&ctxt->rc_list);
+ svc_rdma_recv_ctxt_put(rdma, ctxt);
+ }
while ((ctxt = svc_rdma_next_recv_ctxt(&rdma->sc_rq_dto_q))) {
list_del(&ctxt->rc_list);
svc_rdma_recv_ctxt_put(rdma, ctxt);
@@ -754,6 +769,122 @@ static bool svc_rdma_is_reverse_direction_reply(struct svc_xprt *xprt,
return true;
}
+/* Finish constructing the RPC Call message in rqstp::rq_arg.
+ *
+ * The incoming RPC/RDMA message is an RDMA_MSG type message
+ * with a single Read chunk (only the upper layer data payload
+ * was conveyed via RDMA Read).
+ */
+static void svc_rdma_read_complete_one(struct svc_rqst *rqstp,
+ struct svc_rdma_recv_ctxt *ctxt)
+{
+ struct svc_rdma_chunk *chunk = pcl_first_chunk(&ctxt->rc_read_pcl);
+ struct xdr_buf *buf = &rqstp->rq_arg;
+ unsigned int length;
+
+ /* Split the Receive buffer between the head and tail
+ * buffers at Read chunk's position. XDR roundup of the
+ * chunk is not included in either the pagelist or in
+ * the tail.
+ */
+ buf->tail[0].iov_base = buf->head[0].iov_base + chunk->ch_position;
+ buf->tail[0].iov_len = buf->head[0].iov_len - chunk->ch_position;
+ buf->head[0].iov_len = chunk->ch_position;
+
+ /* Read chunk may need XDR roundup (see RFC 8166, s. 3.4.5.2).
+ *
+ * If the client already rounded up the chunk length, the
+ * length does not change. Otherwise, the length of the page
+ * list is increased to include XDR round-up.
+ *
+ * Currently these chunks always start at page offset 0,
+ * thus the rounded-up length never crosses a page boundary.
+ */
+ buf->pages = &rqstp->rq_pages[0];
+ length = xdr_align_size(chunk->ch_length);
+ buf->page_len = length;
+ buf->len += length;
+ buf->buflen += length;
+}
+
+/* Finish constructing the RPC Call message in rqstp::rq_arg.
+ *
+ * The incoming RPC/RDMA message is an RDMA_MSG type message
+ * with payload in multiple Read chunks and no PZRC.
+ */
+static void svc_rdma_read_complete_multiple(struct svc_rqst *rqstp,
+ struct svc_rdma_recv_ctxt *ctxt)
+{
+ struct xdr_buf *buf = &rqstp->rq_arg;
+
+ buf->len += ctxt->rc_readbytes;
+ buf->buflen += ctxt->rc_readbytes;
+
+ buf->head[0].iov_base = page_address(rqstp->rq_pages[0]);
+ buf->head[0].iov_len = min_t(size_t, PAGE_SIZE, ctxt->rc_readbytes);
+ buf->pages = &rqstp->rq_pages[1];
+ buf->page_len = ctxt->rc_readbytes - buf->head[0].iov_len;
+}
+
+/* Finish constructing the RPC Call message in rqstp::rq_arg.
+ *
+ * The incoming RPC/RDMA message is an RDMA_NOMSG type message
+ * (the RPC message body was conveyed via RDMA Read).
+ */
+static void svc_rdma_read_complete_pzrc(struct svc_rqst *rqstp,
+ struct svc_rdma_recv_ctxt *ctxt)
+{
+ struct xdr_buf *buf = &rqstp->rq_arg;
+
+ buf->len += ctxt->rc_readbytes;
+ buf->buflen += ctxt->rc_readbytes;
+
+ buf->head[0].iov_base = page_address(rqstp->rq_pages[0]);
+ buf->head[0].iov_len = min_t(size_t, PAGE_SIZE, ctxt->rc_readbytes);
+ buf->pages = &rqstp->rq_pages[1];
+ buf->page_len = ctxt->rc_readbytes - buf->head[0].iov_len;
+}
+
+static noinline void svc_rdma_read_complete(struct svc_rqst *rqstp,
+ struct svc_rdma_recv_ctxt *ctxt)
+{
+ unsigned int i;
+
+ /* Transfer the Read chunk pages into @rqstp.rq_pages, replacing
+ * the rq_pages that were already allocated for this rqstp.
+ */
+ release_pages(rqstp->rq_respages, ctxt->rc_page_count);
+ for (i = 0; i < ctxt->rc_page_count; i++)
+ rqstp->rq_pages[i] = ctxt->rc_pages[i];
+
+ /* Update @rqstp's result send buffer to start after the
+ * last page in the RDMA Read payload.
+ */
+ rqstp->rq_respages = &rqstp->rq_pages[ctxt->rc_page_count];
+ rqstp->rq_next_page = rqstp->rq_respages + 1;
+
+ /* Prevent svc_rdma_recv_ctxt_put() from releasing the
+ * pages in ctxt::rc_pages a second time.
+ */
+ ctxt->rc_page_count = 0;
+
+ /* Finish constructing the RPC Call message. The exact
+ * procedure for that depends on what kind of RPC/RDMA
+ * chunks were provided by the client.
+ */
+ rqstp->rq_arg = ctxt->rc_saved_arg;
+ if (pcl_is_empty(&ctxt->rc_call_pcl)) {
+ if (ctxt->rc_read_pcl.cl_count == 1)
+ svc_rdma_read_complete_one(rqstp, ctxt);
+ else
+ svc_rdma_read_complete_multiple(rqstp, ctxt);
+ } else {
+ svc_rdma_read_complete_pzrc(rqstp, ctxt);
+ }
+
+ trace_svcrdma_read_finished(&ctxt->rc_cid);
+}
+
/**
* svc_rdma_recvfrom - Receive an RPC call
* @rqstp: request structure into which to receive an RPC Call
@@ -798,8 +929,15 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
rqstp->rq_xprt_ctxt = NULL;
- ctxt = NULL;
spin_lock(&rdma_xprt->sc_rq_dto_lock);
+ ctxt = svc_rdma_next_recv_ctxt(&rdma_xprt->sc_read_complete_q);
+ if (ctxt) {
+ list_del(&ctxt->rc_list);
+ spin_unlock(&rdma_xprt->sc_rq_dto_lock);
+ svc_xprt_received(xprt);
+ svc_rdma_read_complete(rqstp, ctxt);
+ goto complete;
+ }
ctxt = svc_rdma_next_recv_ctxt(&rdma_xprt->sc_rq_dto_q);
if (ctxt)
list_del(&ctxt->rc_list);
@@ -831,12 +969,10 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
svc_rdma_get_inv_rkey(rdma_xprt, ctxt);
if (!pcl_is_empty(&ctxt->rc_read_pcl) ||
- !pcl_is_empty(&ctxt->rc_call_pcl)) {
- ret = svc_rdma_process_read_list(rdma_xprt, rqstp, ctxt);
- if (ret < 0)
- goto out_readfail;
- }
+ !pcl_is_empty(&ctxt->rc_call_pcl))
+ goto out_readlist;
+complete:
rqstp->rq_xprt_ctxt = ctxt;
rqstp->rq_prot = IPPROTO_MAX;
svc_xprt_copy_addrs(rqstp, xprt);
@@ -848,11 +984,23 @@ out_err:
svc_rdma_recv_ctxt_put(rdma_xprt, ctxt);
return 0;
-out_readfail:
- if (ret == -EINVAL)
- svc_rdma_send_error(rdma_xprt, ctxt, ret);
- svc_rdma_recv_ctxt_put(rdma_xprt, ctxt);
- return ret;
+out_readlist:
+ /* This @rqstp is about to be recycled. Save the work
+ * already done constructing the Call message in rq_arg
+ * so it can be restored when the RDMA Reads have
+ * completed.
+ */
+ ctxt->rc_saved_arg = rqstp->rq_arg;
+
+ ret = svc_rdma_process_read_list(rdma_xprt, rqstp, ctxt);
+ if (ret < 0) {
+ if (ret == -EINVAL)
+ svc_rdma_send_error(rdma_xprt, ctxt, ret);
+ svc_rdma_recv_ctxt_put(rdma_xprt, ctxt);
+ svc_xprt_deferred_close(xprt);
+ return ret;
+ }
+ return 0;
out_backchannel:
svc_rdma_handle_bc_reply(rqstp, ctxt);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index e460e25a1d6d..c00fcce61d1e 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -39,6 +39,7 @@ struct svc_rdma_rw_ctxt {
struct list_head rw_list;
struct rdma_rw_ctx rw_ctx;
unsigned int rw_nents;
+ unsigned int rw_first_sgl_nents;
struct sg_table rw_sg_table;
struct scatterlist rw_first_sgl[];
};
@@ -53,6 +54,8 @@ svc_rdma_next_ctxt(struct list_head *list)
static struct svc_rdma_rw_ctxt *
svc_rdma_get_rw_ctxt(struct svcxprt_rdma *rdma, unsigned int sges)
{
+ struct ib_device *dev = rdma->sc_cm_id->device;
+ unsigned int first_sgl_nents = dev->attrs.max_send_sge;
struct svc_rdma_rw_ctxt *ctxt;
struct llist_node *node;
@@ -62,32 +65,33 @@ svc_rdma_get_rw_ctxt(struct svcxprt_rdma *rdma, unsigned int sges)
if (node) {
ctxt = llist_entry(node, struct svc_rdma_rw_ctxt, rw_node);
} else {
- ctxt = kmalloc_node(struct_size(ctxt, rw_first_sgl, SG_CHUNK_SIZE),
- GFP_KERNEL, ibdev_to_node(rdma->sc_cm_id->device));
+ ctxt = kmalloc_node(struct_size(ctxt, rw_first_sgl, first_sgl_nents),
+ GFP_KERNEL, ibdev_to_node(dev));
if (!ctxt)
goto out_noctx;
INIT_LIST_HEAD(&ctxt->rw_list);
+ ctxt->rw_first_sgl_nents = first_sgl_nents;
}
ctxt->rw_sg_table.sgl = ctxt->rw_first_sgl;
if (sg_alloc_table_chained(&ctxt->rw_sg_table, sges,
ctxt->rw_sg_table.sgl,
- SG_CHUNK_SIZE))
+ first_sgl_nents))
goto out_free;
return ctxt;
out_free:
kfree(ctxt);
out_noctx:
- trace_svcrdma_no_rwctx_err(rdma, sges);
+ trace_svcrdma_rwctx_empty(rdma, sges);
return NULL;
}
static void __svc_rdma_put_rw_ctxt(struct svc_rdma_rw_ctxt *ctxt,
struct llist_head *list)
{
- sg_free_table_chained(&ctxt->rw_sg_table, SG_CHUNK_SIZE);
+ sg_free_table_chained(&ctxt->rw_sg_table, ctxt->rw_first_sgl_nents);
llist_add(&ctxt->rw_node, list);
}
@@ -135,57 +139,40 @@ static int svc_rdma_rw_ctx_init(struct svcxprt_rdma *rdma,
ctxt->rw_sg_table.sgl, ctxt->rw_nents,
0, offset, handle, direction);
if (unlikely(ret < 0)) {
+ trace_svcrdma_dma_map_rw_err(rdma, offset, handle,
+ ctxt->rw_nents, ret);
svc_rdma_put_rw_ctxt(rdma, ctxt);
- trace_svcrdma_dma_map_rw_err(rdma, ctxt->rw_nents, ret);
}
return ret;
}
-/* A chunk context tracks all I/O for moving one Read or Write
- * chunk. This is a set of rdma_rw's that handle data movement
- * for all segments of one chunk.
- *
- * These are small, acquired with a single allocator call, and
- * no more than one is needed per chunk. They are allocated on
- * demand, and not cached.
+/**
+ * svc_rdma_cc_init - Initialize an svc_rdma_chunk_ctxt
+ * @rdma: controlling transport instance
+ * @cc: svc_rdma_chunk_ctxt to be initialized
*/
-struct svc_rdma_chunk_ctxt {
- struct rpc_rdma_cid cc_cid;
- struct ib_cqe cc_cqe;
- struct svcxprt_rdma *cc_rdma;
- struct list_head cc_rwctxts;
- ktime_t cc_posttime;
- int cc_sqecount;
- enum ib_wc_status cc_status;
- struct completion cc_done;
-};
-
-static void svc_rdma_cc_cid_init(struct svcxprt_rdma *rdma,
- struct rpc_rdma_cid *cid)
+void svc_rdma_cc_init(struct svcxprt_rdma *rdma,
+ struct svc_rdma_chunk_ctxt *cc)
{
- cid->ci_queue_id = rdma->sc_sq_cq->res.id;
- cid->ci_completion_id = atomic_inc_return(&rdma->sc_completion_ids);
-}
+ struct rpc_rdma_cid *cid = &cc->cc_cid;
-static void svc_rdma_cc_init(struct svcxprt_rdma *rdma,
- struct svc_rdma_chunk_ctxt *cc)
-{
- svc_rdma_cc_cid_init(rdma, &cc->cc_cid);
- cc->cc_rdma = rdma;
+ if (unlikely(!cid->ci_completion_id))
+ svc_rdma_send_cid_init(rdma, cid);
INIT_LIST_HEAD(&cc->cc_rwctxts);
cc->cc_sqecount = 0;
}
-/*
- * The consumed rw_ctx's are cleaned and placed on a local llist so
- * that only one atomic llist operation is needed to put them all
- * back on the free list.
+/**
+ * svc_rdma_cc_release - Release resources held by a svc_rdma_chunk_ctxt
+ * @rdma: controlling transport instance
+ * @cc: svc_rdma_chunk_ctxt to be released
+ * @dir: DMA direction
*/
-static void svc_rdma_cc_release(struct svc_rdma_chunk_ctxt *cc,
- enum dma_data_direction dir)
+void svc_rdma_cc_release(struct svcxprt_rdma *rdma,
+ struct svc_rdma_chunk_ctxt *cc,
+ enum dma_data_direction dir)
{
- struct svcxprt_rdma *rdma = cc->cc_rdma;
struct llist_node *first, *last;
struct svc_rdma_rw_ctxt *ctxt;
LLIST_HEAD(free);
@@ -215,6 +202,8 @@ static void svc_rdma_cc_release(struct svc_rdma_chunk_ctxt *cc,
* - Stores arguments for the SGL constructor functions
*/
struct svc_rdma_write_info {
+ struct svcxprt_rdma *wi_rdma;
+
const struct svc_rdma_chunk *wi_chunk;
/* write state of this chunk */
@@ -227,6 +216,7 @@ struct svc_rdma_write_info {
unsigned int wi_next_off;
struct svc_rdma_chunk_ctxt wi_cc;
+ struct work_struct wi_work;
};
static struct svc_rdma_write_info *
@@ -235,25 +225,33 @@ svc_rdma_write_info_alloc(struct svcxprt_rdma *rdma,
{
struct svc_rdma_write_info *info;
- info = kmalloc_node(sizeof(*info), GFP_KERNEL,
+ info = kzalloc_node(sizeof(*info), GFP_KERNEL,
ibdev_to_node(rdma->sc_cm_id->device));
if (!info)
return info;
+ info->wi_rdma = rdma;
info->wi_chunk = chunk;
- info->wi_seg_off = 0;
- info->wi_seg_no = 0;
svc_rdma_cc_init(rdma, &info->wi_cc);
info->wi_cc.cc_cqe.done = svc_rdma_write_done;
return info;
}
-static void svc_rdma_write_info_free(struct svc_rdma_write_info *info)
+static void svc_rdma_write_info_free_async(struct work_struct *work)
{
- svc_rdma_cc_release(&info->wi_cc, DMA_TO_DEVICE);
+ struct svc_rdma_write_info *info;
+
+ info = container_of(work, struct svc_rdma_write_info, wi_work);
+ svc_rdma_cc_release(info->wi_rdma, &info->wi_cc, DMA_TO_DEVICE);
kfree(info);
}
+static void svc_rdma_write_info_free(struct svc_rdma_write_info *info)
+{
+ INIT_WORK(&info->wi_work, svc_rdma_write_info_free_async);
+ queue_work(svcrdma_wq, &info->wi_work);
+}
+
/**
* svc_rdma_write_done - Write chunk completion
* @cq: controlling Completion Queue
@@ -263,16 +261,16 @@ static void svc_rdma_write_info_free(struct svc_rdma_write_info *info)
*/
static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
{
+ struct svcxprt_rdma *rdma = cq->cq_context;
struct ib_cqe *cqe = wc->wr_cqe;
struct svc_rdma_chunk_ctxt *cc =
container_of(cqe, struct svc_rdma_chunk_ctxt, cc_cqe);
- struct svcxprt_rdma *rdma = cc->cc_rdma;
struct svc_rdma_write_info *info =
container_of(cc, struct svc_rdma_write_info, wi_cc);
switch (wc->status) {
case IB_WC_SUCCESS:
- trace_svcrdma_wc_write(wc, &cc->cc_cid);
+ trace_svcrdma_wc_write(&cc->cc_cid);
break;
case IB_WC_WR_FLUSH_ERR:
trace_svcrdma_wc_write_flush(wc, &cc->cc_cid);
@@ -289,39 +287,6 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
svc_rdma_write_info_free(info);
}
-/* State for pulling a Read chunk.
- */
-struct svc_rdma_read_info {
- struct svc_rqst *ri_rqst;
- struct svc_rdma_recv_ctxt *ri_readctxt;
- unsigned int ri_pageno;
- unsigned int ri_pageoff;
- unsigned int ri_totalbytes;
-
- struct svc_rdma_chunk_ctxt ri_cc;
-};
-
-static struct svc_rdma_read_info *
-svc_rdma_read_info_alloc(struct svcxprt_rdma *rdma)
-{
- struct svc_rdma_read_info *info;
-
- info = kmalloc_node(sizeof(*info), GFP_KERNEL,
- ibdev_to_node(rdma->sc_cm_id->device));
- if (!info)
- return info;
-
- svc_rdma_cc_init(rdma, &info->ri_cc);
- info->ri_cc.cc_cqe.done = svc_rdma_wc_read_done;
- return info;
-}
-
-static void svc_rdma_read_info_free(struct svc_rdma_read_info *info)
-{
- svc_rdma_cc_release(&info->ri_cc, DMA_FROM_DEVICE);
- kfree(info);
-}
-
/**
* svc_rdma_wc_read_done - Handle completion of an RDMA Read ctx
* @cq: controlling Completion Queue
@@ -330,17 +295,27 @@ static void svc_rdma_read_info_free(struct svc_rdma_read_info *info)
*/
static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc)
{
+ struct svcxprt_rdma *rdma = cq->cq_context;
struct ib_cqe *cqe = wc->wr_cqe;
struct svc_rdma_chunk_ctxt *cc =
container_of(cqe, struct svc_rdma_chunk_ctxt, cc_cqe);
- struct svc_rdma_read_info *info;
+ struct svc_rdma_recv_ctxt *ctxt;
+ svc_rdma_wake_send_waiters(rdma, cc->cc_sqecount);
+
+ ctxt = container_of(cc, struct svc_rdma_recv_ctxt, rc_cc);
switch (wc->status) {
case IB_WC_SUCCESS:
- info = container_of(cc, struct svc_rdma_read_info, ri_cc);
- trace_svcrdma_wc_read(wc, &cc->cc_cid, info->ri_totalbytes,
+ trace_svcrdma_wc_read(wc, &cc->cc_cid, ctxt->rc_readbytes,
cc->cc_posttime);
- break;
+
+ spin_lock(&rdma->sc_rq_dto_lock);
+ list_add_tail(&ctxt->rc_list, &rdma->sc_read_complete_q);
+ /* the unlock pairs with the smp_rmb in svc_xprt_ready */
+ set_bit(XPT_DATA, &rdma->sc_xprt.xpt_flags);
+ spin_unlock(&rdma->sc_rq_dto_lock);
+ svc_xprt_enqueue(&rdma->sc_xprt);
+ return;
case IB_WC_WR_FLUSH_ERR:
trace_svcrdma_wc_read_flush(wc, &cc->cc_cid);
break;
@@ -348,10 +323,13 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc)
trace_svcrdma_wc_read_err(wc, &cc->cc_cid);
}
- svc_rdma_wake_send_waiters(cc->cc_rdma, cc->cc_sqecount);
- cc->cc_status = wc->status;
- complete(&cc->cc_done);
- return;
+ /* The RDMA Read has flushed, so the incoming RPC message
+ * cannot be constructed and must be dropped. Signal the
+ * loss to the client by closing the connection.
+ */
+ svc_rdma_cc_release(rdma, cc, DMA_FROM_DEVICE);
+ svc_rdma_recv_ctxt_put(rdma, ctxt);
+ svc_xprt_deferred_close(&rdma->sc_xprt);
}
/*
@@ -360,9 +338,9 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc)
* even if one or more WRs are flushed. This is true when posting
* an rdma_rw_ctx or when posting a single signaled WR.
*/
-static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc)
+static int svc_rdma_post_chunk_ctxt(struct svcxprt_rdma *rdma,
+ struct svc_rdma_chunk_ctxt *cc)
{
- struct svcxprt_rdma *rdma = cc->cc_rdma;
struct ib_send_wr *first_wr;
const struct ib_send_wr *bad_wr;
struct list_head *tmp;
@@ -396,14 +374,14 @@ static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc)
}
percpu_counter_inc(&svcrdma_stat_sq_starve);
- trace_svcrdma_sq_full(rdma);
+ trace_svcrdma_sq_full(rdma, &cc->cc_cid);
atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail);
wait_event(rdma->sc_send_wait,
atomic_read(&rdma->sc_sq_avail) > cc->cc_sqecount);
- trace_svcrdma_sq_retry(rdma);
+ trace_svcrdma_sq_retry(rdma, &cc->cc_cid);
} while (1);
- trace_svcrdma_sq_post_err(rdma, ret);
+ trace_svcrdma_sq_post_err(rdma, &cc->cc_cid, ret);
svc_xprt_deferred_close(&rdma->sc_xprt);
/* If even one was posted, there will be a completion. */
@@ -473,7 +451,7 @@ svc_rdma_build_writes(struct svc_rdma_write_info *info,
unsigned int remaining)
{
struct svc_rdma_chunk_ctxt *cc = &info->wi_cc;
- struct svcxprt_rdma *rdma = cc->cc_rdma;
+ struct svcxprt_rdma *rdma = info->wi_rdma;
const struct svc_rdma_segment *seg;
struct svc_rdma_rw_ctxt *ctxt;
int ret;
@@ -516,7 +494,7 @@ svc_rdma_build_writes(struct svc_rdma_write_info *info,
return 0;
out_overflow:
- trace_svcrdma_small_wrch_err(rdma, remaining, info->wi_seg_no,
+ trace_svcrdma_small_wrch_err(&cc->cc_cid, remaining, info->wi_seg_no,
info->wi_chunk->ch_segcount);
return -E2BIG;
}
@@ -633,7 +611,7 @@ int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma,
goto out_err;
trace_svcrdma_post_write_chunk(&cc->cc_cid, cc->cc_sqecount);
- ret = svc_rdma_post_chunk_ctxt(cc);
+ ret = svc_rdma_post_chunk_ctxt(rdma, cc);
if (ret < 0)
goto out_err;
return xdr->len;
@@ -680,7 +658,7 @@ int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma,
goto out_err;
trace_svcrdma_post_reply_chunk(&cc->cc_cid, cc->cc_sqecount);
- ret = svc_rdma_post_chunk_ctxt(cc);
+ ret = svc_rdma_post_chunk_ctxt(rdma, cc);
if (ret < 0)
goto out_err;
@@ -693,7 +671,8 @@ out_err:
/**
* svc_rdma_build_read_segment - Build RDMA Read WQEs to pull one RDMA segment
- * @info: context for ongoing I/O
+ * @rqstp: RPC transaction context
+ * @head: context for ongoing I/O
* @segment: co-ordinates of remote memory to be read
*
* Returns:
@@ -702,20 +681,20 @@ out_err:
* %-ENOMEM: allocating a local resources failed
* %-EIO: a DMA mapping error occurred
*/
-static int svc_rdma_build_read_segment(struct svc_rdma_read_info *info,
+static int svc_rdma_build_read_segment(struct svc_rqst *rqstp,
+ struct svc_rdma_recv_ctxt *head,
const struct svc_rdma_segment *segment)
{
- struct svc_rdma_recv_ctxt *head = info->ri_readctxt;
- struct svc_rdma_chunk_ctxt *cc = &info->ri_cc;
- struct svc_rqst *rqstp = info->ri_rqst;
+ struct svcxprt_rdma *rdma = svc_rdma_rqst_rdma(rqstp);
+ struct svc_rdma_chunk_ctxt *cc = &head->rc_cc;
unsigned int sge_no, seg_len, len;
struct svc_rdma_rw_ctxt *ctxt;
struct scatterlist *sg;
int ret;
len = segment->rs_length;
- sge_no = PAGE_ALIGN(info->ri_pageoff + len) >> PAGE_SHIFT;
- ctxt = svc_rdma_get_rw_ctxt(cc->cc_rdma, sge_no);
+ sge_no = PAGE_ALIGN(head->rc_pageoff + len) >> PAGE_SHIFT;
+ ctxt = svc_rdma_get_rw_ctxt(rdma, sge_no);
if (!ctxt)
return -ENOMEM;
ctxt->rw_nents = sge_no;
@@ -723,29 +702,27 @@ static int svc_rdma_build_read_segment(struct svc_rdma_read_info *info,
sg = ctxt->rw_sg_table.sgl;
for (sge_no = 0; sge_no < ctxt->rw_nents; sge_no++) {
seg_len = min_t(unsigned int, len,
- PAGE_SIZE - info->ri_pageoff);
+ PAGE_SIZE - head->rc_pageoff);
- if (!info->ri_pageoff)
+ if (!head->rc_pageoff)
head->rc_page_count++;
- sg_set_page(sg, rqstp->rq_pages[info->ri_pageno],
- seg_len, info->ri_pageoff);
+ sg_set_page(sg, rqstp->rq_pages[head->rc_curpage],
+ seg_len, head->rc_pageoff);
sg = sg_next(sg);
- info->ri_pageoff += seg_len;
- if (info->ri_pageoff == PAGE_SIZE) {
- info->ri_pageno++;
- info->ri_pageoff = 0;
+ head->rc_pageoff += seg_len;
+ if (head->rc_pageoff == PAGE_SIZE) {
+ head->rc_curpage++;
+ head->rc_pageoff = 0;
}
len -= seg_len;
- /* Safety check */
- if (len &&
- &rqstp->rq_pages[info->ri_pageno + 1] > rqstp->rq_page_end)
+ if (len && ((head->rc_curpage + 1) > ARRAY_SIZE(rqstp->rq_pages)))
goto out_overrun;
}
- ret = svc_rdma_rw_ctx_init(cc->cc_rdma, ctxt, segment->rs_offset,
+ ret = svc_rdma_rw_ctx_init(rdma, ctxt, segment->rs_offset,
segment->rs_handle, DMA_FROM_DEVICE);
if (ret < 0)
return -EIO;
@@ -756,13 +733,14 @@ static int svc_rdma_build_read_segment(struct svc_rdma_read_info *info,
return 0;
out_overrun:
- trace_svcrdma_page_overrun_err(cc->cc_rdma, rqstp, info->ri_pageno);
+ trace_svcrdma_page_overrun_err(&cc->cc_cid, head->rc_curpage);
return -EINVAL;
}
/**
* svc_rdma_build_read_chunk - Build RDMA Read WQEs to pull one RDMA chunk
- * @info: context for ongoing I/O
+ * @rqstp: RPC transaction context
+ * @head: context for ongoing I/O
* @chunk: Read chunk to pull
*
* Return values:
@@ -771,7 +749,8 @@ out_overrun:
* %-ENOMEM: allocating a local resources failed
* %-EIO: a DMA mapping error occurred
*/
-static int svc_rdma_build_read_chunk(struct svc_rdma_read_info *info,
+static int svc_rdma_build_read_chunk(struct svc_rqst *rqstp,
+ struct svc_rdma_recv_ctxt *head,
const struct svc_rdma_chunk *chunk)
{
const struct svc_rdma_segment *segment;
@@ -779,56 +758,56 @@ static int svc_rdma_build_read_chunk(struct svc_rdma_read_info *info,
ret = -EINVAL;
pcl_for_each_segment(segment, chunk) {
- ret = svc_rdma_build_read_segment(info, segment);
+ ret = svc_rdma_build_read_segment(rqstp, head, segment);
if (ret < 0)
break;
- info->ri_totalbytes += segment->rs_length;
+ head->rc_readbytes += segment->rs_length;
}
return ret;
}
/**
* svc_rdma_copy_inline_range - Copy part of the inline content into pages
- * @info: context for RDMA Reads
+ * @rqstp: RPC transaction context
+ * @head: context for ongoing I/O
* @offset: offset into the Receive buffer of region to copy
* @remaining: length of region to copy
*
* Take a page at a time from rqstp->rq_pages and copy the inline
* content from the Receive buffer into that page. Update
- * info->ri_pageno and info->ri_pageoff so that the next RDMA Read
+ * head->rc_curpage and head->rc_pageoff so that the next RDMA Read
* result will land contiguously with the copied content.
*
* Return values:
* %0: Inline content was successfully copied
* %-EINVAL: offset or length was incorrect
*/
-static int svc_rdma_copy_inline_range(struct svc_rdma_read_info *info,
+static int svc_rdma_copy_inline_range(struct svc_rqst *rqstp,
+ struct svc_rdma_recv_ctxt *head,
unsigned int offset,
unsigned int remaining)
{
- struct svc_rdma_recv_ctxt *head = info->ri_readctxt;
unsigned char *dst, *src = head->rc_recv_buf;
- struct svc_rqst *rqstp = info->ri_rqst;
unsigned int page_no, numpages;
- numpages = PAGE_ALIGN(info->ri_pageoff + remaining) >> PAGE_SHIFT;
+ numpages = PAGE_ALIGN(head->rc_pageoff + remaining) >> PAGE_SHIFT;
for (page_no = 0; page_no < numpages; page_no++) {
unsigned int page_len;
page_len = min_t(unsigned int, remaining,
- PAGE_SIZE - info->ri_pageoff);
+ PAGE_SIZE - head->rc_pageoff);
- if (!info->ri_pageoff)
+ if (!head->rc_pageoff)
head->rc_page_count++;
- dst = page_address(rqstp->rq_pages[info->ri_pageno]);
- memcpy(dst + info->ri_pageno, src + offset, page_len);
+ dst = page_address(rqstp->rq_pages[head->rc_curpage]);
+ memcpy(dst + head->rc_curpage, src + offset, page_len);
- info->ri_totalbytes += page_len;
- info->ri_pageoff += page_len;
- if (info->ri_pageoff == PAGE_SIZE) {
- info->ri_pageno++;
- info->ri_pageoff = 0;
+ head->rc_readbytes += page_len;
+ head->rc_pageoff += page_len;
+ if (head->rc_pageoff == PAGE_SIZE) {
+ head->rc_curpage++;
+ head->rc_pageoff = 0;
}
remaining -= page_len;
offset += page_len;
@@ -839,7 +818,8 @@ static int svc_rdma_copy_inline_range(struct svc_rdma_read_info *info,
/**
* svc_rdma_read_multiple_chunks - Construct RDMA Reads to pull data item Read chunks
- * @info: context for RDMA Reads
+ * @rqstp: RPC transaction context
+ * @head: context for ongoing I/O
*
* The chunk data lands in rqstp->rq_arg as a series of contiguous pages,
* like an incoming TCP call.
@@ -851,11 +831,11 @@ static int svc_rdma_copy_inline_range(struct svc_rdma_read_info *info,
* %-ENOTCONN: posting failed (connection is lost),
* %-EIO: rdma_rw initialization failed (DMA mapping, etc).
*/
-static noinline int svc_rdma_read_multiple_chunks(struct svc_rdma_read_info *info)
+static noinline int
+svc_rdma_read_multiple_chunks(struct svc_rqst *rqstp,
+ struct svc_rdma_recv_ctxt *head)
{
- struct svc_rdma_recv_ctxt *head = info->ri_readctxt;
const struct svc_rdma_pcl *pcl = &head->rc_read_pcl;
- struct xdr_buf *buf = &info->ri_rqst->rq_arg;
struct svc_rdma_chunk *chunk, *next;
unsigned int start, length;
int ret;
@@ -863,12 +843,12 @@ static noinline int svc_rdma_read_multiple_chunks(struct svc_rdma_read_info *inf
start = 0;
chunk = pcl_first_chunk(pcl);
length = chunk->ch_position;
- ret = svc_rdma_copy_inline_range(info, start, length);
+ ret = svc_rdma_copy_inline_range(rqstp, head, start, length);
if (ret < 0)
return ret;
pcl_for_each_chunk(chunk, pcl) {
- ret = svc_rdma_build_read_chunk(info, chunk);
+ ret = svc_rdma_build_read_chunk(rqstp, head, chunk);
if (ret < 0)
return ret;
@@ -877,31 +857,21 @@ static noinline int svc_rdma_read_multiple_chunks(struct svc_rdma_read_info *inf
break;
start += length;
- length = next->ch_position - info->ri_totalbytes;
- ret = svc_rdma_copy_inline_range(info, start, length);
+ length = next->ch_position - head->rc_readbytes;
+ ret = svc_rdma_copy_inline_range(rqstp, head, start, length);
if (ret < 0)
return ret;
}
start += length;
length = head->rc_byte_len - start;
- ret = svc_rdma_copy_inline_range(info, start, length);
- if (ret < 0)
- return ret;
-
- buf->len += info->ri_totalbytes;
- buf->buflen += info->ri_totalbytes;
-
- buf->head[0].iov_base = page_address(info->ri_rqst->rq_pages[0]);
- buf->head[0].iov_len = min_t(size_t, PAGE_SIZE, info->ri_totalbytes);
- buf->pages = &info->ri_rqst->rq_pages[1];
- buf->page_len = info->ri_totalbytes - buf->head[0].iov_len;
- return 0;
+ return svc_rdma_copy_inline_range(rqstp, head, start, length);
}
/**
* svc_rdma_read_data_item - Construct RDMA Reads to pull data item Read chunks
- * @info: context for RDMA Reads
+ * @rqstp: RPC transaction context
+ * @head: context for ongoing I/O
*
* The chunk data lands in the page list of rqstp->rq_arg.pages.
*
@@ -916,50 +886,17 @@ static noinline int svc_rdma_read_multiple_chunks(struct svc_rdma_read_info *inf
* %-ENOTCONN: posting failed (connection is lost),
* %-EIO: rdma_rw initialization failed (DMA mapping, etc).
*/
-static int svc_rdma_read_data_item(struct svc_rdma_read_info *info)
+static int svc_rdma_read_data_item(struct svc_rqst *rqstp,
+ struct svc_rdma_recv_ctxt *head)
{
- struct svc_rdma_recv_ctxt *head = info->ri_readctxt;
- struct xdr_buf *buf = &info->ri_rqst->rq_arg;
- struct svc_rdma_chunk *chunk;
- unsigned int length;
- int ret;
-
- chunk = pcl_first_chunk(&head->rc_read_pcl);
- ret = svc_rdma_build_read_chunk(info, chunk);
- if (ret < 0)
- goto out;
-
- /* Split the Receive buffer between the head and tail
- * buffers at Read chunk's position. XDR roundup of the
- * chunk is not included in either the pagelist or in
- * the tail.
- */
- buf->tail[0].iov_base = buf->head[0].iov_base + chunk->ch_position;
- buf->tail[0].iov_len = buf->head[0].iov_len - chunk->ch_position;
- buf->head[0].iov_len = chunk->ch_position;
-
- /* Read chunk may need XDR roundup (see RFC 8166, s. 3.4.5.2).
- *
- * If the client already rounded up the chunk length, the
- * length does not change. Otherwise, the length of the page
- * list is increased to include XDR round-up.
- *
- * Currently these chunks always start at page offset 0,
- * thus the rounded-up length never crosses a page boundary.
- */
- buf->pages = &info->ri_rqst->rq_pages[0];
- length = xdr_align_size(chunk->ch_length);
- buf->page_len = length;
- buf->len += length;
- buf->buflen += length;
-
-out:
- return ret;
+ return svc_rdma_build_read_chunk(rqstp, head,
+ pcl_first_chunk(&head->rc_read_pcl));
}
/**
- * svc_rdma_read_chunk_range - Build RDMA Read WQEs for portion of a chunk
- * @info: context for RDMA Reads
+ * svc_rdma_read_chunk_range - Build RDMA Read WRs for portion of a chunk
+ * @rqstp: RPC transaction context
+ * @head: context for ongoing I/O
* @chunk: parsed Call chunk to pull
* @offset: offset of region to pull
* @length: length of region to pull
@@ -971,7 +908,8 @@ out:
* %-ENOTCONN: posting failed (connection is lost),
* %-EIO: rdma_rw initialization failed (DMA mapping, etc).
*/
-static int svc_rdma_read_chunk_range(struct svc_rdma_read_info *info,
+static int svc_rdma_read_chunk_range(struct svc_rqst *rqstp,
+ struct svc_rdma_recv_ctxt *head,
const struct svc_rdma_chunk *chunk,
unsigned int offset, unsigned int length)
{
@@ -991,11 +929,11 @@ static int svc_rdma_read_chunk_range(struct svc_rdma_read_info *info,
dummy.rs_length = min_t(u32, length, segment->rs_length) - offset;
dummy.rs_offset = segment->rs_offset + offset;
- ret = svc_rdma_build_read_segment(info, &dummy);
+ ret = svc_rdma_build_read_segment(rqstp, head, &dummy);
if (ret < 0)
break;
- info->ri_totalbytes += dummy.rs_length;
+ head->rc_readbytes += dummy.rs_length;
length -= dummy.rs_length;
offset = 0;
}
@@ -1004,7 +942,8 @@ static int svc_rdma_read_chunk_range(struct svc_rdma_read_info *info,
/**
* svc_rdma_read_call_chunk - Build RDMA Read WQEs to pull a Long Message
- * @info: context for RDMA Reads
+ * @rqstp: RPC transaction context
+ * @head: context for ongoing I/O
*
* Return values:
* %0: RDMA Read WQEs were successfully built
@@ -1013,9 +952,9 @@ static int svc_rdma_read_chunk_range(struct svc_rdma_read_info *info,
* %-ENOTCONN: posting failed (connection is lost),
* %-EIO: rdma_rw initialization failed (DMA mapping, etc).
*/
-static int svc_rdma_read_call_chunk(struct svc_rdma_read_info *info)
+static int svc_rdma_read_call_chunk(struct svc_rqst *rqstp,
+ struct svc_rdma_recv_ctxt *head)
{
- struct svc_rdma_recv_ctxt *head = info->ri_readctxt;
const struct svc_rdma_chunk *call_chunk =
pcl_first_chunk(&head->rc_call_pcl);
const struct svc_rdma_pcl *pcl = &head->rc_read_pcl;
@@ -1024,17 +963,18 @@ static int svc_rdma_read_call_chunk(struct svc_rdma_read_info *info)
int ret;
if (pcl_is_empty(pcl))
- return svc_rdma_build_read_chunk(info, call_chunk);
+ return svc_rdma_build_read_chunk(rqstp, head, call_chunk);
start = 0;
chunk = pcl_first_chunk(pcl);
length = chunk->ch_position;
- ret = svc_rdma_read_chunk_range(info, call_chunk, start, length);
+ ret = svc_rdma_read_chunk_range(rqstp, head, call_chunk,
+ start, length);
if (ret < 0)
return ret;
pcl_for_each_chunk(chunk, pcl) {
- ret = svc_rdma_build_read_chunk(info, chunk);
+ ret = svc_rdma_build_read_chunk(rqstp, head, chunk);
if (ret < 0)
return ret;
@@ -1043,8 +983,8 @@ static int svc_rdma_read_call_chunk(struct svc_rdma_read_info *info)
break;
start += length;
- length = next->ch_position - info->ri_totalbytes;
- ret = svc_rdma_read_chunk_range(info, call_chunk,
+ length = next->ch_position - head->rc_readbytes;
+ ret = svc_rdma_read_chunk_range(rqstp, head, call_chunk,
start, length);
if (ret < 0)
return ret;
@@ -1052,12 +992,14 @@ static int svc_rdma_read_call_chunk(struct svc_rdma_read_info *info)
start += length;
length = call_chunk->ch_length - start;
- return svc_rdma_read_chunk_range(info, call_chunk, start, length);
+ return svc_rdma_read_chunk_range(rqstp, head, call_chunk,
+ start, length);
}
/**
* svc_rdma_read_special - Build RDMA Read WQEs to pull a Long Message
- * @info: context for RDMA Reads
+ * @rqstp: RPC transaction context
+ * @head: context for ongoing I/O
*
* The start of the data lands in the first page just after the
* Transport header, and the rest lands in rqstp->rq_arg.pages.
@@ -1073,25 +1015,31 @@ static int svc_rdma_read_call_chunk(struct svc_rdma_read_info *info)
* %-ENOTCONN: posting failed (connection is lost),
* %-EIO: rdma_rw initialization failed (DMA mapping, etc).
*/
-static noinline int svc_rdma_read_special(struct svc_rdma_read_info *info)
+static noinline int svc_rdma_read_special(struct svc_rqst *rqstp,
+ struct svc_rdma_recv_ctxt *head)
{
- struct xdr_buf *buf = &info->ri_rqst->rq_arg;
- int ret;
-
- ret = svc_rdma_read_call_chunk(info);
- if (ret < 0)
- goto out;
-
- buf->len += info->ri_totalbytes;
- buf->buflen += info->ri_totalbytes;
+ return svc_rdma_read_call_chunk(rqstp, head);
+}
- buf->head[0].iov_base = page_address(info->ri_rqst->rq_pages[0]);
- buf->head[0].iov_len = min_t(size_t, PAGE_SIZE, info->ri_totalbytes);
- buf->pages = &info->ri_rqst->rq_pages[1];
- buf->page_len = info->ri_totalbytes - buf->head[0].iov_len;
+/* Pages under I/O have been copied to head->rc_pages. Ensure that
+ * svc_xprt_release() does not put them when svc_rdma_recvfrom()
+ * returns. This has to be done after all Read WRs are constructed
+ * to properly handle a page that happens to be part of I/O on behalf
+ * of two different RDMA segments.
+ *
+ * Note: if the subsequent post_send fails, these pages have already
+ * been moved to head->rc_pages and thus will be cleaned up by
+ * svc_rdma_recv_ctxt_put().
+ */
+static void svc_rdma_clear_rqst_pages(struct svc_rqst *rqstp,
+ struct svc_rdma_recv_ctxt *head)
+{
+ unsigned int i;
-out:
- return ret;
+ for (i = 0; i < head->rc_page_count; i++) {
+ head->rc_pages[i] = rqstp->rq_pages[i];
+ rqstp->rq_pages[i] = NULL;
+ }
}
/**
@@ -1121,49 +1069,27 @@ int svc_rdma_process_read_list(struct svcxprt_rdma *rdma,
struct svc_rqst *rqstp,
struct svc_rdma_recv_ctxt *head)
{
- struct svc_rdma_read_info *info;
- struct svc_rdma_chunk_ctxt *cc;
+ struct svc_rdma_chunk_ctxt *cc = &head->rc_cc;
int ret;
- info = svc_rdma_read_info_alloc(rdma);
- if (!info)
- return -ENOMEM;
- cc = &info->ri_cc;
- info->ri_rqst = rqstp;
- info->ri_readctxt = head;
- info->ri_pageno = 0;
- info->ri_pageoff = 0;
- info->ri_totalbytes = 0;
+ cc->cc_cqe.done = svc_rdma_wc_read_done;
+ cc->cc_sqecount = 0;
+ head->rc_pageoff = 0;
+ head->rc_curpage = 0;
+ head->rc_readbytes = 0;
if (pcl_is_empty(&head->rc_call_pcl)) {
if (head->rc_read_pcl.cl_count == 1)
- ret = svc_rdma_read_data_item(info);
+ ret = svc_rdma_read_data_item(rqstp, head);
else
- ret = svc_rdma_read_multiple_chunks(info);
+ ret = svc_rdma_read_multiple_chunks(rqstp, head);
} else
- ret = svc_rdma_read_special(info);
+ ret = svc_rdma_read_special(rqstp, head);
+ svc_rdma_clear_rqst_pages(rqstp, head);
if (ret < 0)
- goto out_err;
+ return ret;
trace_svcrdma_post_read_chunk(&cc->cc_cid, cc->cc_sqecount);
- init_completion(&cc->cc_done);
- ret = svc_rdma_post_chunk_ctxt(cc);
- if (ret < 0)
- goto out_err;
-
- ret = 1;
- wait_for_completion(&cc->cc_done);
- if (cc->cc_status != IB_WC_SUCCESS)
- ret = -EIO;
-
- /* rq_respages starts after the last arg page */
- rqstp->rq_respages = &rqstp->rq_pages[head->rc_page_count];
- rqstp->rq_next_page = rqstp->rq_respages + 1;
-
- /* Ensure svc_rdma_recv_ctxt_put() does not try to release pages */
- head->rc_page_count = 0;
-
-out_err:
- svc_rdma_read_info_free(info);
- return ret;
+ ret = svc_rdma_post_chunk_ctxt(rdma, cc);
+ return ret < 0 ? ret : 1;
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index c6644cca52c5..1a49b7f02041 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -113,13 +113,6 @@
static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc);
-static void svc_rdma_send_cid_init(struct svcxprt_rdma *rdma,
- struct rpc_rdma_cid *cid)
-{
- cid->ci_queue_id = rdma->sc_sq_cq->res.id;
- cid->ci_completion_id = atomic_inc_return(&rdma->sc_completion_ids);
-}
-
static struct svc_rdma_send_ctxt *
svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma)
{
@@ -129,7 +122,7 @@ svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma)
void *buffer;
int i;
- ctxt = kmalloc_node(struct_size(ctxt, sc_sges, rdma->sc_max_send_sges),
+ ctxt = kzalloc_node(struct_size(ctxt, sc_sges, rdma->sc_max_send_sges),
GFP_KERNEL, node);
if (!ctxt)
goto fail0;
@@ -143,6 +136,7 @@ svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma)
svc_rdma_send_cid_init(rdma, &ctxt->sc_cid);
+ ctxt->sc_rdma = rdma;
ctxt->sc_send_wr.next = NULL;
ctxt->sc_send_wr.wr_cqe = &ctxt->sc_cqe;
ctxt->sc_send_wr.sg_list = ctxt->sc_sges;
@@ -200,10 +194,11 @@ struct svc_rdma_send_ctxt *svc_rdma_send_ctxt_get(struct svcxprt_rdma *rdma)
spin_lock(&rdma->sc_send_lock);
node = llist_del_first(&rdma->sc_send_ctxts);
+ spin_unlock(&rdma->sc_send_lock);
if (!node)
goto out_empty;
+
ctxt = llist_entry(node, struct svc_rdma_send_ctxt, sc_node);
- spin_unlock(&rdma->sc_send_lock);
out:
rpcrdma_set_xdrlen(&ctxt->sc_hdrbuf, 0);
@@ -216,22 +211,14 @@ out:
return ctxt;
out_empty:
- spin_unlock(&rdma->sc_send_lock);
ctxt = svc_rdma_send_ctxt_alloc(rdma);
if (!ctxt)
return NULL;
goto out;
}
-/**
- * svc_rdma_send_ctxt_put - Return send_ctxt to free list
- * @rdma: controlling svcxprt_rdma
- * @ctxt: object to return to the free list
- *
- * Pages left in sc_pages are DMA unmapped and released.
- */
-void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma,
- struct svc_rdma_send_ctxt *ctxt)
+static void svc_rdma_send_ctxt_release(struct svcxprt_rdma *rdma,
+ struct svc_rdma_send_ctxt *ctxt)
{
struct ib_device *device = rdma->sc_cm_id->device;
unsigned int i;
@@ -243,18 +230,40 @@ void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma,
* remains mapped until @ctxt is destroyed.
*/
for (i = 1; i < ctxt->sc_send_wr.num_sge; i++) {
+ trace_svcrdma_dma_unmap_page(&ctxt->sc_cid,
+ ctxt->sc_sges[i].addr,
+ ctxt->sc_sges[i].length);
ib_dma_unmap_page(device,
ctxt->sc_sges[i].addr,
ctxt->sc_sges[i].length,
DMA_TO_DEVICE);
- trace_svcrdma_dma_unmap_page(rdma,
- ctxt->sc_sges[i].addr,
- ctxt->sc_sges[i].length);
}
llist_add(&ctxt->sc_node, &rdma->sc_send_ctxts);
}
+static void svc_rdma_send_ctxt_put_async(struct work_struct *work)
+{
+ struct svc_rdma_send_ctxt *ctxt;
+
+ ctxt = container_of(work, struct svc_rdma_send_ctxt, sc_work);
+ svc_rdma_send_ctxt_release(ctxt->sc_rdma, ctxt);
+}
+
+/**
+ * svc_rdma_send_ctxt_put - Return send_ctxt to free list
+ * @rdma: controlling svcxprt_rdma
+ * @ctxt: object to return to the free list
+ *
+ * Pages left in sc_pages are DMA unmapped and released.
+ */
+void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma,
+ struct svc_rdma_send_ctxt *ctxt)
+{
+ INIT_WORK(&ctxt->sc_work, svc_rdma_send_ctxt_put_async);
+ queue_work(svcrdma_wq, &ctxt->sc_work);
+}
+
/**
* svc_rdma_wake_send_waiters - manage Send Queue accounting
* @rdma: controlling transport
@@ -289,7 +298,7 @@ static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
if (unlikely(wc->status != IB_WC_SUCCESS))
goto flushed;
- trace_svcrdma_wc_send(wc, &ctxt->sc_cid);
+ trace_svcrdma_wc_send(&ctxt->sc_cid);
svc_rdma_send_ctxt_put(rdma, ctxt);
return;
@@ -327,13 +336,13 @@ int svc_rdma_send(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt)
while (1) {
if ((atomic_dec_return(&rdma->sc_sq_avail) < 0)) {
percpu_counter_inc(&svcrdma_stat_sq_starve);
- trace_svcrdma_sq_full(rdma);
+ trace_svcrdma_sq_full(rdma, &ctxt->sc_cid);
atomic_inc(&rdma->sc_sq_avail);
wait_event(rdma->sc_send_wait,
atomic_read(&rdma->sc_sq_avail) > 1);
if (test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags))
return -ENOTCONN;
- trace_svcrdma_sq_retry(rdma);
+ trace_svcrdma_sq_retry(rdma, &ctxt->sc_cid);
continue;
}
@@ -344,7 +353,7 @@ int svc_rdma_send(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt)
return 0;
}
- trace_svcrdma_sq_post_err(rdma, ret);
+ trace_svcrdma_sq_post_err(rdma, &ctxt->sc_cid, ret);
svc_xprt_deferred_close(&rdma->sc_xprt);
wake_up(&rdma->sc_send_wait);
return ret;
@@ -534,14 +543,14 @@ static int svc_rdma_page_dma_map(void *data, struct page *page,
if (ib_dma_mapping_error(dev, dma_addr))
goto out_maperr;
- trace_svcrdma_dma_map_page(rdma, dma_addr, len);
+ trace_svcrdma_dma_map_page(&ctxt->sc_cid, dma_addr, len);
ctxt->sc_sges[ctxt->sc_cur_sge_no].addr = dma_addr;
ctxt->sc_sges[ctxt->sc_cur_sge_no].length = len;
ctxt->sc_send_wr.num_sge++;
return 0;
out_maperr:
- trace_svcrdma_dma_map_err(rdma, dma_addr, len);
+ trace_svcrdma_dma_map_err(&ctxt->sc_cid, dma_addr, len);
return -EIO;
}
@@ -653,7 +662,7 @@ static int svc_rdma_xb_count_sges(const struct xdr_buf *xdr,
* svc_rdma_pull_up_needed - Determine whether to use pull-up
* @rdma: controlling transport
* @sctxt: send_ctxt for the Send WR
- * @rctxt: Write and Reply chunks provided by client
+ * @write_pcl: Write chunk list provided by client
* @xdr: xdr_buf containing RPC message to transmit
*
* Returns:
@@ -662,7 +671,7 @@ static int svc_rdma_xb_count_sges(const struct xdr_buf *xdr,
*/
static bool svc_rdma_pull_up_needed(const struct svcxprt_rdma *rdma,
const struct svc_rdma_send_ctxt *sctxt,
- const struct svc_rdma_recv_ctxt *rctxt,
+ const struct svc_rdma_pcl *write_pcl,
const struct xdr_buf *xdr)
{
/* Resources needed for the transport header */
@@ -672,7 +681,7 @@ static bool svc_rdma_pull_up_needed(const struct svcxprt_rdma *rdma,
};
int ret;
- ret = pcl_process_nonpayloads(&rctxt->rc_write_pcl, xdr,
+ ret = pcl_process_nonpayloads(write_pcl, xdr,
svc_rdma_xb_count_sges, &args);
if (ret < 0)
return false;
@@ -728,7 +737,7 @@ static int svc_rdma_xb_linearize(const struct xdr_buf *xdr,
* svc_rdma_pull_up_reply_msg - Copy Reply into a single buffer
* @rdma: controlling transport
* @sctxt: send_ctxt for the Send WR; xprt hdr is already prepared
- * @rctxt: Write and Reply chunks provided by client
+ * @write_pcl: Write chunk list provided by client
* @xdr: prepared xdr_buf containing RPC message
*
* The device is not capable of sending the reply directly.
@@ -743,7 +752,7 @@ static int svc_rdma_xb_linearize(const struct xdr_buf *xdr,
*/
static int svc_rdma_pull_up_reply_msg(const struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *sctxt,
- const struct svc_rdma_recv_ctxt *rctxt,
+ const struct svc_rdma_pcl *write_pcl,
const struct xdr_buf *xdr)
{
struct svc_rdma_pullup_data args = {
@@ -751,7 +760,7 @@ static int svc_rdma_pull_up_reply_msg(const struct svcxprt_rdma *rdma,
};
int ret;
- ret = pcl_process_nonpayloads(&rctxt->rc_write_pcl, xdr,
+ ret = pcl_process_nonpayloads(write_pcl, xdr,
svc_rdma_xb_linearize, &args);
if (ret < 0)
return ret;
@@ -764,7 +773,8 @@ static int svc_rdma_pull_up_reply_msg(const struct svcxprt_rdma *rdma,
/* svc_rdma_map_reply_msg - DMA map the buffer holding RPC message
* @rdma: controlling transport
* @sctxt: send_ctxt for the Send WR
- * @rctxt: Write and Reply chunks provided by client
+ * @write_pcl: Write chunk list provided by client
+ * @reply_pcl: Reply chunk provided by client
* @xdr: prepared xdr_buf containing RPC message
*
* Returns:
@@ -776,7 +786,8 @@ static int svc_rdma_pull_up_reply_msg(const struct svcxprt_rdma *rdma,
*/
int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *sctxt,
- const struct svc_rdma_recv_ctxt *rctxt,
+ const struct svc_rdma_pcl *write_pcl,
+ const struct svc_rdma_pcl *reply_pcl,
const struct xdr_buf *xdr)
{
struct svc_rdma_map_data args = {
@@ -789,18 +800,18 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
sctxt->sc_sges[0].length = sctxt->sc_hdrbuf.len;
/* If there is a Reply chunk, nothing follows the transport
- * header, and we're done here.
+ * header, so there is nothing to map.
*/
- if (!pcl_is_empty(&rctxt->rc_reply_pcl))
+ if (!pcl_is_empty(reply_pcl))
return 0;
/* For pull-up, svc_rdma_send() will sync the transport header.
* No additional DMA mapping is necessary.
*/
- if (svc_rdma_pull_up_needed(rdma, sctxt, rctxt, xdr))
- return svc_rdma_pull_up_reply_msg(rdma, sctxt, rctxt, xdr);
+ if (svc_rdma_pull_up_needed(rdma, sctxt, write_pcl, xdr))
+ return svc_rdma_pull_up_reply_msg(rdma, sctxt, write_pcl, xdr);
- return pcl_process_nonpayloads(&rctxt->rc_write_pcl, xdr,
+ return pcl_process_nonpayloads(write_pcl, xdr,
svc_rdma_xb_dma_map, &args);
}
@@ -848,7 +859,8 @@ static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma,
{
int ret;
- ret = svc_rdma_map_reply_msg(rdma, sctxt, rctxt, &rqstp->rq_res);
+ ret = svc_rdma_map_reply_msg(rdma, sctxt, &rctxt->rc_write_pcl,
+ &rctxt->rc_reply_pcl, &rqstp->rq_res);
if (ret < 0)
return ret;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 2abd895046ee..4f27325ace4a 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -125,6 +125,9 @@ static void qp_event_handler(struct ib_event *event, void *context)
static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv,
struct net *net, int node)
{
+ static struct lock_class_key svcrdma_rwctx_lock;
+ static struct lock_class_key svcrdma_sctx_lock;
+ static struct lock_class_key svcrdma_dto_lock;
struct svcxprt_rdma *cma_xprt;
cma_xprt = kzalloc_node(sizeof(*cma_xprt), GFP_KERNEL, node);
@@ -134,6 +137,7 @@ static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv,
svc_xprt_init(net, &svc_rdma_class, &cma_xprt->sc_xprt, serv);
INIT_LIST_HEAD(&cma_xprt->sc_accept_q);
INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
+ INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q);
init_llist_head(&cma_xprt->sc_send_ctxts);
init_llist_head(&cma_xprt->sc_recv_ctxts);
init_llist_head(&cma_xprt->sc_rw_ctxts);
@@ -141,8 +145,11 @@ static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv,
spin_lock_init(&cma_xprt->sc_lock);
spin_lock_init(&cma_xprt->sc_rq_dto_lock);
+ lockdep_set_class(&cma_xprt->sc_rq_dto_lock, &svcrdma_dto_lock);
spin_lock_init(&cma_xprt->sc_send_lock);
+ lockdep_set_class(&cma_xprt->sc_send_lock, &svcrdma_sctx_lock);
spin_lock_init(&cma_xprt->sc_rw_ctxt_lock);
+ lockdep_set_class(&cma_xprt->sc_rw_ctxt_lock, &svcrdma_rwctx_lock);
/*
* Note that this implies that the underlying transport support
@@ -391,37 +398,35 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
dev = newxprt->sc_cm_id->device;
newxprt->sc_port_num = newxprt->sc_cm_id->port_num;
- /* Qualify the transport resource defaults with the
- * capabilities of this particular device */
+ newxprt->sc_max_req_size = svcrdma_max_req_size;
+ newxprt->sc_max_requests = svcrdma_max_requests;
+ newxprt->sc_max_bc_requests = svcrdma_max_bc_requests;
+ newxprt->sc_recv_batch = RPCRDMA_MAX_RECV_BATCH;
+ newxprt->sc_fc_credits = cpu_to_be32(newxprt->sc_max_requests);
+
+ /* Qualify the transport's resource defaults with the
+ * capabilities of this particular device.
+ */
+
/* Transport header, head iovec, tail iovec */
newxprt->sc_max_send_sges = 3;
/* Add one SGE per page list entry */
newxprt->sc_max_send_sges += (svcrdma_max_req_size / PAGE_SIZE) + 1;
if (newxprt->sc_max_send_sges > dev->attrs.max_send_sge)
newxprt->sc_max_send_sges = dev->attrs.max_send_sge;
- newxprt->sc_max_req_size = svcrdma_max_req_size;
- newxprt->sc_max_requests = svcrdma_max_requests;
- newxprt->sc_max_bc_requests = svcrdma_max_bc_requests;
- newxprt->sc_recv_batch = RPCRDMA_MAX_RECV_BATCH;
rq_depth = newxprt->sc_max_requests + newxprt->sc_max_bc_requests +
newxprt->sc_recv_batch;
if (rq_depth > dev->attrs.max_qp_wr) {
- pr_warn("svcrdma: reducing receive depth to %d\n",
- dev->attrs.max_qp_wr);
rq_depth = dev->attrs.max_qp_wr;
newxprt->sc_recv_batch = 1;
newxprt->sc_max_requests = rq_depth - 2;
newxprt->sc_max_bc_requests = 2;
}
- newxprt->sc_fc_credits = cpu_to_be32(newxprt->sc_max_requests);
ctxts = rdma_rw_mr_factor(dev, newxprt->sc_port_num, RPCSVC_MAXPAGES);
ctxts *= newxprt->sc_max_requests;
newxprt->sc_sq_depth = rq_depth + ctxts;
- if (newxprt->sc_sq_depth > dev->attrs.max_qp_wr) {
- pr_warn("svcrdma: reducing send depth to %d\n",
- dev->attrs.max_qp_wr);
+ if (newxprt->sc_sq_depth > dev->attrs.max_qp_wr)
newxprt->sc_sq_depth = dev->attrs.max_qp_wr;
- }
atomic_set(&newxprt->sc_sq_avail, newxprt->sc_sq_depth);
newxprt->sc_pd = ib_alloc_pd(dev, 0);
@@ -451,8 +456,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
qp_attr.qp_type = IB_QPT_RC;
qp_attr.send_cq = newxprt->sc_sq_cq;
qp_attr.recv_cq = newxprt->sc_rq_cq;
- dprintk("svcrdma: newxprt->sc_cm_id=%p, newxprt->sc_pd=%p\n",
- newxprt->sc_cm_id, newxprt->sc_pd);
dprintk(" cap.max_send_wr = %d, cap.max_recv_wr = %d\n",
qp_attr.cap.max_send_wr, qp_attr.cap.max_recv_wr);
dprintk(" cap.max_send_sge = %d, cap.max_recv_sge = %d\n",
@@ -506,7 +509,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
}
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
- dprintk("svcrdma: new connection %p accepted:\n", newxprt);
+ dprintk("svcrdma: new connection accepted on device %s:\n", dev->name);
sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr;
dprintk(" local address : %pIS:%u\n", sap, rpc_get_port(sap));
sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr;
@@ -547,6 +550,7 @@ static void __svc_rdma_free(struct work_struct *work)
/* This blocks until the Completion Queues are empty */
if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
ib_drain_qp(rdma->sc_qp);
+ flush_workqueue(svcrdma_wq);
svc_rdma_flush_recv_queues(rdma);
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 28c0771c4e8c..4f8d7efa469f 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -1364,7 +1364,7 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp)
}
rep->rr_cid.ci_queue_id = ep->re_attr.recv_cq->res.id;
- trace_xprtrdma_post_recv(rep);
+ trace_xprtrdma_post_recv(&rep->rr_cid);
rep->rr_recv_wr.next = wr;
wr = &rep->rr_recv_wr;
--needed;
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 71cd916e384f..58f3dc8d0d71 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1181,6 +1181,7 @@ static void xs_sock_reset_state_flags(struct rpc_xprt *xprt)
{
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+ transport->xprt_err = 0;
clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
clear_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state);
clear_bit(XPRT_SOCK_WAKE_WRITE, &transport->sock_state);
@@ -2672,6 +2673,10 @@ static void xs_tcp_tls_setup_socket(struct work_struct *work)
rcu_read_lock();
lower_xprt = rcu_dereference(lower_clnt->cl_xprt);
rcu_read_unlock();
+
+ if (wait_on_bit_lock(&lower_xprt->state, XPRT_LOCKED, TASK_KILLABLE))
+ goto out_unlock;
+
status = xs_tls_handshake_sync(lower_xprt, &upper_xprt->xprtsec);
if (status) {
trace_rpc_tls_not_started(upper_clnt, upper_xprt);
@@ -2681,6 +2686,7 @@ static void xs_tcp_tls_setup_socket(struct work_struct *work)
status = xs_tcp_tls_finish_connecting(lower_xprt, upper_transport);
if (status)
goto out_close;
+ xprt_release_write(lower_xprt, NULL);
trace_rpc_socket_connect(upper_xprt, upper_transport->sock, 0);
if (!xprt_test_and_set_connected(upper_xprt)) {
@@ -2702,6 +2708,7 @@ out_unlock:
return;
out_close:
+ xprt_release_write(lower_xprt, NULL);
rpc_shutdown_client(lower_clnt);
/* xprt_force_disconnect() wakes tasks with a fixed tk_status code.
@@ -2766,18 +2773,13 @@ static void xs_wake_error(struct sock_xprt *transport)
{
int sockerr;
- if (!test_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state))
- return;
- mutex_lock(&transport->recv_mutex);
- if (transport->sock == NULL)
- goto out;
if (!test_and_clear_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state))
- goto out;
+ return;
sockerr = xchg(&transport->xprt_err, 0);
- if (sockerr < 0)
+ if (sockerr < 0) {
xprt_wake_pending_tasks(&transport->xprt, sockerr);
-out:
- mutex_unlock(&transport->recv_mutex);
+ xs_tcp_force_close(&transport->xprt);
+ }
}
static void xs_wake_pending(struct sock_xprt *transport)
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 2cde375477e3..878415c43527 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -1086,6 +1086,12 @@ int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info)
#ifdef CONFIG_TIPC_MEDIA_UDP
if (attrs[TIPC_NLA_BEARER_UDP_OPTS]) {
+ if (b->media->type_id != TIPC_MEDIA_TYPE_UDP) {
+ rtnl_unlock();
+ NL_SET_ERR_MSG(info->extack, "UDP option is unsupported");
+ return -EINVAL;
+ }
+
err = tipc_udp_nl_bearer_add(b,
attrs[TIPC_NLA_BEARER_UDP_OPTS]);
if (err) {
diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c
index 302fd749c424..43c3f1c971b8 100644
--- a/net/tipc/crypto.c
+++ b/net/tipc/crypto.c
@@ -1441,14 +1441,14 @@ static int tipc_crypto_key_revoke(struct net *net, u8 tx_key)
struct tipc_crypto *tx = tipc_net(net)->crypto_tx;
struct tipc_key key;
- spin_lock(&tx->lock);
+ spin_lock_bh(&tx->lock);
key = tx->key;
WARN_ON(!key.active || tx_key != key.active);
/* Free the active key */
tipc_crypto_key_set_state(tx, key.passive, 0, key.pending);
tipc_crypto_key_detach(tx->aead[key.active], &tx->lock);
- spin_unlock(&tx->lock);
+ spin_unlock_bh(&tx->lock);
pr_warn("%s: key is revoked\n", tx->name);
return -EKEYREVOKED;
diff --git a/net/tipc/diag.c b/net/tipc/diag.c
index 73137f4aeb68..18733451c9e0 100644
--- a/net/tipc/diag.c
+++ b/net/tipc/diag.c
@@ -113,4 +113,5 @@ module_init(tipc_diag_init);
module_exit(tipc_diag_exit);
MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("TIPC socket monitoring via SOCK_DIAG");
MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, AF_TIPC);
diff --git a/net/tipc/link.c b/net/tipc/link.c
index e33b4f29f77c..0716eb5c8a31 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -82,10 +82,7 @@ struct tipc_stats {
* struct tipc_link - TIPC link data structure
* @addr: network address of link's peer node
* @name: link name character string
- * @media_addr: media address to use when sending messages over link
- * @timer: link timer
* @net: pointer to namespace struct
- * @refcnt: reference counter for permanent references (owner node & timer)
* @peer_session: link session # being used by peer end of link
* @peer_bearer_id: bearer id used by link's peer endpoint
* @bearer_id: local bearer id used by link
@@ -94,31 +91,19 @@ struct tipc_stats {
* @state: current state of link FSM
* @peer_caps: bitmap describing capabilities of peer node
* @silent_intv_cnt: # of timer intervals without any reception from peer
- * @proto_msg: template for control messages generated by link
- * @pmsg: convenience pointer to "proto_msg" field
* @priority: current link priority
* @net_plane: current link network plane ('A' through 'H')
* @mon_state: cookie with information needed by link monitor
- * @backlog_limit: backlog queue congestion thresholds (indexed by importance)
- * @exp_msg_count: # of tunnelled messages expected during link changeover
- * @reset_rcv_checkpt: seq # of last acknowledged message at time of link reset
* @mtu: current maximum packet size for this link
* @advertised_mtu: advertised own mtu when link is being established
- * @transmitq: queue for sent, non-acked messages
* @backlogq: queue for messages waiting to be sent
- * @snt_nxt: next sequence number to use for outbound messages
* @ackers: # of peers that needs to ack each packet before it can be released
* @acked: # last packet acked by a certain peer. Used for broadcast.
* @rcv_nxt: next sequence number to expect for inbound messages
- * @deferred_queue: deferred queue saved OOS b'cast message received from node
- * @unacked_window: # of inbound messages rx'd without ack'ing back to peer
* @inputq: buffer queue for messages to be delivered upwards
* @namedq: buffer queue for name table messages to be delivered upwards
- * @next_out: ptr to first unsent outbound message in queue
* @wakeupq: linked list of wakeup msgs waiting for link congestion to abate
- * @long_msg_seq_no: next identifier to use for outbound fragmented messages
* @reasm_buf: head of partially reassembled inbound message fragments
- * @bc_rcvr: marks that this is a broadcast receiver link
* @stats: collects statistics regarding link activity
* @session: session to be used by link
* @snd_nxt_state: next send seq number
@@ -1446,7 +1431,7 @@ u16 tipc_get_gap_ack_blks(struct tipc_gap_ack_blks **ga, struct tipc_link *l,
p = (struct tipc_gap_ack_blks *)msg_data(hdr);
sz = ntohs(p->len);
/* Sanity check */
- if (sz == struct_size(p, gacks, p->ugack_cnt + p->bgack_cnt)) {
+ if (sz == struct_size(p, gacks, size_add(p->ugack_cnt, p->bgack_cnt))) {
/* Good, check if the desired type exists */
if ((uc && p->ugack_cnt) || (!uc && p->bgack_cnt))
goto ok;
@@ -1533,7 +1518,7 @@ static u16 tipc_build_gap_ack_blks(struct tipc_link *l, struct tipc_msg *hdr)
__tipc_build_gap_ack_blks(ga, l, ga->bgack_cnt) : 0;
/* Total len */
- len = struct_size(ga, gacks, ga->bgack_cnt + ga->ugack_cnt);
+ len = struct_size(ga, gacks, size_add(ga->bgack_cnt, ga->ugack_cnt));
ga->len = htons(len);
return len;
}
diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index e8fd257c0e68..1a9a5bdaccf4 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -88,7 +88,7 @@ const struct nla_policy tipc_nl_net_policy[TIPC_NLA_NET_MAX + 1] = {
const struct nla_policy tipc_nl_link_policy[TIPC_NLA_LINK_MAX + 1] = {
[TIPC_NLA_LINK_UNSPEC] = { .type = NLA_UNSPEC },
- [TIPC_NLA_LINK_NAME] = { .type = NLA_STRING,
+ [TIPC_NLA_LINK_NAME] = { .type = NLA_NUL_STRING,
.len = TIPC_MAX_LINK_NAME },
[TIPC_NLA_LINK_MTU] = { .type = NLA_U32 },
[TIPC_NLA_LINK_BROADCAST] = { .type = NLA_FLAG },
@@ -125,7 +125,7 @@ const struct nla_policy tipc_nl_prop_policy[TIPC_NLA_PROP_MAX + 1] = {
const struct nla_policy tipc_nl_bearer_policy[TIPC_NLA_BEARER_MAX + 1] = {
[TIPC_NLA_BEARER_UNSPEC] = { .type = NLA_UNSPEC },
- [TIPC_NLA_BEARER_NAME] = { .type = NLA_STRING,
+ [TIPC_NLA_BEARER_NAME] = { .type = NLA_NUL_STRING,
.len = TIPC_MAX_BEARER_NAME },
[TIPC_NLA_BEARER_PROP] = { .type = NLA_NESTED },
[TIPC_NLA_BEARER_DOMAIN] = { .type = NLA_U32 }
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index 5bc076f2fa74..079aebb16ed8 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -102,6 +102,7 @@ static int tipc_add_tlv(struct sk_buff *skb, u16 type, void *data, u16 len)
return -EMSGSIZE;
skb_put(skb, TLV_SPACE(len));
+ memset(tlv, 0, TLV_SPACE(len));
tlv->tlv_type = htons(type);
tlv->tlv_len = htons(TLV_LENGTH(len));
if (len && data)
@@ -167,7 +168,7 @@ static struct sk_buff *tipc_get_err_tlv(char *str)
int str_len = strlen(str) + 1;
struct sk_buff *buf;
- buf = tipc_tlv_alloc(TLV_SPACE(str_len));
+ buf = tipc_tlv_alloc(str_len);
if (buf)
tipc_add_tlv(buf, TIPC_TLV_ERROR_STRING, str, str_len);
diff --git a/net/tls/tls.h b/net/tls/tls.h
index 28a8c0e80e3c..762f424ff2d5 100644
--- a/net/tls/tls.h
+++ b/net/tls/tls.h
@@ -127,7 +127,7 @@ struct tls_rec {
struct sock *sk;
char aad_space[TLS_AAD_SPACE_SIZE];
- u8 iv_data[MAX_IV_SIZE];
+ u8 iv_data[TLS_MAX_IV_SIZE];
struct aead_request aead_req;
u8 aead_req_ctx[];
};
@@ -142,7 +142,10 @@ void update_sk_prot(struct sock *sk, struct tls_context *ctx);
int wait_on_pending_writer(struct sock *sk, long *timeo);
void tls_err_abort(struct sock *sk, int err);
-int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx);
+int init_prot_info(struct tls_prot_info *prot,
+ const struct tls_crypto_info *crypto_info,
+ const struct tls_cipher_desc *cipher_desc);
+int tls_set_sw_offload(struct sock *sk, int tx);
void tls_update_rx_zc_capable(struct tls_context *tls_ctx);
void tls_sw_strparser_arm(struct sock *sk, struct tls_context *ctx);
void tls_sw_strparser_done(struct tls_context *tls_ctx);
@@ -223,7 +226,7 @@ static inline bool tls_strp_msg_mixed_decrypted(struct tls_sw_context_rx *ctx)
#ifdef CONFIG_TLS_DEVICE
int tls_device_init(void);
void tls_device_cleanup(void);
-int tls_set_device_offload(struct sock *sk, struct tls_context *ctx);
+int tls_set_device_offload(struct sock *sk);
void tls_device_free_resources_tx(struct sock *sk);
int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx);
void tls_device_offload_cleanup_rx(struct sock *sk);
@@ -234,7 +237,7 @@ static inline int tls_device_init(void) { return 0; }
static inline void tls_device_cleanup(void) {}
static inline int
-tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
+tls_set_device_offload(struct sock *sk)
{
return -EOPNOTSUPP;
}
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 8c94c926606a..bf8ed36b1ad6 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -56,11 +56,8 @@ static struct page *dummy_page;
static void tls_device_free_ctx(struct tls_context *ctx)
{
- if (ctx->tx_conf == TLS_HW) {
+ if (ctx->tx_conf == TLS_HW)
kfree(tls_offload_ctx_tx(ctx));
- kfree(ctx->tx.rec_seq);
- kfree(ctx->tx.iv);
- }
if (ctx->rx_conf == TLS_HW)
kfree(tls_offload_ctx_rx(ctx));
@@ -891,14 +888,8 @@ tls_device_reencrypt(struct sock *sk, struct tls_context *tls_ctx)
struct strp_msg *rxm;
char *orig_buf, *buf;
- switch (tls_ctx->crypto_recv.info.cipher_type) {
- case TLS_CIPHER_AES_GCM_128:
- case TLS_CIPHER_AES_GCM_256:
- break;
- default:
- return -EINVAL;
- }
cipher_desc = get_cipher_desc(tls_ctx->crypto_recv.info.cipher_type);
+ DEBUG_NET_WARN_ON_ONCE(!cipher_desc || !cipher_desc->offloadable);
rxm = strp_msg(tls_strp_msg(sw_ctx));
orig_buf = kmalloc(rxm->full_len + TLS_HEADER_SIZE + cipher_desc->iv,
@@ -1042,22 +1033,45 @@ static void tls_device_attach(struct tls_context *ctx, struct sock *sk,
}
}
-int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
+static struct tls_offload_context_tx *alloc_offload_ctx_tx(struct tls_context *ctx)
+{
+ struct tls_offload_context_tx *offload_ctx;
+ __be64 rcd_sn;
+
+ offload_ctx = kzalloc(sizeof(*offload_ctx), GFP_KERNEL);
+ if (!offload_ctx)
+ return NULL;
+
+ INIT_WORK(&offload_ctx->destruct_work, tls_device_tx_del_task);
+ INIT_LIST_HEAD(&offload_ctx->records_list);
+ spin_lock_init(&offload_ctx->lock);
+ sg_init_table(offload_ctx->sg_tx_data,
+ ARRAY_SIZE(offload_ctx->sg_tx_data));
+
+ /* start at rec_seq - 1 to account for the start marker record */
+ memcpy(&rcd_sn, ctx->tx.rec_seq, sizeof(rcd_sn));
+ offload_ctx->unacked_record_sn = be64_to_cpu(rcd_sn) - 1;
+
+ offload_ctx->ctx = ctx;
+
+ return offload_ctx;
+}
+
+int tls_set_device_offload(struct sock *sk)
{
- struct tls_context *tls_ctx = tls_get_ctx(sk);
- struct tls_prot_info *prot = &tls_ctx->prot_info;
- const struct tls_cipher_desc *cipher_desc;
struct tls_record_info *start_marker_record;
struct tls_offload_context_tx *offload_ctx;
+ const struct tls_cipher_desc *cipher_desc;
struct tls_crypto_info *crypto_info;
+ struct tls_prot_info *prot;
struct net_device *netdev;
- char *iv, *rec_seq;
+ struct tls_context *ctx;
struct sk_buff *skb;
- __be64 rcd_sn;
+ char *iv, *rec_seq;
int rc;
- if (!ctx)
- return -EINVAL;
+ ctx = tls_get_ctx(sk);
+ prot = &ctx->prot_info;
if (ctx->priv_ctx_tx)
return -EEXIST;
@@ -1085,38 +1099,23 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
goto release_netdev;
}
+ rc = init_prot_info(prot, crypto_info, cipher_desc);
+ if (rc)
+ goto release_netdev;
+
iv = crypto_info_iv(crypto_info, cipher_desc);
rec_seq = crypto_info_rec_seq(crypto_info, cipher_desc);
- prot->version = crypto_info->version;
- prot->cipher_type = crypto_info->cipher_type;
- prot->prepend_size = TLS_HEADER_SIZE + cipher_desc->iv;
- prot->tag_size = cipher_desc->tag;
- prot->overhead_size = prot->prepend_size + prot->tag_size;
- prot->iv_size = cipher_desc->iv;
- prot->salt_size = cipher_desc->salt;
- ctx->tx.iv = kmalloc(cipher_desc->iv + cipher_desc->salt, GFP_KERNEL);
- if (!ctx->tx.iv) {
- rc = -ENOMEM;
- goto release_netdev;
- }
-
memcpy(ctx->tx.iv + cipher_desc->salt, iv, cipher_desc->iv);
-
- prot->rec_seq_size = cipher_desc->rec_seq;
- ctx->tx.rec_seq = kmemdup(rec_seq, cipher_desc->rec_seq, GFP_KERNEL);
- if (!ctx->tx.rec_seq) {
- rc = -ENOMEM;
- goto free_iv;
- }
+ memcpy(ctx->tx.rec_seq, rec_seq, cipher_desc->rec_seq);
start_marker_record = kmalloc(sizeof(*start_marker_record), GFP_KERNEL);
if (!start_marker_record) {
rc = -ENOMEM;
- goto free_rec_seq;
+ goto release_netdev;
}
- offload_ctx = kzalloc(TLS_OFFLOAD_CONTEXT_SIZE_TX, GFP_KERNEL);
+ offload_ctx = alloc_offload_ctx_tx(ctx);
if (!offload_ctx) {
rc = -ENOMEM;
goto free_marker_record;
@@ -1126,22 +1125,10 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
if (rc)
goto free_offload_ctx;
- /* start at rec_seq - 1 to account for the start marker record */
- memcpy(&rcd_sn, ctx->tx.rec_seq, sizeof(rcd_sn));
- offload_ctx->unacked_record_sn = be64_to_cpu(rcd_sn) - 1;
-
start_marker_record->end_seq = tcp_sk(sk)->write_seq;
start_marker_record->len = 0;
start_marker_record->num_frags = 0;
-
- INIT_WORK(&offload_ctx->destruct_work, tls_device_tx_del_task);
- offload_ctx->ctx = ctx;
-
- INIT_LIST_HEAD(&offload_ctx->records_list);
list_add_tail(&start_marker_record->list, &offload_ctx->records_list);
- spin_lock_init(&offload_ctx->lock);
- sg_init_table(offload_ctx->sg_tx_data,
- ARRAY_SIZE(offload_ctx->sg_tx_data));
clean_acked_data_enable(inet_csk(sk), &tls_icsk_clean_acked);
ctx->push_pending_record = tls_device_push_pending_record;
@@ -1198,10 +1185,6 @@ free_offload_ctx:
ctx->priv_ctx_tx = NULL;
free_marker_record:
kfree(start_marker_record);
-free_rec_seq:
- kfree(ctx->tx.rec_seq);
-free_iv:
- kfree(ctx->tx.iv);
release_netdev:
dev_put(netdev);
return rc;
@@ -1242,7 +1225,7 @@ int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx)
goto release_lock;
}
- context = kzalloc(TLS_OFFLOAD_CONTEXT_SIZE_RX, GFP_KERNEL);
+ context = kzalloc(sizeof(*context), GFP_KERNEL);
if (!context) {
rc = -ENOMEM;
goto release_lock;
@@ -1250,7 +1233,7 @@ int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx)
context->resync_nh_reset = 1;
ctx->priv_ctx_rx = context;
- rc = tls_set_sw_offload(sk, ctx, 0);
+ rc = tls_set_sw_offload(sk, 0);
if (rc)
goto release_ctx;
diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c
index 1d743f310f4f..4e7228f275fa 100644
--- a/net/tls/tls_device_fallback.c
+++ b/net/tls/tls_device_fallback.c
@@ -54,7 +54,7 @@ static int tls_enc_record(struct aead_request *aead_req,
struct scatter_walk *out, int *in_len,
struct tls_prot_info *prot)
{
- unsigned char buf[TLS_HEADER_SIZE + MAX_IV_SIZE];
+ unsigned char buf[TLS_HEADER_SIZE + TLS_MAX_IV_SIZE];
const struct tls_cipher_desc *cipher_desc;
struct scatterlist sg_in[3];
struct scatterlist sg_out[3];
@@ -62,14 +62,8 @@ static int tls_enc_record(struct aead_request *aead_req,
u16 len;
int rc;
- switch (prot->cipher_type) {
- case TLS_CIPHER_AES_GCM_128:
- case TLS_CIPHER_AES_GCM_256:
- break;
- default:
- return -EINVAL;
- }
cipher_desc = get_cipher_desc(prot->cipher_type);
+ DEBUG_NET_WARN_ON_ONCE(!cipher_desc || !cipher_desc->offloadable);
buf_size = TLS_HEADER_SIZE + cipher_desc->iv;
len = min_t(int, *in_len, buf_size);
@@ -338,17 +332,9 @@ static struct sk_buff *tls_enc_skb(struct tls_context *tls_ctx,
if (!aead_req)
return NULL;
- switch (tls_ctx->crypto_send.info.cipher_type) {
- case TLS_CIPHER_AES_GCM_128:
- salt = tls_ctx->crypto_send.aes_gcm_128.salt;
- break;
- case TLS_CIPHER_AES_GCM_256:
- salt = tls_ctx->crypto_send.aes_gcm_256.salt;
- break;
- default:
- goto free_req;
- }
cipher_desc = get_cipher_desc(tls_ctx->crypto_send.info.cipher_type);
+ DEBUG_NET_WARN_ON_ONCE(!cipher_desc || !cipher_desc->offloadable);
+
buf_len = cipher_desc->salt + cipher_desc->iv + TLS_AAD_SPACE_SIZE +
sync_size + cipher_desc->tag;
buf = kmalloc(buf_len, GFP_ATOMIC);
@@ -356,6 +342,7 @@ static struct sk_buff *tls_enc_skb(struct tls_context *tls_ctx,
goto free_req;
iv = buf;
+ salt = crypto_info_salt(&tls_ctx->crypto_send.info, cipher_desc);
memcpy(iv, salt, cipher_desc->salt);
aad = buf + cipher_desc->salt + cipher_desc->iv;
dummy_buf = aad + TLS_AAD_SPACE_SIZE;
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 02f583ff9239..1c2c6800949d 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -59,7 +59,8 @@ enum {
};
#define CHECK_CIPHER_DESC(cipher,ci) \
- static_assert(cipher ## _IV_SIZE <= MAX_IV_SIZE); \
+ static_assert(cipher ## _IV_SIZE <= TLS_MAX_IV_SIZE); \
+ static_assert(cipher ## _SALT_SIZE <= TLS_MAX_SALT_SIZE); \
static_assert(cipher ## _REC_SEQ_SIZE <= TLS_MAX_REC_SEQ_SIZE); \
static_assert(cipher ## _TAG_SIZE == TLS_TAG_SIZE); \
static_assert(sizeof_field(struct ci, iv) == cipher ## _IV_SIZE); \
@@ -139,8 +140,8 @@ void update_sk_prot(struct sock *sk, struct tls_context *ctx)
int wait_on_pending_writer(struct sock *sk, long *timeo)
{
- int rc = 0;
DEFINE_WAIT_FUNC(wait, woken_wake_function);
+ int ret, rc = 0;
add_wait_queue(sk_sleep(sk), &wait);
while (1) {
@@ -154,9 +155,13 @@ int wait_on_pending_writer(struct sock *sk, long *timeo)
break;
}
- if (sk_wait_event(sk, timeo,
- !READ_ONCE(sk->sk_write_pending), &wait))
+ ret = sk_wait_event(sk, timeo,
+ !READ_ONCE(sk->sk_write_pending), &wait);
+ if (ret) {
+ if (ret < 0)
+ rc = ret;
break;
+ }
}
remove_wait_queue(sk_sleep(sk), &wait);
return rc;
@@ -344,8 +349,6 @@ static void tls_sk_proto_cleanup(struct sock *sk,
/* We need these for tls_sw_fallback handling of other packets */
if (ctx->tx_conf == TLS_SW) {
- kfree(ctx->tx.rec_seq);
- kfree(ctx->tx.iv);
tls_sw_release_resources_tx(sk);
TLS_DEC_STATS(sock_net(sk), LINUX_MIB_TLSCURRTXSW);
} else if (ctx->tx_conf == TLS_HW) {
@@ -581,6 +584,31 @@ static int tls_getsockopt(struct sock *sk, int level, int optname,
return do_tls_getsockopt(sk, optname, optval, optlen);
}
+static int validate_crypto_info(const struct tls_crypto_info *crypto_info,
+ const struct tls_crypto_info *alt_crypto_info)
+{
+ if (crypto_info->version != TLS_1_2_VERSION &&
+ crypto_info->version != TLS_1_3_VERSION)
+ return -EINVAL;
+
+ switch (crypto_info->cipher_type) {
+ case TLS_CIPHER_ARIA_GCM_128:
+ case TLS_CIPHER_ARIA_GCM_256:
+ if (crypto_info->version != TLS_1_2_VERSION)
+ return -EINVAL;
+ break;
+ }
+
+ /* Ensure that TLS version and ciphers are same in both directions */
+ if (TLS_CRYPTO_INFO_READY(alt_crypto_info)) {
+ if (alt_crypto_info->version != crypto_info->version ||
+ alt_crypto_info->cipher_type != crypto_info->cipher_type)
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval,
unsigned int optlen, int tx)
{
@@ -612,21 +640,9 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval,
goto err_crypto_info;
}
- /* check version */
- if (crypto_info->version != TLS_1_2_VERSION &&
- crypto_info->version != TLS_1_3_VERSION) {
- rc = -EINVAL;
+ rc = validate_crypto_info(crypto_info, alt_crypto_info);
+ if (rc)
goto err_crypto_info;
- }
-
- /* Ensure that TLS version and ciphers are same in both directions */
- if (TLS_CRYPTO_INFO_READY(alt_crypto_info)) {
- if (alt_crypto_info->version != crypto_info->version ||
- alt_crypto_info->cipher_type != crypto_info->cipher_type) {
- rc = -EINVAL;
- goto err_crypto_info;
- }
- }
cipher_desc = get_cipher_desc(crypto_info->cipher_type);
if (!cipher_desc) {
@@ -634,16 +650,6 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval,
goto err_crypto_info;
}
- switch (crypto_info->cipher_type) {
- case TLS_CIPHER_ARIA_GCM_128:
- case TLS_CIPHER_ARIA_GCM_256:
- if (crypto_info->version != TLS_1_2_VERSION) {
- rc = -EINVAL;
- goto err_crypto_info;
- }
- break;
- }
-
if (optlen != cipher_desc->crypto_info) {
rc = -EINVAL;
goto err_crypto_info;
@@ -658,13 +664,13 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval,
}
if (tx) {
- rc = tls_set_device_offload(sk, ctx);
+ rc = tls_set_device_offload(sk);
conf = TLS_HW;
if (!rc) {
TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSTXDEVICE);
TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRTXDEVICE);
} else {
- rc = tls_set_sw_offload(sk, ctx, 1);
+ rc = tls_set_sw_offload(sk, 1);
if (rc)
goto err_crypto_info;
TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSTXSW);
@@ -678,7 +684,7 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval,
TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXDEVICE);
TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRRXDEVICE);
} else {
- rc = tls_set_sw_offload(sk, ctx, 0);
+ rc = tls_set_sw_offload(sk, 0);
if (rc)
goto err_crypto_info;
TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXSW);
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 1ed4a611631f..31e8a94dfc11 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -60,7 +60,7 @@ struct tls_decrypt_arg {
struct tls_decrypt_ctx {
struct sock *sk;
- u8 iv[MAX_IV_SIZE];
+ u8 iv[TLS_MAX_IV_SIZE];
u8 aad[TLS_MAX_AAD_SIZE];
u8 tail;
struct scatterlist sg[];
@@ -817,7 +817,7 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk,
psock = sk_psock_get(sk);
if (!psock || !policy) {
err = tls_push_record(sk, flags, record_type);
- if (err && sk->sk_err == EBADMSG) {
+ if (err && err != -EINPROGRESS && sk->sk_err == EBADMSG) {
*copied -= sk_msg_free(sk, msg);
tls_free_open_rec(sk);
err = -sk->sk_err;
@@ -846,7 +846,7 @@ more_data:
switch (psock->eval) {
case __SK_PASS:
err = tls_push_record(sk, flags, record_type);
- if (err && sk->sk_err == EBADMSG) {
+ if (err && err != -EINPROGRESS && sk->sk_err == EBADMSG) {
*copied -= sk_msg_free(sk, msg);
tls_free_open_rec(sk);
err = -sk->sk_err;
@@ -952,6 +952,8 @@ static int tls_sw_sendmsg_splice(struct sock *sk, struct msghdr *msg,
}
sk_msg_page_add(msg_pl, page, part, off);
+ msg_pl->sg.copybreak = 0;
+ msg_pl->sg.curr = msg_pl->sg.end;
sk_mem_charge(sk, part);
*copied += part;
try_to_copy -= part;
@@ -1050,7 +1052,11 @@ alloc_encrypted:
if (ret < 0)
goto send_end;
tls_ctx->pending_open_record_frags = true;
- if (full_record || eor || sk_msg_full(msg_pl))
+
+ if (sk_msg_full(msg_pl))
+ full_record = true;
+
+ if (full_record || eor)
goto copied;
continue;
}
@@ -1232,11 +1238,14 @@ void tls_sw_splice_eof(struct socket *sock)
lock_sock(sk);
retry:
+ /* same checks as in tls_sw_push_pending_record() */
rec = ctx->open_rec;
if (!rec)
goto unlock;
msg_pl = &rec->msg_plaintext;
+ if (msg_pl->sg.size == 0)
+ goto unlock;
/* Check the BPF advisor and perform transmission. */
ret = bpf_exec_tx_verdict(msg_pl, sk, false, TLS_RECORD_TYPE_DATA,
@@ -1291,6 +1300,7 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock,
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
DEFINE_WAIT_FUNC(wait, woken_wake_function);
+ int ret = 0;
long timeo;
timeo = sock_rcvtimeo(sk, nonblock);
@@ -1302,6 +1312,9 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock,
if (sk->sk_err)
return sock_error(sk);
+ if (ret < 0)
+ return ret;
+
if (!skb_queue_empty(&sk->sk_receive_queue)) {
tls_strp_check_rcv(&ctx->strp);
if (tls_strp_msg_ready(ctx))
@@ -1320,10 +1333,10 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock,
released = true;
add_wait_queue(sk_sleep(sk), &wait);
sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
- sk_wait_event(sk, &timeo,
- tls_strp_msg_ready(ctx) ||
- !sk_psock_queue_empty(psock),
- &wait);
+ ret = sk_wait_event(sk, &timeo,
+ tls_strp_msg_ready(ctx) ||
+ !sk_psock_queue_empty(psock),
+ &wait);
sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
remove_wait_queue(sk_sleep(sk), &wait);
@@ -1487,7 +1500,7 @@ static int tls_decrypt_sg(struct sock *sk, struct iov_iter *out_iov,
*/
aead_size = sizeof(*aead_req) + crypto_aead_reqsize(ctx->aead_recv);
aead_size = ALIGN(aead_size, __alignof__(*dctx));
- mem = kmalloc(aead_size + struct_size(dctx, sg, n_sgin + n_sgout),
+ mem = kmalloc(aead_size + struct_size(dctx, sg, size_add(n_sgin, n_sgout)),
sk->sk_allocation);
if (!mem) {
err = -ENOMEM;
@@ -1852,6 +1865,7 @@ static int tls_rx_reader_acquire(struct sock *sk, struct tls_sw_context_rx *ctx,
bool nonblock)
{
long timeo;
+ int ret;
timeo = sock_rcvtimeo(sk, nonblock);
@@ -1861,14 +1875,16 @@ static int tls_rx_reader_acquire(struct sock *sk, struct tls_sw_context_rx *ctx,
ctx->reader_contended = 1;
add_wait_queue(&ctx->wq, &wait);
- sk_wait_event(sk, &timeo,
- !READ_ONCE(ctx->reader_present), &wait);
+ ret = sk_wait_event(sk, &timeo,
+ !READ_ONCE(ctx->reader_present), &wait);
remove_wait_queue(&ctx->wq, &wait);
if (timeo <= 0)
return -EAGAIN;
if (signal_pending(current))
return sock_intr_errno(timeo);
+ if (ret < 0)
+ return ret;
}
WRITE_ONCE(ctx->reader_present, 1);
@@ -2319,7 +2335,7 @@ int tls_rx_msg_size(struct tls_strparser *strp, struct sk_buff *skb)
{
struct tls_context *tls_ctx = tls_get_ctx(strp->sk);
struct tls_prot_info *prot = &tls_ctx->prot_info;
- char header[TLS_HEADER_SIZE + MAX_IV_SIZE];
+ char header[TLS_HEADER_SIZE + TLS_MAX_IV_SIZE];
size_t cipher_overhead;
size_t data_len = 0;
int ret;
@@ -2467,9 +2483,6 @@ void tls_sw_release_resources_rx(struct sock *sk)
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
- kfree(tls_ctx->rx.rec_seq);
- kfree(tls_ctx->rx.iv);
-
if (ctx->aead_recv) {
__skb_queue_purge(&ctx->rx_list);
crypto_free_aead(ctx->aead_recv);
@@ -2581,69 +2594,113 @@ void tls_update_rx_zc_capable(struct tls_context *tls_ctx)
tls_ctx->prot_info.version != TLS_1_3_VERSION;
}
-int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
+static struct tls_sw_context_tx *init_ctx_tx(struct tls_context *ctx, struct sock *sk)
+{
+ struct tls_sw_context_tx *sw_ctx_tx;
+
+ if (!ctx->priv_ctx_tx) {
+ sw_ctx_tx = kzalloc(sizeof(*sw_ctx_tx), GFP_KERNEL);
+ if (!sw_ctx_tx)
+ return NULL;
+ } else {
+ sw_ctx_tx = ctx->priv_ctx_tx;
+ }
+
+ crypto_init_wait(&sw_ctx_tx->async_wait);
+ spin_lock_init(&sw_ctx_tx->encrypt_compl_lock);
+ INIT_LIST_HEAD(&sw_ctx_tx->tx_list);
+ INIT_DELAYED_WORK(&sw_ctx_tx->tx_work.work, tx_work_handler);
+ sw_ctx_tx->tx_work.sk = sk;
+
+ return sw_ctx_tx;
+}
+
+static struct tls_sw_context_rx *init_ctx_rx(struct tls_context *ctx)
+{
+ struct tls_sw_context_rx *sw_ctx_rx;
+
+ if (!ctx->priv_ctx_rx) {
+ sw_ctx_rx = kzalloc(sizeof(*sw_ctx_rx), GFP_KERNEL);
+ if (!sw_ctx_rx)
+ return NULL;
+ } else {
+ sw_ctx_rx = ctx->priv_ctx_rx;
+ }
+
+ crypto_init_wait(&sw_ctx_rx->async_wait);
+ spin_lock_init(&sw_ctx_rx->decrypt_compl_lock);
+ init_waitqueue_head(&sw_ctx_rx->wq);
+ skb_queue_head_init(&sw_ctx_rx->rx_list);
+ skb_queue_head_init(&sw_ctx_rx->async_hold);
+
+ return sw_ctx_rx;
+}
+
+int init_prot_info(struct tls_prot_info *prot,
+ const struct tls_crypto_info *crypto_info,
+ const struct tls_cipher_desc *cipher_desc)
+{
+ u16 nonce_size = cipher_desc->nonce;
+
+ if (crypto_info->version == TLS_1_3_VERSION) {
+ nonce_size = 0;
+ prot->aad_size = TLS_HEADER_SIZE;
+ prot->tail_size = 1;
+ } else {
+ prot->aad_size = TLS_AAD_SPACE_SIZE;
+ prot->tail_size = 0;
+ }
+
+ /* Sanity-check the sizes for stack allocations. */
+ if (nonce_size > TLS_MAX_IV_SIZE || prot->aad_size > TLS_MAX_AAD_SIZE)
+ return -EINVAL;
+
+ prot->version = crypto_info->version;
+ prot->cipher_type = crypto_info->cipher_type;
+ prot->prepend_size = TLS_HEADER_SIZE + nonce_size;
+ prot->tag_size = cipher_desc->tag;
+ prot->overhead_size = prot->prepend_size + prot->tag_size + prot->tail_size;
+ prot->iv_size = cipher_desc->iv;
+ prot->salt_size = cipher_desc->salt;
+ prot->rec_seq_size = cipher_desc->rec_seq;
+
+ return 0;
+}
+
+int tls_set_sw_offload(struct sock *sk, int tx)
{
- struct tls_context *tls_ctx = tls_get_ctx(sk);
- struct tls_prot_info *prot = &tls_ctx->prot_info;
- struct tls_crypto_info *crypto_info;
struct tls_sw_context_tx *sw_ctx_tx = NULL;
struct tls_sw_context_rx *sw_ctx_rx = NULL;
+ const struct tls_cipher_desc *cipher_desc;
+ struct tls_crypto_info *crypto_info;
+ char *iv, *rec_seq, *key, *salt;
struct cipher_context *cctx;
+ struct tls_prot_info *prot;
struct crypto_aead **aead;
+ struct tls_context *ctx;
struct crypto_tfm *tfm;
- char *iv, *rec_seq, *key, *salt;
- const struct tls_cipher_desc *cipher_desc;
- u16 nonce_size;
int rc = 0;
- if (!ctx) {
- rc = -EINVAL;
- goto out;
- }
+ ctx = tls_get_ctx(sk);
+ prot = &ctx->prot_info;
if (tx) {
- if (!ctx->priv_ctx_tx) {
- sw_ctx_tx = kzalloc(sizeof(*sw_ctx_tx), GFP_KERNEL);
- if (!sw_ctx_tx) {
- rc = -ENOMEM;
- goto out;
- }
- ctx->priv_ctx_tx = sw_ctx_tx;
- } else {
- sw_ctx_tx =
- (struct tls_sw_context_tx *)ctx->priv_ctx_tx;
- }
- } else {
- if (!ctx->priv_ctx_rx) {
- sw_ctx_rx = kzalloc(sizeof(*sw_ctx_rx), GFP_KERNEL);
- if (!sw_ctx_rx) {
- rc = -ENOMEM;
- goto out;
- }
- ctx->priv_ctx_rx = sw_ctx_rx;
- } else {
- sw_ctx_rx =
- (struct tls_sw_context_rx *)ctx->priv_ctx_rx;
- }
- }
+ ctx->priv_ctx_tx = init_ctx_tx(ctx, sk);
+ if (!ctx->priv_ctx_tx)
+ return -ENOMEM;
- if (tx) {
- crypto_init_wait(&sw_ctx_tx->async_wait);
- spin_lock_init(&sw_ctx_tx->encrypt_compl_lock);
+ sw_ctx_tx = ctx->priv_ctx_tx;
crypto_info = &ctx->crypto_send.info;
cctx = &ctx->tx;
aead = &sw_ctx_tx->aead_send;
- INIT_LIST_HEAD(&sw_ctx_tx->tx_list);
- INIT_DELAYED_WORK(&sw_ctx_tx->tx_work.work, tx_work_handler);
- sw_ctx_tx->tx_work.sk = sk;
} else {
- crypto_init_wait(&sw_ctx_rx->async_wait);
- spin_lock_init(&sw_ctx_rx->decrypt_compl_lock);
- init_waitqueue_head(&sw_ctx_rx->wq);
+ ctx->priv_ctx_rx = init_ctx_rx(ctx);
+ if (!ctx->priv_ctx_rx)
+ return -ENOMEM;
+
+ sw_ctx_rx = ctx->priv_ctx_rx;
crypto_info = &ctx->crypto_recv.info;
cctx = &ctx->rx;
- skb_queue_head_init(&sw_ctx_rx->rx_list);
- skb_queue_head_init(&sw_ctx_rx->async_hold);
aead = &sw_ctx_rx->aead_recv;
}
@@ -2653,58 +2710,25 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
goto free_priv;
}
- nonce_size = cipher_desc->nonce;
+ rc = init_prot_info(prot, crypto_info, cipher_desc);
+ if (rc)
+ goto free_priv;
iv = crypto_info_iv(crypto_info, cipher_desc);
key = crypto_info_key(crypto_info, cipher_desc);
salt = crypto_info_salt(crypto_info, cipher_desc);
rec_seq = crypto_info_rec_seq(crypto_info, cipher_desc);
- if (crypto_info->version == TLS_1_3_VERSION) {
- nonce_size = 0;
- prot->aad_size = TLS_HEADER_SIZE;
- prot->tail_size = 1;
- } else {
- prot->aad_size = TLS_AAD_SPACE_SIZE;
- prot->tail_size = 0;
- }
-
- /* Sanity-check the sizes for stack allocations. */
- if (nonce_size > MAX_IV_SIZE || prot->aad_size > TLS_MAX_AAD_SIZE) {
- rc = -EINVAL;
- goto free_priv;
- }
-
- prot->version = crypto_info->version;
- prot->cipher_type = crypto_info->cipher_type;
- prot->prepend_size = TLS_HEADER_SIZE + nonce_size;
- prot->tag_size = cipher_desc->tag;
- prot->overhead_size = prot->prepend_size +
- prot->tag_size + prot->tail_size;
- prot->iv_size = cipher_desc->iv;
- prot->salt_size = cipher_desc->salt;
- cctx->iv = kmalloc(cipher_desc->iv + cipher_desc->salt, GFP_KERNEL);
- if (!cctx->iv) {
- rc = -ENOMEM;
- goto free_priv;
- }
- /* Note: 128 & 256 bit salt are the same size */
- prot->rec_seq_size = cipher_desc->rec_seq;
memcpy(cctx->iv, salt, cipher_desc->salt);
memcpy(cctx->iv + cipher_desc->salt, iv, cipher_desc->iv);
-
- cctx->rec_seq = kmemdup(rec_seq, cipher_desc->rec_seq, GFP_KERNEL);
- if (!cctx->rec_seq) {
- rc = -ENOMEM;
- goto free_iv;
- }
+ memcpy(cctx->rec_seq, rec_seq, cipher_desc->rec_seq);
if (!*aead) {
*aead = crypto_alloc_aead(cipher_desc->cipher_name, 0, 0);
if (IS_ERR(*aead)) {
rc = PTR_ERR(*aead);
*aead = NULL;
- goto free_rec_seq;
+ goto free_priv;
}
}
@@ -2736,12 +2760,6 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
free_aead:
crypto_free_aead(*aead);
*aead = NULL;
-free_rec_seq:
- kfree(cctx->rec_seq);
- cctx->rec_seq = NULL;
-free_iv:
- kfree(cctx->iv);
- cctx->iv = NULL;
free_priv:
if (tx) {
kfree(ctx->priv_ctx_tx);
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 86930a8ed012..30b178ebba60 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -116,6 +116,7 @@
#include <linux/freezer.h>
#include <linux/file.h>
#include <linux/btf_ids.h>
+#include <linux/bpf-cgroup.h>
#include "scm.h"
@@ -212,8 +213,6 @@ static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
}
#endif /* CONFIG_SECURITY_NETWORK */
-#define unix_peer(sk) (unix_sk(sk)->peer)
-
static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
return unix_peer(osk) == sk;
@@ -680,7 +679,7 @@ static void unix_release_sock(struct sock *sk, int embrion)
* What the above comment does talk about? --ANK(980817)
*/
- if (unix_tot_inflight)
+ if (READ_ONCE(unix_tot_inflight))
unix_gc(); /* Garbage collect fds */
}
@@ -1345,13 +1344,11 @@ static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
unix_state_lock(sk1);
return;
}
- if (sk1 < sk2) {
- unix_state_lock(sk1);
- unix_state_lock_nested(sk2);
- } else {
- unix_state_lock(sk2);
- unix_state_lock_nested(sk1);
- }
+ if (sk1 > sk2)
+ swap(sk1, sk2);
+
+ unix_state_lock(sk1);
+ unix_state_lock_nested(sk2, U_LOCK_SECOND);
}
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
@@ -1381,6 +1378,10 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
if (err)
goto out;
+ err = BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, addr, &alen);
+ if (err)
+ goto out;
+
if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
test_bit(SOCK_PASSPIDFD, &sock->flags)) &&
!unix_sk(sk)->addr) {
@@ -1490,6 +1491,10 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
if (err)
goto out;
+ err = BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, &addr_len);
+ if (err)
+ goto out;
+
if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
test_bit(SOCK_PASSPIDFD, &sock->flags)) && !u->addr) {
err = unix_autobind(sk);
@@ -1584,7 +1589,7 @@ restart:
goto out_unlock;
}
- unix_state_lock_nested(sk);
+ unix_state_lock_nested(sk, U_LOCK_SECOND);
if (sk->sk_state != st) {
unix_state_unlock(sk);
@@ -1770,6 +1775,13 @@ static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
} else {
err = addr->len;
memcpy(sunaddr, addr->name, addr->len);
+
+ if (peer)
+ BPF_CGROUP_RUN_SA_PROG(sk, uaddr, &err,
+ CGROUP_UNIX_GETPEERNAME);
+ else
+ BPF_CGROUP_RUN_SA_PROG(sk, uaddr, &err,
+ CGROUP_UNIX_GETSOCKNAME);
}
sock_put(sk);
out:
@@ -1922,6 +1934,13 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
err = unix_validate_addr(sunaddr, msg->msg_namelen);
if (err)
goto out;
+
+ err = BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk,
+ msg->msg_name,
+ &msg->msg_namelen,
+ NULL);
+ if (err)
+ goto out;
} else {
sunaddr = NULL;
err = -ENOTCONN;
@@ -2390,9 +2409,14 @@ int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
EPOLLOUT | EPOLLWRNORM |
EPOLLWRBAND);
- if (msg->msg_name)
+ if (msg->msg_name) {
unix_copy_addr(msg, skb->sk);
+ BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk,
+ msg->msg_name,
+ &msg->msg_namelen);
+ }
+
if (size > skb->len - skip)
size = skb->len - skip;
else if (size < skb->len - skip)
@@ -2553,15 +2577,16 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state)
if (!(state->flags & MSG_PEEK))
WRITE_ONCE(u->oob_skb, NULL);
-
+ else
+ skb_get(oob_skb);
unix_state_unlock(sk);
chunk = state->recv_actor(oob_skb, 0, chunk, state);
- if (!(state->flags & MSG_PEEK)) {
+ if (!(state->flags & MSG_PEEK))
UNIXCB(oob_skb).consumed += 1;
- kfree_skb(oob_skb);
- }
+
+ consume_skb(oob_skb);
mutex_unlock(&u->iolock);
@@ -2744,6 +2769,11 @@ unlock:
DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
state->msg->msg_name);
unix_copy_addr(state->msg, skb->sk);
+
+ BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk,
+ state->msg->msg_name,
+ &state->msg->msg_namelen);
+
sunaddr = NULL;
}
@@ -3311,7 +3341,7 @@ static const struct seq_operations unix_seq_ops = {
.show = unix_seq_show,
};
-#if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL)
+#ifdef CONFIG_BPF_SYSCALL
struct bpf_unix_iter_state {
struct seq_net_private p;
unsigned int cur_sk;
@@ -3573,7 +3603,7 @@ static struct pernet_operations unix_net_ops = {
.exit = unix_net_exit,
};
-#if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
+#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta,
struct unix_sock *unix_sk, uid_t uid)
@@ -3673,7 +3703,7 @@ static int __init af_unix_init(void)
register_pernet_subsys(&unix_net_ops);
unix_bpf_build_proto();
-#if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
+#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
bpf_iter_register();
#endif
@@ -3681,20 +3711,5 @@ out:
return rc;
}
-static void __exit af_unix_exit(void)
-{
- sock_unregister(PF_UNIX);
- proto_unregister(&unix_dgram_proto);
- proto_unregister(&unix_stream_proto);
- unregister_pernet_subsys(&unix_net_ops);
-}
-
-/* Earlier than device_initcall() so that other drivers invoking
- request_module() don't end up in a loop when modprobe tries
- to use a UNIX socket. But later than subsys_initcall() because
- we depend on stuff initialised there */
+/* Later than subsys_initcall() because we depend on stuff initialised there */
fs_initcall(af_unix_init);
-module_exit(af_unix_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_NETPROTO(PF_UNIX);
diff --git a/net/unix/diag.c b/net/unix/diag.c
index 616b55c5b890..be19827eca36 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -84,7 +84,7 @@ static int sk_diag_dump_icons(struct sock *sk, struct sk_buff *nlskb)
* queue lock. With the other's queue locked it's
* OK to lock the state.
*/
- unix_state_lock_nested(req);
+ unix_state_lock_nested(req, U_LOCK_DIAG);
peer = unix_sk(req)->peer;
buf[i++] = (peer ? sock_i_ino(peer) : 0);
unix_state_unlock(req);
@@ -339,4 +339,5 @@ static void __exit unix_diag_exit(void)
module_init(unix_diag_init);
module_exit(unix_diag_exit);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("UNIX socket monitoring via SOCK_DIAG");
MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 1 /* AF_LOCAL */);
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index 2405f0f9af31..8f63f0b4bf01 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -314,6 +314,17 @@ void unix_gc(void)
/* Here we are. Hitlist is filled. Die. */
__skb_queue_purge(&hitlist);
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+ list_for_each_entry_safe(u, next, &gc_candidates, link) {
+ struct sk_buff *skb = u->oob_skb;
+
+ if (skb) {
+ u->oob_skb = NULL;
+ kfree_skb(skb);
+ }
+ }
+#endif
+
spin_lock(&unix_gc_lock);
/* There could be io_uring registered files, just push them back to
diff --git a/net/unix/scm.c b/net/unix/scm.c
index e9dde7176c8a..822ce0d0d791 100644
--- a/net/unix/scm.c
+++ b/net/unix/scm.c
@@ -35,10 +35,8 @@ struct sock *unix_get_socket(struct file *filp)
/* PF_UNIX ? */
if (s && ops && ops->family == PF_UNIX)
u_sock = s;
- } else {
- /* Could be an io_uring instance */
- u_sock = io_uring_get_socket(filp);
}
+
return u_sock;
}
EXPORT_SYMBOL(unix_get_socket);
@@ -64,7 +62,7 @@ void unix_inflight(struct user_struct *user, struct file *fp)
/* Paired with READ_ONCE() in wait_for_unix_gc() */
WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + 1);
}
- user->unix_inflight++;
+ WRITE_ONCE(user->unix_inflight, user->unix_inflight + 1);
spin_unlock(&unix_gc_lock);
}
@@ -85,7 +83,7 @@ void unix_notinflight(struct user_struct *user, struct file *fp)
/* Paired with READ_ONCE() in wait_for_unix_gc() */
WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - 1);
}
- user->unix_inflight--;
+ WRITE_ONCE(user->unix_inflight, user->unix_inflight - 1);
spin_unlock(&unix_gc_lock);
}
@@ -99,7 +97,7 @@ static inline bool too_many_unix_fds(struct task_struct *p)
{
struct user_struct *user = current_user();
- if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
+ if (unlikely(READ_ONCE(user->unix_inflight) > task_rlimit(p, RLIMIT_NOFILE)))
return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
return false;
}
diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c
index 2f9d8271c6ec..bd84785bf8d6 100644
--- a/net/unix/unix_bpf.c
+++ b/net/unix/unix_bpf.c
@@ -159,12 +159,32 @@ int unix_dgram_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool re
int unix_stream_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
{
+ struct sock *sk_pair;
+
+ /* Restore does not decrement the sk_pair reference yet because we must
+ * keep the a reference to the socket until after an RCU grace period
+ * and any pending sends have completed.
+ */
if (restore) {
sk->sk_write_space = psock->saved_write_space;
sock_replace_proto(sk, psock->sk_proto);
return 0;
}
+ /* psock_update_sk_prot can be called multiple times if psock is
+ * added to multiple maps and/or slots in the same map. There is
+ * also an edge case where replacing a psock with itself can trigger
+ * an extra psock_update_sk_prot during the insert process. So it
+ * must be safe to do multiple calls. Here we need to ensure we don't
+ * increment the refcnt through sock_hold many times. There will only
+ * be a single matching destroy operation.
+ */
+ if (!psock->sk_pair) {
+ sk_pair = unix_peer(sk);
+ sock_hold(sk_pair);
+ psock->sk_pair = sk_pair;
+ }
+
unix_stream_bpf_check_needs_rebuild(psock->sk_proto);
sock_replace_proto(sk, &unix_stream_bpf_prot);
return 0;
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 020cf17ab7e4..54ba7316f808 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -89,6 +89,7 @@
#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/cred.h>
+#include <linux/errqueue.h>
#include <linux/init.h>
#include <linux/io.h>
#include <linux/kernel.h>
@@ -110,6 +111,7 @@
#include <linux/workqueue.h>
#include <net/sock.h>
#include <net/af_vsock.h>
+#include <uapi/linux/vm_sockets.h>
static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr);
static void vsock_sk_destruct(struct sock *sk);
@@ -1030,7 +1032,7 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock,
poll_wait(file, sk_sleep(sk), wait);
mask = 0;
- if (sk->sk_err)
+ if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
/* Signify that there has been an error on this socket. */
mask |= EPOLLERR;
@@ -1404,6 +1406,17 @@ static int vsock_connect(struct socket *sock, struct sockaddr *addr,
goto out;
}
+ if (vsock_msgzerocopy_allow(transport)) {
+ set_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags);
+ } else if (sock_flag(sk, SOCK_ZEROCOPY)) {
+ /* If this option was set before 'connect()',
+ * when transport was unknown, check that this
+ * feature is supported here.
+ */
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+
err = vsock_auto_bind(vsk);
if (err)
goto out;
@@ -1558,6 +1571,9 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags,
} else {
newsock->state = SS_CONNECTED;
sock_graft(connected, newsock);
+ if (vsock_msgzerocopy_allow(vconnected->transport))
+ set_bit(SOCK_SUPPORT_ZC,
+ &connected->sk_socket->flags);
}
release_sock(connected);
@@ -1635,7 +1651,7 @@ static int vsock_connectible_setsockopt(struct socket *sock,
const struct vsock_transport *transport;
u64 val;
- if (level != AF_VSOCK)
+ if (level != AF_VSOCK && level != SOL_SOCKET)
return -ENOPROTOOPT;
#define COPY_IN(_v) \
@@ -1658,6 +1674,33 @@ static int vsock_connectible_setsockopt(struct socket *sock,
transport = vsk->transport;
+ if (level == SOL_SOCKET) {
+ int zerocopy;
+
+ if (optname != SO_ZEROCOPY) {
+ release_sock(sk);
+ return sock_setsockopt(sock, level, optname, optval, optlen);
+ }
+
+ /* Use 'int' type here, because variable to
+ * set this option usually has this type.
+ */
+ COPY_IN(zerocopy);
+
+ if (zerocopy < 0 || zerocopy > 1) {
+ err = -EINVAL;
+ goto exit;
+ }
+
+ if (transport && !vsock_msgzerocopy_allow(transport)) {
+ err = -EOPNOTSUPP;
+ goto exit;
+ }
+
+ sock_valbool_flag(sk, SOCK_ZEROCOPY, zerocopy);
+ goto exit;
+ }
+
switch (optname) {
case SO_VM_SOCKETS_BUFFER_SIZE:
COPY_IN(val);
@@ -1822,6 +1865,12 @@ static int vsock_connectible_sendmsg(struct socket *sock, struct msghdr *msg,
goto out;
}
+ if (msg->msg_flags & MSG_ZEROCOPY &&
+ !vsock_msgzerocopy_allow(transport)) {
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+
/* Wait for room in the produce queue to enqueue our user's data. */
timeout = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
@@ -1921,6 +1970,9 @@ out_err:
err = total_written;
}
out:
+ if (sk->sk_type == SOCK_STREAM)
+ err = sk_stream_error(sk, msg->msg_flags, err);
+
release_sock(sk);
return err;
}
@@ -2134,6 +2186,10 @@ vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
int err;
sk = sock->sk;
+
+ if (unlikely(flags & MSG_ERRQUEUE))
+ return sock_recv_errqueue(sk, msg, len, SOL_VSOCK, VSOCK_RECVERR);
+
vsk = vsock_sk(sk);
err = 0;
@@ -2208,8 +2264,13 @@ static int vsock_set_rcvlowat(struct sock *sk, int val)
transport = vsk->transport;
- if (transport && transport->set_rcvlowat)
- return transport->set_rcvlowat(vsk, val);
+ if (transport && transport->notify_set_rcvlowat) {
+ int err;
+
+ err = transport->notify_set_rcvlowat(vsk, val);
+ if (err)
+ return err;
+ }
WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
return 0;
@@ -2301,6 +2362,12 @@ static int vsock_create(struct net *net, struct socket *sock,
}
}
+ /* SOCK_DGRAM doesn't have 'setsockopt' callback set in its
+ * proto_ops, so there is no handler for custom logic.
+ */
+ if (sock_type_connectible(sock->type))
+ set_bit(SOCK_CUSTOM_SOCKOPT, &sk->sk_socket->flags);
+
vsock_insert_unbound(vsk);
return 0;
diff --git a/net/vmw_vsock/diag.c b/net/vmw_vsock/diag.c
index a2823b1c5e28..2e29994f92ff 100644
--- a/net/vmw_vsock/diag.c
+++ b/net/vmw_vsock/diag.c
@@ -174,5 +174,6 @@ static void __exit vsock_diag_exit(void)
module_init(vsock_diag_init);
module_exit(vsock_diag_exit);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("VMware Virtual Sockets monitoring via SOCK_DIAG");
MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG,
40 /* AF_VSOCK */);
diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
index 7cb1a9d2cdb4..e2157e387217 100644
--- a/net/vmw_vsock/hyperv_transport.c
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -816,7 +816,7 @@ int hvs_notify_send_post_enqueue(struct vsock_sock *vsk, ssize_t written,
}
static
-int hvs_set_rcvlowat(struct vsock_sock *vsk, int val)
+int hvs_notify_set_rcvlowat(struct vsock_sock *vsk, int val)
{
return -EOPNOTSUPP;
}
@@ -856,7 +856,7 @@ static struct vsock_transport hvs_transport = {
.notify_send_pre_enqueue = hvs_notify_send_pre_enqueue,
.notify_send_post_enqueue = hvs_notify_send_post_enqueue,
- .set_rcvlowat = hvs_set_rcvlowat
+ .notify_set_rcvlowat = hvs_notify_set_rcvlowat
};
static bool hvs_check_transport(struct vsock_sock *vsk)
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index e95df847176b..1748268e0694 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -63,6 +63,17 @@ struct virtio_vsock {
u32 guest_cid;
bool seqpacket_allow;
+
+ /* These fields are used only in tx path in function
+ * 'virtio_transport_send_pkt_work()', so to save
+ * stack space in it, place both of them here. Each
+ * pointer from 'out_sgs' points to the corresponding
+ * element in 'out_bufs' - this is initialized in
+ * 'virtio_vsock_probe()'. Both fields are protected
+ * by 'tx_lock'. +1 is needed for packet header.
+ */
+ struct scatterlist *out_sgs[MAX_SKB_FRAGS + 1];
+ struct scatterlist out_bufs[MAX_SKB_FRAGS + 1];
};
static u32 virtio_transport_get_local_cid(void)
@@ -100,8 +111,8 @@ virtio_transport_send_pkt_work(struct work_struct *work)
vq = vsock->vqs[VSOCK_VQ_TX];
for (;;) {
- struct scatterlist hdr, buf, *sgs[2];
int ret, in_sg = 0, out_sg = 0;
+ struct scatterlist **sgs;
struct sk_buff *skb;
bool reply;
@@ -111,12 +122,43 @@ virtio_transport_send_pkt_work(struct work_struct *work)
virtio_transport_deliver_tap_pkt(skb);
reply = virtio_vsock_skb_reply(skb);
-
- sg_init_one(&hdr, virtio_vsock_hdr(skb), sizeof(*virtio_vsock_hdr(skb)));
- sgs[out_sg++] = &hdr;
- if (skb->len > 0) {
- sg_init_one(&buf, skb->data, skb->len);
- sgs[out_sg++] = &buf;
+ sgs = vsock->out_sgs;
+ sg_init_one(sgs[out_sg], virtio_vsock_hdr(skb),
+ sizeof(*virtio_vsock_hdr(skb)));
+ out_sg++;
+
+ if (!skb_is_nonlinear(skb)) {
+ if (skb->len > 0) {
+ sg_init_one(sgs[out_sg], skb->data, skb->len);
+ out_sg++;
+ }
+ } else {
+ struct skb_shared_info *si;
+ int i;
+
+ /* If skb is nonlinear, then its buffer must contain
+ * only header and nothing more. Data is stored in
+ * the fragged part.
+ */
+ WARN_ON_ONCE(skb_headroom(skb) != sizeof(*virtio_vsock_hdr(skb)));
+
+ si = skb_shinfo(skb);
+
+ for (i = 0; i < si->nr_frags; i++) {
+ skb_frag_t *skb_frag = &si->frags[i];
+ void *va;
+
+ /* We will use 'page_to_virt()' for the userspace page
+ * here, because virtio or dma-mapping layers will call
+ * 'virt_to_phys()' later to fill the buffer descriptor.
+ * We don't touch memory at "virtual" address of this page.
+ */
+ va = page_to_virt(skb_frag_page(skb_frag));
+ sg_init_one(sgs[out_sg],
+ va + skb_frag_off(skb_frag),
+ skb_frag_size(skb_frag));
+ out_sg++;
+ }
}
ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, skb, GFP_KERNEL);
@@ -413,6 +455,42 @@ static void virtio_vsock_rx_done(struct virtqueue *vq)
queue_work(virtio_vsock_workqueue, &vsock->rx_work);
}
+static bool virtio_transport_can_msgzerocopy(int bufs_num)
+{
+ struct virtio_vsock *vsock;
+ bool res = false;
+
+ rcu_read_lock();
+
+ vsock = rcu_dereference(the_virtio_vsock);
+ if (vsock) {
+ struct virtqueue *vq = vsock->vqs[VSOCK_VQ_TX];
+
+ /* Check that tx queue is large enough to keep whole
+ * data to send. This is needed, because when there is
+ * not enough free space in the queue, current skb to
+ * send will be reinserted to the head of tx list of
+ * the socket to retry transmission later, so if skb
+ * is bigger than whole queue, it will be reinserted
+ * again and again, thus blocking other skbs to be sent.
+ * Each page of the user provided buffer will be added
+ * as a single buffer to the tx virtqueue, so compare
+ * number of pages against maximum capacity of the queue.
+ */
+ if (bufs_num <= vq->num_max)
+ res = true;
+ }
+
+ rcu_read_unlock();
+
+ return res;
+}
+
+static bool virtio_transport_msgzerocopy_allow(void)
+{
+ return true;
+}
+
static bool virtio_transport_seqpacket_allow(u32 remote_cid);
static struct virtio_transport virtio_transport = {
@@ -446,6 +524,8 @@ static struct virtio_transport virtio_transport = {
.seqpacket_allow = virtio_transport_seqpacket_allow,
.seqpacket_has_data = virtio_transport_seqpacket_has_data,
+ .msgzerocopy_allow = virtio_transport_msgzerocopy_allow,
+
.notify_poll_in = virtio_transport_notify_poll_in,
.notify_poll_out = virtio_transport_notify_poll_out,
.notify_recv_init = virtio_transport_notify_recv_init,
@@ -457,11 +537,13 @@ static struct virtio_transport virtio_transport = {
.notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue,
.notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue,
.notify_buffer_size = virtio_transport_notify_buffer_size,
+ .notify_set_rcvlowat = virtio_transport_notify_set_rcvlowat,
.read_skb = virtio_transport_read_skb,
},
.send_pkt = virtio_transport_send_pkt,
+ .can_msgzerocopy = virtio_transport_can_msgzerocopy,
};
static bool virtio_transport_seqpacket_allow(u32 remote_cid)
@@ -555,6 +637,11 @@ static int virtio_vsock_vqs_init(struct virtio_vsock *vsock)
virtio_device_ready(vdev);
+ return 0;
+}
+
+static void virtio_vsock_vqs_start(struct virtio_vsock *vsock)
+{
mutex_lock(&vsock->tx_lock);
vsock->tx_run = true;
mutex_unlock(&vsock->tx_lock);
@@ -569,7 +656,16 @@ static int virtio_vsock_vqs_init(struct virtio_vsock *vsock)
vsock->event_run = true;
mutex_unlock(&vsock->event_lock);
- return 0;
+ /* virtio_transport_send_pkt() can queue packets once
+ * the_virtio_vsock is set, but they won't be processed until
+ * vsock->tx_run is set to true. We queue vsock->send_pkt_work
+ * when initialization finishes to send those packets queued
+ * earlier.
+ * We don't need to queue the other workers (rx, event) because
+ * as long as we don't fill the queues with empty buffers, the
+ * host can't send us any notification.
+ */
+ queue_work(virtio_vsock_workqueue, &vsock->send_pkt_work);
}
static void virtio_vsock_vqs_del(struct virtio_vsock *vsock)
@@ -621,6 +717,7 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
{
struct virtio_vsock *vsock = NULL;
int ret;
+ int i;
ret = mutex_lock_interruptible(&the_virtio_vsock_mutex);
if (ret)
@@ -663,7 +760,11 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
if (ret < 0)
goto out;
+ for (i = 0; i < ARRAY_SIZE(vsock->out_sgs); i++)
+ vsock->out_sgs[i] = &vsock->out_bufs[i];
+
rcu_assign_pointer(the_virtio_vsock, vsock);
+ virtio_vsock_vqs_start(vsock);
mutex_unlock(&the_virtio_vsock_mutex);
@@ -736,6 +837,7 @@ static int virtio_vsock_restore(struct virtio_device *vdev)
goto out;
rcu_assign_pointer(the_virtio_vsock, vsock);
+ virtio_vsock_vqs_start(vsock);
out:
mutex_unlock(&the_virtio_vsock_mutex);
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 352d042b130b..16ff976a86e3 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -37,27 +37,88 @@ virtio_transport_get_ops(struct vsock_sock *vsk)
return container_of(t, struct virtio_transport, transport);
}
-/* Returns a new packet on success, otherwise returns NULL.
- *
- * If NULL is returned, errp is set to a negative errno.
- */
-static struct sk_buff *
-virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *info,
- size_t len,
- u32 src_cid,
- u32 src_port,
- u32 dst_cid,
- u32 dst_port)
-{
- const size_t skb_len = VIRTIO_VSOCK_SKB_HEADROOM + len;
- struct virtio_vsock_hdr *hdr;
- struct sk_buff *skb;
- void *payload;
- int err;
+static bool virtio_transport_can_zcopy(const struct virtio_transport *t_ops,
+ struct virtio_vsock_pkt_info *info,
+ size_t pkt_len)
+{
+ struct iov_iter *iov_iter;
- skb = virtio_vsock_alloc_skb(skb_len, GFP_KERNEL);
- if (!skb)
- return NULL;
+ if (!info->msg)
+ return false;
+
+ iov_iter = &info->msg->msg_iter;
+
+ if (iov_iter->iov_offset)
+ return false;
+
+ /* We can't send whole iov. */
+ if (iov_iter->count > pkt_len)
+ return false;
+
+ /* Check that transport can send data in zerocopy mode. */
+ t_ops = virtio_transport_get_ops(info->vsk);
+
+ if (t_ops->can_msgzerocopy) {
+ int pages_to_send = iov_iter_npages(iov_iter, MAX_SKB_FRAGS);
+
+ /* +1 is for packet header. */
+ return t_ops->can_msgzerocopy(pages_to_send + 1);
+ }
+
+ return true;
+}
+
+static int virtio_transport_init_zcopy_skb(struct vsock_sock *vsk,
+ struct sk_buff *skb,
+ struct msghdr *msg,
+ bool zerocopy)
+{
+ struct ubuf_info *uarg;
+
+ if (msg->msg_ubuf) {
+ uarg = msg->msg_ubuf;
+ net_zcopy_get(uarg);
+ } else {
+ struct iov_iter *iter = &msg->msg_iter;
+ struct ubuf_info_msgzc *uarg_zc;
+
+ uarg = msg_zerocopy_realloc(sk_vsock(vsk),
+ iter->count,
+ NULL);
+ if (!uarg)
+ return -1;
+
+ uarg_zc = uarg_to_msgzc(uarg);
+ uarg_zc->zerocopy = zerocopy ? 1 : 0;
+ }
+
+ skb_zcopy_init(skb, uarg);
+
+ return 0;
+}
+
+static int virtio_transport_fill_skb(struct sk_buff *skb,
+ struct virtio_vsock_pkt_info *info,
+ size_t len,
+ bool zcopy)
+{
+ if (zcopy)
+ return __zerocopy_sg_from_iter(info->msg, NULL, skb,
+ &info->msg->msg_iter,
+ len);
+
+ return memcpy_from_msg(skb_put(skb, len), info->msg, len);
+}
+
+static void virtio_transport_init_hdr(struct sk_buff *skb,
+ struct virtio_vsock_pkt_info *info,
+ size_t payload_len,
+ u32 src_cid,
+ u32 src_port,
+ u32 dst_cid,
+ u32 dst_port)
+{
+ struct virtio_vsock_hdr *hdr;
hdr = virtio_vsock_hdr(skb);
hdr->type = cpu_to_le16(info->type);
@@ -67,43 +128,30 @@ virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *info,
hdr->src_port = cpu_to_le32(src_port);
hdr->dst_port = cpu_to_le32(dst_port);
hdr->flags = cpu_to_le32(info->flags);
- hdr->len = cpu_to_le32(len);
-
- if (info->msg && len > 0) {
- payload = skb_put(skb, len);
- err = memcpy_from_msg(payload, info->msg, len);
- if (err)
- goto out;
-
- if (msg_data_left(info->msg) == 0 &&
- info->type == VIRTIO_VSOCK_TYPE_SEQPACKET) {
- hdr->flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOM);
-
- if (info->msg->msg_flags & MSG_EOR)
- hdr->flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR);
- }
- }
+ hdr->len = cpu_to_le32(payload_len);
+ hdr->buf_alloc = cpu_to_le32(0);
+ hdr->fwd_cnt = cpu_to_le32(0);
+}
- if (info->reply)
- virtio_vsock_skb_set_reply(skb);
+static void virtio_transport_copy_nonlinear_skb(const struct sk_buff *skb,
+ void *dst,
+ size_t len)
+{
+ struct iov_iter iov_iter = { 0 };
+ struct kvec kvec;
+ size_t to_copy;
- trace_virtio_transport_alloc_pkt(src_cid, src_port,
- dst_cid, dst_port,
- len,
- info->type,
- info->op,
- info->flags);
+ kvec.iov_base = dst;
+ kvec.iov_len = len;
- if (info->vsk && !skb_set_owner_sk_safe(skb, sk_vsock(info->vsk))) {
- WARN_ONCE(1, "failed to allocate skb on vsock socket with sk_refcnt == 0\n");
- goto out;
- }
+ iov_iter.iter_type = ITER_KVEC;
+ iov_iter.kvec = &kvec;
+ iov_iter.nr_segs = 1;
- return skb;
+ to_copy = min_t(size_t, len, skb->len);
-out:
- kfree_skb(skb);
- return NULL;
+ skb_copy_datagram_iter(skb, VIRTIO_VSOCK_SKB_CB(skb)->offset,
+ &iov_iter, to_copy);
}
/* Packet capture */
@@ -114,7 +162,6 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque)
struct af_vsockmon_hdr *hdr;
struct sk_buff *skb;
size_t payload_len;
- void *payload_buf;
/* A packet could be split to fit the RX buffer, so we can retrieve
* the payload length from the header and the buffer pointer taking
@@ -122,7 +169,6 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque)
*/
pkt_hdr = virtio_vsock_hdr(pkt);
payload_len = pkt->len;
- payload_buf = pkt->data;
skb = alloc_skb(sizeof(*hdr) + sizeof(*pkt_hdr) + payload_len,
GFP_ATOMIC);
@@ -165,7 +211,13 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque)
skb_put_data(skb, pkt_hdr, sizeof(*pkt_hdr));
if (payload_len) {
- skb_put_data(skb, payload_buf, payload_len);
+ if (skb_is_nonlinear(pkt)) {
+ void *data = skb_put(skb, payload_len);
+
+ virtio_transport_copy_nonlinear_skb(pkt, data, payload_len);
+ } else {
+ skb_put_data(skb, pkt->data, payload_len);
+ }
}
return skb;
@@ -189,6 +241,82 @@ static u16 virtio_transport_get_type(struct sock *sk)
return VIRTIO_VSOCK_TYPE_SEQPACKET;
}
+/* Returns new sk_buff on success, otherwise returns NULL. */
+static struct sk_buff *virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *info,
+ size_t payload_len,
+ bool zcopy,
+ u32 src_cid,
+ u32 src_port,
+ u32 dst_cid,
+ u32 dst_port)
+{
+ struct vsock_sock *vsk;
+ struct sk_buff *skb;
+ size_t skb_len;
+
+ skb_len = VIRTIO_VSOCK_SKB_HEADROOM;
+
+ if (!zcopy)
+ skb_len += payload_len;
+
+ skb = virtio_vsock_alloc_skb(skb_len, GFP_KERNEL);
+ if (!skb)
+ return NULL;
+
+ virtio_transport_init_hdr(skb, info, payload_len, src_cid, src_port,
+ dst_cid, dst_port);
+
+ vsk = info->vsk;
+
+ /* If 'vsk' != NULL then payload is always present, so we
+ * will never call '__zerocopy_sg_from_iter()' below without
+ * setting skb owner in 'skb_set_owner_w()'. The only case
+ * when 'vsk' == NULL is VIRTIO_VSOCK_OP_RST control message
+ * without payload.
+ */
+ WARN_ON_ONCE(!(vsk && (info->msg && payload_len)) && zcopy);
+
+ /* Set owner here, because '__zerocopy_sg_from_iter()' uses
+ * owner of skb without check to update 'sk_wmem_alloc'.
+ */
+ if (vsk)
+ skb_set_owner_w(skb, sk_vsock(vsk));
+
+ if (info->msg && payload_len > 0) {
+ int err;
+
+ err = virtio_transport_fill_skb(skb, info, payload_len, zcopy);
+ if (err)
+ goto out;
+
+ if (msg_data_left(info->msg) == 0 &&
+ info->type == VIRTIO_VSOCK_TYPE_SEQPACKET) {
+ struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
+
+ hdr->flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOM);
+
+ if (info->msg->msg_flags & MSG_EOR)
+ hdr->flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR);
+ }
+ }
+
+ if (info->reply)
+ virtio_vsock_skb_set_reply(skb);
+
+ trace_virtio_transport_alloc_pkt(src_cid, src_port,
+ dst_cid, dst_port,
+ payload_len,
+ info->type,
+ info->op,
+ info->flags,
+ zcopy);
+
+ return skb;
+out:
+ kfree_skb(skb);
+ return NULL;
+}
+
/* This function can only be used on connecting/connected sockets,
* since a socket assigned to a transport is required.
*
@@ -197,10 +325,12 @@ static u16 virtio_transport_get_type(struct sock *sk)
static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
struct virtio_vsock_pkt_info *info)
{
+ u32 max_skb_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
u32 src_cid, src_port, dst_cid, dst_port;
const struct virtio_transport *t_ops;
struct virtio_vsock_sock *vvs;
u32 pkt_len = info->pkt_len;
+ bool can_zcopy = false;
u32 rest_len;
int ret;
@@ -229,15 +359,30 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
return pkt_len;
+ if (info->msg) {
+ /* If zerocopy is not enabled by 'setsockopt()', we behave as
+ * there is no MSG_ZEROCOPY flag set.
+ */
+ if (!sock_flag(sk_vsock(vsk), SOCK_ZEROCOPY))
+ info->msg->msg_flags &= ~MSG_ZEROCOPY;
+
+ if (info->msg->msg_flags & MSG_ZEROCOPY)
+ can_zcopy = virtio_transport_can_zcopy(t_ops, info, pkt_len);
+
+ if (can_zcopy)
+ max_skb_len = min_t(u32, VIRTIO_VSOCK_MAX_PKT_BUF_SIZE,
+ (MAX_SKB_FRAGS * PAGE_SIZE));
+ }
+
rest_len = pkt_len;
do {
struct sk_buff *skb;
size_t skb_len;
- skb_len = min_t(u32, VIRTIO_VSOCK_MAX_PKT_BUF_SIZE, rest_len);
+ skb_len = min(max_skb_len, rest_len);
- skb = virtio_transport_alloc_skb(info, skb_len,
+ skb = virtio_transport_alloc_skb(info, skb_len, can_zcopy,
src_cid, src_port,
dst_cid, dst_port);
if (!skb) {
@@ -245,6 +390,21 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
break;
}
+ /* We process buffer part by part, allocating skb on
+ * each iteration. If this is last skb for this buffer
+ * and MSG_ZEROCOPY mode is in use - we must allocate
+ * completion for the current syscall.
+ */
+ if (info->msg && info->msg->msg_flags & MSG_ZEROCOPY &&
+ skb_len == rest_len && info->op == VIRTIO_VSOCK_OP_RW) {
+ if (virtio_transport_init_zcopy_skb(vsk, skb,
+ info->msg,
+ can_zcopy)) {
+ ret = -ENOMEM;
+ break;
+ }
+ }
+
virtio_transport_inc_tx_pkt(vvs, skb);
ret = t_ops->send_pkt(skb);
@@ -364,9 +524,10 @@ virtio_transport_stream_do_peek(struct vsock_sock *vsk,
spin_unlock_bh(&vvs->rx_lock);
/* sk_lock is held by caller so no one else can dequeue.
- * Unlock rx_lock since memcpy_to_msg() may sleep.
+ * Unlock rx_lock since skb_copy_datagram_iter() may sleep.
*/
- err = memcpy_to_msg(msg, skb->data, bytes);
+ err = skb_copy_datagram_iter(skb, VIRTIO_VSOCK_SKB_CB(skb)->offset,
+ &msg->msg_iter, bytes);
if (err)
goto out;
@@ -396,6 +557,8 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
struct virtio_vsock_sock *vvs = vsk->trans;
size_t bytes, total = 0;
struct sk_buff *skb;
+ u32 fwd_cnt_delta;
+ bool low_rx_bytes;
int err = -EFAULT;
u32 free_space;
@@ -410,25 +573,27 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
while (total < len && !skb_queue_empty(&vvs->rx_queue)) {
skb = skb_peek(&vvs->rx_queue);
- bytes = len - total;
- if (bytes > skb->len)
- bytes = skb->len;
+ bytes = min_t(size_t, len - total,
+ skb->len - VIRTIO_VSOCK_SKB_CB(skb)->offset);
/* sk_lock is held by caller so no one else can dequeue.
- * Unlock rx_lock since memcpy_to_msg() may sleep.
+ * Unlock rx_lock since skb_copy_datagram_iter() may sleep.
*/
spin_unlock_bh(&vvs->rx_lock);
- err = memcpy_to_msg(msg, skb->data, bytes);
+ err = skb_copy_datagram_iter(skb,
+ VIRTIO_VSOCK_SKB_CB(skb)->offset,
+ &msg->msg_iter, bytes);
if (err)
goto out;
spin_lock_bh(&vvs->rx_lock);
total += bytes;
- skb_pull(skb, bytes);
- if (skb->len == 0) {
+ VIRTIO_VSOCK_SKB_CB(skb)->offset += bytes;
+
+ if (skb->len == VIRTIO_VSOCK_SKB_CB(skb)->offset) {
u32 pkt_len = le32_to_cpu(virtio_vsock_hdr(skb)->len);
virtio_transport_dec_rx_pkt(vvs, pkt_len);
@@ -437,7 +602,10 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
}
}
- free_space = vvs->buf_alloc - (vvs->fwd_cnt - vvs->last_fwd_cnt);
+ fwd_cnt_delta = vvs->fwd_cnt - vvs->last_fwd_cnt;
+ free_space = vvs->buf_alloc - fwd_cnt_delta;
+ low_rx_bytes = (vvs->rx_bytes <
+ sock_rcvlowat(sk_vsock(vsk), 0, INT_MAX));
spin_unlock_bh(&vvs->rx_lock);
@@ -447,9 +615,11 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
* too high causes extra messages. Too low causes transmitter
* stalls. As stalls are in theory more expensive than extra
* messages, we set the limit to a high value. TODO: experiment
- * with different values.
+ * with different values. Also send credit update message when
+ * number of bytes in rx queue is not enough to wake up reader.
*/
- if (free_space < VIRTIO_VSOCK_MAX_PKT_BUF_SIZE)
+ if (fwd_cnt_delta &&
+ (free_space < VIRTIO_VSOCK_MAX_PKT_BUF_SIZE || low_rx_bytes))
virtio_transport_send_credit_update(vsk);
return total;
@@ -492,9 +662,10 @@ virtio_transport_seqpacket_do_peek(struct vsock_sock *vsk,
spin_unlock_bh(&vvs->rx_lock);
/* sk_lock is held by caller so no one else can dequeue.
- * Unlock rx_lock since memcpy_to_msg() may sleep.
+ * Unlock rx_lock since skb_copy_datagram_iter() may sleep.
*/
- err = memcpy_to_msg(msg, skb->data, bytes);
+ err = skb_copy_datagram_iter(skb, VIRTIO_VSOCK_SKB_CB(skb)->offset,
+ &msg->msg_iter, bytes);
if (err)
return err;
@@ -553,11 +724,13 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk,
int err;
/* sk_lock is held by caller so no one else can dequeue.
- * Unlock rx_lock since memcpy_to_msg() may sleep.
+ * Unlock rx_lock since skb_copy_datagram_iter() may sleep.
*/
spin_unlock_bh(&vvs->rx_lock);
- err = memcpy_to_msg(msg, skb->data, bytes_to_copy);
+ err = skb_copy_datagram_iter(skb, 0,
+ &msg->msg_iter,
+ bytes_to_copy);
if (err) {
/* Copy of message failed. Rest of
* fragments will be freed without copy.
@@ -677,7 +850,7 @@ static s64 virtio_transport_has_space(struct vsock_sock *vsk)
struct virtio_vsock_sock *vvs = vsk->trans;
s64 bytes;
- bytes = vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt);
+ bytes = (s64)vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt);
if (bytes < 0)
bytes = 0;
@@ -954,7 +1127,7 @@ static int virtio_transport_reset_no_sock(const struct virtio_transport *t,
if (!t)
return -ENOTCONN;
- reply = virtio_transport_alloc_skb(&info, 0,
+ reply = virtio_transport_alloc_skb(&info, 0, false,
le64_to_cpu(hdr->dst_cid),
le32_to_cpu(hdr->dst_port),
le64_to_cpu(hdr->src_cid),
@@ -1204,11 +1377,17 @@ virtio_transport_recv_connected(struct sock *sk,
vsk->peer_shutdown |= RCV_SHUTDOWN;
if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SHUTDOWN_SEND)
vsk->peer_shutdown |= SEND_SHUTDOWN;
- if (vsk->peer_shutdown == SHUTDOWN_MASK &&
- vsock_stream_has_data(vsk) <= 0 &&
- !sock_flag(sk, SOCK_DONE)) {
- (void)virtio_transport_reset(vsk, NULL);
- virtio_transport_do_close(vsk, true);
+ if (vsk->peer_shutdown == SHUTDOWN_MASK) {
+ if (vsock_stream_has_data(vsk) <= 0 && !sock_flag(sk, SOCK_DONE)) {
+ (void)virtio_transport_reset(vsk, NULL);
+ virtio_transport_do_close(vsk, true);
+ }
+ /* Remove this socket anyway because the remote peer sent
+ * the shutdown. This way a new connection will succeed
+ * if the remote peer uses the same source port,
+ * even if the old socket is still unreleased, but now disconnected.
+ */
+ vsock_remove_sock(vsk);
}
if (le32_to_cpu(virtio_vsock_hdr(skb)->flags))
sk->sk_state_change(sk);
@@ -1511,6 +1690,36 @@ int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t recv_acto
}
EXPORT_SYMBOL_GPL(virtio_transport_read_skb);
+int virtio_transport_notify_set_rcvlowat(struct vsock_sock *vsk, int val)
+{
+ struct virtio_vsock_sock *vvs = vsk->trans;
+ bool send_update;
+
+ spin_lock_bh(&vvs->rx_lock);
+
+ /* If number of available bytes is less than new SO_RCVLOWAT value,
+ * kick sender to send more data, because sender may sleep in its
+ * 'send()' syscall waiting for enough space at our side. Also
+ * don't send credit update when peer already knows actual value -
+ * such transmission will be useless.
+ */
+ send_update = (vvs->rx_bytes < val) &&
+ (vvs->fwd_cnt != vvs->last_fwd_cnt);
+
+ spin_unlock_bh(&vvs->rx_lock);
+
+ if (send_update) {
+ int err;
+
+ err = virtio_transport_send_credit_update(vsk);
+ if (err < 0)
+ return err;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(virtio_transport_notify_set_rcvlowat);
+
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Asias He");
MODULE_DESCRIPTION("common code for virtio vsock");
diff --git a/net/vmw_vsock/vsock_loopback.c b/net/vmw_vsock/vsock_loopback.c
index 5c6360df1f31..6dea6119f5b2 100644
--- a/net/vmw_vsock/vsock_loopback.c
+++ b/net/vmw_vsock/vsock_loopback.c
@@ -47,6 +47,10 @@ static int vsock_loopback_cancel_pkt(struct vsock_sock *vsk)
}
static bool vsock_loopback_seqpacket_allow(u32 remote_cid);
+static bool vsock_loopback_msgzerocopy_allow(void)
+{
+ return true;
+}
static struct virtio_transport loopback_transport = {
.transport = {
@@ -79,6 +83,8 @@ static struct virtio_transport loopback_transport = {
.seqpacket_allow = vsock_loopback_seqpacket_allow,
.seqpacket_has_data = virtio_transport_seqpacket_has_data,
+ .msgzerocopy_allow = vsock_loopback_msgzerocopy_allow,
+
.notify_poll_in = virtio_transport_notify_poll_in,
.notify_poll_out = virtio_transport_notify_poll_out,
.notify_recv_init = virtio_transport_notify_recv_init,
@@ -90,6 +96,7 @@ static struct virtio_transport loopback_transport = {
.notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue,
.notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue,
.notify_buffer_size = virtio_transport_notify_buffer_size,
+ .notify_set_rcvlowat = virtio_transport_notify_set_rcvlowat,
.read_skb = virtio_transport_read_skb,
},
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index f620acd2a0f5..10345388ad13 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -201,6 +201,16 @@ config CFG80211_WEXT_EXPORT
Drivers should select this option if they require cfg80211's
wext compatibility symbols to be exported.
+config CFG80211_KUNIT_TEST
+ tristate "KUnit tests for cfg80211" if !KUNIT_ALL_TESTS
+ depends on KUNIT
+ depends on CFG80211
+ default KUNIT_ALL_TESTS
+ help
+ Enable this option to test cfg80211 functions with kunit.
+
+ If unsure, say N.
+
endif # CFG80211
config LIB80211
diff --git a/net/wireless/Makefile b/net/wireless/Makefile
index 527ae669f6f7..72074fd36df4 100644
--- a/net/wireless/Makefile
+++ b/net/wireless/Makefile
@@ -4,6 +4,7 @@ obj-$(CONFIG_LIB80211) += lib80211.o
obj-$(CONFIG_LIB80211_CRYPT_WEP) += lib80211_crypt_wep.o
obj-$(CONFIG_LIB80211_CRYPT_CCMP) += lib80211_crypt_ccmp.o
obj-$(CONFIG_LIB80211_CRYPT_TKIP) += lib80211_crypt_tkip.o
+obj-y += tests/
obj-$(CONFIG_WEXT_CORE) += wext-core.o
obj-$(CONFIG_WEXT_PROC) += wext-proc.o
@@ -24,7 +25,7 @@ ifneq ($(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR),)
cfg80211-y += extra-certs.o
endif
-$(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.hex)
+$(obj)/shipped-certs.c: $(sort $(wildcard $(srctree)/$(src)/certs/*.hex))
@$(kecho) " GEN $@"
$(Q)(echo '#include "reg.h"'; \
echo 'const u8 shipped_regdb_certs[] = {'; \
@@ -34,7 +35,7 @@ $(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.hex)
) > $@
$(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR) \
- $(wildcard $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR)/*.x509)
+ $(sort $(wildcard $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR)/*.x509))
@$(kecho) " GEN $@"
$(Q)(set -e; \
allf=""; \
diff --git a/net/wireless/ap.c b/net/wireless/ap.c
index 0962770303b2..9a9a870806f5 100644
--- a/net/wireless/ap.c
+++ b/net/wireless/ap.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Parts of this file are
- * Copyright (C) 2022 Intel Corporation
+ * Copyright (C) 2022-2023 Intel Corporation
*/
#include <linux/ieee80211.h>
#include <linux/export.h>
@@ -18,7 +18,7 @@ static int ___cfg80211_stop_ap(struct cfg80211_registered_device *rdev,
struct wireless_dev *wdev = dev->ieee80211_ptr;
int err;
- ASSERT_WDEV_LOCK(wdev);
+ lockdep_assert_wiphy(wdev->wiphy);
if (!rdev->ops->stop_ap)
return -EOPNOTSUPP;
@@ -52,9 +52,9 @@ static int ___cfg80211_stop_ap(struct cfg80211_registered_device *rdev,
return err;
}
-int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev,
- struct net_device *dev, int link_id,
- bool notify)
+int cfg80211_stop_ap(struct cfg80211_registered_device *rdev,
+ struct net_device *dev, int link_id,
+ bool notify)
{
unsigned int link;
int ret = 0;
@@ -72,17 +72,3 @@ int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev,
return ret;
}
-
-int cfg80211_stop_ap(struct cfg80211_registered_device *rdev,
- struct net_device *dev, int link_id,
- bool notify)
-{
- struct wireless_dev *wdev = dev->ieee80211_ptr;
- int err;
-
- wdev_lock(wdev);
- err = __cfg80211_stop_ap(rdev, dev, link_id, notify);
- wdev_unlock(wdev);
-
- return err;
-}
diff --git a/net/wireless/certs/wens.hex b/net/wireless/certs/wens.hex
new file mode 100644
index 000000000000..0d50369bede9
--- /dev/null
+++ b/net/wireless/certs/wens.hex
@@ -0,0 +1,87 @@
+/* Chen-Yu Tsai's regdb certificate */
+0x30, 0x82, 0x02, 0xa7, 0x30, 0x82, 0x01, 0x8f,
+0x02, 0x14, 0x61, 0xc0, 0x38, 0x65, 0x1a, 0xab,
+0xdc, 0xf9, 0x4b, 0xd0, 0xac, 0x7f, 0xf0, 0x6c,
+0x72, 0x48, 0xdb, 0x18, 0xc6, 0x00, 0x30, 0x0d,
+0x06, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d,
+0x01, 0x01, 0x0b, 0x05, 0x00, 0x30, 0x0f, 0x31,
+0x0d, 0x30, 0x0b, 0x06, 0x03, 0x55, 0x04, 0x03,
+0x0c, 0x04, 0x77, 0x65, 0x6e, 0x73, 0x30, 0x20,
+0x17, 0x0d, 0x32, 0x33, 0x31, 0x32, 0x30, 0x31,
+0x30, 0x37, 0x34, 0x31, 0x31, 0x34, 0x5a, 0x18,
+0x0f, 0x32, 0x31, 0x32, 0x33, 0x31, 0x31, 0x30,
+0x37, 0x30, 0x37, 0x34, 0x31, 0x31, 0x34, 0x5a,
+0x30, 0x0f, 0x31, 0x0d, 0x30, 0x0b, 0x06, 0x03,
+0x55, 0x04, 0x03, 0x0c, 0x04, 0x77, 0x65, 0x6e,
+0x73, 0x30, 0x82, 0x01, 0x22, 0x30, 0x0d, 0x06,
+0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01,
+0x01, 0x01, 0x05, 0x00, 0x03, 0x82, 0x01, 0x0f,
+0x00, 0x30, 0x82, 0x01, 0x0a, 0x02, 0x82, 0x01,
+0x01, 0x00, 0xa9, 0x7a, 0x2c, 0x78, 0x4d, 0xa7,
+0x19, 0x2d, 0x32, 0x52, 0xa0, 0x2e, 0x6c, 0xef,
+0x88, 0x7f, 0x15, 0xc5, 0xb6, 0x69, 0x54, 0x16,
+0x43, 0x14, 0x79, 0x53, 0xb7, 0xae, 0x88, 0xfe,
+0xc0, 0xb7, 0x5d, 0x47, 0x8e, 0x1a, 0xe1, 0xef,
+0xb3, 0x90, 0x86, 0xda, 0xd3, 0x64, 0x81, 0x1f,
+0xce, 0x5d, 0x9e, 0x4b, 0x6e, 0x58, 0x02, 0x3e,
+0xb2, 0x6f, 0x5e, 0x42, 0x47, 0x41, 0xf4, 0x2c,
+0xb8, 0xa8, 0xd4, 0xaa, 0xc0, 0x0e, 0xe6, 0x48,
+0xf0, 0xa8, 0xce, 0xcb, 0x08, 0xae, 0x37, 0xaf,
+0xf6, 0x40, 0x39, 0xcb, 0x55, 0x6f, 0x5b, 0x4f,
+0x85, 0x34, 0xe6, 0x69, 0x10, 0x50, 0x72, 0x5e,
+0x4e, 0x9d, 0x4c, 0xba, 0x38, 0x36, 0x0d, 0xce,
+0x73, 0x38, 0xd7, 0x27, 0x02, 0x2a, 0x79, 0x03,
+0xe1, 0xac, 0xcf, 0xb0, 0x27, 0x85, 0x86, 0x93,
+0x17, 0xab, 0xec, 0x42, 0x77, 0x37, 0x65, 0x8a,
+0x44, 0xcb, 0xd6, 0x42, 0x93, 0x92, 0x13, 0xe3,
+0x39, 0x45, 0xc5, 0x6e, 0x00, 0x4a, 0x7f, 0xcb,
+0x42, 0x17, 0x2b, 0x25, 0x8c, 0xb8, 0x17, 0x3b,
+0x15, 0x36, 0x59, 0xde, 0x42, 0xce, 0x21, 0xe6,
+0xb6, 0xc7, 0x6e, 0x5e, 0x26, 0x1f, 0xf7, 0x8a,
+0x57, 0x9e, 0xa5, 0x96, 0x72, 0xb7, 0x02, 0x32,
+0xeb, 0x07, 0x2b, 0x73, 0xe2, 0x4f, 0x66, 0x58,
+0x9a, 0xeb, 0x0f, 0x07, 0xb6, 0xab, 0x50, 0x8b,
+0xc3, 0x8f, 0x17, 0xfa, 0x0a, 0x99, 0xc2, 0x16,
+0x25, 0xbf, 0x2d, 0x6b, 0x1a, 0xaa, 0xe6, 0x3e,
+0x5f, 0xeb, 0x6d, 0x9b, 0x5d, 0x4d, 0x42, 0x83,
+0x2d, 0x39, 0xb8, 0xc9, 0xac, 0xdb, 0x3a, 0x91,
+0x50, 0xdf, 0xbb, 0xb1, 0x76, 0x6d, 0x15, 0x73,
+0xfd, 0xc6, 0xe6, 0x6b, 0x71, 0x9e, 0x67, 0x36,
+0x22, 0x83, 0x79, 0xb1, 0xd6, 0xb8, 0x84, 0x52,
+0xaf, 0x96, 0x5b, 0xc3, 0x63, 0x02, 0x4e, 0x78,
+0x70, 0x57, 0x02, 0x03, 0x01, 0x00, 0x01, 0x30,
+0x0d, 0x06, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7,
+0x0d, 0x01, 0x01, 0x0b, 0x05, 0x00, 0x03, 0x82,
+0x01, 0x01, 0x00, 0x24, 0x28, 0xee, 0x22, 0x74,
+0x7f, 0x7c, 0xfa, 0x6c, 0x1f, 0xb3, 0x18, 0xd1,
+0xc2, 0x3d, 0x7d, 0x29, 0x42, 0x88, 0xad, 0x82,
+0xa5, 0xb1, 0x8a, 0x05, 0xd0, 0xec, 0x5c, 0x91,
+0x20, 0xf6, 0x82, 0xfd, 0xd5, 0x67, 0x60, 0x5f,
+0x31, 0xf5, 0xbd, 0x88, 0x91, 0x70, 0xbd, 0xb8,
+0xb9, 0x8c, 0x88, 0xfe, 0x53, 0xc9, 0x54, 0x9b,
+0x43, 0xc4, 0x7a, 0x43, 0x74, 0x6b, 0xdd, 0xb0,
+0xb1, 0x3b, 0x33, 0x45, 0x46, 0x78, 0xa3, 0x1c,
+0xef, 0x54, 0x68, 0xf7, 0x85, 0x9c, 0xe4, 0x51,
+0x6f, 0x06, 0xaf, 0x81, 0xdb, 0x2a, 0x7b, 0x7b,
+0x6f, 0xa8, 0x9c, 0x67, 0xd8, 0xcb, 0xc9, 0x91,
+0x40, 0x00, 0xae, 0xd9, 0xa1, 0x9f, 0xdd, 0xa6,
+0x43, 0x0e, 0x28, 0x7b, 0xaa, 0x1b, 0xe9, 0x84,
+0xdb, 0x76, 0x64, 0x42, 0x70, 0xc9, 0xc0, 0xeb,
+0xae, 0x84, 0x11, 0x16, 0x68, 0x4e, 0x84, 0x9e,
+0x7e, 0x92, 0x36, 0xee, 0x1c, 0x3b, 0x08, 0x63,
+0xeb, 0x79, 0x84, 0x15, 0x08, 0x9d, 0xaf, 0xc8,
+0x9a, 0xc7, 0x34, 0xd3, 0x94, 0x4b, 0xd1, 0x28,
+0x97, 0xbe, 0xd1, 0x45, 0x75, 0xdc, 0x35, 0x62,
+0xac, 0x1d, 0x1f, 0xb7, 0xb7, 0x15, 0x87, 0xc8,
+0x98, 0xc0, 0x24, 0x31, 0x56, 0x8d, 0xed, 0xdb,
+0x06, 0xc6, 0x46, 0xbf, 0x4b, 0x6d, 0xa6, 0xd5,
+0xab, 0xcc, 0x60, 0xfc, 0xe5, 0x37, 0xb6, 0x53,
+0x7d, 0x58, 0x95, 0xa9, 0x56, 0xc7, 0xf7, 0xee,
+0xc3, 0xa0, 0x76, 0xf7, 0x65, 0x4d, 0x53, 0xfa,
+0xff, 0x5f, 0x76, 0x33, 0x5a, 0x08, 0xfa, 0x86,
+0x92, 0x5a, 0x13, 0xfa, 0x1a, 0xfc, 0xf2, 0x1b,
+0x8c, 0x7f, 0x42, 0x6d, 0xb7, 0x7e, 0xb7, 0xb4,
+0xf0, 0xc7, 0x83, 0xbb, 0xa2, 0x81, 0x03, 0x2d,
+0xd4, 0x2a, 0x63, 0x3f, 0xf7, 0x31, 0x2e, 0x40,
+0x33, 0x5c, 0x46, 0xbc, 0x9b, 0xc1, 0x05, 0xa5,
+0x45, 0x4e, 0xc3,
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index 0b7e81db383d..ceb9174c5c3d 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -6,7 +6,7 @@
*
* Copyright 2009 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
- * Copyright 2018-2022 Intel Corporation
+ * Copyright 2018-2023 Intel Corporation
*/
#include <linux/export.h>
@@ -141,7 +141,7 @@ static bool cfg80211_edmg_chandef_valid(const struct cfg80211_chan_def *chandef)
return true;
}
-static int nl80211_chan_width_to_mhz(enum nl80211_chan_width chan_width)
+int nl80211_chan_width_to_mhz(enum nl80211_chan_width chan_width)
{
int mhz;
@@ -190,6 +190,7 @@ static int nl80211_chan_width_to_mhz(enum nl80211_chan_width chan_width)
}
return mhz;
}
+EXPORT_SYMBOL(nl80211_chan_width_to_mhz);
static int cfg80211_chandef_get_width(const struct cfg80211_chan_def *c)
{
@@ -514,9 +515,83 @@ static u32 cfg80211_get_end_freq(u32 center_freq,
return end_freq;
}
+static bool
+cfg80211_dfs_permissive_check_wdev(struct cfg80211_registered_device *rdev,
+ enum nl80211_iftype iftype,
+ struct wireless_dev *wdev,
+ struct ieee80211_channel *chan)
+{
+ unsigned int link_id;
+
+ for_each_valid_link(wdev, link_id) {
+ struct ieee80211_channel *other_chan = NULL;
+ struct cfg80211_chan_def chandef = {};
+ int ret;
+
+ /* In order to avoid daisy chaining only allow BSS STA */
+ if (wdev->iftype != NL80211_IFTYPE_STATION ||
+ !wdev->links[link_id].client.current_bss)
+ continue;
+
+ other_chan =
+ wdev->links[link_id].client.current_bss->pub.channel;
+
+ if (!other_chan)
+ continue;
+
+ if (chan == other_chan)
+ return true;
+
+ /* continue if we can't get the channel */
+ ret = rdev_get_channel(rdev, wdev, link_id, &chandef);
+ if (ret)
+ continue;
+
+ if (cfg80211_is_sub_chan(&chandef, chan, false))
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * Check if P2P GO is allowed to operate on a DFS channel
+ */
+static bool cfg80211_dfs_permissive_chan(struct wiphy *wiphy,
+ enum nl80211_iftype iftype,
+ struct ieee80211_channel *chan)
+{
+ struct wireless_dev *wdev;
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
+
+ lockdep_assert_held(&rdev->wiphy.mtx);
+
+ if (!wiphy_ext_feature_isset(&rdev->wiphy,
+ NL80211_EXT_FEATURE_DFS_CONCURRENT) ||
+ !(chan->flags & IEEE80211_CHAN_DFS_CONCURRENT))
+ return false;
+
+ /* only valid for P2P GO */
+ if (iftype != NL80211_IFTYPE_P2P_GO)
+ return false;
+
+ /*
+ * Allow only if there's a concurrent BSS
+ */
+ list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) {
+ bool ret = cfg80211_dfs_permissive_check_wdev(rdev, iftype,
+ wdev, chan);
+ if (ret)
+ return ret;
+ }
+
+ return false;
+}
+
static int cfg80211_get_chans_dfs_required(struct wiphy *wiphy,
u32 center_freq,
- u32 bandwidth)
+ u32 bandwidth,
+ enum nl80211_iftype iftype)
{
struct ieee80211_channel *c;
u32 freq, start_freq, end_freq;
@@ -529,9 +604,11 @@ static int cfg80211_get_chans_dfs_required(struct wiphy *wiphy,
if (!c)
return -EINVAL;
- if (c->flags & IEEE80211_CHAN_RADAR)
+ if (c->flags & IEEE80211_CHAN_RADAR &&
+ !cfg80211_dfs_permissive_chan(wiphy, iftype, c))
return 1;
}
+
return 0;
}
@@ -557,7 +634,7 @@ int cfg80211_chandef_dfs_required(struct wiphy *wiphy,
ret = cfg80211_get_chans_dfs_required(wiphy,
ieee80211_chandef_to_khz(chandef),
- width);
+ width, iftype);
if (ret < 0)
return ret;
else if (ret > 0)
@@ -568,7 +645,7 @@ int cfg80211_chandef_dfs_required(struct wiphy *wiphy,
ret = cfg80211_get_chans_dfs_required(wiphy,
MHZ_TO_KHZ(chandef->center_freq2),
- width);
+ width, iftype);
if (ret < 0)
return ret;
else if (ret > 0)
@@ -666,6 +743,7 @@ bool cfg80211_chandef_dfs_usable(struct wiphy *wiphy,
return (r1 + r2 > 0);
}
+EXPORT_SYMBOL(cfg80211_chandef_dfs_usable);
/*
* Checks if center frequency of chan falls with in the bandwidth
@@ -713,7 +791,7 @@ bool cfg80211_beaconing_iface_active(struct wireless_dev *wdev)
{
unsigned int link;
- ASSERT_WDEV_LOCK(wdev);
+ lockdep_assert_wiphy(wdev->wiphy);
switch (wdev->iftype) {
case NL80211_IFTYPE_AP:
@@ -782,18 +860,14 @@ static bool cfg80211_is_wiphy_oper_chan(struct wiphy *wiphy,
{
struct wireless_dev *wdev;
+ lockdep_assert_wiphy(wiphy);
+
list_for_each_entry(wdev, &wiphy->wdev_list, list) {
- wdev_lock(wdev);
- if (!cfg80211_beaconing_iface_active(wdev)) {
- wdev_unlock(wdev);
+ if (!cfg80211_beaconing_iface_active(wdev))
continue;
- }
- if (cfg80211_wdev_on_sub_chan(wdev, chan, false)) {
- wdev_unlock(wdev);
+ if (cfg80211_wdev_on_sub_chan(wdev, chan, false))
return true;
- }
- wdev_unlock(wdev);
}
return false;
@@ -823,14 +897,18 @@ bool cfg80211_any_wiphy_oper_chan(struct wiphy *wiphy,
if (!(chan->flags & IEEE80211_CHAN_RADAR))
return false;
- list_for_each_entry(rdev, &cfg80211_rdev_list, list) {
+ for_each_rdev(rdev) {
+ bool found;
+
if (!reg_dfs_domain_same(wiphy, &rdev->wiphy))
continue;
- if (cfg80211_is_wiphy_oper_chan(&rdev->wiphy, chan))
- return true;
+ wiphy_lock(&rdev->wiphy);
+ found = cfg80211_is_wiphy_oper_chan(&rdev->wiphy, chan) ||
+ cfg80211_offchan_chain_is_active(rdev, chan);
+ wiphy_unlock(&rdev->wiphy);
- if (cfg80211_offchan_chain_is_active(rdev, chan))
+ if (found)
return true;
}
@@ -965,6 +1043,7 @@ cfg80211_chandef_dfs_cac_time(struct wiphy *wiphy,
return max(t1, t2);
}
+EXPORT_SYMBOL(cfg80211_chandef_dfs_cac_time);
static bool cfg80211_secondary_chans_ok(struct wiphy *wiphy,
u32 center_freq, u32 bandwidth,
@@ -1162,8 +1241,7 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy,
if (!sband)
return false;
- for (i = 0; i < sband->n_iftype_data; i++) {
- iftd = &sband->iftype_data[i];
+ for_each_sband_iftype_data(sband, i, iftd) {
if (!iftd->eht_cap.has_eht)
continue;
@@ -1321,10 +1399,7 @@ static bool cfg80211_ir_permissive_chan(struct wiphy *wiphy,
list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) {
bool ret;
- wdev_lock(wdev);
ret = cfg80211_ir_permissive_check_wdev(iftype, wdev, chan);
- wdev_unlock(wdev);
-
if (ret)
return ret;
}
@@ -1338,15 +1413,19 @@ static bool _cfg80211_reg_can_beacon(struct wiphy *wiphy,
bool check_no_ir)
{
bool res;
- u32 prohibited_flags = IEEE80211_CHAN_DISABLED |
- IEEE80211_CHAN_RADAR;
+ u32 prohibited_flags = IEEE80211_CHAN_DISABLED;
+ int dfs_required;
trace_cfg80211_reg_can_beacon(wiphy, chandef, iftype, check_no_ir);
if (check_no_ir)
prohibited_flags |= IEEE80211_CHAN_NO_IR;
- if (cfg80211_chandef_dfs_required(wiphy, chandef, iftype) > 0 &&
+ dfs_required = cfg80211_chandef_dfs_required(wiphy, chandef, iftype);
+ if (dfs_required != 0)
+ prohibited_flags |= IEEE80211_CHAN_RADAR;
+
+ if (dfs_required > 0 &&
cfg80211_chandef_dfs_available(wiphy, chandef)) {
/* We can skip IEEE80211_CHAN_NO_IR if chandef dfs available */
prohibited_flags = IEEE80211_CHAN_DISABLED;
@@ -1433,17 +1512,10 @@ EXPORT_SYMBOL(cfg80211_any_usable_channels);
struct cfg80211_chan_def *wdev_chandef(struct wireless_dev *wdev,
unsigned int link_id)
{
- /*
- * We need to sort out the locking here - in some cases
- * where we get here we really just don't care (yet)
- * about the valid links, but in others we do. But we
- * get here with various driver cases, so we cannot
- * easily require the wdev mutex.
- */
- if (link_id || wdev->valid_links & BIT(0)) {
- ASSERT_WDEV_LOCK(wdev);
- WARN_ON(!(wdev->valid_links & BIT(link_id)));
- }
+ lockdep_assert_wiphy(wdev->wiphy);
+
+ WARN_ON(wdev->valid_links && !(wdev->valid_links & BIT(link_id)));
+ WARN_ON(!wdev->valid_links && link_id > 0);
switch (wdev->iftype) {
case NL80211_IFTYPE_MESH_POINT:
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 25bc2e50a061..3fb1b637352a 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -5,7 +5,7 @@
* Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2022 Intel Corporation
+ * Copyright (C) 2018-2024 Intel Corporation
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -60,7 +60,7 @@ struct cfg80211_registered_device *cfg80211_rdev_by_wiphy_idx(int wiphy_idx)
ASSERT_RTNL();
- list_for_each_entry(rdev, &cfg80211_rdev_list, list) {
+ for_each_rdev(rdev) {
if (rdev->wiphy_idx == wiphy_idx) {
result = rdev;
break;
@@ -116,7 +116,7 @@ static int cfg80211_dev_check_name(struct cfg80211_registered_device *rdev,
}
/* Ensure another device does not already have this name. */
- list_for_each_entry(rdev2, &cfg80211_rdev_list, list)
+ for_each_rdev(rdev2)
if (strcmp(newname, wiphy_name(&rdev2->wiphy)) == 0)
return -EINVAL;
@@ -191,13 +191,13 @@ int cfg80211_switch_netns(struct cfg80211_registered_device *rdev,
return err;
}
+ wiphy_lock(&rdev->wiphy);
list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) {
if (!wdev->netdev)
continue;
nl80211_notify_iface(rdev, wdev, NL80211_CMD_DEL_INTERFACE);
}
- wiphy_lock(&rdev->wiphy);
nl80211_notify_wiphy(rdev, NL80211_CMD_DEL_WIPHY);
wiphy_net_set(&rdev->wiphy, net);
@@ -206,13 +206,13 @@ int cfg80211_switch_netns(struct cfg80211_registered_device *rdev,
WARN_ON(err);
nl80211_notify_wiphy(rdev, NL80211_CMD_NEW_WIPHY);
- wiphy_unlock(&rdev->wiphy);
list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) {
if (!wdev->netdev)
continue;
nl80211_notify_iface(rdev, wdev, NL80211_CMD_NEW_INTERFACE);
}
+ wiphy_unlock(&rdev->wiphy);
return 0;
}
@@ -221,7 +221,9 @@ static void cfg80211_rfkill_poll(struct rfkill *rfkill, void *data)
{
struct cfg80211_registered_device *rdev = data;
+ wiphy_lock(&rdev->wiphy);
rdev_rfkill_poll(rdev);
+ wiphy_unlock(&rdev->wiphy);
}
void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev,
@@ -821,6 +823,7 @@ int wiphy_register(struct wiphy *wiphy)
/* sanity check supported bands/channels */
for (band = 0; band < NUM_NL80211_BANDS; band++) {
+ const struct ieee80211_sband_iftype_data *iftd;
u16 types = 0;
bool have_he = false;
@@ -877,14 +880,11 @@ int wiphy_register(struct wiphy *wiphy)
return -EINVAL;
}
- for (i = 0; i < sband->n_iftype_data; i++) {
- const struct ieee80211_sband_iftype_data *iftd;
+ for_each_sband_iftype_data(sband, i, iftd) {
bool has_ap, has_non_ap;
u32 ap_bits = BIT(NL80211_IFTYPE_AP) |
BIT(NL80211_IFTYPE_P2P_GO);
- iftd = &sband->iftype_data[i];
-
if (WARN_ON(!iftd->types_mask))
return -EINVAL;
if (WARN_ON(types & iftd->types_mask))
@@ -1049,7 +1049,8 @@ void wiphy_rfkill_start_polling(struct wiphy *wiphy)
}
EXPORT_SYMBOL(wiphy_rfkill_start_polling);
-void cfg80211_process_wiphy_works(struct cfg80211_registered_device *rdev)
+void cfg80211_process_wiphy_works(struct cfg80211_registered_device *rdev,
+ struct wiphy_work *end)
{
unsigned int runaway_limit = 100;
unsigned long flags;
@@ -1068,6 +1069,10 @@ void cfg80211_process_wiphy_works(struct cfg80211_registered_device *rdev)
wk->func(&rdev->wiphy, wk);
spin_lock_irqsave(&rdev->wiphy_work_lock, flags);
+
+ if (wk == end)
+ break;
+
if (WARN_ON(--runaway_limit == 0))
INIT_LIST_HEAD(&rdev->wiphy_work_list);
}
@@ -1118,7 +1123,7 @@ void wiphy_unregister(struct wiphy *wiphy)
#endif
/* surely nothing is reachable now, clean up work */
- cfg80211_process_wiphy_works(rdev);
+ cfg80211_process_wiphy_works(rdev, NULL);
wiphy_unlock(&rdev->wiphy);
rtnl_unlock();
@@ -1181,16 +1186,11 @@ void wiphy_rfkill_set_hw_state_reason(struct wiphy *wiphy, bool blocked,
}
EXPORT_SYMBOL(wiphy_rfkill_set_hw_state_reason);
-void cfg80211_cqm_config_free(struct wireless_dev *wdev)
-{
- kfree(wdev->cqm_config);
- wdev->cqm_config = NULL;
-}
-
static void _cfg80211_unregister_wdev(struct wireless_dev *wdev,
bool unregister_netdev)
{
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
+ struct cfg80211_cqm_config *cqm_config;
unsigned int link_id;
ASSERT_RTNL();
@@ -1227,7 +1227,10 @@ static void _cfg80211_unregister_wdev(struct wireless_dev *wdev,
kfree_sensitive(wdev->wext.keys);
wdev->wext.keys = NULL;
#endif
- cfg80211_cqm_config_free(wdev);
+ wiphy_work_cancel(wdev->wiphy, &wdev->cqm_rssi_work);
+ /* deleted from the list, so can't be found from nl80211 any more */
+ cqm_config = rcu_access_pointer(wdev->cqm_config);
+ kfree_rcu(cqm_config, rcu_head);
/*
* Ensure that all events have been processed and
@@ -1273,14 +1276,13 @@ void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev,
rdev->num_running_monitor_ifaces += num;
}
-void __cfg80211_leave(struct cfg80211_registered_device *rdev,
- struct wireless_dev *wdev)
+void cfg80211_leave(struct cfg80211_registered_device *rdev,
+ struct wireless_dev *wdev)
{
struct net_device *dev = wdev->netdev;
struct cfg80211_sched_scan_request *pos, *tmp;
lockdep_assert_held(&rdev->wiphy.mtx);
- ASSERT_WDEV_LOCK(wdev);
cfg80211_pmsr_wdev_down(wdev);
@@ -1288,7 +1290,7 @@ void __cfg80211_leave(struct cfg80211_registered_device *rdev,
switch (wdev->iftype) {
case NL80211_IFTYPE_ADHOC:
- __cfg80211_leave_ibss(rdev, dev, true);
+ cfg80211_leave_ibss(rdev, dev, true);
break;
case NL80211_IFTYPE_P2P_CLIENT:
case NL80211_IFTYPE_STATION:
@@ -1308,14 +1310,14 @@ void __cfg80211_leave(struct cfg80211_registered_device *rdev,
WLAN_REASON_DEAUTH_LEAVING, true);
break;
case NL80211_IFTYPE_MESH_POINT:
- __cfg80211_leave_mesh(rdev, dev);
+ cfg80211_leave_mesh(rdev, dev);
break;
case NL80211_IFTYPE_AP:
case NL80211_IFTYPE_P2P_GO:
- __cfg80211_stop_ap(rdev, dev, -1, true);
+ cfg80211_stop_ap(rdev, dev, -1, true);
break;
case NL80211_IFTYPE_OCB:
- __cfg80211_leave_ocb(rdev, dev);
+ cfg80211_leave_ocb(rdev, dev);
break;
case NL80211_IFTYPE_P2P_DEVICE:
case NL80211_IFTYPE_NAN:
@@ -1333,14 +1335,6 @@ void __cfg80211_leave(struct cfg80211_registered_device *rdev,
}
}
-void cfg80211_leave(struct cfg80211_registered_device *rdev,
- struct wireless_dev *wdev)
-{
- wdev_lock(wdev);
- __cfg80211_leave(rdev, wdev);
- wdev_unlock(wdev);
-}
-
void cfg80211_stop_iface(struct wiphy *wiphy, struct wireless_dev *wdev,
gfp_t gfp)
{
@@ -1365,7 +1359,6 @@ EXPORT_SYMBOL(cfg80211_stop_iface);
void cfg80211_init_wdev(struct wireless_dev *wdev)
{
- mutex_init(&wdev->mtx);
INIT_LIST_HEAD(&wdev->event_list);
spin_lock_init(&wdev->event_lock);
INIT_LIST_HEAD(&wdev->mgmt_registrations);
@@ -1379,6 +1372,8 @@ void cfg80211_init_wdev(struct wireless_dev *wdev)
wdev->wext.connect.auth_type = NL80211_AUTHTYPE_AUTOMATIC;
#endif
+ wiphy_work_init(&wdev->cqm_rssi_work, cfg80211_cqm_rssi_notify_work);
+
if (wdev->wiphy->flags & WIPHY_FLAG_PS_ON_BY_DEFAULT)
wdev->ps = true;
else
@@ -1528,7 +1523,6 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
case NETDEV_UP:
wiphy_lock(&rdev->wiphy);
cfg80211_update_iface_num(rdev, wdev->iftype, 1);
- wdev_lock(wdev);
switch (wdev->iftype) {
#ifdef CONFIG_CFG80211_WEXT
case NL80211_IFTYPE_ADHOC:
@@ -1558,7 +1552,6 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
default:
break;
}
- wdev_unlock(wdev);
rdev->opencount++;
/*
@@ -1601,7 +1594,7 @@ static void __net_exit cfg80211_pernet_exit(struct net *net)
struct cfg80211_registered_device *rdev;
rtnl_lock();
- list_for_each_entry(rdev, &cfg80211_rdev_list, list) {
+ for_each_rdev(rdev) {
if (net_eq(wiphy_net(&rdev->wiphy), net))
WARN_ON(cfg80211_switch_netns(rdev, &init_net));
}
@@ -1622,7 +1615,7 @@ void wiphy_work_queue(struct wiphy *wiphy, struct wiphy_work *work)
list_add_tail(&work->entry, &rdev->wiphy_work_list);
spin_unlock_irqrestore(&rdev->wiphy_work_lock, flags);
- schedule_work(&rdev->wiphy_work);
+ queue_work(system_unbound_wq, &rdev->wiphy_work);
}
EXPORT_SYMBOL_GPL(wiphy_work_queue);
@@ -1640,6 +1633,21 @@ void wiphy_work_cancel(struct wiphy *wiphy, struct wiphy_work *work)
}
EXPORT_SYMBOL_GPL(wiphy_work_cancel);
+void wiphy_work_flush(struct wiphy *wiphy, struct wiphy_work *work)
+{
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
+ unsigned long flags;
+ bool run;
+
+ spin_lock_irqsave(&rdev->wiphy_work_lock, flags);
+ run = !work || !list_empty(&work->entry);
+ spin_unlock_irqrestore(&rdev->wiphy_work_lock, flags);
+
+ if (run)
+ cfg80211_process_wiphy_works(rdev, work);
+}
+EXPORT_SYMBOL_GPL(wiphy_work_flush);
+
void wiphy_delayed_work_timer(struct timer_list *t)
{
struct wiphy_delayed_work *dwork = from_timer(dwork, t, timer);
@@ -1653,6 +1661,7 @@ void wiphy_delayed_work_queue(struct wiphy *wiphy,
unsigned long delay)
{
if (!delay) {
+ del_timer(&dwork->timer);
wiphy_work_queue(wiphy, &dwork->work);
return;
}
@@ -1672,6 +1681,16 @@ void wiphy_delayed_work_cancel(struct wiphy *wiphy,
}
EXPORT_SYMBOL_GPL(wiphy_delayed_work_cancel);
+void wiphy_delayed_work_flush(struct wiphy *wiphy,
+ struct wiphy_delayed_work *dwork)
+{
+ lockdep_assert_held(&wiphy->mtx);
+
+ del_timer_sync(&dwork->timer);
+ wiphy_work_flush(wiphy, &dwork->work);
+}
+EXPORT_SYMBOL_GPL(wiphy_delayed_work_flush);
+
static int __init cfg80211_init(void)
{
int err;
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 507d184b8b40..13657a85cf61 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -3,7 +3,7 @@
* Wireless configuration interface internals.
*
* Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
- * Copyright (C) 2018-2022 Intel Corporation
+ * Copyright (C) 2018-2023 Intel Corporation
*/
#ifndef __NET_WIRELESS_CORE_H
#define __NET_WIRELESS_CORE_H
@@ -160,6 +160,16 @@ extern struct workqueue_struct *cfg80211_wq;
extern struct list_head cfg80211_rdev_list;
extern int cfg80211_rdev_list_generation;
+/* This is constructed like this so it can be used in if/else */
+static inline int for_each_rdev_check_rtnl(void)
+{
+ ASSERT_RTNL();
+ return 0;
+}
+#define for_each_rdev(rdev) \
+ if (for_each_rdev_check_rtnl()) {} else \
+ list_for_each_entry(rdev, &cfg80211_rdev_list, list)
+
struct cfg80211_internal_bss {
struct list_head list;
struct list_head hidden_list;
@@ -225,22 +235,6 @@ void cfg80211_init_wdev(struct wireless_dev *wdev);
void cfg80211_register_wdev(struct cfg80211_registered_device *rdev,
struct wireless_dev *wdev);
-static inline void wdev_lock(struct wireless_dev *wdev)
- __acquires(wdev)
-{
- mutex_lock(&wdev->mtx);
- __acquire(wdev->mtx);
-}
-
-static inline void wdev_unlock(struct wireless_dev *wdev)
- __releases(wdev)
-{
- __release(wdev->mtx);
- mutex_unlock(&wdev->mtx);
-}
-
-#define ASSERT_WDEV_LOCK(wdev) lockdep_assert_held(&(wdev)->mtx)
-
static inline bool cfg80211_has_monitors_only(struct cfg80211_registered_device *rdev)
{
lockdep_assert_held(&rdev->wiphy.mtx);
@@ -276,7 +270,7 @@ struct cfg80211_event {
struct ieee80211_channel *channel;
} ij;
struct {
- u8 bssid[ETH_ALEN];
+ u8 peer_addr[ETH_ALEN];
const u8 *td_bitmap;
u8 td_bitmap_len;
} pa;
@@ -295,12 +289,18 @@ struct cfg80211_beacon_registration {
};
struct cfg80211_cqm_config {
+ struct rcu_head rcu_head;
u32 rssi_hyst;
s32 last_rssi_event_value;
+ enum nl80211_cqm_rssi_threshold_event last_rssi_event_type;
+ bool use_range_api;
int n_rssi_thresholds;
s32 rssi_thresholds[] __counted_by(n_rssi_thresholds);
};
+void cfg80211_cqm_rssi_notify_work(struct wiphy *wiphy,
+ struct wiphy_work *work);
+
void cfg80211_destroy_ifaces(struct cfg80211_registered_device *rdev);
/* free object */
@@ -324,8 +324,6 @@ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev,
struct cfg80211_ibss_params *params,
struct cfg80211_cached_keys *connkeys);
void cfg80211_clear_ibss(struct net_device *dev, bool nowext);
-int __cfg80211_leave_ibss(struct cfg80211_registered_device *rdev,
- struct net_device *dev, bool nowext);
int cfg80211_leave_ibss(struct cfg80211_registered_device *rdev,
struct net_device *dev, bool nowext);
void __cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid,
@@ -340,8 +338,6 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
struct net_device *dev,
struct mesh_setup *setup,
const struct mesh_config *conf);
-int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev,
- struct net_device *dev);
int cfg80211_leave_mesh(struct cfg80211_registered_device *rdev,
struct net_device *dev);
int cfg80211_set_mesh_channel(struct cfg80211_registered_device *rdev,
@@ -349,21 +345,13 @@ int cfg80211_set_mesh_channel(struct cfg80211_registered_device *rdev,
struct cfg80211_chan_def *chandef);
/* OCB */
-int __cfg80211_join_ocb(struct cfg80211_registered_device *rdev,
- struct net_device *dev,
- struct ocb_setup *setup);
int cfg80211_join_ocb(struct cfg80211_registered_device *rdev,
struct net_device *dev,
struct ocb_setup *setup);
-int __cfg80211_leave_ocb(struct cfg80211_registered_device *rdev,
- struct net_device *dev);
int cfg80211_leave_ocb(struct cfg80211_registered_device *rdev,
struct net_device *dev);
/* AP */
-int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev,
- struct net_device *dev, int link,
- bool notify);
int cfg80211_stop_ap(struct cfg80211_registered_device *rdev,
struct net_device *dev, int link,
bool notify);
@@ -417,7 +405,7 @@ int cfg80211_disconnect(struct cfg80211_registered_device *rdev,
bool wextev);
void __cfg80211_roamed(struct wireless_dev *wdev,
struct cfg80211_roam_info *info);
-void __cfg80211_port_authorized(struct wireless_dev *wdev, const u8 *bssid,
+void __cfg80211_port_authorized(struct wireless_dev *wdev, const u8 *peer_addr,
const u8 *td_bitmap, u8 td_bitmap_len);
int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev,
struct wireless_dev *wdev);
@@ -459,7 +447,8 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
struct net_device *dev, enum nl80211_iftype ntype,
struct vif_params *params);
void cfg80211_process_rdev_events(struct cfg80211_registered_device *rdev);
-void cfg80211_process_wiphy_works(struct cfg80211_registered_device *rdev);
+void cfg80211_process_wiphy_works(struct cfg80211_registered_device *rdev,
+ struct wiphy_work *end);
void cfg80211_process_wdev_events(struct wireless_dev *wdev);
bool cfg80211_does_bw_fit_range(const struct ieee80211_freq_range *freq_range,
@@ -469,18 +458,8 @@ int cfg80211_scan(struct cfg80211_registered_device *rdev);
extern struct work_struct cfg80211_disconnect_work;
-/**
- * cfg80211_chandef_dfs_usable - checks if chandef is DFS usable
- * @wiphy: the wiphy to validate against
- * @chandef: the channel definition to check
- *
- * Checks if chandef is usable and we can/need start CAC on such channel.
- *
- * Return: true if all channels available and at least
- * one channel requires CAC (NL80211_DFS_USABLE)
- */
-bool cfg80211_chandef_dfs_usable(struct wiphy *wiphy,
- const struct cfg80211_chan_def *chandef);
+#define NL80211_BSS_USE_FOR_ALL (NL80211_BSS_USE_FOR_NORMAL | \
+ NL80211_BSS_USE_FOR_MLD_LINK)
void cfg80211_set_dfs_state(struct wiphy *wiphy,
const struct cfg80211_chan_def *chandef,
@@ -488,10 +467,6 @@ void cfg80211_set_dfs_state(struct wiphy *wiphy,
void cfg80211_dfs_channels_update_work(struct work_struct *work);
-unsigned int
-cfg80211_chandef_dfs_cac_time(struct wiphy *wiphy,
- const struct cfg80211_chan_def *chandef);
-
void cfg80211_sched_dfs_chan_update(struct cfg80211_registered_device *rdev);
int
@@ -540,8 +515,6 @@ int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev,
void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev,
enum nl80211_iftype iftype, int num);
-void __cfg80211_leave(struct cfg80211_registered_device *rdev,
- struct wireless_dev *wdev);
void cfg80211_leave(struct cfg80211_registered_device *rdev,
struct wireless_dev *wdev);
@@ -566,8 +539,6 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev,
#define CFG80211_DEV_WARN_ON(cond) ({bool __r = (cond); __r; })
#endif
-void cfg80211_cqm_config_free(struct wireless_dev *wdev);
-
void cfg80211_release_pmsr(struct wireless_dev *wdev, u32 portid);
void cfg80211_pmsr_wdev_down(struct wireless_dev *wdev);
void cfg80211_pmsr_free_wk(struct work_struct *work);
@@ -578,4 +549,15 @@ int cfg80211_remove_virtual_intf(struct cfg80211_registered_device *rdev,
struct wireless_dev *wdev);
void cfg80211_wdev_release_link_bsses(struct wireless_dev *wdev, u16 link_mask);
+#if IS_ENABLED(CONFIG_CFG80211_KUNIT_TEST)
+#define EXPORT_SYMBOL_IF_CFG80211_KUNIT(sym) EXPORT_SYMBOL_IF_KUNIT(sym)
+#define VISIBLE_IF_CFG80211_KUNIT
+size_t cfg80211_gen_new_ie(const u8 *ie, size_t ielen,
+ const u8 *subie, size_t subie_len,
+ u8 *new_ie, size_t new_ie_len);
+#else
+#define EXPORT_SYMBOL_IF_CFG80211_KUNIT(sym)
+#define VISIBLE_IF_CFG80211_KUNIT static
+#endif /* IS_ENABLED(CONFIG_CFG80211_KUNIT_TEST) */
+
#endif /* __NET_WIRELESS_CORE_H */
diff --git a/net/wireless/debugfs.c b/net/wireless/debugfs.c
index 0878b162890a..40e49074e2ee 100644
--- a/net/wireless/debugfs.c
+++ b/net/wireless/debugfs.c
@@ -4,6 +4,7 @@
*
* Copyright 2009 Luis R. Rodriguez <lrodriguez@atheros.com>
* Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
+ * Copyright (C) 2023 Intel Corporation
*/
#include <linux/slab.h>
@@ -109,3 +110,162 @@ void cfg80211_debugfs_rdev_add(struct cfg80211_registered_device *rdev)
DEBUGFS_ADD(long_retry_limit);
DEBUGFS_ADD(ht40allow_map);
}
+
+struct debugfs_read_work {
+ struct wiphy_work work;
+ ssize_t (*handler)(struct wiphy *wiphy,
+ struct file *file,
+ char *buf,
+ size_t count,
+ void *data);
+ struct wiphy *wiphy;
+ struct file *file;
+ char *buf;
+ size_t bufsize;
+ void *data;
+ ssize_t ret;
+ struct completion completion;
+};
+
+static void wiphy_locked_debugfs_read_work(struct wiphy *wiphy,
+ struct wiphy_work *work)
+{
+ struct debugfs_read_work *w = container_of(work, typeof(*w), work);
+
+ w->ret = w->handler(w->wiphy, w->file, w->buf, w->bufsize, w->data);
+ complete(&w->completion);
+}
+
+static void wiphy_locked_debugfs_read_cancel(struct dentry *dentry,
+ void *data)
+{
+ struct debugfs_read_work *w = data;
+
+ wiphy_work_cancel(w->wiphy, &w->work);
+ complete(&w->completion);
+}
+
+ssize_t wiphy_locked_debugfs_read(struct wiphy *wiphy, struct file *file,
+ char *buf, size_t bufsize,
+ char __user *userbuf, size_t count,
+ loff_t *ppos,
+ ssize_t (*handler)(struct wiphy *wiphy,
+ struct file *file,
+ char *buf,
+ size_t bufsize,
+ void *data),
+ void *data)
+{
+ struct debugfs_read_work work = {
+ .handler = handler,
+ .wiphy = wiphy,
+ .file = file,
+ .buf = buf,
+ .bufsize = bufsize,
+ .data = data,
+ .ret = -ENODEV,
+ .completion = COMPLETION_INITIALIZER_ONSTACK(work.completion),
+ };
+ struct debugfs_cancellation cancellation = {
+ .cancel = wiphy_locked_debugfs_read_cancel,
+ .cancel_data = &work,
+ };
+
+ /* don't leak stack data or whatever */
+ memset(buf, 0, bufsize);
+
+ wiphy_work_init(&work.work, wiphy_locked_debugfs_read_work);
+ wiphy_work_queue(wiphy, &work.work);
+
+ debugfs_enter_cancellation(file, &cancellation);
+ wait_for_completion(&work.completion);
+ debugfs_leave_cancellation(file, &cancellation);
+
+ if (work.ret < 0)
+ return work.ret;
+
+ if (WARN_ON(work.ret > bufsize))
+ return -EINVAL;
+
+ return simple_read_from_buffer(userbuf, count, ppos, buf, work.ret);
+}
+EXPORT_SYMBOL_GPL(wiphy_locked_debugfs_read);
+
+struct debugfs_write_work {
+ struct wiphy_work work;
+ ssize_t (*handler)(struct wiphy *wiphy,
+ struct file *file,
+ char *buf,
+ size_t count,
+ void *data);
+ struct wiphy *wiphy;
+ struct file *file;
+ char *buf;
+ size_t count;
+ void *data;
+ ssize_t ret;
+ struct completion completion;
+};
+
+static void wiphy_locked_debugfs_write_work(struct wiphy *wiphy,
+ struct wiphy_work *work)
+{
+ struct debugfs_write_work *w = container_of(work, typeof(*w), work);
+
+ w->ret = w->handler(w->wiphy, w->file, w->buf, w->count, w->data);
+ complete(&w->completion);
+}
+
+static void wiphy_locked_debugfs_write_cancel(struct dentry *dentry,
+ void *data)
+{
+ struct debugfs_write_work *w = data;
+
+ wiphy_work_cancel(w->wiphy, &w->work);
+ complete(&w->completion);
+}
+
+ssize_t wiphy_locked_debugfs_write(struct wiphy *wiphy,
+ struct file *file, char *buf, size_t bufsize,
+ const char __user *userbuf, size_t count,
+ ssize_t (*handler)(struct wiphy *wiphy,
+ struct file *file,
+ char *buf,
+ size_t count,
+ void *data),
+ void *data)
+{
+ struct debugfs_write_work work = {
+ .handler = handler,
+ .wiphy = wiphy,
+ .file = file,
+ .buf = buf,
+ .count = count,
+ .data = data,
+ .ret = -ENODEV,
+ .completion = COMPLETION_INITIALIZER_ONSTACK(work.completion),
+ };
+ struct debugfs_cancellation cancellation = {
+ .cancel = wiphy_locked_debugfs_write_cancel,
+ .cancel_data = &work,
+ };
+
+ /* mostly used for strings so enforce NUL-termination for safety */
+ if (count >= bufsize)
+ return -EINVAL;
+
+ memset(buf, 0, bufsize);
+
+ if (copy_from_user(buf, userbuf, count))
+ return -EFAULT;
+
+ wiphy_work_init(&work.work, wiphy_locked_debugfs_write_work);
+ wiphy_work_queue(wiphy, &work.work);
+
+ debugfs_enter_cancellation(file, &cancellation);
+ wait_for_completion(&work.completion);
+ debugfs_leave_cancellation(file, &cancellation);
+
+ return work.ret;
+}
+EXPORT_SYMBOL_GPL(wiphy_locked_debugfs_write);
diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c
index e6fdb0b8187d..9f02ee5f08be 100644
--- a/net/wireless/ibss.c
+++ b/net/wireless/ibss.c
@@ -3,7 +3,7 @@
* Some IBSS support code for cfg80211.
*
* Copyright 2009 Johannes Berg <johannes@sipsolutions.net>
- * Copyright (C) 2020-2022 Intel Corporation
+ * Copyright (C) 2020-2023 Intel Corporation
*/
#include <linux/etherdevice.h>
@@ -93,7 +93,6 @@ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev,
int err;
lockdep_assert_held(&rdev->wiphy.mtx);
- ASSERT_WDEV_LOCK(wdev);
if (wdev->u.ibss.ssid_len)
return -EALREADY;
@@ -151,13 +150,13 @@ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev,
return 0;
}
-static void __cfg80211_clear_ibss(struct net_device *dev, bool nowext)
+void cfg80211_clear_ibss(struct net_device *dev, bool nowext)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
int i;
- ASSERT_WDEV_LOCK(wdev);
+ lockdep_assert_wiphy(wdev->wiphy);
kfree_sensitive(wdev->connect_keys);
wdev->connect_keys = NULL;
@@ -187,22 +186,13 @@ static void __cfg80211_clear_ibss(struct net_device *dev, bool nowext)
cfg80211_sched_dfs_chan_update(rdev);
}
-void cfg80211_clear_ibss(struct net_device *dev, bool nowext)
-{
- struct wireless_dev *wdev = dev->ieee80211_ptr;
-
- wdev_lock(wdev);
- __cfg80211_clear_ibss(dev, nowext);
- wdev_unlock(wdev);
-}
-
-int __cfg80211_leave_ibss(struct cfg80211_registered_device *rdev,
- struct net_device *dev, bool nowext)
+int cfg80211_leave_ibss(struct cfg80211_registered_device *rdev,
+ struct net_device *dev, bool nowext)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
int err;
- ASSERT_WDEV_LOCK(wdev);
+ lockdep_assert_wiphy(wdev->wiphy);
if (!wdev->u.ibss.ssid_len)
return -ENOLINK;
@@ -213,24 +203,11 @@ int __cfg80211_leave_ibss(struct cfg80211_registered_device *rdev,
return err;
wdev->conn_owner_nlportid = 0;
- __cfg80211_clear_ibss(dev, nowext);
+ cfg80211_clear_ibss(dev, nowext);
return 0;
}
-int cfg80211_leave_ibss(struct cfg80211_registered_device *rdev,
- struct net_device *dev, bool nowext)
-{
- struct wireless_dev *wdev = dev->ieee80211_ptr;
- int err;
-
- wdev_lock(wdev);
- err = __cfg80211_leave_ibss(rdev, dev, nowext);
- wdev_unlock(wdev);
-
- return err;
-}
-
#ifdef CONFIG_CFG80211_WEXT
int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev,
struct wireless_dev *wdev)
@@ -239,7 +216,7 @@ int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev,
enum nl80211_band band;
int i, err;
- ASSERT_WDEV_LOCK(wdev);
+ lockdep_assert_wiphy(wdev->wiphy);
if (!wdev->wext.ibss.beacon_interval)
wdev->wext.ibss.beacon_interval = 100;
@@ -336,11 +313,9 @@ int cfg80211_ibss_wext_siwfreq(struct net_device *dev,
if (wdev->wext.ibss.chandef.chan == chan)
return 0;
- wdev_lock(wdev);
err = 0;
if (wdev->u.ibss.ssid_len)
- err = __cfg80211_leave_ibss(rdev, dev, true);
- wdev_unlock(wdev);
+ err = cfg80211_leave_ibss(rdev, dev, true);
if (err)
return err;
@@ -354,11 +329,7 @@ int cfg80211_ibss_wext_siwfreq(struct net_device *dev,
wdev->wext.ibss.channel_fixed = false;
}
- wdev_lock(wdev);
- err = cfg80211_ibss_wext_join(rdev, wdev);
- wdev_unlock(wdev);
-
- return err;
+ return cfg80211_ibss_wext_join(rdev, wdev);
}
int cfg80211_ibss_wext_giwfreq(struct net_device *dev,
@@ -372,12 +343,10 @@ int cfg80211_ibss_wext_giwfreq(struct net_device *dev,
if (WARN_ON(wdev->iftype != NL80211_IFTYPE_ADHOC))
return -EINVAL;
- wdev_lock(wdev);
if (wdev->u.ibss.current_bss)
chan = wdev->u.ibss.current_bss->pub.channel;
else if (wdev->wext.ibss.chandef.chan)
chan = wdev->wext.ibss.chandef.chan;
- wdev_unlock(wdev);
if (chan) {
freq->m = chan->center_freq;
@@ -405,11 +374,9 @@ int cfg80211_ibss_wext_siwessid(struct net_device *dev,
if (!rdev->ops->join_ibss)
return -EOPNOTSUPP;
- wdev_lock(wdev);
err = 0;
if (wdev->u.ibss.ssid_len)
- err = __cfg80211_leave_ibss(rdev, dev, true);
- wdev_unlock(wdev);
+ err = cfg80211_leave_ibss(rdev, dev, true);
if (err)
return err;
@@ -422,11 +389,7 @@ int cfg80211_ibss_wext_siwessid(struct net_device *dev,
wdev->wext.ibss.ssid = wdev->u.ibss.ssid;
wdev->wext.ibss.ssid_len = len;
- wdev_lock(wdev);
- err = cfg80211_ibss_wext_join(rdev, wdev);
- wdev_unlock(wdev);
-
- return err;
+ return cfg80211_ibss_wext_join(rdev, wdev);
}
int cfg80211_ibss_wext_giwessid(struct net_device *dev,
@@ -441,7 +404,6 @@ int cfg80211_ibss_wext_giwessid(struct net_device *dev,
data->flags = 0;
- wdev_lock(wdev);
if (wdev->u.ibss.ssid_len) {
data->flags = 1;
data->length = wdev->u.ibss.ssid_len;
@@ -451,7 +413,6 @@ int cfg80211_ibss_wext_giwessid(struct net_device *dev,
data->length = wdev->wext.ibss.ssid_len;
memcpy(ssid, wdev->wext.ibss.ssid, data->length);
}
- wdev_unlock(wdev);
return 0;
}
@@ -491,11 +452,9 @@ int cfg80211_ibss_wext_siwap(struct net_device *dev,
ether_addr_equal(bssid, wdev->wext.ibss.bssid))
return 0;
- wdev_lock(wdev);
err = 0;
if (wdev->u.ibss.ssid_len)
- err = __cfg80211_leave_ibss(rdev, dev, true);
- wdev_unlock(wdev);
+ err = cfg80211_leave_ibss(rdev, dev, true);
if (err)
return err;
@@ -506,11 +465,7 @@ int cfg80211_ibss_wext_siwap(struct net_device *dev,
} else
wdev->wext.ibss.bssid = NULL;
- wdev_lock(wdev);
- err = cfg80211_ibss_wext_join(rdev, wdev);
- wdev_unlock(wdev);
-
- return err;
+ return cfg80211_ibss_wext_join(rdev, wdev);
}
int cfg80211_ibss_wext_giwap(struct net_device *dev,
@@ -525,7 +480,6 @@ int cfg80211_ibss_wext_giwap(struct net_device *dev,
ap_addr->sa_family = ARPHRD_ETHER;
- wdev_lock(wdev);
if (wdev->u.ibss.current_bss)
memcpy(ap_addr->sa_data, wdev->u.ibss.current_bss->pub.bssid,
ETH_ALEN);
@@ -534,8 +488,6 @@ int cfg80211_ibss_wext_giwap(struct net_device *dev,
else
eth_zero_addr(ap_addr->sa_data);
- wdev_unlock(wdev);
-
return 0;
}
#endif
diff --git a/net/wireless/lib80211_crypt_tkip.c b/net/wireless/lib80211_crypt_tkip.c
index 1b4d6c87a5c5..5c8cdf7681e3 100644
--- a/net/wireless/lib80211_crypt_tkip.c
+++ b/net/wireless/lib80211_crypt_tkip.c
@@ -662,12 +662,12 @@ static int lib80211_tkip_get_key(void *key, int len, u8 * seq, void *priv)
memcpy(key, tkey->key, TKIP_KEY_LEN);
if (seq) {
- /* Return the sequence number of the last transmitted frame. */
- u16 iv16 = tkey->tx_iv16;
- u32 iv32 = tkey->tx_iv32;
- if (iv16 == 0)
- iv32--;
- iv16--;
+ /*
+ * Not clear if this should return the value as is
+ * or - as the code previously seemed to partially
+ * have been written as - subtract one from it. It
+ * was working this way for a long time so leave it.
+ */
seq[0] = tkey->tx_iv16;
seq[1] = tkey->tx_iv16 >> 8;
seq[2] = tkey->tx_iv32;
diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c
index 59a3c5c092b1..83306979fbe2 100644
--- a/net/wireless/mesh.c
+++ b/net/wireless/mesh.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Portions
- * Copyright (C) 2022 Intel Corporation
+ * Copyright (C) 2022-2023 Intel Corporation
*/
#include <linux/ieee80211.h>
#include <linux/export.h>
@@ -109,7 +109,7 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
BUILD_BUG_ON(IEEE80211_MAX_SSID_LEN != IEEE80211_MAX_MESH_ID_LEN);
- ASSERT_WDEV_LOCK(wdev);
+ lockdep_assert_wiphy(wdev->wiphy);
if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT)
return -EOPNOTSUPP;
@@ -172,7 +172,6 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
* basic rates
*/
if (!setup->basic_rates) {
- enum nl80211_bss_scan_width scan_width;
struct ieee80211_supported_band *sband =
rdev->wiphy.bands[setup->chandef.chan->band];
@@ -193,9 +192,7 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
}
}
} else {
- scan_width = cfg80211_chandef_to_scan_width(&setup->chandef);
- setup->basic_rates = ieee80211_mandatory_rates(sband,
- scan_width);
+ setup->basic_rates = ieee80211_mandatory_rates(sband);
}
}
@@ -257,13 +254,13 @@ int cfg80211_set_mesh_channel(struct cfg80211_registered_device *rdev,
return 0;
}
-int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev,
- struct net_device *dev)
+int cfg80211_leave_mesh(struct cfg80211_registered_device *rdev,
+ struct net_device *dev)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
int err;
- ASSERT_WDEV_LOCK(wdev);
+ lockdep_assert_wiphy(wdev->wiphy);
if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT)
return -EOPNOTSUPP;
@@ -287,16 +284,3 @@ int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev,
return err;
}
-
-int cfg80211_leave_mesh(struct cfg80211_registered_device *rdev,
- struct net_device *dev)
-{
- struct wireless_dev *wdev = dev->ieee80211_ptr;
- int err;
-
- wdev_lock(wdev);
- err = __cfg80211_leave_mesh(rdev, dev);
- wdev_unlock(wdev);
-
- return err;
-}
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 775cac4d6100..f635a8b6ca2e 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -4,7 +4,7 @@
*
* Copyright (c) 2009, Jouni Malinen <j@w1.fi>
* Copyright (c) 2015 Intel Deutschland GmbH
- * Copyright (C) 2019-2020, 2022 Intel Corporation
+ * Copyright (C) 2019-2020, 2022-2023 Intel Corporation
*/
#include <linux/kernel.h>
@@ -22,7 +22,7 @@
void cfg80211_rx_assoc_resp(struct net_device *dev,
- struct cfg80211_rx_assoc_resp *data)
+ const struct cfg80211_rx_assoc_resp_data *data)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct wiphy *wiphy = wdev->wiphy;
@@ -43,16 +43,18 @@ void cfg80211_rx_assoc_resp(struct net_device *dev,
for (link_id = 0; link_id < ARRAY_SIZE(data->links); link_id++) {
cr.links[link_id].status = data->links[link_id].status;
+ cr.links[link_id].bss = data->links[link_id].bss;
+
WARN_ON_ONCE(cr.links[link_id].status != WLAN_STATUS_SUCCESS &&
(!cr.ap_mld_addr || !cr.links[link_id].bss));
- cr.links[link_id].bss = data->links[link_id].bss;
if (!cr.links[link_id].bss)
continue;
cr.links[link_id].bssid = data->links[link_id].bss->bssid;
cr.links[link_id].addr = data->links[link_id].addr;
/* need to have local link addresses for MLO connections */
- WARN_ON(cr.ap_mld_addr && !cr.links[link_id].addr);
+ WARN_ON(cr.ap_mld_addr &&
+ !is_valid_ether_addr(cr.links[link_id].addr));
BUG_ON(!cr.links[link_id].bss->channel);
@@ -149,7 +151,7 @@ void cfg80211_rx_mlme_mgmt(struct net_device *dev, const u8 *buf, size_t len)
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct ieee80211_mgmt *mgmt = (void *)buf;
- ASSERT_WDEV_LOCK(wdev);
+ lockdep_assert_wiphy(wdev->wiphy);
trace_cfg80211_rx_mlme_mgmt(dev, buf, len);
@@ -214,7 +216,7 @@ void cfg80211_tx_mlme_mgmt(struct net_device *dev, const u8 *buf, size_t len,
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct ieee80211_mgmt *mgmt = (void *)buf;
- ASSERT_WDEV_LOCK(wdev);
+ lockdep_assert_wiphy(wdev->wiphy);
trace_cfg80211_tx_mlme_mgmt(dev, buf, len, reconnect);
@@ -262,7 +264,7 @@ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev,
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
- ASSERT_WDEV_LOCK(wdev);
+ lockdep_assert_wiphy(wdev->wiphy);
if (!req->bss)
return -ENOENT;
@@ -331,7 +333,7 @@ int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
struct wireless_dev *wdev = dev->ieee80211_ptr;
int err, i, j;
- ASSERT_WDEV_LOCK(wdev);
+ lockdep_assert_wiphy(wdev->wiphy);
for (i = 1; i < ARRAY_SIZE(req->links); i++) {
if (!req->links[i].bss)
@@ -393,7 +395,7 @@ int cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev,
.local_state_change = local_state_change,
};
- ASSERT_WDEV_LOCK(wdev);
+ lockdep_assert_wiphy(wdev->wiphy);
if (local_state_change &&
(!wdev->connected ||
@@ -423,7 +425,7 @@ int cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev,
};
int err;
- ASSERT_WDEV_LOCK(wdev);
+ lockdep_assert_wiphy(wdev->wiphy);
if (!wdev->connected)
return -ENOTCONN;
@@ -446,7 +448,7 @@ void cfg80211_mlme_down(struct cfg80211_registered_device *rdev,
struct wireless_dev *wdev = dev->ieee80211_ptr;
u8 bssid[ETH_ALEN];
- ASSERT_WDEV_LOCK(wdev);
+ lockdep_assert_wiphy(wdev->wiphy);
if (!rdev->ops->deauth)
return;
@@ -726,6 +728,8 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
const struct ieee80211_mgmt *mgmt;
u16 stype;
+ lockdep_assert_wiphy(&rdev->wiphy);
+
if (!wdev->wiphy->mgmt_stypes)
return -EOPNOTSUPP;
@@ -748,8 +752,6 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
mgmt->u.action.category != WLAN_CATEGORY_PUBLIC) {
int err = 0;
- wdev_lock(wdev);
-
switch (wdev->iftype) {
case NL80211_IFTYPE_ADHOC:
/*
@@ -814,7 +816,6 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
err = -EOPNOTSUPP;
break;
}
- wdev_unlock(wdev);
if (err)
return err;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index de47838aca4f..b09700400d09 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -106,7 +106,7 @@ __cfg80211_wdev_from_attrs(struct cfg80211_registered_device *rdev,
ASSERT_RTNL();
- list_for_each_entry(rdev, &cfg80211_rdev_list, list) {
+ for_each_rdev(rdev) {
struct wireless_dev *wdev;
if (wiphy_net(&rdev->wiphy) != netns)
@@ -463,7 +463,7 @@ nl80211_sta_wme_policy[NL80211_STA_WME_MAX + 1] = {
[NL80211_STA_WME_MAX_SP] = { .type = NLA_U8 },
};
-static struct netlink_range_validation nl80211_punct_bitmap_range = {
+static const struct netlink_range_validation nl80211_punct_bitmap_range = {
.min = 0,
.max = 0xffff,
};
@@ -818,6 +818,9 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_HW_TIMESTAMP_ENABLED] = { .type = NLA_FLAG },
[NL80211_ATTR_EMA_RNR_ELEMS] = { .type = NLA_NESTED },
[NL80211_ATTR_MLO_LINK_DISABLED] = { .type = NLA_FLAG },
+ [NL80211_ATTR_BSS_DUMP_INCLUDE_USE_DATA] = { .type = NLA_FLAG },
+ [NL80211_ATTR_MLO_TTLM_DLINK] = NLA_POLICY_EXACT_LEN(sizeof(u16) * 8),
+ [NL80211_ATTR_MLO_TTLM_ULINK] = NLA_POLICY_EXACT_LEN(sizeof(u16) * 8),
};
/* policy for the key attributes */
@@ -1115,6 +1118,10 @@ static int nl80211_msg_put_channel(struct sk_buff *msg, struct wiphy *wiphy,
if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_OFFSET, chan->freq_offset))
goto nla_put_failure;
+ if ((chan->flags & IEEE80211_CHAN_PSD) &&
+ nla_put_s8(msg, NL80211_FREQUENCY_ATTR_PSD, chan->psd))
+ goto nla_put_failure;
+
if ((chan->flags & IEEE80211_CHAN_DISABLED) &&
nla_put_flag(msg, NL80211_FREQUENCY_ATTR_DISABLED))
goto nla_put_failure;
@@ -1194,6 +1201,15 @@ static int nl80211_msg_put_channel(struct sk_buff *msg, struct wiphy *wiphy,
if ((chan->flags & IEEE80211_CHAN_NO_EHT) &&
nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_EHT))
goto nla_put_failure;
+ if ((chan->flags & IEEE80211_CHAN_DFS_CONCURRENT) &&
+ nla_put_flag(msg, NL80211_FREQUENCY_ATTR_DFS_CONCURRENT))
+ goto nla_put_failure;
+ if ((chan->flags & IEEE80211_CHAN_NO_UHB_VLP_CLIENT) &&
+ nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_UHB_VLP_CLIENT))
+ goto nla_put_failure;
+ if ((chan->flags & IEEE80211_CHAN_NO_UHB_AFC_CLIENT) &&
+ nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_UHB_AFC_CLIENT))
+ goto nla_put_failure;
}
if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_MAX_TX_POWER,
@@ -1544,7 +1560,7 @@ nl80211_parse_connkeys(struct cfg80211_registered_device *rdev,
static int nl80211_key_allowed(struct wireless_dev *wdev)
{
- ASSERT_WDEV_LOCK(wdev);
+ lockdep_assert_wiphy(wdev->wiphy);
switch (wdev->iftype) {
case NL80211_IFTYPE_AP:
@@ -1913,20 +1929,20 @@ static int nl80211_send_band_rateinfo(struct sk_buff *msg,
struct nlattr *nl_iftype_data =
nla_nest_start_noflag(msg,
NL80211_BAND_ATTR_IFTYPE_DATA);
+ const struct ieee80211_sband_iftype_data *iftd;
int err;
if (!nl_iftype_data)
return -ENOBUFS;
- for (i = 0; i < sband->n_iftype_data; i++) {
+ for_each_sband_iftype_data(sband, i, iftd) {
struct nlattr *iftdata;
iftdata = nla_nest_start_noflag(msg, i + 1);
if (!iftdata)
return -ENOBUFS;
- err = nl80211_send_iftype_data(msg, sband,
- &sband->iftype_data[i]);
+ err = nl80211_send_iftype_data(msg, sband, iftd);
if (err)
return err;
@@ -3075,7 +3091,7 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb)
cb->args[0] = (long)state;
}
- list_for_each_entry(rdev, &cfg80211_rdev_list, list) {
+ for_each_rdev(rdev) {
if (!net_eq(wiphy_net(&rdev->wiphy), sock_net(skb->sk)))
continue;
if (++idx <= state->start)
@@ -3423,13 +3439,8 @@ static int nl80211_set_channel(struct sk_buff *skb, struct genl_info *info)
struct cfg80211_registered_device *rdev = info->user_ptr[0];
int link_id = nl80211_link_id_or_invalid(info->attrs);
struct net_device *netdev = info->user_ptr[1];
- int ret;
-
- wdev_lock(netdev->ieee80211_ptr);
- ret = __nl80211_set_channel(rdev, netdev, info, link_id);
- wdev_unlock(netdev->ieee80211_ptr);
- return ret;
+ return __nl80211_set_channel(rdev, netdev, info, link_id);
}
static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
@@ -3536,7 +3547,6 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
txq_params.link_id =
nl80211_link_id_or_invalid(info->attrs);
- wdev_lock(netdev->ieee80211_ptr);
if (txq_params.link_id >= 0 &&
!(netdev->ieee80211_ptr->valid_links &
BIT(txq_params.link_id)))
@@ -3547,7 +3557,6 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
else
result = rdev_set_txq_params(rdev, netdev,
&txq_params);
- wdev_unlock(netdev->ieee80211_ptr);
if (result)
goto out;
}
@@ -3557,12 +3566,10 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
int link_id = nl80211_link_id_or_invalid(info->attrs);
if (wdev) {
- wdev_lock(wdev);
result = __nl80211_set_channel(
rdev,
nl80211_can_set_dev_channel(wdev) ? netdev : NULL,
info, link_id);
- wdev_unlock(wdev);
} else {
result = __nl80211_set_channel(rdev, netdev, info, link_id);
}
@@ -3827,6 +3834,8 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
struct net_device *dev = wdev->netdev;
void *hdr;
+ lockdep_assert_wiphy(&rdev->wiphy);
+
WARN_ON(cmd != NL80211_CMD_NEW_INTERFACE &&
cmd != NL80211_CMD_DEL_INTERFACE &&
cmd != NL80211_CMD_SET_INTERFACE);
@@ -3870,33 +3879,31 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
goto nla_put_failure;
}
- wdev_lock(wdev);
switch (wdev->iftype) {
case NL80211_IFTYPE_AP:
case NL80211_IFTYPE_P2P_GO:
if (wdev->u.ap.ssid_len &&
nla_put(msg, NL80211_ATTR_SSID, wdev->u.ap.ssid_len,
wdev->u.ap.ssid))
- goto nla_put_failure_locked;
+ goto nla_put_failure;
break;
case NL80211_IFTYPE_STATION:
case NL80211_IFTYPE_P2P_CLIENT:
if (wdev->u.client.ssid_len &&
nla_put(msg, NL80211_ATTR_SSID, wdev->u.client.ssid_len,
wdev->u.client.ssid))
- goto nla_put_failure_locked;
+ goto nla_put_failure;
break;
case NL80211_IFTYPE_ADHOC:
if (wdev->u.ibss.ssid_len &&
nla_put(msg, NL80211_ATTR_SSID, wdev->u.ibss.ssid_len,
wdev->u.ibss.ssid))
- goto nla_put_failure_locked;
+ goto nla_put_failure;
break;
default:
/* nothing */
break;
}
- wdev_unlock(wdev);
if (rdev->ops->get_txq_stats) {
struct cfg80211_txq_stats txqstats = {};
@@ -3943,8 +3950,6 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
genlmsg_end(msg, hdr);
return 0;
- nla_put_failure_locked:
- wdev_unlock(wdev);
nla_put_failure:
genlmsg_cancel(msg, hdr);
return -EMSGSIZE;
@@ -3985,7 +3990,7 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback *
filter_wiphy = cb->args[2] - 1;
}
- list_for_each_entry(rdev, &cfg80211_rdev_list, list) {
+ for_each_rdev(rdev) {
if (!net_eq(wiphy_net(&rdev->wiphy), sock_net(skb->sk)))
continue;
if (wp_idx < wp_start) {
@@ -3998,6 +4003,7 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback *
if_idx = 0;
+ wiphy_lock(&rdev->wiphy);
list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) {
if (if_idx < if_start) {
if_idx++;
@@ -4007,11 +4013,14 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback *
cb->nlh->nlmsg_seq, NLM_F_MULTI,
rdev, wdev,
NL80211_CMD_NEW_INTERFACE) < 0) {
+ wiphy_unlock(&rdev->wiphy);
goto out;
}
if_idx++;
}
+ wiphy_unlock(&rdev->wiphy);
+ if_start = 0;
wp_idx++;
}
out:
@@ -4191,7 +4200,6 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info)
if (netif_running(dev))
return -EBUSY;
- wdev_lock(wdev);
BUILD_BUG_ON(IEEE80211_MAX_SSID_LEN !=
IEEE80211_MAX_MESH_ID_LEN);
wdev->u.mesh.id_up_len =
@@ -4199,7 +4207,6 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info)
memcpy(wdev->u.mesh.id,
nla_data(info->attrs[NL80211_ATTR_MESH_ID]),
wdev->u.mesh.id_up_len);
- wdev_unlock(wdev);
}
if (info->attrs[NL80211_ATTR_4ADDR]) {
@@ -4300,7 +4307,6 @@ static int _nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
case NL80211_IFTYPE_MESH_POINT:
if (!info->attrs[NL80211_ATTR_MESH_ID])
break;
- wdev_lock(wdev);
BUILD_BUG_ON(IEEE80211_MAX_SSID_LEN !=
IEEE80211_MAX_MESH_ID_LEN);
wdev->u.mesh.id_up_len =
@@ -4308,7 +4314,6 @@ static int _nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
memcpy(wdev->u.mesh.id,
nla_data(info->attrs[NL80211_ATTR_MESH_ID]),
wdev->u.mesh.id_up_len);
- wdev_unlock(wdev);
break;
case NL80211_IFTYPE_NAN:
case NL80211_IFTYPE_P2P_DEVICE:
@@ -4599,79 +4604,67 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info)
!(key.p.mode == NL80211_KEY_SET_TX))
return -EINVAL;
- wdev_lock(wdev);
-
if (key.def) {
- if (!rdev->ops->set_default_key) {
- err = -EOPNOTSUPP;
- goto out;
- }
+ if (!rdev->ops->set_default_key)
+ return -EOPNOTSUPP;
err = nl80211_key_allowed(wdev);
if (err)
- goto out;
+ return err;
err = nl80211_validate_key_link_id(info, wdev, link_id, false);
if (err)
- goto out;
+ return err;
err = rdev_set_default_key(rdev, dev, link_id, key.idx,
key.def_uni, key.def_multi);
if (err)
- goto out;
+ return err;
#ifdef CONFIG_CFG80211_WEXT
wdev->wext.default_key = key.idx;
#endif
+ return 0;
} else if (key.defmgmt) {
- if (key.def_uni || !key.def_multi) {
- err = -EINVAL;
- goto out;
- }
+ if (key.def_uni || !key.def_multi)
+ return -EINVAL;
- if (!rdev->ops->set_default_mgmt_key) {
- err = -EOPNOTSUPP;
- goto out;
- }
+ if (!rdev->ops->set_default_mgmt_key)
+ return -EOPNOTSUPP;
err = nl80211_key_allowed(wdev);
if (err)
- goto out;
+ return err;
err = nl80211_validate_key_link_id(info, wdev, link_id, false);
if (err)
- goto out;
+ return err;
err = rdev_set_default_mgmt_key(rdev, dev, link_id, key.idx);
if (err)
- goto out;
+ return err;
#ifdef CONFIG_CFG80211_WEXT
wdev->wext.default_mgmt_key = key.idx;
#endif
+ return 0;
} else if (key.defbeacon) {
- if (key.def_uni || !key.def_multi) {
- err = -EINVAL;
- goto out;
- }
+ if (key.def_uni || !key.def_multi)
+ return -EINVAL;
- if (!rdev->ops->set_default_beacon_key) {
- err = -EOPNOTSUPP;
- goto out;
- }
+ if (!rdev->ops->set_default_beacon_key)
+ return -EOPNOTSUPP;
err = nl80211_key_allowed(wdev);
if (err)
- goto out;
+ return err;
err = nl80211_validate_key_link_id(info, wdev, link_id, false);
if (err)
- goto out;
+ return err;
- err = rdev_set_default_beacon_key(rdev, dev, link_id, key.idx);
- if (err)
- goto out;
+ return rdev_set_default_beacon_key(rdev, dev, link_id, key.idx);
} else if (key.p.mode == NL80211_KEY_SET_TX &&
wiphy_ext_feature_isset(&rdev->wiphy,
NL80211_EXT_FEATURE_EXT_KEY_ID)) {
@@ -4680,25 +4673,19 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info)
if (info->attrs[NL80211_ATTR_MAC])
mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
- if (!mac_addr || key.idx < 0 || key.idx > 1) {
- err = -EINVAL;
- goto out;
- }
+ if (!mac_addr || key.idx < 0 || key.idx > 1)
+ return -EINVAL;
err = nl80211_validate_key_link_id(info, wdev, link_id, true);
if (err)
- goto out;
+ return err;
- err = rdev_add_key(rdev, dev, link_id, key.idx,
- NL80211_KEYTYPE_PAIRWISE,
- mac_addr, &key.p);
- } else {
- err = -EINVAL;
+ return rdev_add_key(rdev, dev, link_id, key.idx,
+ NL80211_KEYTYPE_PAIRWISE,
+ mac_addr, &key.p);
}
- out:
- wdev_unlock(wdev);
- return err;
+ return -EINVAL;
}
static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info)
@@ -4751,7 +4738,6 @@ static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
}
- wdev_lock(wdev);
err = nl80211_key_allowed(wdev);
if (err)
GENL_SET_ERR_MSG(info, "key not allowed");
@@ -4767,7 +4753,6 @@ static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info)
if (err)
GENL_SET_ERR_MSG(info, "key addition failed");
}
- wdev_unlock(wdev);
return err;
}
@@ -4808,7 +4793,6 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info)
if (!rdev->ops->del_key)
return -EOPNOTSUPP;
- wdev_lock(wdev);
err = nl80211_key_allowed(wdev);
if (key.type == NL80211_KEYTYPE_GROUP && mac_addr &&
@@ -4832,7 +4816,6 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info)
wdev->wext.default_mgmt_key = -1;
}
#endif
- wdev_unlock(wdev);
return err;
}
@@ -4885,7 +4868,7 @@ static struct cfg80211_acl_data *parse_acl_data(struct wiphy *wiphy,
return ERR_PTR(n_entries);
if (n_entries > wiphy->max_acl_mac_addrs)
- return ERR_PTR(-ENOTSUPP);
+ return ERR_PTR(-EOPNOTSUPP);
acl = kzalloc(struct_size(acl, mac_addrs, n_entries), GFP_KERNEL);
if (!acl)
@@ -5671,11 +5654,10 @@ static int nl80211_parse_he_obss_pd(struct nlattr *attrs,
static int nl80211_parse_fils_discovery(struct cfg80211_registered_device *rdev,
struct nlattr *attrs,
- struct cfg80211_ap_settings *params)
+ struct cfg80211_fils_discovery *fd)
{
struct nlattr *tb[NL80211_FILS_DISCOVERY_ATTR_MAX + 1];
int ret;
- struct cfg80211_fils_discovery *fd = &params->fils_discovery;
if (!wiphy_ext_feature_isset(&rdev->wiphy,
NL80211_EXT_FEATURE_FILS_DISCOVERY))
@@ -5686,6 +5668,13 @@ static int nl80211_parse_fils_discovery(struct cfg80211_registered_device *rdev,
if (ret)
return ret;
+ if (!tb[NL80211_FILS_DISCOVERY_ATTR_INT_MIN] &&
+ !tb[NL80211_FILS_DISCOVERY_ATTR_INT_MAX] &&
+ !tb[NL80211_FILS_DISCOVERY_ATTR_TMPL]) {
+ fd->update = true;
+ return 0;
+ }
+
if (!tb[NL80211_FILS_DISCOVERY_ATTR_INT_MIN] ||
!tb[NL80211_FILS_DISCOVERY_ATTR_INT_MAX] ||
!tb[NL80211_FILS_DISCOVERY_ATTR_TMPL])
@@ -5695,19 +5684,17 @@ static int nl80211_parse_fils_discovery(struct cfg80211_registered_device *rdev,
fd->tmpl = nla_data(tb[NL80211_FILS_DISCOVERY_ATTR_TMPL]);
fd->min_interval = nla_get_u32(tb[NL80211_FILS_DISCOVERY_ATTR_INT_MIN]);
fd->max_interval = nla_get_u32(tb[NL80211_FILS_DISCOVERY_ATTR_INT_MAX]);
-
+ fd->update = true;
return 0;
}
static int
nl80211_parse_unsol_bcast_probe_resp(struct cfg80211_registered_device *rdev,
struct nlattr *attrs,
- struct cfg80211_ap_settings *params)
+ struct cfg80211_unsol_bcast_probe_resp *presp)
{
struct nlattr *tb[NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_MAX + 1];
int ret;
- struct cfg80211_unsol_bcast_probe_resp *presp =
- &params->unsol_bcast_probe_resp;
if (!wiphy_ext_feature_isset(&rdev->wiphy,
NL80211_EXT_FEATURE_UNSOL_BCAST_PROBE_RESP))
@@ -5718,6 +5705,12 @@ nl80211_parse_unsol_bcast_probe_resp(struct cfg80211_registered_device *rdev,
if (ret)
return ret;
+ if (!tb[NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_INT] &&
+ !tb[NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_TMPL]) {
+ presp->update = true;
+ return 0;
+ }
+
if (!tb[NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_INT] ||
!tb[NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_TMPL])
return -EINVAL;
@@ -5725,6 +5718,7 @@ nl80211_parse_unsol_bcast_probe_resp(struct cfg80211_registered_device *rdev,
presp->tmpl = nla_data(tb[NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_TMPL]);
presp->tmpl_len = nla_len(tb[NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_TMPL]);
presp->interval = nla_get_u32(tb[NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_INT]);
+ presp->update = true;
return 0;
}
@@ -5909,6 +5903,21 @@ out:
nlmsg_free(msg);
}
+static int nl80211_validate_ap_phy_operation(struct cfg80211_ap_settings *params)
+{
+ struct ieee80211_channel *channel = params->chandef.chan;
+
+ if ((params->he_cap || params->he_oper) &&
+ (channel->flags & IEEE80211_CHAN_NO_HE))
+ return -EOPNOTSUPP;
+
+ if ((params->eht_cap || params->eht_oper) &&
+ (channel->flags & IEEE80211_CHAN_NO_EHT))
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
{
struct cfg80211_registered_device *rdev = info->user_ptr[0];
@@ -6072,20 +6081,18 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
goto out;
}
- wdev_lock(wdev);
-
if (info->attrs[NL80211_ATTR_TX_RATES]) {
err = nl80211_parse_tx_bitrate_mask(info, info->attrs,
NL80211_ATTR_TX_RATES,
&params->beacon_rate,
dev, false, link_id);
if (err)
- goto out_unlock;
+ goto out;
err = validate_beacon_tx_rate(rdev, params->chandef.chan->band,
&params->beacon_rate);
if (err)
- goto out_unlock;
+ goto out;
}
if (info->attrs[NL80211_ATTR_SMPS_MODE]) {
@@ -6098,19 +6105,19 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
if (!(rdev->wiphy.features &
NL80211_FEATURE_STATIC_SMPS)) {
err = -EINVAL;
- goto out_unlock;
+ goto out;
}
break;
case NL80211_SMPS_DYNAMIC:
if (!(rdev->wiphy.features &
NL80211_FEATURE_DYNAMIC_SMPS)) {
err = -EINVAL;
- goto out_unlock;
+ goto out;
}
break;
default:
err = -EINVAL;
- goto out_unlock;
+ goto out;
}
} else {
params->smps_mode = NL80211_SMPS_OFF;
@@ -6119,7 +6126,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
params->pbss = nla_get_flag(info->attrs[NL80211_ATTR_PBSS]);
if (params->pbss && !rdev->wiphy.bands[NL80211_BAND_60GHZ]) {
err = -EOPNOTSUPP;
- goto out_unlock;
+ goto out;
}
if (info->attrs[NL80211_ATTR_ACL_POLICY]) {
@@ -6127,7 +6134,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
if (IS_ERR(params->acl)) {
err = PTR_ERR(params->acl);
params->acl = NULL;
- goto out_unlock;
+ goto out;
}
}
@@ -6139,23 +6146,23 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
info->attrs[NL80211_ATTR_HE_OBSS_PD],
&params->he_obss_pd);
if (err)
- goto out_unlock;
+ goto out;
}
if (info->attrs[NL80211_ATTR_FILS_DISCOVERY]) {
err = nl80211_parse_fils_discovery(rdev,
info->attrs[NL80211_ATTR_FILS_DISCOVERY],
- params);
+ &params->fils_discovery);
if (err)
- goto out_unlock;
+ goto out;
}
if (info->attrs[NL80211_ATTR_UNSOL_BCAST_PROBE_RESP]) {
err = nl80211_parse_unsol_bcast_probe_resp(
rdev, info->attrs[NL80211_ATTR_UNSOL_BCAST_PROBE_RESP],
- params);
+ &params->unsol_bcast_probe_resp);
if (err)
- goto out_unlock;
+ goto out;
}
if (info->attrs[NL80211_ATTR_MBSSID_CONFIG]) {
@@ -6166,17 +6173,21 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
params->beacon.mbssid_ies->cnt :
0);
if (err)
- goto out_unlock;
+ goto out;
}
if (!params->mbssid_config.ema && params->beacon.rnr_ies) {
err = -EINVAL;
- goto out_unlock;
+ goto out;
}
err = nl80211_calculate_ap_params(params);
if (err)
- goto out_unlock;
+ goto out;
+
+ err = nl80211_validate_ap_phy_operation(params);
+ if (err)
+ goto out;
if (info->attrs[NL80211_ATTR_AP_SETTINGS_FLAGS])
params->flags = nla_get_u32(
@@ -6188,7 +6199,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
info->attrs[NL80211_ATTR_SOCKET_OWNER] &&
wdev->conn_owner_nlportid != info->snd_portid) {
err = -EINVAL;
- goto out_unlock;
+ goto out;
}
/* FIXME: validate MLO/link-id against driver capabilities */
@@ -6206,8 +6217,6 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
nl80211_send_ap_started(wdev, link_id);
}
-out_unlock:
- wdev_unlock(wdev);
out:
kfree(params->acl);
kfree(params->beacon.mbssid_ies);
@@ -6227,7 +6236,8 @@ static int nl80211_set_beacon(struct sk_buff *skb, struct genl_info *info)
unsigned int link_id = nl80211_link_id(info->attrs);
struct net_device *dev = info->user_ptr[1];
struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct cfg80211_beacon_data params;
+ struct cfg80211_ap_update *params;
+ struct nlattr *attr;
int err;
if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP &&
@@ -6240,17 +6250,37 @@ static int nl80211_set_beacon(struct sk_buff *skb, struct genl_info *info)
if (!wdev->links[link_id].ap.beacon_interval)
return -EINVAL;
- err = nl80211_parse_beacon(rdev, info->attrs, &params, info->extack);
+ params = kzalloc(sizeof(*params), GFP_KERNEL);
+ if (!params)
+ return -ENOMEM;
+
+ err = nl80211_parse_beacon(rdev, info->attrs, &params->beacon,
+ info->extack);
if (err)
goto out;
- wdev_lock(wdev);
- err = rdev_change_beacon(rdev, dev, &params);
- wdev_unlock(wdev);
+ attr = info->attrs[NL80211_ATTR_FILS_DISCOVERY];
+ if (attr) {
+ err = nl80211_parse_fils_discovery(rdev, attr,
+ &params->fils_discovery);
+ if (err)
+ goto out;
+ }
+
+ attr = info->attrs[NL80211_ATTR_UNSOL_BCAST_PROBE_RESP];
+ if (attr) {
+ err = nl80211_parse_unsol_bcast_probe_resp(rdev, attr,
+ &params->unsol_bcast_probe_resp);
+ if (err)
+ goto out;
+ }
+
+ err = rdev_change_beacon(rdev, dev, params);
out:
- kfree(params.mbssid_ies);
- kfree(params.rnr_ies);
+ kfree(params->beacon.mbssid_ies);
+ kfree(params->beacon.rnr_ies);
+ kfree(params);
return err;
}
@@ -7305,9 +7335,7 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
}
/* driver will call cfg80211_check_station_change() */
- wdev_lock(dev->ieee80211_ptr);
err = rdev_change_station(rdev, dev, mac_addr, &params);
- wdev_unlock(dev->ieee80211_ptr);
out_put_vlan:
dev_put(params.vlan);
@@ -7575,7 +7603,6 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
/* be aware of params.vlan when changing code here */
- wdev_lock(dev->ieee80211_ptr);
if (wdev->valid_links) {
if (params.link_sta_params.link_id < 0) {
err = -EINVAL;
@@ -7593,7 +7620,6 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
}
err = rdev_add_station(rdev, dev, mac_addr, &params);
out:
- wdev_unlock(dev->ieee80211_ptr);
dev_put(params.vlan);
return err;
}
@@ -7603,7 +7629,6 @@ static int nl80211_del_station(struct sk_buff *skb, struct genl_info *info)
struct cfg80211_registered_device *rdev = info->user_ptr[0];
struct net_device *dev = info->user_ptr[1];
struct station_del_parameters params;
- int ret;
memset(&params, 0, sizeof(params));
@@ -7651,11 +7676,7 @@ static int nl80211_del_station(struct sk_buff *skb, struct genl_info *info)
params.reason_code = WLAN_REASON_PREV_AUTH_NOT_VALID;
}
- wdev_lock(dev->ieee80211_ptr);
- ret = rdev_del_station(rdev, dev, &params);
- wdev_unlock(dev->ieee80211_ptr);
-
- return ret;
+ return rdev_del_station(rdev, dev, &params);
}
static int nl80211_send_mpath(struct sk_buff *msg, u32 portid, u32 seq,
@@ -7974,9 +7995,7 @@ static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info)
{
struct cfg80211_registered_device *rdev = info->user_ptr[0];
struct net_device *dev = info->user_ptr[1];
- struct wireless_dev *wdev = dev->ieee80211_ptr;
struct bss_parameters params;
- int err;
memset(&params, 0, sizeof(params));
params.link_id = nl80211_link_id_or_invalid(info->attrs);
@@ -8039,11 +8058,7 @@ static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info)
dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO)
return -EOPNOTSUPP;
- wdev_lock(wdev);
- err = rdev_change_bss(rdev, dev, &params);
- wdev_unlock(wdev);
-
- return err;
+ return rdev_change_bss(rdev, dev, &params);
}
static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info)
@@ -8114,13 +8129,11 @@ static int nl80211_get_mesh_config(struct sk_buff *skb,
if (!rdev->ops->get_mesh_config)
return -EOPNOTSUPP;
- wdev_lock(wdev);
/* If not connected, get default parameters */
if (!wdev->u.mesh.id_len)
memcpy(&cur_params, &default_mesh_config, sizeof(cur_params));
else
err = rdev_get_mesh_config(rdev, dev, &cur_params);
- wdev_unlock(wdev);
if (err)
return err;
@@ -8482,7 +8495,7 @@ static int nl80211_update_mesh_config(struct sk_buff *skb,
struct cfg80211_registered_device *rdev = info->user_ptr[0];
struct net_device *dev = info->user_ptr[1];
struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct mesh_config cfg;
+ struct mesh_config cfg = {};
u32 mask;
int err;
@@ -8496,15 +8509,12 @@ static int nl80211_update_mesh_config(struct sk_buff *skb,
if (err)
return err;
- wdev_lock(wdev);
if (!wdev->u.mesh.id_len)
err = -ENOLINK;
if (!err)
err = rdev_update_mesh_config(rdev, dev, mask, &cfg);
- wdev_unlock(wdev);
-
return err;
}
@@ -8559,6 +8569,11 @@ static int nl80211_put_regdom(const struct ieee80211_regdomain *regdom,
reg_rule->dfs_cac_ms))
goto nla_put_failure;
+ if ((reg_rule->flags & NL80211_RRF_PSD) &&
+ nla_put_s8(msg, NL80211_ATTR_POWER_RULE_PSD,
+ reg_rule->psd))
+ goto nla_put_failure;
+
nla_nest_end(msg, nl_reg_rule);
}
@@ -8995,7 +9010,7 @@ static bool cfg80211_off_channel_oper_allowed(struct wireless_dev *wdev,
unsigned int link_id;
bool all_ok = true;
- ASSERT_WDEV_LOCK(wdev);
+ lockdep_assert_wiphy(wdev->wiphy);
if (!cfg80211_beaconing_iface_active(wdev))
return true;
@@ -9245,7 +9260,6 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
request->n_channels = i;
- wdev_lock(wdev);
for (i = 0; i < request->n_channels; i++) {
struct ieee80211_channel *chan = request->channels[i];
@@ -9254,12 +9268,10 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
continue;
if (!cfg80211_wdev_on_sub_chan(wdev, chan, true)) {
- wdev_unlock(wdev);
err = -EBUSY;
goto out_free;
}
}
- wdev_unlock(wdev);
i = 0;
if (n_ssids) {
@@ -9343,6 +9355,7 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
else
eth_broadcast_addr(request->bssid);
+ request->tsf_report_link_id = nl80211_link_id_or_invalid(info->attrs);
request->wdev = wdev;
request->wiphy = &rdev->wiphy;
request->scan_start = jiffies;
@@ -10265,9 +10278,7 @@ skip_beacons:
goto free;
}
- wdev_lock(wdev);
err = rdev_channel_switch(rdev, dev, &params);
- wdev_unlock(wdev);
free:
kfree(params.beacon_after.mbssid_ies);
@@ -10290,7 +10301,7 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb,
void *hdr;
struct nlattr *bss;
- ASSERT_WDEV_LOCK(wdev);
+ lockdep_assert_wiphy(wdev->wiphy);
hdr = nl80211hdr_put(msg, NETLINK_CB(cb->skb).portid, seq, flags,
NL80211_CMD_NEW_SCAN_RESULTS);
@@ -10353,7 +10364,6 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb,
nla_put_u32(msg, NL80211_BSS_FREQUENCY, res->channel->center_freq) ||
nla_put_u32(msg, NL80211_BSS_FREQUENCY_OFFSET,
res->channel->freq_offset) ||
- nla_put_u32(msg, NL80211_BSS_CHAN_WIDTH, res->scan_width) ||
nla_put_u32(msg, NL80211_BSS_SEEN_MS_AGO,
jiffies_to_msecs(jiffies - intbss->ts)))
goto nla_put_failure;
@@ -10413,6 +10423,15 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb,
break;
}
+ if (nla_put_u32(msg, NL80211_BSS_USE_FOR, res->use_for))
+ goto nla_put_failure;
+
+ if (res->cannot_use_reasons &&
+ nla_put_u64_64bit(msg, NL80211_BSS_CANNOT_USE_REASONS,
+ res->cannot_use_reasons,
+ NL80211_BSS_PAD))
+ goto nla_put_failure;
+
nla_nest_end(msg, bss);
genlmsg_end(msg, hdr);
@@ -10430,16 +10449,27 @@ static int nl80211_dump_scan(struct sk_buff *skb, struct netlink_callback *cb)
struct cfg80211_registered_device *rdev;
struct cfg80211_internal_bss *scan;
struct wireless_dev *wdev;
+ struct nlattr **attrbuf;
int start = cb->args[2], idx = 0;
+ bool dump_include_use_data;
int err;
- err = nl80211_prepare_wdev_dump(cb, &rdev, &wdev, NULL);
- if (err)
+ attrbuf = kcalloc(NUM_NL80211_ATTR, sizeof(*attrbuf), GFP_KERNEL);
+ if (!attrbuf)
+ return -ENOMEM;
+
+ err = nl80211_prepare_wdev_dump(cb, &rdev, &wdev, attrbuf);
+ if (err) {
+ kfree(attrbuf);
return err;
+ }
/* nl80211_prepare_wdev_dump acquired it in the successful case */
__acquire(&rdev->wiphy.mtx);
- wdev_lock(wdev);
+ dump_include_use_data =
+ attrbuf[NL80211_ATTR_BSS_DUMP_INCLUDE_USE_DATA];
+ kfree(attrbuf);
+
spin_lock_bh(&rdev->bss_lock);
/*
@@ -10456,6 +10486,9 @@ static int nl80211_dump_scan(struct sk_buff *skb, struct netlink_callback *cb)
list_for_each_entry(scan, &rdev->bss_list, list) {
if (++idx <= start)
continue;
+ if (!dump_include_use_data &&
+ !(scan->pub.use_for & NL80211_BSS_USE_FOR_NORMAL))
+ continue;
if (nl80211_send_bss(skb, cb,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
rdev, wdev, scan) < 0) {
@@ -10465,7 +10498,6 @@ static int nl80211_dump_scan(struct sk_buff *skb, struct netlink_callback *cb)
}
spin_unlock_bh(&rdev->bss_lock);
- wdev_unlock(wdev);
cb->args[2] = idx;
wiphy_unlock(&rdev->wiphy);
@@ -10588,9 +10620,7 @@ static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb)
}
while (1) {
- wdev_lock(wdev);
res = rdev_dump_survey(rdev, wdev->netdev, survey_idx, &survey);
- wdev_unlock(wdev);
if (res == -ENOENT)
break;
if (res)
@@ -10763,9 +10793,7 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info)
if (!req.bss)
return -ENOENT;
- wdev_lock(dev->ieee80211_ptr);
err = cfg80211_mlme_auth(rdev, dev, &req);
- wdev_unlock(dev->ieee80211_ptr);
cfg80211_put_bss(&rdev->wiphy, req.bss);
@@ -10912,12 +10940,13 @@ static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev,
static struct cfg80211_bss *nl80211_assoc_bss(struct cfg80211_registered_device *rdev,
const u8 *ssid, int ssid_len,
- struct nlattr **attrs)
+ struct nlattr **attrs,
+ int assoc_link_id, int link_id)
{
struct ieee80211_channel *chan;
struct cfg80211_bss *bss;
const u8 *bssid;
- u32 freq;
+ u32 freq, use_for = 0;
if (!attrs[NL80211_ATTR_MAC] || !attrs[NL80211_ATTR_WIPHY_FREQ])
return ERR_PTR(-EINVAL);
@@ -10932,10 +10961,16 @@ static struct cfg80211_bss *nl80211_assoc_bss(struct cfg80211_registered_device
if (!chan)
return ERR_PTR(-EINVAL);
- bss = cfg80211_get_bss(&rdev->wiphy, chan, bssid,
- ssid, ssid_len,
- IEEE80211_BSS_TYPE_ESS,
- IEEE80211_PRIVACY_ANY);
+ if (assoc_link_id >= 0)
+ use_for = NL80211_BSS_USE_FOR_MLD_LINK;
+ if (assoc_link_id == link_id)
+ use_for |= NL80211_BSS_USE_FOR_NORMAL;
+
+ bss = __cfg80211_get_bss(&rdev->wiphy, chan, bssid,
+ ssid, ssid_len,
+ IEEE80211_BSS_TYPE_ESS,
+ IEEE80211_PRIVACY_ANY,
+ use_for);
if (!bss)
return ERR_PTR(-ENOENT);
@@ -10975,8 +11010,9 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
if (cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE,
req.ie, req.ie_len)) {
- GENL_SET_ERR_MSG(info,
- "non-inheritance makes no sense");
+ NL_SET_ERR_MSG_ATTR(info->extack,
+ info->attrs[NL80211_ATTR_IE],
+ "non-inheritance makes no sense");
return -EINVAL;
}
}
@@ -11101,6 +11137,7 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
if (!attrs[NL80211_ATTR_MLO_LINK_ID]) {
err = -EINVAL;
+ NL_SET_BAD_ATTR(info->extack, link);
goto free;
}
@@ -11108,13 +11145,17 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
/* cannot use the same link ID again */
if (req.links[link_id].bss) {
err = -EINVAL;
+ NL_SET_BAD_ATTR(info->extack, link);
goto free;
}
req.links[link_id].bss =
- nl80211_assoc_bss(rdev, ssid, ssid_len, attrs);
+ nl80211_assoc_bss(rdev, ssid, ssid_len, attrs,
+ req.link_id, link_id);
if (IS_ERR(req.links[link_id].bss)) {
err = PTR_ERR(req.links[link_id].bss);
req.links[link_id].bss = NULL;
+ NL_SET_ERR_MSG_ATTR(info->extack,
+ link, "Error fetching BSS for link");
goto free;
}
@@ -11127,8 +11168,9 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
if (cfg80211_find_elem(WLAN_EID_FRAGMENT,
req.links[link_id].elems,
req.links[link_id].elems_len)) {
- GENL_SET_ERR_MSG(info,
- "cannot deal with fragmentation");
+ NL_SET_ERR_MSG_ATTR(info->extack,
+ attrs[NL80211_ATTR_IE],
+ "cannot deal with fragmentation");
err = -EINVAL;
goto free;
}
@@ -11136,8 +11178,9 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
if (cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE,
req.links[link_id].elems,
req.links[link_id].elems_len)) {
- GENL_SET_ERR_MSG(info,
- "cannot deal with non-inheritance");
+ NL_SET_ERR_MSG_ATTR(info->extack,
+ attrs[NL80211_ATTR_IE],
+ "cannot deal with non-inheritance");
err = -EINVAL;
goto free;
}
@@ -11172,7 +11215,8 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
if (req.link_id >= 0)
return -EINVAL;
- req.bss = nl80211_assoc_bss(rdev, ssid, ssid_len, info->attrs);
+ req.bss = nl80211_assoc_bss(rdev, ssid, ssid_len, info->attrs,
+ -1, -1);
if (IS_ERR(req.bss))
return PTR_ERR(req.bss);
ap_addr = req.bss->bssid;
@@ -11180,7 +11224,8 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
err = nl80211_crypto_settings(rdev, info, &req.crypto, 1);
if (!err) {
- wdev_lock(dev->ieee80211_ptr);
+ struct nlattr *link;
+ int rem = 0;
err = cfg80211_mlme_assoc(rdev, dev, &req);
@@ -11191,7 +11236,33 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
ap_addr, ETH_ALEN);
}
- wdev_unlock(dev->ieee80211_ptr);
+ /* Report error from first problematic link */
+ if (info->attrs[NL80211_ATTR_MLO_LINKS]) {
+ nla_for_each_nested(link,
+ info->attrs[NL80211_ATTR_MLO_LINKS],
+ rem) {
+ struct nlattr *link_id_attr =
+ nla_find_nested(link, NL80211_ATTR_MLO_LINK_ID);
+
+ if (!link_id_attr)
+ continue;
+
+ link_id = nla_get_u8(link_id_attr);
+
+ if (link_id == req.link_id)
+ continue;
+
+ if (!req.links[link_id].error ||
+ WARN_ON(req.links[link_id].error > 0))
+ continue;
+
+ WARN_ON(err >= 0);
+
+ NL_SET_BAD_ATTR(info->extack, link);
+ err = req.links[link_id].error;
+ break;
+ }
+ }
}
free:
@@ -11208,7 +11279,7 @@ static int nl80211_deauthenticate(struct sk_buff *skb, struct genl_info *info)
struct cfg80211_registered_device *rdev = info->user_ptr[0];
struct net_device *dev = info->user_ptr[1];
const u8 *ie = NULL, *bssid;
- int ie_len = 0, err;
+ int ie_len = 0;
u16 reason_code;
bool local_state_change;
@@ -11244,11 +11315,8 @@ static int nl80211_deauthenticate(struct sk_buff *skb, struct genl_info *info)
local_state_change = !!info->attrs[NL80211_ATTR_LOCAL_STATE_CHANGE];
- wdev_lock(dev->ieee80211_ptr);
- err = cfg80211_mlme_deauth(rdev, dev, bssid, ie, ie_len, reason_code,
- local_state_change);
- wdev_unlock(dev->ieee80211_ptr);
- return err;
+ return cfg80211_mlme_deauth(rdev, dev, bssid, ie, ie_len, reason_code,
+ local_state_change);
}
static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info)
@@ -11256,7 +11324,7 @@ static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info)
struct cfg80211_registered_device *rdev = info->user_ptr[0];
struct net_device *dev = info->user_ptr[1];
const u8 *ie = NULL, *bssid;
- int ie_len = 0, err;
+ int ie_len = 0;
u16 reason_code;
bool local_state_change;
@@ -11292,11 +11360,8 @@ static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info)
local_state_change = !!info->attrs[NL80211_ATTR_LOCAL_STATE_CHANGE];
- wdev_lock(dev->ieee80211_ptr);
- err = cfg80211_mlme_disassoc(rdev, dev, bssid, ie, ie_len, reason_code,
- local_state_change);
- wdev_unlock(dev->ieee80211_ptr);
- return err;
+ return cfg80211_mlme_disassoc(rdev, dev, bssid, ie, ie_len, reason_code,
+ local_state_change);
}
static bool
@@ -11474,13 +11539,11 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info)
ibss.userspace_handles_dfs =
nla_get_flag(info->attrs[NL80211_ATTR_HANDLE_DFS]);
- wdev_lock(dev->ieee80211_ptr);
err = __cfg80211_join_ibss(rdev, dev, &ibss, connkeys);
if (err)
kfree_sensitive(connkeys);
else if (info->attrs[NL80211_ATTR_SOCKET_OWNER])
dev->ieee80211_ptr->conn_owner_nlportid = info->snd_portid;
- wdev_unlock(dev->ieee80211_ptr);
return err;
}
@@ -12013,8 +12076,6 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
if (nla_get_flag(info->attrs[NL80211_ATTR_MLO_SUPPORT]))
connect.flags |= CONNECT_REQ_MLO_SUPPORT;
- wdev_lock(dev->ieee80211_ptr);
-
err = cfg80211_connect(rdev, dev, &connect, connkeys,
connect.prev_bssid);
if (err)
@@ -12029,8 +12090,6 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
eth_zero_addr(dev->ieee80211_ptr->disconnect_bssid);
}
- wdev_unlock(dev->ieee80211_ptr);
-
return err;
}
@@ -12044,7 +12103,6 @@ static int nl80211_update_connect_params(struct sk_buff *skb,
bool fils_sk_offload;
u32 auth_type;
u32 changed = 0;
- int ret;
if (!rdev->ops->update_connect_params)
return -EOPNOTSUPP;
@@ -12105,14 +12163,10 @@ static int nl80211_update_connect_params(struct sk_buff *skb,
changed |= UPDATE_AUTH_TYPE;
}
- wdev_lock(dev->ieee80211_ptr);
if (!wdev->connected)
- ret = -ENOLINK;
- else
- ret = rdev_update_connect_params(rdev, dev, &connect, changed);
- wdev_unlock(dev->ieee80211_ptr);
+ return -ENOLINK;
- return ret;
+ return rdev_update_connect_params(rdev, dev, &connect, changed);
}
static int nl80211_disconnect(struct sk_buff *skb, struct genl_info *info)
@@ -12120,7 +12174,6 @@ static int nl80211_disconnect(struct sk_buff *skb, struct genl_info *info)
struct cfg80211_registered_device *rdev = info->user_ptr[0];
struct net_device *dev = info->user_ptr[1];
u16 reason;
- int ret;
if (dev->ieee80211_ptr->conn_owner_nlportid &&
dev->ieee80211_ptr->conn_owner_nlportid != info->snd_portid)
@@ -12138,10 +12191,7 @@ static int nl80211_disconnect(struct sk_buff *skb, struct genl_info *info)
dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT)
return -EOPNOTSUPP;
- wdev_lock(dev->ieee80211_ptr);
- ret = cfg80211_disconnect(rdev, dev, reason, true);
- wdev_unlock(dev->ieee80211_ptr);
- return ret;
+ return cfg80211_disconnect(rdev, dev, reason, true);
}
static int nl80211_wiphy_netns(struct sk_buff *skb, struct genl_info *info)
@@ -12175,16 +12225,18 @@ static int nl80211_wiphy_netns(struct sk_buff *skb, struct genl_info *info)
return err;
}
-static int nl80211_setdel_pmksa(struct sk_buff *skb, struct genl_info *info)
+static int nl80211_set_pmksa(struct sk_buff *skb, struct genl_info *info)
{
struct cfg80211_registered_device *rdev = info->user_ptr[0];
- int (*rdev_ops)(struct wiphy *wiphy, struct net_device *dev,
- struct cfg80211_pmksa *pmksa) = NULL;
struct net_device *dev = info->user_ptr[1];
struct cfg80211_pmksa pmksa;
+ bool ap_pmksa_caching_support = false;
memset(&pmksa, 0, sizeof(struct cfg80211_pmksa));
+ ap_pmksa_caching_support = wiphy_ext_feature_isset(&rdev->wiphy,
+ NL80211_EXT_FEATURE_AP_PMKSA_CACHING);
+
if (!info->attrs[NL80211_ATTR_PMKID])
return -EINVAL;
@@ -12193,16 +12245,15 @@ static int nl80211_setdel_pmksa(struct sk_buff *skb, struct genl_info *info)
if (info->attrs[NL80211_ATTR_MAC]) {
pmksa.bssid = nla_data(info->attrs[NL80211_ATTR_MAC]);
} else if (info->attrs[NL80211_ATTR_SSID] &&
- info->attrs[NL80211_ATTR_FILS_CACHE_ID] &&
- (info->genlhdr->cmd == NL80211_CMD_DEL_PMKSA ||
- info->attrs[NL80211_ATTR_PMK])) {
+ info->attrs[NL80211_ATTR_FILS_CACHE_ID] &&
+ info->attrs[NL80211_ATTR_PMK]) {
pmksa.ssid = nla_data(info->attrs[NL80211_ATTR_SSID]);
pmksa.ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]);
- pmksa.cache_id =
- nla_data(info->attrs[NL80211_ATTR_FILS_CACHE_ID]);
+ pmksa.cache_id = nla_data(info->attrs[NL80211_ATTR_FILS_CACHE_ID]);
} else {
return -EINVAL;
}
+
if (info->attrs[NL80211_ATTR_PMK]) {
pmksa.pmk = nla_data(info->attrs[NL80211_ATTR_PMK]);
pmksa.pmk_len = nla_len(info->attrs[NL80211_ATTR_PMK]);
@@ -12214,32 +12265,71 @@ static int nl80211_setdel_pmksa(struct sk_buff *skb, struct genl_info *info)
if (info->attrs[NL80211_ATTR_PMK_REAUTH_THRESHOLD])
pmksa.pmk_reauth_threshold =
- nla_get_u8(
- info->attrs[NL80211_ATTR_PMK_REAUTH_THRESHOLD]);
+ nla_get_u8(info->attrs[NL80211_ATTR_PMK_REAUTH_THRESHOLD]);
if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT &&
- !(dev->ieee80211_ptr->iftype == NL80211_IFTYPE_AP &&
- wiphy_ext_feature_isset(&rdev->wiphy,
- NL80211_EXT_FEATURE_AP_PMKSA_CACHING)))
+ !((dev->ieee80211_ptr->iftype == NL80211_IFTYPE_AP ||
+ dev->ieee80211_ptr->iftype == NL80211_IFTYPE_P2P_GO) &&
+ ap_pmksa_caching_support))
return -EOPNOTSUPP;
- switch (info->genlhdr->cmd) {
- case NL80211_CMD_SET_PMKSA:
- rdev_ops = rdev->ops->set_pmksa;
- break;
- case NL80211_CMD_DEL_PMKSA:
- rdev_ops = rdev->ops->del_pmksa;
- break;
- default:
- WARN_ON(1);
- break;
+ if (!rdev->ops->set_pmksa)
+ return -EOPNOTSUPP;
+
+ return rdev_set_pmksa(rdev, dev, &pmksa);
+}
+
+static int nl80211_del_pmksa(struct sk_buff *skb, struct genl_info *info)
+{
+ struct cfg80211_registered_device *rdev = info->user_ptr[0];
+ struct net_device *dev = info->user_ptr[1];
+ struct cfg80211_pmksa pmksa;
+ bool sae_offload_support = false;
+ bool owe_offload_support = false;
+ bool ap_pmksa_caching_support = false;
+
+ memset(&pmksa, 0, sizeof(struct cfg80211_pmksa));
+
+ sae_offload_support = wiphy_ext_feature_isset(&rdev->wiphy,
+ NL80211_EXT_FEATURE_SAE_OFFLOAD);
+ owe_offload_support = wiphy_ext_feature_isset(&rdev->wiphy,
+ NL80211_EXT_FEATURE_OWE_OFFLOAD);
+ ap_pmksa_caching_support = wiphy_ext_feature_isset(&rdev->wiphy,
+ NL80211_EXT_FEATURE_AP_PMKSA_CACHING);
+
+ if (info->attrs[NL80211_ATTR_PMKID])
+ pmksa.pmkid = nla_data(info->attrs[NL80211_ATTR_PMKID]);
+
+ if (info->attrs[NL80211_ATTR_MAC]) {
+ pmksa.bssid = nla_data(info->attrs[NL80211_ATTR_MAC]);
+ } else if (info->attrs[NL80211_ATTR_SSID]) {
+ /* SSID based pmksa flush suppported only for FILS,
+ * OWE/SAE OFFLOAD cases
+ */
+ if (info->attrs[NL80211_ATTR_FILS_CACHE_ID] &&
+ info->attrs[NL80211_ATTR_PMK]) {
+ pmksa.cache_id = nla_data(info->attrs[NL80211_ATTR_FILS_CACHE_ID]);
+ } else if (!sae_offload_support && !owe_offload_support) {
+ return -EINVAL;
+ }
+ pmksa.ssid = nla_data(info->attrs[NL80211_ATTR_SSID]);
+ pmksa.ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]);
+ } else {
+ return -EINVAL;
}
- if (!rdev_ops)
+ if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
+ dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT &&
+ !((dev->ieee80211_ptr->iftype == NL80211_IFTYPE_AP ||
+ dev->ieee80211_ptr->iftype == NL80211_IFTYPE_P2P_GO) &&
+ ap_pmksa_caching_support))
return -EOPNOTSUPP;
- return rdev_ops(&rdev->wiphy, dev, &pmksa);
+ if (!rdev->ops->del_pmksa)
+ return -EOPNOTSUPP;
+
+ return rdev_del_pmksa(rdev, dev, &pmksa);
}
static int nl80211_flush_pmksa(struct sk_buff *skb, struct genl_info *info)
@@ -12352,7 +12442,6 @@ static int nl80211_remain_on_channel(struct sk_buff *skb,
if (err)
return err;
- wdev_lock(wdev);
if (!cfg80211_off_channel_oper_allowed(wdev, chandef.chan)) {
const struct cfg80211_chan_def *oper_chandef, *compat_chandef;
@@ -12361,7 +12450,6 @@ static int nl80211_remain_on_channel(struct sk_buff *skb,
if (WARN_ON(!oper_chandef)) {
/* cannot happen since we must beacon to get here */
WARN_ON(1);
- wdev_unlock(wdev);
return -EBUSY;
}
@@ -12369,12 +12457,9 @@ static int nl80211_remain_on_channel(struct sk_buff *skb,
compat_chandef = cfg80211_chandef_compatible(&chandef,
oper_chandef);
- if (compat_chandef != &chandef) {
- wdev_unlock(wdev);
+ if (compat_chandef != &chandef)
return -EBUSY;
- }
}
- wdev_unlock(wdev);
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
@@ -12433,23 +12518,18 @@ static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb,
unsigned int link_id = nl80211_link_id(info->attrs);
struct cfg80211_registered_device *rdev = info->user_ptr[0];
struct net_device *dev = info->user_ptr[1];
- struct wireless_dev *wdev = dev->ieee80211_ptr;
int err;
if (!rdev->ops->set_bitrate_mask)
return -EOPNOTSUPP;
- wdev_lock(wdev);
err = nl80211_parse_tx_bitrate_mask(info, info->attrs,
NL80211_ATTR_TX_RATES, &mask,
dev, true, link_id);
if (err)
- goto out;
+ return err;
- err = rdev_set_bitrate_mask(rdev, dev, link_id, NULL, &mask);
-out:
- wdev_unlock(wdev);
- return err;
+ return rdev_set_bitrate_mask(rdev, dev, link_id, NULL, &mask);
}
static int nl80211_register_mgmt(struct sk_buff *skb, struct genl_info *info)
@@ -12578,12 +12658,9 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info)
if (!chandef.chan && params.offchan)
return -EINVAL;
- wdev_lock(wdev);
if (params.offchan &&
- !cfg80211_off_channel_oper_allowed(wdev, chandef.chan)) {
- wdev_unlock(wdev);
+ !cfg80211_off_channel_oper_allowed(wdev, chandef.chan))
return -EBUSY;
- }
params.link_id = nl80211_link_id_or_invalid(info->attrs);
/*
@@ -12592,11 +12669,8 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info)
* to the driver.
*/
if (params.link_id >= 0 &&
- !(wdev->valid_links & BIT(params.link_id))) {
- wdev_unlock(wdev);
+ !(wdev->valid_links & BIT(params.link_id)))
return -EINVAL;
- }
- wdev_unlock(wdev);
params.buf = nla_data(info->attrs[NL80211_ATTR_FRAME]);
params.len = nla_len(info->attrs[NL80211_ATTR_FRAME]);
@@ -12796,7 +12870,8 @@ static int nl80211_set_cqm_txe(struct genl_info *info,
}
static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev,
- struct net_device *dev)
+ struct net_device *dev,
+ struct cfg80211_cqm_config *cqm_config)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
s32 last, low, high;
@@ -12804,17 +12879,13 @@ static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev,
int i, n, low_index;
int err;
- /* RSSI reporting disabled? */
- if (!wdev->cqm_config)
- return rdev_set_cqm_rssi_range_config(rdev, dev, 0, 0);
-
/*
* Obtain current RSSI value if possible, if not and no RSSI threshold
* event has been received yet, we should receive an event after a
* connection is established and enough beacons received to calculate
* the average.
*/
- if (!wdev->cqm_config->last_rssi_event_value &&
+ if (!cqm_config->last_rssi_event_value &&
wdev->links[0].client.current_bss &&
rdev->ops->get_station) {
struct station_info sinfo = {};
@@ -12828,30 +12899,30 @@ static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev,
cfg80211_sinfo_release_content(&sinfo);
if (sinfo.filled & BIT_ULL(NL80211_STA_INFO_BEACON_SIGNAL_AVG))
- wdev->cqm_config->last_rssi_event_value =
+ cqm_config->last_rssi_event_value =
(s8) sinfo.rx_beacon_signal_avg;
}
- last = wdev->cqm_config->last_rssi_event_value;
- hyst = wdev->cqm_config->rssi_hyst;
- n = wdev->cqm_config->n_rssi_thresholds;
+ last = cqm_config->last_rssi_event_value;
+ hyst = cqm_config->rssi_hyst;
+ n = cqm_config->n_rssi_thresholds;
for (i = 0; i < n; i++) {
i = array_index_nospec(i, n);
- if (last < wdev->cqm_config->rssi_thresholds[i])
+ if (last < cqm_config->rssi_thresholds[i])
break;
}
low_index = i - 1;
if (low_index >= 0) {
low_index = array_index_nospec(low_index, n);
- low = wdev->cqm_config->rssi_thresholds[low_index] - hyst;
+ low = cqm_config->rssi_thresholds[low_index] - hyst;
} else {
low = S32_MIN;
}
if (i < n) {
i = array_index_nospec(i, n);
- high = wdev->cqm_config->rssi_thresholds[i] + hyst - 1;
+ high = cqm_config->rssi_thresholds[i] + hyst - 1;
} else {
high = S32_MAX;
}
@@ -12864,10 +12935,11 @@ static int nl80211_set_cqm_rssi(struct genl_info *info,
u32 hysteresis)
{
struct cfg80211_registered_device *rdev = info->user_ptr[0];
+ struct cfg80211_cqm_config *cqm_config = NULL, *old;
struct net_device *dev = info->user_ptr[1];
struct wireless_dev *wdev = dev->ieee80211_ptr;
- int i, err;
s32 prev = S32_MIN;
+ int i, err;
/* Check all values negative and sorted */
for (i = 0; i < n_thresholds; i++) {
@@ -12881,50 +12953,63 @@ static int nl80211_set_cqm_rssi(struct genl_info *info,
wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)
return -EOPNOTSUPP;
- wdev_lock(wdev);
- cfg80211_cqm_config_free(wdev);
- wdev_unlock(wdev);
-
- if (n_thresholds <= 1 && rdev->ops->set_cqm_rssi_config) {
- if (n_thresholds == 0 || thresholds[0] == 0) /* Disabling */
- return rdev_set_cqm_rssi_config(rdev, dev, 0, 0);
+ if (n_thresholds == 1 && thresholds[0] == 0) /* Disabling */
+ n_thresholds = 0;
- return rdev_set_cqm_rssi_config(rdev, dev,
- thresholds[0], hysteresis);
- }
+ old = wiphy_dereference(wdev->wiphy, wdev->cqm_config);
- if (!wiphy_ext_feature_isset(&rdev->wiphy,
- NL80211_EXT_FEATURE_CQM_RSSI_LIST))
- return -EOPNOTSUPP;
+ /* if already disabled just succeed */
+ if (!n_thresholds && !old)
+ return 0;
- if (n_thresholds == 1 && thresholds[0] == 0) /* Disabling */
- n_thresholds = 0;
+ if (n_thresholds > 1) {
+ if (!wiphy_ext_feature_isset(&rdev->wiphy,
+ NL80211_EXT_FEATURE_CQM_RSSI_LIST) ||
+ !rdev->ops->set_cqm_rssi_range_config)
+ return -EOPNOTSUPP;
+ } else {
+ if (!rdev->ops->set_cqm_rssi_config)
+ return -EOPNOTSUPP;
+ }
- wdev_lock(wdev);
if (n_thresholds) {
- struct cfg80211_cqm_config *cqm_config;
-
cqm_config = kzalloc(struct_size(cqm_config, rssi_thresholds,
n_thresholds),
GFP_KERNEL);
- if (!cqm_config) {
- err = -ENOMEM;
- goto unlock;
- }
+ if (!cqm_config)
+ return -ENOMEM;
cqm_config->rssi_hyst = hysteresis;
cqm_config->n_rssi_thresholds = n_thresholds;
memcpy(cqm_config->rssi_thresholds, thresholds,
flex_array_size(cqm_config, rssi_thresholds,
n_thresholds));
+ cqm_config->use_range_api = n_thresholds > 1 ||
+ !rdev->ops->set_cqm_rssi_config;
- wdev->cqm_config = cqm_config;
- }
+ rcu_assign_pointer(wdev->cqm_config, cqm_config);
- err = cfg80211_cqm_rssi_update(rdev, dev);
+ if (cqm_config->use_range_api)
+ err = cfg80211_cqm_rssi_update(rdev, dev, cqm_config);
+ else
+ err = rdev_set_cqm_rssi_config(rdev, dev,
+ thresholds[0],
+ hysteresis);
+ } else {
+ RCU_INIT_POINTER(wdev->cqm_config, NULL);
+ /* if enabled as range also disable via range */
+ if (old->use_range_api)
+ err = rdev_set_cqm_rssi_range_config(rdev, dev, 0, 0);
+ else
+ err = rdev_set_cqm_rssi_config(rdev, dev, 0, 0);
+ }
-unlock:
- wdev_unlock(wdev);
+ if (err) {
+ rcu_assign_pointer(wdev->cqm_config, old);
+ kfree_rcu(cqm_config, rcu_head);
+ } else {
+ kfree_rcu(old, rcu_head);
+ }
return err;
}
@@ -13108,11 +13193,9 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info)
setup.control_port_over_nl80211 = true;
}
- wdev_lock(dev->ieee80211_ptr);
err = __cfg80211_join_mesh(rdev, dev, &setup, &cfg);
if (!err && info->attrs[NL80211_ATTR_SOCKET_OWNER])
dev->ieee80211_ptr->conn_owner_nlportid = info->snd_portid;
- wdev_unlock(dev->ieee80211_ptr);
return err;
}
@@ -14056,21 +14139,13 @@ static int nl80211_set_rekey_data(struct sk_buff *skb, struct genl_info *info)
if (tb[NL80211_REKEY_DATA_AKM])
rekey_data.akm = nla_get_u32(tb[NL80211_REKEY_DATA_AKM]);
- wdev_lock(wdev);
- if (!wdev->connected) {
- err = -ENOTCONN;
- goto out;
- }
+ if (!wdev->connected)
+ return -ENOTCONN;
- if (!rdev->ops->set_rekey_data) {
- err = -EOPNOTSUPP;
- goto out;
- }
+ if (!rdev->ops->set_rekey_data)
+ return -EOPNOTSUPP;
- err = rdev_set_rekey_data(rdev, dev, &rekey_data);
- out:
- wdev_unlock(wdev);
- return err;
+ return rdev_set_rekey_data(rdev, dev, &rekey_data);
}
static int nl80211_register_unexpected_frame(struct sk_buff *skb,
@@ -15274,11 +15349,9 @@ static int nl80211_set_qos_map(struct sk_buff *skb,
memcpy(qos_map->up, pos, IEEE80211_QOS_MAP_LEN_MIN);
}
- wdev_lock(dev->ieee80211_ptr);
ret = nl80211_key_allowed(dev->ieee80211_ptr);
if (!ret)
ret = rdev_set_qos_map(rdev, dev, qos_map);
- wdev_unlock(dev->ieee80211_ptr);
kfree(qos_map);
return ret;
@@ -15292,7 +15365,6 @@ static int nl80211_add_tx_ts(struct sk_buff *skb, struct genl_info *info)
const u8 *peer;
u8 tsid, up;
u16 admitted_time = 0;
- int err;
if (!(rdev->wiphy.features & NL80211_FEATURE_SUPPORTS_WMM_ADMISSION))
return -EOPNOTSUPP;
@@ -15322,34 +15394,25 @@ static int nl80211_add_tx_ts(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
}
- wdev_lock(wdev);
switch (wdev->iftype) {
case NL80211_IFTYPE_STATION:
case NL80211_IFTYPE_P2P_CLIENT:
if (wdev->connected)
break;
- err = -ENOTCONN;
- goto out;
+ return -ENOTCONN;
default:
- err = -EOPNOTSUPP;
- goto out;
+ return -EOPNOTSUPP;
}
- err = rdev_add_tx_ts(rdev, dev, tsid, peer, up, admitted_time);
-
- out:
- wdev_unlock(wdev);
- return err;
+ return rdev_add_tx_ts(rdev, dev, tsid, peer, up, admitted_time);
}
static int nl80211_del_tx_ts(struct sk_buff *skb, struct genl_info *info)
{
struct cfg80211_registered_device *rdev = info->user_ptr[0];
struct net_device *dev = info->user_ptr[1];
- struct wireless_dev *wdev = dev->ieee80211_ptr;
const u8 *peer;
u8 tsid;
- int err;
if (!info->attrs[NL80211_ATTR_TSID] || !info->attrs[NL80211_ATTR_MAC])
return -EINVAL;
@@ -15357,11 +15420,7 @@ static int nl80211_del_tx_ts(struct sk_buff *skb, struct genl_info *info)
tsid = nla_get_u8(info->attrs[NL80211_ATTR_TSID]);
peer = nla_data(info->attrs[NL80211_ATTR_MAC]);
- wdev_lock(wdev);
- err = rdev_del_tx_ts(rdev, dev, tsid, peer);
- wdev_unlock(wdev);
-
- return err;
+ return rdev_del_tx_ts(rdev, dev, tsid, peer);
}
static int nl80211_tdls_channel_switch(struct sk_buff *skb,
@@ -15417,11 +15476,7 @@ static int nl80211_tdls_channel_switch(struct sk_buff *skb,
addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
oper_class = nla_get_u8(info->attrs[NL80211_ATTR_OPER_CLASS]);
- wdev_lock(wdev);
- err = rdev_tdls_channel_switch(rdev, dev, addr, oper_class, &chandef);
- wdev_unlock(wdev);
-
- return err;
+ return rdev_tdls_channel_switch(rdev, dev, addr, oper_class, &chandef);
}
static int nl80211_tdls_cancel_channel_switch(struct sk_buff *skb,
@@ -15429,7 +15484,6 @@ static int nl80211_tdls_cancel_channel_switch(struct sk_buff *skb,
{
struct cfg80211_registered_device *rdev = info->user_ptr[0];
struct net_device *dev = info->user_ptr[1];
- struct wireless_dev *wdev = dev->ieee80211_ptr;
const u8 *addr;
if (!rdev->ops->tdls_channel_switch ||
@@ -15450,9 +15504,7 @@ static int nl80211_tdls_cancel_channel_switch(struct sk_buff *skb,
addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
- wdev_lock(wdev);
rdev_tdls_cancel_channel_switch(rdev, dev, addr);
- wdev_unlock(wdev);
return 0;
}
@@ -15485,7 +15537,6 @@ static int nl80211_set_pmk(struct sk_buff *skb, struct genl_info *info)
struct net_device *dev = info->user_ptr[1];
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct cfg80211_pmk_conf pmk_conf = {};
- int ret;
if (wdev->iftype != NL80211_IFTYPE_STATION &&
wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)
@@ -15498,34 +15549,24 @@ static int nl80211_set_pmk(struct sk_buff *skb, struct genl_info *info)
if (!info->attrs[NL80211_ATTR_MAC] || !info->attrs[NL80211_ATTR_PMK])
return -EINVAL;
- wdev_lock(wdev);
- if (!wdev->connected) {
- ret = -ENOTCONN;
- goto out;
- }
+ if (!wdev->connected)
+ return -ENOTCONN;
pmk_conf.aa = nla_data(info->attrs[NL80211_ATTR_MAC]);
- if (memcmp(pmk_conf.aa, wdev->u.client.connected_addr, ETH_ALEN)) {
- ret = -EINVAL;
- goto out;
- }
+ if (memcmp(pmk_conf.aa, wdev->u.client.connected_addr, ETH_ALEN))
+ return -EINVAL;
pmk_conf.pmk = nla_data(info->attrs[NL80211_ATTR_PMK]);
pmk_conf.pmk_len = nla_len(info->attrs[NL80211_ATTR_PMK]);
if (pmk_conf.pmk_len != WLAN_PMK_LEN &&
- pmk_conf.pmk_len != WLAN_PMK_LEN_SUITE_B_192) {
- ret = -EINVAL;
- goto out;
- }
+ pmk_conf.pmk_len != WLAN_PMK_LEN_SUITE_B_192)
+ return -EINVAL;
if (info->attrs[NL80211_ATTR_PMKR0_NAME])
pmk_conf.pmk_r0_name =
nla_data(info->attrs[NL80211_ATTR_PMKR0_NAME]);
- ret = rdev_set_pmk(rdev, dev, &pmk_conf);
-out:
- wdev_unlock(wdev);
- return ret;
+ return rdev_set_pmk(rdev, dev, &pmk_conf);
}
static int nl80211_del_pmk(struct sk_buff *skb, struct genl_info *info)
@@ -15534,7 +15575,6 @@ static int nl80211_del_pmk(struct sk_buff *skb, struct genl_info *info)
struct net_device *dev = info->user_ptr[1];
struct wireless_dev *wdev = dev->ieee80211_ptr;
const u8 *aa;
- int ret;
if (wdev->iftype != NL80211_IFTYPE_STATION &&
wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)
@@ -15547,12 +15587,8 @@ static int nl80211_del_pmk(struct sk_buff *skb, struct genl_info *info)
if (!info->attrs[NL80211_ATTR_MAC])
return -EINVAL;
- wdev_lock(wdev);
aa = nla_data(info->attrs[NL80211_ATTR_MAC]);
- ret = rdev_del_pmk(rdev, dev, aa);
- wdev_unlock(wdev);
-
- return ret;
+ return rdev_del_pmk(rdev, dev, aa);
}
static int nl80211_external_auth(struct sk_buff *skb, struct genl_info *info)
@@ -15626,8 +15662,6 @@ static int nl80211_tx_control_port(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
}
- wdev_lock(wdev);
-
switch (wdev->iftype) {
case NL80211_IFTYPE_AP:
case NL80211_IFTYPE_P2P_GO:
@@ -15636,21 +15670,16 @@ static int nl80211_tx_control_port(struct sk_buff *skb, struct genl_info *info)
case NL80211_IFTYPE_ADHOC:
if (wdev->u.ibss.current_bss)
break;
- err = -ENOTCONN;
- goto out;
+ return -ENOTCONN;
case NL80211_IFTYPE_STATION:
case NL80211_IFTYPE_P2P_CLIENT:
if (wdev->connected)
break;
- err = -ENOTCONN;
- goto out;
+ return -ENOTCONN;
default:
- err = -EOPNOTSUPP;
- goto out;
+ return -EOPNOTSUPP;
}
- wdev_unlock(wdev);
-
buf = nla_data(info->attrs[NL80211_ATTR_FRAME]);
len = nla_len(info->attrs[NL80211_ATTR_FRAME]);
dest = nla_data(info->attrs[NL80211_ATTR_MAC]);
@@ -15666,9 +15695,6 @@ static int nl80211_tx_control_port(struct sk_buff *skb, struct genl_info *info)
if (!err && !dont_wait_for_ack)
nl_set_extack_cookie_u64(info->extack, cookie);
return err;
- out:
- wdev_unlock(wdev);
- return err;
}
static int nl80211_get_ftm_responder_stats(struct sk_buff *skb,
@@ -15907,7 +15933,7 @@ static int parse_tid_conf(struct cfg80211_registered_device *rdev,
if (tid_conf->mask & ~mask) {
NL_SET_ERR_MSG(extack, "unsupported TID configuration");
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
return 0;
@@ -15946,8 +15972,6 @@ static int nl80211_set_tid_config(struct sk_buff *skb,
if (info->attrs[NL80211_ATTR_MAC])
tid_config->peer = nla_data(info->attrs[NL80211_ATTR_MAC]);
- wdev_lock(dev->ieee80211_ptr);
-
nla_for_each_nested(tid, info->attrs[NL80211_ATTR_TID_CONFIG],
rem_conf) {
ret = nla_parse_nested(attrs, NL80211_TID_CONFIG_ATTR_MAX,
@@ -15969,7 +15993,6 @@ static int nl80211_set_tid_config(struct sk_buff *skb,
bad_tid_conf:
kfree(tid_config);
- wdev_unlock(dev->ieee80211_ptr);
return ret;
}
@@ -16066,9 +16089,7 @@ static int nl80211_color_change(struct sk_buff *skb, struct genl_info *info)
params.counter_offset_presp = offset;
}
- wdev_lock(wdev);
err = rdev_color_change(rdev, dev, &params);
- wdev_unlock(wdev);
out:
kfree(params.beacon_next.mbssid_ies);
@@ -16124,7 +16145,6 @@ static int nl80211_add_link(struct sk_buff *skb, struct genl_info *info)
!is_valid_ether_addr(nla_data(info->attrs[NL80211_ATTR_MAC])))
return -EINVAL;
- wdev_lock(wdev);
wdev->valid_links |= BIT(link_id);
ether_addr_copy(wdev->links[link_id].addr,
nla_data(info->attrs[NL80211_ATTR_MAC]));
@@ -16134,7 +16154,6 @@ static int nl80211_add_link(struct sk_buff *skb, struct genl_info *info)
wdev->valid_links &= ~BIT(link_id);
eth_zero_addr(wdev->links[link_id].addr);
}
- wdev_unlock(wdev);
return ret;
}
@@ -16156,9 +16175,7 @@ static int nl80211_remove_link(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
}
- wdev_lock(wdev);
cfg80211_remove_link(wdev, link_id);
- wdev_unlock(wdev);
return 0;
}
@@ -16248,14 +16265,10 @@ nl80211_add_mod_link_station(struct sk_buff *skb, struct genl_info *info,
if (err)
return err;
- wdev_lock(dev->ieee80211_ptr);
if (add)
- err = rdev_add_link_station(rdev, dev, &params);
- else
- err = rdev_mod_link_station(rdev, dev, &params);
- wdev_unlock(dev->ieee80211_ptr);
+ return rdev_add_link_station(rdev, dev, &params);
- return err;
+ return rdev_mod_link_station(rdev, dev, &params);
}
static int
@@ -16276,7 +16289,6 @@ nl80211_remove_link_station(struct sk_buff *skb, struct genl_info *info)
struct link_station_del_parameters params = {};
struct cfg80211_registered_device *rdev = info->user_ptr[0];
struct net_device *dev = info->user_ptr[1];
- int ret;
if (!rdev->ops->del_link_station)
return -EOPNOTSUPP;
@@ -16288,11 +16300,7 @@ nl80211_remove_link_station(struct sk_buff *skb, struct genl_info *info)
params.mld_mac = nla_data(info->attrs[NL80211_ATTR_MLD_ADDR]);
params.link_id = nla_get_u8(info->attrs[NL80211_ATTR_MLO_LINK_ID]);
- wdev_lock(dev->ieee80211_ptr);
- ret = rdev_del_link_station(rdev, dev, &params);
- wdev_unlock(dev->ieee80211_ptr);
-
- return ret;
+ return rdev_del_link_station(rdev, dev, &params);
}
static int nl80211_set_hw_timestamp(struct sk_buff *skb,
@@ -16318,6 +16326,35 @@ static int nl80211_set_hw_timestamp(struct sk_buff *skb,
return rdev_set_hw_timestamp(rdev, dev, &hwts);
}
+static int
+nl80211_set_ttlm(struct sk_buff *skb, struct genl_info *info)
+{
+ struct cfg80211_ttlm_params params = {};
+ struct cfg80211_registered_device *rdev = info->user_ptr[0];
+ struct net_device *dev = info->user_ptr[1];
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
+
+ if (wdev->iftype != NL80211_IFTYPE_STATION &&
+ wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)
+ return -EOPNOTSUPP;
+
+ if (!wdev->connected)
+ return -ENOLINK;
+
+ if (!info->attrs[NL80211_ATTR_MLO_TTLM_DLINK] ||
+ !info->attrs[NL80211_ATTR_MLO_TTLM_ULINK])
+ return -EINVAL;
+
+ nla_memcpy(params.dlink,
+ info->attrs[NL80211_ATTR_MLO_TTLM_DLINK],
+ sizeof(params.dlink));
+ nla_memcpy(params.ulink,
+ info->attrs[NL80211_ATTR_MLO_TTLM_ULINK],
+ sizeof(params.ulink));
+
+ return rdev_set_ttlm(rdev, dev, &params);
+}
+
#define NL80211_FLAG_NEED_WIPHY 0x01
#define NL80211_FLAG_NEED_NETDEV 0x02
#define NL80211_FLAG_NEED_RTNL 0x04
@@ -17006,7 +17043,7 @@ static const struct genl_small_ops nl80211_small_ops[] = {
{
.cmd = NL80211_CMD_SET_PMKSA,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = nl80211_setdel_pmksa,
+ .doit = nl80211_set_pmksa,
.flags = GENL_UNS_ADMIN_PERM,
.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_CLEAR_SKB),
@@ -17014,7 +17051,7 @@ static const struct genl_small_ops nl80211_small_ops[] = {
{
.cmd = NL80211_CMD_DEL_PMKSA,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = nl80211_setdel_pmksa,
+ .doit = nl80211_del_pmksa,
.flags = GENL_UNS_ADMIN_PERM,
.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
@@ -17499,6 +17536,12 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.flags = GENL_UNS_ADMIN_PERM,
.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
+ {
+ .cmd = NL80211_CMD_SET_TID_TO_LINK_MAPPING,
+ .doit = nl80211_set_ttlm,
+ .flags = GENL_UNS_ADMIN_PERM,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
+ },
};
static struct genl_family nl80211_fam __ro_after_init = {
@@ -17830,21 +17873,29 @@ nla_put_failure:
nlmsg_free(msg);
}
+struct nl80211_mlme_event {
+ enum nl80211_commands cmd;
+ const u8 *buf;
+ size_t buf_len;
+ int uapsd_queues;
+ const u8 *req_ies;
+ size_t req_ies_len;
+ bool reconnect;
+};
+
static void nl80211_send_mlme_event(struct cfg80211_registered_device *rdev,
struct net_device *netdev,
- const u8 *buf, size_t len,
- enum nl80211_commands cmd, gfp_t gfp,
- int uapsd_queues, const u8 *req_ies,
- size_t req_ies_len, bool reconnect)
+ const struct nl80211_mlme_event *event,
+ gfp_t gfp)
{
struct sk_buff *msg;
void *hdr;
- msg = nlmsg_new(100 + len + req_ies_len, gfp);
+ msg = nlmsg_new(100 + event->buf_len + event->req_ies_len, gfp);
if (!msg)
return;
- hdr = nl80211hdr_put(msg, 0, 0, 0, cmd);
+ hdr = nl80211hdr_put(msg, 0, 0, 0, event->cmd);
if (!hdr) {
nlmsg_free(msg);
return;
@@ -17852,22 +17903,24 @@ static void nl80211_send_mlme_event(struct cfg80211_registered_device *rdev,
if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) ||
- nla_put(msg, NL80211_ATTR_FRAME, len, buf) ||
- (req_ies &&
- nla_put(msg, NL80211_ATTR_REQ_IE, req_ies_len, req_ies)))
+ nla_put(msg, NL80211_ATTR_FRAME, event->buf_len, event->buf) ||
+ (event->req_ies &&
+ nla_put(msg, NL80211_ATTR_REQ_IE, event->req_ies_len,
+ event->req_ies)))
goto nla_put_failure;
- if (reconnect && nla_put_flag(msg, NL80211_ATTR_RECONNECT_REQUESTED))
+ if (event->reconnect &&
+ nla_put_flag(msg, NL80211_ATTR_RECONNECT_REQUESTED))
goto nla_put_failure;
- if (uapsd_queues >= 0) {
+ if (event->uapsd_queues >= 0) {
struct nlattr *nla_wmm =
nla_nest_start_noflag(msg, NL80211_ATTR_STA_WME);
if (!nla_wmm)
goto nla_put_failure;
if (nla_put_u8(msg, NL80211_STA_WME_UAPSD_QUEUES,
- uapsd_queues))
+ event->uapsd_queues))
goto nla_put_failure;
nla_nest_end(msg, nla_wmm);
@@ -17887,37 +17940,60 @@ void nl80211_send_rx_auth(struct cfg80211_registered_device *rdev,
struct net_device *netdev, const u8 *buf,
size_t len, gfp_t gfp)
{
- nl80211_send_mlme_event(rdev, netdev, buf, len,
- NL80211_CMD_AUTHENTICATE, gfp, -1, NULL, 0,
- false);
+ struct nl80211_mlme_event event = {
+ .cmd = NL80211_CMD_AUTHENTICATE,
+ .buf = buf,
+ .buf_len = len,
+ .uapsd_queues = -1,
+ };
+
+ nl80211_send_mlme_event(rdev, netdev, &event, gfp);
}
void nl80211_send_rx_assoc(struct cfg80211_registered_device *rdev,
struct net_device *netdev,
- struct cfg80211_rx_assoc_resp *data)
+ const struct cfg80211_rx_assoc_resp_data *data)
{
- nl80211_send_mlme_event(rdev, netdev, data->buf, data->len,
- NL80211_CMD_ASSOCIATE, GFP_KERNEL,
- data->uapsd_queues,
- data->req_ies, data->req_ies_len, false);
+ struct nl80211_mlme_event event = {
+ .cmd = NL80211_CMD_ASSOCIATE,
+ .buf = data->buf,
+ .buf_len = data->len,
+ .uapsd_queues = data->uapsd_queues,
+ .req_ies = data->req_ies,
+ .req_ies_len = data->req_ies_len,
+ };
+
+ nl80211_send_mlme_event(rdev, netdev, &event, GFP_KERNEL);
}
void nl80211_send_deauth(struct cfg80211_registered_device *rdev,
struct net_device *netdev, const u8 *buf,
size_t len, bool reconnect, gfp_t gfp)
{
- nl80211_send_mlme_event(rdev, netdev, buf, len,
- NL80211_CMD_DEAUTHENTICATE, gfp, -1, NULL, 0,
- reconnect);
+ struct nl80211_mlme_event event = {
+ .cmd = NL80211_CMD_DEAUTHENTICATE,
+ .buf = buf,
+ .buf_len = len,
+ .reconnect = reconnect,
+ .uapsd_queues = -1,
+ };
+
+ nl80211_send_mlme_event(rdev, netdev, &event, gfp);
}
void nl80211_send_disassoc(struct cfg80211_registered_device *rdev,
struct net_device *netdev, const u8 *buf,
size_t len, bool reconnect, gfp_t gfp)
{
- nl80211_send_mlme_event(rdev, netdev, buf, len,
- NL80211_CMD_DISASSOCIATE, gfp, -1, NULL, 0,
- reconnect);
+ struct nl80211_mlme_event event = {
+ .cmd = NL80211_CMD_DISASSOCIATE,
+ .buf = buf,
+ .buf_len = len,
+ .reconnect = reconnect,
+ .uapsd_queues = -1,
+ };
+
+ nl80211_send_mlme_event(rdev, netdev, &event, gfp);
}
void cfg80211_rx_unprot_mlme_mgmt(struct net_device *dev, const u8 *buf,
@@ -17927,28 +18003,31 @@ void cfg80211_rx_unprot_mlme_mgmt(struct net_device *dev, const u8 *buf,
struct wiphy *wiphy = wdev->wiphy;
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
const struct ieee80211_mgmt *mgmt = (void *)buf;
- u32 cmd;
+ struct nl80211_mlme_event event = {
+ .buf = buf,
+ .buf_len = len,
+ .uapsd_queues = -1,
+ };
if (WARN_ON(len < 2))
return;
if (ieee80211_is_deauth(mgmt->frame_control)) {
- cmd = NL80211_CMD_UNPROT_DEAUTHENTICATE;
+ event.cmd = NL80211_CMD_UNPROT_DEAUTHENTICATE;
} else if (ieee80211_is_disassoc(mgmt->frame_control)) {
- cmd = NL80211_CMD_UNPROT_DISASSOCIATE;
+ event.cmd = NL80211_CMD_UNPROT_DISASSOCIATE;
} else if (ieee80211_is_beacon(mgmt->frame_control)) {
if (wdev->unprot_beacon_reported &&
elapsed_jiffies_msecs(wdev->unprot_beacon_reported) < 10000)
return;
- cmd = NL80211_CMD_UNPROT_BEACON;
+ event.cmd = NL80211_CMD_UNPROT_BEACON;
wdev->unprot_beacon_reported = jiffies;
} else {
return;
}
trace_cfg80211_rx_unprot_mlme_mgmt(dev, buf, len);
- nl80211_send_mlme_event(rdev, dev, buf, len, cmd, GFP_ATOMIC, -1,
- NULL, 0, false);
+ nl80211_send_mlme_event(rdev, dev, &event, GFP_ATOMIC);
}
EXPORT_SYMBOL(cfg80211_rx_unprot_mlme_mgmt);
@@ -18219,7 +18298,7 @@ void nl80211_send_roamed(struct cfg80211_registered_device *rdev,
}
void nl80211_send_port_authorized(struct cfg80211_registered_device *rdev,
- struct net_device *netdev, const u8 *bssid,
+ struct net_device *netdev, const u8 *peer_addr,
const u8 *td_bitmap, u8 td_bitmap_len)
{
struct sk_buff *msg;
@@ -18237,7 +18316,7 @@ void nl80211_send_port_authorized(struct cfg80211_registered_device *rdev,
if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) ||
- nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, bssid))
+ nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, peer_addr))
goto nla_put_failure;
if ((td_bitmap_len > 0) && td_bitmap)
@@ -18300,7 +18379,7 @@ void cfg80211_links_removed(struct net_device *dev, u16 link_mask)
struct nlattr *links;
void *hdr;
- ASSERT_WDEV_LOCK(wdev);
+ lockdep_assert_wiphy(wdev->wiphy);
trace_cfg80211_links_removed(dev, link_mask);
if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION &&
@@ -19073,9 +19152,8 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev,
enum nl80211_cqm_rssi_threshold_event rssi_event,
s32 rssi_level, gfp_t gfp)
{
- struct sk_buff *msg;
struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
+ struct cfg80211_cqm_config *cqm_config;
trace_cfg80211_cqm_rssi_notify(dev, rssi_event, rssi_level);
@@ -19083,16 +19161,38 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev,
rssi_event != NL80211_CQM_RSSI_THRESHOLD_EVENT_HIGH))
return;
- if (wdev->cqm_config) {
- wdev->cqm_config->last_rssi_event_value = rssi_level;
+ rcu_read_lock();
+ cqm_config = rcu_dereference(wdev->cqm_config);
+ if (cqm_config) {
+ cqm_config->last_rssi_event_value = rssi_level;
+ cqm_config->last_rssi_event_type = rssi_event;
+ wiphy_work_queue(wdev->wiphy, &wdev->cqm_rssi_work);
+ }
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL(cfg80211_cqm_rssi_notify);
+
+void cfg80211_cqm_rssi_notify_work(struct wiphy *wiphy, struct wiphy_work *work)
+{
+ struct wireless_dev *wdev = container_of(work, struct wireless_dev,
+ cqm_rssi_work);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
+ enum nl80211_cqm_rssi_threshold_event rssi_event;
+ struct cfg80211_cqm_config *cqm_config;
+ struct sk_buff *msg;
+ s32 rssi_level;
+
+ cqm_config = wiphy_dereference(wdev->wiphy, wdev->cqm_config);
+ if (!cqm_config)
+ return;
- cfg80211_cqm_rssi_update(rdev, dev);
+ if (cqm_config->use_range_api)
+ cfg80211_cqm_rssi_update(rdev, wdev->netdev, cqm_config);
- if (rssi_level == 0)
- rssi_level = wdev->cqm_config->last_rssi_event_value;
- }
+ rssi_level = cqm_config->last_rssi_event_value;
+ rssi_event = cqm_config->last_rssi_event_type;
- msg = cfg80211_prepare_cqm(dev, NULL, gfp);
+ msg = cfg80211_prepare_cqm(wdev->netdev, NULL, GFP_KERNEL);
if (!msg)
return;
@@ -19104,14 +19204,13 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev,
rssi_level))
goto nla_put_failure;
- cfg80211_send_cqm(msg, gfp);
+ cfg80211_send_cqm(msg, GFP_KERNEL);
return;
nla_put_failure:
nlmsg_free(msg);
}
-EXPORT_SYMBOL(cfg80211_cqm_rssi_notify);
void cfg80211_cqm_txe_notify(struct net_device *dev,
const u8 *peer, u32 num_packets,
@@ -19354,7 +19453,7 @@ void cfg80211_ch_switch_notify(struct net_device *dev,
struct wiphy *wiphy = wdev->wiphy;
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
- ASSERT_WDEV_LOCK(wdev);
+ lockdep_assert_wiphy(wdev->wiphy);
WARN_INVALID_LINK_ID(wdev, link_id);
trace_cfg80211_ch_switch_notify(dev, chandef, link_id, punct_bitmap);
@@ -19382,6 +19481,7 @@ void cfg80211_ch_switch_notify(struct net_device *dev,
break;
}
+ cfg80211_schedule_channels_check(wdev);
cfg80211_sched_dfs_chan_update(rdev);
nl80211_ch_switch_notify(rdev, dev, link_id, chandef, GFP_KERNEL,
@@ -19399,7 +19499,7 @@ void cfg80211_ch_switch_started_notify(struct net_device *dev,
struct wiphy *wiphy = wdev->wiphy;
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
- ASSERT_WDEV_LOCK(wdev);
+ lockdep_assert_wiphy(wdev->wiphy);
WARN_INVALID_LINK_ID(wdev, link_id);
trace_cfg80211_ch_switch_started_notify(dev, chandef, link_id,
@@ -19422,7 +19522,7 @@ int cfg80211_bss_color_notify(struct net_device *dev,
struct sk_buff *msg;
void *hdr;
- ASSERT_WDEV_LOCK(wdev);
+ lockdep_assert_wiphy(wdev->wiphy);
trace_cfg80211_bss_color_notify(dev, cmd, count, color_bitmap);
@@ -20139,6 +20239,20 @@ nla_put_failure:
}
EXPORT_SYMBOL(cfg80211_update_owe_info_event);
+void cfg80211_schedule_channels_check(struct wireless_dev *wdev)
+{
+ struct wiphy *wiphy = wdev->wiphy;
+
+ /* Schedule channels check if NO_IR or DFS relaxations are supported */
+ if (wdev->iftype == NL80211_IFTYPE_STATION &&
+ (wiphy_ext_feature_isset(wiphy,
+ NL80211_EXT_FEATURE_DFS_CONCURRENT) ||
+ (IS_ENABLED(CONFIG_CFG80211_REG_RELAX_NO_IR) &&
+ wiphy->regulatory_flags & REGULATORY_ENABLE_RELAX_NO_IR)))
+ reg_check_channels();
+}
+EXPORT_SYMBOL(cfg80211_schedule_channels_check);
+
/* initialisation/exit functions */
int __init nl80211_init(void)
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index b4af53f9b227..6376f3a87f8a 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -60,7 +60,7 @@ void nl80211_send_rx_auth(struct cfg80211_registered_device *rdev,
const u8 *buf, size_t len, gfp_t gfp);
void nl80211_send_rx_assoc(struct cfg80211_registered_device *rdev,
struct net_device *netdev,
- struct cfg80211_rx_assoc_resp *data);
+ const struct cfg80211_rx_assoc_resp_data *data);
void nl80211_send_deauth(struct cfg80211_registered_device *rdev,
struct net_device *netdev,
const u8 *buf, size_t len,
@@ -82,8 +82,11 @@ void nl80211_send_connect_result(struct cfg80211_registered_device *rdev,
void nl80211_send_roamed(struct cfg80211_registered_device *rdev,
struct net_device *netdev,
struct cfg80211_roam_info *info, gfp_t gfp);
+/* For STA/GC, indicate port authorized with AP/GO bssid.
+ * For GO/AP, use peer GC/STA mac_addr.
+ */
void nl80211_send_port_authorized(struct cfg80211_registered_device *rdev,
- struct net_device *netdev, const u8 *bssid,
+ struct net_device *netdev, const u8 *peer_addr,
const u8 *td_bitmap, u8 td_bitmap_len);
void nl80211_send_disconnected(struct cfg80211_registered_device *rdev,
struct net_device *netdev, u16 reason,
diff --git a/net/wireless/ocb.c b/net/wireless/ocb.c
index 29afaf3da54f..7d2d67f13ad9 100644
--- a/net/wireless/ocb.c
+++ b/net/wireless/ocb.c
@@ -4,7 +4,7 @@
*
* Copyright: (c) 2014 Czech Technical University in Prague
* (c) 2014 Volkswagen Group Research
- * Copyright (C) 2022 Intel Corporation
+ * Copyright (C) 2022-2023 Intel Corporation
* Author: Rostislav Lisovy <rostislav.lisovy@fel.cvut.cz>
* Funded by: Volkswagen Group Research
*/
@@ -15,14 +15,14 @@
#include "core.h"
#include "rdev-ops.h"
-int __cfg80211_join_ocb(struct cfg80211_registered_device *rdev,
- struct net_device *dev,
- struct ocb_setup *setup)
+int cfg80211_join_ocb(struct cfg80211_registered_device *rdev,
+ struct net_device *dev,
+ struct ocb_setup *setup)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
int err;
- ASSERT_WDEV_LOCK(wdev);
+ lockdep_assert_wiphy(wdev->wiphy);
if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_OCB)
return -EOPNOTSUPP;
@@ -40,27 +40,13 @@ int __cfg80211_join_ocb(struct cfg80211_registered_device *rdev,
return err;
}
-int cfg80211_join_ocb(struct cfg80211_registered_device *rdev,
- struct net_device *dev,
- struct ocb_setup *setup)
-{
- struct wireless_dev *wdev = dev->ieee80211_ptr;
- int err;
-
- wdev_lock(wdev);
- err = __cfg80211_join_ocb(rdev, dev, setup);
- wdev_unlock(wdev);
-
- return err;
-}
-
-int __cfg80211_leave_ocb(struct cfg80211_registered_device *rdev,
- struct net_device *dev)
+int cfg80211_leave_ocb(struct cfg80211_registered_device *rdev,
+ struct net_device *dev)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
int err;
- ASSERT_WDEV_LOCK(wdev);
+ lockdep_assert_wiphy(wdev->wiphy);
if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_OCB)
return -EOPNOTSUPP;
@@ -77,16 +63,3 @@ int __cfg80211_leave_ocb(struct cfg80211_registered_device *rdev,
return err;
}
-
-int cfg80211_leave_ocb(struct cfg80211_registered_device *rdev,
- struct net_device *dev)
-{
- struct wireless_dev *wdev = dev->ieee80211_ptr;
- int err;
-
- wdev_lock(wdev);
- err = __cfg80211_leave_ocb(rdev, dev);
- wdev_unlock(wdev);
-
- return err;
-}
diff --git a/net/wireless/pmsr.c b/net/wireless/pmsr.c
index 9611aa0bd051..e106dcea3977 100644
--- a/net/wireless/pmsr.c
+++ b/net/wireless/pmsr.c
@@ -600,7 +600,7 @@ static void cfg80211_pmsr_process_abort(struct wireless_dev *wdev)
struct cfg80211_pmsr_request *req, *tmp;
LIST_HEAD(free_list);
- lockdep_assert_held(&wdev->mtx);
+ lockdep_assert_wiphy(wdev->wiphy);
spin_lock_bh(&wdev->pmsr_lock);
list_for_each_entry_safe(req, tmp, &wdev->pmsr_list, list) {
@@ -623,9 +623,7 @@ void cfg80211_pmsr_free_wk(struct work_struct *work)
pmsr_free_wk);
wiphy_lock(wdev->wiphy);
- wdev_lock(wdev);
cfg80211_pmsr_process_abort(wdev);
- wdev_unlock(wdev);
wiphy_unlock(wdev->wiphy);
}
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index 90bb7ac4b930..43897a5269b6 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -173,7 +173,7 @@ static inline int rdev_start_ap(struct cfg80211_registered_device *rdev,
static inline int rdev_change_beacon(struct cfg80211_registered_device *rdev,
struct net_device *dev,
- struct cfg80211_beacon_data *info)
+ struct cfg80211_ap_update *info)
{
int ret;
trace_rdev_change_beacon(&rdev->wiphy, dev, info);
@@ -1046,7 +1046,7 @@ rdev_nan_change_conf(struct cfg80211_registered_device *rdev,
ret = rdev->ops->nan_change_conf(&rdev->wiphy, wdev, conf,
changes);
else
- ret = -ENOTSUPP;
+ ret = -EOPNOTSUPP;
trace_rdev_return_int(&rdev->wiphy, ret);
return ret;
}
@@ -1200,7 +1200,7 @@ rdev_start_radar_detection(struct cfg80211_registered_device *rdev,
struct cfg80211_chan_def *chandef,
u32 cac_time_ms)
{
- int ret = -ENOTSUPP;
+ int ret = -EOPNOTSUPP;
trace_rdev_start_radar_detection(&rdev->wiphy, dev, chandef,
cac_time_ms);
@@ -1226,7 +1226,7 @@ rdev_set_mcast_rate(struct cfg80211_registered_device *rdev,
struct net_device *dev,
int mcast_rate[NUM_NL80211_BANDS])
{
- int ret = -ENOTSUPP;
+ int ret = -EOPNOTSUPP;
trace_rdev_set_mcast_rate(&rdev->wiphy, dev, mcast_rate);
if (rdev->ops->set_mcast_rate)
@@ -1239,7 +1239,7 @@ static inline int
rdev_set_coalesce(struct cfg80211_registered_device *rdev,
struct cfg80211_coalesce *coalesce)
{
- int ret = -ENOTSUPP;
+ int ret = -EOPNOTSUPP;
trace_rdev_set_coalesce(&rdev->wiphy, coalesce);
if (rdev->ops->set_coalesce)
@@ -1524,4 +1524,22 @@ rdev_set_hw_timestamp(struct cfg80211_registered_device *rdev,
return ret;
}
+
+static inline int
+rdev_set_ttlm(struct cfg80211_registered_device *rdev,
+ struct net_device *dev,
+ struct cfg80211_ttlm_params *params)
+{
+ struct wiphy *wiphy = &rdev->wiphy;
+ int ret;
+
+ if (!rdev->ops->set_ttlm)
+ return -EOPNOTSUPP;
+
+ trace_rdev_set_ttlm(wiphy, dev, params);
+ ret = rdev->ops->set_ttlm(wiphy, dev, params);
+ trace_rdev_return_int(wiphy, ret);
+
+ return ret;
+}
#endif /* __CFG80211_RDEV_OPS */
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 0317cf9da307..2741b626919a 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -1283,7 +1283,9 @@ static bool is_valid_rd(const struct ieee80211_regdomain *rd)
* 60 GHz band.
* This resolution can be lowered and should be considered as we add
* regulatory rule support for other "bands".
- **/
+ *
+ * Returns: whether or not the frequency is in the range
+ */
static bool freq_in_rule_band(const struct ieee80211_freq_range *freq_range,
u32 freq_khz)
{
@@ -1492,6 +1494,8 @@ static void add_rule(struct ieee80211_reg_rule *rule,
* Returns a pointer to the regulatory domain structure which will hold the
* resulting intersection of rules between rd1 and rd2. We will
* kzalloc() this structure for you.
+ *
+ * Returns: the intersected regdomain
*/
static struct ieee80211_regdomain *
regdom_intersect(const struct ieee80211_regdomain *rd1,
@@ -1589,6 +1593,14 @@ static u32 map_regdom_flags(u32 rd_flags)
channel_flags |= IEEE80211_CHAN_NO_320MHZ;
if (rd_flags & NL80211_RRF_NO_EHT)
channel_flags |= IEEE80211_CHAN_NO_EHT;
+ if (rd_flags & NL80211_RRF_DFS_CONCURRENT)
+ channel_flags |= IEEE80211_CHAN_DFS_CONCURRENT;
+ if (rd_flags & NL80211_RRF_NO_UHB_VLP_CLIENT)
+ channel_flags |= IEEE80211_CHAN_NO_UHB_VLP_CLIENT;
+ if (rd_flags & NL80211_RRF_NO_UHB_AFC_CLIENT)
+ channel_flags |= IEEE80211_CHAN_NO_UHB_AFC_CLIENT;
+ if (rd_flags & NL80211_RRF_PSD)
+ channel_flags |= IEEE80211_CHAN_PSD;
return channel_flags;
}
@@ -1795,6 +1807,9 @@ static void handle_channel_single_rule(struct wiphy *wiphy,
chan->dfs_cac_ms = reg_rule->dfs_cac_ms;
}
+ if (chan->flags & IEEE80211_CHAN_PSD)
+ chan->psd = reg_rule->psd;
+
return;
}
@@ -1815,6 +1830,9 @@ static void handle_channel_single_rule(struct wiphy *wiphy,
chan->dfs_cac_ms = IEEE80211_DFS_MIN_CAC_TIME_MS;
}
+ if (chan->flags & IEEE80211_CHAN_PSD)
+ chan->psd = reg_rule->psd;
+
if (chan->orig_mpwr) {
/*
* Devices that use REGULATORY_COUNTRY_IE_FOLLOW_POWER
@@ -1884,6 +1902,12 @@ static void handle_channel_adjacent_rules(struct wiphy *wiphy,
rrule2->dfs_cac_ms);
}
+ if ((rrule1->flags & NL80211_RRF_PSD) &&
+ (rrule2->flags & NL80211_RRF_PSD))
+ chan->psd = min_t(s8, rrule1->psd, rrule2->psd);
+ else
+ chan->flags &= ~NL80211_RRF_PSD;
+
return;
}
@@ -2151,6 +2175,13 @@ static bool reg_is_world_roaming(struct wiphy *wiphy)
return false;
}
+static void reg_call_notifier(struct wiphy *wiphy,
+ struct regulatory_request *request)
+{
+ if (wiphy->reg_notifier)
+ wiphy->reg_notifier(wiphy, request);
+}
+
static void handle_reg_beacon(struct wiphy *wiphy, unsigned int chan_idx,
struct reg_beacon *reg_beacon)
{
@@ -2158,6 +2189,7 @@ static void handle_reg_beacon(struct wiphy *wiphy, unsigned int chan_idx,
struct ieee80211_channel *chan;
bool channel_changed = false;
struct ieee80211_channel chan_before;
+ struct regulatory_request *lr = get_last_request();
sband = wiphy->bands[reg_beacon->chan.band];
chan = &sband->channels[chan_idx];
@@ -2183,8 +2215,11 @@ static void handle_reg_beacon(struct wiphy *wiphy, unsigned int chan_idx,
channel_changed = true;
}
- if (channel_changed)
+ if (channel_changed) {
nl80211_send_beacon_hint_event(wiphy, &chan_before, chan);
+ if (wiphy->flags & WIPHY_FLAG_CHANNEL_CHANGE_ON_BEACON)
+ reg_call_notifier(wiphy, lr);
+ }
}
/*
@@ -2327,13 +2362,6 @@ static void reg_process_ht_flags(struct wiphy *wiphy)
reg_process_ht_flags_band(wiphy, wiphy->bands[band]);
}
-static void reg_call_notifier(struct wiphy *wiphy,
- struct regulatory_request *request)
-{
- if (wiphy->reg_notifier)
- wiphy->reg_notifier(wiphy, request);
-}
-
static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev)
{
struct cfg80211_chan_def chandef = {};
@@ -2342,12 +2370,11 @@ static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev)
bool ret;
int link;
- wdev_lock(wdev);
iftype = wdev->iftype;
/* make sure the interface is active */
if (!wdev->netdev || !netif_running(wdev->netdev))
- goto wdev_inactive_unlock;
+ return true;
for (link = 0; link < ARRAY_SIZE(wdev->links); link++) {
struct ieee80211_channel *chan;
@@ -2407,8 +2434,6 @@ static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev)
break;
}
- wdev_unlock(wdev);
-
switch (iftype) {
case NL80211_IFTYPE_AP:
case NL80211_IFTYPE_P2P_GO:
@@ -2429,16 +2454,8 @@ static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev)
default:
break;
}
-
- wdev_lock(wdev);
}
- wdev_unlock(wdev);
-
- return true;
-
-wdev_inactive_unlock:
- wdev_unlock(wdev);
return true;
}
@@ -2461,13 +2478,13 @@ static void reg_check_chans_work(struct work_struct *work)
pr_debug("Verifying active interfaces after reg change\n");
rtnl_lock();
- list_for_each_entry(rdev, &cfg80211_rdev_list, list)
+ for_each_rdev(rdev)
reg_leave_invalid_chans(&rdev->wiphy);
rtnl_unlock();
}
-static void reg_check_channels(void)
+void reg_check_channels(void)
{
/*
* Give usermode a chance to do something nicer (move to another
@@ -2515,7 +2532,7 @@ static void update_all_wiphy_regulatory(enum nl80211_reg_initiator initiator)
ASSERT_RTNL();
- list_for_each_entry(rdev, &cfg80211_rdev_list, list) {
+ for_each_rdev(rdev) {
wiphy = &rdev->wiphy;
wiphy_update_regulatory(wiphy, initiator);
}
@@ -2577,6 +2594,9 @@ static void handle_channel_custom(struct wiphy *wiphy,
chan->dfs_cac_ms = IEEE80211_DFS_MIN_CAC_TIME_MS;
}
+ if (chan->flags & IEEE80211_CHAN_PSD)
+ chan->psd = reg_rule->psd;
+
chan->max_power = chan->max_reg_power;
}
@@ -2663,6 +2683,9 @@ static void reg_set_request_processed(void)
*
* The wireless subsystem can use this function to process
* a regulatory request issued by the regulatory core.
+ *
+ * Returns: %REG_REQ_OK or %REG_REQ_IGNORE, indicating if the
+ * hint was processed or ignored
*/
static enum reg_request_treatment
reg_process_hint_core(struct regulatory_request *core_request)
@@ -2719,6 +2742,9 @@ __reg_process_hint_user(struct regulatory_request *user_request)
*
* The wireless subsystem can use this function to process
* a regulatory request initiated by userspace.
+ *
+ * Returns: %REG_REQ_OK or %REG_REQ_IGNORE, indicating if the
+ * hint was processed or ignored
*/
static enum reg_request_treatment
reg_process_hint_user(struct regulatory_request *user_request)
@@ -2774,7 +2800,7 @@ __reg_process_hint_driver(struct regulatory_request *driver_request)
* The wireless subsystem can use this function to process
* a regulatory request issued by an 802.11 driver.
*
- * Returns one of the different reg request treatment values.
+ * Returns: one of the different reg request treatment values.
*/
static enum reg_request_treatment
reg_process_hint_driver(struct wiphy *wiphy,
@@ -2878,7 +2904,7 @@ __reg_process_hint_country_ie(struct wiphy *wiphy,
* The wireless subsystem can use this function to process
* a regulatory request issued by a country Information Element.
*
- * Returns one of the different reg request treatment values.
+ * Returns: one of the different reg request treatment values.
*/
static enum reg_request_treatment
reg_process_hint_country_ie(struct wiphy *wiphy,
@@ -2991,7 +3017,7 @@ static void wiphy_all_share_dfs_chan_state(struct wiphy *wiphy)
ASSERT_RTNL();
- list_for_each_entry(rdev, &cfg80211_rdev_list, list) {
+ for_each_rdev(rdev) {
if (wiphy == &rdev->wiphy)
continue;
wiphy_share_dfs_chan_state(wiphy, &rdev->wiphy);
@@ -3057,7 +3083,7 @@ static void notify_self_managed_wiphys(struct regulatory_request *request)
struct cfg80211_registered_device *rdev;
struct wiphy *wiphy;
- list_for_each_entry(rdev, &cfg80211_rdev_list, list) {
+ for_each_rdev(rdev) {
wiphy = &rdev->wiphy;
if (wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED &&
request->initiator == NL80211_REGDOM_SET_BY_USER)
@@ -3122,7 +3148,7 @@ static void reg_process_pending_beacon_hints(void)
list_del_init(&pending_beacon->list);
/* Applies the beacon hint to current wiphys */
- list_for_each_entry(rdev, &cfg80211_rdev_list, list)
+ for_each_rdev(rdev)
wiphy_update_new_beacon(&rdev->wiphy, pending_beacon);
/* Remembers the beacon hint for new wiphys or reg changes */
@@ -3177,7 +3203,7 @@ static void reg_process_self_managed_hints(void)
ASSERT_RTNL();
- list_for_each_entry(rdev, &cfg80211_rdev_list, list) {
+ for_each_rdev(rdev) {
wiphy_lock(&rdev->wiphy);
reg_process_self_managed_hint(&rdev->wiphy);
wiphy_unlock(&rdev->wiphy);
@@ -3517,7 +3543,7 @@ static void restore_regulatory_settings(bool reset_user, bool cached)
world_alpha2[0] = cfg80211_world_regdom->alpha2[0];
world_alpha2[1] = cfg80211_world_regdom->alpha2[1];
- list_for_each_entry(rdev, &cfg80211_rdev_list, list) {
+ for_each_rdev(rdev) {
if (rdev->wiphy.regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED)
continue;
if (rdev->wiphy.regulatory_flags & REGULATORY_CUSTOM_REG)
@@ -3574,15 +3600,15 @@ static bool is_wiphy_all_set_reg_flag(enum ieee80211_regulatory_flags flag)
struct cfg80211_registered_device *rdev;
struct wireless_dev *wdev;
- list_for_each_entry(rdev, &cfg80211_rdev_list, list) {
+ for_each_rdev(rdev) {
+ wiphy_lock(&rdev->wiphy);
list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) {
- wdev_lock(wdev);
if (!(wdev->wiphy->regulatory_flags & flag)) {
- wdev_unlock(wdev);
+ wiphy_unlock(&rdev->wiphy);
return false;
}
- wdev_unlock(wdev);
}
+ wiphy_unlock(&rdev->wiphy);
}
return true;
@@ -3838,7 +3864,7 @@ static int reg_set_rd_driver(const struct ieee80211_regdomain *rd,
{
const struct ieee80211_regdomain *regd;
const struct ieee80211_regdomain *intersected_rd = NULL;
- const struct ieee80211_regdomain *tmp;
+ const struct ieee80211_regdomain *tmp = NULL;
struct wiphy *request_wiphy;
if (is_world_regdom(rd->alpha2))
@@ -3861,10 +3887,8 @@ static int reg_set_rd_driver(const struct ieee80211_regdomain *rd,
if (!driver_request->intersect) {
ASSERT_RTNL();
wiphy_lock(request_wiphy);
- if (request_wiphy->regd) {
- wiphy_unlock(request_wiphy);
- return -EALREADY;
- }
+ if (request_wiphy->regd)
+ tmp = get_wiphy_regdom(request_wiphy);
regd = reg_copy_regd(rd);
if (IS_ERR(regd)) {
@@ -3873,6 +3897,7 @@ static int reg_set_rd_driver(const struct ieee80211_regdomain *rd,
}
rcu_assign_pointer(request_wiphy->regd, regd);
+ rcu_free_regdom(tmp);
wiphy_unlock(request_wiphy);
reset_regdomains(false, rd);
return 0;
@@ -4244,7 +4269,7 @@ void regulatory_propagate_dfs_state(struct wiphy *wiphy,
if (WARN_ON(!cfg80211_chandef_valid(chandef)))
return;
- list_for_each_entry(rdev, &cfg80211_rdev_list, list) {
+ for_each_rdev(rdev) {
if (wiphy == &rdev->wiphy)
continue;
diff --git a/net/wireless/reg.h b/net/wireless/reg.h
index f3707f729024..a02ef5609f52 100644
--- a/net/wireless/reg.h
+++ b/net/wireless/reg.h
@@ -5,7 +5,7 @@
/*
* Copyright 2008-2011 Luis R. Rodriguez <mcgrof@qca.qualcomm.com>
- * Copyright (C) 2019 Intel Corporation
+ * Copyright (C) 2019, 2023 Intel Corporation
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -133,7 +133,7 @@ void regulatory_hint_disconnect(void);
/**
* cfg80211_get_unii - get the U-NII band for the frequency
* @freq: the frequency for which we want to get the UNII band.
-
+ *
* Get a value specifying the U-NII band frequency belongs to.
* U-NII bands are defined by the FCC in C.F.R 47 part 15.
*
@@ -156,11 +156,11 @@ bool regulatory_indoor_allowed(void);
/**
* regulatory_propagate_dfs_state - Propagate DFS channel state to other wiphys
- * @wiphy - wiphy on which radar is detected and the event will be propagated
+ * @wiphy: wiphy on which radar is detected and the event will be propagated
* to other available wiphys having the same DFS domain
- * @chandef - Channel definition of radar detected channel
- * @dfs_state - DFS channel state to be set
- * @event - Type of radar event which triggered this DFS state change
+ * @chandef: Channel definition of radar detected channel
+ * @dfs_state: DFS channel state to be set
+ * @event: Type of radar event which triggered this DFS state change
*
* This function should be called with rtnl lock held.
*/
@@ -171,8 +171,8 @@ void regulatory_propagate_dfs_state(struct wiphy *wiphy,
/**
* reg_dfs_domain_same - Checks if both wiphy have same DFS domain configured
- * @wiphy1 - wiphy it's dfs_region to be checked against that of wiphy2
- * @wiphy2 - wiphy it's dfs_region to be checked against that of wiphy1
+ * @wiphy1: wiphy it's dfs_region to be checked against that of wiphy2
+ * @wiphy2: wiphy it's dfs_region to be checked against that of wiphy1
*/
bool reg_dfs_domain_same(struct wiphy *wiphy1, struct wiphy *wiphy2);
@@ -181,6 +181,11 @@ bool reg_dfs_domain_same(struct wiphy *wiphy1, struct wiphy *wiphy2);
*/
int reg_reload_regdb(void);
+/**
+ * reg_check_channels - schedule regulatory enforcement
+ */
+void reg_check_channels(void);
+
extern const u8 shipped_regdb_certs[];
extern unsigned int shipped_regdb_certs_len;
extern const u8 extra_regdb_certs[];
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 0cf1ce7b6934..389a52c29bfc 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -20,6 +20,7 @@
#include <net/cfg80211.h>
#include <net/cfg80211-wext.h>
#include <net/iw_handler.h>
+#include <kunit/visibility.h>
#include "core.h"
#include "nl80211.h"
#include "wext-compat.h"
@@ -303,9 +304,10 @@ static size_t cfg80211_copy_elem_with_frags(const struct element *elem,
return *pos - buf;
}
-static size_t cfg80211_gen_new_ie(const u8 *ie, size_t ielen,
- const u8 *subie, size_t subie_len,
- u8 *new_ie, size_t new_ie_len)
+VISIBLE_IF_CFG80211_KUNIT size_t
+cfg80211_gen_new_ie(const u8 *ie, size_t ielen,
+ const u8 *subie, size_t subie_len,
+ u8 *new_ie, size_t new_ie_len)
{
const struct element *non_inherit_elem, *parent, *sub;
u8 *pos = new_ie;
@@ -413,6 +415,7 @@ static size_t cfg80211_gen_new_ie(const u8 *ie, size_t ielen,
return pos - new_ie;
}
+EXPORT_SYMBOL_IF_CFG80211_KUNIT(cfg80211_gen_new_ie);
static bool is_bss(struct cfg80211_bss *a, const u8 *bssid,
const u8 *ssid, size_t ssid_len)
@@ -830,10 +833,47 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev)
list_for_each_entry(intbss, &rdev->bss_list, list) {
struct cfg80211_bss *res = &intbss->pub;
const struct cfg80211_bss_ies *ies;
+ const struct element *ssid_elem;
+ struct cfg80211_colocated_ap *entry;
+ u32 s_ssid_tmp;
+ int ret;
ies = rcu_access_pointer(res->ies);
count += cfg80211_parse_colocated_ap(ies,
&coloc_ap_list);
+
+ /* In case the scan request specified a specific BSSID
+ * and the BSS is found and operating on 6GHz band then
+ * add this AP to the collocated APs list.
+ * This is relevant for ML probe requests when the lower
+ * band APs have not been discovered.
+ */
+ if (is_broadcast_ether_addr(rdev_req->bssid) ||
+ !ether_addr_equal(rdev_req->bssid, res->bssid) ||
+ res->channel->band != NL80211_BAND_6GHZ)
+ continue;
+
+ ret = cfg80211_calc_short_ssid(ies, &ssid_elem,
+ &s_ssid_tmp);
+ if (ret)
+ continue;
+
+ entry = kzalloc(sizeof(*entry) + IEEE80211_MAX_SSID_LEN,
+ GFP_ATOMIC);
+
+ if (!entry)
+ continue;
+
+ memcpy(entry->bssid, res->bssid, ETH_ALEN);
+ entry->short_ssid = s_ssid_tmp;
+ memcpy(entry->ssid, ssid_elem->data,
+ ssid_elem->datalen);
+ entry->ssid_len = ssid_elem->datalen;
+ entry->short_ssid_valid = true;
+ entry->center_freq = res->channel->center_freq;
+
+ list_add_tail(&entry->list, &coloc_ap_list);
+ count++;
}
spin_unlock_bh(&rdev->bss_lock);
}
@@ -908,6 +948,10 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev)
!cfg80211_find_ssid_match(ap, request))
continue;
+ if (!is_broadcast_ether_addr(request->bssid) &&
+ !ether_addr_equal(request->bssid, ap->bssid))
+ continue;
+
if (!request->n_ssids && ap->multi_bss && !ap->transmitted_bssid)
continue;
@@ -1494,12 +1538,13 @@ static bool cfg80211_bss_type_match(u16 capability,
}
/* Returned bss is reference counted and must be cleaned up appropriately. */
-struct cfg80211_bss *cfg80211_get_bss(struct wiphy *wiphy,
- struct ieee80211_channel *channel,
- const u8 *bssid,
- const u8 *ssid, size_t ssid_len,
- enum ieee80211_bss_type bss_type,
- enum ieee80211_privacy privacy)
+struct cfg80211_bss *__cfg80211_get_bss(struct wiphy *wiphy,
+ struct ieee80211_channel *channel,
+ const u8 *bssid,
+ const u8 *ssid, size_t ssid_len,
+ enum ieee80211_bss_type bss_type,
+ enum ieee80211_privacy privacy,
+ u32 use_for)
{
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
struct cfg80211_internal_bss *bss, *res = NULL;
@@ -1524,6 +1569,8 @@ struct cfg80211_bss *cfg80211_get_bss(struct wiphy *wiphy,
continue;
if (!is_valid_ether_addr(bss->pub.bssid))
continue;
+ if ((bss->pub.use_for & use_for) != use_for)
+ continue;
/* Don't get expired BSS structs */
if (time_after(now, bss->ts + IEEE80211_SCAN_RESULT_EXPIRE) &&
!atomic_read(&bss->hold))
@@ -1541,7 +1588,7 @@ struct cfg80211_bss *cfg80211_get_bss(struct wiphy *wiphy,
trace_cfg80211_return_bss(&res->pub);
return &res->pub;
}
-EXPORT_SYMBOL(cfg80211_get_bss);
+EXPORT_SYMBOL(__cfg80211_get_bss);
static void rb_insert_bss(struct cfg80211_registered_device *rdev,
struct cfg80211_internal_bss *bss)
@@ -1638,8 +1685,6 @@ static bool cfg80211_combine_bsses(struct cfg80211_registered_device *rdev,
continue;
if (bss->pub.channel != new->pub.channel)
continue;
- if (bss->pub.scan_width != new->pub.scan_width)
- continue;
if (rcu_access_pointer(bss->pub.beacon_ies))
continue;
ies = rcu_access_pointer(bss->pub.ies);
@@ -1686,6 +1731,61 @@ static void cfg80211_update_hidden_bsses(struct cfg80211_internal_bss *known,
}
}
+static void cfg80211_check_stuck_ecsa(struct cfg80211_registered_device *rdev,
+ struct cfg80211_internal_bss *known,
+ const struct cfg80211_bss_ies *old)
+{
+ const struct ieee80211_ext_chansw_ie *ecsa;
+ const struct element *elem_new, *elem_old;
+ const struct cfg80211_bss_ies *new, *bcn;
+
+ if (known->pub.proberesp_ecsa_stuck)
+ return;
+
+ new = rcu_dereference_protected(known->pub.proberesp_ies,
+ lockdep_is_held(&rdev->bss_lock));
+ if (WARN_ON(!new))
+ return;
+
+ if (new->tsf - old->tsf < USEC_PER_SEC)
+ return;
+
+ elem_old = cfg80211_find_elem(WLAN_EID_EXT_CHANSWITCH_ANN,
+ old->data, old->len);
+ if (!elem_old)
+ return;
+
+ elem_new = cfg80211_find_elem(WLAN_EID_EXT_CHANSWITCH_ANN,
+ new->data, new->len);
+ if (!elem_new)
+ return;
+
+ bcn = rcu_dereference_protected(known->pub.beacon_ies,
+ lockdep_is_held(&rdev->bss_lock));
+ if (bcn &&
+ cfg80211_find_elem(WLAN_EID_EXT_CHANSWITCH_ANN,
+ bcn->data, bcn->len))
+ return;
+
+ if (elem_new->datalen != elem_old->datalen)
+ return;
+ if (elem_new->datalen < sizeof(struct ieee80211_ext_chansw_ie))
+ return;
+ if (memcmp(elem_new->data, elem_old->data, elem_new->datalen))
+ return;
+
+ ecsa = (void *)elem_new->data;
+
+ if (!ecsa->mode)
+ return;
+
+ if (ecsa->new_ch_num !=
+ ieee80211_frequency_to_channel(known->pub.channel->center_freq))
+ return;
+
+ known->pub.proberesp_ecsa_stuck = 1;
+}
+
static bool
cfg80211_update_known_bss(struct cfg80211_registered_device *rdev,
struct cfg80211_internal_bss *known,
@@ -1705,9 +1805,13 @@ cfg80211_update_known_bss(struct cfg80211_registered_device *rdev,
/* Override possible earlier Beacon frame IEs */
rcu_assign_pointer(known->pub.ies,
new->pub.proberesp_ies);
- if (old)
+ if (old) {
+ cfg80211_check_stuck_ecsa(rdev, known, old);
kfree_rcu((struct cfg80211_bss_ies *)old, rcu_head);
- } else if (rcu_access_pointer(new->pub.beacon_ies)) {
+ }
+ }
+
+ if (rcu_access_pointer(new->pub.beacon_ies)) {
const struct cfg80211_bss_ies *old;
if (known->pub.hidden_beacon_bss &&
@@ -1761,6 +1865,8 @@ cfg80211_update_known_bss(struct cfg80211_registered_device *rdev,
ether_addr_copy(known->parent_bssid, new->parent_bssid);
known->pub.max_bssid_indicator = new->pub.max_bssid_indicator;
known->pub.bssid_index = new->pub.bssid_index;
+ known->pub.use_for &= new->pub.use_for;
+ known->pub.cannot_use_reasons = new->pub.cannot_use_reasons;
return true;
}
@@ -1772,15 +1878,15 @@ __cfg80211_bss_update(struct cfg80211_registered_device *rdev,
bool signal_valid, unsigned long ts)
{
struct cfg80211_internal_bss *found = NULL;
+ struct cfg80211_bss_ies *ies;
if (WARN_ON(!tmp->pub.channel))
- return NULL;
+ goto free_ies;
tmp->ts = ts;
- if (WARN_ON(!rcu_access_pointer(tmp->pub.ies))) {
- return NULL;
- }
+ if (WARN_ON(!rcu_access_pointer(tmp->pub.ies)))
+ goto free_ies;
found = rb_find_bss(rdev, tmp, BSS_CMP_REGULAR);
@@ -1790,7 +1896,6 @@ __cfg80211_bss_update(struct cfg80211_registered_device *rdev,
} else {
struct cfg80211_internal_bss *new;
struct cfg80211_internal_bss *hidden;
- struct cfg80211_bss_ies *ies;
/*
* create a copy -- the "res" variable that is passed in
@@ -1799,15 +1904,8 @@ __cfg80211_bss_update(struct cfg80211_registered_device *rdev,
*/
new = kzalloc(sizeof(*new) + rdev->wiphy.bss_priv_size,
GFP_ATOMIC);
- if (!new) {
- ies = (void *)rcu_dereference(tmp->pub.beacon_ies);
- if (ies)
- kfree_rcu(ies, rcu_head);
- ies = (void *)rcu_dereference(tmp->pub.proberesp_ies);
- if (ies)
- kfree_rcu(ies, rcu_head);
- return NULL;
- }
+ if (!new)
+ goto free_ies;
memcpy(new, tmp, sizeof(*new));
new->refcount = 1;
INIT_LIST_HEAD(&new->hidden_list);
@@ -1825,8 +1923,12 @@ __cfg80211_bss_update(struct cfg80211_registered_device *rdev,
list_add(&new->hidden_list,
&hidden->hidden_list);
hidden->refcount++;
+
+ ies = (void *)rcu_access_pointer(new->pub.beacon_ies);
rcu_assign_pointer(new->pub.beacon_ies,
hidden->pub.beacon_ies);
+ if (ies)
+ kfree_rcu(ies, rcu_head);
}
} else {
/*
@@ -1863,6 +1965,16 @@ __cfg80211_bss_update(struct cfg80211_registered_device *rdev,
bss_ref_get(rdev, found);
return found;
+
+free_ies:
+ ies = (void *)rcu_dereference(tmp->pub.beacon_ies);
+ if (ies)
+ kfree_rcu(ies, rcu_head);
+ ies = (void *)rcu_dereference(tmp->pub.proberesp_ies);
+ if (ies)
+ kfree_rcu(ies, rcu_head);
+
+ return NULL;
}
struct cfg80211_internal_bss *
@@ -1936,8 +2048,7 @@ EXPORT_SYMBOL(cfg80211_get_ies_channel_number);
*/
static struct ieee80211_channel *
cfg80211_get_bss_channel(struct wiphy *wiphy, const u8 *ie, size_t ielen,
- struct ieee80211_channel *channel,
- enum nl80211_bss_scan_width scan_width)
+ struct ieee80211_channel *channel)
{
u32 freq;
int channel_number;
@@ -1977,16 +2088,6 @@ cfg80211_get_bss_channel(struct wiphy *wiphy, const u8 *ie, size_t ielen,
return channel;
}
- if (scan_width == NL80211_BSS_CHAN_WIDTH_10 ||
- scan_width == NL80211_BSS_CHAN_WIDTH_5) {
- /*
- * Ignore channel number in 5 and 10 MHz channels where there
- * may not be an n:1 or 1:n mapping between frequencies and
- * channel numbers.
- */
- return channel;
- }
-
/*
* Use the channel determined through the payload channel number
* instead of the RX channel reported by the driver.
@@ -2016,6 +2117,9 @@ struct cfg80211_inform_single_bss_data {
struct cfg80211_bss *source_bss;
u8 max_bssid_indicator;
u8 bssid_index;
+
+ u8 use_for;
+ u64 cannot_use_reasons;
};
/* Returned bss is reference counted and must be cleaned up appropriately. */
@@ -2046,14 +2150,12 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy,
channel = data->channel;
if (!channel)
channel = cfg80211_get_bss_channel(wiphy, data->ie, data->ielen,
- drv_data->chan,
- drv_data->scan_width);
+ drv_data->chan);
if (!channel)
return NULL;
memcpy(tmp.pub.bssid, data->bssid, ETH_ALEN);
tmp.pub.channel = channel;
- tmp.pub.scan_width = drv_data->scan_width;
if (data->bss_source != BSS_SOURCE_STA_PROFILE)
tmp.pub.signal = drv_data->signal;
else
@@ -2063,6 +2165,8 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy,
tmp.ts_boottime = drv_data->boottime_ns;
tmp.parent_tsf = drv_data->parent_tsf;
ether_addr_copy(tmp.parent_bssid, drv_data->parent_bssid);
+ tmp.pub.use_for = data->use_for;
+ tmp.pub.cannot_use_reasons = data->cannot_use_reasons;
if (data->bss_source != BSS_SOURCE_DIRECT) {
tmp.pub.transmitted_bss = data->source_bss;
@@ -2121,7 +2225,7 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy,
if (!res)
goto drop;
- rdev_inform_bss(rdev, &res->pub, ies, data->drv_data);
+ rdev_inform_bss(rdev, &res->pub, ies, drv_data->drv_data);
if (data->bss_source == BSS_SOURCE_MBSSID) {
/* this is a nontransmitting bss, we need to add it to
@@ -2233,6 +2337,8 @@ cfg80211_parse_mbssid_data(struct wiphy *wiphy,
.beacon_interval = tx_data->beacon_interval,
.source_bss = source_bss,
.bss_source = BSS_SOURCE_MBSSID,
+ .use_for = tx_data->use_for,
+ .cannot_use_reasons = tx_data->cannot_use_reasons,
};
const u8 *mbssid_index_ie;
const struct element *elem, *sub;
@@ -2354,8 +2460,8 @@ ssize_t cfg80211_defragment_element(const struct element *elem, const u8 *ies,
/* elem might be invalid after the memmove */
next = (void *)(elem->data + elem->datalen);
-
elem_datalen = elem->datalen;
+
if (elem->id == WLAN_EID_EXTENSION) {
copied = elem->datalen - 1;
if (copied > data_len)
@@ -2376,7 +2482,7 @@ ssize_t cfg80211_defragment_element(const struct element *elem, const u8 *ies,
for (elem = next;
elem->data < ies + ieslen &&
- elem->data + elem->datalen < ies + ieslen;
+ elem->data + elem->datalen <= ies + ieslen;
elem = next) {
/* elem might be invalid after the memmove */
next = (void *)(elem->data + elem->datalen);
@@ -2495,7 +2601,7 @@ error:
return NULL;
}
-static bool
+static u8
cfg80211_tbtt_info_for_mld_ap(const u8 *ie, size_t ielen, u8 mld_id, u8 link_id,
const struct ieee80211_neighbor_ap_info **ap_info,
const u8 **tbtt_info)
@@ -2514,6 +2620,7 @@ cfg80211_tbtt_info_for_mld_ap(const u8 *ie, size_t ielen, u8 mld_id, u8 link_id,
u16 params;
u8 length, i, count, mld_params_offset;
u8 type, lid;
+ u32 use_for;
info = (void *)pos;
count = u8_get_bits(info->tbtt_info_hdr,
@@ -2523,20 +2630,22 @@ cfg80211_tbtt_info_for_mld_ap(const u8 *ie, size_t ielen, u8 mld_id, u8 link_id,
pos += sizeof(*info);
if (count * length > end - pos)
- return false;
+ return 0;
type = u8_get_bits(info->tbtt_info_hdr,
IEEE80211_AP_INFO_TBTT_HDR_TYPE);
- /* Only accept full TBTT information. NSTR mobile APs
- * use the shortened version, but we ignore them here.
- */
if (type == IEEE80211_TBTT_INFO_TYPE_TBTT &&
length >=
offsetofend(struct ieee80211_tbtt_info_ge_11,
mld_params)) {
mld_params_offset =
offsetof(struct ieee80211_tbtt_info_ge_11, mld_params);
+ use_for = NL80211_BSS_USE_FOR_ALL;
+ } else if (type == IEEE80211_TBTT_INFO_TYPE_MLD &&
+ length >= sizeof(struct ieee80211_rnr_mld_params)) {
+ mld_params_offset = 0;
+ use_for = NL80211_BSS_USE_FOR_MLD_LINK;
} else {
pos += count * length;
continue;
@@ -2554,7 +2663,7 @@ cfg80211_tbtt_info_for_mld_ap(const u8 *ie, size_t ielen, u8 mld_id, u8 link_id,
*ap_info = info;
*tbtt_info = pos;
- return true;
+ return use_for;
}
pos += length;
@@ -2562,13 +2671,15 @@ cfg80211_tbtt_info_for_mld_ap(const u8 *ie, size_t ielen, u8 mld_id, u8 link_id,
}
}
- return false;
+ return 0;
}
-static void cfg80211_parse_ml_sta_data(struct wiphy *wiphy,
- struct cfg80211_inform_single_bss_data *tx_data,
- struct cfg80211_bss *source_bss,
- gfp_t gfp)
+static void
+cfg80211_parse_ml_elem_sta_data(struct wiphy *wiphy,
+ struct cfg80211_inform_single_bss_data *tx_data,
+ struct cfg80211_bss *source_bss,
+ const struct element *elem,
+ gfp_t gfp)
{
struct cfg80211_inform_single_bss_data data = {
.drv_data = tx_data->drv_data,
@@ -2577,9 +2688,9 @@ static void cfg80211_parse_ml_sta_data(struct wiphy *wiphy,
.bss_source = BSS_SOURCE_STA_PROFILE,
};
struct ieee80211_multi_link_elem *ml_elem;
- const struct element *elem;
struct cfg80211_mle *mle;
u16 control;
+ u8 ml_common_len;
u8 *new_ie;
struct cfg80211_bss *bss;
int mld_id;
@@ -2587,15 +2698,7 @@ static void cfg80211_parse_ml_sta_data(struct wiphy *wiphy,
const u8 *pos;
u8 i;
- if (!source_bss)
- return;
-
- if (tx_data->ftype != CFG80211_BSS_FTYPE_PRESP)
- return;
-
- elem = cfg80211_find_ext_elem(WLAN_EID_EXT_EHT_MULTI_LINK,
- tx_data->ie, tx_data->ielen);
- if (!elem || !ieee80211_mle_size_ok(elem->data + 1, elem->datalen - 1))
+ if (!ieee80211_mle_size_ok(elem->data + 1, elem->datalen - 1))
return;
ml_elem = (void *)elem->data + 1;
@@ -2610,6 +2713,8 @@ static void cfg80211_parse_ml_sta_data(struct wiphy *wiphy,
!(control & IEEE80211_MLC_BASIC_PRES_MLD_CAPA_OP))
return;
+ ml_common_len = ml_elem->variable[0];
+
/* length + MLD MAC address + link ID info + BSS Params Change Count */
pos = ml_elem->variable + 1 + 6 + 1 + 1;
@@ -2621,8 +2726,11 @@ static void cfg80211_parse_ml_sta_data(struct wiphy *wiphy,
/* MLD capabilities and operations */
pos += 2;
- /* Not included when the (nontransmitted) AP is responding itself,
- * but defined to zero then (Draft P802.11be_D3.0, 9.4.2.170.2)
+ /*
+ * The MLD ID of the reporting AP is always zero. It is set if the AP
+ * is part of an MBSSID set and will be non-zero for ML Elements
+ * relating to a nontransmitted BSS (matching the Multi-BSSID Index,
+ * Draft P802.11be_D3.2, 35.3.4.2)
*/
if (u16_get_bits(control, IEEE80211_MLC_BASIC_PRES_MLD_ID)) {
mld_id = *pos;
@@ -2650,7 +2758,7 @@ static void cfg80211_parse_ml_sta_data(struct wiphy *wiphy,
const u8 *profile;
const u8 *tbtt_info;
ssize_t profile_len;
- u8 link_id;
+ u8 link_id, use_for;
if (!ieee80211_mle_basic_sta_prof_size_ok((u8 *)mle->sta_prof[i],
mle->sta_prof_len[i]))
@@ -2692,9 +2800,11 @@ static void cfg80211_parse_ml_sta_data(struct wiphy *wiphy,
profile_len -= 2;
/* Find in RNR to look up channel information */
- if (!cfg80211_tbtt_info_for_mld_ap(tx_data->ie, tx_data->ielen,
- mld_id, link_id,
- &ap_info, &tbtt_info))
+ use_for = cfg80211_tbtt_info_for_mld_ap(tx_data->ie,
+ tx_data->ielen,
+ mld_id, link_id,
+ &ap_info, &tbtt_info);
+ if (!use_for)
continue;
/* We could sanity check the BSSID is included */
@@ -2706,6 +2816,14 @@ static void cfg80211_parse_ml_sta_data(struct wiphy *wiphy,
freq = ieee80211_channel_to_freq_khz(ap_info->channel, band);
data.channel = ieee80211_get_channel_khz(wiphy, freq);
+ if (use_for == NL80211_BSS_USE_FOR_MLD_LINK &&
+ !(wiphy->flags & WIPHY_FLAG_SUPPORTS_NSTR_NONPRIMARY)) {
+ use_for = 0;
+ data.cannot_use_reasons =
+ NL80211_BSS_CANNOT_USE_NSTR_NONPRIMARY;
+ }
+ data.use_for = use_for;
+
/* Generate new elements */
memset(new_ie, 0, IEEE80211_MAX_DATA_LEN);
data.ie = new_ie;
@@ -2716,6 +2834,34 @@ static void cfg80211_parse_ml_sta_data(struct wiphy *wiphy,
if (!data.ielen)
continue;
+ /* The generated elements do not contain:
+ * - Basic ML element
+ * - A TBTT entry in the RNR for the transmitting AP
+ *
+ * This information is needed both internally and in userspace
+ * as such, we should append it here.
+ */
+ if (data.ielen + 3 + sizeof(*ml_elem) + ml_common_len >
+ IEEE80211_MAX_DATA_LEN)
+ continue;
+
+ /* Copy the Basic Multi-Link element including the common
+ * information, and then fix up the link ID.
+ * Note that the ML element length has been verified and we
+ * also checked that it contains the link ID.
+ */
+ new_ie[data.ielen++] = WLAN_EID_EXTENSION;
+ new_ie[data.ielen++] = 1 + sizeof(*ml_elem) + ml_common_len;
+ new_ie[data.ielen++] = WLAN_EID_EXT_EHT_MULTI_LINK;
+ memcpy(new_ie + data.ielen, ml_elem,
+ sizeof(*ml_elem) + ml_common_len);
+
+ new_ie[data.ielen + sizeof(*ml_elem) + 1 + ETH_ALEN] = link_id;
+
+ data.ielen += sizeof(*ml_elem) + ml_common_len;
+
+ /* TODO: Add an RNR containing only the reporting AP */
+
bss = cfg80211_inform_single_bss_data(wiphy, &data, gfp);
if (!bss)
break;
@@ -2727,6 +2873,25 @@ out:
kfree(mle);
}
+static void cfg80211_parse_ml_sta_data(struct wiphy *wiphy,
+ struct cfg80211_inform_single_bss_data *tx_data,
+ struct cfg80211_bss *source_bss,
+ gfp_t gfp)
+{
+ const struct element *elem;
+
+ if (!source_bss)
+ return;
+
+ if (tx_data->ftype != CFG80211_BSS_FTYPE_PRESP)
+ return;
+
+ for_each_element_extid(elem, WLAN_EID_EXT_EHT_MULTI_LINK,
+ tx_data->ie, tx_data->ielen)
+ cfg80211_parse_ml_elem_sta_data(wiphy, tx_data, source_bss,
+ elem, gfp);
+}
+
struct cfg80211_bss *
cfg80211_inform_bss_data(struct wiphy *wiphy,
struct cfg80211_inform_bss *data,
@@ -2743,6 +2908,10 @@ cfg80211_inform_bss_data(struct wiphy *wiphy,
.beacon_interval = beacon_interval,
.ie = ie,
.ielen = ielen,
+ .use_for = data->restrict_use ?
+ data->use_for :
+ NL80211_BSS_USE_FOR_ALL,
+ .cannot_use_reasons = data->cannot_use_reasons,
};
struct cfg80211_bss *res;
@@ -2760,6 +2929,36 @@ cfg80211_inform_bss_data(struct wiphy *wiphy,
}
EXPORT_SYMBOL(cfg80211_inform_bss_data);
+static bool cfg80211_uhb_power_type_valid(const u8 *ie,
+ size_t ielen,
+ const u32 flags)
+{
+ const struct element *tmp;
+ struct ieee80211_he_operation *he_oper;
+
+ tmp = cfg80211_find_ext_elem(WLAN_EID_EXT_HE_OPERATION, ie, ielen);
+ if (tmp && tmp->datalen >= sizeof(*he_oper) + 1) {
+ const struct ieee80211_he_6ghz_oper *he_6ghz_oper;
+
+ he_oper = (void *)&tmp->data[1];
+ he_6ghz_oper = ieee80211_he_6ghz_oper(he_oper);
+
+ if (!he_6ghz_oper)
+ return false;
+
+ switch (u8_get_bits(he_6ghz_oper->control,
+ IEEE80211_HE_6GHZ_OPER_CTRL_REG_INFO)) {
+ case IEEE80211_6GHZ_CTRL_REG_LPI_AP:
+ return true;
+ case IEEE80211_6GHZ_CTRL_REG_SP_AP:
+ return !(flags & IEEE80211_CHAN_NO_UHB_AFC_CLIENT);
+ case IEEE80211_6GHZ_CTRL_REG_VLP_AP:
+ return !(flags & IEEE80211_CHAN_NO_UHB_VLP_CLIENT);
+ }
+ }
+ return false;
+}
+
/* cfg80211_inform_bss_width_frame helper */
static struct cfg80211_bss *
cfg80211_inform_single_bss_frame_data(struct wiphy *wiphy,
@@ -2814,11 +3013,18 @@ cfg80211_inform_single_bss_frame_data(struct wiphy *wiphy,
variable = ext->u.s1g_beacon.variable;
}
- channel = cfg80211_get_bss_channel(wiphy, variable,
- ielen, data->chan, data->scan_width);
+ channel = cfg80211_get_bss_channel(wiphy, variable, ielen, data->chan);
if (!channel)
return NULL;
+ if (channel->band == NL80211_BAND_6GHZ &&
+ !cfg80211_uhb_power_type_valid(variable, ielen, channel->flags)) {
+ data->restrict_use = 1;
+ data->use_for = 0;
+ data->cannot_use_reasons =
+ NL80211_BSS_CANNOT_USE_UHB_PWR_MISMATCH;
+ }
+
if (ext) {
const struct ieee80211_s1g_bcn_compat_ie *compat;
const struct element *elem;
@@ -2868,13 +3074,16 @@ cfg80211_inform_single_bss_frame_data(struct wiphy *wiphy,
tmp.pub.beacon_interval = beacon_int;
tmp.pub.capability = capability;
tmp.pub.channel = channel;
- tmp.pub.scan_width = data->scan_width;
tmp.pub.signal = data->signal;
tmp.ts_boottime = data->boottime_ns;
tmp.parent_tsf = data->parent_tsf;
tmp.pub.chains = data->chains;
memcpy(tmp.pub.chain_signal, data->chain_signal, IEEE80211_MAX_CHAINS);
ether_addr_copy(tmp.parent_bssid, data->parent_bssid);
+ tmp.pub.use_for = data->restrict_use ?
+ data->use_for :
+ NL80211_BSS_USE_FOR_ALL;
+ tmp.pub.cannot_use_reasons = data->cannot_use_reasons;
signal_valid = data->chan == channel;
spin_lock_bh(&rdev->bss_lock);
@@ -2906,6 +3115,10 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy,
.ie = mgmt->u.probe_resp.variable,
.ielen = len - offsetof(struct ieee80211_mgmt,
u.probe_resp.variable),
+ .use_for = data->restrict_use ?
+ data->use_for :
+ NL80211_BSS_USE_FOR_ALL,
+ .cannot_use_reasons = data->cannot_use_reasons,
};
struct cfg80211_bss *res;
@@ -3056,10 +3269,9 @@ void cfg80211_update_assoc_bss_entry(struct wireless_dev *wdev,
if (new) {
/* to save time, update IEs for transmitting bss only */
- if (cfg80211_update_known_bss(rdev, cbss, new, false)) {
- new->pub.proberesp_ies = NULL;
- new->pub.beacon_ies = NULL;
- }
+ cfg80211_update_known_bss(rdev, cbss, new, false);
+ new->pub.proberesp_ies = NULL;
+ new->pub.beacon_ies = NULL;
list_for_each_entry_safe(nontrans_bss, tmp,
&new->pub.nontrans_list,
@@ -3422,59 +3634,63 @@ ieee80211_bss(struct wiphy *wiphy, struct iw_request_info *info,
cfg = (u8 *)ie + 2;
memset(&iwe, 0, sizeof(iwe));
iwe.cmd = IWEVCUSTOM;
- sprintf(buf, "Mesh Network Path Selection Protocol ID: "
- "0x%02X", cfg[0]);
- iwe.u.data.length = strlen(buf);
+ iwe.u.data.length = sprintf(buf,
+ "Mesh Network Path Selection Protocol ID: 0x%02X",
+ cfg[0]);
current_ev = iwe_stream_add_point_check(info,
current_ev,
end_buf,
&iwe, buf);
if (IS_ERR(current_ev))
goto unlock;
- sprintf(buf, "Path Selection Metric ID: 0x%02X",
- cfg[1]);
- iwe.u.data.length = strlen(buf);
+ iwe.u.data.length = sprintf(buf,
+ "Path Selection Metric ID: 0x%02X",
+ cfg[1]);
current_ev = iwe_stream_add_point_check(info,
current_ev,
end_buf,
&iwe, buf);
if (IS_ERR(current_ev))
goto unlock;
- sprintf(buf, "Congestion Control Mode ID: 0x%02X",
- cfg[2]);
- iwe.u.data.length = strlen(buf);
+ iwe.u.data.length = sprintf(buf,
+ "Congestion Control Mode ID: 0x%02X",
+ cfg[2]);
current_ev = iwe_stream_add_point_check(info,
current_ev,
end_buf,
&iwe, buf);
if (IS_ERR(current_ev))
goto unlock;
- sprintf(buf, "Synchronization ID: 0x%02X", cfg[3]);
- iwe.u.data.length = strlen(buf);
+ iwe.u.data.length = sprintf(buf,
+ "Synchronization ID: 0x%02X",
+ cfg[3]);
current_ev = iwe_stream_add_point_check(info,
current_ev,
end_buf,
&iwe, buf);
if (IS_ERR(current_ev))
goto unlock;
- sprintf(buf, "Authentication ID: 0x%02X", cfg[4]);
- iwe.u.data.length = strlen(buf);
+ iwe.u.data.length = sprintf(buf,
+ "Authentication ID: 0x%02X",
+ cfg[4]);
current_ev = iwe_stream_add_point_check(info,
current_ev,
end_buf,
&iwe, buf);
if (IS_ERR(current_ev))
goto unlock;
- sprintf(buf, "Formation Info: 0x%02X", cfg[5]);
- iwe.u.data.length = strlen(buf);
+ iwe.u.data.length = sprintf(buf,
+ "Formation Info: 0x%02X",
+ cfg[5]);
current_ev = iwe_stream_add_point_check(info,
current_ev,
end_buf,
&iwe, buf);
if (IS_ERR(current_ev))
goto unlock;
- sprintf(buf, "Capabilities: 0x%02X", cfg[6]);
- iwe.u.data.length = strlen(buf);
+ iwe.u.data.length = sprintf(buf,
+ "Capabilities: 0x%02X",
+ cfg[6]);
current_ev = iwe_stream_add_point_check(info,
current_ev,
end_buf,
@@ -3530,17 +3746,16 @@ ieee80211_bss(struct wiphy *wiphy, struct iw_request_info *info,
memset(&iwe, 0, sizeof(iwe));
iwe.cmd = IWEVCUSTOM;
- sprintf(buf, "tsf=%016llx", (unsigned long long)(ies->tsf));
- iwe.u.data.length = strlen(buf);
+ iwe.u.data.length = sprintf(buf, "tsf=%016llx",
+ (unsigned long long)(ies->tsf));
current_ev = iwe_stream_add_point_check(info, current_ev, end_buf,
&iwe, buf);
if (IS_ERR(current_ev))
goto unlock;
memset(&iwe, 0, sizeof(iwe));
iwe.cmd = IWEVCUSTOM;
- sprintf(buf, " Last beacon: %ums ago",
- elapsed_jiffies_msecs(bss->ts));
- iwe.u.data.length = strlen(buf);
+ iwe.u.data.length = sprintf(buf, " Last beacon: %ums ago",
+ elapsed_jiffies_msecs(bss->ts));
current_ev = iwe_stream_add_point_check(info, current_ev,
end_buf, &iwe, buf);
if (IS_ERR(current_ev))
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index 9bba233b5a6e..195c8532734b 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -67,7 +67,7 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev)
struct cfg80211_scan_request *request;
int n_channels, err;
- ASSERT_WDEV_LOCK(wdev);
+ lockdep_assert_wiphy(wdev->wiphy);
if (rdev->scan_req || rdev->scan_msg)
return -EBUSY;
@@ -151,7 +151,7 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev,
struct cfg80211_assoc_request req = {};
int err;
- ASSERT_WDEV_LOCK(wdev);
+ lockdep_assert_wiphy(wdev->wiphy);
if (!wdev->conn)
return 0;
@@ -255,16 +255,13 @@ void cfg80211_conn_work(struct work_struct *work)
if (!wdev->netdev)
continue;
- wdev_lock(wdev);
- if (!netif_running(wdev->netdev)) {
- wdev_unlock(wdev);
+ if (!netif_running(wdev->netdev))
continue;
- }
+
if (!wdev->conn ||
- wdev->conn->state == CFG80211_CONN_CONNECTED) {
- wdev_unlock(wdev);
+ wdev->conn->state == CFG80211_CONN_CONNECTED)
continue;
- }
+
if (wdev->conn->params.bssid) {
memcpy(bssid_buf, wdev->conn->params.bssid, ETH_ALEN);
bssid = bssid_buf;
@@ -279,7 +276,6 @@ void cfg80211_conn_work(struct work_struct *work)
cr.timeout_reason = treason;
__cfg80211_connect_result(wdev->netdev, &cr, false);
}
- wdev_unlock(wdev);
}
wiphy_unlock(&rdev->wiphy);
@@ -300,7 +296,7 @@ static struct cfg80211_bss *cfg80211_get_conn_bss(struct wireless_dev *wdev)
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
struct cfg80211_bss *bss;
- ASSERT_WDEV_LOCK(wdev);
+ lockdep_assert_wiphy(wdev->wiphy);
bss = cfg80211_get_bss(wdev->wiphy, wdev->conn->params.channel,
wdev->conn->params.bssid,
@@ -317,13 +313,13 @@ static struct cfg80211_bss *cfg80211_get_conn_bss(struct wireless_dev *wdev)
return bss;
}
-static void __cfg80211_sme_scan_done(struct net_device *dev)
+void cfg80211_sme_scan_done(struct net_device *dev)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
struct cfg80211_bss *bss;
- ASSERT_WDEV_LOCK(wdev);
+ lockdep_assert_wiphy(wdev->wiphy);
if (!wdev->conn)
return;
@@ -339,15 +335,6 @@ static void __cfg80211_sme_scan_done(struct net_device *dev)
schedule_work(&rdev->conn_work);
}
-void cfg80211_sme_scan_done(struct net_device *dev)
-{
- struct wireless_dev *wdev = dev->ieee80211_ptr;
-
- wdev_lock(wdev);
- __cfg80211_sme_scan_done(dev);
- wdev_unlock(wdev);
-}
-
void cfg80211_sme_rx_auth(struct wireless_dev *wdev, const u8 *buf, size_t len)
{
struct wiphy *wiphy = wdev->wiphy;
@@ -355,7 +342,7 @@ void cfg80211_sme_rx_auth(struct wireless_dev *wdev, const u8 *buf, size_t len)
struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf;
u16 status_code = le16_to_cpu(mgmt->u.auth.status_code);
- ASSERT_WDEV_LOCK(wdev);
+ lockdep_assert_wiphy(wdev->wiphy);
if (!wdev->conn || wdev->conn->state == CFG80211_CONN_CONNECTED)
return;
@@ -702,14 +689,14 @@ static bool cfg80211_is_all_idle(void)
* need not issue a disconnect hint and reset any info such
* as chan dfs state, etc.
*/
- list_for_each_entry(rdev, &cfg80211_rdev_list, list) {
+ for_each_rdev(rdev) {
+ wiphy_lock(&rdev->wiphy);
list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) {
- wdev_lock(wdev);
if (wdev->conn || wdev->connected ||
cfg80211_beaconing_iface_active(wdev))
is_all_idle = false;
- wdev_unlock(wdev);
}
+ wiphy_unlock(&rdev->wiphy);
}
return is_all_idle;
@@ -761,7 +748,7 @@ void __cfg80211_connect_result(struct net_device *dev,
const u8 *connected_addr;
bool bss_not_found = false;
- ASSERT_WDEV_LOCK(wdev);
+ lockdep_assert_wiphy(wdev->wiphy);
if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION &&
wdev->iftype != NL80211_IFTYPE_P2P_CLIENT))
@@ -1093,7 +1080,7 @@ void __cfg80211_roamed(struct wireless_dev *wdev,
unsigned int link;
const u8 *connected_addr;
- ASSERT_WDEV_LOCK(wdev);
+ lockdep_assert_wiphy(wdev->wiphy);
if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION &&
wdev->iftype != NL80211_IFTYPE_P2P_CLIENT))
@@ -1294,24 +1281,29 @@ out:
}
EXPORT_SYMBOL(cfg80211_roamed);
-void __cfg80211_port_authorized(struct wireless_dev *wdev, const u8 *bssid,
+void __cfg80211_port_authorized(struct wireless_dev *wdev, const u8 *peer_addr,
const u8 *td_bitmap, u8 td_bitmap_len)
{
- ASSERT_WDEV_LOCK(wdev);
+ lockdep_assert_wiphy(wdev->wiphy);
if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION &&
- wdev->iftype != NL80211_IFTYPE_P2P_CLIENT))
+ wdev->iftype != NL80211_IFTYPE_P2P_CLIENT &&
+ wdev->iftype != NL80211_IFTYPE_AP &&
+ wdev->iftype != NL80211_IFTYPE_P2P_GO))
return;
- if (WARN_ON(!wdev->connected) ||
- WARN_ON(!ether_addr_equal(wdev->u.client.connected_addr, bssid)))
- return;
+ if (wdev->iftype == NL80211_IFTYPE_STATION ||
+ wdev->iftype == NL80211_IFTYPE_P2P_CLIENT) {
+ if (WARN_ON(!wdev->connected) ||
+ WARN_ON(!ether_addr_equal(wdev->u.client.connected_addr, peer_addr)))
+ return;
+ }
nl80211_send_port_authorized(wiphy_to_rdev(wdev->wiphy), wdev->netdev,
- bssid, td_bitmap, td_bitmap_len);
+ peer_addr, td_bitmap, td_bitmap_len);
}
-void cfg80211_port_authorized(struct net_device *dev, const u8 *bssid,
+void cfg80211_port_authorized(struct net_device *dev, const u8 *peer_addr,
const u8 *td_bitmap, u8 td_bitmap_len, gfp_t gfp)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
@@ -1319,7 +1311,7 @@ void cfg80211_port_authorized(struct net_device *dev, const u8 *bssid,
struct cfg80211_event *ev;
unsigned long flags;
- if (WARN_ON(!bssid))
+ if (WARN_ON(!peer_addr))
return;
ev = kzalloc(sizeof(*ev) + td_bitmap_len, gfp);
@@ -1327,7 +1319,7 @@ void cfg80211_port_authorized(struct net_device *dev, const u8 *bssid,
return;
ev->type = EVENT_PORT_AUTHORIZED;
- memcpy(ev->pa.bssid, bssid, ETH_ALEN);
+ memcpy(ev->pa.peer_addr, peer_addr, ETH_ALEN);
ev->pa.td_bitmap = ((u8 *)ev) + sizeof(*ev);
ev->pa.td_bitmap_len = td_bitmap_len;
memcpy((void *)ev->pa.td_bitmap, td_bitmap, td_bitmap_len);
@@ -1353,7 +1345,7 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie,
union iwreq_data wrqu;
#endif
- ASSERT_WDEV_LOCK(wdev);
+ lockdep_assert_wiphy(wdev->wiphy);
if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION &&
wdev->iftype != NL80211_IFTYPE_P2P_CLIENT))
@@ -1402,6 +1394,8 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie,
#endif
schedule_work(&cfg80211_disconnect_work);
+
+ cfg80211_schedule_channels_check(wdev);
}
void cfg80211_disconnected(struct net_device *dev, u16 reason,
@@ -1443,7 +1437,7 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev,
struct wireless_dev *wdev = dev->ieee80211_ptr;
int err;
- ASSERT_WDEV_LOCK(wdev);
+ lockdep_assert_wiphy(wdev->wiphy);
/*
* If we have an ssid_len, we're trying to connect or are
@@ -1549,7 +1543,7 @@ int cfg80211_disconnect(struct cfg80211_registered_device *rdev,
struct wireless_dev *wdev = dev->ieee80211_ptr;
int err = 0;
- ASSERT_WDEV_LOCK(wdev);
+ lockdep_assert_wiphy(wdev->wiphy);
kfree_sensitive(wdev->connect_keys);
wdev->connect_keys = NULL;
@@ -1585,19 +1579,18 @@ void cfg80211_autodisconnect_wk(struct work_struct *work)
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
wiphy_lock(wdev->wiphy);
- wdev_lock(wdev);
if (wdev->conn_owner_nlportid) {
switch (wdev->iftype) {
case NL80211_IFTYPE_ADHOC:
- __cfg80211_leave_ibss(rdev, wdev->netdev, false);
+ cfg80211_leave_ibss(rdev, wdev->netdev, false);
break;
case NL80211_IFTYPE_AP:
case NL80211_IFTYPE_P2P_GO:
- __cfg80211_stop_ap(rdev, wdev->netdev, -1, false);
+ cfg80211_stop_ap(rdev, wdev->netdev, -1, false);
break;
case NL80211_IFTYPE_MESH_POINT:
- __cfg80211_leave_mesh(rdev, wdev->netdev);
+ cfg80211_leave_mesh(rdev, wdev->netdev);
break;
case NL80211_IFTYPE_STATION:
case NL80211_IFTYPE_P2P_CLIENT:
@@ -1622,6 +1615,5 @@ void cfg80211_autodisconnect_wk(struct work_struct *work)
}
}
- wdev_unlock(wdev);
wiphy_unlock(wdev->wiphy);
}
diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c
index c629bac3f298..565511a3f461 100644
--- a/net/wireless/sysfs.c
+++ b/net/wireless/sysfs.c
@@ -105,14 +105,14 @@ static int wiphy_suspend(struct device *dev)
cfg80211_leave_all(rdev);
cfg80211_process_rdev_events(rdev);
}
- cfg80211_process_wiphy_works(rdev);
+ cfg80211_process_wiphy_works(rdev, NULL);
if (rdev->ops->suspend)
ret = rdev_suspend(rdev, rdev->wiphy.wowlan_config);
if (ret == 1) {
/* Driver refuse to configure wowlan */
cfg80211_leave_all(rdev);
cfg80211_process_rdev_events(rdev);
- cfg80211_process_wiphy_works(rdev);
+ cfg80211_process_wiphy_works(rdev, NULL);
ret = rdev_suspend(rdev, NULL);
}
if (ret == 0)
diff --git a/net/wireless/tests/Makefile b/net/wireless/tests/Makefile
new file mode 100644
index 000000000000..1f6622fcb758
--- /dev/null
+++ b/net/wireless/tests/Makefile
@@ -0,0 +1,3 @@
+cfg80211-tests-y += module.o fragmentation.o scan.o util.o
+
+obj-$(CONFIG_CFG80211_KUNIT_TEST) += cfg80211-tests.o
diff --git a/net/wireless/tests/fragmentation.c b/net/wireless/tests/fragmentation.c
new file mode 100644
index 000000000000..49a339ca8880
--- /dev/null
+++ b/net/wireless/tests/fragmentation.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * KUnit tests for element fragmentation
+ *
+ * Copyright (C) 2023 Intel Corporation
+ */
+#include <linux/ieee80211.h>
+#include <net/cfg80211.h>
+#include <kunit/test.h>
+
+static void defragment_0(struct kunit *test)
+{
+ ssize_t ret;
+ static const u8 input[] = {
+ [0] = WLAN_EID_EXTENSION,
+ [1] = 254,
+ [2] = WLAN_EID_EXT_EHT_MULTI_LINK,
+ [27] = 27,
+ [123] = 123,
+ [254 + 2] = WLAN_EID_FRAGMENT,
+ [254 + 3] = 7,
+ [254 + 3 + 7] = 0, /* for size */
+ };
+ u8 *data = kunit_kzalloc(test, sizeof(input), GFP_KERNEL);
+
+ KUNIT_ASSERT_NOT_NULL(test, data);
+
+ ret = cfg80211_defragment_element((void *)input,
+ input, sizeof(input),
+ data, sizeof(input),
+ WLAN_EID_FRAGMENT);
+ KUNIT_EXPECT_EQ(test, ret, 253);
+ KUNIT_EXPECT_MEMEQ(test, data, input + 3, 253);
+}
+
+static void defragment_1(struct kunit *test)
+{
+ ssize_t ret;
+ static const u8 input[] = {
+ [0] = WLAN_EID_EXTENSION,
+ [1] = 255,
+ [2] = WLAN_EID_EXT_EHT_MULTI_LINK,
+ [27] = 27,
+ [123] = 123,
+ [255 + 2] = WLAN_EID_FRAGMENT,
+ [255 + 3] = 7,
+ [255 + 3 + 1] = 0xaa,
+ [255 + 3 + 8] = WLAN_EID_FRAGMENT, /* not used */
+ [255 + 3 + 9] = 1,
+ [255 + 3 + 10] = 0, /* for size */
+ };
+ u8 *data = kunit_kzalloc(test, sizeof(input), GFP_KERNEL);
+ const struct element *elem;
+ int count = 0;
+
+ KUNIT_ASSERT_NOT_NULL(test, data);
+
+ for_each_element(elem, input, sizeof(input))
+ count++;
+
+ /* check the elements are right */
+ KUNIT_ASSERT_EQ(test, count, 3);
+
+ ret = cfg80211_defragment_element((void *)input,
+ input, sizeof(input),
+ data, sizeof(input),
+ WLAN_EID_FRAGMENT);
+ /* this means the last fragment was not used */
+ KUNIT_EXPECT_EQ(test, ret, 254 + 7);
+ KUNIT_EXPECT_MEMEQ(test, data, input + 3, 254);
+ KUNIT_EXPECT_MEMEQ(test, data + 254, input + 255 + 4, 7);
+}
+
+static void defragment_2(struct kunit *test)
+{
+ ssize_t ret;
+ static const u8 input[] = {
+ [0] = WLAN_EID_EXTENSION,
+ [1] = 255,
+ [2] = WLAN_EID_EXT_EHT_MULTI_LINK,
+ [27] = 27,
+ [123] = 123,
+
+ [257 + 0] = WLAN_EID_FRAGMENT,
+ [257 + 1] = 255,
+ [257 + 20] = 0xaa,
+
+ [2 * 257 + 0] = WLAN_EID_FRAGMENT,
+ [2 * 257 + 1] = 1,
+ [2 * 257 + 2] = 0xcc,
+ [2 * 257 + 3] = WLAN_EID_FRAGMENT, /* not used */
+ [2 * 257 + 4] = 1,
+ [2 * 257 + 5] = 0, /* for size */
+ };
+ u8 *data = kunit_kzalloc(test, sizeof(input), GFP_KERNEL);
+ const struct element *elem;
+ int count = 0;
+
+ KUNIT_ASSERT_NOT_NULL(test, data);
+
+ for_each_element(elem, input, sizeof(input))
+ count++;
+
+ /* check the elements are right */
+ KUNIT_ASSERT_EQ(test, count, 4);
+
+ ret = cfg80211_defragment_element((void *)input,
+ input, sizeof(input),
+ data, sizeof(input),
+ WLAN_EID_FRAGMENT);
+ /* this means the last fragment was not used */
+ KUNIT_EXPECT_EQ(test, ret, 254 + 255 + 1);
+ KUNIT_EXPECT_MEMEQ(test, data, input + 3, 254);
+ KUNIT_EXPECT_MEMEQ(test, data + 254, input + 257 + 2, 255);
+ KUNIT_EXPECT_MEMEQ(test, data + 254 + 255, input + 2 * 257 + 2, 1);
+}
+
+static void defragment_at_end(struct kunit *test)
+{
+ ssize_t ret;
+ static const u8 input[] = {
+ [0] = WLAN_EID_EXTENSION,
+ [1] = 255,
+ [2] = WLAN_EID_EXT_EHT_MULTI_LINK,
+ [27] = 27,
+ [123] = 123,
+ [255 + 2] = WLAN_EID_FRAGMENT,
+ [255 + 3] = 7,
+ [255 + 3 + 7] = 0, /* for size */
+ };
+ u8 *data = kunit_kzalloc(test, sizeof(input), GFP_KERNEL);
+
+ KUNIT_ASSERT_NOT_NULL(test, data);
+
+ ret = cfg80211_defragment_element((void *)input,
+ input, sizeof(input),
+ data, sizeof(input),
+ WLAN_EID_FRAGMENT);
+ KUNIT_EXPECT_EQ(test, ret, 254 + 7);
+ KUNIT_EXPECT_MEMEQ(test, data, input + 3, 254);
+ KUNIT_EXPECT_MEMEQ(test, data + 254, input + 255 + 4, 7);
+}
+
+static struct kunit_case element_fragmentation_test_cases[] = {
+ KUNIT_CASE(defragment_0),
+ KUNIT_CASE(defragment_1),
+ KUNIT_CASE(defragment_2),
+ KUNIT_CASE(defragment_at_end),
+ {}
+};
+
+static struct kunit_suite element_fragmentation = {
+ .name = "cfg80211-element-defragmentation",
+ .test_cases = element_fragmentation_test_cases,
+};
+
+kunit_test_suite(element_fragmentation);
diff --git a/net/wireless/tests/module.c b/net/wireless/tests/module.c
new file mode 100644
index 000000000000..9ff7b2c12312
--- /dev/null
+++ b/net/wireless/tests/module.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * This is just module boilerplate for the cfg80211 kunit module.
+ *
+ * Copyright (C) 2023 Intel Corporation
+ */
+#include <linux/module.h>
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("tests for cfg80211");
diff --git a/net/wireless/tests/scan.c b/net/wireless/tests/scan.c
new file mode 100644
index 000000000000..77854161cd22
--- /dev/null
+++ b/net/wireless/tests/scan.c
@@ -0,0 +1,625 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * KUnit tests for inform_bss functions
+ *
+ * Copyright (C) 2023 Intel Corporation
+ */
+#include <linux/ieee80211.h>
+#include <net/cfg80211.h>
+#include <kunit/test.h>
+#include <kunit/skbuff.h>
+#include "../core.h"
+#include "util.h"
+
+/* mac80211 helpers for element building */
+#include "../../mac80211/ieee80211_i.h"
+
+MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
+
+struct test_elem {
+ u8 id;
+ u8 len;
+ union {
+ u8 data[255];
+ struct {
+ u8 eid;
+ u8 edata[254];
+ };
+ };
+};
+
+static struct gen_new_ie_case {
+ const char *desc;
+ struct test_elem parent_ies[16];
+ struct test_elem child_ies[16];
+ struct test_elem result_ies[16];
+} gen_new_ie_cases[] = {
+ {
+ .desc = "ML not inherited",
+ .parent_ies = {
+ { .id = WLAN_EID_EXTENSION, .len = 255,
+ .eid = WLAN_EID_EXT_EHT_MULTI_LINK },
+ },
+ .child_ies = {
+ { .id = WLAN_EID_SSID, .len = 2 },
+ },
+ .result_ies = {
+ { .id = WLAN_EID_SSID, .len = 2 },
+ },
+ },
+ {
+ .desc = "fragments are ignored if previous len not 255",
+ .parent_ies = {
+ { .id = WLAN_EID_REDUCED_NEIGHBOR_REPORT, .len = 254, },
+ { .id = WLAN_EID_FRAGMENT, .len = 125, },
+ },
+ .child_ies = {
+ { .id = WLAN_EID_SSID, .len = 2 },
+ { .id = WLAN_EID_FRAGMENT, .len = 125, },
+ },
+ .result_ies = {
+ { .id = WLAN_EID_REDUCED_NEIGHBOR_REPORT, .len = 254, },
+ { .id = WLAN_EID_SSID, .len = 2 },
+ },
+ },
+ {
+ .desc = "fragments inherited",
+ .parent_ies = {
+ { .id = WLAN_EID_REDUCED_NEIGHBOR_REPORT, .len = 255, },
+ { .id = WLAN_EID_FRAGMENT, .len = 125, },
+ },
+ .child_ies = {
+ { .id = WLAN_EID_SSID, .len = 2 },
+ },
+ .result_ies = {
+ { .id = WLAN_EID_REDUCED_NEIGHBOR_REPORT, .len = 255, },
+ { .id = WLAN_EID_FRAGMENT, .len = 125, },
+ { .id = WLAN_EID_SSID, .len = 2 },
+ },
+ },
+ {
+ .desc = "fragments copied",
+ .parent_ies = {
+ { .id = WLAN_EID_REDUCED_NEIGHBOR_REPORT, .len = 255, },
+ { .id = WLAN_EID_FRAGMENT, .len = 125, },
+ },
+ .child_ies = {
+ { .id = WLAN_EID_SSID, .len = 2 },
+ },
+ .result_ies = {
+ { .id = WLAN_EID_REDUCED_NEIGHBOR_REPORT, .len = 255, },
+ { .id = WLAN_EID_FRAGMENT, .len = 125, },
+ { .id = WLAN_EID_SSID, .len = 2 },
+ },
+ },
+ {
+ .desc = "multiple elements inherit",
+ .parent_ies = {
+ { .id = WLAN_EID_REDUCED_NEIGHBOR_REPORT, .len = 255, },
+ { .id = WLAN_EID_FRAGMENT, .len = 125, },
+ { .id = WLAN_EID_REDUCED_NEIGHBOR_REPORT, .len = 123, },
+ },
+ .child_ies = {
+ { .id = WLAN_EID_SSID, .len = 2 },
+ },
+ .result_ies = {
+ { .id = WLAN_EID_REDUCED_NEIGHBOR_REPORT, .len = 255, },
+ { .id = WLAN_EID_FRAGMENT, .len = 125, },
+ { .id = WLAN_EID_REDUCED_NEIGHBOR_REPORT, .len = 123, },
+ { .id = WLAN_EID_SSID, .len = 2 },
+ },
+ },
+ {
+ .desc = "one child element overrides",
+ .parent_ies = {
+ { .id = WLAN_EID_REDUCED_NEIGHBOR_REPORT, .len = 255, },
+ { .id = WLAN_EID_FRAGMENT, .len = 125, },
+ { .id = WLAN_EID_REDUCED_NEIGHBOR_REPORT, .len = 123, },
+ },
+ .child_ies = {
+ { .id = WLAN_EID_REDUCED_NEIGHBOR_REPORT, .len = 127, },
+ { .id = WLAN_EID_SSID, .len = 2 },
+ },
+ .result_ies = {
+ { .id = WLAN_EID_REDUCED_NEIGHBOR_REPORT, .len = 127, },
+ { .id = WLAN_EID_SSID, .len = 2 },
+ },
+ },
+ {
+ .desc = "empty elements from parent",
+ .parent_ies = {
+ { .id = 0x1, .len = 0, },
+ { .id = WLAN_EID_EXTENSION, .len = 1, .eid = 0x10 },
+ },
+ .child_ies = {
+ },
+ .result_ies = {
+ { .id = 0x1, .len = 0, },
+ { .id = WLAN_EID_EXTENSION, .len = 1, .eid = 0x10 },
+ },
+ },
+ {
+ .desc = "empty elements from child",
+ .parent_ies = {
+ },
+ .child_ies = {
+ { .id = 0x1, .len = 0, },
+ { .id = WLAN_EID_EXTENSION, .len = 1, .eid = 0x10 },
+ },
+ .result_ies = {
+ { .id = 0x1, .len = 0, },
+ { .id = WLAN_EID_EXTENSION, .len = 1, .eid = 0x10 },
+ },
+ },
+ {
+ .desc = "invalid extended elements ignored",
+ .parent_ies = {
+ { .id = WLAN_EID_EXTENSION, .len = 0 },
+ },
+ .child_ies = {
+ { .id = WLAN_EID_EXTENSION, .len = 0 },
+ },
+ .result_ies = {
+ },
+ },
+ {
+ .desc = "multiple extended elements",
+ .parent_ies = {
+ { .id = WLAN_EID_EXTENSION, .len = 3,
+ .eid = WLAN_EID_EXT_HE_CAPABILITY },
+ { .id = WLAN_EID_EXTENSION, .len = 5,
+ .eid = WLAN_EID_EXT_ASSOC_DELAY_INFO },
+ { .id = WLAN_EID_EXTENSION, .len = 7,
+ .eid = WLAN_EID_EXT_HE_OPERATION },
+ { .id = WLAN_EID_EXTENSION, .len = 11,
+ .eid = WLAN_EID_EXT_FILS_REQ_PARAMS },
+ },
+ .child_ies = {
+ { .id = WLAN_EID_SSID, .len = 13 },
+ { .id = WLAN_EID_EXTENSION, .len = 17,
+ .eid = WLAN_EID_EXT_HE_CAPABILITY },
+ { .id = WLAN_EID_EXTENSION, .len = 11,
+ .eid = WLAN_EID_EXT_FILS_KEY_CONFIRM },
+ { .id = WLAN_EID_EXTENSION, .len = 19,
+ .eid = WLAN_EID_EXT_HE_OPERATION },
+ },
+ .result_ies = {
+ { .id = WLAN_EID_EXTENSION, .len = 17,
+ .eid = WLAN_EID_EXT_HE_CAPABILITY },
+ { .id = WLAN_EID_EXTENSION, .len = 5,
+ .eid = WLAN_EID_EXT_ASSOC_DELAY_INFO },
+ { .id = WLAN_EID_EXTENSION, .len = 19,
+ .eid = WLAN_EID_EXT_HE_OPERATION },
+ { .id = WLAN_EID_EXTENSION, .len = 11,
+ .eid = WLAN_EID_EXT_FILS_REQ_PARAMS },
+ { .id = WLAN_EID_SSID, .len = 13 },
+ { .id = WLAN_EID_EXTENSION, .len = 11,
+ .eid = WLAN_EID_EXT_FILS_KEY_CONFIRM },
+ },
+ },
+ {
+ .desc = "non-inherit element",
+ .parent_ies = {
+ { .id = 0x1, .len = 7, },
+ { .id = 0x2, .len = 11, },
+ { .id = 0x3, .len = 13, },
+ { .id = WLAN_EID_EXTENSION, .len = 17, .eid = 0x10 },
+ { .id = WLAN_EID_EXTENSION, .len = 19, .eid = 0x11 },
+ { .id = WLAN_EID_EXTENSION, .len = 23, .eid = 0x12 },
+ { .id = WLAN_EID_EXTENSION, .len = 29, .eid = 0x14 },
+ },
+ .child_ies = {
+ { .id = WLAN_EID_EXTENSION,
+ .eid = WLAN_EID_EXT_NON_INHERITANCE,
+ .len = 10,
+ .edata = { 0x3, 0x1, 0x2, 0x3,
+ 0x4, 0x10, 0x11, 0x13, 0x14 } },
+ { .id = WLAN_EID_SSID, .len = 2 },
+ },
+ .result_ies = {
+ { .id = WLAN_EID_EXTENSION, .len = 23, .eid = 0x12 },
+ { .id = WLAN_EID_SSID, .len = 2 },
+ },
+ },
+};
+KUNIT_ARRAY_PARAM_DESC(gen_new_ie, gen_new_ie_cases, desc)
+
+static void test_gen_new_ie(struct kunit *test)
+{
+ const struct gen_new_ie_case *params = test->param_value;
+ struct sk_buff *parent = kunit_zalloc_skb(test, 1024, GFP_KERNEL);
+ struct sk_buff *child = kunit_zalloc_skb(test, 1024, GFP_KERNEL);
+ struct sk_buff *reference = kunit_zalloc_skb(test, 1024, GFP_KERNEL);
+ u8 *out = kunit_kzalloc(test, IEEE80211_MAX_DATA_LEN, GFP_KERNEL);
+ size_t len;
+ int i;
+
+ KUNIT_ASSERT_NOT_NULL(test, parent);
+ KUNIT_ASSERT_NOT_NULL(test, child);
+ KUNIT_ASSERT_NOT_NULL(test, reference);
+ KUNIT_ASSERT_NOT_NULL(test, out);
+
+ for (i = 0; i < ARRAY_SIZE(params->parent_ies); i++) {
+ if (params->parent_ies[i].len != 0) {
+ skb_put_u8(parent, params->parent_ies[i].id);
+ skb_put_u8(parent, params->parent_ies[i].len);
+ skb_put_data(parent, params->parent_ies[i].data,
+ params->parent_ies[i].len);
+ }
+
+ if (params->child_ies[i].len != 0) {
+ skb_put_u8(child, params->child_ies[i].id);
+ skb_put_u8(child, params->child_ies[i].len);
+ skb_put_data(child, params->child_ies[i].data,
+ params->child_ies[i].len);
+ }
+
+ if (params->result_ies[i].len != 0) {
+ skb_put_u8(reference, params->result_ies[i].id);
+ skb_put_u8(reference, params->result_ies[i].len);
+ skb_put_data(reference, params->result_ies[i].data,
+ params->result_ies[i].len);
+ }
+ }
+
+ len = cfg80211_gen_new_ie(parent->data, parent->len,
+ child->data, child->len,
+ out, IEEE80211_MAX_DATA_LEN);
+ KUNIT_EXPECT_EQ(test, len, reference->len);
+ KUNIT_EXPECT_MEMEQ(test, out, reference->data, reference->len);
+ memset(out, 0, IEEE80211_MAX_DATA_LEN);
+
+ /* Exactly enough space */
+ len = cfg80211_gen_new_ie(parent->data, parent->len,
+ child->data, child->len,
+ out, reference->len);
+ KUNIT_EXPECT_EQ(test, len, reference->len);
+ KUNIT_EXPECT_MEMEQ(test, out, reference->data, reference->len);
+ memset(out, 0, IEEE80211_MAX_DATA_LEN);
+
+ /* Not enough space (or expected zero length) */
+ len = cfg80211_gen_new_ie(parent->data, parent->len,
+ child->data, child->len,
+ out, reference->len - 1);
+ KUNIT_EXPECT_EQ(test, len, 0);
+}
+
+static void test_gen_new_ie_malformed(struct kunit *test)
+{
+ struct sk_buff *malformed = kunit_zalloc_skb(test, 1024, GFP_KERNEL);
+ u8 *out = kunit_kzalloc(test, IEEE80211_MAX_DATA_LEN, GFP_KERNEL);
+ size_t len;
+
+ KUNIT_ASSERT_NOT_NULL(test, malformed);
+ KUNIT_ASSERT_NOT_NULL(test, out);
+
+ skb_put_u8(malformed, WLAN_EID_SSID);
+ skb_put_u8(malformed, 3);
+ skb_put(malformed, 3);
+ skb_put_u8(malformed, WLAN_EID_REDUCED_NEIGHBOR_REPORT);
+ skb_put_u8(malformed, 10);
+ skb_put(malformed, 9);
+
+ len = cfg80211_gen_new_ie(malformed->data, malformed->len,
+ out, 0,
+ out, IEEE80211_MAX_DATA_LEN);
+ KUNIT_EXPECT_EQ(test, len, 5);
+
+ len = cfg80211_gen_new_ie(out, 0,
+ malformed->data, malformed->len,
+ out, IEEE80211_MAX_DATA_LEN);
+ KUNIT_EXPECT_EQ(test, len, 5);
+}
+
+struct inform_bss {
+ struct kunit *test;
+
+ int inform_bss_count;
+};
+
+static void inform_bss_inc_counter(struct wiphy *wiphy,
+ struct cfg80211_bss *bss,
+ const struct cfg80211_bss_ies *ies,
+ void *drv_data)
+{
+ struct inform_bss *ctx = t_wiphy_ctx(wiphy);
+
+ ctx->inform_bss_count++;
+
+ rcu_read_lock();
+ KUNIT_EXPECT_PTR_EQ(ctx->test, drv_data, ctx);
+ KUNIT_EXPECT_PTR_EQ(ctx->test, ies, rcu_dereference(bss->ies));
+ rcu_read_unlock();
+}
+
+static void test_inform_bss_ssid_only(struct kunit *test)
+{
+ struct inform_bss ctx = {
+ .test = test,
+ };
+ struct wiphy *wiphy = T_WIPHY(test, ctx);
+ struct t_wiphy_priv *w_priv = wiphy_priv(wiphy);
+ struct cfg80211_inform_bss inform_bss = {
+ .signal = 50,
+ .drv_data = &ctx,
+ };
+ const u8 bssid[ETH_ALEN] = { 0x10, 0x22, 0x33, 0x44, 0x55, 0x66 };
+ u64 tsf = 0x1000000000000000ULL;
+ int beacon_int = 100;
+ u16 capability = 0x1234;
+ static const u8 input[] = {
+ [0] = WLAN_EID_SSID,
+ [1] = 4,
+ [2] = 'T', 'E', 'S', 'T'
+ };
+ struct cfg80211_bss *bss, *other;
+ const struct cfg80211_bss_ies *ies;
+
+ w_priv->ops->inform_bss = inform_bss_inc_counter;
+
+ inform_bss.chan = ieee80211_get_channel_khz(wiphy, MHZ_TO_KHZ(2412));
+ KUNIT_ASSERT_NOT_NULL(test, inform_bss.chan);
+
+ bss = cfg80211_inform_bss_data(wiphy, &inform_bss,
+ CFG80211_BSS_FTYPE_PRESP, bssid, tsf,
+ capability, beacon_int,
+ input, sizeof(input),
+ GFP_KERNEL);
+ KUNIT_EXPECT_NOT_NULL(test, bss);
+ KUNIT_EXPECT_EQ(test, ctx.inform_bss_count, 1);
+
+ /* Check values in returned bss are correct */
+ KUNIT_EXPECT_EQ(test, bss->signal, inform_bss.signal);
+ KUNIT_EXPECT_EQ(test, bss->beacon_interval, beacon_int);
+ KUNIT_EXPECT_EQ(test, bss->capability, capability);
+ KUNIT_EXPECT_EQ(test, bss->bssid_index, 0);
+ KUNIT_EXPECT_PTR_EQ(test, bss->channel, inform_bss.chan);
+ KUNIT_EXPECT_MEMEQ(test, bssid, bss->bssid, sizeof(bssid));
+
+ /* Check the IEs have the expected value */
+ rcu_read_lock();
+ ies = rcu_dereference(bss->ies);
+ KUNIT_EXPECT_NOT_NULL(test, ies);
+ KUNIT_EXPECT_EQ(test, ies->tsf, tsf);
+ KUNIT_EXPECT_EQ(test, ies->len, sizeof(input));
+ KUNIT_EXPECT_MEMEQ(test, ies->data, input, sizeof(input));
+ rcu_read_unlock();
+
+ /* Check we can look up the BSS - by SSID */
+ other = cfg80211_get_bss(wiphy, NULL, NULL, "TEST", 4,
+ IEEE80211_BSS_TYPE_ANY,
+ IEEE80211_PRIVACY_ANY);
+ KUNIT_EXPECT_PTR_EQ(test, bss, other);
+ cfg80211_put_bss(wiphy, other);
+
+ /* Check we can look up the BSS - by BSSID */
+ other = cfg80211_get_bss(wiphy, NULL, bssid, NULL, 0,
+ IEEE80211_BSS_TYPE_ANY,
+ IEEE80211_PRIVACY_ANY);
+ KUNIT_EXPECT_PTR_EQ(test, bss, other);
+ cfg80211_put_bss(wiphy, other);
+
+ cfg80211_put_bss(wiphy, bss);
+}
+
+static struct inform_bss_ml_sta_case {
+ const char *desc;
+ int mld_id;
+ bool sta_prof_vendor_elems;
+} inform_bss_ml_sta_cases[] = {
+ { .desc = "no_mld_id", .mld_id = 0, .sta_prof_vendor_elems = false },
+ { .desc = "mld_id_eq_1", .mld_id = 1, .sta_prof_vendor_elems = true },
+};
+KUNIT_ARRAY_PARAM_DESC(inform_bss_ml_sta, inform_bss_ml_sta_cases, desc)
+
+static void test_inform_bss_ml_sta(struct kunit *test)
+{
+ const struct inform_bss_ml_sta_case *params = test->param_value;
+ struct inform_bss ctx = {
+ .test = test,
+ };
+ struct wiphy *wiphy = T_WIPHY(test, ctx);
+ struct t_wiphy_priv *w_priv = wiphy_priv(wiphy);
+ struct cfg80211_inform_bss inform_bss = {
+ .signal = 50,
+ .drv_data = &ctx,
+ };
+ struct cfg80211_bss *bss, *link_bss;
+ const struct cfg80211_bss_ies *ies;
+
+ /* sending station */
+ const u8 bssid[ETH_ALEN] = { 0x10, 0x22, 0x33, 0x44, 0x55, 0x66 };
+ u64 tsf = 0x1000000000000000ULL;
+ int beacon_int = 100;
+ u16 capability = 0x1234;
+
+ /* Building the frame *************************************************/
+ struct sk_buff *input = kunit_zalloc_skb(test, 1024, GFP_KERNEL);
+ u8 *len_mle, *len_prof;
+ u8 link_id = 2;
+ struct {
+ struct ieee80211_neighbor_ap_info info;
+ struct ieee80211_tbtt_info_ge_11 ap;
+ } __packed rnr = {
+ .info = {
+ .tbtt_info_hdr = u8_encode_bits(0, IEEE80211_AP_INFO_TBTT_HDR_COUNT),
+ .tbtt_info_len = sizeof(struct ieee80211_tbtt_info_ge_11),
+ .op_class = 81,
+ .channel = 11,
+ },
+ .ap = {
+ .tbtt_offset = 0xff,
+ .bssid = { 0x10, 0x22, 0x33, 0x44, 0x55, 0x67 },
+ .short_ssid = 0, /* unused */
+ .bss_params = 0,
+ .psd_20 = 0,
+ .mld_params.mld_id = params->mld_id,
+ .mld_params.params =
+ le16_encode_bits(link_id,
+ IEEE80211_RNR_MLD_PARAMS_LINK_ID),
+ }
+ };
+ struct {
+ __le16 control;
+ u8 var_len;
+ u8 mld_mac_addr[ETH_ALEN];
+ u8 link_id_info;
+ u8 params_change_count;
+ __le16 mld_caps_and_ops;
+ u8 mld_id;
+ __le16 ext_mld_caps_and_ops;
+ } __packed mle_basic_common_info = {
+ .control =
+ cpu_to_le16(IEEE80211_ML_CONTROL_TYPE_BASIC |
+ IEEE80211_MLC_BASIC_PRES_BSS_PARAM_CH_CNT |
+ IEEE80211_MLC_BASIC_PRES_LINK_ID |
+ (params->mld_id ? IEEE80211_MLC_BASIC_PRES_MLD_ID : 0) |
+ IEEE80211_MLC_BASIC_PRES_MLD_CAPA_OP),
+ .mld_id = params->mld_id,
+ .mld_caps_and_ops = cpu_to_le16(0x0102),
+ .ext_mld_caps_and_ops = cpu_to_le16(0x0304),
+ .var_len = sizeof(mle_basic_common_info) - 2 -
+ (params->mld_id ? 0 : 1),
+ .mld_mac_addr = { 0x10, 0x22, 0x33, 0x44, 0x55, 0x60 },
+ };
+ struct {
+ __le16 control;
+ u8 var_len;
+ u8 bssid[ETH_ALEN];
+ __le16 beacon_int;
+ __le64 tsf_offset;
+ __le16 capabilities; /* already part of payload */
+ } __packed sta_prof = {
+ .control =
+ cpu_to_le16(IEEE80211_MLE_STA_CONTROL_COMPLETE_PROFILE |
+ IEEE80211_MLE_STA_CONTROL_STA_MAC_ADDR_PRESENT |
+ IEEE80211_MLE_STA_CONTROL_BEACON_INT_PRESENT |
+ IEEE80211_MLE_STA_CONTROL_TSF_OFFS_PRESENT |
+ u16_encode_bits(link_id,
+ IEEE80211_MLE_STA_CONTROL_LINK_ID)),
+ .var_len = sizeof(sta_prof) - 2 - 2,
+ .bssid = { *rnr.ap.bssid },
+ .beacon_int = cpu_to_le16(101),
+ .tsf_offset = cpu_to_le64(-123ll),
+ .capabilities = cpu_to_le16(0xdead),
+ };
+
+ KUNIT_ASSERT_NOT_NULL(test, input);
+
+ w_priv->ops->inform_bss = inform_bss_inc_counter;
+
+ inform_bss.chan = ieee80211_get_channel_khz(wiphy, MHZ_TO_KHZ(2412));
+ KUNIT_ASSERT_NOT_NULL(test, inform_bss.chan);
+
+ skb_put_u8(input, WLAN_EID_SSID);
+ skb_put_u8(input, 4);
+ skb_put_data(input, "TEST", 4);
+
+ skb_put_u8(input, WLAN_EID_REDUCED_NEIGHBOR_REPORT);
+ skb_put_u8(input, sizeof(rnr));
+ skb_put_data(input, &rnr, sizeof(rnr));
+
+ /* build a multi-link element */
+ skb_put_u8(input, WLAN_EID_EXTENSION);
+ len_mle = skb_put(input, 1);
+ skb_put_u8(input, WLAN_EID_EXT_EHT_MULTI_LINK);
+ skb_put_data(input, &mle_basic_common_info, sizeof(mle_basic_common_info));
+ if (!params->mld_id)
+ t_skb_remove_member(input, typeof(mle_basic_common_info), mld_id);
+ /* with a STA profile inside */
+ skb_put_u8(input, IEEE80211_MLE_SUBELEM_PER_STA_PROFILE);
+ len_prof = skb_put(input, 1);
+ skb_put_data(input, &sta_prof, sizeof(sta_prof));
+
+ if (params->sta_prof_vendor_elems) {
+ /* Put two (vendor) element into sta_prof */
+ skb_put_u8(input, WLAN_EID_VENDOR_SPECIFIC);
+ skb_put_u8(input, 160);
+ skb_put(input, 160);
+
+ skb_put_u8(input, WLAN_EID_VENDOR_SPECIFIC);
+ skb_put_u8(input, 165);
+ skb_put(input, 165);
+ }
+
+ /* fragment STA profile */
+ ieee80211_fragment_element(input, len_prof,
+ IEEE80211_MLE_SUBELEM_FRAGMENT);
+ /* fragment MLE */
+ ieee80211_fragment_element(input, len_mle, WLAN_EID_FRAGMENT);
+
+ /* Put a (vendor) element after the ML element */
+ skb_put_u8(input, WLAN_EID_VENDOR_SPECIFIC);
+ skb_put_u8(input, 155);
+ skb_put(input, 155);
+
+ /* Submit *************************************************************/
+ bss = cfg80211_inform_bss_data(wiphy, &inform_bss,
+ CFG80211_BSS_FTYPE_PRESP, bssid, tsf,
+ capability, beacon_int,
+ input->data, input->len,
+ GFP_KERNEL);
+ KUNIT_EXPECT_NOT_NULL(test, bss);
+ KUNIT_EXPECT_EQ(test, ctx.inform_bss_count, 2);
+
+ /* Check link_bss *****************************************************/
+ link_bss = cfg80211_get_bss(wiphy, NULL, sta_prof.bssid, NULL, 0,
+ IEEE80211_BSS_TYPE_ANY,
+ IEEE80211_PRIVACY_ANY);
+ KUNIT_ASSERT_NOT_NULL(test, link_bss);
+ KUNIT_EXPECT_EQ(test, link_bss->signal, 0);
+ KUNIT_EXPECT_EQ(test, link_bss->beacon_interval,
+ le16_to_cpu(sta_prof.beacon_int));
+ KUNIT_EXPECT_EQ(test, link_bss->capability,
+ le16_to_cpu(sta_prof.capabilities));
+ KUNIT_EXPECT_EQ(test, link_bss->bssid_index, 0);
+ KUNIT_EXPECT_PTR_EQ(test, link_bss->channel,
+ ieee80211_get_channel_khz(wiphy, MHZ_TO_KHZ(2462)));
+
+ rcu_read_lock();
+ ies = rcu_dereference(link_bss->ies);
+ KUNIT_EXPECT_NOT_NULL(test, ies);
+ KUNIT_EXPECT_EQ(test, ies->tsf, tsf + le64_to_cpu(sta_prof.tsf_offset));
+ /* Resulting length should be:
+ * SSID (inherited) + RNR (inherited) + vendor element(s) +
+ * MLE common info + MLE header and control
+ */
+ if (params->sta_prof_vendor_elems)
+ KUNIT_EXPECT_EQ(test, ies->len,
+ 6 + 2 + sizeof(rnr) + 2 + 160 + 2 + 165 +
+ mle_basic_common_info.var_len + 5);
+ else
+ KUNIT_EXPECT_EQ(test, ies->len,
+ 6 + 2 + sizeof(rnr) + 2 + 155 +
+ mle_basic_common_info.var_len + 5);
+ rcu_read_unlock();
+
+ cfg80211_put_bss(wiphy, bss);
+ cfg80211_put_bss(wiphy, link_bss);
+}
+
+static struct kunit_case gen_new_ie_test_cases[] = {
+ KUNIT_CASE_PARAM(test_gen_new_ie, gen_new_ie_gen_params),
+ KUNIT_CASE(test_gen_new_ie_malformed),
+ {}
+};
+
+static struct kunit_suite gen_new_ie = {
+ .name = "cfg80211-ie-generation",
+ .test_cases = gen_new_ie_test_cases,
+};
+
+kunit_test_suite(gen_new_ie);
+
+static struct kunit_case inform_bss_test_cases[] = {
+ KUNIT_CASE(test_inform_bss_ssid_only),
+ KUNIT_CASE_PARAM(test_inform_bss_ml_sta, inform_bss_ml_sta_gen_params),
+ {}
+};
+
+static struct kunit_suite inform_bss = {
+ .name = "cfg80211-inform-bss",
+ .test_cases = inform_bss_test_cases,
+};
+
+kunit_test_suite(inform_bss);
diff --git a/net/wireless/tests/util.c b/net/wireless/tests/util.c
new file mode 100644
index 000000000000..8abdaeb820ce
--- /dev/null
+++ b/net/wireless/tests/util.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * KUnit fixture to have a (configurable) wiphy
+ *
+ * Copyright (C) 2023 Intel Corporation
+ */
+#include <linux/ieee80211.h>
+#include <net/cfg80211.h>
+#include <kunit/test.h>
+#include <kunit/test-bug.h>
+#include "util.h"
+
+int t_wiphy_init(struct kunit_resource *resource, void *ctx)
+{
+ struct kunit *test = kunit_get_current_test();
+ struct cfg80211_ops *ops;
+ struct wiphy *wiphy;
+ struct t_wiphy_priv *priv;
+
+ ops = kzalloc(sizeof(*ops), GFP_KERNEL);
+ KUNIT_ASSERT_NOT_NULL(test, ops);
+
+ wiphy = wiphy_new_nm(ops, sizeof(*priv), "kunit");
+ KUNIT_ASSERT_NOT_NULL(test, wiphy);
+
+ priv = wiphy_priv(wiphy);
+ priv->ctx = ctx;
+ priv->ops = ops;
+
+ /* Initialize channels, feel free to add more here channels/bands */
+ memcpy(priv->channels_2ghz, channels_2ghz, sizeof(channels_2ghz));
+ wiphy->bands[NL80211_BAND_2GHZ] = &priv->band_2ghz;
+ priv->band_2ghz.channels = priv->channels_2ghz;
+ priv->band_2ghz.n_channels = ARRAY_SIZE(channels_2ghz);
+
+ resource->data = wiphy;
+ resource->name = "wiphy";
+
+ return 0;
+}
+
+void t_wiphy_exit(struct kunit_resource *resource)
+{
+ struct t_wiphy_priv *priv;
+ struct cfg80211_ops *ops;
+
+ priv = wiphy_priv(resource->data);
+ ops = priv->ops;
+
+ /* Should we ensure anything about the state here?
+ * e.g. full destruction or no calls to any ops on destruction?
+ */
+
+ wiphy_free(resource->data);
+ kfree(ops);
+}
diff --git a/net/wireless/tests/util.h b/net/wireless/tests/util.h
new file mode 100644
index 000000000000..6de712e0d432
--- /dev/null
+++ b/net/wireless/tests/util.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Utilities for cfg80211 unit testing
+ *
+ * Copyright (C) 2023 Intel Corporation
+ */
+#ifndef __CFG80211_UTILS_H
+#define __CFG80211_UTILS_H
+
+#define CHAN2G(_freq) { \
+ .band = NL80211_BAND_2GHZ, \
+ .center_freq = (_freq), \
+ .hw_value = (_freq), \
+}
+
+static const struct ieee80211_channel channels_2ghz[] = {
+ CHAN2G(2412), /* Channel 1 */
+ CHAN2G(2417), /* Channel 2 */
+ CHAN2G(2422), /* Channel 3 */
+ CHAN2G(2427), /* Channel 4 */
+ CHAN2G(2432), /* Channel 5 */
+ CHAN2G(2437), /* Channel 6 */
+ CHAN2G(2442), /* Channel 7 */
+ CHAN2G(2447), /* Channel 8 */
+ CHAN2G(2452), /* Channel 9 */
+ CHAN2G(2457), /* Channel 10 */
+ CHAN2G(2462), /* Channel 11 */
+ CHAN2G(2467), /* Channel 12 */
+ CHAN2G(2472), /* Channel 13 */
+ CHAN2G(2484), /* Channel 14 */
+};
+
+struct t_wiphy_priv {
+ struct kunit *test;
+ struct cfg80211_ops *ops;
+
+ void *ctx;
+
+ struct ieee80211_supported_band band_2ghz;
+ struct ieee80211_channel channels_2ghz[ARRAY_SIZE(channels_2ghz)];
+};
+
+#define T_WIPHY(test, ctx) ({ \
+ struct wiphy *__wiphy = \
+ kunit_alloc_resource(test, t_wiphy_init, \
+ t_wiphy_exit, \
+ GFP_KERNEL, &(ctx)); \
+ \
+ KUNIT_ASSERT_NOT_NULL(test, __wiphy); \
+ __wiphy; \
+ })
+#define t_wiphy_ctx(wiphy) (((struct t_wiphy_priv *)wiphy_priv(wiphy))->ctx)
+
+int t_wiphy_init(struct kunit_resource *resource, void *data);
+void t_wiphy_exit(struct kunit_resource *resource);
+
+#define t_skb_remove_member(skb, type, member) do { \
+ memmove((skb)->data + (skb)->len - sizeof(type) + \
+ offsetof(type, member), \
+ (skb)->data + (skb)->len - sizeof(type) + \
+ offsetofend(type, member), \
+ offsetofend(type, member)); \
+ skb_trim(skb, (skb)->len - sizeof_field(type, member)); \
+ } while (0)
+
+#endif /* __CFG80211_UTILS_H */
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 617c0d0dfa96..1f374c8a17a5 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -615,49 +615,47 @@ TRACE_EVENT(rdev_start_ap,
TRACE_EVENT(rdev_change_beacon,
TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
- struct cfg80211_beacon_data *info),
+ struct cfg80211_ap_update *info),
TP_ARGS(wiphy, netdev, info),
TP_STRUCT__entry(
WIPHY_ENTRY
NETDEV_ENTRY
__field(int, link_id)
- __dynamic_array(u8, head, info ? info->head_len : 0)
- __dynamic_array(u8, tail, info ? info->tail_len : 0)
- __dynamic_array(u8, beacon_ies, info ? info->beacon_ies_len : 0)
- __dynamic_array(u8, proberesp_ies,
- info ? info->proberesp_ies_len : 0)
- __dynamic_array(u8, assocresp_ies,
- info ? info->assocresp_ies_len : 0)
- __dynamic_array(u8, probe_resp, info ? info->probe_resp_len : 0)
- ),
- TP_fast_assign(
- WIPHY_ASSIGN;
- NETDEV_ASSIGN;
- if (info) {
- __entry->link_id = info->link_id;
- if (info->head)
- memcpy(__get_dynamic_array(head), info->head,
- info->head_len);
- if (info->tail)
- memcpy(__get_dynamic_array(tail), info->tail,
- info->tail_len);
- if (info->beacon_ies)
- memcpy(__get_dynamic_array(beacon_ies),
- info->beacon_ies, info->beacon_ies_len);
- if (info->proberesp_ies)
- memcpy(__get_dynamic_array(proberesp_ies),
- info->proberesp_ies,
- info->proberesp_ies_len);
- if (info->assocresp_ies)
- memcpy(__get_dynamic_array(assocresp_ies),
- info->assocresp_ies,
- info->assocresp_ies_len);
- if (info->probe_resp)
- memcpy(__get_dynamic_array(probe_resp),
- info->probe_resp, info->probe_resp_len);
- } else {
- __entry->link_id = -1;
- }
+ __dynamic_array(u8, head, info->beacon.head_len)
+ __dynamic_array(u8, tail, info->beacon.tail_len)
+ __dynamic_array(u8, beacon_ies, info->beacon.beacon_ies_len)
+ __dynamic_array(u8, proberesp_ies, info->beacon.proberesp_ies_len)
+ __dynamic_array(u8, assocresp_ies, info->beacon.assocresp_ies_len)
+ __dynamic_array(u8, probe_resp, info->beacon.probe_resp_len)
+ ),
+ TP_fast_assign(
+ WIPHY_ASSIGN;
+ NETDEV_ASSIGN;
+ __entry->link_id = info->beacon.link_id;
+ if (info->beacon.head)
+ memcpy(__get_dynamic_array(head),
+ info->beacon.head,
+ info->beacon.head_len);
+ if (info->beacon.tail)
+ memcpy(__get_dynamic_array(tail),
+ info->beacon.tail,
+ info->beacon.tail_len);
+ if (info->beacon.beacon_ies)
+ memcpy(__get_dynamic_array(beacon_ies),
+ info->beacon.beacon_ies,
+ info->beacon.beacon_ies_len);
+ if (info->beacon.proberesp_ies)
+ memcpy(__get_dynamic_array(proberesp_ies),
+ info->beacon.proberesp_ies,
+ info->beacon.proberesp_ies_len);
+ if (info->beacon.assocresp_ies)
+ memcpy(__get_dynamic_array(assocresp_ies),
+ info->beacon.assocresp_ies,
+ info->beacon.assocresp_ies_len);
+ if (info->beacon.probe_resp)
+ memcpy(__get_dynamic_array(probe_resp),
+ info->beacon.probe_resp,
+ info->beacon.probe_resp_len);
),
TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", link_id:%d",
WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->link_id)
@@ -1323,16 +1321,18 @@ TRACE_EVENT(rdev_deauth,
NETDEV_ENTRY
MAC_ENTRY(bssid)
__field(u16, reason_code)
+ __field(bool, local_state_change)
),
TP_fast_assign(
WIPHY_ASSIGN;
NETDEV_ASSIGN;
MAC_ASSIGN(bssid, req->bssid);
__entry->reason_code = req->reason_code;
+ __entry->local_state_change = req->local_state_change;
),
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", bssid: %pM, reason: %u",
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", bssid: %pM, reason: %u, local_state_change:%d",
WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->bssid,
- __entry->reason_code)
+ __entry->reason_code, __entry->local_state_change)
);
TRACE_EVENT(rdev_disassoc,
@@ -2928,7 +2928,7 @@ DEFINE_EVENT(netdev_evt_only, cfg80211_send_rx_auth,
TRACE_EVENT(cfg80211_send_rx_assoc,
TP_PROTO(struct net_device *netdev,
- struct cfg80211_rx_assoc_resp *data),
+ const struct cfg80211_rx_assoc_resp_data *data),
TP_ARGS(netdev, data),
TP_STRUCT__entry(
NETDEV_ENTRY
@@ -3590,7 +3590,6 @@ TRACE_EVENT(cfg80211_inform_bss_frame,
TP_STRUCT__entry(
WIPHY_ENTRY
CHAN_ENTRY
- __field(enum nl80211_bss_scan_width, scan_width)
__dynamic_array(u8, mgmt, len)
__field(s32, signal)
__field(u64, ts_boottime)
@@ -3600,7 +3599,6 @@ TRACE_EVENT(cfg80211_inform_bss_frame,
TP_fast_assign(
WIPHY_ASSIGN;
CHAN_ASSIGN(data->chan);
- __entry->scan_width = data->scan_width;
if (mgmt)
memcpy(__get_dynamic_array(mgmt), mgmt, len);
__entry->signal = data->signal;
@@ -3609,8 +3607,8 @@ TRACE_EVENT(cfg80211_inform_bss_frame,
MAC_ASSIGN(parent_bssid, data->parent_bssid);
),
TP_printk(WIPHY_PR_FMT ", " CHAN_PR_FMT
- "(scan_width: %d) signal: %d, tsb:%llu, detect_tsf:%llu, tsf_bssid: %pM",
- WIPHY_PR_ARG, CHAN_PR_ARG, __entry->scan_width,
+ "signal: %d, tsb:%llu, detect_tsf:%llu, tsf_bssid: %pM",
+ WIPHY_PR_ARG, CHAN_PR_ARG,
__entry->signal, (unsigned long long)__entry->ts_boottime,
(unsigned long long)__entry->parent_tsf,
__entry->parent_bssid)
@@ -3981,6 +3979,26 @@ TRACE_EVENT(cfg80211_links_removed,
__entry->link_mask)
);
+TRACE_EVENT(rdev_set_ttlm,
+ TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+ struct cfg80211_ttlm_params *params),
+ TP_ARGS(wiphy, netdev, params),
+ TP_STRUCT__entry(
+ WIPHY_ENTRY
+ NETDEV_ENTRY
+ __array(u8, dlink, sizeof(u16) * 8)
+ __array(u8, ulink, sizeof(u16) * 8)
+ ),
+ TP_fast_assign(
+ WIPHY_ASSIGN;
+ NETDEV_ASSIGN;
+ memcpy(__entry->dlink, params->dlink, sizeof(params->dlink));
+ memcpy(__entry->ulink, params->ulink, sizeof(params->ulink));
+ ),
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT,
+ WIPHY_PR_ARG, NETDEV_PR_ARG)
+);
+
#endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */
#undef TRACE_INCLUDE_PATH
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 1783ab9d57a3..d1ce3bee2797 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -43,8 +43,7 @@ ieee80211_get_response_rate(struct ieee80211_supported_band *sband,
}
EXPORT_SYMBOL(ieee80211_get_response_rate);
-u32 ieee80211_mandatory_rates(struct ieee80211_supported_band *sband,
- enum nl80211_bss_scan_width scan_width)
+u32 ieee80211_mandatory_rates(struct ieee80211_supported_band *sband)
{
struct ieee80211_rate *bitrates;
u32 mandatory_rates = 0;
@@ -54,15 +53,10 @@ u32 ieee80211_mandatory_rates(struct ieee80211_supported_band *sband,
if (WARN_ON(!sband))
return 1;
- if (sband->band == NL80211_BAND_2GHZ) {
- if (scan_width == NL80211_BSS_CHAN_WIDTH_5 ||
- scan_width == NL80211_BSS_CHAN_WIDTH_10)
- mandatory_flag = IEEE80211_RATE_MANDATORY_G;
- else
- mandatory_flag = IEEE80211_RATE_MANDATORY_B;
- } else {
+ if (sband->band == NL80211_BAND_2GHZ)
+ mandatory_flag = IEEE80211_RATE_MANDATORY_B;
+ else
mandatory_flag = IEEE80211_RATE_MANDATORY_A;
- }
bitrates = sband->bitrates;
for (i = 0; i < sband->n_bitrates; i++)
@@ -986,7 +980,63 @@ unsigned int cfg80211_classify8021d(struct sk_buff *skb,
}
}
+ /* The default mapping as defined Section 2.3 in RFC8325: The three
+ * Most Significant Bits (MSBs) of the DSCP are used as the
+ * corresponding L2 markings.
+ */
ret = dscp >> 5;
+
+ /* Handle specific DSCP values for which the default mapping (as
+ * described above) doesn't adhere to the intended usage of the DSCP
+ * value. See section 4 in RFC8325. Specifically, for the following
+ * Diffserv Service Classes no update is needed:
+ * - Standard: DF
+ * - Low Priority Data: CS1
+ * - Multimedia Streaming: AF31, AF32, AF33
+ * - Multimedia Conferencing: AF41, AF42, AF43
+ * - Network Control Traffic: CS7
+ * - Real-Time Interactive: CS4
+ */
+ switch (dscp >> 2) {
+ case 10:
+ case 12:
+ case 14:
+ /* High throughput data: AF11, AF12, AF13 */
+ ret = 0;
+ break;
+ case 16:
+ /* Operations, Administration, and Maintenance and Provisioning:
+ * CS2
+ */
+ ret = 0;
+ break;
+ case 18:
+ case 20:
+ case 22:
+ /* Low latency data: AF21, AF22, AF23 */
+ ret = 3;
+ break;
+ case 24:
+ /* Broadcasting video: CS3 */
+ ret = 4;
+ break;
+ case 40:
+ /* Signaling: CS5 */
+ ret = 5;
+ break;
+ case 44:
+ /* Voice Admit: VA */
+ ret = 6;
+ break;
+ case 46:
+ /* Telephony traffic: EF */
+ ret = 6;
+ break;
+ case 48:
+ /* Network Control Traffic: CS6 */
+ ret = 7;
+ break;
+ }
out:
return array_index_nospec(ret, IEEE80211_NUM_TIDS);
}
@@ -1044,7 +1094,6 @@ void cfg80211_process_wdev_events(struct wireless_dev *wdev)
list_del(&ev->list);
spin_unlock_irqrestore(&wdev->event_lock, flags);
- wdev_lock(wdev);
switch (ev->type) {
case EVENT_CONNECT_RESULT:
__cfg80211_connect_result(
@@ -1066,15 +1115,14 @@ void cfg80211_process_wdev_events(struct wireless_dev *wdev)
ev->ij.channel);
break;
case EVENT_STOPPED:
- __cfg80211_leave(wiphy_to_rdev(wdev->wiphy), wdev);
+ cfg80211_leave(wiphy_to_rdev(wdev->wiphy), wdev);
break;
case EVENT_PORT_AUTHORIZED:
- __cfg80211_port_authorized(wdev, ev->pa.bssid,
+ __cfg80211_port_authorized(wdev, ev->pa.peer_addr,
ev->pa.td_bitmap,
ev->pa.td_bitmap_len);
break;
}
- wdev_unlock(wdev);
kfree(ev);
@@ -1124,9 +1172,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
return -EBUSY;
dev->ieee80211_ptr->use_4addr = false;
- wdev_lock(dev->ieee80211_ptr);
rdev_set_qos_map(rdev, dev, NULL);
- wdev_unlock(dev->ieee80211_ptr);
switch (otype) {
case NL80211_IFTYPE_AP:
@@ -1138,10 +1184,8 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
break;
case NL80211_IFTYPE_STATION:
case NL80211_IFTYPE_P2P_CLIENT:
- wdev_lock(dev->ieee80211_ptr);
cfg80211_disconnect(rdev, dev,
WLAN_REASON_DEAUTH_LEAVING, true);
- wdev_unlock(dev->ieee80211_ptr);
break;
case NL80211_IFTYPE_MESH_POINT:
/* mesh should be handled? */
@@ -1972,6 +2016,35 @@ size_t ieee80211_ie_split_ric(const u8 *ies, size_t ielen,
}
EXPORT_SYMBOL(ieee80211_ie_split_ric);
+void ieee80211_fragment_element(struct sk_buff *skb, u8 *len_pos, u8 frag_id)
+{
+ unsigned int elem_len;
+
+ if (!len_pos)
+ return;
+
+ elem_len = skb->data + skb->len - len_pos - 1;
+
+ while (elem_len > 255) {
+ /* this one is 255 */
+ *len_pos = 255;
+ /* remaining data gets smaller */
+ elem_len -= 255;
+ /* make space for the fragment ID/len in SKB */
+ skb_put(skb, 2);
+ /* shift back the remaining data to place fragment ID/len */
+ memmove(len_pos + 255 + 3, len_pos + 255 + 1, elem_len);
+ /* place the fragment ID */
+ len_pos += 255 + 1;
+ *len_pos = frag_id;
+ /* and point to fragment length to update later */
+ len_pos++;
+ }
+
+ *len_pos = elem_len;
+}
+EXPORT_SYMBOL(ieee80211_fragment_element);
+
bool ieee80211_operating_class_to_band(u8 operating_class,
enum nl80211_band *band)
{
@@ -1982,6 +2055,7 @@ bool ieee80211_operating_class_to_band(u8 operating_class,
*band = NL80211_BAND_5GHZ;
return true;
case 131 ... 135:
+ case 137:
*band = NL80211_BAND_6GHZ;
return true;
case 81:
@@ -2647,12 +2721,12 @@ void cfg80211_remove_link(struct wireless_dev *wdev, unsigned int link_id)
{
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
- ASSERT_WDEV_LOCK(wdev);
+ lockdep_assert_wiphy(wdev->wiphy);
switch (wdev->iftype) {
case NL80211_IFTYPE_AP:
case NL80211_IFTYPE_P2P_GO:
- __cfg80211_stop_ap(rdev, wdev->netdev, link_id, true);
+ cfg80211_stop_ap(rdev, wdev->netdev, link_id, true);
break;
default:
/* per-link not relevant */
@@ -2677,12 +2751,10 @@ void cfg80211_remove_links(struct wireless_dev *wdev)
if (wdev->iftype != NL80211_IFTYPE_AP)
return;
- wdev_lock(wdev);
if (wdev->valid_links) {
for_each_valid_link(wdev, link_id)
cfg80211_remove_link(wdev, link_id);
}
- wdev_unlock(wdev);
}
int cfg80211_remove_virtual_intf(struct cfg80211_registered_device *rdev,
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index e3acfac7430a..2371069f3c43 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -7,7 +7,7 @@
* we directly assign the wireless handlers of wireless interfaces.
*
* Copyright 2008-2009 Johannes Berg <johannes@sipsolutions.net>
- * Copyright (C) 2019-2022 Intel Corporation
+ * Copyright (C) 2019-2023 Intel Corporation
*/
#include <linux/export.h>
@@ -227,7 +227,7 @@ EXPORT_WEXT_HANDLER(cfg80211_wext_giwrange);
* cfg80211_wext_freq - get wext frequency for non-"auto"
* @freq: the wext freq encoding
*
- * Returns a frequency, or a negative error code, or 0 for auto.
+ * Returns: a frequency, or a negative error code, or 0 for auto.
*/
int cfg80211_wext_freq(struct iw_freq *freq)
{
@@ -415,10 +415,10 @@ int cfg80211_wext_giwretry(struct net_device *dev,
}
EXPORT_WEXT_HANDLER(cfg80211_wext_giwretry);
-static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev,
- struct net_device *dev, bool pairwise,
- const u8 *addr, bool remove, bool tx_key,
- int idx, struct key_params *params)
+static int cfg80211_set_encryption(struct cfg80211_registered_device *rdev,
+ struct net_device *dev, bool pairwise,
+ const u8 *addr, bool remove, bool tx_key,
+ int idx, struct key_params *params)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
int err, i;
@@ -471,7 +471,7 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev,
*/
if (idx == wdev->wext.default_key &&
wdev->iftype == NL80211_IFTYPE_ADHOC) {
- __cfg80211_leave_ibss(rdev, wdev->netdev, true);
+ cfg80211_leave_ibss(rdev, wdev->netdev, true);
rejoin = true;
}
@@ -552,7 +552,7 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev,
*/
if (wdev->iftype == NL80211_IFTYPE_ADHOC &&
wdev->wext.default_key == -1) {
- __cfg80211_leave_ibss(rdev, wdev->netdev, true);
+ cfg80211_leave_ibss(rdev, wdev->netdev, true);
rejoin = true;
}
err = rdev_set_default_key(rdev, dev, -1, idx, true,
@@ -580,21 +580,6 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev,
return 0;
}
-static int cfg80211_set_encryption(struct cfg80211_registered_device *rdev,
- struct net_device *dev, bool pairwise,
- const u8 *addr, bool remove, bool tx_key,
- int idx, struct key_params *params)
-{
- int err;
-
- wdev_lock(dev->ieee80211_ptr);
- err = __cfg80211_set_encryption(rdev, dev, pairwise, addr,
- remove, tx_key, idx, params);
- wdev_unlock(dev->ieee80211_ptr);
-
- return err;
-}
-
static int cfg80211_wext_siwencode(struct net_device *dev,
struct iw_request_info *info,
union iwreq_data *wrqu, char *keybuf)
@@ -639,7 +624,6 @@ static int cfg80211_wext_siwencode(struct net_device *dev,
else if (erq->length == 0) {
/* No key data - just set the default TX key index */
err = 0;
- wdev_lock(wdev);
if (wdev->connected ||
(wdev->iftype == NL80211_IFTYPE_ADHOC &&
wdev->u.ibss.current_bss))
@@ -647,7 +631,6 @@ static int cfg80211_wext_siwencode(struct net_device *dev,
true);
if (!err)
wdev->wext.default_key = idx;
- wdev_unlock(wdev);
goto out;
}
@@ -697,12 +680,8 @@ static int cfg80211_wext_siwencodeext(struct net_device *dev,
!rdev->ops->set_default_key)
return -EOPNOTSUPP;
- wdev_lock(wdev);
- if (wdev->valid_links) {
- wdev_unlock(wdev);
+ if (wdev->valid_links)
return -EOPNOTSUPP;
- }
- wdev_unlock(wdev);
switch (ext->alg) {
case IW_ENCODE_ALG_NONE:
@@ -1341,13 +1320,11 @@ static int cfg80211_wext_giwrate(struct net_device *dev,
return -EOPNOTSUPP;
err = 0;
- wdev_lock(wdev);
if (!wdev->valid_links && wdev->links[0].client.current_bss)
memcpy(addr, wdev->links[0].client.current_bss->pub.bssid,
ETH_ALEN);
else
err = -EOPNOTSUPP;
- wdev_unlock(wdev);
if (err)
return err;
@@ -1387,17 +1364,15 @@ static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev)
return NULL;
/* Grab BSSID of current BSS, if any */
- wdev_lock(wdev);
+ wiphy_lock(&rdev->wiphy);
if (wdev->valid_links || !wdev->links[0].client.current_bss) {
- wdev_unlock(wdev);
+ wiphy_unlock(&rdev->wiphy);
return NULL;
}
memcpy(bssid, wdev->links[0].client.current_bss->pub.bssid, ETH_ALEN);
- wdev_unlock(wdev);
memset(&sinfo, 0, sizeof(sinfo));
- wiphy_lock(&rdev->wiphy);
ret = rdev_get_station(rdev, dev, bssid, &sinfo);
wiphy_unlock(&rdev->wiphy);
diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c
index f3eaa3388694..8edd9ada69d0 100644
--- a/net/wireless/wext-sme.c
+++ b/net/wireless/wext-sme.c
@@ -23,7 +23,7 @@ int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev,
int err, i;
ASSERT_RTNL();
- ASSERT_WDEV_LOCK(wdev);
+ lockdep_assert_wiphy(wdev->wiphy);
if (!netif_running(wdev->netdev))
return 0;
@@ -87,15 +87,11 @@ int cfg80211_mgd_wext_siwfreq(struct net_device *dev,
return -EINVAL;
}
- wdev_lock(wdev);
-
if (wdev->conn) {
bool event = true;
- if (wdev->wext.connect.channel == chan) {
- err = 0;
- goto out;
- }
+ if (wdev->wext.connect.channel == chan)
+ return 0;
/* if SSID set, we'll try right again, avoid event */
if (wdev->wext.connect.ssid_len)
@@ -103,14 +99,11 @@ int cfg80211_mgd_wext_siwfreq(struct net_device *dev,
err = cfg80211_disconnect(rdev, dev,
WLAN_REASON_DEAUTH_LEAVING, event);
if (err)
- goto out;
+ return err;
}
wdev->wext.connect.channel = chan;
- err = cfg80211_mgd_wext_connect(rdev, wdev);
- out:
- wdev_unlock(wdev);
- return err;
+ return cfg80211_mgd_wext_connect(rdev, wdev);
}
int cfg80211_mgd_wext_giwfreq(struct net_device *dev,
@@ -127,12 +120,10 @@ int cfg80211_mgd_wext_giwfreq(struct net_device *dev,
if (wdev->valid_links)
return -EOPNOTSUPP;
- wdev_lock(wdev);
if (wdev->links[0].client.current_bss)
chan = wdev->links[0].client.current_bss->pub.channel;
else if (wdev->wext.connect.channel)
chan = wdev->wext.connect.channel;
- wdev_unlock(wdev);
if (chan) {
freq->m = chan->center_freq;
@@ -164,17 +155,13 @@ int cfg80211_mgd_wext_siwessid(struct net_device *dev,
if (len > 0 && ssid[len - 1] == '\0')
len--;
- wdev_lock(wdev);
-
- err = 0;
-
if (wdev->conn) {
bool event = true;
if (wdev->wext.connect.ssid && len &&
len == wdev->wext.connect.ssid_len &&
memcmp(wdev->wext.connect.ssid, ssid, len) == 0)
- goto out;
+ return 0;
/* if SSID set now, we'll try to connect, avoid event */
if (len)
@@ -182,7 +169,7 @@ int cfg80211_mgd_wext_siwessid(struct net_device *dev,
err = cfg80211_disconnect(rdev, dev,
WLAN_REASON_DEAUTH_LEAVING, event);
if (err)
- goto out;
+ return err;
}
wdev->wext.prev_bssid_valid = false;
@@ -194,10 +181,7 @@ int cfg80211_mgd_wext_siwessid(struct net_device *dev,
wdev->wext.connect.crypto.control_port_ethertype =
cpu_to_be16(ETH_P_PAE);
- err = cfg80211_mgd_wext_connect(rdev, wdev);
- out:
- wdev_unlock(wdev);
- return err;
+ return cfg80211_mgd_wext_connect(rdev, wdev);
}
int cfg80211_mgd_wext_giwessid(struct net_device *dev,
@@ -216,7 +200,6 @@ int cfg80211_mgd_wext_giwessid(struct net_device *dev,
data->flags = 0;
- wdev_lock(wdev);
if (wdev->links[0].client.current_bss) {
const struct element *ssid_elem;
@@ -238,7 +221,6 @@ int cfg80211_mgd_wext_giwessid(struct net_device *dev,
data->length = wdev->wext.connect.ssid_len;
memcpy(ssid, wdev->wext.connect.ssid, data->length);
}
- wdev_unlock(wdev);
return ret;
}
@@ -263,23 +245,20 @@ int cfg80211_mgd_wext_siwap(struct net_device *dev,
if (is_zero_ether_addr(bssid) || is_broadcast_ether_addr(bssid))
bssid = NULL;
- wdev_lock(wdev);
-
if (wdev->conn) {
- err = 0;
/* both automatic */
if (!bssid && !wdev->wext.connect.bssid)
- goto out;
+ return 0;
/* fixed already - and no change */
if (wdev->wext.connect.bssid && bssid &&
ether_addr_equal(bssid, wdev->wext.connect.bssid))
- goto out;
+ return 0;
err = cfg80211_disconnect(rdev, dev,
WLAN_REASON_DEAUTH_LEAVING, false);
if (err)
- goto out;
+ return err;
}
if (bssid) {
@@ -288,10 +267,7 @@ int cfg80211_mgd_wext_siwap(struct net_device *dev,
} else
wdev->wext.connect.bssid = NULL;
- err = cfg80211_mgd_wext_connect(rdev, wdev);
- out:
- wdev_unlock(wdev);
- return err;
+ return cfg80211_mgd_wext_connect(rdev, wdev);
}
int cfg80211_mgd_wext_giwap(struct net_device *dev,
@@ -306,18 +282,15 @@ int cfg80211_mgd_wext_giwap(struct net_device *dev,
ap_addr->sa_family = ARPHRD_ETHER;
- wdev_lock(wdev);
- if (wdev->valid_links) {
- wdev_unlock(wdev);
+ if (wdev->valid_links)
return -EOPNOTSUPP;
- }
+
if (wdev->links[0].client.current_bss)
memcpy(ap_addr->sa_data,
wdev->links[0].client.current_bss->pub.bssid,
ETH_ALEN);
else
eth_zero_addr(ap_addr->sa_data);
- wdev_unlock(wdev);
return 0;
}
@@ -339,7 +312,6 @@ int cfg80211_wext_siwgenie(struct net_device *dev,
ie = NULL;
wiphy_lock(wdev->wiphy);
- wdev_lock(wdev);
/* no change */
err = 0;
@@ -370,7 +342,6 @@ int cfg80211_wext_siwgenie(struct net_device *dev,
/* userspace better not think we'll reconnect */
err = 0;
out:
- wdev_unlock(wdev);
wiphy_unlock(wdev->wiphy);
return err;
}
@@ -396,7 +367,6 @@ int cfg80211_wext_siwmlme(struct net_device *dev,
return -EINVAL;
wiphy_lock(&rdev->wiphy);
- wdev_lock(wdev);
switch (mlme->cmd) {
case IW_MLME_DEAUTH:
case IW_MLME_DISASSOC:
@@ -406,7 +376,6 @@ int cfg80211_wext_siwmlme(struct net_device *dev,
err = -EOPNOTSUPP;
break;
}
- wdev_unlock(wdev);
wiphy_unlock(&rdev->wiphy);
return err;
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 0fb5143bec7a..f7a7c7798c3b 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -598,7 +598,7 @@ static struct sock *x25_make_new(struct sock *osk)
x25 = x25_sk(sk);
sk->sk_type = osk->sk_type;
- sk->sk_priority = osk->sk_priority;
+ sk->sk_priority = READ_ONCE(osk->sk_priority);
sk->sk_protocol = osk->sk_protocol;
sk->sk_rcvbuf = osk->sk_rcvbuf;
sk->sk_sndbuf = osk->sk_sndbuf;
@@ -704,7 +704,7 @@ static int x25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
rc = -EINVAL;
}
release_sock(sk);
- SOCK_DEBUG(sk, "x25_bind: socket is bound\n");
+ net_dbg_ratelimited("x25_bind: socket is bound\n");
out:
return rc;
}
@@ -1165,10 +1165,10 @@ static int x25_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
goto out;
}
- SOCK_DEBUG(sk, "x25_sendmsg: sendto: Addresses built.\n");
+ net_dbg_ratelimited("x25_sendmsg: sendto: Addresses built.\n");
/* Build a packet */
- SOCK_DEBUG(sk, "x25_sendmsg: sendto: building packet.\n");
+ net_dbg_ratelimited("x25_sendmsg: sendto: building packet.\n");
if ((msg->msg_flags & MSG_OOB) && len > 32)
len = 32;
@@ -1187,7 +1187,7 @@ static int x25_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
/*
* Put the data on the end
*/
- SOCK_DEBUG(sk, "x25_sendmsg: Copying user data\n");
+ net_dbg_ratelimited("x25_sendmsg: Copying user data\n");
skb_reset_transport_header(skb);
skb_put(skb, len);
@@ -1211,7 +1211,7 @@ static int x25_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
/*
* Push down the X.25 header
*/
- SOCK_DEBUG(sk, "x25_sendmsg: Building X.25 Header.\n");
+ net_dbg_ratelimited("x25_sendmsg: Building X.25 Header.\n");
if (msg->msg_flags & MSG_OOB) {
if (x25->neighbour->extended) {
@@ -1245,8 +1245,8 @@ static int x25_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
skb->data[0] |= X25_Q_BIT;
}
- SOCK_DEBUG(sk, "x25_sendmsg: Built header.\n");
- SOCK_DEBUG(sk, "x25_sendmsg: Transmitting buffer\n");
+ net_dbg_ratelimited("x25_sendmsg: Built header.\n");
+ net_dbg_ratelimited("x25_sendmsg: Transmitting buffer\n");
rc = -ENOTCONN;
if (sk->sk_state != TCP_ESTABLISHED)
diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c
index 8e1a49b0c0dc..6dadb217e101 100644
--- a/net/x25/x25_facilities.c
+++ b/net/x25/x25_facilities.c
@@ -282,7 +282,7 @@ int x25_negotiate_facilities(struct sk_buff *skb, struct sock *sk,
* They want reverse charging, we won't accept it.
*/
if ((theirs.reverse & 0x01 ) && (ours->reverse & 0x01)) {
- SOCK_DEBUG(sk, "X.25: rejecting reverse charging request\n");
+ net_dbg_ratelimited("X.25: rejecting reverse charging request\n");
return -1;
}
@@ -294,11 +294,11 @@ int x25_negotiate_facilities(struct sk_buff *skb, struct sock *sk,
int ours_in = ours->throughput & 0x0f;
int ours_out = ours->throughput & 0xf0;
if (!ours_in || theirs_in < ours_in) {
- SOCK_DEBUG(sk, "X.25: inbound throughput negotiated\n");
+ net_dbg_ratelimited("X.25: inbound throughput negotiated\n");
new->throughput = (new->throughput & 0xf0) | theirs_in;
}
if (!ours_out || theirs_out < ours_out) {
- SOCK_DEBUG(sk,
+ net_dbg_ratelimited(
"X.25: outbound throughput negotiated\n");
new->throughput = (new->throughput & 0x0f) | theirs_out;
}
@@ -306,22 +306,22 @@ int x25_negotiate_facilities(struct sk_buff *skb, struct sock *sk,
if (theirs.pacsize_in && theirs.pacsize_out) {
if (theirs.pacsize_in < ours->pacsize_in) {
- SOCK_DEBUG(sk, "X.25: packet size inwards negotiated down\n");
+ net_dbg_ratelimited("X.25: packet size inwards negotiated down\n");
new->pacsize_in = theirs.pacsize_in;
}
if (theirs.pacsize_out < ours->pacsize_out) {
- SOCK_DEBUG(sk, "X.25: packet size outwards negotiated down\n");
+ net_dbg_ratelimited("X.25: packet size outwards negotiated down\n");
new->pacsize_out = theirs.pacsize_out;
}
}
if (theirs.winsize_in && theirs.winsize_out) {
if (theirs.winsize_in < ours->winsize_in) {
- SOCK_DEBUG(sk, "X.25: window size inwards negotiated down\n");
+ net_dbg_ratelimited("X.25: window size inwards negotiated down\n");
new->winsize_in = theirs.winsize_in;
}
if (theirs.winsize_out < ours->winsize_out) {
- SOCK_DEBUG(sk, "X.25: window size outwards negotiated down\n");
+ net_dbg_ratelimited("X.25: window size outwards negotiated down\n");
new->winsize_out = theirs.winsize_out;
}
}
diff --git a/net/x25/x25_out.c b/net/x25/x25_out.c
index dbc0940bf35f..f8922b0e23a4 100644
--- a/net/x25/x25_out.c
+++ b/net/x25/x25_out.c
@@ -72,7 +72,7 @@ int x25_output(struct sock *sk, struct sk_buff *skb)
kfree_skb(skb);
return sent;
}
- SOCK_DEBUG(sk, "x25_output: fragment alloc"
+ net_dbg_ratelimited("x25_output: fragment alloc"
" failed, err=%d, %d bytes "
"sent\n", err, sent);
return err;
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index 06cead2b8e34..caa340134b0e 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -148,6 +148,11 @@ static int xdp_umem_account_pages(struct xdp_umem *umem)
return 0;
}
+#define XDP_UMEM_FLAGS_VALID ( \
+ XDP_UMEM_UNALIGNED_CHUNK_FLAG | \
+ XDP_UMEM_TX_SW_CSUM | \
+ 0)
+
static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
{
bool unaligned_chunks = mr->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG;
@@ -167,7 +172,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
return -EINVAL;
}
- if (mr->flags & ~XDP_UMEM_UNALIGNED_CHUNK_FLAG)
+ if (mr->flags & ~XDP_UMEM_FLAGS_VALID)
return -EINVAL;
if (!unaligned_chunks && !is_power_of_2(chunk_size))
@@ -199,6 +204,9 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
if (headroom >= chunk_size - XDP_PACKET_HEADROOM)
return -EINVAL;
+ if (mr->tx_metadata_len >= 256 || mr->tx_metadata_len % 8)
+ return -EINVAL;
+
umem->size = size;
umem->headroom = headroom;
umem->chunk_size = chunk_size;
@@ -207,6 +215,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
umem->pgs = NULL;
umem->user = NULL;
umem->flags = mr->flags;
+ umem->tx_metadata_len = mr->tx_metadata_len;
INIT_LIST_HEAD(&umem->xsk_dma_list);
refcount_set(&umem->users, 1);
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index fcfc8472f73d..1eadfac03cc4 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -33,6 +33,7 @@
#include "xsk.h"
#define TX_BATCH_SIZE 32
+#define MAX_PER_SOCKET_BUDGET (TX_BATCH_SIZE)
static DEFINE_PER_CPU(struct list_head, xskmap_flush_list);
@@ -166,8 +167,10 @@ static int xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
contd = XDP_PKT_CONTD;
err = __xsk_rcv_zc(xs, xskb, len, contd);
- if (err || likely(!frags))
- goto out;
+ if (err)
+ goto err;
+ if (likely(!frags))
+ return 0;
xskb_list = &xskb->pool->xskb_list;
list_for_each_entry_safe(pos, tmp, xskb_list, xskb_list_node) {
@@ -176,11 +179,13 @@ static int xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
len = pos->xdp.data_end - pos->xdp.data;
err = __xsk_rcv_zc(xs, pos, len, contd);
if (err)
- return err;
+ goto err;
list_del(&pos->xskb_list_node);
}
-out:
+ return 0;
+err:
+ xsk_buff_free(xdp);
return err;
}
@@ -391,6 +396,16 @@ void __xsk_map_flush(void)
}
}
+#ifdef CONFIG_DEBUG_NET
+bool xsk_map_check_flush(void)
+{
+ if (list_empty(this_cpu_ptr(&xskmap_flush_list)))
+ return false;
+ __xsk_map_flush();
+ return true;
+}
+#endif
+
void xsk_tx_completed(struct xsk_buff_pool *pool, u32 nb_entries)
{
xskq_prod_submit_n(pool->cq, nb_entries);
@@ -413,16 +428,25 @@ EXPORT_SYMBOL(xsk_tx_release);
bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc)
{
+ bool budget_exhausted = false;
struct xdp_sock *xs;
rcu_read_lock();
+again:
list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list) {
+ if (xs->tx_budget_spent >= MAX_PER_SOCKET_BUDGET) {
+ budget_exhausted = true;
+ continue;
+ }
+
if (!xskq_cons_peek_desc(xs->tx, desc, pool)) {
if (xskq_has_descs(xs->tx))
xskq_cons_release(xs->tx);
continue;
}
+ xs->tx_budget_spent++;
+
/* This is the backpressure mechanism for the Tx path.
* Reserve space in the completion queue and only proceed
* if there is space in it. This avoids having to implement
@@ -436,6 +460,14 @@ bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc)
return true;
}
+ if (budget_exhausted) {
+ list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list)
+ xs->tx_budget_spent = 0;
+
+ budget_exhausted = false;
+ goto again;
+ }
+
out:
rcu_read_unlock();
return false;
@@ -543,6 +575,13 @@ static u32 xsk_get_num_desc(struct sk_buff *skb)
static void xsk_destruct_skb(struct sk_buff *skb)
{
+ struct xsk_tx_metadata_compl *compl = &skb_shinfo(skb)->xsk_meta;
+
+ if (compl->tx_timestamp) {
+ /* sw completion timestamp, not a real one */
+ *compl->tx_timestamp = ktime_get_tai_fast_ns();
+ }
+
xsk_cq_submit_locked(xdp_sk(skb->sk), xsk_get_num_desc(skb));
sock_wfree(skb);
}
@@ -602,7 +641,7 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs,
for (copied = 0, i = skb_shinfo(skb)->nr_frags; copied < len; i++) {
if (unlikely(i >= MAX_SKB_FRAGS))
- return ERR_PTR(-EFAULT);
+ return ERR_PTR(-EOVERFLOW);
page = pool->umem->pgs[addr >> PAGE_SHIFT];
get_page(page);
@@ -627,8 +666,10 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs,
static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
struct xdp_desc *desc)
{
+ struct xsk_tx_metadata *meta = NULL;
struct net_device *dev = xs->dev;
struct sk_buff *skb = xs->skb;
+ bool first_frag = false;
int err;
if (dev->priv_flags & IFF_TX_SKB_NO_LINEAR) {
@@ -655,15 +696,19 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
skb_put(skb, len);
err = skb_store_bits(skb, 0, buffer, len);
- if (unlikely(err))
+ if (unlikely(err)) {
+ kfree_skb(skb);
goto free_err;
+ }
+
+ first_frag = true;
} else {
int nr_frags = skb_shinfo(skb)->nr_frags;
struct page *page;
u8 *vaddr;
if (unlikely(nr_frags == (MAX_SKB_FRAGS - 1) && xp_mb_desc(desc))) {
- err = -EFAULT;
+ err = -EOVERFLOW;
goto free_err;
}
@@ -679,23 +724,58 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
skb_add_rx_frag(skb, nr_frags, page, 0, len, 0);
}
+
+ if (first_frag && desc->options & XDP_TX_METADATA) {
+ if (unlikely(xs->pool->tx_metadata_len == 0)) {
+ err = -EINVAL;
+ goto free_err;
+ }
+
+ meta = buffer - xs->pool->tx_metadata_len;
+ if (unlikely(!xsk_buff_valid_tx_metadata(meta))) {
+ err = -EINVAL;
+ goto free_err;
+ }
+
+ if (meta->flags & XDP_TXMD_FLAGS_CHECKSUM) {
+ if (unlikely(meta->request.csum_start +
+ meta->request.csum_offset +
+ sizeof(__sum16) > len)) {
+ err = -EINVAL;
+ goto free_err;
+ }
+
+ skb->csum_start = hr + meta->request.csum_start;
+ skb->csum_offset = meta->request.csum_offset;
+ skb->ip_summed = CHECKSUM_PARTIAL;
+
+ if (unlikely(xs->pool->tx_sw_csum)) {
+ err = skb_checksum_help(skb);
+ if (err)
+ goto free_err;
+ }
+ }
+ }
}
skb->dev = dev;
- skb->priority = xs->sk.sk_priority;
+ skb->priority = READ_ONCE(xs->sk.sk_priority);
skb->mark = READ_ONCE(xs->sk.sk_mark);
skb->destructor = xsk_destruct_skb;
+ xsk_tx_metadata_to_compl(meta, &skb_shinfo(skb)->xsk_meta);
xsk_set_destructor_arg(skb);
return skb;
free_err:
- if (err == -EAGAIN) {
- xsk_cq_cancel_locked(xs, 1);
- } else {
- xsk_set_destructor_arg(skb);
- xsk_drop_skb(skb);
+ if (err == -EOVERFLOW) {
+ /* Drop the packet */
+ xsk_set_destructor_arg(xs->skb);
+ xsk_drop_skb(xs->skb);
xskq_cons_release(xs->tx);
+ } else {
+ /* Let application retry */
+ xsk_cq_cancel_locked(xs, 1);
}
return ERR_PTR(err);
@@ -738,7 +818,7 @@ static int __xsk_generic_xmit(struct sock *sk)
skb = xsk_build_skb(xs, &desc);
if (IS_ERR(skb)) {
err = PTR_ERR(skb);
- if (err == -EAGAIN)
+ if (err != -EOVERFLOW)
goto out;
err = 0;
continue;
@@ -915,7 +995,7 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock,
rcu_read_lock();
if (xsk_check_common(xs))
- goto skip_tx;
+ goto out;
pool = xs->pool;
@@ -927,12 +1007,11 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock,
xsk_generic_xmit(sk);
}
-skip_tx:
if (xs->rx && !xskq_prod_is_empty(xs->rx))
mask |= EPOLLIN | EPOLLRDNORM;
if (xs->tx && xsk_tx_writeable(xs))
mask |= EPOLLOUT | EPOLLWRNORM;
-
+out:
rcu_read_unlock();
return mask;
}
@@ -1224,7 +1303,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
xs->dev = dev;
xs->zc = xs->umem->zc;
- xs->sg = !!(flags & XDP_USE_SG);
+ xs->sg = !!(xs->umem->flags & XDP_UMEM_SG_FLAG);
xs->queue_id = qid;
xp_add_xsk(xs->pool, xs);
@@ -1251,6 +1330,14 @@ struct xdp_umem_reg_v1 {
__u32 headroom;
};
+struct xdp_umem_reg_v2 {
+ __u64 addr; /* Start of packet data area */
+ __u64 len; /* Length of packet data area */
+ __u32 chunk_size;
+ __u32 headroom;
+ __u32 flags;
+};
+
static int xsk_setsockopt(struct socket *sock, int level, int optname,
sockptr_t optval, unsigned int optlen)
{
@@ -1294,8 +1381,10 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
if (optlen < sizeof(struct xdp_umem_reg_v1))
return -EINVAL;
- else if (optlen < sizeof(mr))
+ else if (optlen < sizeof(struct xdp_umem_reg_v2))
mr_size = sizeof(struct xdp_umem_reg_v1);
+ else if (optlen < sizeof(mr))
+ mr_size = sizeof(struct xdp_umem_reg_v2);
if (copy_from_sockptr(&mr, optval, mr_size))
return -EFAULT;
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index b3f7b310811e..ce60ecd48a4d 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -85,6 +85,8 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
XDP_PACKET_HEADROOM;
pool->umem = umem;
pool->addrs = umem->addrs;
+ pool->tx_metadata_len = umem->tx_metadata_len;
+ pool->tx_sw_csum = umem->flags & XDP_UMEM_TX_SW_CSUM;
INIT_LIST_HEAD(&pool->free_list);
INIT_LIST_HEAD(&pool->xskb_list);
INIT_LIST_HEAD(&pool->xsk_tx_list);
@@ -123,6 +125,18 @@ void xp_set_rxq_info(struct xsk_buff_pool *pool, struct xdp_rxq_info *rxq)
}
EXPORT_SYMBOL(xp_set_rxq_info);
+void xp_fill_cb(struct xsk_buff_pool *pool, struct xsk_cb_desc *desc)
+{
+ u32 i;
+
+ for (i = 0; i < pool->heads_cnt; i++) {
+ struct xdp_buff_xsk *xskb = &pool->heads[i];
+
+ memcpy(xskb->cb + desc->off, desc->src, desc->bytes);
+ }
+}
+EXPORT_SYMBOL(xp_fill_cb);
+
static void xp_disable_drv_zc(struct xsk_buff_pool *pool)
{
struct netdev_bpf bpf;
@@ -170,6 +184,9 @@ int xp_assign_dev(struct xsk_buff_pool *pool,
if (err)
return err;
+ if (flags & XDP_USE_SG)
+ pool->umem->flags |= XDP_UMEM_SG_FLAG;
+
if (flags & XDP_USE_NEED_WAKEUP)
pool->uses_need_wakeup = true;
/* Tx needs to be explicitly woken up the first time. Also
@@ -538,6 +555,7 @@ struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool)
xskb->xdp.data = xskb->xdp.data_hard_start + XDP_PACKET_HEADROOM;
xskb->xdp.data_meta = xskb->xdp.data;
+ xskb->xdp.flags = 0;
if (pool->dma_need_sync) {
dma_sync_single_range_for_device(pool->dev, xskb->dma, 0,
diff --git a/net/xdp/xsk_diag.c b/net/xdp/xsk_diag.c
index c014217f5fa7..9f8955367275 100644
--- a/net/xdp/xsk_diag.c
+++ b/net/xdp/xsk_diag.c
@@ -111,6 +111,9 @@ static int xsk_diag_fill(struct sock *sk, struct sk_buff *nlskb,
sock_diag_save_cookie(sk, msg->xdiag_cookie);
mutex_lock(&xs->mutex);
+ if (READ_ONCE(xs->state) == XSK_UNBOUND)
+ goto out_nlmsg_trim;
+
if ((req->xdiag_show & XDP_SHOW_INFO) && xsk_diag_put_info(xs, nlskb))
goto out_nlmsg_trim;
@@ -208,4 +211,5 @@ static void __exit xsk_diag_exit(void)
module_init(xsk_diag_init);
module_exit(xsk_diag_exit);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("XDP socket monitoring via SOCK_DIAG");
MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, AF_XDP);
diff --git a/net/xdp/xsk_queue.c b/net/xdp/xsk_queue.c
index f8905400ee07..d2c264030017 100644
--- a/net/xdp/xsk_queue.c
+++ b/net/xdp/xsk_queue.c
@@ -34,6 +34,16 @@ struct xsk_queue *xskq_create(u32 nentries, bool umem_queue)
q->ring_mask = nentries - 1;
size = xskq_get_ring_size(q, umem_queue);
+
+ /* size which is overflowing or close to SIZE_MAX will become 0 in
+ * PAGE_ALIGN(), checking SIZE_MAX is enough due to the previous
+ * is_power_of_2(), the rest will be handled by vmalloc_user()
+ */
+ if (unlikely(size == SIZE_MAX)) {
+ kfree(q);
+ return NULL;
+ }
+
size = PAGE_ALIGN(size);
q->ring = vmalloc_user(size);
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 13354a1e4280..6f2d1621c992 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -137,21 +137,23 @@ static inline bool xskq_cons_read_addr_unchecked(struct xsk_queue *q, u64 *addr)
static inline bool xp_unused_options_set(u32 options)
{
- return options & ~XDP_PKT_CONTD;
+ return options & ~(XDP_PKT_CONTD | XDP_TX_METADATA);
}
static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool,
struct xdp_desc *desc)
{
- u64 offset = desc->addr & (pool->chunk_size - 1);
+ u64 addr = desc->addr - pool->tx_metadata_len;
+ u64 len = desc->len + pool->tx_metadata_len;
+ u64 offset = addr & (pool->chunk_size - 1);
if (!desc->len)
return false;
- if (offset + desc->len > pool->chunk_size)
+ if (offset + len > pool->chunk_size)
return false;
- if (desc->addr >= pool->addrs_cnt)
+ if (addr >= pool->addrs_cnt)
return false;
if (xp_unused_options_set(desc->options))
@@ -162,16 +164,17 @@ static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool,
static inline bool xp_unaligned_validate_desc(struct xsk_buff_pool *pool,
struct xdp_desc *desc)
{
- u64 addr = xp_unaligned_add_offset_to_addr(desc->addr);
+ u64 addr = xp_unaligned_add_offset_to_addr(desc->addr) - pool->tx_metadata_len;
+ u64 len = desc->len + pool->tx_metadata_len;
if (!desc->len)
return false;
- if (desc->len > pool->chunk_size)
+ if (len > pool->chunk_size)
return false;
- if (addr >= pool->addrs_cnt || addr + desc->len > pool->addrs_cnt ||
- xp_desc_crosses_non_contig_pg(pool, addr, desc->len))
+ if (addr >= pool->addrs_cnt || addr + len > pool->addrs_cnt ||
+ xp_desc_crosses_non_contig_pg(pool, addr, len))
return false;
if (xp_unused_options_set(desc->options))
diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig
index 3adf31a83a79..d7b16f2c23e9 100644
--- a/net/xfrm/Kconfig
+++ b/net/xfrm/Kconfig
@@ -15,6 +15,7 @@ config XFRM_ALGO
tristate
select XFRM
select CRYPTO
+ select CRYPTO_AEAD
select CRYPTO_HASH
select CRYPTO_SKCIPHER
diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile
index cd47f88921f5..547cec77ba03 100644
--- a/net/xfrm/Makefile
+++ b/net/xfrm/Makefile
@@ -21,3 +21,4 @@ obj-$(CONFIG_XFRM_USER_COMPAT) += xfrm_compat.o
obj-$(CONFIG_XFRM_IPCOMP) += xfrm_ipcomp.o
obj-$(CONFIG_XFRM_INTERFACE) += xfrm_interface.o
obj-$(CONFIG_XFRM_ESPINTCP) += espintcp.o
+obj-$(CONFIG_DEBUG_INFO_BTF) += xfrm_state_bpf.o
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
index 094734fbec96..41533c631431 100644
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -5,6 +5,7 @@
* Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
*/
+#include <crypto/aead.h>
#include <crypto/hash.h>
#include <crypto/skcipher.h>
#include <linux/module.h>
@@ -644,38 +645,33 @@ static inline int calg_entries(void)
}
struct xfrm_algo_list {
+ int (*find)(const char *name, u32 type, u32 mask);
struct xfrm_algo_desc *algs;
int entries;
- u32 type;
- u32 mask;
};
static const struct xfrm_algo_list xfrm_aead_list = {
+ .find = crypto_has_aead,
.algs = aead_list,
.entries = ARRAY_SIZE(aead_list),
- .type = CRYPTO_ALG_TYPE_AEAD,
- .mask = CRYPTO_ALG_TYPE_MASK,
};
static const struct xfrm_algo_list xfrm_aalg_list = {
+ .find = crypto_has_ahash,
.algs = aalg_list,
.entries = ARRAY_SIZE(aalg_list),
- .type = CRYPTO_ALG_TYPE_HASH,
- .mask = CRYPTO_ALG_TYPE_HASH_MASK,
};
static const struct xfrm_algo_list xfrm_ealg_list = {
+ .find = crypto_has_skcipher,
.algs = ealg_list,
.entries = ARRAY_SIZE(ealg_list),
- .type = CRYPTO_ALG_TYPE_SKCIPHER,
- .mask = CRYPTO_ALG_TYPE_MASK,
};
static const struct xfrm_algo_list xfrm_calg_list = {
+ .find = crypto_has_comp,
.algs = calg_list,
.entries = ARRAY_SIZE(calg_list),
- .type = CRYPTO_ALG_TYPE_COMPRESS,
- .mask = CRYPTO_ALG_TYPE_MASK,
};
static struct xfrm_algo_desc *xfrm_find_algo(
@@ -696,8 +692,7 @@ static struct xfrm_algo_desc *xfrm_find_algo(
if (!probe)
break;
- status = crypto_has_alg(list[i].name, algo_list->type,
- algo_list->mask);
+ status = algo_list->find(list[i].name, 0, 0);
if (!status)
break;
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index d5ee96789d4b..bd4ce21d76d7 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -462,7 +462,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
struct xfrm_offload *xo = xfrm_offload(skb);
struct sec_path *sp;
- if (encap_type < 0) {
+ if (encap_type < 0 || (xo && xo->flags & XFRM_GRO)) {
x = xfrm_input_state(skb);
if (unlikely(x->km.state != XFRM_STATE_VALID)) {
@@ -485,9 +485,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
seq = XFRM_SKB_CB(skb)->seq.input.low;
goto resume;
}
-
- /* encap_type < -1 indicates a GRO call. */
- encap_type = 0;
+ /* GRO call */
seq = XFRM_SPI_SKB_CB(skb)->seq;
if (xo && (xo->flags & CRYPTO_DONE)) {
diff --git a/net/xfrm/xfrm_interface_bpf.c b/net/xfrm/xfrm_interface_bpf.c
index d74f3fd20f2b..7d5e920141e9 100644
--- a/net/xfrm/xfrm_interface_bpf.c
+++ b/net/xfrm/xfrm_interface_bpf.c
@@ -27,9 +27,7 @@ struct bpf_xfrm_info {
int link;
};
-__diag_push();
-__diag_ignore_all("-Wmissing-prototypes",
- "Global functions as their definitions will be in xfrm_interface BTF");
+__bpf_kfunc_start_defs();
/* bpf_skb_get_xfrm_info - Get XFRM metadata
*
@@ -93,7 +91,7 @@ __bpf_kfunc int bpf_skb_set_xfrm_info(struct __sk_buff *skb_ctx, const struct bp
return 0;
}
-__diag_pop()
+__bpf_kfunc_end_defs();
BTF_SET8_START(xfrm_ifc_kfunc_set)
BTF_ID_FLAGS(func, bpf_skb_get_xfrm_info)
diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c
index b86474084690..21d50d75c260 100644
--- a/net/xfrm/xfrm_interface_core.c
+++ b/net/xfrm/xfrm_interface_core.c
@@ -380,8 +380,8 @@ static int xfrmi_rcv_cb(struct sk_buff *skb, int err)
skb->dev = dev;
if (err) {
- dev->stats.rx_errors++;
- dev->stats.rx_dropped++;
+ DEV_STATS_INC(dev, rx_errors);
+ DEV_STATS_INC(dev, rx_dropped);
return 0;
}
@@ -426,7 +426,6 @@ static int
xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
{
struct xfrm_if *xi = netdev_priv(dev);
- struct net_device_stats *stats = &xi->dev->stats;
struct dst_entry *dst = skb_dst(skb);
unsigned int length = skb->len;
struct net_device *tdev;
@@ -473,7 +472,7 @@ xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
tdev = dst->dev;
if (tdev == dev) {
- stats->collisions++;
+ DEV_STATS_INC(dev, collisions);
net_warn_ratelimited("%s: Local routing loop detected!\n",
dev->name);
goto tx_err_dst_release;
@@ -512,13 +511,13 @@ xmit:
if (net_xmit_eval(err) == 0) {
dev_sw_netstats_tx_add(dev, 1, length);
} else {
- stats->tx_errors++;
- stats->tx_aborted_errors++;
+ DEV_STATS_INC(dev, tx_errors);
+ DEV_STATS_INC(dev, tx_aborted_errors);
}
return 0;
tx_err_link_failure:
- stats->tx_carrier_errors++;
+ DEV_STATS_INC(dev, tx_carrier_errors);
dst_link_failure(skb);
tx_err_dst_release:
dst_release(dst);
@@ -528,7 +527,6 @@ tx_err_dst_release:
static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct xfrm_if *xi = netdev_priv(dev);
- struct net_device_stats *stats = &xi->dev->stats;
struct dst_entry *dst = skb_dst(skb);
struct flowi fl;
int ret;
@@ -538,14 +536,14 @@ static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev)
switch (skb->protocol) {
case htons(ETH_P_IPV6):
memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
- xfrm_decode_session(skb, &fl, AF_INET6);
+ xfrm_decode_session(dev_net(dev), skb, &fl, AF_INET6);
if (!dst) {
fl.u.ip6.flowi6_oif = dev->ifindex;
fl.u.ip6.flowi6_flags |= FLOWI_FLAG_ANYSRC;
dst = ip6_route_output(dev_net(dev), NULL, &fl.u.ip6);
if (dst->error) {
dst_release(dst);
- stats->tx_carrier_errors++;
+ DEV_STATS_INC(dev, tx_carrier_errors);
goto tx_err;
}
skb_dst_set(skb, dst);
@@ -553,7 +551,7 @@ static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev)
break;
case htons(ETH_P_IP):
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
- xfrm_decode_session(skb, &fl, AF_INET);
+ xfrm_decode_session(dev_net(dev), skb, &fl, AF_INET);
if (!dst) {
struct rtable *rt;
@@ -561,7 +559,7 @@ static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev)
fl.u.ip4.flowi4_flags |= FLOWI_FLAG_ANYSRC;
rt = __ip_route_output_key(dev_net(dev), &fl.u.ip4);
if (IS_ERR(rt)) {
- stats->tx_carrier_errors++;
+ DEV_STATS_INC(dev, tx_carrier_errors);
goto tx_err;
}
skb_dst_set(skb, &rt->dst);
@@ -580,8 +578,8 @@ static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev)
return NETDEV_TX_OK;
tx_err:
- stats->tx_errors++;
- stats->tx_dropped++;
+ DEV_STATS_INC(dev, tx_errors);
+ DEV_STATS_INC(dev, tx_dropped);
kfree_skb(skb);
return NETDEV_TX_OK;
}
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index d6b405782b63..1b7e75159727 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -149,6 +149,21 @@ struct xfrm_pol_inexact_candidates {
struct hlist_head *res[XFRM_POL_CAND_MAX];
};
+struct xfrm_flow_keys {
+ struct flow_dissector_key_basic basic;
+ struct flow_dissector_key_control control;
+ union {
+ struct flow_dissector_key_ipv4_addrs ipv4;
+ struct flow_dissector_key_ipv6_addrs ipv6;
+ } addrs;
+ struct flow_dissector_key_ip ip;
+ struct flow_dissector_key_icmp icmp;
+ struct flow_dissector_key_ports ports;
+ struct flow_dissector_key_keyid gre;
+};
+
+static struct flow_dissector xfrm_session_dissector __ro_after_init;
+
static DEFINE_SPINLOCK(xfrm_if_cb_lock);
static struct xfrm_if_cb const __rcu *xfrm_if_cb __read_mostly;
@@ -851,7 +866,7 @@ static void xfrm_policy_inexact_list_reinsert(struct net *net,
struct hlist_node *newpos = NULL;
bool matches_s, matches_d;
- if (!policy->bydst_reinsert)
+ if (policy->walk.dead || !policy->bydst_reinsert)
continue;
WARN_ON_ONCE(policy->family != family);
@@ -1256,8 +1271,11 @@ static void xfrm_hash_rebuild(struct work_struct *work)
struct xfrm_pol_inexact_bin *bin;
u8 dbits, sbits;
+ if (policy->walk.dead)
+ continue;
+
dir = xfrm_policy_id2dir(policy->index);
- if (policy->walk.dead || dir >= XFRM_POLICY_MAX)
+ if (dir >= XFRM_POLICY_MAX)
continue;
if ((dir & XFRM_POLICY_MASK) == XFRM_POLICY_OUT) {
@@ -1372,8 +1390,6 @@ EXPORT_SYMBOL(xfrm_policy_hash_rebuild);
* of an absolute inpredictability of ordering of rules. This will not pass. */
static u32 xfrm_gen_index(struct net *net, int dir, u32 index)
{
- static u32 idx_generator;
-
for (;;) {
struct hlist_head *list;
struct xfrm_policy *p;
@@ -1381,8 +1397,8 @@ static u32 xfrm_gen_index(struct net *net, int dir, u32 index)
int found;
if (!index) {
- idx = (idx_generator | dir);
- idx_generator += 8;
+ idx = (net->xfrm.idx_generator | dir);
+ net->xfrm.idx_generator += 8;
} else {
idx = index;
index = 0;
@@ -1823,9 +1839,11 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
again:
list_for_each_entry(pol, &net->xfrm.policy_all, walk.all) {
+ if (pol->walk.dead)
+ continue;
+
dir = xfrm_policy_id2dir(pol->index);
- if (pol->walk.dead ||
- dir >= XFRM_POLICY_MAX ||
+ if (dir >= XFRM_POLICY_MAX ||
pol->type != type)
continue;
@@ -1862,9 +1880,11 @@ int xfrm_dev_policy_flush(struct net *net, struct net_device *dev,
again:
list_for_each_entry(pol, &net->xfrm.policy_all, walk.all) {
+ if (pol->walk.dead)
+ continue;
+
dir = xfrm_policy_id2dir(pol->index);
- if (pol->walk.dead ||
- dir >= XFRM_POLICY_MAX ||
+ if (dir >= XFRM_POLICY_MAX ||
pol->xdo.dev != dev)
continue;
@@ -2561,7 +2581,7 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
default:
BUG();
}
- xdst = dst_alloc(dst_ops, NULL, 1, DST_OBSOLETE_NONE, 0);
+ xdst = dst_alloc(dst_ops, NULL, DST_OBSOLETE_NONE, 0);
if (likely(xdst)) {
memset_after(xdst, 0, u.dst);
@@ -2853,7 +2873,7 @@ static void xfrm_policy_queue_process(struct timer_list *t)
/* Fixup the mark to support VTI. */
skb_mark = skb->mark;
skb->mark = pol->mark.v;
- xfrm_decode_session(skb, &fl, dst->ops->family);
+ xfrm_decode_session(net, skb, &fl, dst->ops->family);
skb->mark = skb_mark;
spin_unlock(&pq->hold_queue.lock);
@@ -2889,7 +2909,7 @@ static void xfrm_policy_queue_process(struct timer_list *t)
/* Fixup the mark to support VTI. */
skb_mark = skb->mark;
skb->mark = pol->mark.v;
- xfrm_decode_session(skb, &fl, skb_dst(skb)->ops->family);
+ xfrm_decode_session(net, skb, &fl, skb_dst(skb)->ops->family);
skb->mark = skb_mark;
dst_hold(xfrm_dst_path(skb_dst(skb)));
@@ -3215,7 +3235,7 @@ no_transform:
}
for (i = 0; i < num_pols; i++)
- pols[i]->curlft.use_time = ktime_get_real_seconds();
+ WRITE_ONCE(pols[i]->curlft.use_time, ktime_get_real_seconds());
if (num_xfrms < 0) {
/* Prohibit the flow */
@@ -3367,209 +3387,106 @@ xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int star
}
static void
-decode_session4(struct sk_buff *skb, struct flowi *fl, bool reverse)
+decode_session4(const struct xfrm_flow_keys *flkeys, struct flowi *fl, bool reverse)
{
- const struct iphdr *iph = ip_hdr(skb);
- int ihl = iph->ihl;
- u8 *xprth = skb_network_header(skb) + ihl * 4;
struct flowi4 *fl4 = &fl->u.ip4;
- int oif = 0;
-
- if (skb_dst(skb) && skb_dst(skb)->dev)
- oif = skb_dst(skb)->dev->ifindex;
memset(fl4, 0, sizeof(struct flowi4));
- fl4->flowi4_mark = skb->mark;
- fl4->flowi4_oif = reverse ? skb->skb_iif : oif;
-
- fl4->flowi4_proto = iph->protocol;
- fl4->daddr = reverse ? iph->saddr : iph->daddr;
- fl4->saddr = reverse ? iph->daddr : iph->saddr;
- fl4->flowi4_tos = iph->tos & ~INET_ECN_MASK;
-
- if (!ip_is_fragment(iph)) {
- switch (iph->protocol) {
- case IPPROTO_UDP:
- case IPPROTO_UDPLITE:
- case IPPROTO_TCP:
- case IPPROTO_SCTP:
- case IPPROTO_DCCP:
- if (xprth + 4 < skb->data ||
- pskb_may_pull(skb, xprth + 4 - skb->data)) {
- __be16 *ports;
-
- xprth = skb_network_header(skb) + ihl * 4;
- ports = (__be16 *)xprth;
-
- fl4->fl4_sport = ports[!!reverse];
- fl4->fl4_dport = ports[!reverse];
- }
- break;
- case IPPROTO_ICMP:
- if (xprth + 2 < skb->data ||
- pskb_may_pull(skb, xprth + 2 - skb->data)) {
- u8 *icmp;
- xprth = skb_network_header(skb) + ihl * 4;
- icmp = xprth;
+ if (reverse) {
+ fl4->saddr = flkeys->addrs.ipv4.dst;
+ fl4->daddr = flkeys->addrs.ipv4.src;
+ fl4->fl4_sport = flkeys->ports.dst;
+ fl4->fl4_dport = flkeys->ports.src;
+ } else {
+ fl4->saddr = flkeys->addrs.ipv4.src;
+ fl4->daddr = flkeys->addrs.ipv4.dst;
+ fl4->fl4_sport = flkeys->ports.src;
+ fl4->fl4_dport = flkeys->ports.dst;
+ }
- fl4->fl4_icmp_type = icmp[0];
- fl4->fl4_icmp_code = icmp[1];
- }
- break;
- case IPPROTO_GRE:
- if (xprth + 12 < skb->data ||
- pskb_may_pull(skb, xprth + 12 - skb->data)) {
- __be16 *greflags;
- __be32 *gre_hdr;
-
- xprth = skb_network_header(skb) + ihl * 4;
- greflags = (__be16 *)xprth;
- gre_hdr = (__be32 *)xprth;
-
- if (greflags[0] & GRE_KEY) {
- if (greflags[0] & GRE_CSUM)
- gre_hdr++;
- fl4->fl4_gre_key = gre_hdr[1];
- }
- }
- break;
- default:
- break;
- }
+ switch (flkeys->basic.ip_proto) {
+ case IPPROTO_GRE:
+ fl4->fl4_gre_key = flkeys->gre.keyid;
+ break;
+ case IPPROTO_ICMP:
+ fl4->fl4_icmp_type = flkeys->icmp.type;
+ fl4->fl4_icmp_code = flkeys->icmp.code;
+ break;
}
+
+ fl4->flowi4_proto = flkeys->basic.ip_proto;
+ fl4->flowi4_tos = flkeys->ip.tos;
}
#if IS_ENABLED(CONFIG_IPV6)
static void
-decode_session6(struct sk_buff *skb, struct flowi *fl, bool reverse)
+decode_session6(const struct xfrm_flow_keys *flkeys, struct flowi *fl, bool reverse)
{
struct flowi6 *fl6 = &fl->u.ip6;
- int onlyproto = 0;
- const struct ipv6hdr *hdr = ipv6_hdr(skb);
- u32 offset = sizeof(*hdr);
- struct ipv6_opt_hdr *exthdr;
- const unsigned char *nh = skb_network_header(skb);
- u16 nhoff = IP6CB(skb)->nhoff;
- int oif = 0;
- u8 nexthdr;
-
- if (!nhoff)
- nhoff = offsetof(struct ipv6hdr, nexthdr);
-
- nexthdr = nh[nhoff];
-
- if (skb_dst(skb) && skb_dst(skb)->dev)
- oif = skb_dst(skb)->dev->ifindex;
memset(fl6, 0, sizeof(struct flowi6));
- fl6->flowi6_mark = skb->mark;
- fl6->flowi6_oif = reverse ? skb->skb_iif : oif;
-
- fl6->daddr = reverse ? hdr->saddr : hdr->daddr;
- fl6->saddr = reverse ? hdr->daddr : hdr->saddr;
-
- while (nh + offset + sizeof(*exthdr) < skb->data ||
- pskb_may_pull(skb, nh + offset + sizeof(*exthdr) - skb->data)) {
- nh = skb_network_header(skb);
- exthdr = (struct ipv6_opt_hdr *)(nh + offset);
-
- switch (nexthdr) {
- case NEXTHDR_FRAGMENT:
- onlyproto = 1;
- fallthrough;
- case NEXTHDR_ROUTING:
- case NEXTHDR_HOP:
- case NEXTHDR_DEST:
- offset += ipv6_optlen(exthdr);
- nexthdr = exthdr->nexthdr;
- break;
- case IPPROTO_UDP:
- case IPPROTO_UDPLITE:
- case IPPROTO_TCP:
- case IPPROTO_SCTP:
- case IPPROTO_DCCP:
- if (!onlyproto && (nh + offset + 4 < skb->data ||
- pskb_may_pull(skb, nh + offset + 4 - skb->data))) {
- __be16 *ports;
-
- nh = skb_network_header(skb);
- ports = (__be16 *)(nh + offset);
- fl6->fl6_sport = ports[!!reverse];
- fl6->fl6_dport = ports[!reverse];
- }
- fl6->flowi6_proto = nexthdr;
- return;
- case IPPROTO_ICMPV6:
- if (!onlyproto && (nh + offset + 2 < skb->data ||
- pskb_may_pull(skb, nh + offset + 2 - skb->data))) {
- u8 *icmp;
-
- nh = skb_network_header(skb);
- icmp = (u8 *)(nh + offset);
- fl6->fl6_icmp_type = icmp[0];
- fl6->fl6_icmp_code = icmp[1];
- }
- fl6->flowi6_proto = nexthdr;
- return;
- case IPPROTO_GRE:
- if (!onlyproto &&
- (nh + offset + 12 < skb->data ||
- pskb_may_pull(skb, nh + offset + 12 - skb->data))) {
- struct gre_base_hdr *gre_hdr;
- __be32 *gre_key;
-
- nh = skb_network_header(skb);
- gre_hdr = (struct gre_base_hdr *)(nh + offset);
- gre_key = (__be32 *)(gre_hdr + 1);
-
- if (gre_hdr->flags & GRE_KEY) {
- if (gre_hdr->flags & GRE_CSUM)
- gre_key++;
- fl6->fl6_gre_key = *gre_key;
- }
- }
- fl6->flowi6_proto = nexthdr;
- return;
-#if IS_ENABLED(CONFIG_IPV6_MIP6)
- case IPPROTO_MH:
- offset += ipv6_optlen(exthdr);
- if (!onlyproto && (nh + offset + 3 < skb->data ||
- pskb_may_pull(skb, nh + offset + 3 - skb->data))) {
- struct ip6_mh *mh;
-
- nh = skb_network_header(skb);
- mh = (struct ip6_mh *)(nh + offset);
- fl6->fl6_mh_type = mh->ip6mh_type;
- }
- fl6->flowi6_proto = nexthdr;
- return;
-#endif
- default:
- fl6->flowi6_proto = nexthdr;
- return;
- }
+ if (reverse) {
+ fl6->saddr = flkeys->addrs.ipv6.dst;
+ fl6->daddr = flkeys->addrs.ipv6.src;
+ fl6->fl6_sport = flkeys->ports.dst;
+ fl6->fl6_dport = flkeys->ports.src;
+ } else {
+ fl6->saddr = flkeys->addrs.ipv6.src;
+ fl6->daddr = flkeys->addrs.ipv6.dst;
+ fl6->fl6_sport = flkeys->ports.src;
+ fl6->fl6_dport = flkeys->ports.dst;
+ }
+
+ switch (flkeys->basic.ip_proto) {
+ case IPPROTO_GRE:
+ fl6->fl6_gre_key = flkeys->gre.keyid;
+ break;
+ case IPPROTO_ICMPV6:
+ fl6->fl6_icmp_type = flkeys->icmp.type;
+ fl6->fl6_icmp_code = flkeys->icmp.code;
+ break;
}
+
+ fl6->flowi6_proto = flkeys->basic.ip_proto;
}
#endif
-int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
+int __xfrm_decode_session(struct net *net, struct sk_buff *skb, struct flowi *fl,
unsigned int family, int reverse)
{
+ struct xfrm_flow_keys flkeys;
+
+ memset(&flkeys, 0, sizeof(flkeys));
+ __skb_flow_dissect(net, skb, &xfrm_session_dissector, &flkeys,
+ NULL, 0, 0, 0, FLOW_DISSECTOR_F_STOP_AT_ENCAP);
+
switch (family) {
case AF_INET:
- decode_session4(skb, fl, reverse);
+ decode_session4(&flkeys, fl, reverse);
break;
#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
- decode_session6(skb, fl, reverse);
+ decode_session6(&flkeys, fl, reverse);
break;
#endif
default:
return -EAFNOSUPPORT;
}
+ fl->flowi_mark = skb->mark;
+ if (reverse) {
+ fl->flowi_oif = skb->skb_iif;
+ } else {
+ int oif = 0;
+
+ if (skb_dst(skb) && skb_dst(skb)->dev)
+ oif = skb_dst(skb)->dev->ifindex;
+
+ fl->flowi_oif = oif;
+ }
+
return security_xfrm_decode_session(skb, &fl->flowi_secid);
}
EXPORT_SYMBOL(__xfrm_decode_session);
@@ -3618,7 +3535,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
reverse = dir & ~XFRM_POLICY_MASK;
dir &= XFRM_POLICY_MASK;
- if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) {
+ if (__xfrm_decode_session(net, skb, &fl, family, reverse) < 0) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
return 0;
}
@@ -3774,7 +3691,7 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
struct dst_entry *dst;
int res = 1;
- if (xfrm_decode_session(skb, &fl, family) < 0) {
+ if (xfrm_decode_session(net, skb, &fl, family) < 0) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR);
return 0;
}
@@ -4253,8 +4170,47 @@ static struct pernet_operations __net_initdata xfrm_net_ops = {
.exit = xfrm_net_exit,
};
+static const struct flow_dissector_key xfrm_flow_dissector_keys[] = {
+ {
+ .key_id = FLOW_DISSECTOR_KEY_CONTROL,
+ .offset = offsetof(struct xfrm_flow_keys, control),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_BASIC,
+ .offset = offsetof(struct xfrm_flow_keys, basic),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+ .offset = offsetof(struct xfrm_flow_keys, addrs.ipv4),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+ .offset = offsetof(struct xfrm_flow_keys, addrs.ipv6),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_PORTS,
+ .offset = offsetof(struct xfrm_flow_keys, ports),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_GRE_KEYID,
+ .offset = offsetof(struct xfrm_flow_keys, gre),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_IP,
+ .offset = offsetof(struct xfrm_flow_keys, ip),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_ICMP,
+ .offset = offsetof(struct xfrm_flow_keys, icmp),
+ },
+};
+
void __init xfrm_init(void)
{
+ skb_flow_dissector_init(&xfrm_session_dissector,
+ xfrm_flow_dissector_keys,
+ ARRAY_SIZE(xfrm_flow_dissector_keys));
+
register_pernet_subsys(&xfrm_net_ops);
xfrm_dev_init();
xfrm_input_init();
@@ -4262,6 +4218,8 @@ void __init xfrm_init(void)
#ifdef CONFIG_XFRM_ESPINTCP
espintcp_init();
#endif
+
+ register_xfrm_state_bpf();
}
#ifdef CONFIG_AUDITSYSCALL
diff --git a/net/xfrm/xfrm_state_bpf.c b/net/xfrm/xfrm_state_bpf.c
new file mode 100644
index 000000000000..9e20d4a377f7
--- /dev/null
+++ b/net/xfrm/xfrm_state_bpf.c
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Unstable XFRM state BPF helpers.
+ *
+ * Note that it is allowed to break compatibility for these functions since the
+ * interface they are exposed through to BPF programs is explicitly unstable.
+ */
+
+#include <linux/bpf.h>
+#include <linux/btf.h>
+#include <linux/btf_ids.h>
+#include <net/xdp.h>
+#include <net/xfrm.h>
+
+/* bpf_xfrm_state_opts - Options for XFRM state lookup helpers
+ *
+ * Members:
+ * @error - Out parameter, set for any errors encountered
+ * Values:
+ * -EINVAL - netns_id is less than -1
+ * -EINVAL - opts__sz isn't BPF_XFRM_STATE_OPTS_SZ
+ * -ENONET - No network namespace found for netns_id
+ * -ENOENT - No xfrm_state found
+ * @netns_id - Specify the network namespace for lookup
+ * Values:
+ * BPF_F_CURRENT_NETNS (-1)
+ * Use namespace associated with ctx
+ * [0, S32_MAX]
+ * Network Namespace ID
+ * @mark - XFRM mark to match on
+ * @daddr - Destination address to match on
+ * @spi - Security parameter index to match on
+ * @proto - IP protocol to match on (eg. IPPROTO_ESP)
+ * @family - Protocol family to match on (AF_INET/AF_INET6)
+ */
+struct bpf_xfrm_state_opts {
+ s32 error;
+ s32 netns_id;
+ u32 mark;
+ xfrm_address_t daddr;
+ __be32 spi;
+ u8 proto;
+ u16 family;
+};
+
+enum {
+ BPF_XFRM_STATE_OPTS_SZ = sizeof(struct bpf_xfrm_state_opts),
+};
+
+__bpf_kfunc_start_defs();
+
+/* bpf_xdp_get_xfrm_state - Get XFRM state
+ *
+ * A `struct xfrm_state *`, if found, must be released with a corresponding
+ * bpf_xdp_xfrm_state_release.
+ *
+ * Parameters:
+ * @ctx - Pointer to ctx (xdp_md) in XDP program
+ * Cannot be NULL
+ * @opts - Options for lookup (documented above)
+ * Cannot be NULL
+ * @opts__sz - Length of the bpf_xfrm_state_opts structure
+ * Must be BPF_XFRM_STATE_OPTS_SZ
+ */
+__bpf_kfunc struct xfrm_state *
+bpf_xdp_get_xfrm_state(struct xdp_md *ctx, struct bpf_xfrm_state_opts *opts, u32 opts__sz)
+{
+ struct xdp_buff *xdp = (struct xdp_buff *)ctx;
+ struct net *net = dev_net(xdp->rxq->dev);
+ struct xfrm_state *x;
+
+ if (!opts || opts__sz < sizeof(opts->error))
+ return NULL;
+
+ if (opts__sz != BPF_XFRM_STATE_OPTS_SZ) {
+ opts->error = -EINVAL;
+ return NULL;
+ }
+
+ if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS)) {
+ opts->error = -EINVAL;
+ return NULL;
+ }
+
+ if (opts->netns_id >= 0) {
+ net = get_net_ns_by_id(net, opts->netns_id);
+ if (unlikely(!net)) {
+ opts->error = -ENONET;
+ return NULL;
+ }
+ }
+
+ x = xfrm_state_lookup(net, opts->mark, &opts->daddr, opts->spi,
+ opts->proto, opts->family);
+
+ if (opts->netns_id >= 0)
+ put_net(net);
+ if (!x)
+ opts->error = -ENOENT;
+
+ return x;
+}
+
+/* bpf_xdp_xfrm_state_release - Release acquired xfrm_state object
+ *
+ * This must be invoked for referenced PTR_TO_BTF_ID, and the verifier rejects
+ * the program if any references remain in the program in all of the explored
+ * states.
+ *
+ * Parameters:
+ * @x - Pointer to referenced xfrm_state object, obtained using
+ * bpf_xdp_get_xfrm_state.
+ */
+__bpf_kfunc void bpf_xdp_xfrm_state_release(struct xfrm_state *x)
+{
+ xfrm_state_put(x);
+}
+
+__bpf_kfunc_end_defs();
+
+BTF_SET8_START(xfrm_state_kfunc_set)
+BTF_ID_FLAGS(func, bpf_xdp_get_xfrm_state, KF_RET_NULL | KF_ACQUIRE)
+BTF_ID_FLAGS(func, bpf_xdp_xfrm_state_release, KF_RELEASE)
+BTF_SET8_END(xfrm_state_kfunc_set)
+
+static const struct btf_kfunc_id_set xfrm_state_xdp_kfunc_set = {
+ .owner = THIS_MODULE,
+ .set = &xfrm_state_kfunc_set,
+};
+
+int __init register_xfrm_state_bpf(void)
+{
+ return register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP,
+ &xfrm_state_xdp_kfunc_set);
+}