summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/6lowpan/6lowpan_i.h1
-rw-r--r--net/6lowpan/Makefile1
-rw-r--r--net/6lowpan/nhc.h1
-rw-r--r--net/802/Makefile1
-rw-r--r--net/8021q/Makefile1
-rw-r--r--net/8021q/vlan.c15
-rw-r--r--net/8021q/vlan.h3
-rw-r--r--net/8021q/vlan_core.c7
-rw-r--r--net/8021q/vlan_netlink.c3
-rw-r--r--net/8021q/vlanproc.h1
-rw-r--r--net/9p/Makefile1
-rw-r--r--net/9p/trans_fd.c14
-rw-r--r--net/Kconfig21
-rw-r--r--net/Makefile3
-rw-r--r--net/appletalk/dev.c1
-rw-r--r--net/appletalk/sysctl_net_atalk.c1
-rw-r--r--net/atm/Makefile1
-rw-r--r--net/atm/addr.c1
-rw-r--r--net/atm/addr.h1
-rw-r--r--net/atm/atm_misc.c1
-rw-r--r--net/atm/atm_sysfs.c1
-rw-r--r--net/atm/clip.c6
-rw-r--r--net/atm/common.h1
-rw-r--r--net/atm/ioctl.c1
-rw-r--r--net/atm/lec.c21
-rw-r--r--net/atm/lec.h1
-rw-r--r--net/atm/lec_arpc.h1
-rw-r--r--net/atm/mpc.c15
-rw-r--r--net/atm/mpc.h1
-rw-r--r--net/atm/mpoa_caches.c1
-rw-r--r--net/atm/mpoa_caches.h1
-rw-r--r--net/atm/mpoa_proc.c1
-rw-r--r--net/atm/proc.c1
-rw-r--r--net/atm/protocols.h1
-rw-r--r--net/atm/pvc.c1
-rw-r--r--net/atm/raw.c1
-rw-r--r--net/atm/resources.c1
-rw-r--r--net/atm/resources.h1
-rw-r--r--net/atm/signaling.c3
-rw-r--r--net/atm/signaling.h1
-rw-r--r--net/atm/svc.c1
-rw-r--r--net/ax25/Makefile1
-rw-r--r--net/ax25/af_ax25.c7
-rw-r--r--net/ax25/ax25_ds_timer.c9
-rw-r--r--net/ax25/ax25_timer.c41
-rw-r--r--net/batman-adv/bat_iv_ogm.c30
-rw-r--r--net/batman-adv/bat_v.c11
-rw-r--r--net/batman-adv/bat_v_elp.c6
-rw-r--r--net/batman-adv/bat_v_ogm.c28
-rw-r--r--net/batman-adv/distributed-arp-table.c6
-rw-r--r--net/batman-adv/gateway_client.c8
-rw-r--r--net/batman-adv/gateway_common.c18
-rw-r--r--net/batman-adv/hard-interface.c14
-rw-r--r--net/batman-adv/icmp_socket.c4
-rw-r--r--net/batman-adv/main.c12
-rw-r--r--net/batman-adv/main.h2
-rw-r--r--net/batman-adv/multicast.c2
-rw-r--r--net/batman-adv/originator.c26
-rw-r--r--net/batman-adv/routing.c6
-rw-r--r--net/batman-adv/send.c8
-rw-r--r--net/batman-adv/soft-interface.c10
-rw-r--r--net/batman-adv/sysfs.c4
-rw-r--r--net/batman-adv/tp_meter.c2
-rw-r--r--net/bluetooth/6lowpan.c9
-rw-r--r--net/bluetooth/Kconfig12
-rw-r--r--net/bluetooth/Makefile1
-rw-r--r--net/bluetooth/a2mp.c2
-rw-r--r--net/bluetooth/amp.c4
-rw-r--r--net/bluetooth/ecdh_helper.c228
-rw-r--r--net/bluetooth/ecdh_helper.h9
-rw-r--r--net/bluetooth/hci_conn.c6
-rw-r--r--net/bluetooth/hci_core.c35
-rw-r--r--net/bluetooth/hci_event.c46
-rw-r--r--net/bluetooth/hci_request.c21
-rw-r--r--net/bluetooth/hci_request.h1
-rw-r--r--net/bluetooth/hci_sock.c17
-rw-r--r--net/bluetooth/hci_sysfs.c7
-rw-r--r--net/bluetooth/hidp/core.c3
-rw-r--r--net/bluetooth/l2cap_core.c80
-rw-r--r--net/bluetooth/lib.c6
-rw-r--r--net/bluetooth/mgmt.c57
-rw-r--r--net/bluetooth/selftest.c50
-rw-r--r--net/bluetooth/smp.c149
-rw-r--r--net/bpf/test_run.c3
-rw-r--r--net/bridge/Makefile3
-rw-r--r--net/bridge/br.c2
-rw-r--r--net/bridge/br_arp_nd_proxy.c469
-rw-r--r--net/bridge/br_device.c28
-rw-r--r--net/bridge/br_fdb.c30
-rw-r--r--net/bridge/br_forward.c2
-rw-r--r--net/bridge/br_if.c24
-rw-r--r--net/bridge/br_input.c77
-rw-r--r--net/bridge/br_ioctl.c4
-rw-r--r--net/bridge/br_mdb.c61
-rw-r--r--net/bridge/br_multicast.c141
-rw-r--r--net/bridge/br_netfilter_hooks.c21
-rw-r--r--net/bridge/br_netlink.c131
-rw-r--r--net/bridge/br_netlink_tunnel.c14
-rw-r--r--net/bridge/br_private.h49
-rw-r--r--net/bridge/br_private_tunnel.h3
-rw-r--r--net/bridge/br_stp.c6
-rw-r--r--net/bridge/br_stp_if.c4
-rw-r--r--net/bridge/br_stp_timer.c50
-rw-r--r--net/bridge/br_switchdev.c3
-rw-r--r--net/bridge/br_sysfs_if.c22
-rw-r--r--net/bridge/br_vlan.c78
-rw-r--r--net/bridge/netfilter/Makefile1
-rw-r--r--net/bridge/netfilter/ebt_ip.c4
-rw-r--r--net/bridge/netfilter/ebt_ip6.c2
-rw-r--r--net/bridge/netfilter/ebtable_broute.c4
-rw-r--r--net/bridge/netfilter/ebtable_filter.c6
-rw-r--r--net/bridge/netfilter/ebtable_nat.c8
-rw-r--r--net/bridge/netfilter/ebtables.c53
-rw-r--r--net/caif/Makefile1
-rw-r--r--net/can/Makefile1
-rw-r--r--net/can/af_can.c24
-rw-r--r--net/can/af_can.h2
-rw-r--r--net/can/bcm.c5
-rw-r--r--net/can/gw.c6
-rw-r--r--net/can/proc.c8
-rw-r--r--net/ceph/Makefile1
-rw-r--r--net/ceph/armor.c1
-rw-r--r--net/ceph/auth.c1
-rw-r--r--net/ceph/auth_none.c1
-rw-r--r--net/ceph/auth_none.h1
-rw-r--r--net/ceph/auth_x.c1
-rw-r--r--net/ceph/auth_x.h1
-rw-r--r--net/ceph/auth_x_protocol.h1
-rw-r--r--net/ceph/buffer.c1
-rw-r--r--net/ceph/ceph_fs.c1
-rw-r--r--net/ceph/ceph_strings.c1
-rw-r--r--net/ceph/cls_lock_client.c1
-rw-r--r--net/ceph/crush/crush.c1
-rw-r--r--net/ceph/crush/hash.c1
-rw-r--r--net/ceph/crypto.c1
-rw-r--r--net/ceph/crypto.h1
-rw-r--r--net/ceph/debugfs.c1
-rw-r--r--net/ceph/messenger.c1
-rw-r--r--net/ceph/mon_client.c7
-rw-r--r--net/ceph/msgpool.c1
-rw-r--r--net/ceph/osd_client.c6
-rw-r--r--net/ceph/osdmap.c36
-rw-r--r--net/ceph/pagelist.c1
-rw-r--r--net/ceph/pagevec.c5
-rw-r--r--net/ceph/string_table.c1
-rw-r--r--net/compat.c7
-rw-r--r--net/core/Makefile5
-rw-r--r--net/core/datagram.c72
-rw-r--r--net/core/dev.c672
-rw-r--r--net/core/dev_ioctl.c14
-rw-r--r--net/core/devlink.c85
-rw-r--r--net/core/dst.c25
-rw-r--r--net/core/ethtool.c65
-rw-r--r--net/core/fib_notifier.c181
-rw-r--r--net/core/fib_rules.c78
-rw-r--r--net/core/filter.c842
-rw-r--r--net/core/flow.c516
-rw-r--r--net/core/flow_dissector.c429
-rw-r--r--net/core/gen_estimator.c4
-rw-r--r--net/core/gro_cells.c1
-rw-r--r--net/core/lwt_bpf.c2
-rw-r--r--net/core/lwtunnel.c28
-rw-r--r--net/core/neighbour.c28
-rw-r--r--net/core/net-procfs.c1
-rw-r--r--net/core/net-sysfs.c250
-rw-r--r--net/core/net-sysfs.h1
-rw-r--r--net/core/net-traces.c12
-rw-r--r--net/core/net_namespace.c6
-rw-r--r--net/core/netclassid_cgroup.c2
-rw-r--r--net/core/netpoll.c2
-rw-r--r--net/core/pktgen.c18
-rw-r--r--net/core/rtnetlink.c713
-rw-r--r--net/core/skbuff.c655
-rw-r--r--net/core/sock.c136
-rw-r--r--net/core/sock_reuseport.c13
-rw-r--r--net/core/stream.c1
-rw-r--r--net/core/sysctl_net_core.c1
-rw-r--r--net/core/tso.c1
-rw-r--r--net/dcb/dcbnl.c4
-rw-r--r--net/dccp/Makefile1
-rw-r--r--net/dccp/ccids/ccid2.c10
-rw-r--r--net/dccp/ccids/ccid2.h1
-rw-r--r--net/dccp/ccids/ccid3.c11
-rw-r--r--net/dccp/ccids/ccid3.h1
-rw-r--r--net/dccp/ccids/lib/packet_history.c4
-rw-r--r--net/dccp/ccids/lib/tfrc.c1
-rw-r--r--net/dccp/input.c1
-rw-r--r--net/dccp/ipv4.c17
-rw-r--r--net/dccp/ipv6.c51
-rw-r--r--net/dccp/options.c2
-rw-r--r--net/dccp/proto.c14
-rw-r--r--net/dccp/timer.c30
-rw-r--r--net/decnet/Makefile1
-rw-r--r--net/decnet/af_decnet.c7
-rw-r--r--net/decnet/dn_dev.c19
-rw-r--r--net/decnet/dn_fib.c5
-rw-r--r--net/decnet/dn_neigh.c3
-rw-r--r--net/decnet/dn_nsp_in.c8
-rw-r--r--net/decnet/dn_nsp_out.c18
-rw-r--r--net/decnet/dn_route.c14
-rw-r--r--net/decnet/dn_rules.c1
-rw-r--r--net/decnet/dn_table.c2
-rw-r--r--net/decnet/dn_timer.c1
-rw-r--r--net/decnet/netfilter/dn_rtmsg.c2
-rw-r--r--net/decnet/sysctl_net_decnet.c1
-rw-r--r--net/dns_resolver/dns_key.c2
-rw-r--r--net/dsa/Kconfig4
-rw-r--r--net/dsa/Makefile4
-rw-r--r--net/dsa/dsa.c133
-rw-r--r--net/dsa/dsa2.c906
-rw-r--r--net/dsa/dsa_priv.h109
-rw-r--r--net/dsa/legacy.c113
-rw-r--r--net/dsa/master.c143
-rw-r--r--net/dsa/port.c132
-rw-r--r--net/dsa/slave.c963
-rw-r--r--net/dsa/switch.c27
-rw-r--r--net/dsa/tag_brcm.c94
-rw-r--r--net/dsa/tag_dsa.c31
-rw-r--r--net/dsa/tag_edsa.c31
-rw-r--r--net/dsa/tag_ksz.c28
-rw-r--r--net/dsa/tag_lan9303.c56
-rw-r--r--net/dsa/tag_mtk.c37
-rw-r--r--net/dsa/tag_qca.c25
-rw-r--r--net/dsa/tag_trailer.c18
-rw-r--r--net/hsr/hsr_device.c12
-rw-r--r--net/hsr/hsr_framereg.c6
-rw-r--r--net/hsr/hsr_framereg.h2
-rw-r--r--net/ieee802154/6lowpan/6lowpan_i.h1
-rw-r--r--net/ieee802154/6lowpan/core.c2
-rw-r--r--net/ieee802154/6lowpan/reassembly.c16
-rw-r--r--net/ieee802154/Makefile1
-rw-r--r--net/ieee802154/core.h1
-rw-r--r--net/ieee802154/netlink.c6
-rw-r--r--net/ieee802154/nl802154.h1
-rw-r--r--net/ieee802154/rdev-ops.h1
-rw-r--r--net/ieee802154/sysfs.h1
-rw-r--r--net/ieee802154/trace.h1
-rw-r--r--net/ife/ife.c2
-rw-r--r--net/ipv4/Kconfig8
-rw-r--r--net/ipv4/Makefile1
-rw-r--r--net/ipv4/af_inet.c40
-rw-r--r--net/ipv4/ah4.c2
-rw-r--r--net/ipv4/arp.c3
-rw-r--r--net/ipv4/cipso_ipv4.c24
-rw-r--r--net/ipv4/devinet.c51
-rw-r--r--net/ipv4/esp4.c86
-rw-r--r--net/ipv4/esp4_offload.c7
-rw-r--r--net/ipv4/fib_frontend.c57
-rw-r--r--net/ipv4/fib_lookup.h2
-rw-r--r--net/ipv4/fib_notifier.c102
-rw-r--r--net/ipv4/fib_rules.c44
-rw-r--r--net/ipv4/fib_semantics.c82
-rw-r--r--net/ipv4/fib_trie.c50
-rw-r--r--net/ipv4/gre_offload.c16
-rw-r--r--net/ipv4/icmp.c24
-rw-r--r--net/ipv4/igmp.c16
-rw-r--r--net/ipv4/inet_connection_sock.c75
-rw-r--r--net/ipv4/inet_diag.c33
-rw-r--r--net/ipv4/inet_fragment.c10
-rw-r--r--net/ipv4/inet_hashtables.c32
-rw-r--r--net/ipv4/inet_timewait_sock.c10
-rw-r--r--net/ipv4/inetpeer.c434
-rw-r--r--net/ipv4/ip_forward.c1
-rw-r--r--net/ipv4/ip_fragment.c18
-rw-r--r--net/ipv4/ip_gre.c493
-rw-r--r--net/ipv4/ip_input.c25
-rw-r--r--net/ipv4/ip_options.c10
-rw-r--r--net/ipv4/ip_output.c92
-rw-r--r--net/ipv4/ip_sockglue.c22
-rw-r--r--net/ipv4/ip_tunnel.c18
-rw-r--r--net/ipv4/ip_vti.c50
-rw-r--r--net/ipv4/ipconfig.c1
-rw-r--r--net/ipv4/ipip.c66
-rw-r--r--net/ipv4/ipmr.c270
-rw-r--r--net/ipv4/netfilter/Makefile1
-rw-r--r--net/ipv4/netfilter/arp_tables.c35
-rw-r--r--net/ipv4/netfilter/ip_tables.c49
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c6
-rw-r--r--net/ipv4/netfilter/ipt_SYNPROXY.c5
-rw-r--r--net/ipv4/netfilter/iptable_nat.c2
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c42
-rw-r--r--net/ipv4/netfilter/nf_conntrack_proto_icmp.c43
-rw-r--r--net/ipv4/netfilter/nf_defrag_ipv4.c2
-rw-r--r--net/ipv4/netfilter/nf_log_arp.c2
-rw-r--r--net/ipv4/netfilter/nf_log_ipv4.c2
-rw-r--r--net/ipv4/netfilter/nf_nat_h323.c57
-rw-r--r--net/ipv4/netfilter/nf_nat_l3proto_ipv4.c9
-rw-r--r--net/ipv4/netfilter/nf_nat_masquerade_ipv4.c8
-rw-r--r--net/ipv4/netfilter/nf_reject_ipv4.c2
-rw-r--r--net/ipv4/netfilter/nft_fib_ipv4.c20
-rw-r--r--net/ipv4/proc.c8
-rw-r--r--net/ipv4/raw.c18
-rw-r--r--net/ipv4/raw_diag.c4
-rw-r--r--net/ipv4/route.c97
-rw-r--r--net/ipv4/syncookies.c4
-rw-r--r--net/ipv4/sysctl_net_ipv4.c524
-rw-r--r--net/ipv4/tcp.c454
-rw-r--r--net/ipv4/tcp_bic.c14
-rw-r--r--net/ipv4/tcp_cdg.c14
-rw-r--r--net/ipv4/tcp_cong.c97
-rw-r--r--net/ipv4/tcp_cubic.c13
-rw-r--r--net/ipv4/tcp_diag.c109
-rw-r--r--net/ipv4/tcp_fastopen.c163
-rw-r--r--net/ipv4/tcp_highspeed.c11
-rw-r--r--net/ipv4/tcp_htcp.c3
-rw-r--r--net/ipv4/tcp_illinois.c11
-rw-r--r--net/ipv4/tcp_input.c735
-rw-r--r--net/ipv4/tcp_ipv4.c184
-rw-r--r--net/ipv4/tcp_metrics.c23
-rw-r--r--net/ipv4/tcp_minisocks.c42
-rw-r--r--net/ipv4/tcp_nv.c60
-rw-r--r--net/ipv4/tcp_offload.c12
-rw-r--r--net/ipv4/tcp_output.c418
-rw-r--r--net/ipv4/tcp_probe.c5
-rw-r--r--net/ipv4/tcp_recovery.c105
-rw-r--r--net/ipv4/tcp_scalable.c16
-rw-r--r--net/ipv4/tcp_timer.c89
-rw-r--r--net/ipv4/tcp_vegas.c2
-rw-r--r--net/ipv4/tcp_vegas.h1
-rw-r--r--net/ipv4/tcp_veno.c11
-rw-r--r--net/ipv4/tcp_yeah.c11
-rw-r--r--net/ipv4/udp.c148
-rw-r--r--net/ipv4/udp_diag.c10
-rw-r--r--net/ipv4/udp_impl.h1
-rw-r--r--net/ipv4/udp_offload.c66
-rw-r--r--net/ipv4/udp_tunnel.c25
-rw-r--r--net/ipv4/xfrm4_input.c1
-rw-r--r--net/ipv4/xfrm4_policy.c26
-rw-r--r--net/ipv4/xfrm4_state.c1
-rw-r--r--net/ipv6/Kconfig16
-rw-r--r--net/ipv6/Makefile5
-rw-r--r--net/ipv6/addrconf.c474
-rw-r--r--net/ipv6/addrconf_core.c9
-rw-r--r--net/ipv6/addrlabel.c169
-rw-r--r--net/ipv6/af_inet6.c9
-rw-r--r--net/ipv6/ah6.c3
-rw-r--r--net/ipv6/esp6.c87
-rw-r--r--net/ipv6/esp6_offload.c7
-rw-r--r--net/ipv6/exthdrs.c77
-rw-r--r--net/ipv6/exthdrs_core.c5
-rw-r--r--net/ipv6/fib6_notifier.c63
-rw-r--r--net/ipv6/fib6_rules.c69
-rw-r--r--net/ipv6/icmp.c77
-rw-r--r--net/ipv6/ila/ila.h12
-rw-r--r--net/ipv6/ila/ila_common.c104
-rw-r--r--net/ipv6/ila/ila_lwt.c112
-rw-r--r--net/ipv6/ila/ila_xlat.c29
-rw-r--r--net/ipv6/inet6_hashtables.c28
-rw-r--r--net/ipv6/ip6_checksum.c1
-rw-r--r--net/ipv6/ip6_fib.c839
-rw-r--r--net/ipv6/ip6_flowlabel.c3
-rw-r--r--net/ipv6/ip6_gre.c58
-rw-r--r--net/ipv6/ip6_icmp.c1
-rw-r--r--net/ipv6/ip6_offload.c2
-rw-r--r--net/ipv6/ip6_output.c81
-rw-r--r--net/ipv6/ip6_tunnel.c108
-rw-r--r--net/ipv6/ip6_vti.c57
-rw-r--r--net/ipv6/ip6mr.c7
-rw-r--r--net/ipv6/ipv6_sockglue.c13
-rw-r--r--net/ipv6/ndisc.c21
-rw-r--r--net/ipv6/netfilter/Makefile1
-rw-r--r--net/ipv6/netfilter/ip6_tables.c37
-rw-r--r--net/ipv6/netfilter/ip6t_SYNPROXY.c4
-rw-r--r--net/ipv6/netfilter/ip6table_nat.c2
-rw-r--r--net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c44
-rw-r--r--net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c49
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c17
-rw-r--r--net/ipv6/netfilter/nf_defrag_ipv6_hooks.c2
-rw-r--r--net/ipv6/netfilter/nf_log_ipv6.c2
-rw-r--r--net/ipv6/netfilter/nf_nat_l3proto_ipv6.c9
-rw-r--r--net/ipv6/netfilter/nf_nat_masquerade_ipv6.c4
-rw-r--r--net/ipv6/netfilter/nft_fib_ipv6.c29
-rw-r--r--net/ipv6/output_core.c37
-rw-r--r--net/ipv6/ping.c5
-rw-r--r--net/ipv6/raw.c17
-rw-r--r--net/ipv6/reassembly.c17
-rw-r--r--net/ipv6/route.c1100
-rw-r--r--net/ipv6/seg6.c7
-rw-r--r--net/ipv6/seg6_hmac.c7
-rw-r--r--net/ipv6/seg6_iptunnel.c82
-rw-r--r--net/ipv6/seg6_local.c934
-rw-r--r--net/ipv6/sit.c40
-rw-r--r--net/ipv6/syncookies.c2
-rw-r--r--net/ipv6/sysctl_net_ipv6.c41
-rw-r--r--net/ipv6/tcp_ipv6.c46
-rw-r--r--net/ipv6/udp.c72
-rw-r--r--net/ipv6/udp_impl.h1
-rw-r--r--net/ipv6/udp_offload.c100
-rw-r--r--net/ipv6/xfrm6_input.c5
-rw-r--r--net/ipv6/xfrm6_policy.c22
-rw-r--r--net/ipv6/xfrm6_state.c1
-rw-r--r--net/ipv6/xfrm6_tunnel.c8
-rw-r--r--net/ipx/af_ipx.c1
-rw-r--r--net/ipx/ipx_proc.c1
-rw-r--r--net/ipx/ipx_route.c1
-rw-r--r--net/ipx/pe2.c1
-rw-r--r--net/ipx/sysctl_net_ipx.c1
-rw-r--r--net/irda/Kconfig96
-rw-r--r--net/irda/Makefile15
-rw-r--r--net/irda/af_irda.c2695
-rw-r--r--net/irda/discovery.c417
-rw-r--r--net/irda/ircomm/Kconfig12
-rw-r--r--net/irda/ircomm/Makefile8
-rw-r--r--net/irda/ircomm/ircomm_core.c563
-rw-r--r--net/irda/ircomm/ircomm_event.c246
-rw-r--r--net/irda/ircomm/ircomm_lmp.c350
-rw-r--r--net/irda/ircomm/ircomm_param.c501
-rw-r--r--net/irda/ircomm/ircomm_ttp.c350
-rw-r--r--net/irda/ircomm/ircomm_tty.c1329
-rw-r--r--net/irda/ircomm/ircomm_tty_attach.c987
-rw-r--r--net/irda/ircomm/ircomm_tty_ioctl.c291
-rw-r--r--net/irda/irda_device.c316
-rw-r--r--net/irda/iriap.c1085
-rw-r--r--net/irda/iriap_event.c496
-rw-r--r--net/irda/irias_object.c555
-rw-r--r--net/irda/irlan/Kconfig14
-rw-r--r--net/irda/irlan/Makefile7
-rw-r--r--net/irda/irlan/irlan_client.c559
-rw-r--r--net/irda/irlan/irlan_client_event.c511
-rw-r--r--net/irda/irlan/irlan_common.c1176
-rw-r--r--net/irda/irlan/irlan_eth.c340
-rw-r--r--net/irda/irlan/irlan_event.c60
-rw-r--r--net/irda/irlan/irlan_filter.c240
-rw-r--r--net/irda/irlan/irlan_provider.c408
-rw-r--r--net/irda/irlan/irlan_provider_event.c233
-rw-r--r--net/irda/irlap.c1207
-rw-r--r--net/irda/irlap_event.c2316
-rw-r--r--net/irda/irlap_frame.c1407
-rw-r--r--net/irda/irlmp.c1996
-rw-r--r--net/irda/irlmp_event.c886
-rw-r--r--net/irda/irlmp_frame.c476
-rw-r--r--net/irda/irmod.c199
-rw-r--r--net/irda/irnet/Kconfig13
-rw-r--r--net/irda/irnet/Makefile7
-rw-r--r--net/irda/irnet/irnet.h522
-rw-r--r--net/irda/irnet/irnet_irda.c1885
-rw-r--r--net/irda/irnet/irnet_irda.h178
-rw-r--r--net/irda/irnet/irnet_ppp.c1189
-rw-r--r--net/irda/irnet/irnet_ppp.h116
-rw-r--r--net/irda/irnetlink.c162
-rw-r--r--net/irda/irproc.c96
-rw-r--r--net/irda/irqueue.c911
-rw-r--r--net/irda/irsysctl.c258
-rw-r--r--net/irda/irttp.c1891
-rw-r--r--net/irda/parameters.c584
-rw-r--r--net/irda/qos.c771
-rw-r--r--net/irda/timer.c231
-rw-r--r--net/irda/wrapper.c492
-rw-r--r--net/kcm/kcmproc.c35
-rw-r--r--net/kcm/kcmsock.c55
-rw-r--r--net/key/af_key.c8
-rw-r--r--net/l2tp/Makefile1
-rw-r--r--net/l2tp/l2tp_core.c204
-rw-r--r--net/l2tp/l2tp_core.h64
-rw-r--r--net/l2tp/l2tp_debugfs.c4
-rw-r--r--net/l2tp/l2tp_eth.c164
-rw-r--r--net/l2tp/l2tp_ip.c28
-rw-r--r--net/l2tp/l2tp_ip6.c28
-rw-r--r--net/l2tp/l2tp_netlink.c98
-rw-r--r--net/l2tp/l2tp_ppp.c345
-rw-r--r--net/lapb/lapb_iface.c4
-rw-r--r--net/lapb/lapb_timer.c18
-rw-r--r--net/llc/llc_c_ac.c27
-rw-r--r--net/llc/llc_conn.c12
-rw-r--r--net/llc/llc_input.c4
-rw-r--r--net/llc/sysctl_net_llc.c1
-rw-r--r--net/mac80211/Makefile4
-rw-r--r--net/mac80211/aead_api.c (renamed from net/mac80211/aes_ccm.c)40
-rw-r--r--net/mac80211/aead_api.h27
-rw-r--r--net/mac80211/aes_ccm.h42
-rw-r--r--net/mac80211/aes_gcm.c109
-rw-r--r--net/mac80211/aes_gcm.h38
-rw-r--r--net/mac80211/agg-rx.c36
-rw-r--r--net/mac80211/agg-tx.c8
-rw-r--r--net/mac80211/cfg.c12
-rw-r--r--net/mac80211/debug.h1
-rw-r--r--net/mac80211/debugfs.h1
-rw-r--r--net/mac80211/debugfs_key.h1
-rw-r--r--net/mac80211/debugfs_netdev.h1
-rw-r--r--net/mac80211/debugfs_sta.h1
-rw-r--r--net/mac80211/driver-ops.h1
-rw-r--r--net/mac80211/ht.c36
-rw-r--r--net/mac80211/ieee80211_i.h6
-rw-r--r--net/mac80211/iface.c19
-rw-r--r--net/mac80211/key.c54
-rw-r--r--net/mac80211/mesh.c3
-rw-r--r--net/mac80211/mesh.h1
-rw-r--r--net/mac80211/mesh_hwmp.c8
-rw-r--r--net/mac80211/mesh_plink.c13
-rw-r--r--net/mac80211/mlme.c35
-rw-r--r--net/mac80211/offchannel.c2
-rw-r--r--net/mac80211/pm.c1
-rw-r--r--net/mac80211/scan.c37
-rw-r--r--net/mac80211/sta_info.c63
-rw-r--r--net/mac80211/sta_info.h4
-rw-r--r--net/mac80211/trace.c1
-rw-r--r--net/mac80211/trace.h1
-rw-r--r--net/mac80211/trace_msg.h1
-rw-r--r--net/mac80211/tx.c70
-rw-r--r--net/mac80211/util.c27
-rw-r--r--net/mac80211/vht.c10
-rw-r--r--net/mac80211/wpa.c4
-rw-r--r--net/mac802154/cfg.h1
-rw-r--r--net/mac802154/driver-ops.h1
-rw-r--r--net/mac802154/llsec.c14
-rw-r--r--net/mac802154/trace.c1
-rw-r--r--net/mac802154/trace.h1
-rw-r--r--net/mpls/Kconfig1
-rw-r--r--net/mpls/af_mpls.c44
-rw-r--r--net/mpls/internal.h1
-rw-r--r--net/ncsi/internal.h12
-rw-r--r--net/ncsi/ncsi-aen.c17
-rw-r--r--net/ncsi/ncsi-cmd.c10
-rw-r--r--net/ncsi/ncsi-manage.c398
-rw-r--r--net/ncsi/ncsi-pkt.h2
-rw-r--r--net/ncsi/ncsi-rsp.c55
-rw-r--r--net/netfilter/Kconfig9
-rw-r--r--net/netfilter/Makefile2
-rw-r--r--net/netfilter/core.c351
-rw-r--r--net/netfilter/ipset/Makefile1
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_gen.h10
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ip.c2
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ipmac.c2
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_port.c2
-rw-r--r--net/netfilter/ipset/ip_set_core.c29
-rw-r--r--net/netfilter/ipset/ip_set_hash_gen.h26
-rw-r--r--net/netfilter/ipset/ip_set_hash_ip.c22
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipmark.c2
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipport.c2
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportip.c2
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportnet.c6
-rw-r--r--net/netfilter/ipset/ip_set_hash_net.c2
-rw-r--r--net/netfilter/ipset/ip_set_hash_netiface.c2
-rw-r--r--net/netfilter/ipset/ip_set_hash_netnet.c4
-rw-r--r--net/netfilter/ipset/ip_set_hash_netport.c2
-rw-r--r--net/netfilter/ipset/ip_set_hash_netportnet.c4
-rw-r--r--net/netfilter/ipset/ip_set_list_set.c17
-rw-r--r--net/netfilter/ipset/pfxlen.c395
-rw-r--r--net/netfilter/ipvs/Makefile1
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c12
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c10
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c18
-rw-r--r--net/netfilter/ipvs/ip_vs_est.c6
-rw-r--r--net/netfilter/ipvs/ip_vs_ftp.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_lblc.c11
-rw-r--r--net/netfilter/ipvs/ip_vs_lblcr.c11
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_sctp.c20
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_tcp.c10
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_udp.c10
-rw-r--r--net/netfilter/ipvs/ip_vs_sync.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c54
-rw-r--r--net/netfilter/nf_conntrack_broadcast.c2
-rw-r--r--net/netfilter/nf_conntrack_core.c124
-rw-r--r--net/netfilter/nf_conntrack_expect.c70
-rw-r--r--net/netfilter/nf_conntrack_extend.c2
-rw-r--r--net/netfilter/nf_conntrack_h323_asn1.c81
-rw-r--r--net/netfilter/nf_conntrack_helper.c34
-rw-r--r--net/netfilter/nf_conntrack_l3proto_generic.c7
-rw-r--r--net/netfilter/nf_conntrack_netlink.c116
-rw-r--r--net/netfilter/nf_conntrack_pptp.c2
-rw-r--r--net/netfilter/nf_conntrack_proto.c176
-rw-r--r--net/netfilter/nf_conntrack_proto_dccp.c39
-rw-r--r--net/netfilter/nf_conntrack_proto_generic.c22
-rw-r--r--net/netfilter/nf_conntrack_proto_gre.c17
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c24
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c82
-rw-r--r--net/netfilter/nf_conntrack_proto_udp.c59
-rw-r--r--net/netfilter/nf_conntrack_sip.c6
-rw-r--r--net/netfilter/nf_conntrack_standalone.c103
-rw-r--r--net/netfilter/nf_internals.h11
-rw-r--r--net/netfilter/nf_nat_core.c159
-rw-r--r--net/netfilter/nf_nat_ftp.c2
-rw-r--r--net/netfilter/nf_nat_irc.c2
-rw-r--r--net/netfilter/nf_nat_redirect.c6
-rw-r--r--net/netfilter/nf_queue.c68
-rw-r--r--net/netfilter/nf_sockopt.c3
-rw-r--r--net/netfilter/nf_tables_api.c716
-rw-r--r--net/netfilter/nf_tables_core.c28
-rw-r--r--net/netfilter/nf_tables_trace.c42
-rw-r--r--net/netfilter/nfnetlink_cttimeout.c22
-rw-r--r--net/netfilter/nfnetlink_log.c2
-rw-r--r--net/netfilter/nfnetlink_queue.c25
-rw-r--r--net/netfilter/nft_compat.c4
-rw-r--r--net/netfilter/nft_counter.c20
-rw-r--r--net/netfilter/nft_ct.c57
-rw-r--r--net/netfilter/nft_exthdr.c213
-rw-r--r--net/netfilter/nft_fib_netdev.c87
-rw-r--r--net/netfilter/nft_limit.c173
-rw-r--r--net/netfilter/nft_objref.c7
-rw-r--r--net/netfilter/nft_payload.c2
-rw-r--r--net/netfilter/nft_quota.c20
-rw-r--r--net/netfilter/nft_rt.c73
-rw-r--r--net/netfilter/nft_set_bitmap.c18
-rw-r--r--net/netfilter/nft_set_hash.c42
-rw-r--r--net/netfilter/nft_set_rbtree.c120
-rw-r--r--net/netfilter/x_tables.c39
-rw-r--r--net/netfilter/xt_CT.c2
-rw-r--r--net/netfilter/xt_NETMAP.c8
-rw-r--r--net/netfilter/xt_TCPMSS.c2
-rw-r--r--net/netfilter/xt_TPROXY.c10
-rw-r--r--net/netfilter/xt_addrtype.c3
-rw-r--r--net/netfilter/xt_bpf.c22
-rw-r--r--net/netfilter/xt_connlimit.c81
-rw-r--r--net/netfilter/xt_hashlimit.c291
-rw-r--r--net/netfilter/xt_nat.c20
-rw-r--r--net/netfilter/xt_osf.c2
-rw-r--r--net/netfilter/xt_recent.c2
-rw-r--r--net/netfilter/xt_repldata.h1
-rw-r--r--net/netfilter/xt_socket.c4
-rw-r--r--net/netlabel/Makefile1
-rw-r--r--net/netlabel/netlabel_addrlist.h4
-rw-r--r--net/netlabel/netlabel_calipso.c2
-rw-r--r--net/netlink/af_netlink.c86
-rw-r--r--net/netlink/af_netlink.h2
-rw-r--r--net/netlink/genetlink.c1
-rw-r--r--net/netrom/af_netrom.c6
-rw-r--r--net/netrom/nr_in.c2
-rw-r--r--net/netrom/nr_loopback.c2
-rw-r--r--net/netrom/nr_route.c62
-rw-r--r--net/netrom/nr_timer.c48
-rw-r--r--net/nfc/Makefile1
-rw-r--r--net/nfc/core.c11
-rw-r--r--net/nfc/digital_core.c1
-rw-r--r--net/nfc/hci/core.c8
-rw-r--r--net/nfc/hci/llc_shdlc.c26
-rw-r--r--net/nfc/llcp_core.c16
-rw-r--r--net/nfc/nci/Makefile1
-rw-r--r--net/nfc/netlink.c35
-rw-r--r--net/nsh/Kconfig9
-rw-r--r--net/nsh/Makefile1
-rw-r--r--net/nsh/nsh.c151
-rw-r--r--net/openvswitch/Kconfig1
-rw-r--r--net/openvswitch/Makefile2
-rw-r--r--net/openvswitch/actions.c127
-rw-r--r--net/openvswitch/conntrack.c30
-rw-r--r--net/openvswitch/conntrack.h7
-rw-r--r--net/openvswitch/datapath.c114
-rw-r--r--net/openvswitch/datapath.h41
-rw-r--r--net/openvswitch/dp_notify.c4
-rw-r--r--net/openvswitch/flow.c67
-rw-r--r--net/openvswitch/flow.h9
-rw-r--r--net/openvswitch/flow_netlink.c407
-rw-r--r--net/openvswitch/flow_netlink.h5
-rw-r--r--net/openvswitch/flow_table.c4
-rw-r--r--net/openvswitch/meter.c597
-rw-r--r--net/openvswitch/meter.h54
-rw-r--r--net/openvswitch/vport-netdev.c3
-rw-r--r--net/packet/af_packet.c91
-rw-r--r--net/packet/internal.h1
-rw-r--r--net/phonet/Makefile1
-rw-r--r--net/phonet/af_phonet.c17
-rw-r--r--net/phonet/datagram.c2
-rw-r--r--net/phonet/pep.c2
-rw-r--r--net/phonet/pn_dev.c3
-rw-r--r--net/phonet/pn_netlink.c12
-rw-r--r--net/psample/psample.c2
-rw-r--r--net/qrtr/qrtr.c381
-rw-r--r--net/qrtr/qrtr.h1
-rw-r--r--net/rds/Makefile1
-rw-r--r--net/rds/bind.c2
-rw-r--r--net/rds/connection.c50
-rw-r--r--net/rds/ib.c11
-rw-r--r--net/rds/ib.h3
-rw-r--r--net/rds/ib_fmr.c4
-rw-r--r--net/rds/ib_rdma.c4
-rw-r--r--net/rds/ib_recv.c10
-rw-r--r--net/rds/ib_send.c16
-rw-r--r--net/rds/info.h1
-rw-r--r--net/rds/loop.h1
-rw-r--r--net/rds/rdma_transport.h1
-rw-r--r--net/rds/rds.h5
-rw-r--r--net/rds/rds_single_path.h1
-rw-r--r--net/rds/send.c14
-rw-r--r--net/rds/tcp.c2
-rw-r--r--net/rds/tcp.h1
-rw-r--r--net/rds/tcp_connect.c4
-rw-r--r--net/rds/tcp_send.c2
-rw-r--r--net/rds/threads.c2
-rw-r--r--net/rose/af_rose.c17
-rw-r--r--net/rose/rose_in.c1
-rw-r--r--net/rose/rose_link.c16
-rw-r--r--net/rose/rose_loopback.c9
-rw-r--r--net/rose/rose_route.c10
-rw-r--r--net/rose/rose_timer.c39
-rw-r--r--net/rxrpc/Makefile1
-rw-r--r--net/rxrpc/af_rxrpc.c122
-rw-r--r--net/rxrpc/ar-internal.h28
-rw-r--r--net/rxrpc/call_accept.c3
-rw-r--r--net/rxrpc/call_event.c2
-rw-r--r--net/rxrpc/call_object.c103
-rw-r--r--net/rxrpc/conn_client.c17
-rw-r--r--net/rxrpc/conn_object.c2
-rw-r--r--net/rxrpc/conn_service.c3
-rw-r--r--net/rxrpc/input.c3
-rw-r--r--net/rxrpc/key.c22
-rw-r--r--net/rxrpc/local_event.c2
-rw-r--r--net/rxrpc/output.c21
-rw-r--r--net/rxrpc/peer_event.c6
-rw-r--r--net/rxrpc/peer_object.c13
-rw-r--r--net/rxrpc/protocol.h190
-rw-r--r--net/rxrpc/recvmsg.c7
-rw-r--r--net/rxrpc/rxkad.c22
-rw-r--r--net/rxrpc/sendmsg.c168
-rw-r--r--net/rxrpc/utils.c23
-rw-r--r--net/sched/Kconfig11
-rw-r--r--net/sched/Makefile2
-rw-r--r--net/sched/act_api.c565
-rw-r--r--net/sched/act_bpf.c21
-rw-r--r--net/sched/act_connmark.c16
-rw-r--r--net/sched/act_csum.c22
-rw-r--r--net/sched/act_gact.c16
-rw-r--r--net/sched/act_ife.c184
-rw-r--r--net/sched/act_ipt.c28
-rw-r--r--net/sched/act_meta_mark.c2
-rw-r--r--net/sched/act_meta_skbprio.c2
-rw-r--r--net/sched/act_meta_skbtcindex.c2
-rw-r--r--net/sched/act_mirred.c32
-rw-r--r--net/sched/act_nat.c16
-rw-r--r--net/sched/act_pedit.c18
-rw-r--r--net/sched/act_police.c18
-rw-r--r--net/sched/act_sample.c20
-rw-r--r--net/sched/act_simple.c20
-rw-r--r--net/sched/act_skbedit.c18
-rw-r--r--net/sched/act_skbmod.c18
-rw-r--r--net/sched/act_tunnel_key.c20
-rw-r--r--net/sched/act_vlan.c101
-rw-r--r--net/sched/cls_api.c580
-rw-r--r--net/sched/cls_basic.c120
-rw-r--r--net/sched/cls_bpf.c218
-rw-r--r--net/sched/cls_cgroup.c70
-rw-r--r--net/sched/cls_flow.c122
-rw-r--r--net/sched/cls_flower.c263
-rw-r--r--net/sched/cls_fw.c106
-rw-r--r--net/sched/cls_matchall.c145
-rw-r--r--net/sched/cls_route.c99
-rw-r--r--net/sched/cls_rsvp.h69
-rw-r--r--net/sched/cls_tcindex.c123
-rw-r--r--net/sched/cls_u32.c393
-rw-r--r--net/sched/ematch.c2
-rw-r--r--net/sched/sch_api.c533
-rw-r--r--net/sched/sch_atm.c46
-rw-r--r--net/sched/sch_cbq.c57
-rw-r--r--net/sched/sch_cbs.c373
-rw-r--r--net/sched/sch_drr.c36
-rw-r--r--net/sched/sch_dsmark.c19
-rw-r--r--net/sched/sch_fq_codel.c16
-rw-r--r--net/sched/sch_generic.c81
-rw-r--r--net/sched/sch_hfsc.c119
-rw-r--r--net/sched/sch_hhf.c3
-rw-r--r--net/sched/sch_htb.c51
-rw-r--r--net/sched/sch_ingress.c81
-rw-r--r--net/sched/sch_mq.c19
-rw-r--r--net/sched/sch_mqprio.c286
-rw-r--r--net/sched/sch_multiq.c21
-rw-r--r--net/sched/sch_netem.c180
-rw-r--r--net/sched/sch_pie.c10
-rw-r--r--net/sched/sch_prio.c16
-rw-r--r--net/sched/sch_qfq.c38
-rw-r--r--net/sched/sch_red.c102
-rw-r--r--net/sched/sch_sfb.c12
-rw-r--r--net/sched/sch_sfq.c44
-rw-r--r--net/sched/sch_tbf.c14
-rw-r--r--net/sctp/Makefile4
-rw-r--r--net/sctp/associola.c24
-rw-r--r--net/sctp/auth.c13
-rw-r--r--net/sctp/bind_addr.c20
-rw-r--r--net/sctp/chunk.c10
-rw-r--r--net/sctp/debug.c8
-rw-r--r--net/sctp/endpointola.c12
-rw-r--r--net/sctp/input.c26
-rw-r--r--net/sctp/ipv6.c23
-rw-r--r--net/sctp/objcnt.c2
-rw-r--r--net/sctp/output.c60
-rw-r--r--net/sctp/outqueue.c83
-rw-r--r--net/sctp/primitive.c4
-rw-r--r--net/sctp/probe.c13
-rw-r--r--net/sctp/protocol.c15
-rw-r--r--net/sctp/sctp_diag.c42
-rw-r--r--net/sctp/sm_make_chunk.c533
-rw-r--r--net/sctp/sm_sideeffect.c246
-rw-r--r--net/sctp/sm_statefuns.c1555
-rw-r--r--net/sctp/sm_statetable.c59
-rw-r--r--net/sctp/socket.c345
-rw-r--r--net/sctp/stream.c260
-rw-r--r--net/sctp/stream_sched.c275
-rw-r--r--net/sctp/stream_sched_prio.c347
-rw-r--r--net/sctp/stream_sched_rr.c201
-rw-r--r--net/sctp/sysctl.c2
-rw-r--r--net/sctp/transport.c15
-rw-r--r--net/sctp/ulpevent.c12
-rw-r--r--net/sctp/ulpqueue.c3
-rw-r--r--net/smc/Kconfig4
-rw-r--r--net/smc/af_smc.c118
-rw-r--r--net/smc/smc.h3
-rw-r--r--net/smc/smc_cdc.c8
-rw-r--r--net/smc/smc_cdc.h4
-rw-r--r--net/smc/smc_clc.c23
-rw-r--r--net/smc/smc_clc.h4
-rw-r--r--net/smc/smc_close.c31
-rw-r--r--net/smc/smc_close.h1
-rw-r--r--net/smc/smc_core.c417
-rw-r--r--net/smc/smc_core.h32
-rw-r--r--net/smc/smc_ib.c154
-rw-r--r--net/smc/smc_ib.h20
-rw-r--r--net/smc/smc_llc.c1
-rw-r--r--net/smc/smc_llc.h1
-rw-r--r--net/smc/smc_pnet.c5
-rw-r--r--net/smc/smc_pnet.h1
-rw-r--r--net/smc/smc_rx.c6
-rw-r--r--net/smc/smc_rx.h1
-rw-r--r--net/smc/smc_tx.c28
-rw-r--r--net/smc/smc_tx.h1
-rw-r--r--net/smc/smc_wr.c66
-rw-r--r--net/smc/smc_wr.h2
-rw-r--r--net/socket.c34
-rw-r--r--net/strparser/strparser.c316
-rw-r--r--net/sunrpc/Makefile1
-rw-r--r--net/sunrpc/auth_gss/Makefile1
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c14
-rw-r--r--net/sunrpc/auth_null.c1
-rw-r--r--net/sunrpc/auth_unix.c1
-rw-r--r--net/sunrpc/backchannel_rqst.c4
-rw-r--r--net/sunrpc/clnt.c26
-rw-r--r--net/sunrpc/debugfs.c1
-rw-r--r--net/sunrpc/netns.h1
-rw-r--r--net/sunrpc/rpc_pipe.c8
-rw-r--r--net/sunrpc/rpcb_clnt.c6
-rw-r--r--net/sunrpc/sched.c11
-rw-r--r--net/sunrpc/sunrpc_syms.c3
-rw-r--r--net/sunrpc/svc.c12
-rw-r--r--net/sunrpc/svc_xprt.c115
-rw-r--r--net/sunrpc/svcsock.c34
-rw-r--r--net/sunrpc/xprt.c101
-rw-r--r--net/sunrpc/xprtmultipath.c1
-rw-r--r--net/sunrpc/xprtrdma/Makefile1
-rw-r--r--net/sunrpc/xprtrdma/backchannel.c80
-rw-r--r--net/sunrpc/xprtrdma/fmr_ops.c30
-rw-r--r--net/sunrpc/xprtrdma/frwr_ops.c42
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c1173
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_backchannel.c16
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_rw.c117
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c51
-rw-r--r--net/sunrpc/xprtrdma/transport.c27
-rw-r--r--net/sunrpc/xprtrdma/verbs.c250
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h180
-rw-r--r--net/sunrpc/xprtsock.c97
-rw-r--r--net/switchdev/switchdev.c521
-rw-r--r--net/tipc/Makefile3
-rw-r--r--net/tipc/bcast.c22
-rw-r--r--net/tipc/bearer.c52
-rw-r--r--net/tipc/bearer.h3
-rw-r--r--net/tipc/core.h5
-rw-r--r--net/tipc/discover.c6
-rw-r--r--net/tipc/group.c871
-rw-r--r--net/tipc/group.h73
-rw-r--r--net/tipc/link.c57
-rw-r--r--net/tipc/monitor.c17
-rw-r--r--net/tipc/msg.c48
-rw-r--r--net/tipc/msg.h135
-rw-r--r--net/tipc/name_table.c176
-rw-r--r--net/tipc/name_table.h28
-rw-r--r--net/tipc/netlink_compat.c8
-rw-r--r--net/tipc/node.c70
-rw-r--r--net/tipc/node.h5
-rw-r--r--net/tipc/server.c121
-rw-r--r--net/tipc/server.h5
-rw-r--r--net/tipc/socket.c851
-rw-r--r--net/tipc/subscr.c27
-rw-r--r--net/tls/tls_main.c96
-rw-r--r--net/tls/tls_sw.c24
-rw-r--r--net/unix/Makefile1
-rw-r--r--net/unix/af_unix.c30
-rw-r--r--net/unix/diag.c2
-rw-r--r--net/vmw_vsock/Kconfig22
-rw-r--r--net/vmw_vsock/Makefile7
-rw-r--r--net/vmw_vsock/af_vsock.c73
-rw-r--r--net/vmw_vsock/diag.c186
-rw-r--r--net/vmw_vsock/hyperv_transport.c917
-rw-r--r--net/vmw_vsock/virtio_transport.c2
-rw-r--r--net/vmw_vsock/virtio_transport_common.c22
-rw-r--r--net/vmw_vsock/vmci_transport.c35
-rw-r--r--net/vmw_vsock/vmci_transport_notify.c2
-rw-r--r--net/vmw_vsock/vmci_transport_notify_qstate.c2
-rw-r--r--net/wimax/Makefile1
-rw-r--r--net/wireless/.gitignore3
-rw-r--r--net/wireless/Kconfig58
-rw-r--r--net/wireless/Makefile25
-rw-r--r--net/wireless/ap.c1
-rw-r--r--net/wireless/certs/sforshee.x509bin0 -> 680 bytes
-rw-r--r--net/wireless/chan.c5
-rw-r--r--net/wireless/core.c2
-rw-r--r--net/wireless/core.h6
-rw-r--r--net/wireless/db.txt17
-rw-r--r--net/wireless/debugfs.h1
-rw-r--r--net/wireless/ethtool.c1
-rw-r--r--net/wireless/genregdb.awk158
-rw-r--r--net/wireless/ibss.c1
-rw-r--r--net/wireless/mesh.c1
-rw-r--r--net/wireless/mlme.c1
-rw-r--r--net/wireless/nl80211.c229
-rw-r--r--net/wireless/nl80211.h3
-rw-r--r--net/wireless/rdev-ops.h1
-rw-r--r--net/wireless/reg.c472
-rw-r--r--net/wireless/reg.h14
-rw-r--r--net/wireless/regdb.h23
-rw-r--r--net/wireless/scan.c1
-rw-r--r--net/wireless/sme.c96
-rw-r--r--net/wireless/sysfs.h1
-rw-r--r--net/wireless/trace.h1
-rw-r--r--net/wireless/util.c203
-rw-r--r--net/wireless/wext-compat.c1
-rw-r--r--net/wireless/wext-sme.c1
-rw-r--r--net/x25/Makefile1
-rw-r--r--net/x25/af_x25.c11
-rw-r--r--net/x25/sysctl_net_x25.c1
-rw-r--r--net/x25/x25_facilities.c2
-rw-r--r--net/x25/x25_in.c1
-rw-r--r--net/x25/x25_timer.c18
-rw-r--r--net/xfrm/Makefile1
-rw-r--r--net/xfrm/xfrm_device.c10
-rw-r--r--net/xfrm/xfrm_hash.c1
-rw-r--r--net/xfrm/xfrm_hash.h1
-rw-r--r--net/xfrm/xfrm_input.c14
-rw-r--r--net/xfrm/xfrm_output.c7
-rw-r--r--net/xfrm/xfrm_policy.c483
-rw-r--r--net/xfrm/xfrm_state.c32
-rw-r--r--net/xfrm/xfrm_sysctl.c1
-rw-r--r--net/xfrm/xfrm_user.c153
928 files changed, 33934 insertions, 50044 deletions
diff --git a/net/6lowpan/6lowpan_i.h b/net/6lowpan/6lowpan_i.h
index a67caee11929..53cf446ce2e3 100644
--- a/net/6lowpan/6lowpan_i.h
+++ b/net/6lowpan/6lowpan_i.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __6LOWPAN_I_H
#define __6LOWPAN_I_H
diff --git a/net/6lowpan/Makefile b/net/6lowpan/Makefile
index 12d131ab2324..2247b96dbc75 100644
--- a/net/6lowpan/Makefile
+++ b/net/6lowpan/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_6LOWPAN) += 6lowpan.o
6lowpan-y := core.o iphc.o nhc.o ndisc.o
diff --git a/net/6lowpan/nhc.h b/net/6lowpan/nhc.h
index 803041400136..67951c40734b 100644
--- a/net/6lowpan/nhc.h
+++ b/net/6lowpan/nhc.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __6LOWPAN_NHC_H
#define __6LOWPAN_NHC_H
diff --git a/net/802/Makefile b/net/802/Makefile
index 37e654d6615e..19406a87bdaa 100644
--- a/net/802/Makefile
+++ b/net/802/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the Linux 802.x protocol layers.
#
diff --git a/net/8021q/Makefile b/net/8021q/Makefile
index 7bc8db08d7ef..9b703454b93e 100644
--- a/net/8021q/Makefile
+++ b/net/8021q/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the Linux VLAN layer.
#
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 9649579b5b9f..8dfdd94e430f 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -138,7 +138,7 @@ int vlan_check_real_dev(struct net_device *real_dev,
return 0;
}
-int register_vlan_dev(struct net_device *dev)
+int register_vlan_dev(struct net_device *dev, struct netlink_ext_ack *extack)
{
struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
struct net_device *real_dev = vlan->real_dev;
@@ -174,7 +174,7 @@ int register_vlan_dev(struct net_device *dev)
if (err < 0)
goto out_uninit_mvrp;
- err = netdev_upper_dev_link(real_dev, dev);
+ err = netdev_upper_dev_link(real_dev, dev, extack);
if (err)
goto out_unregister_netdev;
@@ -270,7 +270,7 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
vlan->flags = VLAN_FLAG_REORDER_HDR;
new_dev->rtnl_link_ops = &vlan_link_ops;
- err = register_vlan_dev(new_dev);
+ err = register_vlan_dev(new_dev, NULL);
if (err < 0)
goto out_free_newdev;
@@ -328,6 +328,9 @@ static void vlan_transfer_features(struct net_device *dev,
vlandev->fcoe_ddp_xid = dev->fcoe_ddp_xid;
#endif
+ vlandev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+ vlandev->priv_flags |= (vlan->real_dev->priv_flags & IFF_XMIT_DST_RELEASE);
+
netdev_update_features(vlandev);
}
@@ -376,6 +379,9 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
dev->name);
vlan_vid_add(dev, htons(ETH_P_8021Q), 0);
}
+ if (event == NETDEV_DOWN &&
+ (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER))
+ vlan_vid_del(dev, htons(ETH_P_8021Q), 0);
vlan_info = rtnl_dereference(dev->vlan_info);
if (!vlan_info)
@@ -423,9 +429,6 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
struct net_device *tmp;
LIST_HEAD(close_list);
- if (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)
- vlan_vid_del(dev, htons(ETH_P_8021Q), 0);
-
/* Put all VLANs for this dev in the down state too. */
vlan_group_for_each_dev(grp, i, vlandev) {
flgs = vlandev->flags;
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index df8bd65dd370..a8ba51030b75 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __BEN_VLAN_802_1Q_INC__
#define __BEN_VLAN_802_1Q_INC__
@@ -107,7 +108,7 @@ void vlan_dev_get_realdev_name(const struct net_device *dev, char *result);
int vlan_check_real_dev(struct net_device *real_dev,
__be16 protocol, u16 vlan_id);
void vlan_setup(struct net_device *dev);
-int register_vlan_dev(struct net_device *dev);
+int register_vlan_dev(struct net_device *dev, struct netlink_ext_ack *extack);
void unregister_vlan_dev(struct net_device *dev, struct list_head *head);
bool vlan_dev_inherit_address(struct net_device *dev,
struct net_device *real_dev);
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index e2ed69850489..64aa9f755e1d 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/if_vlan.h>
@@ -21,6 +22,12 @@ bool vlan_do_receive(struct sk_buff **skbp)
if (unlikely(!skb))
return false;
+ if (unlikely(!(vlan_dev->flags & IFF_UP))) {
+ kfree_skb(skb);
+ *skbp = NULL;
+ return false;
+ }
+
skb->dev = vlan_dev;
if (unlikely(skb->pkt_type == PACKET_OTHERHOST)) {
/* Our lower layer thinks this is not local, let's make sure.
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index 5e831de3103e..6689c0b272a7 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -143,6 +143,7 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev,
vlan->vlan_proto = proto;
vlan->vlan_id = nla_get_u16(data[IFLA_VLAN_ID]);
vlan->real_dev = real_dev;
+ dev->priv_flags |= (real_dev->priv_flags & IFF_XMIT_DST_RELEASE);
vlan->flags = VLAN_FLAG_REORDER_HDR;
err = vlan_check_real_dev(real_dev, vlan->vlan_proto, vlan->vlan_id);
@@ -160,7 +161,7 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev,
if (err < 0)
return err;
- return register_vlan_dev(dev);
+ return register_vlan_dev(dev, extack);
}
static inline size_t vlan_qos_map_size(unsigned int n)
diff --git a/net/8021q/vlanproc.h b/net/8021q/vlanproc.h
index 8838a2e92eb6..48cd4b4784e8 100644
--- a/net/8021q/vlanproc.h
+++ b/net/8021q/vlanproc.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __BEN_VLAN_PROC_INC__
#define __BEN_VLAN_PROC_INC__
diff --git a/net/9p/Makefile b/net/9p/Makefile
index 697ea7caf466..c0486cfc85d9 100644
--- a/net/9p/Makefile
+++ b/net/9p/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_NET_9P) := 9pnet.o
obj-$(CONFIG_NET_9P_XEN) += 9pnet_xen.o
obj-$(CONFIG_NET_9P_VIRTIO) += 9pnet_virtio.o
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index ddfa86648f95..903a190319b9 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -272,6 +272,7 @@ static int p9_fd_read(struct p9_client *client, void *v, int len)
{
int ret;
struct p9_trans_fd *ts = NULL;
+ loff_t pos;
if (client && client->status != Disconnected)
ts = client->trans;
@@ -282,7 +283,8 @@ static int p9_fd_read(struct p9_client *client, void *v, int len)
if (!(ts->rd->f_flags & O_NONBLOCK))
p9_debug(P9_DEBUG_ERROR, "blocking read ...\n");
- ret = kernel_read(ts->rd, ts->rd->f_pos, v, len);
+ pos = ts->rd->f_pos;
+ ret = kernel_read(ts->rd, v, len, &pos);
if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN)
client->status = Disconnected;
return ret;
@@ -420,8 +422,7 @@ error:
static int p9_fd_write(struct p9_client *client, void *v, int len)
{
- int ret;
- mm_segment_t oldfs;
+ ssize_t ret;
struct p9_trans_fd *ts = NULL;
if (client && client->status != Disconnected)
@@ -433,12 +434,7 @@ static int p9_fd_write(struct p9_client *client, void *v, int len)
if (!(ts->wr->f_flags & O_NONBLOCK))
p9_debug(P9_DEBUG_ERROR, "blocking write ...\n");
- oldfs = get_fs();
- set_fs(get_ds());
- /* The cast to a user pointer is valid due to the set_fs() */
- ret = vfs_write(ts->wr, (__force void __user *)v, len, &ts->wr->f_pos);
- set_fs(oldfs);
-
+ ret = kernel_write(ts->wr, v, len, &ts->wr->f_pos);
if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN)
client->status = Disconnected;
return ret;
diff --git a/net/Kconfig b/net/Kconfig
index 7d57ef34b79c..9dba2715919d 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -166,13 +166,6 @@ menuconfig NETFILTER
if NETFILTER
-config NETFILTER_DEBUG
- bool "Network packet filtering debugging"
- depends on NETFILTER
- help
- You can say Y here if you want to get additional messages useful in
- debugging the netfilter code.
-
config NETFILTER_ADVANCED
bool "Advanced netfilter configuration"
depends on NETFILTER
@@ -235,6 +228,7 @@ source "net/openvswitch/Kconfig"
source "net/vmw_vsock/Kconfig"
source "net/netlink/Kconfig"
source "net/mpls/Kconfig"
+source "net/nsh/Kconfig"
source "net/hsr/Kconfig"
source "net/switchdev/Kconfig"
source "net/l3mdev/Kconfig"
@@ -301,6 +295,18 @@ config BPF_JIT
/proc/sys/net/core/bpf_jit_harden (optional)
/proc/sys/net/core/bpf_jit_kallsyms (optional)
+config BPF_STREAM_PARSER
+ bool "enable BPF STREAM_PARSER"
+ depends on BPF_SYSCALL
+ select STREAM_PARSER
+ ---help---
+ Enabling this allows a stream parser to be used with
+ BPF_MAP_TYPE_SOCKMAP.
+
+ BPF_MAP_TYPE_SOCKMAP provides a map type to use with network sockets.
+ It can be used to enforce socket policy, implement socket redirects,
+ etc.
+
config NET_FLOW_LIMIT
bool
depends on RPS
@@ -364,7 +370,6 @@ endmenu
source "net/ax25/Kconfig"
source "net/can/Kconfig"
-source "net/irda/Kconfig"
source "net/bluetooth/Kconfig"
source "net/rxrpc/Kconfig"
source "net/kcm/Kconfig"
diff --git a/net/Makefile b/net/Makefile
index bed80fa398b7..14fede520840 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the linux networking.
#
@@ -31,7 +32,6 @@ obj-$(CONFIG_NETROM) += netrom/
obj-$(CONFIG_ROSE) += rose/
obj-$(CONFIG_AX25) += ax25/
obj-$(CONFIG_CAN) += can/
-obj-$(CONFIG_IRDA) += irda/
obj-$(CONFIG_BT) += bluetooth/
obj-$(CONFIG_SUNRPC) += sunrpc/
obj-$(CONFIG_AF_RXRPC) += rxrpc/
@@ -76,6 +76,7 @@ obj-$(CONFIG_NET_IFE) += ife/
obj-$(CONFIG_OPENVSWITCH) += openvswitch/
obj-$(CONFIG_VSOCKETS) += vmw_vsock/
obj-$(CONFIG_MPLS) += mpls/
+obj-$(CONFIG_NET_NSH) += nsh/
obj-$(CONFIG_HSR) += hsr/
ifneq ($(CONFIG_NET_SWITCHDEV),)
obj-y += switchdev/
diff --git a/net/appletalk/dev.c b/net/appletalk/dev.c
index e4158b8b926d..284c8e585533 100644
--- a/net/appletalk/dev.c
+++ b/net/appletalk/dev.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Moved here from drivers/net/net_init.c, which is:
* Written 1993,1994,1995 by Donald Becker.
diff --git a/net/appletalk/sysctl_net_atalk.c b/net/appletalk/sysctl_net_atalk.c
index ebb864361f7a..c744a853fa5f 100644
--- a/net/appletalk/sysctl_net_atalk.c
+++ b/net/appletalk/sysctl_net_atalk.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* sysctl_net_atalk.c: sysctl interface to net AppleTalk subsystem.
*
diff --git a/net/atm/Makefile b/net/atm/Makefile
index cc50bd1ff1de..bfec0f2d83b5 100644
--- a/net/atm/Makefile
+++ b/net/atm/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the ATM Protocol Families.
#
diff --git a/net/atm/addr.c b/net/atm/addr.c
index dcda35c66f15..0530b63f509a 100644
--- a/net/atm/addr.c
+++ b/net/atm/addr.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* net/atm/addr.c - Local ATM address registry */
/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
diff --git a/net/atm/addr.h b/net/atm/addr.h
index 6837e9e7eb13..da3f848411a0 100644
--- a/net/atm/addr.h
+++ b/net/atm/addr.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* net/atm/addr.h - Local ATM address registry */
/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
diff --git a/net/atm/atm_misc.c b/net/atm/atm_misc.c
index 876fbe83e2e4..a30b83c1cb3f 100644
--- a/net/atm/atm_misc.c
+++ b/net/atm/atm_misc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* net/atm/atm_misc.c - Various functions for use by ATM drivers */
/* Written 1995-2000 by Werner Almesberger, EPFL ICA */
diff --git a/net/atm/atm_sysfs.c b/net/atm/atm_sysfs.c
index 350bf62b2ae3..5d2fed9f5710 100644
--- a/net/atm/atm_sysfs.c
+++ b/net/atm/atm_sysfs.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* ATM driver model support. */
#include <linux/kernel.h>
diff --git a/net/atm/clip.c b/net/atm/clip.c
index f271a7bcf5b2..d4f6029d5109 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -153,7 +153,7 @@ static int neigh_check_cb(struct neighbour *n)
return 1;
}
-static void idle_timer_check(unsigned long dummy)
+static void idle_timer_check(struct timer_list *unused)
{
write_lock(&arp_tbl.lock);
__neigh_for_each_release(&arp_tbl, neigh_check_cb);
@@ -617,7 +617,7 @@ static void atmarpd_close(struct atm_vcc *vcc)
module_put(THIS_MODULE);
}
-static struct atmdev_ops atmarpd_dev_ops = {
+static const struct atmdev_ops atmarpd_dev_ops = {
.close = atmarpd_close
};
@@ -887,7 +887,7 @@ static int __init atm_clip_init(void)
register_netdevice_notifier(&clip_dev_notifier);
register_inetaddr_notifier(&clip_inet_notifier);
- setup_timer(&idle_timer, idle_timer_check, 0);
+ timer_setup(&idle_timer, idle_timer_check, 0);
#ifdef CONFIG_PROC_FS
{
diff --git a/net/atm/common.h b/net/atm/common.h
index 959436b87182..d9d583712a91 100644
--- a/net/atm/common.h
+++ b/net/atm/common.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* net/atm/common.h - ATM sockets (common part for PVC and SVC) */
/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
diff --git a/net/atm/ioctl.c b/net/atm/ioctl.c
index bbd3b639992e..2ff0e5e470e3 100644
--- a/net/atm/ioctl.c
+++ b/net/atm/ioctl.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* ATM ioctl handling */
/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 093fe8707731..c976196da3ea 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -486,7 +486,7 @@ static void lec_atm_close(struct atm_vcc *vcc)
module_put(THIS_MODULE);
}
-static struct atmdev_ops lecdev_ops = {
+static const struct atmdev_ops lecdev_ops = {
.close = lec_atm_close,
.send = lec_atm_send
};
@@ -1232,7 +1232,7 @@ static void lane2_associate_ind(struct net_device *dev, const u8 *mac_addr,
#define LEC_ARP_REFRESH_INTERVAL (3*HZ)
static void lec_arp_check_expire(struct work_struct *work);
-static void lec_arp_expire_arp(unsigned long data);
+static void lec_arp_expire_arp(struct timer_list *t);
/*
* Arp table funcs
@@ -1559,8 +1559,7 @@ static struct lec_arp_table *make_entry(struct lec_priv *priv,
}
ether_addr_copy(to_return->mac_addr, mac_addr);
INIT_HLIST_NODE(&to_return->next);
- setup_timer(&to_return->timer, lec_arp_expire_arp,
- (unsigned long)to_return);
+ timer_setup(&to_return->timer, lec_arp_expire_arp, 0);
to_return->last_used = jiffies;
to_return->priv = priv;
skb_queue_head_init(&to_return->tx_wait);
@@ -1569,11 +1568,11 @@ static struct lec_arp_table *make_entry(struct lec_priv *priv,
}
/* Arp sent timer expired */
-static void lec_arp_expire_arp(unsigned long data)
+static void lec_arp_expire_arp(struct timer_list *t)
{
struct lec_arp_table *entry;
- entry = (struct lec_arp_table *)data;
+ entry = from_timer(entry, t, timer);
pr_debug("\n");
if (entry->status == ESI_ARP_PENDING) {
@@ -1591,10 +1590,10 @@ static void lec_arp_expire_arp(unsigned long data)
}
/* Unknown/unused vcc expire, remove associated entry */
-static void lec_arp_expire_vcc(unsigned long data)
+static void lec_arp_expire_vcc(struct timer_list *t)
{
unsigned long flags;
- struct lec_arp_table *to_remove = (struct lec_arp_table *)data;
+ struct lec_arp_table *to_remove = from_timer(to_remove, t, timer);
struct lec_priv *priv = to_remove->priv;
del_timer(&to_remove->timer);
@@ -1799,7 +1798,7 @@ static struct atm_vcc *lec_arp_resolve(struct lec_priv *priv,
else
send_to_lecd(priv, l_arp_xmt, mac_to_find, NULL, NULL);
entry->timer.expires = jiffies + (1 * HZ);
- entry->timer.function = lec_arp_expire_arp;
+ entry->timer.function = (TIMER_FUNC_TYPE)lec_arp_expire_arp;
add_timer(&entry->timer);
found = priv->mcast_vcc;
}
@@ -1999,7 +1998,7 @@ lec_vcc_added(struct lec_priv *priv, const struct atmlec_ioc *ioc_data,
entry->old_recv_push = old_push;
entry->status = ESI_UNKNOWN;
entry->timer.expires = jiffies + priv->vcc_timeout_period;
- entry->timer.function = lec_arp_expire_vcc;
+ entry->timer.function = (TIMER_FUNC_TYPE)lec_arp_expire_vcc;
hlist_add_head(&entry->next, &priv->lec_no_forward);
add_timer(&entry->timer);
dump_arp_table(priv);
@@ -2083,7 +2082,7 @@ lec_vcc_added(struct lec_priv *priv, const struct atmlec_ioc *ioc_data,
entry->status = ESI_UNKNOWN;
hlist_add_head(&entry->next, &priv->lec_arp_empty_ones);
entry->timer.expires = jiffies + priv->vcc_timeout_period;
- entry->timer.function = lec_arp_expire_vcc;
+ entry->timer.function = (TIMER_FUNC_TYPE)lec_arp_expire_vcc;
add_timer(&entry->timer);
pr_debug("After vcc was added\n");
dump_arp_table(priv);
diff --git a/net/atm/lec.h b/net/atm/lec.h
index 4149db1b7885..be0e2667bd8c 100644
--- a/net/atm/lec.h
+++ b/net/atm/lec.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Lan Emulation client header file
*
diff --git a/net/atm/lec_arpc.h b/net/atm/lec_arpc.h
index d923f53812a3..1205d8792d28 100644
--- a/net/atm/lec_arpc.h
+++ b/net/atm/lec_arpc.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Lec arp cache
*
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index 680a4b9095a1..e882d8b5db05 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -95,7 +95,7 @@ static netdev_tx_t mpc_send_packet(struct sk_buff *skb,
static int mpoa_event_listener(struct notifier_block *mpoa_notifier,
unsigned long event, void *dev);
static void mpc_timer_refresh(void);
-static void mpc_cache_check(unsigned long checking_time);
+static void mpc_cache_check(struct timer_list *unused);
static struct llc_snap_hdr llc_snap_mpoa_ctrl = {
0xaa, 0xaa, 0x03,
@@ -121,7 +121,7 @@ static struct notifier_block mpoa_notifier = {
struct mpoa_client *mpcs = NULL; /* FIXME */
static struct atm_mpoa_qos *qos_head = NULL;
-static DEFINE_TIMER(mpc_timer, NULL, 0, 0);
+static DEFINE_TIMER(mpc_timer, NULL);
static struct mpoa_client *find_mpc_by_itfnum(int itf)
@@ -779,7 +779,7 @@ static void mpc_push(struct atm_vcc *vcc, struct sk_buff *skb)
netif_rx(new_skb);
}
-static struct atmdev_ops mpc_ops = { /* only send is required */
+static const struct atmdev_ops mpc_ops = { /* only send is required */
.close = mpoad_close,
.send = msg_from_mpoad
};
@@ -799,7 +799,6 @@ static int atm_mpoa_mpoad_attach(struct atm_vcc *vcc, int arg)
int err;
if (mpcs == NULL) {
- init_timer(&mpc_timer);
mpc_timer_refresh();
/* This lets us now how our LECs are doing */
@@ -1408,15 +1407,17 @@ static void clean_up(struct k_message *msg, struct mpoa_client *mpc, int action)
msg_to_mpoad(msg, mpc);
}
+static unsigned long checking_time;
+
static void mpc_timer_refresh(void)
{
mpc_timer.expires = jiffies + (MPC_P2 * HZ);
- mpc_timer.data = mpc_timer.expires;
- mpc_timer.function = mpc_cache_check;
+ checking_time = mpc_timer.expires;
+ mpc_timer.function = (TIMER_FUNC_TYPE)mpc_cache_check;
add_timer(&mpc_timer);
}
-static void mpc_cache_check(unsigned long checking_time)
+static void mpc_cache_check(struct timer_list *unused)
{
struct mpoa_client *mpc = mpcs;
static unsigned long previous_resolving_check_time;
diff --git a/net/atm/mpc.h b/net/atm/mpc.h
index cfc7b745aa91..454abd07651a 100644
--- a/net/atm/mpc.h
+++ b/net/atm/mpc.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _MPC_H_
#define _MPC_H_
diff --git a/net/atm/mpoa_caches.c b/net/atm/mpoa_caches.c
index 4ccaa16b1eb1..e01450bb32d6 100644
--- a/net/atm/mpoa_caches.c
+++ b/net/atm/mpoa_caches.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/atmmpc.h>
#include <linux/slab.h>
diff --git a/net/atm/mpoa_caches.h b/net/atm/mpoa_caches.h
index 30fe34841ced..6a266669ebf4 100644
--- a/net/atm/mpoa_caches.h
+++ b/net/atm/mpoa_caches.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef MPOA_CACHES_H
#define MPOA_CACHES_H
diff --git a/net/atm/mpoa_proc.c b/net/atm/mpoa_proc.c
index 2df34eb5d65f..8a0c17e1c203 100644
--- a/net/atm/mpoa_proc.c
+++ b/net/atm/mpoa_proc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__
#ifdef CONFIG_PROC_FS
diff --git a/net/atm/proc.c b/net/atm/proc.c
index 4caca2a90ec4..642f9272ab95 100644
--- a/net/atm/proc.c
+++ b/net/atm/proc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* net/atm/proc.c - ATM /proc interface
*
* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA
diff --git a/net/atm/protocols.h b/net/atm/protocols.h
index acdfc856222d..18d4d008bac3 100644
--- a/net/atm/protocols.h
+++ b/net/atm/protocols.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* net/atm/protocols.h - ATM protocol handler entry points */
/* Written 1995-1997 by Werner Almesberger, EPFL LRC */
diff --git a/net/atm/pvc.c b/net/atm/pvc.c
index 040207ec399f..e1140b3bdcaa 100644
--- a/net/atm/pvc.c
+++ b/net/atm/pvc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* net/atm/pvc.c - ATM PVC sockets */
/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
diff --git a/net/atm/raw.c b/net/atm/raw.c
index 821c0797553d..ee10e8d46185 100644
--- a/net/atm/raw.c
+++ b/net/atm/raw.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* net/atm/raw.c - Raw AAL0 and AAL5 transports */
/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
diff --git a/net/atm/resources.c b/net/atm/resources.c
index 918244757b7d..bada395ecdb1 100644
--- a/net/atm/resources.c
+++ b/net/atm/resources.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* net/atm/resources.c - Statically allocated resources */
/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
diff --git a/net/atm/resources.h b/net/atm/resources.h
index 521431e30507..048232e4d4c6 100644
--- a/net/atm/resources.h
+++ b/net/atm/resources.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* net/atm/resources.h - ATM-related resources */
/* Written 1995-1998 by Werner Almesberger, EPFL LRC/ICA */
diff --git a/net/atm/signaling.c b/net/atm/signaling.c
index 983c3a21a133..6c11cdf4dd4c 100644
--- a/net/atm/signaling.c
+++ b/net/atm/signaling.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* net/atm/signaling.c - ATM signaling */
/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
@@ -217,7 +218,7 @@ static void sigd_close(struct atm_vcc *vcc)
read_unlock(&vcc_sklist_lock);
}
-static struct atmdev_ops sigd_dev_ops = {
+static const struct atmdev_ops sigd_dev_ops = {
.close = sigd_close,
.send = sigd_send
};
diff --git a/net/atm/signaling.h b/net/atm/signaling.h
index 08b2a69cc572..2df8220f7ab5 100644
--- a/net/atm/signaling.h
+++ b/net/atm/signaling.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* net/atm/signaling.h - ATM signaling */
/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
diff --git a/net/atm/svc.c b/net/atm/svc.c
index 5589de7086af..c458adcbc177 100644
--- a/net/atm/svc.c
+++ b/net/atm/svc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* net/atm/svc.c - ATM SVC sockets */
/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
diff --git a/net/ax25/Makefile b/net/ax25/Makefile
index 43c46d2cafb6..2e53affc8568 100644
--- a/net/ax25/Makefile
+++ b/net/ax25/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the Linux AX.25 layer.
#
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index f3f9d18891de..06eac1f50c5e 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -268,9 +268,9 @@ void ax25_destroy_socket(ax25_cb *);
/*
* Handler for deferred kills.
*/
-static void ax25_destroy_timer(unsigned long data)
+static void ax25_destroy_timer(struct timer_list *t)
{
- ax25_cb *ax25=(ax25_cb *)data;
+ ax25_cb *ax25 = from_timer(ax25, t, dtimer);
struct sock *sk;
sk=ax25->sk;
@@ -326,8 +326,7 @@ void ax25_destroy_socket(ax25_cb *ax25)
if (ax25->sk != NULL) {
if (sk_has_allocations(ax25->sk)) {
/* Defer: outstanding buffers */
- setup_timer(&ax25->dtimer, ax25_destroy_timer,
- (unsigned long)ax25);
+ timer_setup(&ax25->dtimer, ax25_destroy_timer, 0);
ax25->dtimer.expires = jiffies + 2 * HZ;
add_timer(&ax25->dtimer);
} else {
diff --git a/net/ax25/ax25_ds_timer.c b/net/ax25/ax25_ds_timer.c
index 5fb2104b7304..e9d11313d45b 100644
--- a/net/ax25/ax25_ds_timer.c
+++ b/net/ax25/ax25_ds_timer.c
@@ -29,7 +29,7 @@
#include <linux/mm.h>
#include <linux/interrupt.h>
-static void ax25_ds_timeout(unsigned long);
+static void ax25_ds_timeout(struct timer_list *);
/*
* Add DAMA slave timeout timer to timer list.
@@ -41,8 +41,7 @@ static void ax25_ds_timeout(unsigned long);
void ax25_ds_setup_timer(ax25_dev *ax25_dev)
{
- setup_timer(&ax25_dev->dama.slave_timer, ax25_ds_timeout,
- (unsigned long)ax25_dev);
+ timer_setup(&ax25_dev->dama.slave_timer, ax25_ds_timeout, 0);
}
void ax25_ds_del_timer(ax25_dev *ax25_dev)
@@ -66,9 +65,9 @@ void ax25_ds_set_timer(ax25_dev *ax25_dev)
* Silently discard all (slave) connections in case our master forgot us...
*/
-static void ax25_ds_timeout(unsigned long arg)
+static void ax25_ds_timeout(struct timer_list *t)
{
- ax25_dev *ax25_dev = (struct ax25_dev *) arg;
+ ax25_dev *ax25_dev = from_timer(ax25_dev, t, dama.slave_timer);
ax25_cb *ax25;
if (ax25_dev == NULL || !ax25_dev->dama.slave)
diff --git a/net/ax25/ax25_timer.c b/net/ax25/ax25_timer.c
index 23a6f38a80bf..c47b7ee1e4da 100644
--- a/net/ax25/ax25_timer.c
+++ b/net/ax25/ax25_timer.c
@@ -33,20 +33,19 @@
#include <linux/mm.h>
#include <linux/interrupt.h>
-static void ax25_heartbeat_expiry(unsigned long);
-static void ax25_t1timer_expiry(unsigned long);
-static void ax25_t2timer_expiry(unsigned long);
-static void ax25_t3timer_expiry(unsigned long);
-static void ax25_idletimer_expiry(unsigned long);
+static void ax25_heartbeat_expiry(struct timer_list *);
+static void ax25_t1timer_expiry(struct timer_list *);
+static void ax25_t2timer_expiry(struct timer_list *);
+static void ax25_t3timer_expiry(struct timer_list *);
+static void ax25_idletimer_expiry(struct timer_list *);
void ax25_setup_timers(ax25_cb *ax25)
{
- setup_timer(&ax25->timer, ax25_heartbeat_expiry, (unsigned long)ax25);
- setup_timer(&ax25->t1timer, ax25_t1timer_expiry, (unsigned long)ax25);
- setup_timer(&ax25->t2timer, ax25_t2timer_expiry, (unsigned long)ax25);
- setup_timer(&ax25->t3timer, ax25_t3timer_expiry, (unsigned long)ax25);
- setup_timer(&ax25->idletimer, ax25_idletimer_expiry,
- (unsigned long)ax25);
+ timer_setup(&ax25->timer, ax25_heartbeat_expiry, 0);
+ timer_setup(&ax25->t1timer, ax25_t1timer_expiry, 0);
+ timer_setup(&ax25->t2timer, ax25_t2timer_expiry, 0);
+ timer_setup(&ax25->t3timer, ax25_t3timer_expiry, 0);
+ timer_setup(&ax25->idletimer, ax25_idletimer_expiry, 0);
}
void ax25_start_heartbeat(ax25_cb *ax25)
@@ -120,10 +119,10 @@ unsigned long ax25_display_timer(struct timer_list *timer)
EXPORT_SYMBOL(ax25_display_timer);
-static void ax25_heartbeat_expiry(unsigned long param)
+static void ax25_heartbeat_expiry(struct timer_list *t)
{
int proto = AX25_PROTO_STD_SIMPLEX;
- ax25_cb *ax25 = (ax25_cb *)param;
+ ax25_cb *ax25 = from_timer(ax25, t, timer);
if (ax25->ax25_dev)
proto = ax25->ax25_dev->values[AX25_VALUES_PROTOCOL];
@@ -145,9 +144,9 @@ static void ax25_heartbeat_expiry(unsigned long param)
}
}
-static void ax25_t1timer_expiry(unsigned long param)
+static void ax25_t1timer_expiry(struct timer_list *t)
{
- ax25_cb *ax25 = (ax25_cb *)param;
+ ax25_cb *ax25 = from_timer(ax25, t, t1timer);
switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) {
case AX25_PROTO_STD_SIMPLEX:
@@ -164,9 +163,9 @@ static void ax25_t1timer_expiry(unsigned long param)
}
}
-static void ax25_t2timer_expiry(unsigned long param)
+static void ax25_t2timer_expiry(struct timer_list *t)
{
- ax25_cb *ax25 = (ax25_cb *)param;
+ ax25_cb *ax25 = from_timer(ax25, t, t2timer);
switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) {
case AX25_PROTO_STD_SIMPLEX:
@@ -183,9 +182,9 @@ static void ax25_t2timer_expiry(unsigned long param)
}
}
-static void ax25_t3timer_expiry(unsigned long param)
+static void ax25_t3timer_expiry(struct timer_list *t)
{
- ax25_cb *ax25 = (ax25_cb *)param;
+ ax25_cb *ax25 = from_timer(ax25, t, t3timer);
switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) {
case AX25_PROTO_STD_SIMPLEX:
@@ -204,9 +203,9 @@ static void ax25_t3timer_expiry(unsigned long param)
}
}
-static void ax25_idletimer_expiry(unsigned long param)
+static void ax25_idletimer_expiry(struct timer_list *t)
{
- ax25_cb *ax25 = (ax25_cb *)param;
+ ax25_cb *ax25 = from_timer(ax25, t, idletimer);
switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) {
case AX25_PROTO_STD_SIMPLEX:
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index a3501173e200..1b659ab652fb 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -729,11 +729,9 @@ static void batadv_iv_ogm_aggregate(struct batadv_forw_packet *forw_packet_aggr,
const unsigned char *packet_buff,
int packet_len, bool direct_link)
{
- unsigned char *skb_buff;
unsigned long new_direct_link_flag;
- skb_buff = skb_put_data(forw_packet_aggr->skb, packet_buff,
- packet_len);
+ skb_put_data(forw_packet_aggr->skb, packet_buff, packet_len);
forw_packet_aggr->packet_len += packet_len;
forw_packet_aggr->num_packets++;
@@ -918,8 +916,8 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface)
u16 tvlv_len = 0;
unsigned long send_time;
- if ((hard_iface->if_status == BATADV_IF_NOT_IN_USE) ||
- (hard_iface->if_status == BATADV_IF_TO_BE_REMOVED))
+ if (hard_iface->if_status == BATADV_IF_NOT_IN_USE ||
+ hard_iface->if_status == BATADV_IF_TO_BE_REMOVED)
return;
/* the interface gets activated here to avoid race conditions between
@@ -1266,7 +1264,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
* drops as they can't send and receive at the same time.
*/
tq_iface_penalty = BATADV_TQ_MAX_VALUE;
- if (if_outgoing && (if_incoming == if_outgoing) &&
+ if (if_outgoing && if_incoming == if_outgoing &&
batadv_is_wifi_hardif(if_outgoing))
tq_iface_penalty = batadv_hop_penalty(BATADV_TQ_MAX_VALUE,
bat_priv);
@@ -1281,7 +1279,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
batadv_ogm_packet->tq = combined_tq;
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
- "bidirectional: orig = %-15pM neigh = %-15pM => own_bcast = %2i, real recv = %2i, local tq: %3i, asym_penalty: %3i, iface_penalty: %3i, total tq: %3i, if_incoming = %s, if_outgoing = %s\n",
+ "bidirectional: orig = %pM neigh = %pM => own_bcast = %2i, real recv = %2i, local tq: %3i, asym_penalty: %3i, iface_penalty: %3i, total tq: %3i, if_incoming = %s, if_outgoing = %s\n",
orig_node->orig, orig_neigh_node->orig, total_count,
neigh_rq_count, tq_own, tq_asym_penalty, tq_iface_penalty,
batadv_ogm_packet->tq, if_incoming->net_dev->name,
@@ -1371,7 +1369,7 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr,
ret = BATADV_NEIGH_DUP;
} else {
set_mark = 0;
- if (is_dup && (ret != BATADV_NEIGH_DUP))
+ if (is_dup && ret != BATADV_NEIGH_DUP)
ret = BATADV_ORIG_DUP;
}
@@ -1517,7 +1515,7 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset,
/* drop packet if sender is not a direct neighbor and if we
* don't route towards it
*/
- if (!is_single_hop_neigh && (!orig_neigh_router)) {
+ if (!is_single_hop_neigh && !orig_neigh_router) {
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
"Drop packet: OGM via unknown neighbor!\n");
goto out_neigh;
@@ -1537,7 +1535,7 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset,
sameseq = orig_ifinfo->last_real_seqno == ntohl(ogm_packet->seqno);
similar_ttl = (orig_ifinfo->last_ttl - 3) <= ogm_packet->ttl;
- if (is_bidirect && ((dup_status == BATADV_NO_DUP) ||
+ if (is_bidirect && (dup_status == BATADV_NO_DUP ||
(sameseq && similar_ttl))) {
batadv_iv_ogm_orig_update(bat_priv, orig_node,
orig_ifinfo, ethhdr,
@@ -1555,8 +1553,8 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset,
/* OGMs from secondary interfaces should only scheduled once
* per interface where it has been received, not multiple times
*/
- if ((ogm_packet->ttl <= 2) &&
- (if_incoming != if_outgoing)) {
+ if (ogm_packet->ttl <= 2 &&
+ if_incoming != if_outgoing) {
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
"Drop packet: OGM from secondary interface and wrong outgoing interface\n");
goto out_neigh;
@@ -1592,7 +1590,7 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset,
if_incoming, if_outgoing);
out_neigh:
- if ((orig_neigh_node) && (!is_single_hop_neigh))
+ if (orig_neigh_node && !is_single_hop_neigh)
batadv_orig_node_put(orig_neigh_node);
out:
if (router_ifinfo)
@@ -2525,9 +2523,9 @@ batadv_iv_gw_get_best_gw_node(struct batadv_priv *bat_priv)
tmp_gw_factor *= 100 * 100;
tmp_gw_factor >>= 18;
- if ((tmp_gw_factor > max_gw_factor) ||
- ((tmp_gw_factor == max_gw_factor) &&
- (tq_avg > max_tq))) {
+ if (tmp_gw_factor > max_gw_factor ||
+ (tmp_gw_factor == max_gw_factor &&
+ tq_avg > max_tq)) {
if (curr_gw)
batadv_gw_node_put(curr_gw);
curr_gw = gw_node;
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index 4e2724c5b33d..341ceab8338d 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -19,7 +19,6 @@
#include "main.h"
#include <linux/atomic.h>
-#include <linux/bug.h>
#include <linux/cache.h>
#include <linux/errno.h>
#include <linux/if_ether.h>
@@ -623,11 +622,11 @@ static int batadv_v_neigh_cmp(struct batadv_neigh_node *neigh1,
int ret = 0;
ifinfo1 = batadv_neigh_ifinfo_get(neigh1, if_outgoing1);
- if (WARN_ON(!ifinfo1))
+ if (!ifinfo1)
goto err_ifinfo1;
ifinfo2 = batadv_neigh_ifinfo_get(neigh2, if_outgoing2);
- if (WARN_ON(!ifinfo2))
+ if (!ifinfo2)
goto err_ifinfo2;
ret = ifinfo1->bat_v.throughput - ifinfo2->bat_v.throughput;
@@ -649,11 +648,11 @@ static bool batadv_v_neigh_is_sob(struct batadv_neigh_node *neigh1,
bool ret = false;
ifinfo1 = batadv_neigh_ifinfo_get(neigh1, if_outgoing1);
- if (WARN_ON(!ifinfo1))
+ if (!ifinfo1)
goto err_ifinfo1;
ifinfo2 = batadv_neigh_ifinfo_get(neigh2, if_outgoing2);
- if (WARN_ON(!ifinfo2))
+ if (!ifinfo2)
goto err_ifinfo2;
threshold = ifinfo1->bat_v.throughput / 4;
@@ -767,7 +766,7 @@ batadv_v_gw_get_best_gw_node(struct batadv_priv *bat_priv)
if (batadv_v_gw_throughput_get(gw_node, &bw) < 0)
goto next;
- if (curr_gw && (bw <= max_bw))
+ if (curr_gw && bw <= max_bw)
goto next;
if (curr_gw)
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index bd1064d98e16..1de992c58b35 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -134,7 +134,7 @@ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh)
hard_iface->bat_v.flags &= ~BATADV_FULL_DUPLEX;
throughput = link_settings.base.speed;
- if (throughput && (throughput != SPEED_UNKNOWN))
+ if (throughput && throughput != SPEED_UNKNOWN)
return throughput * 10;
}
@@ -263,8 +263,8 @@ static void batadv_v_elp_periodic_work(struct work_struct *work)
goto out;
/* we are in the process of shutting this interface down */
- if ((hard_iface->if_status == BATADV_IF_NOT_IN_USE) ||
- (hard_iface->if_status == BATADV_IF_TO_BE_REMOVED))
+ if (hard_iface->if_status == BATADV_IF_NOT_IN_USE ||
+ hard_iface->if_status == BATADV_IF_TO_BE_REMOVED)
goto out;
/* the interface was enabled but may not be ready yet */
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index 1e3dc374bfde..c251445a42a0 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -137,7 +137,7 @@ static void batadv_v_ogm_send(struct work_struct *work)
struct batadv_priv *bat_priv;
struct batadv_ogm2_packet *ogm_packet;
struct sk_buff *skb, *skb_tmp;
- unsigned char *ogm_buff, *pkt_buff;
+ unsigned char *ogm_buff;
int ogm_buff_len;
u16 tvlv_len = 0;
int ret;
@@ -166,7 +166,7 @@ static void batadv_v_ogm_send(struct work_struct *work)
goto reschedule;
skb_reserve(skb, ETH_HLEN);
- pkt_buff = skb_put_data(skb, ogm_buff, ogm_buff_len);
+ skb_put_data(skb, ogm_buff, ogm_buff_len);
ogm_packet = (struct batadv_ogm2_packet *)skb->data;
ogm_packet->seqno = htonl(atomic_read(&bat_priv->bat_v.ogm_seqno));
@@ -200,7 +200,7 @@ static void batadv_v_ogm_send(struct work_struct *work)
type = "unknown";
}
- batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "OGM2 from ourselve on %s surpressed: %s\n",
+ batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "OGM2 from ourselves on %s suppressed: %s\n",
hard_iface->net_dev->name, type);
batadv_hardif_put(hard_iface);
@@ -304,8 +304,8 @@ static u32 batadv_v_forward_penalty(struct batadv_priv *bat_priv,
* due to the store & forward characteristics of WIFI.
* Very low throughput values are the exception.
*/
- if ((throughput > 10) &&
- (if_incoming == if_outgoing) &&
+ if (throughput > 10 &&
+ if_incoming == if_outgoing &&
!(if_incoming->bat_v.flags & BATADV_FULL_DUPLEX))
return throughput / 2;
@@ -455,7 +455,7 @@ static int batadv_v_ogm_metric_update(struct batadv_priv *bat_priv,
/* drop packets with old seqnos, however accept the first packet after
* a host has been rebooted.
*/
- if ((seq_diff < 0) && !protection_started)
+ if (seq_diff < 0 && !protection_started)
goto out;
neigh_node->last_seen = jiffies;
@@ -568,8 +568,8 @@ static bool batadv_v_ogm_route_update(struct batadv_priv *bat_priv,
router_throughput = router_ifinfo->bat_v.throughput;
neigh_throughput = neigh_ifinfo->bat_v.throughput;
- if ((neigh_seq_diff < BATADV_OGM_MAX_ORIGDIFF) &&
- (router_throughput >= neigh_throughput))
+ if (neigh_seq_diff < BATADV_OGM_MAX_ORIGDIFF &&
+ router_throughput >= neigh_throughput)
goto out;
}
@@ -621,7 +621,7 @@ batadv_v_ogm_process_per_outif(struct batadv_priv *bat_priv,
return;
/* only unknown & newer OGMs contain TVLVs we are interested in */
- if ((seqno_age > 0) && (if_outgoing == BATADV_IF_DEFAULT))
+ if (seqno_age > 0 && if_outgoing == BATADV_IF_DEFAULT)
batadv_tvlv_containers_process(bat_priv, true, orig_node,
NULL, NULL,
(unsigned char *)(ogm2 + 1),
@@ -683,18 +683,18 @@ static void batadv_v_ogm_process(const struct sk_buff *skb, int ogm_offset,
ogm_throughput = ntohl(ogm_packet->throughput);
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
- "Received OGM2 packet via NB: %pM, IF: %s [%pM] (from OG: %pM, seqno %u, troughput %u, TTL %u, V %u, tvlv_len %u)\n",
+ "Received OGM2 packet via NB: %pM, IF: %s [%pM] (from OG: %pM, seqno %u, throughput %u, TTL %u, V %u, tvlv_len %u)\n",
ethhdr->h_source, if_incoming->net_dev->name,
if_incoming->net_dev->dev_addr, ogm_packet->orig,
ntohl(ogm_packet->seqno), ogm_throughput, ogm_packet->ttl,
ogm_packet->version, ntohs(ogm_packet->tvlv_len));
- /* If the troughput metric is 0, immediately drop the packet. No need to
- * create orig_node / neigh_node for an unusable route.
+ /* If the throughput metric is 0, immediately drop the packet. No need
+ * to create orig_node / neigh_node for an unusable route.
*/
if (ogm_throughput == 0) {
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
- "Drop packet: originator packet with troughput metric of 0\n");
+ "Drop packet: originator packet with throughput metric of 0\n");
return;
}
@@ -762,7 +762,7 @@ static void batadv_v_ogm_process(const struct sk_buff *skb, int ogm_offset,
type = "unknown";
}
- batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "OGM2 packet from %pM on %s surpressed: %s\n",
+ batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "OGM2 packet from %pM on %s suppressed: %s\n",
ogm_packet->orig, hard_iface->net_dev->name,
type);
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index 6930d6b50f99..760c0de72582 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -492,8 +492,8 @@ static bool batadv_is_orig_node_eligible(struct batadv_dat_candidate *res,
/* this is an hash collision with the temporary selected node. Choose
* the one with the lowest address
*/
- if ((tmp_max == max) && max_orig_node &&
- (batadv_compare_eth(candidate->orig, max_orig_node->orig) > 0))
+ if (tmp_max == max && max_orig_node &&
+ batadv_compare_eth(candidate->orig, max_orig_node->orig) > 0)
goto out;
ret = true;
@@ -834,7 +834,7 @@ int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset)
last_seen_msecs = last_seen_msecs % 60000;
last_seen_secs = last_seen_msecs / 1000;
- seq_printf(seq, " * %15pI4 %14pM %4i %6i:%02i\n",
+ seq_printf(seq, " * %15pI4 %pM %4i %6i:%02i\n",
&dat_entry->ip, dat_entry->mac_addr,
batadv_print_vid(dat_entry->vid),
last_seen_mins, last_seen_secs);
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index de9955d5224d..10d521f0b17f 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -248,12 +248,12 @@ void batadv_gw_election(struct batadv_priv *bat_priv)
}
}
- if ((curr_gw) && (!next_gw)) {
+ if (curr_gw && !next_gw) {
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
"Removing selected gateway - no gateway in range\n");
batadv_throw_uevent(bat_priv, BATADV_UEV_GW, BATADV_UEV_DEL,
NULL);
- } else if ((!curr_gw) && (next_gw)) {
+ } else if (!curr_gw && next_gw) {
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
"Adding route to gateway %pM (bandwidth: %u.%u/%u.%u MBit, tq: %i)\n",
next_gw->orig_node->orig,
@@ -411,8 +411,8 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv,
goto out;
}
- if ((gw_node->bandwidth_down == ntohl(gateway->bandwidth_down)) &&
- (gw_node->bandwidth_up == ntohl(gateway->bandwidth_up)))
+ if (gw_node->bandwidth_down == ntohl(gateway->bandwidth_down) &&
+ gw_node->bandwidth_up == ntohl(gateway->bandwidth_up))
goto out;
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c
index 33940c5c74a8..2c26039c23fc 100644
--- a/net/batman-adv/gateway_common.c
+++ b/net/batman-adv/gateway_common.c
@@ -56,8 +56,8 @@ bool batadv_parse_throughput(struct net_device *net_dev, char *buff,
if (strncasecmp(tmp_ptr, "mbit", 4) == 0)
bw_unit_type = BATADV_BW_UNIT_MBIT;
- if ((strncasecmp(tmp_ptr, "kbit", 4) == 0) ||
- (bw_unit_type == BATADV_BW_UNIT_MBIT))
+ if (strncasecmp(tmp_ptr, "kbit", 4) == 0 ||
+ bw_unit_type == BATADV_BW_UNIT_MBIT)
*tmp_ptr = '\0';
}
@@ -190,7 +190,7 @@ ssize_t batadv_gw_bandwidth_set(struct net_device *net_dev, char *buff,
if (!up_new)
up_new = 1;
- if ((down_curr == down_new) && (up_curr == up_new))
+ if (down_curr == down_new && up_curr == up_new)
return count;
batadv_gw_reselect(bat_priv);
@@ -224,16 +224,16 @@ static void batadv_gw_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
/* only fetch the tvlv value if the handler wasn't called via the
* CIFNOTFND flag and if there is data to fetch
*/
- if ((flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND) ||
- (tvlv_value_len < sizeof(gateway))) {
+ if (flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND ||
+ tvlv_value_len < sizeof(gateway)) {
gateway.bandwidth_down = 0;
gateway.bandwidth_up = 0;
} else {
gateway_ptr = tvlv_value;
gateway.bandwidth_down = gateway_ptr->bandwidth_down;
gateway.bandwidth_up = gateway_ptr->bandwidth_up;
- if ((gateway.bandwidth_down == 0) ||
- (gateway.bandwidth_up == 0)) {
+ if (gateway.bandwidth_down == 0 ||
+ gateway.bandwidth_up == 0) {
gateway.bandwidth_down = 0;
gateway.bandwidth_up = 0;
}
@@ -242,8 +242,8 @@ static void batadv_gw_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
batadv_gw_node_update(bat_priv, orig, &gateway);
/* restart gateway selection */
- if ((gateway.bandwidth_down != 0) &&
- (atomic_read(&bat_priv->gw.mode) == BATADV_GW_MODE_CLIENT))
+ if (gateway.bandwidth_down != 0 &&
+ atomic_read(&bat_priv->gw.mode) == BATADV_GW_MODE_CLIENT)
batadv_gw_check_election(bat_priv, orig);
}
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index e348f76ea8c1..4e3d5340ad96 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -504,8 +504,8 @@ static void batadv_check_known_mac_addr(const struct net_device *net_dev)
rcu_read_lock();
list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
- if ((hard_iface->if_status != BATADV_IF_ACTIVE) &&
- (hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED))
+ if (hard_iface->if_status != BATADV_IF_ACTIVE &&
+ hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED)
continue;
if (hard_iface->net_dev == net_dev)
@@ -568,8 +568,8 @@ int batadv_hardif_min_mtu(struct net_device *soft_iface)
rcu_read_lock();
list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
- if ((hard_iface->if_status != BATADV_IF_ACTIVE) &&
- (hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED))
+ if (hard_iface->if_status != BATADV_IF_ACTIVE &&
+ hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED)
continue;
if (hard_iface->soft_iface != soft_iface)
@@ -654,8 +654,8 @@ out:
static void
batadv_hardif_deactivate_interface(struct batadv_hard_iface *hard_iface)
{
- if ((hard_iface->if_status != BATADV_IF_ACTIVE) &&
- (hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED))
+ if (hard_iface->if_status != BATADV_IF_ACTIVE &&
+ hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED)
return;
hard_iface->if_status = BATADV_IF_INACTIVE;
@@ -738,7 +738,7 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
bat_priv = netdev_priv(hard_iface->soft_iface);
ret = netdev_master_upper_dev_link(hard_iface->net_dev,
- soft_iface, NULL, NULL);
+ soft_iface, NULL, NULL, NULL);
if (ret)
goto err_dev;
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index 8ead292886d1..bded31121d12 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -132,10 +132,10 @@ static ssize_t batadv_socket_read(struct file *file, char __user *buf,
size_t packet_len;
int error;
- if ((file->f_flags & O_NONBLOCK) && (socket_client->queue_len == 0))
+ if ((file->f_flags & O_NONBLOCK) && socket_client->queue_len == 0)
return -EAGAIN;
- if ((!buf) || (count < sizeof(struct batadv_icmp_packet)))
+ if (!buf || count < sizeof(struct batadv_icmp_packet))
return -EINVAL;
if (!access_ok(VERIFY_WRITE, buf, count))
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index fb381fb26a66..4daed7ad46f2 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -73,8 +73,8 @@
* list traversals just rcu-locked
*/
struct list_head batadv_hardif_list;
-static int (*batadv_rx_handler[256])(struct sk_buff *,
- struct batadv_hard_iface *);
+static int (*batadv_rx_handler[256])(struct sk_buff *skb,
+ struct batadv_hard_iface *recv_if);
unsigned char batadv_broadcast_addr[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
@@ -540,12 +540,12 @@ batadv_recv_handler_register(u8 packet_type,
int (*recv_handler)(struct sk_buff *,
struct batadv_hard_iface *))
{
- int (*curr)(struct sk_buff *,
- struct batadv_hard_iface *);
+ int (*curr)(struct sk_buff *skb,
+ struct batadv_hard_iface *recv_if);
curr = batadv_rx_handler[packet_type];
- if ((curr != batadv_recv_unhandled_packet) &&
- (curr != batadv_recv_unhandled_unicast_packet))
+ if (curr != batadv_recv_unhandled_packet &&
+ curr != batadv_recv_unhandled_unicast_packet)
return -EBUSY;
batadv_rx_handler[packet_type] = recv_handler;
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 2be8f1f46529..edb2f239d04d 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -24,7 +24,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2017.2"
+#define BATADV_SOURCE_VERSION "2017.4"
#endif
/* B.A.T.M.A.N. parameters */
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index d327670641ac..e553a8770a89 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -1126,7 +1126,7 @@ static void batadv_mcast_tvlv_ogm_handler(struct batadv_priv *bat_priv,
bool orig_initialized;
if (orig_mcast_enabled && tvlv_value &&
- (tvlv_value_len >= sizeof(mcast_flags)))
+ tvlv_value_len >= sizeof(mcast_flags))
mcast_flags = *(u8 *)tvlv_value;
spin_lock_bh(&orig->mcast_handler_lock);
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 8e2a4b205257..2967b86c13da 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -1062,9 +1062,9 @@ batadv_purge_neigh_ifinfo(struct batadv_priv *bat_priv,
continue;
/* don't purge if the interface is not (going) down */
- if ((if_outgoing->if_status != BATADV_IF_INACTIVE) &&
- (if_outgoing->if_status != BATADV_IF_NOT_IN_USE) &&
- (if_outgoing->if_status != BATADV_IF_TO_BE_REMOVED))
+ if (if_outgoing->if_status != BATADV_IF_INACTIVE &&
+ if_outgoing->if_status != BATADV_IF_NOT_IN_USE &&
+ if_outgoing->if_status != BATADV_IF_TO_BE_REMOVED)
continue;
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
@@ -1106,9 +1106,9 @@ batadv_purge_orig_ifinfo(struct batadv_priv *bat_priv,
continue;
/* don't purge if the interface is not (going) down */
- if ((if_outgoing->if_status != BATADV_IF_INACTIVE) &&
- (if_outgoing->if_status != BATADV_IF_NOT_IN_USE) &&
- (if_outgoing->if_status != BATADV_IF_TO_BE_REMOVED))
+ if (if_outgoing->if_status != BATADV_IF_INACTIVE &&
+ if_outgoing->if_status != BATADV_IF_NOT_IN_USE &&
+ if_outgoing->if_status != BATADV_IF_TO_BE_REMOVED)
continue;
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
@@ -1155,13 +1155,13 @@ batadv_purge_orig_neighbors(struct batadv_priv *bat_priv,
last_seen = neigh_node->last_seen;
if_incoming = neigh_node->if_incoming;
- if ((batadv_has_timed_out(last_seen, BATADV_PURGE_TIMEOUT)) ||
- (if_incoming->if_status == BATADV_IF_INACTIVE) ||
- (if_incoming->if_status == BATADV_IF_NOT_IN_USE) ||
- (if_incoming->if_status == BATADV_IF_TO_BE_REMOVED)) {
- if ((if_incoming->if_status == BATADV_IF_INACTIVE) ||
- (if_incoming->if_status == BATADV_IF_NOT_IN_USE) ||
- (if_incoming->if_status == BATADV_IF_TO_BE_REMOVED))
+ if (batadv_has_timed_out(last_seen, BATADV_PURGE_TIMEOUT) ||
+ if_incoming->if_status == BATADV_IF_INACTIVE ||
+ if_incoming->if_status == BATADV_IF_NOT_IN_USE ||
+ if_incoming->if_status == BATADV_IF_TO_BE_REMOVED) {
+ if (if_incoming->if_status == BATADV_IF_INACTIVE ||
+ if_incoming->if_status == BATADV_IF_NOT_IN_USE ||
+ if_incoming->if_status == BATADV_IF_TO_BE_REMOVED)
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
"neighbor purge: originator %pM, neighbor: %pM, iface: %s\n",
orig_node->orig, neigh_node->addr,
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index f10e3ff26f9d..40d9bf3e5bfe 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -93,14 +93,14 @@ static void _batadv_update_route(struct batadv_priv *bat_priv,
batadv_orig_ifinfo_put(orig_ifinfo);
/* route deleted */
- if ((curr_router) && (!neigh_node)) {
+ if (curr_router && !neigh_node) {
batadv_dbg(BATADV_DBG_ROUTES, bat_priv,
"Deleting route towards: %pM\n", orig_node->orig);
batadv_tt_global_del_orig(bat_priv, orig_node, -1,
"Deleted route towards originator");
/* route added */
- } else if ((!curr_router) && (neigh_node)) {
+ } else if (!curr_router && neigh_node) {
batadv_dbg(BATADV_DBG_ROUTES, bat_priv,
"Adding route towards: %pM (via %pM)\n",
orig_node->orig, neigh_node->addr);
@@ -381,7 +381,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb,
/* add record route information if not full */
if ((icmph->msg_type == BATADV_ECHO_REPLY ||
icmph->msg_type == BATADV_ECHO_REQUEST) &&
- (skb->len >= sizeof(struct batadv_icmp_packet_rr))) {
+ skb->len >= sizeof(struct batadv_icmp_packet_rr)) {
if (skb_linearize(skb) < 0)
goto free_skb;
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index d239a9d72ac3..7895323fd2a7 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -142,7 +142,7 @@ int batadv_send_unicast_skb(struct sk_buff *skb,
#ifdef CONFIG_BATMAN_ADV_BATMAN_V
hardif_neigh = batadv_hardif_neigh_get(neigh->if_incoming, neigh->addr);
- if ((hardif_neigh) && (ret != NET_XMIT_DROP))
+ if (hardif_neigh && ret != NET_XMIT_DROP)
hardif_neigh->bat_v.last_unicast_tx = jiffies;
if (hardif_neigh)
@@ -615,8 +615,8 @@ batadv_forw_packet_list_steal(struct hlist_head *forw_list,
* we delete only packets belonging to the given interface
*/
if (hard_iface &&
- (forw_packet->if_incoming != hard_iface) &&
- (forw_packet->if_outgoing != hard_iface))
+ forw_packet->if_incoming != hard_iface &&
+ forw_packet->if_outgoing != hard_iface)
continue;
hlist_del(&forw_packet->list);
@@ -911,7 +911,7 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work)
type = "unknown";
}
- batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "BCAST packet from orig %pM on %s surpressed: %s\n",
+ batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "BCAST packet from orig %pM on %s suppressed: %s\n",
bcast_packet->orig,
hard_iface->net_dev->name, type);
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 10f7edfb176e..9f673cdfecf8 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -69,8 +69,8 @@ int batadv_skb_head_push(struct sk_buff *skb, unsigned int len)
int result;
/* TODO: We must check if we can release all references to non-payload
- * data using skb_header_release in our skbs to allow skb_cow_header to
- * work optimally. This means that those skbs are not allowed to read
+ * data using __skb_header_release in our skbs to allow skb_cow_header
+ * to work optimally. This means that those skbs are not allowed to read
* or write any data which is before the current position of skb->data
* after that call and thus allow other skbs with the same data buffer
* to write freely in that area.
@@ -160,7 +160,7 @@ static int batadv_interface_set_mac_addr(struct net_device *dev, void *p)
static int batadv_interface_change_mtu(struct net_device *dev, int new_mtu)
{
/* check ranges */
- if ((new_mtu < 68) || (new_mtu > batadv_hardif_min_mtu(dev)))
+ if (new_mtu < 68 || new_mtu > batadv_hardif_min_mtu(dev))
return -EINVAL;
dev->mtu = new_mtu;
@@ -863,11 +863,13 @@ free_bat_counters:
* batadv_softif_slave_add - Add a slave interface to a batadv_soft_interface
* @dev: batadv_soft_interface used as master interface
* @slave_dev: net_device which should become the slave interface
+ * @extack: extended ACK report struct
*
* Return: 0 if successful or error otherwise.
*/
static int batadv_softif_slave_add(struct net_device *dev,
- struct net_device *slave_dev)
+ struct net_device *slave_dev,
+ struct netlink_ext_ack *extack)
{
struct batadv_hard_iface *hard_iface;
struct net *net = dev_net(dev);
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index 0ae8b30e4eaa..aa187fd42475 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -925,8 +925,8 @@ static int batadv_store_mesh_iface_finish(struct net_device *net_dev,
if (hard_iface->if_status == status_tmp)
goto out;
- if ((hard_iface->soft_iface) &&
- (strncmp(hard_iface->soft_iface->name, ifname, IFNAMSIZ) == 0))
+ if (hard_iface->soft_iface &&
+ strncmp(hard_iface->soft_iface->name, ifname, IFNAMSIZ) == 0)
goto out;
if (status_tmp == BATADV_IF_NOT_IN_USE) {
diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c
index bfe8effe9238..4b90033f35a8 100644
--- a/net/batman-adv/tp_meter.c
+++ b/net/batman-adv/tp_meter.c
@@ -1206,7 +1206,7 @@ static int batadv_tp_send_ack(struct batadv_priv *bat_priv, const u8 *dst,
/* send the ack */
r = batadv_send_skb_to_orig(skb, orig_node, NULL);
- if (unlikely(r < 0) || (r == NET_XMIT_DROP)) {
+ if (unlikely(r < 0) || r == NET_XMIT_DROP) {
ret = BATADV_TP_REASON_DST_UNREACHABLE;
goto out;
}
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index ab3b654b05cc..4e2576fc0c59 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -273,9 +273,6 @@ static int iphc_decompress(struct sk_buff *skb, struct net_device *netdev,
struct lowpan_peer *peer)
{
const u8 *saddr;
- struct lowpan_btle_dev *dev;
-
- dev = lowpan_btle_dev(netdev);
saddr = peer->lladdr;
@@ -618,12 +615,8 @@ static void ifup(struct net_device *netdev)
static void ifdown(struct net_device *netdev)
{
- int err;
-
rtnl_lock();
- err = dev_close(netdev);
- if (err < 0)
- BT_INFO("iface %s cannot be closed (%d)", netdev->name, err);
+ dev_close(netdev);
rtnl_unlock();
}
diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig
index 68f951b3e85a..db82a40875e8 100644
--- a/net/bluetooth/Kconfig
+++ b/net/bluetooth/Kconfig
@@ -45,6 +45,11 @@ config BT_BREDR
bool "Bluetooth Classic (BR/EDR) features"
depends on BT
default y
+ help
+ Bluetooth Classic includes support for Basic Rate (BR)
+ available with Bluetooth version 1.0b or later and support
+ for Enhanced Data Rate (EDR) available with Bluetooth
+ version 2.0 or later.
source "net/bluetooth/rfcomm/Kconfig"
@@ -58,11 +63,18 @@ config BT_HS
bool "Bluetooth High Speed (HS) features"
depends on BT_BREDR
default y
+ help
+ Bluetooth High Speed includes support for off-loading
+ Bluetooth connections via 802.11 (wifi) physical layer
+ available with Bluetooth version 3.0 or later.
config BT_LE
bool "Bluetooth Low Energy (LE) features"
depends on BT
default y
+ help
+ Bluetooth Low Energy includes support low-energy physical
+ layer available with Bluetooth version 4.0 or later.
config BT_6LOWPAN
tristate "Bluetooth 6LoWPAN support"
diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile
index 5d0a113e2e40..fda41c0b4781 100644
--- a/net/bluetooth/Makefile
+++ b/net/bluetooth/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the Linux Bluetooth subsystem.
#
diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c
index aad994edd3bb..51c2cf2d8923 100644
--- a/net/bluetooth/a2mp.c
+++ b/net/bluetooth/a2mp.c
@@ -573,7 +573,7 @@ static int a2mp_discphyslink_req(struct amp_mgr *mgr, struct sk_buff *skb,
hcon = hci_conn_hash_lookup_ba(hdev, AMP_LINK,
&mgr->l2cap_conn->hcon->dst);
if (!hcon) {
- BT_ERR("No phys link exist");
+ bt_dev_err(hdev, "no phys link exist");
rsp.status = A2MP_STATUS_NO_PHYSICAL_LINK_EXISTS;
goto clean;
}
diff --git a/net/bluetooth/amp.c b/net/bluetooth/amp.c
index ebcab5bbadd7..78bec8df8525 100644
--- a/net/bluetooth/amp.c
+++ b/net/bluetooth/amp.c
@@ -187,7 +187,7 @@ int phylink_gen_key(struct hci_conn *conn, u8 *data, u8 *len, u8 *type)
/* Legacy key */
if (conn->key_type < 3) {
- BT_ERR("Legacy key type %d", conn->key_type);
+ bt_dev_err(hdev, "legacy key type %d", conn->key_type);
return -EACCES;
}
@@ -207,7 +207,7 @@ int phylink_gen_key(struct hci_conn *conn, u8 *data, u8 *len, u8 *type)
/* Derive Generic AMP Link Key (gamp) */
err = hmac_sha256(keybuf, HCI_AMP_LINK_KEY_SIZE, "gamp", 4, gamp_key);
if (err) {
- BT_ERR("Could not derive Generic AMP Key: err %d", err);
+ bt_dev_err(hdev, "could not derive Generic AMP Key: err %d", err);
return err;
}
diff --git a/net/bluetooth/ecdh_helper.c b/net/bluetooth/ecdh_helper.c
index c7b1a9aee579..2155ce802877 100644
--- a/net/bluetooth/ecdh_helper.c
+++ b/net/bluetooth/ecdh_helper.c
@@ -23,7 +23,6 @@
#include "ecdh_helper.h"
#include <linux/scatterlist.h>
-#include <crypto/kpp.h>
#include <crypto/ecdh.h>
struct ecdh_completion {
@@ -50,55 +49,35 @@ static inline void swap_digits(u64 *in, u64 *out, unsigned int ndigits)
out[i] = __swab64(in[ndigits - 1 - i]);
}
-bool compute_ecdh_secret(const u8 public_key[64], const u8 private_key[32],
- u8 secret[32])
+/* compute_ecdh_secret() - function assumes that the private key was
+ * already set.
+ * @tfm: KPP tfm handle allocated with crypto_alloc_kpp().
+ * @public_key: pair's ecc public key.
+ * secret: memory where the ecdh computed shared secret will be saved.
+ *
+ * Return: zero on success; error code in case of error.
+ */
+int compute_ecdh_secret(struct crypto_kpp *tfm, const u8 public_key[64],
+ u8 secret[32])
{
- struct crypto_kpp *tfm;
struct kpp_request *req;
- struct ecdh p;
+ u8 *tmp;
struct ecdh_completion result;
struct scatterlist src, dst;
- u8 *tmp, *buf;
- unsigned int buf_len;
- int err = -ENOMEM;
+ int err;
tmp = kmalloc(64, GFP_KERNEL);
if (!tmp)
- return false;
+ return -ENOMEM;
- tfm = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0);
- if (IS_ERR(tfm)) {
- pr_err("alg: kpp: Failed to load tfm for kpp: %ld\n",
- PTR_ERR(tfm));
+ req = kpp_request_alloc(tfm, GFP_KERNEL);
+ if (!req) {
+ err = -ENOMEM;
goto free_tmp;
}
- req = kpp_request_alloc(tfm, GFP_KERNEL);
- if (!req)
- goto free_kpp;
-
init_completion(&result.completion);
- /* Security Manager Protocol holds digits in litte-endian order
- * while ECC API expect big-endian data
- */
- swap_digits((u64 *)private_key, (u64 *)tmp, 4);
- p.key = (char *)tmp;
- p.key_size = 32;
- /* Set curve_id */
- p.curve_id = ECC_CURVE_NIST_P256;
- buf_len = crypto_ecdh_key_len(&p);
- buf = kmalloc(buf_len, GFP_KERNEL);
- if (!buf)
- goto free_req;
-
- crypto_ecdh_encode_key(buf, buf_len, &p);
-
- /* Set A private Key */
- err = crypto_kpp_set_secret(tfm, (void *)buf, buf_len);
- if (err)
- goto free_all;
-
swap_digits((u64 *)public_key, (u64 *)tmp, 4); /* x */
swap_digits((u64 *)&public_key[32], (u64 *)&tmp[32], 4); /* y */
@@ -123,104 +102,129 @@ bool compute_ecdh_secret(const u8 public_key[64], const u8 private_key[32],
memcpy(secret, tmp, 32);
free_all:
- kzfree(buf);
-free_req:
kpp_request_free(req);
-free_kpp:
- crypto_free_kpp(tfm);
free_tmp:
- kfree(tmp);
- return (err == 0);
+ kzfree(tmp);
+ return err;
}
-bool generate_ecdh_keys(u8 public_key[64], u8 private_key[32])
+/* set_ecdh_privkey() - set or generate ecc private key.
+ *
+ * Function generates an ecc private key in the crypto subsystem when receiving
+ * a NULL private key or sets the received key when not NULL.
+ *
+ * @tfm: KPP tfm handle allocated with crypto_alloc_kpp().
+ * @private_key: user's ecc private key. When not NULL, the key is expected
+ * in little endian format.
+ *
+ * Return: zero on success; error code in case of error.
+ */
+int set_ecdh_privkey(struct crypto_kpp *tfm, const u8 private_key[32])
+{
+ u8 *buf, *tmp = NULL;
+ unsigned int buf_len;
+ int err;
+ struct ecdh p = {0};
+
+ p.curve_id = ECC_CURVE_NIST_P256;
+
+ if (private_key) {
+ tmp = kmalloc(32, GFP_KERNEL);
+ if (!tmp)
+ return -ENOMEM;
+ swap_digits((u64 *)private_key, (u64 *)tmp, 4);
+ p.key = tmp;
+ p.key_size = 32;
+ }
+
+ buf_len = crypto_ecdh_key_len(&p);
+ buf = kmalloc(buf_len, GFP_KERNEL);
+ if (!buf) {
+ err = -ENOMEM;
+ goto free_tmp;
+ }
+
+ err = crypto_ecdh_encode_key(buf, buf_len, &p);
+ if (err)
+ goto free_all;
+
+ err = crypto_kpp_set_secret(tfm, buf, buf_len);
+ /* fall through */
+free_all:
+ kzfree(buf);
+free_tmp:
+ kzfree(tmp);
+ return err;
+}
+
+/* generate_ecdh_public_key() - function assumes that the private key was
+ * already set.
+ *
+ * @tfm: KPP tfm handle allocated with crypto_alloc_kpp().
+ * @public_key: memory where the computed ecc public key will be saved.
+ *
+ * Return: zero on success; error code in case of error.
+ */
+int generate_ecdh_public_key(struct crypto_kpp *tfm, u8 public_key[64])
{
- struct crypto_kpp *tfm;
struct kpp_request *req;
- struct ecdh p;
+ u8 *tmp;
struct ecdh_completion result;
struct scatterlist dst;
- u8 *tmp, *buf;
- unsigned int buf_len;
- int err = -ENOMEM;
- const unsigned short max_tries = 16;
- unsigned short tries = 0;
+ int err;
tmp = kmalloc(64, GFP_KERNEL);
if (!tmp)
- return false;
+ return -ENOMEM;
- tfm = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0);
- if (IS_ERR(tfm)) {
- pr_err("alg: kpp: Failed to load tfm for kpp: %ld\n",
- PTR_ERR(tfm));
+ req = kpp_request_alloc(tfm, GFP_KERNEL);
+ if (!req) {
+ err = -ENOMEM;
goto free_tmp;
}
- req = kpp_request_alloc(tfm, GFP_KERNEL);
- if (!req)
- goto free_kpp;
-
init_completion(&result.completion);
+ sg_init_one(&dst, tmp, 64);
+ kpp_request_set_input(req, NULL, 0);
+ kpp_request_set_output(req, &dst, 64);
+ kpp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+ ecdh_complete, &result);
- /* Set curve_id */
- p.curve_id = ECC_CURVE_NIST_P256;
- p.key_size = 32;
- buf_len = crypto_ecdh_key_len(&p);
- buf = kmalloc(buf_len, GFP_KERNEL);
- if (!buf)
- goto free_req;
-
- do {
- if (tries++ >= max_tries)
- goto free_all;
-
- /* Set private Key */
- p.key = (char *)private_key;
- crypto_ecdh_encode_key(buf, buf_len, &p);
- err = crypto_kpp_set_secret(tfm, buf, buf_len);
- if (err)
- goto free_all;
-
- sg_init_one(&dst, tmp, 64);
- kpp_request_set_input(req, NULL, 0);
- kpp_request_set_output(req, &dst, 64);
- kpp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
- ecdh_complete, &result);
-
- err = crypto_kpp_generate_public_key(req);
-
- if (err == -EINPROGRESS) {
- wait_for_completion(&result.completion);
- err = result.err;
- }
-
- /* Private key is not valid. Regenerate */
- if (err == -EINVAL)
- continue;
-
- if (err < 0)
- goto free_all;
- else
- break;
-
- } while (true);
-
- /* Keys are handed back in little endian as expected by Security
- * Manager Protocol
+ err = crypto_kpp_generate_public_key(req);
+ if (err == -EINPROGRESS) {
+ wait_for_completion(&result.completion);
+ err = result.err;
+ }
+ if (err < 0)
+ goto free_all;
+
+ /* The public key is handed back in little endian as expected by
+ * the Security Manager Protocol.
*/
swap_digits((u64 *)tmp, (u64 *)public_key, 4); /* x */
swap_digits((u64 *)&tmp[32], (u64 *)&public_key[32], 4); /* y */
- swap_digits((u64 *)private_key, (u64 *)tmp, 4);
- memcpy(private_key, tmp, 32);
free_all:
- kzfree(buf);
-free_req:
kpp_request_free(req);
-free_kpp:
- crypto_free_kpp(tfm);
free_tmp:
kfree(tmp);
- return (err == 0);
+ return err;
+}
+
+/* generate_ecdh_keys() - generate ecc key pair.
+ *
+ * @tfm: KPP tfm handle allocated with crypto_alloc_kpp().
+ * @public_key: memory where the computed ecc public key will be saved.
+ *
+ * Return: zero on success; error code in case of error.
+ */
+int generate_ecdh_keys(struct crypto_kpp *tfm, u8 public_key[64])
+{
+ int err;
+
+ err = set_ecdh_privkey(tfm, NULL);
+ if (err)
+ return err;
+
+ return generate_ecdh_public_key(tfm, public_key);
}
diff --git a/net/bluetooth/ecdh_helper.h b/net/bluetooth/ecdh_helper.h
index 7a423faf76e5..a6f8d03d4aaf 100644
--- a/net/bluetooth/ecdh_helper.h
+++ b/net/bluetooth/ecdh_helper.h
@@ -20,8 +20,11 @@
* COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS
* SOFTWARE IS DISCLAIMED.
*/
+#include <crypto/kpp.h>
#include <linux/types.h>
-bool compute_ecdh_secret(const u8 pub_a[64], const u8 priv_b[32],
- u8 secret[32]);
-bool generate_ecdh_keys(u8 public_key[64], u8 private_key[32]);
+int compute_ecdh_secret(struct crypto_kpp *tfm, const u8 pair_public_key[64],
+ u8 secret[32]);
+int set_ecdh_privkey(struct crypto_kpp *tfm, const u8 *private_key);
+int generate_ecdh_public_key(struct crypto_kpp *tfm, u8 public_key[64]);
+int generate_ecdh_keys(struct crypto_kpp *tfm, u8 public_key[64]);
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index dc59eae54717..a9682534c377 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -729,8 +729,8 @@ static void create_le_conn_complete(struct hci_dev *hdev, u8 status, u16 opcode)
goto done;
}
- BT_ERR("HCI request failed to create LE connection: status 0x%2.2x",
- status);
+ bt_dev_err(hdev, "request failed to create LE connection: "
+ "status 0x%2.2x", status);
if (!conn)
goto done;
@@ -907,7 +907,7 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
*/
if (hci_dev_test_flag(hdev, HCI_LE_SCAN) &&
hdev->le_scan_type == LE_SCAN_ACTIVE) {
- skb_queue_purge(&req.cmd_q);
+ hci_req_purge(&req);
hci_conn_del(conn);
return ERR_PTR(-EBUSY);
}
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 6bc679cd3481..40d260f2bea5 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -267,7 +267,7 @@ static int hci_init1_req(struct hci_request *req, unsigned long opt)
amp_init1(req);
break;
default:
- BT_ERR("Unknown device type %d", hdev->dev_type);
+ bt_dev_err(hdev, "Unknown device type %d", hdev->dev_type);
break;
}
@@ -2150,8 +2150,7 @@ static void hci_error_reset(struct work_struct *work)
if (hdev->hw_error)
hdev->hw_error(hdev, hdev->hw_error_code);
else
- BT_ERR("%s hardware error 0x%2.2x", hdev->name,
- hdev->hw_error_code);
+ bt_dev_err(hdev, "hardware error 0x%2.2x", hdev->hw_error_code);
if (hci_dev_do_close(hdev))
return;
@@ -2524,9 +2523,9 @@ static void hci_cmd_timeout(struct work_struct *work)
struct hci_command_hdr *sent = (void *) hdev->sent_cmd->data;
u16 opcode = __le16_to_cpu(sent->opcode);
- BT_ERR("%s command 0x%4.4x tx timeout", hdev->name, opcode);
+ bt_dev_err(hdev, "command 0x%4.4x tx timeout", opcode);
} else {
- BT_ERR("%s command tx timeout", hdev->name);
+ bt_dev_err(hdev, "command tx timeout");
}
atomic_set(&hdev->cmd_cnt, 1);
@@ -2858,7 +2857,7 @@ struct hci_conn_params *hci_conn_params_add(struct hci_dev *hdev,
params = kzalloc(sizeof(*params), GFP_KERNEL);
if (!params) {
- BT_ERR("Out of memory");
+ bt_dev_err(hdev, "out of memory");
return NULL;
}
@@ -3393,7 +3392,7 @@ static void hci_send_frame(struct hci_dev *hdev, struct sk_buff *skb)
err = hdev->send(hdev, skb);
if (err < 0) {
- BT_ERR("%s sending frame failed (%d)", hdev->name, err);
+ bt_dev_err(hdev, "sending frame failed (%d)", err);
kfree_skb(skb);
}
}
@@ -3408,7 +3407,7 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen,
skb = hci_prepare_cmd(hdev, opcode, plen, param);
if (!skb) {
- BT_ERR("%s no memory for command", hdev->name);
+ bt_dev_err(hdev, "no memory for command");
return -ENOMEM;
}
@@ -3493,7 +3492,7 @@ static void hci_queue_acl(struct hci_chan *chan, struct sk_buff_head *queue,
hci_add_acl_hdr(skb, chan->handle, flags);
break;
default:
- BT_ERR("%s unknown dev_type %d", hdev->name, hdev->dev_type);
+ bt_dev_err(hdev, "unknown dev_type %d", hdev->dev_type);
return;
}
@@ -3618,7 +3617,7 @@ static struct hci_conn *hci_low_sent(struct hci_dev *hdev, __u8 type,
break;
default:
cnt = 0;
- BT_ERR("Unknown link type");
+ bt_dev_err(hdev, "unknown link type %d", conn->type);
}
q = cnt / num;
@@ -3635,15 +3634,15 @@ static void hci_link_tx_to(struct hci_dev *hdev, __u8 type)
struct hci_conn_hash *h = &hdev->conn_hash;
struct hci_conn *c;
- BT_ERR("%s link tx timeout", hdev->name);
+ bt_dev_err(hdev, "link tx timeout");
rcu_read_lock();
/* Kill stalled connections */
list_for_each_entry_rcu(c, &h->list, list) {
if (c->type == type && c->sent) {
- BT_ERR("%s killing stalled connection %pMR",
- hdev->name, &c->dst);
+ bt_dev_err(hdev, "killing stalled connection %pMR",
+ &c->dst);
hci_disconnect(c, HCI_ERROR_REMOTE_USER_TERM);
}
}
@@ -3724,7 +3723,7 @@ static struct hci_chan *hci_chan_sent(struct hci_dev *hdev, __u8 type,
break;
default:
cnt = 0;
- BT_ERR("Unknown link type");
+ bt_dev_err(hdev, "unknown link type %d", chan->conn->type);
}
q = cnt / num;
@@ -4066,8 +4065,8 @@ static void hci_acldata_packet(struct hci_dev *hdev, struct sk_buff *skb)
l2cap_recv_acldata(conn, skb, flags);
return;
} else {
- BT_ERR("%s ACL packet for unknown connection handle %d",
- hdev->name, handle);
+ bt_dev_err(hdev, "ACL packet for unknown connection handle %d",
+ handle);
}
kfree_skb(skb);
@@ -4097,8 +4096,8 @@ static void hci_scodata_packet(struct hci_dev *hdev, struct sk_buff *skb)
sco_recv_scodata(conn, skb);
return;
} else {
- BT_ERR("%s SCO packet for unknown connection handle %d",
- hdev->name, handle);
+ bt_dev_err(hdev, "SCO packet for unknown connection handle %d",
+ handle);
}
kfree_skb(skb);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 0b4dba08a14e..cd3bbb766c24 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1188,7 +1188,8 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev,
break;
default:
- BT_ERR("Used reserved LE_Scan_Enable param %d", cp->enable);
+ bt_dev_err(hdev, "use of reserved LE_Scan_Enable param %d",
+ cp->enable);
break;
}
@@ -1485,7 +1486,7 @@ static void hci_cs_create_conn(struct hci_dev *hdev, __u8 status)
conn = hci_conn_add(hdev, ACL_LINK, &cp->bdaddr,
HCI_ROLE_MASTER);
if (!conn)
- BT_ERR("No memory for new connection");
+ bt_dev_err(hdev, "no memory for new connection");
}
}
@@ -2269,7 +2270,7 @@ static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
conn = hci_conn_add(hdev, ev->link_type, &ev->bdaddr,
HCI_ROLE_SLAVE);
if (!conn) {
- BT_ERR("No memory for new connection");
+ bt_dev_err(hdev, "no memory for new connection");
hci_dev_unlock(hdev);
return;
}
@@ -2431,7 +2432,7 @@ static void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
if (!hci_conn_ssp_enabled(conn) &&
test_bit(HCI_CONN_REAUTH_PEND, &conn->flags)) {
- BT_INFO("re-auth of legacy device is not possible.");
+ bt_dev_info(hdev, "re-auth of legacy device is not possible.");
} else {
set_bit(HCI_CONN_AUTH, &conn->flags);
conn->sec_level = conn->pending_sec_level;
@@ -2535,8 +2536,7 @@ static void read_enc_key_size_complete(struct hci_dev *hdev, u8 status,
BT_DBG("%s status 0x%02x", hdev->name, status);
if (!skb || skb->len < sizeof(*rp)) {
- BT_ERR("%s invalid HCI Read Encryption Key Size response",
- hdev->name);
+ bt_dev_err(hdev, "invalid read key size response");
return;
}
@@ -2554,8 +2554,8 @@ static void read_enc_key_size_complete(struct hci_dev *hdev, u8 status,
* supported.
*/
if (rp->status) {
- BT_ERR("%s failed to read key size for handle %u", hdev->name,
- handle);
+ bt_dev_err(hdev, "failed to read key size for handle %u",
+ handle);
conn->enc_key_size = HCI_LINK_KEY_SIZE;
} else {
conn->enc_key_size = rp->key_size;
@@ -2664,7 +2664,7 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb)
hci_req_add(&req, HCI_OP_READ_ENC_KEY_SIZE, sizeof(cp), &cp);
if (hci_req_run_skb(&req, read_enc_key_size_complete)) {
- BT_ERR("Sending HCI Read Encryption Key Size failed");
+ bt_dev_err(hdev, "sending read key size failed");
conn->enc_key_size = HCI_LINK_KEY_SIZE;
goto notify;
}
@@ -3197,7 +3197,7 @@ static void hci_num_comp_pkts_evt(struct hci_dev *hdev, struct sk_buff *skb)
int i;
if (hdev->flow_ctl_mode != HCI_FLOW_CTL_MODE_PACKET_BASED) {
- BT_ERR("Wrong event for mode %d", hdev->flow_ctl_mode);
+ bt_dev_err(hdev, "wrong event for mode %d", hdev->flow_ctl_mode);
return;
}
@@ -3249,7 +3249,8 @@ static void hci_num_comp_pkts_evt(struct hci_dev *hdev, struct sk_buff *skb)
break;
default:
- BT_ERR("Unknown type %d conn %p", conn->type, conn);
+ bt_dev_err(hdev, "unknown type %d conn %p",
+ conn->type, conn);
break;
}
}
@@ -3271,7 +3272,7 @@ static struct hci_conn *__hci_conn_lookup_handle(struct hci_dev *hdev,
return chan->conn;
break;
default:
- BT_ERR("%s unknown dev_type %d", hdev->name, hdev->dev_type);
+ bt_dev_err(hdev, "unknown dev_type %d", hdev->dev_type);
break;
}
@@ -3284,7 +3285,7 @@ static void hci_num_comp_blocks_evt(struct hci_dev *hdev, struct sk_buff *skb)
int i;
if (hdev->flow_ctl_mode != HCI_FLOW_CTL_MODE_BLOCK_BASED) {
- BT_ERR("Wrong event for mode %d", hdev->flow_ctl_mode);
+ bt_dev_err(hdev, "wrong event for mode %d", hdev->flow_ctl_mode);
return;
}
@@ -3320,7 +3321,8 @@ static void hci_num_comp_blocks_evt(struct hci_dev *hdev, struct sk_buff *skb)
break;
default:
- BT_ERR("Unknown type %d conn %p", conn->type, conn);
+ bt_dev_err(hdev, "unknown type %d conn %p",
+ conn->type, conn);
break;
}
}
@@ -4479,7 +4481,7 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
if (!conn) {
conn = hci_conn_add(hdev, LE_LINK, &ev->bdaddr, ev->role);
if (!conn) {
- BT_ERR("No memory for new connection");
+ bt_dev_err(hdev, "no memory for new connection");
goto unlock;
}
@@ -4749,8 +4751,8 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
case LE_ADV_SCAN_RSP:
break;
default:
- BT_ERR_RATELIMITED("Unknown advertising packet type: 0x%02x",
- type);
+ bt_dev_err_ratelimited(hdev, "unknown advertising packet "
+ "type: 0x%02x", type);
return;
}
@@ -4769,8 +4771,7 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
/* Adjust for actual length */
if (len != real_len) {
- BT_ERR_RATELIMITED("%s advertising data length corrected",
- hdev->name);
+ bt_dev_err_ratelimited(hdev, "advertising data len corrected");
len = real_len;
}
@@ -5192,7 +5193,7 @@ static bool hci_get_cmd_complete(struct hci_dev *hdev, u16 opcode,
return false;
if (skb->len < sizeof(*hdr)) {
- BT_ERR("Too short HCI event");
+ bt_dev_err(hdev, "too short HCI event");
return false;
}
@@ -5206,12 +5207,13 @@ static bool hci_get_cmd_complete(struct hci_dev *hdev, u16 opcode,
}
if (hdr->evt != HCI_EV_CMD_COMPLETE) {
- BT_DBG("Last event is not cmd complete (0x%2.2x)", hdr->evt);
+ bt_dev_err(hdev, "last event is not cmd complete (0x%2.2x)",
+ hdr->evt);
return false;
}
if (skb->len < sizeof(*ev)) {
- BT_ERR("Too short cmd_complete event");
+ bt_dev_err(hdev, "too short cmd_complete event");
return false;
}
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index b73ac149de34..abc0f3224dd1 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -41,6 +41,11 @@ void hci_req_init(struct hci_request *req, struct hci_dev *hdev)
req->err = 0;
}
+void hci_req_purge(struct hci_request *req)
+{
+ skb_queue_purge(&req->cmd_q);
+}
+
static int req_run(struct hci_request *req, hci_req_complete_t complete,
hci_req_complete_skb_t complete_skb)
{
@@ -331,8 +336,8 @@ void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen,
skb = hci_prepare_cmd(hdev, opcode, plen, param);
if (!skb) {
- BT_ERR("%s no memory for command (opcode 0x%4.4x)",
- hdev->name, opcode);
+ bt_dev_err(hdev, "no memory for command (opcode 0x%4.4x)",
+ opcode);
req->err = -ENOMEM;
return;
}
@@ -1421,7 +1426,7 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy,
err = smp_generate_rpa(hdev, hdev->irk, &hdev->rpa);
if (err < 0) {
- BT_ERR("%s failed to generate new RPA", hdev->name);
+ bt_dev_err(hdev, "failed to generate new RPA");
return err;
}
@@ -1783,7 +1788,7 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason)
err = hci_req_run(&req, abort_conn_complete);
if (err && err != -ENODATA) {
- BT_ERR("Failed to run HCI request: err %d", err);
+ bt_dev_err(conn->hdev, "failed to run HCI request: err %d", err);
return err;
}
@@ -1867,7 +1872,8 @@ static void le_scan_disable_work(struct work_struct *work)
hci_req_sync(hdev, le_scan_disable, 0, HCI_CMD_TIMEOUT, &status);
if (status) {
- BT_ERR("Failed to disable LE scan: status 0x%02x", status);
+ bt_dev_err(hdev, "failed to disable LE scan: status 0x%02x",
+ status);
return;
}
@@ -1898,7 +1904,7 @@ static void le_scan_disable_work(struct work_struct *work)
hci_req_sync(hdev, bredr_inquiry, DISCOV_INTERLEAVED_INQUIRY_LEN,
HCI_CMD_TIMEOUT, &status);
if (status) {
- BT_ERR("Inquiry failed: status 0x%02x", status);
+ bt_dev_err(hdev, "inquiry failed: status 0x%02x", status);
goto discov_stopped;
}
@@ -1940,7 +1946,8 @@ static void le_scan_restart_work(struct work_struct *work)
hci_req_sync(hdev, le_scan_restart, 0, HCI_CMD_TIMEOUT, &status);
if (status) {
- BT_ERR("Failed to restart LE scan: status %d", status);
+ bt_dev_err(hdev, "failed to restart LE scan: status %d",
+ status);
return;
}
diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h
index dde77bd59f91..702beb140d9f 100644
--- a/net/bluetooth/hci_request.h
+++ b/net/bluetooth/hci_request.h
@@ -36,6 +36,7 @@ struct hci_request {
};
void hci_req_init(struct hci_request *req, struct hci_dev *hdev);
+void hci_req_purge(struct hci_request *req);
int hci_req_run(struct hci_request *req, hci_req_complete_t complete);
int hci_req_run_skb(struct hci_request *req, hci_req_complete_skb_t complete);
void hci_req_add(struct hci_request *req, u16 opcode, u32 plen,
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 65d734c165bd..923e9a271872 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -251,15 +251,13 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb)
}
/* Send frame to sockets with specific channel */
-void hci_send_to_channel(unsigned short channel, struct sk_buff *skb,
- int flag, struct sock *skip_sk)
+static void __hci_send_to_channel(unsigned short channel, struct sk_buff *skb,
+ int flag, struct sock *skip_sk)
{
struct sock *sk;
BT_DBG("channel %u len %d", channel, skb->len);
- read_lock(&hci_sk_list.lock);
-
sk_for_each(sk, &hci_sk_list.head) {
struct sk_buff *nskb;
@@ -285,6 +283,13 @@ void hci_send_to_channel(unsigned short channel, struct sk_buff *skb,
kfree_skb(nskb);
}
+}
+
+void hci_send_to_channel(unsigned short channel, struct sk_buff *skb,
+ int flag, struct sock *skip_sk)
+{
+ read_lock(&hci_sk_list.lock);
+ __hci_send_to_channel(channel, skb, flag, skip_sk);
read_unlock(&hci_sk_list.lock);
}
@@ -388,8 +393,8 @@ void hci_send_monitor_ctrl_event(struct hci_dev *hdev, u16 event,
hdr->index = index;
hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE);
- hci_send_to_channel(HCI_CHANNEL_MONITOR, skb,
- HCI_SOCK_TRUSTED, NULL);
+ __hci_send_to_channel(HCI_CHANNEL_MONITOR, skb,
+ HCI_SOCK_TRUSTED, NULL);
kfree_skb(skb);
}
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index ca7a35ebaefb..9874844a95a9 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Bluetooth HCI driver model support. */
#include <linux/module.h>
@@ -13,7 +14,7 @@ static void bt_link_release(struct device *dev)
kfree(conn);
}
-static struct device_type bt_link = {
+static const struct device_type bt_link = {
.name = "link",
.release = bt_link_release,
};
@@ -50,7 +51,7 @@ void hci_conn_add_sysfs(struct hci_conn *conn)
dev_set_name(&conn->dev, "%s:%d", hdev->name, conn->handle);
if (device_add(&conn->dev) < 0) {
- BT_ERR("Failed to register connection device");
+ bt_dev_err(hdev, "failed to register connection device");
return;
}
@@ -86,7 +87,7 @@ static void bt_host_release(struct device *dev)
module_put(THIS_MODULE);
}
-static struct device_type bt_host = {
+static const struct device_type bt_host = {
.name = "host",
.release = bt_host_release,
};
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 002743ea509c..8112893037bd 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -734,7 +734,7 @@ static void hidp_stop(struct hid_device *hid)
hid->claimed = 0;
}
-static struct hid_ll_driver hidp_hid_driver = {
+struct hid_ll_driver hidp_hid_driver = {
.parse = hidp_parse,
.start = hidp_start,
.stop = hidp_stop,
@@ -743,6 +743,7 @@ static struct hid_ll_driver hidp_hid_driver = {
.raw_request = hidp_raw_request,
.output_report = hidp_output_report,
};
+EXPORT_SYMBOL_GPL(hidp_hid_driver);
/* This function sets up the hid device. It does not add it
to the HID system. That is done in hidp_add_connection(). */
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 303c779bfe38..43ba91c440bc 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -58,7 +58,7 @@ static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn,
u8 code, u8 ident, u16 dlen, void *data);
static void l2cap_send_cmd(struct l2cap_conn *conn, u8 ident, u8 code, u16 len,
void *data);
-static int l2cap_build_conf_req(struct l2cap_chan *chan, void *data);
+static int l2cap_build_conf_req(struct l2cap_chan *chan, void *data, size_t data_size);
static void l2cap_send_disconn_req(struct l2cap_chan *chan, int err);
static void l2cap_tx(struct l2cap_chan *chan, struct l2cap_ctrl *control,
@@ -1473,7 +1473,7 @@ static void l2cap_conn_start(struct l2cap_conn *conn)
set_bit(CONF_REQ_SENT, &chan->conf_state);
l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ,
- l2cap_build_conf_req(chan, buf), buf);
+ l2cap_build_conf_req(chan, buf, sizeof(buf)), buf);
chan->num_conf_req++;
}
@@ -2987,12 +2987,15 @@ static inline int l2cap_get_conf_opt(void **ptr, int *type, int *olen,
return len;
}
-static void l2cap_add_conf_opt(void **ptr, u8 type, u8 len, unsigned long val)
+static void l2cap_add_conf_opt(void **ptr, u8 type, u8 len, unsigned long val, size_t size)
{
struct l2cap_conf_opt *opt = *ptr;
BT_DBG("type 0x%2.2x len %u val 0x%lx", type, len, val);
+ if (size < L2CAP_CONF_OPT_SIZE + len)
+ return;
+
opt->type = type;
opt->len = len;
@@ -3017,7 +3020,7 @@ static void l2cap_add_conf_opt(void **ptr, u8 type, u8 len, unsigned long val)
*ptr += L2CAP_CONF_OPT_SIZE + len;
}
-static void l2cap_add_opt_efs(void **ptr, struct l2cap_chan *chan)
+static void l2cap_add_opt_efs(void **ptr, struct l2cap_chan *chan, size_t size)
{
struct l2cap_conf_efs efs;
@@ -3045,7 +3048,7 @@ static void l2cap_add_opt_efs(void **ptr, struct l2cap_chan *chan)
}
l2cap_add_conf_opt(ptr, L2CAP_CONF_EFS, sizeof(efs),
- (unsigned long) &efs);
+ (unsigned long) &efs, size);
}
static void l2cap_ack_timeout(struct work_struct *work)
@@ -3191,11 +3194,12 @@ static inline void l2cap_txwin_setup(struct l2cap_chan *chan)
chan->ack_win = chan->tx_win;
}
-static int l2cap_build_conf_req(struct l2cap_chan *chan, void *data)
+static int l2cap_build_conf_req(struct l2cap_chan *chan, void *data, size_t data_size)
{
struct l2cap_conf_req *req = data;
struct l2cap_conf_rfc rfc = { .mode = chan->mode };
void *ptr = req->data;
+ void *endptr = data + data_size;
u16 size;
BT_DBG("chan %p", chan);
@@ -3220,7 +3224,7 @@ static int l2cap_build_conf_req(struct l2cap_chan *chan, void *data)
done:
if (chan->imtu != L2CAP_DEFAULT_MTU)
- l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, chan->imtu);
+ l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, chan->imtu, endptr - ptr);
switch (chan->mode) {
case L2CAP_MODE_BASIC:
@@ -3239,7 +3243,7 @@ done:
rfc.max_pdu_size = 0;
l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc),
- (unsigned long) &rfc);
+ (unsigned long) &rfc, endptr - ptr);
break;
case L2CAP_MODE_ERTM:
@@ -3259,21 +3263,21 @@ done:
L2CAP_DEFAULT_TX_WINDOW);
l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc),
- (unsigned long) &rfc);
+ (unsigned long) &rfc, endptr - ptr);
if (test_bit(FLAG_EFS_ENABLE, &chan->flags))
- l2cap_add_opt_efs(&ptr, chan);
+ l2cap_add_opt_efs(&ptr, chan, endptr - ptr);
if (test_bit(FLAG_EXT_CTRL, &chan->flags))
l2cap_add_conf_opt(&ptr, L2CAP_CONF_EWS, 2,
- chan->tx_win);
+ chan->tx_win, endptr - ptr);
if (chan->conn->feat_mask & L2CAP_FEAT_FCS)
if (chan->fcs == L2CAP_FCS_NONE ||
test_bit(CONF_RECV_NO_FCS, &chan->conf_state)) {
chan->fcs = L2CAP_FCS_NONE;
l2cap_add_conf_opt(&ptr, L2CAP_CONF_FCS, 1,
- chan->fcs);
+ chan->fcs, endptr - ptr);
}
break;
@@ -3291,17 +3295,17 @@ done:
rfc.max_pdu_size = cpu_to_le16(size);
l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc),
- (unsigned long) &rfc);
+ (unsigned long) &rfc, endptr - ptr);
if (test_bit(FLAG_EFS_ENABLE, &chan->flags))
- l2cap_add_opt_efs(&ptr, chan);
+ l2cap_add_opt_efs(&ptr, chan, endptr - ptr);
if (chan->conn->feat_mask & L2CAP_FEAT_FCS)
if (chan->fcs == L2CAP_FCS_NONE ||
test_bit(CONF_RECV_NO_FCS, &chan->conf_state)) {
chan->fcs = L2CAP_FCS_NONE;
l2cap_add_conf_opt(&ptr, L2CAP_CONF_FCS, 1,
- chan->fcs);
+ chan->fcs, endptr - ptr);
}
break;
}
@@ -3312,10 +3316,11 @@ done:
return ptr - data;
}
-static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data)
+static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data, size_t data_size)
{
struct l2cap_conf_rsp *rsp = data;
void *ptr = rsp->data;
+ void *endptr = data + data_size;
void *req = chan->conf_req;
int len = chan->conf_len;
int type, hint, olen;
@@ -3417,7 +3422,7 @@ done:
return -ECONNREFUSED;
l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc),
- (unsigned long) &rfc);
+ (unsigned long) &rfc, endptr - ptr);
}
if (result == L2CAP_CONF_SUCCESS) {
@@ -3430,7 +3435,7 @@ done:
chan->omtu = mtu;
set_bit(CONF_MTU_DONE, &chan->conf_state);
}
- l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, chan->omtu);
+ l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, chan->omtu, endptr - ptr);
if (remote_efs) {
if (chan->local_stype != L2CAP_SERV_NOTRAFIC &&
@@ -3444,7 +3449,7 @@ done:
l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS,
sizeof(efs),
- (unsigned long) &efs);
+ (unsigned long) &efs, endptr - ptr);
} else {
/* Send PENDING Conf Rsp */
result = L2CAP_CONF_PENDING;
@@ -3477,7 +3482,7 @@ done:
set_bit(CONF_MODE_DONE, &chan->conf_state);
l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC,
- sizeof(rfc), (unsigned long) &rfc);
+ sizeof(rfc), (unsigned long) &rfc, endptr - ptr);
if (test_bit(FLAG_EFS_ENABLE, &chan->flags)) {
chan->remote_id = efs.id;
@@ -3491,7 +3496,7 @@ done:
le32_to_cpu(efs.sdu_itime);
l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS,
sizeof(efs),
- (unsigned long) &efs);
+ (unsigned long) &efs, endptr - ptr);
}
break;
@@ -3505,7 +3510,7 @@ done:
set_bit(CONF_MODE_DONE, &chan->conf_state);
l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc),
- (unsigned long) &rfc);
+ (unsigned long) &rfc, endptr - ptr);
break;
@@ -3527,10 +3532,11 @@ done:
}
static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len,
- void *data, u16 *result)
+ void *data, size_t size, u16 *result)
{
struct l2cap_conf_req *req = data;
void *ptr = req->data;
+ void *endptr = data + size;
int type, olen;
unsigned long val;
struct l2cap_conf_rfc rfc = { .mode = L2CAP_MODE_BASIC };
@@ -3548,13 +3554,13 @@ static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len,
chan->imtu = L2CAP_DEFAULT_MIN_MTU;
} else
chan->imtu = val;
- l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, chan->imtu);
+ l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, chan->imtu, endptr - ptr);
break;
case L2CAP_CONF_FLUSH_TO:
chan->flush_to = val;
l2cap_add_conf_opt(&ptr, L2CAP_CONF_FLUSH_TO,
- 2, chan->flush_to);
+ 2, chan->flush_to, endptr - ptr);
break;
case L2CAP_CONF_RFC:
@@ -3568,13 +3574,13 @@ static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len,
chan->fcs = 0;
l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC,
- sizeof(rfc), (unsigned long) &rfc);
+ sizeof(rfc), (unsigned long) &rfc, endptr - ptr);
break;
case L2CAP_CONF_EWS:
chan->ack_win = min_t(u16, val, chan->ack_win);
l2cap_add_conf_opt(&ptr, L2CAP_CONF_EWS, 2,
- chan->tx_win);
+ chan->tx_win, endptr - ptr);
break;
case L2CAP_CONF_EFS:
@@ -3587,7 +3593,7 @@ static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len,
return -ECONNREFUSED;
l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs),
- (unsigned long) &efs);
+ (unsigned long) &efs, endptr - ptr);
break;
case L2CAP_CONF_FCS:
@@ -3692,7 +3698,7 @@ void __l2cap_connect_rsp_defer(struct l2cap_chan *chan)
return;
l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ,
- l2cap_build_conf_req(chan, buf), buf);
+ l2cap_build_conf_req(chan, buf, sizeof(buf)), buf);
chan->num_conf_req++;
}
@@ -3900,7 +3906,7 @@ sendresp:
u8 buf[128];
set_bit(CONF_REQ_SENT, &chan->conf_state);
l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ,
- l2cap_build_conf_req(chan, buf), buf);
+ l2cap_build_conf_req(chan, buf, sizeof(buf)), buf);
chan->num_conf_req++;
}
@@ -3978,7 +3984,7 @@ static int l2cap_connect_create_rsp(struct l2cap_conn *conn,
break;
l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ,
- l2cap_build_conf_req(chan, req), req);
+ l2cap_build_conf_req(chan, req, sizeof(req)), req);
chan->num_conf_req++;
break;
@@ -4090,7 +4096,7 @@ static inline int l2cap_config_req(struct l2cap_conn *conn,
}
/* Complete config. */
- len = l2cap_parse_conf_req(chan, rsp);
+ len = l2cap_parse_conf_req(chan, rsp, sizeof(rsp));
if (len < 0) {
l2cap_send_disconn_req(chan, ECONNRESET);
goto unlock;
@@ -4124,7 +4130,7 @@ static inline int l2cap_config_req(struct l2cap_conn *conn,
if (!test_and_set_bit(CONF_REQ_SENT, &chan->conf_state)) {
u8 buf[64];
l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ,
- l2cap_build_conf_req(chan, buf), buf);
+ l2cap_build_conf_req(chan, buf, sizeof(buf)), buf);
chan->num_conf_req++;
}
@@ -4184,7 +4190,7 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn,
char buf[64];
len = l2cap_parse_conf_rsp(chan, rsp->data, len,
- buf, &result);
+ buf, sizeof(buf), &result);
if (len < 0) {
l2cap_send_disconn_req(chan, ECONNRESET);
goto done;
@@ -4214,7 +4220,7 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn,
/* throw out any old stored conf requests */
result = L2CAP_CONF_SUCCESS;
len = l2cap_parse_conf_rsp(chan, rsp->data, len,
- req, &result);
+ req, sizeof(req), &result);
if (len < 0) {
l2cap_send_disconn_req(chan, ECONNRESET);
goto done;
@@ -4791,7 +4797,7 @@ static void l2cap_do_create(struct l2cap_chan *chan, int result,
set_bit(CONF_REQ_SENT, &chan->conf_state);
l2cap_send_cmd(chan->conn, l2cap_get_ident(chan->conn),
L2CAP_CONF_REQ,
- l2cap_build_conf_req(chan, buf), buf);
+ l2cap_build_conf_req(chan, buf, sizeof(buf)), buf);
chan->num_conf_req++;
}
}
@@ -7465,7 +7471,7 @@ static void l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt)
set_bit(CONF_REQ_SENT, &chan->conf_state);
l2cap_send_cmd(conn, l2cap_get_ident(conn),
L2CAP_CONF_REQ,
- l2cap_build_conf_req(chan, buf),
+ l2cap_build_conf_req(chan, buf, sizeof(buf)),
buf);
chan->num_conf_req++;
}
diff --git a/net/bluetooth/lib.c b/net/bluetooth/lib.c
index aa4cf64e32a6..63e65d9b4b24 100644
--- a/net/bluetooth/lib.c
+++ b/net/bluetooth/lib.c
@@ -30,10 +30,10 @@
#include <net/bluetooth/bluetooth.h>
-void baswap(bdaddr_t *dst, bdaddr_t *src)
+void baswap(bdaddr_t *dst, const bdaddr_t *src)
{
- unsigned char *d = (unsigned char *) dst;
- unsigned char *s = (unsigned char *) src;
+ const unsigned char *s = (const unsigned char *)src;
+ unsigned char *d = (unsigned char *)dst;
unsigned int i;
for (i = 0; i < 6; i++)
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 1fba2a03f8ae..6e9fc86d8daf 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -2159,8 +2159,8 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data,
key_count = __le16_to_cpu(cp->key_count);
if (key_count > max_key_count) {
- BT_ERR("load_link_keys: too big key_count value %u",
- key_count);
+ bt_dev_err(hdev, "load_link_keys: too big key_count value %u",
+ key_count);
return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS,
MGMT_STATUS_INVALID_PARAMS);
}
@@ -2168,8 +2168,8 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data,
expected_len = sizeof(*cp) + key_count *
sizeof(struct mgmt_link_key_info);
if (expected_len != len) {
- BT_ERR("load_link_keys: expected %u bytes, got %u bytes",
- expected_len, len);
+ bt_dev_err(hdev, "load_link_keys: expected %u bytes, got %u bytes",
+ expected_len, len);
return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS,
MGMT_STATUS_INVALID_PARAMS);
}
@@ -2561,7 +2561,7 @@ static int pin_code_reply(struct sock *sk, struct hci_dev *hdev, void *data,
memcpy(&ncp.addr, &cp->addr, sizeof(ncp.addr));
- BT_ERR("PIN code is not 16 bytes long");
+ bt_dev_err(hdev, "PIN code is not 16 bytes long");
err = send_pin_code_neg_reply(sk, hdev, &ncp);
if (err >= 0)
@@ -3391,7 +3391,8 @@ static int add_remote_oob_data(struct sock *sk, struct hci_dev *hdev,
MGMT_OP_ADD_REMOTE_OOB_DATA,
status, &cp->addr, sizeof(cp->addr));
} else {
- BT_ERR("add_remote_oob_data: invalid length of %u bytes", len);
+ bt_dev_err(hdev, "add_remote_oob_data: invalid len of %u bytes",
+ len);
err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_REMOTE_OOB_DATA,
MGMT_STATUS_INVALID_PARAMS);
}
@@ -3604,8 +3605,8 @@ static int start_service_discovery(struct sock *sk, struct hci_dev *hdev,
uuid_count = __le16_to_cpu(cp->uuid_count);
if (uuid_count > max_uuid_count) {
- BT_ERR("service_discovery: too big uuid_count value %u",
- uuid_count);
+ bt_dev_err(hdev, "service_discovery: too big uuid_count value %u",
+ uuid_count);
err = mgmt_cmd_complete(sk, hdev->id,
MGMT_OP_START_SERVICE_DISCOVERY,
MGMT_STATUS_INVALID_PARAMS, &cp->type,
@@ -3615,8 +3616,8 @@ static int start_service_discovery(struct sock *sk, struct hci_dev *hdev,
expected_len = sizeof(*cp) + uuid_count * 16;
if (expected_len != len) {
- BT_ERR("service_discovery: expected %u bytes, got %u bytes",
- expected_len, len);
+ bt_dev_err(hdev, "service_discovery: expected %u bytes, got %u bytes",
+ expected_len, len);
err = mgmt_cmd_complete(sk, hdev->id,
MGMT_OP_START_SERVICE_DISCOVERY,
MGMT_STATUS_INVALID_PARAMS, &cp->type,
@@ -3943,7 +3944,7 @@ static void set_advertising_complete(struct hci_dev *hdev, u8 status,
err = hci_req_run(&req, enable_advertising_instance);
if (err)
- BT_ERR("Failed to re-configure advertising");
+ bt_dev_err(hdev, "failed to re-configure advertising");
unlock:
hci_dev_unlock(hdev);
@@ -4664,15 +4665,16 @@ static int load_irks(struct sock *sk, struct hci_dev *hdev, void *cp_data,
irk_count = __le16_to_cpu(cp->irk_count);
if (irk_count > max_irk_count) {
- BT_ERR("load_irks: too big irk_count value %u", irk_count);
+ bt_dev_err(hdev, "load_irks: too big irk_count value %u",
+ irk_count);
return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_IRKS,
MGMT_STATUS_INVALID_PARAMS);
}
expected_len = sizeof(*cp) + irk_count * sizeof(struct mgmt_irk_info);
if (expected_len != len) {
- BT_ERR("load_irks: expected %u bytes, got %u bytes",
- expected_len, len);
+ bt_dev_err(hdev, "load_irks: expected %u bytes, got %u bytes",
+ expected_len, len);
return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_IRKS,
MGMT_STATUS_INVALID_PARAMS);
}
@@ -4745,7 +4747,8 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev,
key_count = __le16_to_cpu(cp->key_count);
if (key_count > max_key_count) {
- BT_ERR("load_ltks: too big key_count value %u", key_count);
+ bt_dev_err(hdev, "load_ltks: too big key_count value %u",
+ key_count);
return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LONG_TERM_KEYS,
MGMT_STATUS_INVALID_PARAMS);
}
@@ -4753,8 +4756,8 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev,
expected_len = sizeof(*cp) + key_count *
sizeof(struct mgmt_ltk_info);
if (expected_len != len) {
- BT_ERR("load_keys: expected %u bytes, got %u bytes",
- expected_len, len);
+ bt_dev_err(hdev, "load_keys: expected %u bytes, got %u bytes",
+ expected_len, len);
return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LONG_TERM_KEYS,
MGMT_STATUS_INVALID_PARAMS);
}
@@ -4873,14 +4876,15 @@ static void conn_info_refresh_complete(struct hci_dev *hdev, u8 hci_status,
}
if (!cp) {
- BT_ERR("invalid sent_cmd in conn_info response");
+ bt_dev_err(hdev, "invalid sent_cmd in conn_info response");
goto unlock;
}
handle = __le16_to_cpu(cp->handle);
conn = hci_conn_hash_lookup_handle(hdev, handle);
if (!conn) {
- BT_ERR("unknown handle (%d) in conn_info response", handle);
+ bt_dev_err(hdev, "unknown handle (%d) in conn_info response",
+ handle);
goto unlock;
}
@@ -5477,8 +5481,8 @@ static int load_conn_param(struct sock *sk, struct hci_dev *hdev, void *data,
param_count = __le16_to_cpu(cp->param_count);
if (param_count > max_param_count) {
- BT_ERR("load_conn_param: too big param_count value %u",
- param_count);
+ bt_dev_err(hdev, "load_conn_param: too big param_count value %u",
+ param_count);
return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_CONN_PARAM,
MGMT_STATUS_INVALID_PARAMS);
}
@@ -5486,8 +5490,8 @@ static int load_conn_param(struct sock *sk, struct hci_dev *hdev, void *data,
expected_len = sizeof(*cp) + param_count *
sizeof(struct mgmt_conn_param);
if (expected_len != len) {
- BT_ERR("load_conn_param: expected %u bytes, got %u bytes",
- expected_len, len);
+ bt_dev_err(hdev, "load_conn_param: expected %u bytes, got %u bytes",
+ expected_len, len);
return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_CONN_PARAM,
MGMT_STATUS_INVALID_PARAMS);
}
@@ -5512,7 +5516,7 @@ static int load_conn_param(struct sock *sk, struct hci_dev *hdev, void *data,
} else if (param->addr.type == BDADDR_LE_RANDOM) {
addr_type = ADDR_LE_DEV_RANDOM;
} else {
- BT_ERR("Ignoring invalid connection parameters");
+ bt_dev_err(hdev, "ignoring invalid connection parameters");
continue;
}
@@ -5525,14 +5529,14 @@ static int load_conn_param(struct sock *sk, struct hci_dev *hdev, void *data,
min, max, latency, timeout);
if (hci_check_conn_params(min, max, latency, timeout) < 0) {
- BT_ERR("Ignoring invalid connection parameters");
+ bt_dev_err(hdev, "ignoring invalid connection parameters");
continue;
}
hci_param = hci_conn_params_add(hdev, &param->addr.bdaddr,
addr_type);
if (!hci_param) {
- BT_ERR("Failed to add connection parameters");
+ bt_dev_err(hdev, "failed to add connection parameters");
continue;
}
@@ -6383,6 +6387,7 @@ static int remove_advertising(struct sock *sk, struct hci_dev *hdev,
if (skb_queue_empty(&req.cmd_q) ||
!hdev_is_powered(hdev) ||
hci_dev_test_flag(hdev, HCI_ADVERTISING)) {
+ hci_req_purge(&req);
rp.instance = cp->instance;
err = mgmt_cmd_complete(sk, hdev->id,
MGMT_OP_REMOVE_ADVERTISING,
diff --git a/net/bluetooth/selftest.c b/net/bluetooth/selftest.c
index ee92c925ecc5..03e3c89c3046 100644
--- a/net/bluetooth/selftest.c
+++ b/net/bluetooth/selftest.c
@@ -138,12 +138,12 @@ static const u8 dhkey_3[32] __initconst = {
0x7c, 0x1c, 0xf9, 0x49, 0xe6, 0xd7, 0xaa, 0x70,
};
-static int __init test_ecdh_sample(const u8 priv_a[32], const u8 priv_b[32],
- const u8 pub_a[64], const u8 pub_b[64],
- const u8 dhkey[32])
+static int __init test_ecdh_sample(struct crypto_kpp *tfm, const u8 priv_a[32],
+ const u8 priv_b[32], const u8 pub_a[64],
+ const u8 pub_b[64], const u8 dhkey[32])
{
u8 *tmp, *dhkey_a, *dhkey_b;
- int ret = 0;
+ int ret;
tmp = kmalloc(64, GFP_KERNEL);
if (!tmp)
@@ -152,19 +152,32 @@ static int __init test_ecdh_sample(const u8 priv_a[32], const u8 priv_b[32],
dhkey_a = &tmp[0];
dhkey_b = &tmp[32];
- compute_ecdh_secret(pub_b, priv_a, dhkey_a);
- compute_ecdh_secret(pub_a, priv_b, dhkey_b);
+ ret = set_ecdh_privkey(tfm, priv_a);
+ if (ret)
+ goto out;
+
+ ret = compute_ecdh_secret(tfm, pub_b, dhkey_a);
+ if (ret)
+ goto out;
if (memcmp(dhkey_a, dhkey, 32)) {
ret = -EINVAL;
goto out;
}
+ ret = set_ecdh_privkey(tfm, priv_b);
+ if (ret)
+ goto out;
+
+ ret = compute_ecdh_secret(tfm, pub_a, dhkey_b);
+ if (ret)
+ goto out;
+
if (memcmp(dhkey_b, dhkey, 32))
ret = -EINVAL;
-
+ /* fall through*/
out:
- kfree(dhkey_a);
+ kfree(tmp);
return ret;
}
@@ -185,30 +198,43 @@ static const struct file_operations test_ecdh_fops = {
static int __init test_ecdh(void)
{
+ struct crypto_kpp *tfm;
ktime_t calltime, delta, rettime;
- unsigned long long duration;
+ unsigned long long duration = 0;
int err;
calltime = ktime_get();
- err = test_ecdh_sample(priv_a_1, priv_b_1, pub_a_1, pub_b_1, dhkey_1);
+ tfm = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0);
+ if (IS_ERR(tfm)) {
+ BT_ERR("Unable to create ECDH crypto context");
+ err = PTR_ERR(tfm);
+ goto done;
+ }
+
+ err = test_ecdh_sample(tfm, priv_a_1, priv_b_1, pub_a_1, pub_b_1,
+ dhkey_1);
if (err) {
BT_ERR("ECDH sample 1 failed");
goto done;
}
- err = test_ecdh_sample(priv_a_2, priv_b_2, pub_a_2, pub_b_2, dhkey_2);
+ err = test_ecdh_sample(tfm, priv_a_2, priv_b_2, pub_a_2, pub_b_2,
+ dhkey_2);
if (err) {
BT_ERR("ECDH sample 2 failed");
goto done;
}
- err = test_ecdh_sample(priv_a_3, priv_a_3, pub_a_3, pub_a_3, dhkey_3);
+ err = test_ecdh_sample(tfm, priv_a_3, priv_a_3, pub_a_3, pub_a_3,
+ dhkey_3);
if (err) {
BT_ERR("ECDH sample 3 failed");
goto done;
}
+ crypto_free_kpp(tfm);
+
rettime = ktime_get();
delta = ktime_sub(rettime, calltime);
duration = (unsigned long long) ktime_to_ns(delta) >> 10;
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index a0ef89772c36..01117ae84f1d 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -26,6 +26,7 @@
#include <crypto/algapi.h>
#include <crypto/b128ops.h>
#include <crypto/hash.h>
+#include <crypto/kpp.h>
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>
@@ -83,7 +84,6 @@ enum {
struct smp_dev {
/* Secure Connections OOB data */
u8 local_pk[64];
- u8 local_sk[32];
u8 local_rand[16];
bool debug_key;
@@ -92,6 +92,7 @@ struct smp_dev {
struct crypto_cipher *tfm_aes;
struct crypto_shash *tfm_cmac;
+ struct crypto_kpp *tfm_ecdh;
};
struct smp_chan {
@@ -124,13 +125,13 @@ struct smp_chan {
/* Secure Connections variables */
u8 local_pk[64];
- u8 local_sk[32];
u8 remote_pk[64];
u8 dhkey[32];
u8 mackey[16];
struct crypto_cipher *tfm_aes;
struct crypto_shash *tfm_cmac;
+ struct crypto_kpp *tfm_ecdh;
};
/* These debug key values are defined in the SMP section of the core
@@ -565,22 +566,22 @@ int smp_generate_oob(struct hci_dev *hdev, u8 hash[16], u8 rand[16])
if (hci_dev_test_flag(hdev, HCI_USE_DEBUG_KEYS)) {
BT_DBG("Using debug keys");
+ err = set_ecdh_privkey(smp->tfm_ecdh, debug_sk);
+ if (err)
+ return err;
memcpy(smp->local_pk, debug_pk, 64);
- memcpy(smp->local_sk, debug_sk, 32);
smp->debug_key = true;
} else {
while (true) {
- /* Seed private key with random number */
- get_random_bytes(smp->local_sk, 32);
-
- /* Generate local key pair for Secure Connections */
- if (!generate_ecdh_keys(smp->local_pk, smp->local_sk))
- return -EIO;
+ /* Generate key pair for Secure Connections */
+ err = generate_ecdh_keys(smp->tfm_ecdh, smp->local_pk);
+ if (err)
+ return err;
/* This is unlikely, but we need to check that
* we didn't accidentially generate a debug key.
*/
- if (crypto_memneq(smp->local_sk, debug_sk, 32))
+ if (crypto_memneq(smp->local_pk, debug_pk, 64))
break;
}
smp->debug_key = false;
@@ -588,7 +589,6 @@ int smp_generate_oob(struct hci_dev *hdev, u8 hash[16], u8 rand[16])
SMP_DBG("OOB Public Key X: %32phN", smp->local_pk);
SMP_DBG("OOB Public Key Y: %32phN", smp->local_pk + 32);
- SMP_DBG("OOB Private Key: %32phN", smp->local_sk);
get_random_bytes(smp->local_rand, 16);
@@ -771,6 +771,7 @@ static void smp_chan_destroy(struct l2cap_conn *conn)
crypto_free_cipher(smp->tfm_aes);
crypto_free_shash(smp->tfm_cmac);
+ crypto_free_kpp(smp->tfm_ecdh);
/* Ensure that we don't leave any debug key around if debug key
* support hasn't been explicitly enabled.
@@ -995,7 +996,8 @@ static u8 smp_random(struct smp_chan *smp)
return SMP_UNSPECIFIED;
if (crypto_memneq(smp->pcnf, confirm, sizeof(smp->pcnf))) {
- BT_ERR("Pairing failed (confirmation values mismatch)");
+ bt_dev_err(hcon->hdev, "pairing failed "
+ "(confirmation values mismatch)");
return SMP_CONFIRM_FAILED;
}
@@ -1209,7 +1211,7 @@ static void sc_generate_ltk(struct smp_chan *smp)
key = hci_find_link_key(hdev, &hcon->dst);
if (!key) {
- BT_ERR("%s No Link Key found to generate LTK", hdev->name);
+ bt_dev_err(hdev, "no Link Key found to generate LTK");
return;
}
@@ -1391,16 +1393,19 @@ static struct smp_chan *smp_chan_create(struct l2cap_conn *conn)
smp->tfm_aes = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(smp->tfm_aes)) {
BT_ERR("Unable to create AES crypto context");
- kzfree(smp);
- return NULL;
+ goto zfree_smp;
}
smp->tfm_cmac = crypto_alloc_shash("cmac(aes)", 0, 0);
if (IS_ERR(smp->tfm_cmac)) {
BT_ERR("Unable to create CMAC crypto context");
- crypto_free_cipher(smp->tfm_aes);
- kzfree(smp);
- return NULL;
+ goto free_cipher;
+ }
+
+ smp->tfm_ecdh = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0);
+ if (IS_ERR(smp->tfm_ecdh)) {
+ BT_ERR("Unable to create ECDH crypto context");
+ goto free_shash;
}
smp->conn = conn;
@@ -1413,6 +1418,14 @@ static struct smp_chan *smp_chan_create(struct l2cap_conn *conn)
hci_conn_hold(conn->hcon);
return smp;
+
+free_shash:
+ crypto_free_shash(smp->tfm_cmac);
+free_cipher:
+ crypto_free_cipher(smp->tfm_aes);
+zfree_smp:
+ kzfree(smp);
+ return NULL;
}
static int sc_mackey_and_ltk(struct smp_chan *smp, u8 mackey[16], u8 ltk[16])
@@ -1883,7 +1896,6 @@ static u8 sc_send_public_key(struct smp_chan *smp)
smp_dev = chan->data;
memcpy(smp->local_pk, smp_dev->local_pk, 64);
- memcpy(smp->local_sk, smp_dev->local_sk, 32);
memcpy(smp->lr, smp_dev->local_rand, 16);
if (smp_dev->debug_key)
@@ -1894,22 +1906,20 @@ static u8 sc_send_public_key(struct smp_chan *smp)
if (hci_dev_test_flag(hdev, HCI_USE_DEBUG_KEYS)) {
BT_DBG("Using debug keys");
+ if (set_ecdh_privkey(smp->tfm_ecdh, debug_sk))
+ return SMP_UNSPECIFIED;
memcpy(smp->local_pk, debug_pk, 64);
- memcpy(smp->local_sk, debug_sk, 32);
set_bit(SMP_FLAG_DEBUG_KEY, &smp->flags);
} else {
while (true) {
- /* Seed private key with random number */
- get_random_bytes(smp->local_sk, 32);
-
- /* Generate local key pair for Secure Connections */
- if (!generate_ecdh_keys(smp->local_pk, smp->local_sk))
+ /* Generate key pair for Secure Connections */
+ if (generate_ecdh_keys(smp->tfm_ecdh, smp->local_pk))
return SMP_UNSPECIFIED;
/* This is unlikely, but we need to check that
* we didn't accidentially generate a debug key.
*/
- if (crypto_memneq(smp->local_sk, debug_sk, 32))
+ if (crypto_memneq(smp->local_pk, debug_pk, 64))
break;
}
}
@@ -1917,7 +1927,6 @@ static u8 sc_send_public_key(struct smp_chan *smp)
done:
SMP_DBG("Local Public Key X: %32phN", smp->local_pk);
SMP_DBG("Local Public Key Y: %32phN", smp->local_pk + 32);
- SMP_DBG("Local Private Key: %32phN", smp->local_sk);
smp_send_cmd(smp->conn, SMP_CMD_PUBLIC_KEY, 64, smp->local_pk);
@@ -2059,11 +2068,11 @@ static int fixup_sc_false_positive(struct smp_chan *smp)
return SMP_UNSPECIFIED;
if (hci_dev_test_flag(hdev, HCI_SC_ONLY)) {
- BT_ERR("Refusing SMP SC -> legacy fallback in SC-only mode");
+ bt_dev_err(hdev, "refusing legacy fallback in SC-only mode");
return SMP_UNSPECIFIED;
}
- BT_ERR("Trying to fall back to legacy SMP");
+ bt_dev_err(hdev, "trying to fall back to legacy SMP");
req = (void *) &smp->preq[1];
rsp = (void *) &smp->prsp[1];
@@ -2074,7 +2083,7 @@ static int fixup_sc_false_positive(struct smp_chan *smp)
auth = req->auth_req & AUTH_REQ_MASK(hdev);
if (tk_request(conn, 0, auth, rsp->io_capability, req->io_capability)) {
- BT_ERR("Failed to fall back to legacy SMP");
+ bt_dev_err(hdev, "failed to fall back to legacy SMP");
return SMP_UNSPECIFIED;
}
@@ -2347,7 +2356,7 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level)
chan = conn->smp;
if (!chan) {
- BT_ERR("SMP security requested but not available");
+ bt_dev_err(hcon->hdev, "security requested but not available");
return 1;
}
@@ -2540,7 +2549,7 @@ static int smp_cmd_ident_addr_info(struct l2cap_conn *conn,
*/
if (!bacmp(&info->bdaddr, BDADDR_ANY) ||
!hci_is_identity_address(&info->bdaddr, info->addr_type)) {
- BT_ERR("Ignoring IRK with no identity address");
+ bt_dev_err(hcon->hdev, "ignoring IRK with no identity address");
goto distribute;
}
@@ -2645,6 +2654,7 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb)
struct l2cap_chan *chan = conn->smp;
struct smp_chan *smp = chan->data;
struct hci_dev *hdev = hcon->hdev;
+ struct crypto_kpp *tfm_ecdh;
struct smp_cmd_pairing_confirm cfm;
int err;
@@ -2677,7 +2687,18 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb)
SMP_DBG("Remote Public Key X: %32phN", smp->remote_pk);
SMP_DBG("Remote Public Key Y: %32phN", smp->remote_pk + 32);
- if (!compute_ecdh_secret(smp->remote_pk, smp->local_sk, smp->dhkey))
+ /* Compute the shared secret on the same crypto tfm on which the private
+ * key was set/generated.
+ */
+ if (test_bit(SMP_FLAG_LOCAL_OOB, &smp->flags)) {
+ struct smp_dev *smp_dev = chan->data;
+
+ tfm_ecdh = smp_dev->tfm_ecdh;
+ } else {
+ tfm_ecdh = smp->tfm_ecdh;
+ }
+
+ if (compute_ecdh_secret(tfm_ecdh, smp->remote_pk, smp->dhkey))
return SMP_UNSPECIFIED;
SMP_DBG("DHKey %32phN", smp->dhkey);
@@ -2933,8 +2954,8 @@ done:
return err;
drop:
- BT_ERR("%s unexpected SMP command 0x%02x from %pMR", hcon->hdev->name,
- code, &hcon->dst);
+ bt_dev_err(hcon->hdev, "unexpected SMP command 0x%02x from %pMR",
+ code, &hcon->dst);
kfree_skb(skb);
return 0;
}
@@ -3001,8 +3022,7 @@ static void bredr_pairing(struct l2cap_chan *chan)
smp = smp_chan_create(conn);
if (!smp) {
- BT_ERR("%s unable to create SMP context for BR/EDR",
- hdev->name);
+ bt_dev_err(hdev, "unable to create SMP context for BR/EDR");
return;
}
@@ -3169,6 +3189,7 @@ static struct l2cap_chan *smp_add_cid(struct hci_dev *hdev, u16 cid)
struct smp_dev *smp;
struct crypto_cipher *tfm_aes;
struct crypto_shash *tfm_cmac;
+ struct crypto_kpp *tfm_ecdh;
if (cid == L2CAP_CID_SMP_BREDR) {
smp = NULL;
@@ -3194,8 +3215,18 @@ static struct l2cap_chan *smp_add_cid(struct hci_dev *hdev, u16 cid)
return ERR_CAST(tfm_cmac);
}
+ tfm_ecdh = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0);
+ if (IS_ERR(tfm_ecdh)) {
+ BT_ERR("Unable to create ECDH crypto context");
+ crypto_free_shash(tfm_cmac);
+ crypto_free_cipher(tfm_aes);
+ kzfree(smp);
+ return ERR_CAST(tfm_ecdh);
+ }
+
smp->tfm_aes = tfm_aes;
smp->tfm_cmac = tfm_cmac;
+ smp->tfm_ecdh = tfm_ecdh;
smp->min_key_size = SMP_MIN_ENC_KEY_SIZE;
smp->max_key_size = SMP_MAX_ENC_KEY_SIZE;
@@ -3205,6 +3236,7 @@ create_chan:
if (smp) {
crypto_free_cipher(smp->tfm_aes);
crypto_free_shash(smp->tfm_cmac);
+ crypto_free_kpp(smp->tfm_ecdh);
kzfree(smp);
}
return ERR_PTR(-ENOMEM);
@@ -3252,6 +3284,7 @@ static void smp_del_chan(struct l2cap_chan *chan)
chan->data = NULL;
crypto_free_cipher(smp->tfm_aes);
crypto_free_shash(smp->tfm_cmac);
+ crypto_free_kpp(smp->tfm_ecdh);
kzfree(smp);
}
@@ -3490,25 +3523,18 @@ void smp_unregister(struct hci_dev *hdev)
#if IS_ENABLED(CONFIG_BT_SELFTEST_SMP)
-static inline void swap_digits(u64 *in, u64 *out, unsigned int ndigits)
+static int __init test_debug_key(struct crypto_kpp *tfm_ecdh)
{
- int i;
-
- for (i = 0; i < ndigits; i++)
- out[i] = __swab64(in[ndigits - 1 - i]);
-}
-
-static int __init test_debug_key(void)
-{
- u8 pk[64], sk[32];
-
- swap_digits((u64 *)debug_sk, (u64 *)sk, 4);
+ u8 pk[64];
+ int err;
- if (!generate_ecdh_keys(pk, sk))
- return -EINVAL;
+ err = set_ecdh_privkey(tfm_ecdh, debug_sk);
+ if (err)
+ return err;
- if (crypto_memneq(sk, debug_sk, 32))
- return -EINVAL;
+ err = generate_ecdh_public_key(tfm_ecdh, pk);
+ if (err)
+ return err;
if (crypto_memneq(pk, debug_pk, 64))
return -EINVAL;
@@ -3763,7 +3789,8 @@ static const struct file_operations test_smp_fops = {
};
static int __init run_selftests(struct crypto_cipher *tfm_aes,
- struct crypto_shash *tfm_cmac)
+ struct crypto_shash *tfm_cmac,
+ struct crypto_kpp *tfm_ecdh)
{
ktime_t calltime, delta, rettime;
unsigned long long duration;
@@ -3771,7 +3798,7 @@ static int __init run_selftests(struct crypto_cipher *tfm_aes,
calltime = ktime_get();
- err = test_debug_key();
+ err = test_debug_key(tfm_ecdh);
if (err) {
BT_ERR("debug_key test failed");
goto done;
@@ -3848,6 +3875,7 @@ int __init bt_selftest_smp(void)
{
struct crypto_cipher *tfm_aes;
struct crypto_shash *tfm_cmac;
+ struct crypto_kpp *tfm_ecdh;
int err;
tfm_aes = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC);
@@ -3863,10 +3891,19 @@ int __init bt_selftest_smp(void)
return PTR_ERR(tfm_cmac);
}
- err = run_selftests(tfm_aes, tfm_cmac);
+ tfm_ecdh = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0);
+ if (IS_ERR(tfm_ecdh)) {
+ BT_ERR("Unable to create ECDH crypto context");
+ crypto_free_shash(tfm_cmac);
+ crypto_free_cipher(tfm_aes);
+ return PTR_ERR(tfm_ecdh);
+ }
+
+ err = run_selftests(tfm_aes, tfm_cmac, tfm_ecdh);
crypto_free_shash(tfm_cmac);
crypto_free_cipher(tfm_aes);
+ crypto_free_kpp(tfm_ecdh);
return err;
}
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 6be41a44d688..a86e6687026e 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -133,7 +133,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
if (is_l2)
__skb_push(skb, ETH_HLEN);
if (is_direct_pkt_access)
- bpf_compute_data_end(skb);
+ bpf_compute_data_pointers(skb);
retval = bpf_test_run(prog, skb, repeat, &duration);
if (!is_l2)
__skb_push(skb, ETH_HLEN);
@@ -162,6 +162,7 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
xdp.data_hard_start = data;
xdp.data = data + XDP_PACKET_HEADROOM + NET_IP_ALIGN;
+ xdp.data_meta = xdp.data;
xdp.data_end = xdp.data + size;
retval = bpf_test_run(prog, &xdp, repeat, &duration);
diff --git a/net/bridge/Makefile b/net/bridge/Makefile
index 40b1ede527ca..ac9ef337f0fa 100644
--- a/net/bridge/Makefile
+++ b/net/bridge/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the IEEE 802.1d ethernet bridging layer.
#
@@ -7,7 +8,7 @@ obj-$(CONFIG_BRIDGE) += bridge.o
bridge-y := br.o br_device.o br_fdb.o br_forward.o br_if.o br_input.o \
br_ioctl.o br_stp.o br_stp_bpdu.o \
br_stp_if.o br_stp_timer.o br_netlink.o \
- br_netlink_tunnel.o
+ br_netlink_tunnel.o br_arp_nd_proxy.o
bridge-$(CONFIG_SYSFS) += br_sysfs_if.o br_sysfs_br.o
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 1407d1ba7577..6bf06e756df2 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -112,7 +112,7 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
/* Events that may cause spanning tree to refresh */
if (event == NETDEV_CHANGEADDR || event == NETDEV_UP ||
event == NETDEV_CHANGE || event == NETDEV_DOWN)
- br_ifinfo_notify(RTM_NEWLINK, p);
+ br_ifinfo_notify(RTM_NEWLINK, NULL, p);
return NOTIFY_DONE;
}
diff --git a/net/bridge/br_arp_nd_proxy.c b/net/bridge/br_arp_nd_proxy.c
new file mode 100644
index 000000000000..2cf7716254be
--- /dev/null
+++ b/net/bridge/br_arp_nd_proxy.c
@@ -0,0 +1,469 @@
+/*
+ * Handle bridge arp/nd proxy/suppress
+ *
+ * Copyright (C) 2017 Cumulus Networks
+ * Copyright (c) 2017 Roopa Prabhu <roopa@cumulusnetworks.com>
+ *
+ * Authors:
+ * Roopa Prabhu <roopa@cumulusnetworks.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/neighbour.h>
+#include <net/arp.h>
+#include <linux/if_vlan.h>
+#include <linux/inetdevice.h>
+#include <net/addrconf.h>
+#if IS_ENABLED(CONFIG_IPV6)
+#include <net/ip6_checksum.h>
+#endif
+
+#include "br_private.h"
+
+void br_recalculate_neigh_suppress_enabled(struct net_bridge *br)
+{
+ struct net_bridge_port *p;
+ bool neigh_suppress = false;
+
+ list_for_each_entry(p, &br->port_list, list) {
+ if (p->flags & BR_NEIGH_SUPPRESS) {
+ neigh_suppress = true;
+ break;
+ }
+ }
+
+ br->neigh_suppress_enabled = neigh_suppress;
+}
+
+#if IS_ENABLED(CONFIG_INET)
+static void br_arp_send(struct net_bridge *br, struct net_bridge_port *p,
+ struct net_device *dev, __be32 dest_ip, __be32 src_ip,
+ const unsigned char *dest_hw,
+ const unsigned char *src_hw,
+ const unsigned char *target_hw,
+ __be16 vlan_proto, u16 vlan_tci)
+{
+ struct net_bridge_vlan_group *vg;
+ struct sk_buff *skb;
+ u16 pvid;
+
+ netdev_dbg(dev, "arp send dev %s dst %pI4 dst_hw %pM src %pI4 src_hw %pM\n",
+ dev->name, &dest_ip, dest_hw, &src_ip, src_hw);
+
+ if (!vlan_tci) {
+ arp_send(ARPOP_REPLY, ETH_P_ARP, dest_ip, dev, src_ip,
+ dest_hw, src_hw, target_hw);
+ return;
+ }
+
+ skb = arp_create(ARPOP_REPLY, ETH_P_ARP, dest_ip, dev, src_ip,
+ dest_hw, src_hw, target_hw);
+ if (!skb)
+ return;
+
+ if (p)
+ vg = nbp_vlan_group_rcu(p);
+ else
+ vg = br_vlan_group_rcu(br);
+ pvid = br_get_pvid(vg);
+ if (pvid == (vlan_tci & VLAN_VID_MASK))
+ vlan_tci = 0;
+
+ if (vlan_tci)
+ __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci);
+
+ if (p) {
+ arp_xmit(skb);
+ } else {
+ skb_reset_mac_header(skb);
+ __skb_pull(skb, skb_network_offset(skb));
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ skb->pkt_type = PACKET_HOST;
+
+ netif_rx_ni(skb);
+ }
+}
+
+static int br_chk_addr_ip(struct net_device *dev, void *data)
+{
+ __be32 ip = *(__be32 *)data;
+ struct in_device *in_dev;
+ __be32 addr = 0;
+
+ in_dev = __in_dev_get_rcu(dev);
+ if (in_dev)
+ addr = inet_confirm_addr(dev_net(dev), in_dev, 0, ip,
+ RT_SCOPE_HOST);
+
+ if (addr == ip)
+ return 1;
+
+ return 0;
+}
+
+static bool br_is_local_ip(struct net_device *dev, __be32 ip)
+{
+ if (br_chk_addr_ip(dev, &ip))
+ return true;
+
+ /* check if ip is configured on upper dev */
+ if (netdev_walk_all_upper_dev_rcu(dev, br_chk_addr_ip, &ip))
+ return true;
+
+ return false;
+}
+
+void br_do_proxy_suppress_arp(struct sk_buff *skb, struct net_bridge *br,
+ u16 vid, struct net_bridge_port *p)
+{
+ struct net_device *dev = br->dev;
+ struct net_device *vlandev = dev;
+ struct neighbour *n;
+ struct arphdr *parp;
+ u8 *arpptr, *sha;
+ __be32 sip, tip;
+
+ BR_INPUT_SKB_CB(skb)->proxyarp_replied = false;
+
+ if ((dev->flags & IFF_NOARP) ||
+ !pskb_may_pull(skb, arp_hdr_len(dev)))
+ return;
+
+ parp = arp_hdr(skb);
+
+ if (parp->ar_pro != htons(ETH_P_IP) ||
+ parp->ar_hln != dev->addr_len ||
+ parp->ar_pln != 4)
+ return;
+
+ arpptr = (u8 *)parp + sizeof(struct arphdr);
+ sha = arpptr;
+ arpptr += dev->addr_len; /* sha */
+ memcpy(&sip, arpptr, sizeof(sip));
+ arpptr += sizeof(sip);
+ arpptr += dev->addr_len; /* tha */
+ memcpy(&tip, arpptr, sizeof(tip));
+
+ if (ipv4_is_loopback(tip) ||
+ ipv4_is_multicast(tip))
+ return;
+
+ if (br->neigh_suppress_enabled) {
+ if (p && (p->flags & BR_NEIGH_SUPPRESS))
+ return;
+ if (ipv4_is_zeronet(sip) || sip == tip) {
+ /* prevent flooding to neigh suppress ports */
+ BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
+ return;
+ }
+ }
+
+ if (parp->ar_op != htons(ARPOP_REQUEST))
+ return;
+
+ if (vid != 0) {
+ vlandev = __vlan_find_dev_deep_rcu(br->dev, skb->vlan_proto,
+ vid);
+ if (!vlandev)
+ return;
+ }
+
+ if (br->neigh_suppress_enabled && br_is_local_ip(vlandev, tip)) {
+ /* its our local ip, so don't proxy reply
+ * and don't forward to neigh suppress ports
+ */
+ BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
+ return;
+ }
+
+ n = neigh_lookup(&arp_tbl, &tip, vlandev);
+ if (n) {
+ struct net_bridge_fdb_entry *f;
+
+ if (!(n->nud_state & NUD_VALID)) {
+ neigh_release(n);
+ return;
+ }
+
+ f = br_fdb_find_rcu(br, n->ha, vid);
+ if (f) {
+ bool replied = false;
+
+ if ((p && (p->flags & BR_PROXYARP)) ||
+ (f->dst && (f->dst->flags & (BR_PROXYARP_WIFI |
+ BR_NEIGH_SUPPRESS)))) {
+ if (!vid)
+ br_arp_send(br, p, skb->dev, sip, tip,
+ sha, n->ha, sha, 0, 0);
+ else
+ br_arp_send(br, p, skb->dev, sip, tip,
+ sha, n->ha, sha,
+ skb->vlan_proto,
+ skb_vlan_tag_get(skb));
+ replied = true;
+ }
+
+ /* If we have replied or as long as we know the
+ * mac, indicate to arp replied
+ */
+ if (replied || br->neigh_suppress_enabled)
+ BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
+ }
+
+ neigh_release(n);
+ }
+}
+#endif
+
+#if IS_ENABLED(CONFIG_IPV6)
+struct nd_msg *br_is_nd_neigh_msg(struct sk_buff *skb, struct nd_msg *msg)
+{
+ struct nd_msg *m;
+
+ m = skb_header_pointer(skb, skb_network_offset(skb) +
+ sizeof(struct ipv6hdr), sizeof(*msg), msg);
+ if (!m)
+ return NULL;
+
+ if (m->icmph.icmp6_code != 0 ||
+ (m->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION &&
+ m->icmph.icmp6_type != NDISC_NEIGHBOUR_ADVERTISEMENT))
+ return NULL;
+
+ return m;
+}
+
+static void br_nd_send(struct net_bridge *br, struct net_bridge_port *p,
+ struct sk_buff *request, struct neighbour *n,
+ __be16 vlan_proto, u16 vlan_tci, struct nd_msg *ns)
+{
+ struct net_device *dev = request->dev;
+ struct net_bridge_vlan_group *vg;
+ struct sk_buff *reply;
+ struct nd_msg *na;
+ struct ipv6hdr *pip6;
+ int na_olen = 8; /* opt hdr + ETH_ALEN for target */
+ int ns_olen;
+ int i, len;
+ u8 *daddr;
+ u16 pvid;
+
+ if (!dev)
+ return;
+
+ len = LL_RESERVED_SPACE(dev) + sizeof(struct ipv6hdr) +
+ sizeof(*na) + na_olen + dev->needed_tailroom;
+
+ reply = alloc_skb(len, GFP_ATOMIC);
+ if (!reply)
+ return;
+
+ reply->protocol = htons(ETH_P_IPV6);
+ reply->dev = dev;
+ skb_reserve(reply, LL_RESERVED_SPACE(dev));
+ skb_push(reply, sizeof(struct ethhdr));
+ skb_set_mac_header(reply, 0);
+
+ daddr = eth_hdr(request)->h_source;
+
+ /* Do we need option processing ? */
+ ns_olen = request->len - (skb_network_offset(request) +
+ sizeof(struct ipv6hdr)) - sizeof(*ns);
+ for (i = 0; i < ns_olen - 1; i += (ns->opt[i + 1] << 3)) {
+ if (ns->opt[i] == ND_OPT_SOURCE_LL_ADDR) {
+ daddr = ns->opt + i + sizeof(struct nd_opt_hdr);
+ break;
+ }
+ }
+
+ /* Ethernet header */
+ ether_addr_copy(eth_hdr(reply)->h_dest, daddr);
+ ether_addr_copy(eth_hdr(reply)->h_source, n->ha);
+ eth_hdr(reply)->h_proto = htons(ETH_P_IPV6);
+ reply->protocol = htons(ETH_P_IPV6);
+
+ skb_pull(reply, sizeof(struct ethhdr));
+ skb_set_network_header(reply, 0);
+ skb_put(reply, sizeof(struct ipv6hdr));
+
+ /* IPv6 header */
+ pip6 = ipv6_hdr(reply);
+ memset(pip6, 0, sizeof(struct ipv6hdr));
+ pip6->version = 6;
+ pip6->priority = ipv6_hdr(request)->priority;
+ pip6->nexthdr = IPPROTO_ICMPV6;
+ pip6->hop_limit = 255;
+ pip6->daddr = ipv6_hdr(request)->saddr;
+ pip6->saddr = *(struct in6_addr *)n->primary_key;
+
+ skb_pull(reply, sizeof(struct ipv6hdr));
+ skb_set_transport_header(reply, 0);
+
+ na = (struct nd_msg *)skb_put(reply, sizeof(*na) + na_olen);
+
+ /* Neighbor Advertisement */
+ memset(na, 0, sizeof(*na) + na_olen);
+ na->icmph.icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT;
+ na->icmph.icmp6_router = 0; /* XXX: should be 1 ? */
+ na->icmph.icmp6_override = 1;
+ na->icmph.icmp6_solicited = 1;
+ na->target = ns->target;
+ ether_addr_copy(&na->opt[2], n->ha);
+ na->opt[0] = ND_OPT_TARGET_LL_ADDR;
+ na->opt[1] = na_olen >> 3;
+
+ na->icmph.icmp6_cksum = csum_ipv6_magic(&pip6->saddr,
+ &pip6->daddr,
+ sizeof(*na) + na_olen,
+ IPPROTO_ICMPV6,
+ csum_partial(na, sizeof(*na) + na_olen, 0));
+
+ pip6->payload_len = htons(sizeof(*na) + na_olen);
+
+ skb_push(reply, sizeof(struct ipv6hdr));
+ skb_push(reply, sizeof(struct ethhdr));
+
+ reply->ip_summed = CHECKSUM_UNNECESSARY;
+
+ if (p)
+ vg = nbp_vlan_group_rcu(p);
+ else
+ vg = br_vlan_group_rcu(br);
+ pvid = br_get_pvid(vg);
+ if (pvid == (vlan_tci & VLAN_VID_MASK))
+ vlan_tci = 0;
+
+ if (vlan_tci)
+ __vlan_hwaccel_put_tag(reply, vlan_proto, vlan_tci);
+
+ netdev_dbg(dev, "nd send dev %s dst %pI6 dst_hw %pM src %pI6 src_hw %pM\n",
+ dev->name, &pip6->daddr, daddr, &pip6->saddr, n->ha);
+
+ if (p) {
+ dev_queue_xmit(reply);
+ } else {
+ skb_reset_mac_header(reply);
+ __skb_pull(reply, skb_network_offset(reply));
+ reply->ip_summed = CHECKSUM_UNNECESSARY;
+ reply->pkt_type = PACKET_HOST;
+
+ netif_rx_ni(reply);
+ }
+}
+
+static int br_chk_addr_ip6(struct net_device *dev, void *data)
+{
+ struct in6_addr *addr = (struct in6_addr *)data;
+
+ if (ipv6_chk_addr(dev_net(dev), addr, dev, 0))
+ return 1;
+
+ return 0;
+}
+
+static bool br_is_local_ip6(struct net_device *dev, struct in6_addr *addr)
+
+{
+ if (br_chk_addr_ip6(dev, addr))
+ return true;
+
+ /* check if ip is configured on upper dev */
+ if (netdev_walk_all_upper_dev_rcu(dev, br_chk_addr_ip6, addr))
+ return true;
+
+ return false;
+}
+
+void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br,
+ u16 vid, struct net_bridge_port *p, struct nd_msg *msg)
+{
+ struct net_device *dev = br->dev;
+ struct net_device *vlandev = NULL;
+ struct in6_addr *saddr, *daddr;
+ struct ipv6hdr *iphdr;
+ struct neighbour *n;
+
+ BR_INPUT_SKB_CB(skb)->proxyarp_replied = false;
+
+ if (p && (p->flags & BR_NEIGH_SUPPRESS))
+ return;
+
+ if (msg->icmph.icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT &&
+ !msg->icmph.icmp6_solicited) {
+ /* prevent flooding to neigh suppress ports */
+ BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
+ return;
+ }
+
+ if (msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION)
+ return;
+
+ iphdr = ipv6_hdr(skb);
+ saddr = &iphdr->saddr;
+ daddr = &iphdr->daddr;
+
+ if (ipv6_addr_any(saddr) || !ipv6_addr_cmp(saddr, daddr)) {
+ /* prevent flooding to neigh suppress ports */
+ BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
+ return;
+ }
+
+ if (vid != 0) {
+ /* build neigh table lookup on the vlan device */
+ vlandev = __vlan_find_dev_deep_rcu(br->dev, skb->vlan_proto,
+ vid);
+ if (!vlandev)
+ return;
+ } else {
+ vlandev = dev;
+ }
+
+ if (br_is_local_ip6(vlandev, &msg->target)) {
+ /* its our own ip, so don't proxy reply
+ * and don't forward to arp suppress ports
+ */
+ BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
+ return;
+ }
+
+ n = neigh_lookup(ipv6_stub->nd_tbl, &msg->target, vlandev);
+ if (n) {
+ struct net_bridge_fdb_entry *f;
+
+ if (!(n->nud_state & NUD_VALID)) {
+ neigh_release(n);
+ return;
+ }
+
+ f = br_fdb_find_rcu(br, n->ha, vid);
+ if (f) {
+ bool replied = false;
+
+ if (f->dst && (f->dst->flags & BR_NEIGH_SUPPRESS)) {
+ if (vid != 0)
+ br_nd_send(br, p, skb, n,
+ skb->vlan_proto,
+ skb_vlan_tag_get(skb), msg);
+ else
+ br_nd_send(br, p, skb, n, 0, 0, msg);
+ replied = true;
+ }
+
+ /* If we have replied or as long as we know the
+ * mac, indicate to NEIGH_SUPPRESS ports that we
+ * have replied
+ */
+ if (replied || br->neigh_suppress_enabled)
+ BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
+ }
+ neigh_release(n);
+ }
+}
+#endif
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 861ae2a165f4..af5b8c87f590 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -39,6 +39,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
struct pcpu_sw_netstats *brstats = this_cpu_ptr(br->stats);
const struct nf_br_ops *nf_ops;
const unsigned char *dest;
+ struct ethhdr *eth;
u16 vid = 0;
rcu_read_lock();
@@ -53,14 +54,34 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
brstats->tx_bytes += skb->len;
u64_stats_update_end(&brstats->syncp);
+ br_switchdev_frame_unmark(skb);
BR_INPUT_SKB_CB(skb)->brdev = dev;
skb_reset_mac_header(skb);
+ eth = eth_hdr(skb);
skb_pull(skb, ETH_HLEN);
if (!br_allowed_ingress(br, br_vlan_group_rcu(br), skb, &vid))
goto out;
+ if (IS_ENABLED(CONFIG_INET) &&
+ (eth->h_proto == htons(ETH_P_ARP) ||
+ eth->h_proto == htons(ETH_P_RARP)) &&
+ br->neigh_suppress_enabled) {
+ br_do_proxy_suppress_arp(skb, br, vid, NULL);
+ } else if (IS_ENABLED(CONFIG_IPV6) &&
+ skb->protocol == htons(ETH_P_IPV6) &&
+ br->neigh_suppress_enabled &&
+ pskb_may_pull(skb, sizeof(struct ipv6hdr) +
+ sizeof(struct nd_msg)) &&
+ ipv6_hdr(skb)->nexthdr == IPPROTO_ICMPV6) {
+ struct nd_msg *msg, _msg;
+
+ msg = br_is_nd_neigh_msg(skb, &_msg);
+ if (msg)
+ br_do_suppress_nd(skb, br, vid, NULL, msg);
+ }
+
dest = eth_hdr(skb)->h_dest;
if (is_broadcast_ether_addr(dest)) {
br_flood(br, skb, BR_PKT_BROADCAST, false, true);
@@ -319,12 +340,13 @@ void br_netpoll_disable(struct net_bridge_port *p)
#endif
-static int br_add_slave(struct net_device *dev, struct net_device *slave_dev)
+static int br_add_slave(struct net_device *dev, struct net_device *slave_dev,
+ struct netlink_ext_ack *extack)
{
struct net_bridge *br = netdev_priv(dev);
- return br_add_if(br, slave_dev);
+ return br_add_if(br, slave_dev, extack);
}
static int br_del_slave(struct net_device *dev, struct net_device *slave_dev)
@@ -399,7 +421,7 @@ void br_dev_setup(struct net_device *dev)
br->bridge_id.prio[0] = 0x80;
br->bridge_id.prio[1] = 0x00;
- ether_addr_copy(br->group_addr, eth_reserved_addr_base);
+ ether_addr_copy(br->group_addr, eth_stp_addr);
br->stp_enabled = BR_NO_STP;
br->group_fwd_mask = BR_GROUPFWD_DEFAULT;
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index a5e4a736a984..4ea5c8bbe286 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -25,6 +25,7 @@
#include <asm/unaligned.h>
#include <linux/if_vlan.h>
#include <net/switchdev.h>
+#include <trace/events/bridge.h>
#include "br_private.h"
static struct kmem_cache *br_fdb_cache __read_mostly;
@@ -169,29 +170,13 @@ static void fdb_del_hw_addr(struct net_bridge *br, const unsigned char *addr)
}
}
-static void fdb_del_external_learn(struct net_bridge_fdb_entry *f)
-{
- struct switchdev_obj_port_fdb fdb = {
- .obj = {
- .orig_dev = f->dst->dev,
- .id = SWITCHDEV_OBJ_ID_PORT_FDB,
- .flags = SWITCHDEV_F_DEFER,
- },
- .vid = f->vlan_id,
- };
-
- ether_addr_copy(fdb.addr, f->addr.addr);
- switchdev_port_obj_del(f->dst->dev, &fdb.obj);
-}
-
static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f)
{
+ trace_fdb_delete(br, f);
+
if (f->is_static)
fdb_del_hw_addr(br, f->addr.addr);
- if (f->added_by_external_learn)
- fdb_del_external_learn(f);
-
hlist_del_init_rcu(&f->hlist);
fdb_notify(br, f, RTM_DELNEIGH);
call_rcu(&f->rcu, fdb_rcu_free);
@@ -598,8 +583,10 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
fdb->updated = now;
if (unlikely(added_by_user))
fdb->added_by_user = 1;
- if (unlikely(fdb_modified))
+ if (unlikely(fdb_modified)) {
+ trace_br_fdb_update(br, source, addr, vid, added_by_user);
fdb_notify(br, fdb, RTM_NEWNEIGH);
+ }
}
} else {
spin_lock(&br->hash_lock);
@@ -608,6 +595,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
if (fdb) {
if (unlikely(added_by_user))
fdb->added_by_user = 1;
+ trace_br_fdb_update(br, source, addr, vid, added_by_user);
fdb_notify(br, fdb, RTM_NEWNEIGH);
}
}
@@ -888,6 +876,8 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
struct net_bridge *br = NULL;
int err = 0;
+ trace_br_fdb_add(ndm, dev, addr, vid, nlh_flags);
+
if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_NOARP|NUD_REACHABLE))) {
pr_info("bridge: RTM_NEWNEIGH with invalid state %#x\n", ndm->ndm_state);
return -EINVAL;
@@ -1084,6 +1074,8 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
bool modified = false;
int err = 0;
+ trace_br_fdb_external_learn_add(br, p, addr, vid);
+
spin_lock_bh(&br->hash_lock);
head = &br->hash[br_mac_hash(addr, vid)];
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 48fb17417fac..b4eed113d2ec 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -204,7 +204,7 @@ void br_flood(struct net_bridge *br, struct sk_buff *skb,
/* Do not flood to ports that enable proxy ARP */
if (p->flags & BR_PROXYARP)
continue;
- if ((p->flags & BR_PROXYARP_WIFI) &&
+ if ((p->flags & (BR_PROXYARP_WIFI | BR_NEIGH_SUPPRESS)) &&
BR_INPUT_SKB_CB(skb)->proxyarp_replied)
continue;
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index f3aef22931ab..9ba4ed65c52b 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -271,7 +271,7 @@ static void del_nbp(struct net_bridge_port *p)
br_stp_disable_port(p);
spin_unlock_bh(&br->lock);
- br_ifinfo_notify(RTM_DELLINK, p);
+ br_ifinfo_notify(RTM_DELLINK, NULL, p);
list_del_rcu(&p->list);
if (netdev_get_fwd_headroom(dev) == br->dev->needed_headroom)
@@ -310,6 +310,8 @@ void br_dev_delete(struct net_device *dev, struct list_head *head)
del_nbp(p);
}
+ br_recalculate_neigh_suppress_enabled(br);
+
br_fdb_delete_by_port(br, NULL, 0, 1);
cancel_delayed_work_sync(&br->gc_work);
@@ -480,7 +482,8 @@ netdev_features_t br_features_recompute(struct net_bridge *br,
}
/* called with RTNL */
-int br_add_if(struct net_bridge *br, struct net_device *dev)
+int br_add_if(struct net_bridge *br, struct net_device *dev,
+ struct netlink_ext_ack *extack)
{
struct net_bridge_port *p;
int err = 0;
@@ -500,16 +503,22 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
return -EINVAL;
/* No bridging of bridges */
- if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit)
+ if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit) {
+ NL_SET_ERR_MSG(extack,
+ "Can not enslave a bridge to a bridge");
return -ELOOP;
+ }
/* Device is already being bridged */
if (br_port_exists(dev))
return -EBUSY;
/* No bridging devices that dislike that (e.g. wireless) */
- if (dev->priv_flags & IFF_DONT_BRIDGE)
+ if (dev->priv_flags & IFF_DONT_BRIDGE) {
+ NL_SET_ERR_MSG(extack,
+ "Device does not allow enslaving to a bridge");
return -EOPNOTSUPP;
+ }
p = new_nbp(br, dev);
if (IS_ERR(p))
@@ -540,7 +549,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
dev->priv_flags |= IFF_BRIDGE_PORT;
- err = netdev_master_upper_dev_link(dev, br->dev, NULL, NULL);
+ err = netdev_master_upper_dev_link(dev, br->dev, NULL, NULL, extack);
if (err)
goto err5;
@@ -580,7 +589,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
br_stp_enable_port(p);
spin_unlock_bh(&br->lock);
- br_ifinfo_notify(RTM_NEWLINK, p);
+ br_ifinfo_notify(RTM_NEWLINK, NULL, p);
if (changed_addr)
call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev);
@@ -653,4 +662,7 @@ void br_port_flags_change(struct net_bridge_port *p, unsigned long mask)
if (mask & BR_AUTO_MASK)
nbp_update_port_count(br);
+
+ if (mask & BR_NEIGH_SUPPRESS)
+ br_recalculate_neigh_suppress_enabled(br);
}
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 7637f58c1226..7f98a7d25866 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -71,62 +71,6 @@ static int br_pass_frame_up(struct sk_buff *skb)
br_netif_receive_skb);
}
-static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br,
- u16 vid, struct net_bridge_port *p)
-{
- struct net_device *dev = br->dev;
- struct neighbour *n;
- struct arphdr *parp;
- u8 *arpptr, *sha;
- __be32 sip, tip;
-
- BR_INPUT_SKB_CB(skb)->proxyarp_replied = false;
-
- if ((dev->flags & IFF_NOARP) ||
- !pskb_may_pull(skb, arp_hdr_len(dev)))
- return;
-
- parp = arp_hdr(skb);
-
- if (parp->ar_pro != htons(ETH_P_IP) ||
- parp->ar_op != htons(ARPOP_REQUEST) ||
- parp->ar_hln != dev->addr_len ||
- parp->ar_pln != 4)
- return;
-
- arpptr = (u8 *)parp + sizeof(struct arphdr);
- sha = arpptr;
- arpptr += dev->addr_len; /* sha */
- memcpy(&sip, arpptr, sizeof(sip));
- arpptr += sizeof(sip);
- arpptr += dev->addr_len; /* tha */
- memcpy(&tip, arpptr, sizeof(tip));
-
- if (ipv4_is_loopback(tip) ||
- ipv4_is_multicast(tip))
- return;
-
- n = neigh_lookup(&arp_tbl, &tip, dev);
- if (n) {
- struct net_bridge_fdb_entry *f;
-
- if (!(n->nud_state & NUD_VALID)) {
- neigh_release(n);
- return;
- }
-
- f = br_fdb_find_rcu(br, n->ha, vid);
- if (f && ((p->flags & BR_PROXYARP) ||
- (f->dst && (f->dst->flags & BR_PROXYARP_WIFI)))) {
- arp_send(ARPOP_REPLY, ETH_P_ARP, sip, skb->dev, tip,
- sha, n->ha, sha);
- BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
- }
-
- neigh_release(n);
- }
-}
-
/* note: already called with rcu_read_lock */
int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
@@ -171,15 +115,29 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
BR_INPUT_SKB_CB(skb)->brdev = br->dev;
- if (IS_ENABLED(CONFIG_INET) && skb->protocol == htons(ETH_P_ARP))
- br_do_proxy_arp(skb, br, vid, p);
+ if (IS_ENABLED(CONFIG_INET) &&
+ (skb->protocol == htons(ETH_P_ARP) ||
+ skb->protocol == htons(ETH_P_RARP))) {
+ br_do_proxy_suppress_arp(skb, br, vid, p);
+ } else if (IS_ENABLED(CONFIG_IPV6) &&
+ skb->protocol == htons(ETH_P_IPV6) &&
+ br->neigh_suppress_enabled &&
+ pskb_may_pull(skb, sizeof(struct ipv6hdr) +
+ sizeof(struct nd_msg)) &&
+ ipv6_hdr(skb)->nexthdr == IPPROTO_ICMPV6) {
+ struct nd_msg *msg, _msg;
+
+ msg = br_is_nd_neigh_msg(skb, &_msg);
+ if (msg)
+ br_do_suppress_nd(skb, br, vid, p, msg);
+ }
switch (pkt_type) {
case BR_PKT_MULTICAST:
mdst = br_mdb_get(br, skb, vid);
if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) &&
br_multicast_querier_exists(br, eth_hdr(skb))) {
- if ((mdst && mdst->mglist) ||
+ if ((mdst && mdst->host_joined) ||
br_multicast_is_router(br)) {
local_rcv = true;
br->dev->stats.multicast++;
@@ -289,6 +247,7 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
*
* Others reserved for future standardization
*/
+ fwd_mask |= p->group_fwd_mask;
switch (dest[5]) {
case 0x00: /* Bridge Group Address */
/* If STP is turned off,
diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c
index 7970f8540cbb..73b957fd639d 100644
--- a/net/bridge/br_ioctl.c
+++ b/net/bridge/br_ioctl.c
@@ -98,7 +98,7 @@ static int add_del_if(struct net_bridge *br, int ifindex, int isadd)
return -EINVAL;
if (isadd)
- ret = br_add_if(br, dev);
+ ret = br_add_if(br, dev, NULL);
else
ret = br_del_if(br, dev);
@@ -293,7 +293,7 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
if (!ret) {
if (p)
- br_ifinfo_notify(RTM_NEWLINK, p);
+ br_ifinfo_notify(RTM_NEWLINK, NULL, p);
else
netdev_state_change(br->dev);
}
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index a0b11e7d67d9..b0f4c734900b 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/err.h>
#include <linux/igmp.h>
#include <linux/kernel.h>
@@ -291,6 +292,46 @@ err:
kfree(priv);
}
+static void br_mdb_switchdev_host_port(struct net_device *dev,
+ struct net_device *lower_dev,
+ struct br_mdb_entry *entry, int type)
+{
+ struct switchdev_obj_port_mdb mdb = {
+ .obj = {
+ .id = SWITCHDEV_OBJ_ID_HOST_MDB,
+ .flags = SWITCHDEV_F_DEFER,
+ },
+ .vid = entry->vid,
+ };
+
+ if (entry->addr.proto == htons(ETH_P_IP))
+ ip_eth_mc_map(entry->addr.u.ip4, mdb.addr);
+#if IS_ENABLED(CONFIG_IPV6)
+ else
+ ipv6_eth_mc_map(&entry->addr.u.ip6, mdb.addr);
+#endif
+
+ mdb.obj.orig_dev = dev;
+ switch (type) {
+ case RTM_NEWMDB:
+ switchdev_port_obj_add(lower_dev, &mdb.obj);
+ break;
+ case RTM_DELMDB:
+ switchdev_port_obj_del(lower_dev, &mdb.obj);
+ break;
+ }
+}
+
+static void br_mdb_switchdev_host(struct net_device *dev,
+ struct br_mdb_entry *entry, int type)
+{
+ struct net_device *lower_dev;
+ struct list_head *iter;
+
+ netdev_for_each_lower_dev(dev, lower_dev, iter)
+ br_mdb_switchdev_host_port(dev, lower_dev, entry, type);
+}
+
static void __br_mdb_notify(struct net_device *dev, struct net_bridge_port *p,
struct br_mdb_entry *entry, int type)
{
@@ -316,7 +357,7 @@ static void __br_mdb_notify(struct net_device *dev, struct net_bridge_port *p,
#endif
mdb.obj.orig_dev = port_dev;
- if (port_dev && type == RTM_NEWMDB) {
+ if (p && port_dev && type == RTM_NEWMDB) {
complete_info = kmalloc(sizeof(*complete_info), GFP_ATOMIC);
if (complete_info) {
complete_info->port = p;
@@ -326,10 +367,13 @@ static void __br_mdb_notify(struct net_device *dev, struct net_bridge_port *p,
if (switchdev_port_obj_add(port_dev, &mdb.obj))
kfree(complete_info);
}
- } else if (port_dev && type == RTM_DELMDB) {
+ } else if (p && port_dev && type == RTM_DELMDB) {
switchdev_port_obj_del(port_dev, &mdb.obj);
}
+ if (!p)
+ br_mdb_switchdev_host(dev, entry, type);
+
skb = nlmsg_new(rtnl_mdb_nlmsg_size(), GFP_ATOMIC);
if (!skb)
goto errout;
@@ -352,7 +396,10 @@ void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port,
struct br_mdb_entry entry;
memset(&entry, 0, sizeof(entry));
- entry.ifindex = port->dev->ifindex;
+ if (port)
+ entry.ifindex = port->dev->ifindex;
+ else
+ entry.ifindex = dev->ifindex;
entry.addr.proto = group->proto;
entry.addr.u.ip4 = group->u.ip4;
#if IS_ENABLED(CONFIG_IPV6)
@@ -654,7 +701,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry)
call_rcu_bh(&p->rcu, br_multicast_free_pg);
err = 0;
- if (!mp->ports && !mp->mglist &&
+ if (!mp->ports && !mp->host_joined &&
netif_running(br->dev))
mod_timer(&mp->timer, jiffies);
break;
@@ -713,9 +760,9 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
void br_mdb_init(void)
{
- rtnl_register(PF_BRIDGE, RTM_GETMDB, NULL, br_mdb_dump, NULL);
- rtnl_register(PF_BRIDGE, RTM_NEWMDB, br_mdb_add, NULL, NULL);
- rtnl_register(PF_BRIDGE, RTM_DELMDB, br_mdb_del, NULL, NULL);
+ rtnl_register(PF_BRIDGE, RTM_GETMDB, NULL, br_mdb_dump, 0);
+ rtnl_register(PF_BRIDGE, RTM_NEWMDB, br_mdb_add, NULL, 0);
+ rtnl_register(PF_BRIDGE, RTM_DELMDB, br_mdb_del, NULL, 0);
}
void br_mdb_uninit(void)
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 8dc5c8d69bcd..cb4729539b82 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -239,9 +239,9 @@ static void br_multicast_free_group(struct rcu_head *head)
kfree(mp);
}
-static void br_multicast_group_expired(unsigned long data)
+static void br_multicast_group_expired(struct timer_list *t)
{
- struct net_bridge_mdb_entry *mp = (void *)data;
+ struct net_bridge_mdb_entry *mp = from_timer(mp, t, timer);
struct net_bridge *br = mp->br;
struct net_bridge_mdb_htable *mdb;
@@ -249,7 +249,8 @@ static void br_multicast_group_expired(unsigned long data)
if (!netif_running(br->dev) || timer_pending(&mp->timer))
goto out;
- mp->mglist = false;
+ mp->host_joined = false;
+ br_mdb_notify(br->dev, NULL, &mp->addr, RTM_DELMDB, 0);
if (mp->ports)
goto out;
@@ -292,7 +293,7 @@ static void br_multicast_del_pg(struct net_bridge *br,
p->flags);
call_rcu_bh(&p->rcu, br_multicast_free_pg);
- if (!mp->ports && !mp->mglist &&
+ if (!mp->ports && !mp->host_joined &&
netif_running(br->dev))
mod_timer(&mp->timer, jiffies);
@@ -302,9 +303,9 @@ static void br_multicast_del_pg(struct net_bridge *br,
WARN_ON(1);
}
-static void br_multicast_port_group_expired(unsigned long data)
+static void br_multicast_port_group_expired(struct timer_list *t)
{
- struct net_bridge_port_group *pg = (void *)data;
+ struct net_bridge_port_group *pg = from_timer(pg, t, timer);
struct net_bridge *br = pg->port->br;
spin_lock(&br->multicast_lock);
@@ -701,8 +702,7 @@ rehash:
mp->br = br;
mp->addr = *group;
- setup_timer(&mp->timer, br_multicast_group_expired,
- (unsigned long)mp);
+ timer_setup(&mp->timer, br_multicast_group_expired, 0);
hlist_add_head_rcu(&mp->hlist[mdb->ver], &mdb->mhash[hash]);
mdb->size++;
@@ -729,8 +729,7 @@ struct net_bridge_port_group *br_multicast_new_port_group(
p->flags = flags;
rcu_assign_pointer(p->next, next);
hlist_add_head(&p->mglist, &port->mglist);
- setup_timer(&p->timer, br_multicast_port_group_expired,
- (unsigned long)p);
+ timer_setup(&p->timer, br_multicast_port_group_expired, 0);
if (src)
memcpy(p->eth_addr, src, ETH_ALEN);
@@ -775,7 +774,10 @@ static int br_multicast_add_group(struct net_bridge *br,
goto err;
if (!port) {
- mp->mglist = true;
+ if (!mp->host_joined) {
+ mp->host_joined = true;
+ br_mdb_notify(br->dev, NULL, &mp->addr, RTM_NEWMDB, 0);
+ }
mod_timer(&mp->timer, now + br->multicast_membership_interval);
goto out;
}
@@ -843,9 +845,10 @@ static int br_ip6_multicast_add_group(struct net_bridge *br,
}
#endif
-static void br_multicast_router_expired(unsigned long data)
+static void br_multicast_router_expired(struct timer_list *t)
{
- struct net_bridge_port *port = (void *)data;
+ struct net_bridge_port *port =
+ from_timer(port, t, multicast_router_timer);
struct net_bridge *br = port->br;
spin_lock(&br->multicast_lock);
@@ -859,8 +862,32 @@ out:
spin_unlock(&br->multicast_lock);
}
-static void br_multicast_local_router_expired(unsigned long data)
+static void br_mc_router_state_change(struct net_bridge *p,
+ bool is_mc_router)
+{
+ struct switchdev_attr attr = {
+ .orig_dev = p->dev,
+ .id = SWITCHDEV_ATTR_ID_BRIDGE_MROUTER,
+ .flags = SWITCHDEV_F_DEFER,
+ .u.mrouter = is_mc_router,
+ };
+
+ switchdev_port_attr_set(p->dev, &attr);
+}
+
+static void br_multicast_local_router_expired(struct timer_list *t)
{
+ struct net_bridge *br = from_timer(br, t, multicast_router_timer);
+
+ spin_lock(&br->multicast_lock);
+ if (br->multicast_router == MDB_RTR_TYPE_DISABLED ||
+ br->multicast_router == MDB_RTR_TYPE_PERM ||
+ timer_pending(&br->multicast_router_timer))
+ goto out;
+
+ br_mc_router_state_change(br, false);
+out:
+ spin_unlock(&br->multicast_lock);
}
static void br_multicast_querier_expired(struct net_bridge *br,
@@ -876,17 +903,17 @@ out:
spin_unlock(&br->multicast_lock);
}
-static void br_ip4_multicast_querier_expired(unsigned long data)
+static void br_ip4_multicast_querier_expired(struct timer_list *t)
{
- struct net_bridge *br = (void *)data;
+ struct net_bridge *br = from_timer(br, t, ip4_other_query.timer);
br_multicast_querier_expired(br, &br->ip4_own_query);
}
#if IS_ENABLED(CONFIG_IPV6)
-static void br_ip6_multicast_querier_expired(unsigned long data)
+static void br_ip6_multicast_querier_expired(struct timer_list *t)
{
- struct net_bridge *br = (void *)data;
+ struct net_bridge *br = from_timer(br, t, ip6_other_query.timer);
br_multicast_querier_expired(br, &br->ip6_own_query);
}
@@ -987,17 +1014,17 @@ out:
spin_unlock(&br->multicast_lock);
}
-static void br_ip4_multicast_port_query_expired(unsigned long data)
+static void br_ip4_multicast_port_query_expired(struct timer_list *t)
{
- struct net_bridge_port *port = (void *)data;
+ struct net_bridge_port *port = from_timer(port, t, ip4_own_query.timer);
br_multicast_port_query_expired(port, &port->ip4_own_query);
}
#if IS_ENABLED(CONFIG_IPV6)
-static void br_ip6_multicast_port_query_expired(unsigned long data)
+static void br_ip6_multicast_port_query_expired(struct timer_list *t)
{
- struct net_bridge_port *port = (void *)data;
+ struct net_bridge_port *port = from_timer(port, t, ip6_own_query.timer);
br_multicast_port_query_expired(port, &port->ip6_own_query);
}
@@ -1019,13 +1046,13 @@ int br_multicast_add_port(struct net_bridge_port *port)
{
port->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
- setup_timer(&port->multicast_router_timer, br_multicast_router_expired,
- (unsigned long)port);
- setup_timer(&port->ip4_own_query.timer,
- br_ip4_multicast_port_query_expired, (unsigned long)port);
+ timer_setup(&port->multicast_router_timer,
+ br_multicast_router_expired, 0);
+ timer_setup(&port->ip4_own_query.timer,
+ br_ip4_multicast_port_query_expired, 0);
#if IS_ENABLED(CONFIG_IPV6)
- setup_timer(&port->ip6_own_query.timer,
- br_ip6_multicast_port_query_expired, (unsigned long)port);
+ timer_setup(&port->ip6_own_query.timer,
+ br_ip6_multicast_port_query_expired, 0);
#endif
br_mc_disabled_update(port->dev, port->br->multicast_disabled);
@@ -1364,9 +1391,12 @@ static void br_multicast_mark_router(struct net_bridge *br,
unsigned long now = jiffies;
if (!port) {
- if (br->multicast_router == MDB_RTR_TYPE_TEMP_QUERY)
+ if (br->multicast_router == MDB_RTR_TYPE_TEMP_QUERY) {
+ if (!timer_pending(&br->multicast_router_timer))
+ br_mc_router_state_change(br, true);
mod_timer(&br->multicast_router_timer,
now + br->multicast_querier_interval);
+ }
return;
}
@@ -1451,7 +1481,7 @@ static int br_ip4_multicast_query(struct net_bridge *br,
max_delay *= br->multicast_last_member_count;
- if (mp->mglist &&
+ if (mp->host_joined &&
(timer_pending(&mp->timer) ?
time_after(mp->timer.expires, now + max_delay) :
try_to_del_timer_sync(&mp->timer) >= 0))
@@ -1535,7 +1565,7 @@ static int br_ip6_multicast_query(struct net_bridge *br,
goto out;
max_delay *= br->multicast_last_member_count;
- if (mp->mglist &&
+ if (mp->host_joined &&
(timer_pending(&mp->timer) ?
time_after(mp->timer.expires, now + max_delay) :
try_to_del_timer_sync(&mp->timer) >= 0))
@@ -1596,7 +1626,7 @@ br_multicast_leave_group(struct net_bridge *br,
br_mdb_notify(br->dev, port, group, RTM_DELMDB,
p->flags);
- if (!mp->ports && !mp->mglist &&
+ if (!mp->ports && !mp->host_joined &&
netif_running(br->dev))
mod_timer(&mp->timer, jiffies);
}
@@ -1636,7 +1666,7 @@ br_multicast_leave_group(struct net_bridge *br,
br->multicast_last_member_interval;
if (!port) {
- if (mp->mglist &&
+ if (mp->host_joined &&
(timer_pending(&mp->timer) ?
time_after(mp->timer.expires, time) :
try_to_del_timer_sync(&mp->timer) >= 0)) {
@@ -1906,17 +1936,17 @@ static void br_multicast_query_expired(struct net_bridge *br,
spin_unlock(&br->multicast_lock);
}
-static void br_ip4_multicast_query_expired(unsigned long data)
+static void br_ip4_multicast_query_expired(struct timer_list *t)
{
- struct net_bridge *br = (void *)data;
+ struct net_bridge *br = from_timer(br, t, ip4_own_query.timer);
br_multicast_query_expired(br, &br->ip4_own_query, &br->ip4_querier);
}
#if IS_ENABLED(CONFIG_IPV6)
-static void br_ip6_multicast_query_expired(unsigned long data)
+static void br_ip6_multicast_query_expired(struct timer_list *t)
{
- struct net_bridge *br = (void *)data;
+ struct net_bridge *br = from_timer(br, t, ip6_own_query.timer);
br_multicast_query_expired(br, &br->ip6_own_query, &br->ip6_querier);
}
@@ -1951,17 +1981,17 @@ void br_multicast_init(struct net_bridge *br)
br->has_ipv6_addr = 1;
spin_lock_init(&br->multicast_lock);
- setup_timer(&br->multicast_router_timer,
+ timer_setup(&br->multicast_router_timer,
br_multicast_local_router_expired, 0);
- setup_timer(&br->ip4_other_query.timer,
- br_ip4_multicast_querier_expired, (unsigned long)br);
- setup_timer(&br->ip4_own_query.timer, br_ip4_multicast_query_expired,
- (unsigned long)br);
+ timer_setup(&br->ip4_other_query.timer,
+ br_ip4_multicast_querier_expired, 0);
+ timer_setup(&br->ip4_own_query.timer,
+ br_ip4_multicast_query_expired, 0);
#if IS_ENABLED(CONFIG_IPV6)
- setup_timer(&br->ip6_other_query.timer,
- br_ip6_multicast_querier_expired, (unsigned long)br);
- setup_timer(&br->ip6_own_query.timer, br_ip6_multicast_query_expired,
- (unsigned long)br);
+ timer_setup(&br->ip6_other_query.timer,
+ br_ip6_multicast_querier_expired, 0);
+ timer_setup(&br->ip6_own_query.timer,
+ br_ip6_multicast_query_expired, 0);
#endif
}
@@ -2042,9 +2072,14 @@ int br_multicast_set_router(struct net_bridge *br, unsigned long val)
switch (val) {
case MDB_RTR_TYPE_DISABLED:
case MDB_RTR_TYPE_PERM:
+ br_mc_router_state_change(br, val == MDB_RTR_TYPE_PERM);
del_timer(&br->multicast_router_timer);
- /* fall through */
+ br->multicast_router = val;
+ err = 0;
+ break;
case MDB_RTR_TYPE_TEMP_QUERY:
+ if (br->multicast_router != MDB_RTR_TYPE_TEMP_QUERY)
+ br_mc_router_state_change(br, false);
br->multicast_router = val;
err = 0;
break;
@@ -2184,6 +2219,18 @@ bool br_multicast_enabled(const struct net_device *dev)
}
EXPORT_SYMBOL_GPL(br_multicast_enabled);
+bool br_multicast_router(const struct net_device *dev)
+{
+ struct net_bridge *br = netdev_priv(dev);
+ bool is_router;
+
+ spin_lock_bh(&br->multicast_lock);
+ is_router = br_multicast_is_router(br);
+ spin_unlock_bh(&br->multicast_lock);
+ return is_router;
+}
+EXPORT_SYMBOL_GPL(br_multicast_router);
+
int br_multicast_set_querier(struct net_bridge *br, unsigned long val)
{
unsigned long max_delay;
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 2261e5194c82..c2eea1b8737a 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -887,7 +887,7 @@ EXPORT_SYMBOL_GPL(br_netfilter_enable);
/* For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because
* br_dev_queue_push_xmit is called afterwards */
-static struct nf_hook_ops br_nf_ops[] __read_mostly = {
+static const struct nf_hook_ops br_nf_ops[] = {
{
.hook = br_nf_pre_routing,
.pf = NFPROTO_BRIDGE,
@@ -985,22 +985,25 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net,
int (*okfn)(struct net *, struct sock *,
struct sk_buff *))
{
- struct nf_hook_entry *elem;
+ const struct nf_hook_entries *e;
struct nf_hook_state state;
+ struct nf_hook_ops **ops;
+ unsigned int i;
int ret;
- for (elem = rcu_dereference(net->nf.hooks[NFPROTO_BRIDGE][hook]);
- elem && nf_hook_entry_priority(elem) <= NF_BR_PRI_BRNF;
- elem = rcu_dereference(elem->next))
- ;
-
- if (!elem)
+ e = rcu_dereference(net->nf.hooks[NFPROTO_BRIDGE][hook]);
+ if (!e)
return okfn(net, sk, skb);
+ ops = nf_hook_entries_get_hook_ops(e);
+ for (i = 0; i < e->num_hook_entries &&
+ ops[i]->priority <= NF_BR_PRI_BRNF; i++)
+ ;
+
nf_hook_state_init(&state, hook, NFPROTO_BRIDGE, indev, outdev,
sk, net, okfn);
- ret = nf_hook_slow(skb, &state, elem);
+ ret = nf_hook_slow(skb, &state, e, i);
if (ret == 1)
ret = okfn(net, sk, skb);
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 3bc890716c89..d0ef0a8e8831 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -138,6 +138,7 @@ static inline size_t br_port_info_size(void)
+ nla_total_size(1) /* IFLA_BRPORT_PROXYARP */
+ nla_total_size(1) /* IFLA_BRPORT_PROXYARP_WIFI */
+ nla_total_size(1) /* IFLA_BRPORT_VLAN_TUNNEL */
+ + nla_total_size(1) /* IFLA_BRPORT_NEIGH_SUPPRESS */
+ nla_total_size(sizeof(struct ifla_bridge_id)) /* IFLA_BRPORT_ROOT_ID */
+ nla_total_size(sizeof(struct ifla_bridge_id)) /* IFLA_BRPORT_BRIDGE_ID */
+ nla_total_size(sizeof(u16)) /* IFLA_BRPORT_DESIGNATED_PORT */
@@ -152,6 +153,7 @@ static inline size_t br_port_info_size(void)
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+ nla_total_size(sizeof(u8)) /* IFLA_BRPORT_MULTICAST_ROUTER */
#endif
+ + nla_total_size(sizeof(u16)) /* IFLA_BRPORT_GROUP_FWD_MASK */
+ 0;
}
@@ -208,7 +210,10 @@ static int br_port_fill_attrs(struct sk_buff *skb,
p->topology_change_ack) ||
nla_put_u8(skb, IFLA_BRPORT_CONFIG_PENDING, p->config_pending) ||
nla_put_u8(skb, IFLA_BRPORT_VLAN_TUNNEL, !!(p->flags &
- BR_VLAN_TUNNEL)))
+ BR_VLAN_TUNNEL)) ||
+ nla_put_u16(skb, IFLA_BRPORT_GROUP_FWD_MASK, p->group_fwd_mask) ||
+ nla_put_u8(skb, IFLA_BRPORT_NEIGH_SUPPRESS,
+ !!(p->flags & BR_NEIGH_SUPPRESS)))
return -EMSGSIZE;
timerval = br_timer_value(&p->message_age_timer);
@@ -356,14 +361,14 @@ nla_put_failure:
* Contains port and master info as well as carrier and bridge state.
*/
static int br_fill_ifinfo(struct sk_buff *skb,
- struct net_bridge_port *port,
+ const struct net_bridge_port *port,
u32 pid, u32 seq, int event, unsigned int flags,
u32 filter_mask, const struct net_device *dev)
{
+ u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN;
struct net_bridge *br;
struct ifinfomsg *hdr;
struct nlmsghdr *nlh;
- u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN;
if (port)
br = port->br;
@@ -449,28 +454,36 @@ nla_put_failure:
return -EMSGSIZE;
}
-/*
- * Notify listeners of a change in port information
- */
-void br_ifinfo_notify(int event, struct net_bridge_port *port)
+/* Notify listeners of a change in bridge or port information */
+void br_ifinfo_notify(int event, const struct net_bridge *br,
+ const struct net_bridge_port *port)
{
- struct net *net;
+ u32 filter = RTEXT_FILTER_BRVLAN_COMPRESSED;
+ struct net_device *dev;
struct sk_buff *skb;
int err = -ENOBUFS;
- u32 filter = RTEXT_FILTER_BRVLAN_COMPRESSED;
+ struct net *net;
+ u16 port_no = 0;
- if (!port)
+ if (WARN_ON(!port && !br))
return;
- net = dev_net(port->dev);
- br_debug(port->br, "port %u(%s) event %d\n",
- (unsigned int)port->port_no, port->dev->name, event);
+ if (port) {
+ dev = port->dev;
+ br = port->br;
+ port_no = port->port_no;
+ } else {
+ dev = br->dev;
+ }
- skb = nlmsg_new(br_nlmsg_size(port->dev, filter), GFP_ATOMIC);
+ net = dev_net(dev);
+ br_debug(br, "port %u(%s) event %d\n", port_no, dev->name, event);
+
+ skb = nlmsg_new(br_nlmsg_size(dev, filter), GFP_ATOMIC);
if (skb == NULL)
goto errout;
- err = br_fill_ifinfo(skb, port, 0, 0, event, 0, filter, port->dev);
+ err = br_fill_ifinfo(skb, port, 0, 0, event, 0, filter, dev);
if (err < 0) {
/* -EMSGSIZE implies BUG in br_nlmsg_size() */
WARN_ON(err == -EMSGSIZE);
@@ -483,7 +496,6 @@ errout:
rtnl_set_sk_err(net, RTNLGRP_LINK, err);
}
-
/*
* Dump information about all ports, in response to GETLINK
*/
@@ -501,8 +513,9 @@ int br_getlink(struct sk_buff *skb, u32 pid, u32 seq,
}
static int br_vlan_info(struct net_bridge *br, struct net_bridge_port *p,
- int cmd, struct bridge_vlan_info *vinfo)
+ int cmd, struct bridge_vlan_info *vinfo, bool *changed)
{
+ bool curr_change;
int err = 0;
switch (cmd) {
@@ -511,22 +524,27 @@ static int br_vlan_info(struct net_bridge *br, struct net_bridge_port *p,
/* if the MASTER flag is set this will act on the global
* per-VLAN entry as well
*/
- err = nbp_vlan_add(p, vinfo->vid, vinfo->flags);
- if (err)
- break;
+ err = nbp_vlan_add(p, vinfo->vid, vinfo->flags,
+ &curr_change);
} else {
vinfo->flags |= BRIDGE_VLAN_INFO_BRENTRY;
- err = br_vlan_add(br, vinfo->vid, vinfo->flags);
+ err = br_vlan_add(br, vinfo->vid, vinfo->flags,
+ &curr_change);
}
+ if (curr_change)
+ *changed = true;
break;
case RTM_DELLINK:
if (p) {
- nbp_vlan_delete(p, vinfo->vid);
- if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER)
- br_vlan_delete(p->br, vinfo->vid);
- } else {
- br_vlan_delete(br, vinfo->vid);
+ if (!nbp_vlan_delete(p, vinfo->vid))
+ *changed = true;
+
+ if ((vinfo->flags & BRIDGE_VLAN_INFO_MASTER) &&
+ !br_vlan_delete(p->br, vinfo->vid))
+ *changed = true;
+ } else if (!br_vlan_delete(br, vinfo->vid)) {
+ *changed = true;
}
break;
}
@@ -537,7 +555,8 @@ static int br_vlan_info(struct net_bridge *br, struct net_bridge_port *p,
static int br_process_vlan_info(struct net_bridge *br,
struct net_bridge_port *p, int cmd,
struct bridge_vlan_info *vinfo_curr,
- struct bridge_vlan_info **vinfo_last)
+ struct bridge_vlan_info **vinfo_last,
+ bool *changed)
{
if (!vinfo_curr->vid || vinfo_curr->vid >= VLAN_VID_MASK)
return -EINVAL;
@@ -567,22 +586,22 @@ static int br_process_vlan_info(struct net_bridge *br,
sizeof(struct bridge_vlan_info));
for (v = (*vinfo_last)->vid; v <= vinfo_curr->vid; v++) {
tmp_vinfo.vid = v;
- err = br_vlan_info(br, p, cmd, &tmp_vinfo);
+ err = br_vlan_info(br, p, cmd, &tmp_vinfo, changed);
if (err)
break;
}
*vinfo_last = NULL;
- return 0;
+ return err;
}
- return br_vlan_info(br, p, cmd, vinfo_curr);
+ return br_vlan_info(br, p, cmd, vinfo_curr, changed);
}
static int br_afspec(struct net_bridge *br,
struct net_bridge_port *p,
struct nlattr *af_spec,
- int cmd)
+ int cmd, bool *changed)
{
struct bridge_vlan_info *vinfo_curr = NULL;
struct bridge_vlan_info *vinfo_last = NULL;
@@ -602,7 +621,8 @@ static int br_afspec(struct net_bridge *br,
return err;
err = br_process_vlan_tunnel_info(br, p, cmd,
&tinfo_curr,
- &tinfo_last);
+ &tinfo_last,
+ changed);
if (err)
return err;
break;
@@ -611,7 +631,7 @@ static int br_afspec(struct net_bridge *br,
return -EINVAL;
vinfo_curr = nla_data(attr);
err = br_process_vlan_info(br, p, cmd, vinfo_curr,
- &vinfo_last);
+ &vinfo_last, changed);
if (err)
return err;
break;
@@ -637,6 +657,9 @@ static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = {
[IFLA_BRPORT_MCAST_TO_UCAST] = { .type = NLA_U8 },
[IFLA_BRPORT_MCAST_FLOOD] = { .type = NLA_U8 },
[IFLA_BRPORT_BCAST_FLOOD] = { .type = NLA_U8 },
+ [IFLA_BRPORT_VLAN_TUNNEL] = { .type = NLA_U8 },
+ [IFLA_BRPORT_GROUP_FWD_MASK] = { .type = NLA_U16 },
+ [IFLA_BRPORT_NEIGH_SUPPRESS] = { .type = NLA_U8 },
};
/* Change the state of the port and notify spanning tree */
@@ -773,6 +796,20 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[])
return err;
}
#endif
+
+ if (tb[IFLA_BRPORT_GROUP_FWD_MASK]) {
+ u16 fwd_mask = nla_get_u16(tb[IFLA_BRPORT_GROUP_FWD_MASK]);
+
+ if (fwd_mask & BR_GROUPFWD_MACPAUSE)
+ return -EINVAL;
+ p->group_fwd_mask = fwd_mask;
+ }
+
+ err = br_set_port_flag(p, tb, IFLA_BRPORT_NEIGH_SUPPRESS,
+ BR_NEIGH_SUPPRESS);
+ if (err)
+ return err;
+
br_port_flags_change(p, old_flags ^ p->flags);
return 0;
}
@@ -780,10 +817,12 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[])
/* Change state and parameters on port. */
int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags)
{
+ struct net_bridge *br = (struct net_bridge *)netdev_priv(dev);
+ struct nlattr *tb[IFLA_BRPORT_MAX + 1];
+ struct net_bridge_port *p;
struct nlattr *protinfo;
struct nlattr *afspec;
- struct net_bridge_port *p;
- struct nlattr *tb[IFLA_BRPORT_MAX + 1];
+ bool changed = false;
int err = 0;
protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_PROTINFO);
@@ -819,15 +858,14 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags)
}
if (err)
goto out;
+ changed = true;
}
- if (afspec) {
- err = br_afspec((struct net_bridge *)netdev_priv(dev), p,
- afspec, RTM_SETLINK);
- }
+ if (afspec)
+ err = br_afspec(br, p, afspec, RTM_SETLINK, &changed);
- if (err == 0)
- br_ifinfo_notify(RTM_NEWLINK, p);
+ if (changed)
+ br_ifinfo_notify(RTM_NEWLINK, br, p);
out:
return err;
}
@@ -835,8 +873,10 @@ out:
/* Delete port information */
int br_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags)
{
- struct nlattr *afspec;
+ struct net_bridge *br = (struct net_bridge *)netdev_priv(dev);
struct net_bridge_port *p;
+ struct nlattr *afspec;
+ bool changed = false;
int err = 0;
afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
@@ -848,13 +888,12 @@ int br_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags)
if (!p && !(dev->priv_flags & IFF_EBRIDGE))
return -EINVAL;
- err = br_afspec((struct net_bridge *)netdev_priv(dev), p,
- afspec, RTM_DELLINK);
- if (err == 0)
+ err = br_afspec(br, p, afspec, RTM_DELLINK, &changed);
+ if (changed)
/* Send RTM_NEWLINK because userspace
* expects RTM_NEWLINK for vlan dels
*/
- br_ifinfo_notify(RTM_NEWLINK, p);
+ br_ifinfo_notify(RTM_NEWLINK, br, p);
return err;
}
diff --git a/net/bridge/br_netlink_tunnel.c b/net/bridge/br_netlink_tunnel.c
index 3712c7f0e00c..da8cb99fd259 100644
--- a/net/bridge/br_netlink_tunnel.c
+++ b/net/bridge/br_netlink_tunnel.c
@@ -198,7 +198,7 @@ static const struct nla_policy vlan_tunnel_policy[IFLA_BRIDGE_VLAN_TUNNEL_MAX +
};
static int br_vlan_tunnel_info(struct net_bridge_port *p, int cmd,
- u16 vid, u32 tun_id)
+ u16 vid, u32 tun_id, bool *changed)
{
int err = 0;
@@ -208,9 +208,12 @@ static int br_vlan_tunnel_info(struct net_bridge_port *p, int cmd,
switch (cmd) {
case RTM_SETLINK:
err = nbp_vlan_tunnel_info_add(p, vid, tun_id);
+ if (!err)
+ *changed = true;
break;
case RTM_DELLINK:
- nbp_vlan_tunnel_info_delete(p, vid);
+ if (!nbp_vlan_tunnel_info_delete(p, vid))
+ *changed = true;
break;
}
@@ -254,7 +257,8 @@ int br_parse_vlan_tunnel_info(struct nlattr *attr,
int br_process_vlan_tunnel_info(struct net_bridge *br,
struct net_bridge_port *p, int cmd,
struct vtunnel_info *tinfo_curr,
- struct vtunnel_info *tinfo_last)
+ struct vtunnel_info *tinfo_last,
+ bool *changed)
{
int err;
@@ -272,7 +276,7 @@ int br_process_vlan_tunnel_info(struct net_bridge *br,
return -EINVAL;
t = tinfo_last->tunid;
for (v = tinfo_last->vid; v <= tinfo_curr->vid; v++) {
- err = br_vlan_tunnel_info(p, cmd, v, t);
+ err = br_vlan_tunnel_info(p, cmd, v, t, changed);
if (err)
return err;
t++;
@@ -283,7 +287,7 @@ int br_process_vlan_tunnel_info(struct net_bridge *br,
if (tinfo_last->flags)
return -EINVAL;
err = br_vlan_tunnel_info(p, cmd, tinfo_curr->vid,
- tinfo_curr->tunid);
+ tinfo_curr->tunid, changed);
if (err)
return err;
memset(tinfo_last, 0, sizeof(struct vtunnel_info));
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index fd9ee73e0a6d..1312b8d20ec3 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -36,7 +36,14 @@
/* Control of forwarding link local multicast */
#define BR_GROUPFWD_DEFAULT 0
/* Don't allow forwarding of control protocols like STP, MAC PAUSE and LACP */
-#define BR_GROUPFWD_RESTRICTED 0x0007u
+enum {
+ BR_GROUPFWD_STP = BIT(0),
+ BR_GROUPFWD_MACPAUSE = BIT(1),
+ BR_GROUPFWD_LACP = BIT(2),
+};
+
+#define BR_GROUPFWD_RESTRICTED (BR_GROUPFWD_STP | BR_GROUPFWD_MACPAUSE | \
+ BR_GROUPFWD_LACP)
/* The Nearest Customer Bridge Group Address, 01-80-C2-00-00-[00,0B,0C,0D,0F] */
#define BR_GROUPFWD_8021AD 0xB801u
@@ -202,7 +209,7 @@ struct net_bridge_mdb_entry
struct rcu_head rcu;
struct timer_list timer;
struct br_ip addr;
- bool mglist;
+ bool host_joined;
};
struct net_bridge_mdb_htable
@@ -268,6 +275,7 @@ struct net_bridge_port {
#ifdef CONFIG_NET_SWITCHDEV
int offload_fwd_mark;
#endif
+ u16 group_fwd_mask;
};
#define br_auto_port(p) ((p)->flags & BR_AUTO_MASK)
@@ -396,6 +404,7 @@ struct net_bridge {
#ifdef CONFIG_NET_SWITCHDEV
int offload_fwd_mark;
#endif
+ bool neigh_suppress_enabled;
};
struct br_input_skb_cb {
@@ -558,7 +567,8 @@ void br_flood(struct net_bridge *br, struct sk_buff *skb,
void br_port_carrier_check(struct net_bridge_port *p);
int br_add_bridge(struct net *net, const char *name);
int br_del_bridge(struct net *net, const char *name);
-int br_add_if(struct net_bridge *br, struct net_device *dev);
+int br_add_if(struct net_bridge *br, struct net_device *dev,
+ struct netlink_ext_ack *extack);
int br_del_if(struct net_bridge *br, struct net_device *dev);
int br_min_mtu(const struct net_bridge *br);
netdev_features_t br_features_recompute(struct net_bridge *br,
@@ -793,7 +803,8 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br,
const struct net_bridge_port *port,
struct net_bridge_vlan_group *vg,
struct sk_buff *skb);
-int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags);
+int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags,
+ bool *changed);
int br_vlan_delete(struct net_bridge *br, u16 vid);
void br_vlan_flush(struct net_bridge *br);
struct net_bridge_vlan *br_vlan_find(struct net_bridge_vlan_group *vg, u16 vid);
@@ -806,7 +817,8 @@ int br_vlan_set_stats(struct net_bridge *br, unsigned long val);
int br_vlan_init(struct net_bridge *br);
int br_vlan_set_default_pvid(struct net_bridge *br, unsigned long val);
int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid);
-int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags);
+int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags,
+ bool *changed);
int nbp_vlan_delete(struct net_bridge_port *port, u16 vid);
void nbp_vlan_flush(struct net_bridge_port *port);
int nbp_vlan_init(struct net_bridge_port *port);
@@ -893,8 +905,10 @@ static inline struct sk_buff *br_handle_vlan(struct net_bridge *br,
return skb;
}
-static inline int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags)
+static inline int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags,
+ bool *changed)
{
+ *changed = false;
return -EOPNOTSUPP;
}
@@ -916,8 +930,10 @@ static inline int br_vlan_init(struct net_bridge *br)
return 0;
}
-static inline int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags)
+static inline int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags,
+ bool *changed)
{
+ *changed = false;
return -EOPNOTSUPP;
}
@@ -1055,7 +1071,8 @@ extern int (*br_fdb_test_addr_hook)(struct net_device *dev, unsigned char *addr)
extern struct rtnl_link_ops br_link_ops;
int br_netlink_init(void);
void br_netlink_fini(void);
-void br_ifinfo_notify(int event, struct net_bridge_port *port);
+void br_ifinfo_notify(int event, const struct net_bridge *br,
+ const struct net_bridge_port *port);
int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg, u16 flags);
int br_dellink(struct net_device *dev, struct nlmsghdr *nlmsg, u16 flags);
int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev,
@@ -1091,6 +1108,11 @@ int br_switchdev_set_port_flag(struct net_bridge_port *p,
unsigned long mask);
void br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb,
int type);
+
+static inline void br_switchdev_frame_unmark(struct sk_buff *skb)
+{
+ skb->offload_fwd_mark = 0;
+}
#else
static inline int nbp_switchdev_mark_set(struct net_bridge_port *p)
{
@@ -1119,6 +1141,17 @@ static inline void
br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb, int type)
{
}
+
+static inline void br_switchdev_frame_unmark(struct sk_buff *skb)
+{
+}
#endif /* CONFIG_NET_SWITCHDEV */
+/* br_arp_nd_proxy.c */
+void br_recalculate_neigh_suppress_enabled(struct net_bridge *br);
+void br_do_proxy_suppress_arp(struct sk_buff *skb, struct net_bridge *br,
+ u16 vid, struct net_bridge_port *p);
+void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br,
+ u16 vid, struct net_bridge_port *p, struct nd_msg *msg);
+struct nd_msg *br_is_nd_neigh_msg(struct sk_buff *skb, struct nd_msg *m);
#endif
diff --git a/net/bridge/br_private_tunnel.h b/net/bridge/br_private_tunnel.h
index 4a447a378ab3..a259471bfd78 100644
--- a/net/bridge/br_private_tunnel.h
+++ b/net/bridge/br_private_tunnel.h
@@ -26,7 +26,8 @@ int br_process_vlan_tunnel_info(struct net_bridge *br,
struct net_bridge_port *p,
int cmd,
struct vtunnel_info *tinfo_curr,
- struct vtunnel_info *tinfo_last);
+ struct vtunnel_info *tinfo_last,
+ bool *changed);
int br_get_vlan_tunnel_info_size(struct net_bridge_vlan_group *vg);
int br_fill_vlan_tunnel_info(struct sk_buff *skb,
struct net_bridge_vlan_group *vg);
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index 8f56c2d1f1a7..b6941961a876 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -123,7 +123,7 @@ static void br_root_port_block(const struct net_bridge *br,
(unsigned int) p->port_no, p->dev->name);
br_set_state(p, BR_STATE_LISTENING);
- br_ifinfo_notify(RTM_NEWLINK, p);
+ br_ifinfo_notify(RTM_NEWLINK, NULL, p);
if (br->forward_delay > 0)
mod_timer(&p->forward_delay_timer, jiffies + br->forward_delay);
@@ -403,7 +403,7 @@ static void br_make_blocking(struct net_bridge_port *p)
br_topology_change_detection(p->br);
br_set_state(p, BR_STATE_BLOCKING);
- br_ifinfo_notify(RTM_NEWLINK, p);
+ br_ifinfo_notify(RTM_NEWLINK, NULL, p);
del_timer(&p->forward_delay_timer);
}
@@ -426,7 +426,7 @@ static void br_make_forwarding(struct net_bridge_port *p)
else
br_set_state(p, BR_STATE_LEARNING);
- br_ifinfo_notify(RTM_NEWLINK, p);
+ br_ifinfo_notify(RTM_NEWLINK, NULL, p);
if (br->forward_delay != 0)
mod_timer(&p->forward_delay_timer, jiffies + br->forward_delay);
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index 89110319ef0f..808e2b914015 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -96,7 +96,7 @@ void br_stp_enable_port(struct net_bridge_port *p)
{
br_init_port(p);
br_port_state_selection(p->br);
- br_ifinfo_notify(RTM_NEWLINK, p);
+ br_ifinfo_notify(RTM_NEWLINK, NULL, p);
}
/* called under bridge lock */
@@ -111,7 +111,7 @@ void br_stp_disable_port(struct net_bridge_port *p)
p->topology_change_ack = 0;
p->config_pending = 0;
- br_ifinfo_notify(RTM_NEWLINK, p);
+ br_ifinfo_notify(RTM_NEWLINK, NULL, p);
del_timer(&p->message_age_timer);
del_timer(&p->forward_delay_timer);
diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c
index 60b6fe277a8b..e7739de5f0e1 100644
--- a/net/bridge/br_stp_timer.c
+++ b/net/bridge/br_stp_timer.c
@@ -31,9 +31,9 @@ static int br_is_designated_for_some_port(const struct net_bridge *br)
return 0;
}
-static void br_hello_timer_expired(unsigned long arg)
+static void br_hello_timer_expired(struct timer_list *t)
{
- struct net_bridge *br = (struct net_bridge *)arg;
+ struct net_bridge *br = from_timer(br, t, hello_timer);
br_debug(br, "hello timer expired\n");
spin_lock(&br->lock);
@@ -47,9 +47,9 @@ static void br_hello_timer_expired(unsigned long arg)
spin_unlock(&br->lock);
}
-static void br_message_age_timer_expired(unsigned long arg)
+static void br_message_age_timer_expired(struct timer_list *t)
{
- struct net_bridge_port *p = (struct net_bridge_port *) arg;
+ struct net_bridge_port *p = from_timer(p, t, message_age_timer);
struct net_bridge *br = p->br;
const bridge_id *id = &p->designated_bridge;
int was_root;
@@ -80,9 +80,9 @@ static void br_message_age_timer_expired(unsigned long arg)
spin_unlock(&br->lock);
}
-static void br_forward_delay_timer_expired(unsigned long arg)
+static void br_forward_delay_timer_expired(struct timer_list *t)
{
- struct net_bridge_port *p = (struct net_bridge_port *) arg;
+ struct net_bridge_port *p = from_timer(p, t, forward_delay_timer);
struct net_bridge *br = p->br;
br_debug(br, "port %u(%s) forward delay timer\n",
@@ -99,14 +99,14 @@ static void br_forward_delay_timer_expired(unsigned long arg)
netif_carrier_on(br->dev);
}
rcu_read_lock();
- br_ifinfo_notify(RTM_NEWLINK, p);
+ br_ifinfo_notify(RTM_NEWLINK, NULL, p);
rcu_read_unlock();
spin_unlock(&br->lock);
}
-static void br_tcn_timer_expired(unsigned long arg)
+static void br_tcn_timer_expired(struct timer_list *t)
{
- struct net_bridge *br = (struct net_bridge *) arg;
+ struct net_bridge *br = from_timer(br, t, tcn_timer);
br_debug(br, "tcn timer expired\n");
spin_lock(&br->lock);
@@ -118,9 +118,9 @@ static void br_tcn_timer_expired(unsigned long arg)
spin_unlock(&br->lock);
}
-static void br_topology_change_timer_expired(unsigned long arg)
+static void br_topology_change_timer_expired(struct timer_list *t)
{
- struct net_bridge *br = (struct net_bridge *) arg;
+ struct net_bridge *br = from_timer(br, t, topology_change_timer);
br_debug(br, "topo change timer expired\n");
spin_lock(&br->lock);
@@ -129,9 +129,9 @@ static void br_topology_change_timer_expired(unsigned long arg)
spin_unlock(&br->lock);
}
-static void br_hold_timer_expired(unsigned long arg)
+static void br_hold_timer_expired(struct timer_list *t)
{
- struct net_bridge_port *p = (struct net_bridge_port *) arg;
+ struct net_bridge_port *p = from_timer(p, t, hold_timer);
br_debug(p->br, "port %u(%s) hold timer expired\n",
(unsigned int) p->port_no, p->dev->name);
@@ -144,27 +144,17 @@ static void br_hold_timer_expired(unsigned long arg)
void br_stp_timer_init(struct net_bridge *br)
{
- setup_timer(&br->hello_timer, br_hello_timer_expired,
- (unsigned long) br);
-
- setup_timer(&br->tcn_timer, br_tcn_timer_expired,
- (unsigned long) br);
-
- setup_timer(&br->topology_change_timer,
- br_topology_change_timer_expired,
- (unsigned long) br);
+ timer_setup(&br->hello_timer, br_hello_timer_expired, 0);
+ timer_setup(&br->tcn_timer, br_tcn_timer_expired, 0);
+ timer_setup(&br->topology_change_timer,
+ br_topology_change_timer_expired, 0);
}
void br_stp_port_timer_init(struct net_bridge_port *p)
{
- setup_timer(&p->message_age_timer, br_message_age_timer_expired,
- (unsigned long) p);
-
- setup_timer(&p->forward_delay_timer, br_forward_delay_timer_expired,
- (unsigned long) p);
-
- setup_timer(&p->hold_timer, br_hold_timer_expired,
- (unsigned long) p);
+ timer_setup(&p->message_age_timer, br_message_age_timer_expired, 0);
+ timer_setup(&p->forward_delay_timer, br_forward_delay_timer_expired, 0);
+ timer_setup(&p->hold_timer, br_hold_timer_expired, 0);
}
/* Report ticks left (in USER_HZ) used for API */
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index 181a44d0f1da..9700e0f3307b 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/netdevice.h>
@@ -115,7 +116,7 @@ br_switchdev_fdb_call_notifiers(bool adding, const unsigned char *mac,
void
br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb, int type)
{
- if (!fdb->added_by_user)
+ if (!fdb->added_by_user || !fdb->dst)
return;
switch (type) {
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index 5d5d413a6cf8..0254c35b2bf0 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -165,6 +165,23 @@ static int store_flush(struct net_bridge_port *p, unsigned long v)
}
static BRPORT_ATTR(flush, S_IWUSR, NULL, store_flush);
+static ssize_t show_group_fwd_mask(struct net_bridge_port *p, char *buf)
+{
+ return sprintf(buf, "%#x\n", p->group_fwd_mask);
+}
+
+static int store_group_fwd_mask(struct net_bridge_port *p,
+ unsigned long v)
+{
+ if (v & BR_GROUPFWD_MACPAUSE)
+ return -EINVAL;
+ p->group_fwd_mask = v;
+
+ return 0;
+}
+static BRPORT_ATTR(group_fwd_mask, S_IRUGO | S_IWUSR, show_group_fwd_mask,
+ store_group_fwd_mask);
+
BRPORT_ATTR_FLAG(hairpin_mode, BR_HAIRPIN_MODE);
BRPORT_ATTR_FLAG(bpdu_guard, BR_BPDU_GUARD);
BRPORT_ATTR_FLAG(root_block, BR_ROOT_BLOCK);
@@ -174,6 +191,7 @@ BRPORT_ATTR_FLAG(proxyarp, BR_PROXYARP);
BRPORT_ATTR_FLAG(proxyarp_wifi, BR_PROXYARP_WIFI);
BRPORT_ATTR_FLAG(multicast_flood, BR_MCAST_FLOOD);
BRPORT_ATTR_FLAG(broadcast_flood, BR_BCAST_FLOOD);
+BRPORT_ATTR_FLAG(neigh_suppress, BR_NEIGH_SUPPRESS);
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf)
@@ -223,6 +241,8 @@ static const struct brport_attribute *brport_attrs[] = {
&brport_attr_proxyarp_wifi,
&brport_attr_multicast_flood,
&brport_attr_broadcast_flood,
+ &brport_attr_group_fwd_mask,
+ &brport_attr_neigh_suppress,
NULL
};
@@ -260,7 +280,7 @@ static ssize_t brport_store(struct kobject *kobj,
ret = brport_attr->store(p, val);
spin_unlock_bh(&p->br->lock);
if (!ret) {
- br_ifinfo_notify(RTM_NEWLINK, p);
+ br_ifinfo_notify(RTM_NEWLINK, NULL, p);
ret = count;
}
}
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 233a30040c91..51935270c651 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -32,27 +32,34 @@ static struct net_bridge_vlan *br_vlan_lookup(struct rhashtable *tbl, u16 vid)
return rhashtable_lookup_fast(tbl, &vid, br_vlan_rht_params);
}
-static void __vlan_add_pvid(struct net_bridge_vlan_group *vg, u16 vid)
+static bool __vlan_add_pvid(struct net_bridge_vlan_group *vg, u16 vid)
{
if (vg->pvid == vid)
- return;
+ return false;
smp_wmb();
vg->pvid = vid;
+
+ return true;
}
-static void __vlan_delete_pvid(struct net_bridge_vlan_group *vg, u16 vid)
+static bool __vlan_delete_pvid(struct net_bridge_vlan_group *vg, u16 vid)
{
if (vg->pvid != vid)
- return;
+ return false;
smp_wmb();
vg->pvid = 0;
+
+ return true;
}
-static void __vlan_add_flags(struct net_bridge_vlan *v, u16 flags)
+/* return true if anything changed, false otherwise */
+static bool __vlan_add_flags(struct net_bridge_vlan *v, u16 flags)
{
struct net_bridge_vlan_group *vg;
+ u16 old_flags = v->flags;
+ bool ret;
if (br_vlan_is_master(v))
vg = br_vlan_group(v->br);
@@ -60,14 +67,16 @@ static void __vlan_add_flags(struct net_bridge_vlan *v, u16 flags)
vg = nbp_vlan_group(v->port);
if (flags & BRIDGE_VLAN_INFO_PVID)
- __vlan_add_pvid(vg, v->vid);
+ ret = __vlan_add_pvid(vg, v->vid);
else
- __vlan_delete_pvid(vg, v->vid);
+ ret = __vlan_delete_pvid(vg, v->vid);
if (flags & BRIDGE_VLAN_INFO_UNTAGGED)
v->flags |= BRIDGE_VLAN_INFO_UNTAGGED;
else
v->flags &= ~BRIDGE_VLAN_INFO_UNTAGGED;
+
+ return ret || !!(old_flags ^ v->flags);
}
static int __vlan_vid_add(struct net_device *dev, struct net_bridge *br,
@@ -151,8 +160,10 @@ static struct net_bridge_vlan *br_vlan_get_master(struct net_bridge *br, u16 vid
vg = br_vlan_group(br);
masterv = br_vlan_find(vg, vid);
if (!masterv) {
+ bool changed;
+
/* missing global ctx, create it now */
- if (br_vlan_add(br, vid, 0))
+ if (br_vlan_add(br, vid, 0, &changed))
return NULL;
masterv = br_vlan_find(vg, vid);
if (WARN_ON(!masterv))
@@ -232,8 +243,11 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags)
/* need to work on the master vlan too */
if (flags & BRIDGE_VLAN_INFO_MASTER) {
- err = br_vlan_add(br, v->vid, flags |
- BRIDGE_VLAN_INFO_BRENTRY);
+ bool changed;
+
+ err = br_vlan_add(br, v->vid,
+ flags | BRIDGE_VLAN_INFO_BRENTRY,
+ &changed);
if (err)
goto out_filt;
}
@@ -550,8 +564,9 @@ bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid)
/* Must be protected by RTNL.
* Must be called with vid in range from 1 to 4094 inclusive.
+ * changed must be true only if the vlan was created or updated
*/
-int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags)
+int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags, bool *changed)
{
struct net_bridge_vlan_group *vg;
struct net_bridge_vlan *vlan;
@@ -559,6 +574,7 @@ int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags)
ASSERT_RTNL();
+ *changed = false;
vg = br_vlan_group(br);
vlan = br_vlan_find(vg, vid);
if (vlan) {
@@ -576,8 +592,11 @@ int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags)
refcount_inc(&vlan->refcnt);
vlan->flags |= BRIDGE_VLAN_INFO_BRENTRY;
vg->num_vlans++;
+ *changed = true;
}
- __vlan_add_flags(vlan, flags);
+ if (__vlan_add_flags(vlan, flags))
+ *changed = true;
+
return 0;
}
@@ -600,6 +619,8 @@ int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags)
if (ret) {
free_percpu(vlan->stats);
kfree(vlan);
+ } else {
+ *changed = true;
}
return ret;
@@ -824,9 +845,10 @@ int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid)
const struct net_bridge_vlan *pvent;
struct net_bridge_vlan_group *vg;
struct net_bridge_port *p;
+ unsigned long *changed;
+ bool vlchange;
u16 old_pvid;
int err = 0;
- unsigned long *changed;
if (!pvid) {
br_vlan_disable_default_pvid(br);
@@ -850,7 +872,8 @@ int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid)
err = br_vlan_add(br, pvid,
BRIDGE_VLAN_INFO_PVID |
BRIDGE_VLAN_INFO_UNTAGGED |
- BRIDGE_VLAN_INFO_BRENTRY);
+ BRIDGE_VLAN_INFO_BRENTRY,
+ &vlchange);
if (err)
goto out;
br_vlan_delete(br, old_pvid);
@@ -869,7 +892,8 @@ int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid)
err = nbp_vlan_add(p, pvid,
BRIDGE_VLAN_INFO_PVID |
- BRIDGE_VLAN_INFO_UNTAGGED);
+ BRIDGE_VLAN_INFO_UNTAGGED,
+ &vlchange);
if (err)
goto err_port;
nbp_vlan_delete(p, old_pvid);
@@ -890,7 +914,8 @@ err_port:
if (old_pvid)
nbp_vlan_add(p, old_pvid,
BRIDGE_VLAN_INFO_PVID |
- BRIDGE_VLAN_INFO_UNTAGGED);
+ BRIDGE_VLAN_INFO_UNTAGGED,
+ &vlchange);
nbp_vlan_delete(p, pvid);
}
@@ -899,7 +924,8 @@ err_port:
br_vlan_add(br, old_pvid,
BRIDGE_VLAN_INFO_PVID |
BRIDGE_VLAN_INFO_UNTAGGED |
- BRIDGE_VLAN_INFO_BRENTRY);
+ BRIDGE_VLAN_INFO_BRENTRY,
+ &vlchange);
br_vlan_delete(br, pvid);
}
goto out;
@@ -931,6 +957,7 @@ int br_vlan_init(struct net_bridge *br)
{
struct net_bridge_vlan_group *vg;
int ret = -ENOMEM;
+ bool changed;
vg = kzalloc(sizeof(*vg), GFP_KERNEL);
if (!vg)
@@ -947,7 +974,7 @@ int br_vlan_init(struct net_bridge *br)
rcu_assign_pointer(br->vlgrp, vg);
ret = br_vlan_add(br, 1,
BRIDGE_VLAN_INFO_PVID | BRIDGE_VLAN_INFO_UNTAGGED |
- BRIDGE_VLAN_INFO_BRENTRY);
+ BRIDGE_VLAN_INFO_BRENTRY, &changed);
if (ret)
goto err_vlan_add;
@@ -992,9 +1019,12 @@ int nbp_vlan_init(struct net_bridge_port *p)
INIT_LIST_HEAD(&vg->vlan_list);
rcu_assign_pointer(p->vlgrp, vg);
if (p->br->default_pvid) {
+ bool changed;
+
ret = nbp_vlan_add(p, p->br->default_pvid,
BRIDGE_VLAN_INFO_PVID |
- BRIDGE_VLAN_INFO_UNTAGGED);
+ BRIDGE_VLAN_INFO_UNTAGGED,
+ &changed);
if (ret)
goto err_vlan_add;
}
@@ -1016,8 +1046,10 @@ err_vlan_enabled:
/* Must be protected by RTNL.
* Must be called with vid in range from 1 to 4094 inclusive.
+ * changed must be true only if the vlan was created or updated
*/
-int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags)
+int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags,
+ bool *changed)
{
struct switchdev_obj_port_vlan v = {
.obj.orig_dev = port->dev,
@@ -1031,13 +1063,15 @@ int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags)
ASSERT_RTNL();
+ *changed = false;
vlan = br_vlan_find(nbp_vlan_group(port), vid);
if (vlan) {
/* Pass the flags to the hardware bridge */
ret = switchdev_port_obj_add(port->dev, &v.obj);
if (ret && ret != -EOPNOTSUPP)
return ret;
- __vlan_add_flags(vlan, flags);
+ *changed = __vlan_add_flags(vlan, flags);
+
return 0;
}
@@ -1050,6 +1084,8 @@ int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags)
ret = __vlan_add(vlan, flags);
if (ret)
kfree(vlan);
+ else
+ *changed = true;
return ret;
}
diff --git a/net/bridge/netfilter/Makefile b/net/bridge/netfilter/Makefile
index be4d0cea78ce..2f28e16de6c7 100644
--- a/net/bridge/netfilter/Makefile
+++ b/net/bridge/netfilter/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the netfilter modules for Link Layer filtering on a bridge.
#
diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c
index d06968bdf5ec..2b46c50abce0 100644
--- a/net/bridge/netfilter/ebt_ip.c
+++ b/net/bridge/netfilter/ebt_ip.c
@@ -64,14 +64,14 @@ ebt_ip_mt(const struct sk_buff *skb, struct xt_action_param *par)
if (NF_INVF(info, EBT_IP_DPORT,
dst < info->dport[0] ||
dst > info->dport[1]))
- return false;
+ return false;
}
if (info->bitmask & EBT_IP_SPORT) {
u32 src = ntohs(pptr->src);
if (NF_INVF(info, EBT_IP_SPORT,
src < info->sport[0] ||
src > info->sport[1]))
- return false;
+ return false;
}
}
return true;
diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c
index 4617491be41e..2a5a52a53ec4 100644
--- a/net/bridge/netfilter/ebt_ip6.c
+++ b/net/bridge/netfilter/ebt_ip6.c
@@ -89,7 +89,7 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par)
if (NF_INVF(info, EBT_IP6_SPORT,
src < info->sport[0] ||
src > info->sport[1]))
- return false;
+ return false;
}
if ((info->bitmask & EBT_IP6_ICMP6) &&
NF_INVF(info, EBT_IP6_ICMP6,
diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
index 2585b100ebbb..276b60262981 100644
--- a/net/bridge/netfilter/ebtable_broute.c
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -65,8 +65,8 @@ static int ebt_broute(struct sk_buff *skb)
static int __net_init broute_net_init(struct net *net)
{
- net->xt.broute_table = ebt_register_table(net, &broute_table, NULL);
- return PTR_ERR_OR_ZERO(net->xt.broute_table);
+ return ebt_register_table(net, &broute_table, NULL,
+ &net->xt.broute_table);
}
static void __net_exit broute_net_exit(struct net *net)
diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
index f22ef7c21913..c41da5fac84f 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -70,7 +70,7 @@ ebt_out_hook(void *priv, struct sk_buff *skb,
return ebt_do_table(skb, state, state->net->xt.frame_filter);
}
-static struct nf_hook_ops ebt_ops_filter[] __read_mostly = {
+static const struct nf_hook_ops ebt_ops_filter[] = {
{
.hook = ebt_in_hook,
.pf = NFPROTO_BRIDGE,
@@ -93,8 +93,8 @@ static struct nf_hook_ops ebt_ops_filter[] __read_mostly = {
static int __net_init frame_filter_net_init(struct net *net)
{
- net->xt.frame_filter = ebt_register_table(net, &frame_filter, ebt_ops_filter);
- return PTR_ERR_OR_ZERO(net->xt.frame_filter);
+ return ebt_register_table(net, &frame_filter, ebt_ops_filter,
+ &net->xt.frame_filter);
}
static void __net_exit frame_filter_net_exit(struct net *net)
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
index 2f7a4f314406..08df7406ecb3 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -48,7 +48,7 @@ static int check(const struct ebt_table_info *info, unsigned int valid_hooks)
return 0;
}
-static struct ebt_table frame_nat = {
+static const struct ebt_table frame_nat = {
.name = "nat",
.table = &initial_table,
.valid_hooks = NAT_VALID_HOOKS,
@@ -70,7 +70,7 @@ ebt_nat_out(void *priv, struct sk_buff *skb,
return ebt_do_table(skb, state, state->net->xt.frame_nat);
}
-static struct nf_hook_ops ebt_ops_nat[] __read_mostly = {
+static const struct nf_hook_ops ebt_ops_nat[] = {
{
.hook = ebt_nat_out,
.pf = NFPROTO_BRIDGE,
@@ -93,8 +93,8 @@ static struct nf_hook_ops ebt_ops_nat[] __read_mostly = {
static int __net_init frame_nat_net_init(struct net *net)
{
- net->xt.frame_nat = ebt_register_table(net, &frame_nat, ebt_ops_nat);
- return PTR_ERR_OR_ZERO(net->xt.frame_nat);
+ return ebt_register_table(net, &frame_nat, ebt_ops_nat,
+ &net->xt.frame_nat);
}
static void __net_exit frame_nat_net_exit(struct net *net)
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 9c6e619f452b..37817d25b63d 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -252,13 +252,11 @@ unsigned int ebt_do_table(struct sk_buff *skb,
}
if (verdict == EBT_RETURN) {
letsreturn:
-#ifdef CONFIG_NETFILTER_DEBUG
- if (sp == 0) {
- BUGPRINT("RETURN on base chain");
+ if (WARN(sp == 0, "RETURN on base chain")) {
/* act like this is EBT_CONTINUE */
goto letscontinue;
}
-#endif
+
sp--;
/* put all the local variables right */
i = cs[sp].n;
@@ -271,26 +269,24 @@ letsreturn:
}
if (verdict == EBT_CONTINUE)
goto letscontinue;
-#ifdef CONFIG_NETFILTER_DEBUG
- if (verdict < 0) {
- BUGPRINT("bogus standard verdict\n");
+
+ if (WARN(verdict < 0, "bogus standard verdict\n")) {
read_unlock_bh(&table->lock);
return NF_DROP;
}
-#endif
+
/* jump to a udc */
cs[sp].n = i + 1;
cs[sp].chaininfo = chaininfo;
cs[sp].e = ebt_next_entry(point);
i = 0;
chaininfo = (struct ebt_entries *) (base + verdict);
-#ifdef CONFIG_NETFILTER_DEBUG
- if (chaininfo->distinguisher) {
- BUGPRINT("jump to non-chain\n");
+
+ if (WARN(chaininfo->distinguisher, "jump to non-chain\n")) {
read_unlock_bh(&table->lock);
return NF_DROP;
}
-#endif
+
nentries = chaininfo->nentries;
point = (struct ebt_entry *)chaininfo->data;
counter_base = cb_base + chaininfo->counter_offset;
@@ -1069,15 +1065,10 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl,
#ifdef CONFIG_AUDIT
if (audit_enabled) {
- struct audit_buffer *ab;
-
- ab = audit_log_start(current->audit_context, GFP_KERNEL,
- AUDIT_NETFILTER_CFG);
- if (ab) {
- audit_log_format(ab, "table=%s family=%u entries=%u",
- repl->name, AF_BRIDGE, repl->nentries);
- audit_log_end(ab);
- }
+ audit_log(current->audit_context, GFP_KERNEL,
+ AUDIT_NETFILTER_CFG,
+ "table=%s family=%u entries=%u",
+ repl->name, AF_BRIDGE, repl->nentries);
}
#endif
return ret;
@@ -1178,9 +1169,8 @@ static void __ebt_unregister_table(struct net *net, struct ebt_table *table)
kfree(table);
}
-struct ebt_table *
-ebt_register_table(struct net *net, const struct ebt_table *input_table,
- const struct nf_hook_ops *ops)
+int ebt_register_table(struct net *net, const struct ebt_table *input_table,
+ const struct nf_hook_ops *ops, struct ebt_table **res)
{
struct ebt_table_info *newinfo;
struct ebt_table *t, *table;
@@ -1192,7 +1182,7 @@ ebt_register_table(struct net *net, const struct ebt_table *input_table,
repl->entries == NULL || repl->entries_size == 0 ||
repl->counters != NULL || input_table->private != NULL) {
BUGPRINT("Bad table data for ebt_register_table!!!\n");
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
}
/* Don't add one table to multiple lists. */
@@ -1261,16 +1251,18 @@ ebt_register_table(struct net *net, const struct ebt_table *input_table,
list_add(&table->list, &net->xt.tables[NFPROTO_BRIDGE]);
mutex_unlock(&ebt_mutex);
+ WRITE_ONCE(*res, table);
+
if (!ops)
- return table;
+ return 0;
ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks));
if (ret) {
__ebt_unregister_table(net, table);
- return ERR_PTR(ret);
+ *res = NULL;
}
- return table;
+ return ret;
free_unlock:
mutex_unlock(&ebt_mutex);
free_chainstack:
@@ -1285,7 +1277,7 @@ free_newinfo:
free_table:
kfree(table);
out:
- return ERR_PTR(ret);
+ return ret;
}
void ebt_unregister_table(struct net *net, struct ebt_table *table,
@@ -2120,9 +2112,8 @@ static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base,
for (i = 0, j = 1 ; j < 4 ; j++, i++) {
struct compat_ebt_entry_mwt *match32;
unsigned int size;
- char *buf = buf_start;
+ char *buf = buf_start + offsets[i];
- buf = buf_start + offsets[i];
if (offsets[i] > offsets[j])
return -EINVAL;
diff --git a/net/caif/Makefile b/net/caif/Makefile
index cc2b51154d03..4f6c0517cdfb 100644
--- a/net/caif/Makefile
+++ b/net/caif/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
ccflags-$(CONFIG_CAIF_DEBUG) := -DDEBUG
caif-y := caif_dev.o \
diff --git a/net/can/Makefile b/net/can/Makefile
index 10936754e3f2..1242bbbfe57f 100644
--- a/net/can/Makefile
+++ b/net/can/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the Linux Controller Area Network core.
#
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 88edac0f3e36..003b2d6d655f 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -78,7 +78,7 @@ MODULE_PARM_DESC(stats_timer, "enable timer for statistics (default:on)");
static struct kmem_cache *rcv_cache __read_mostly;
/* table of registered CAN protocols */
-static const struct can_proto *proto_tab[CAN_NPROTO] __read_mostly;
+static const struct can_proto __rcu *proto_tab[CAN_NPROTO] __read_mostly;
static DEFINE_MUTEX(proto_tab_lock);
static atomic_t skbcounter = ATOMIC_INIT(0);
@@ -788,7 +788,7 @@ int can_proto_register(const struct can_proto *cp)
mutex_lock(&proto_tab_lock);
- if (proto_tab[proto]) {
+ if (rcu_access_pointer(proto_tab[proto])) {
pr_err("can: protocol %d already registered\n", proto);
err = -EBUSY;
} else
@@ -812,7 +812,7 @@ void can_proto_unregister(const struct can_proto *cp)
int proto = cp->protocol;
mutex_lock(&proto_tab_lock);
- BUG_ON(proto_tab[proto] != cp);
+ BUG_ON(rcu_access_pointer(proto_tab[proto]) != cp);
RCU_INIT_POINTER(proto_tab[proto], NULL);
mutex_unlock(&proto_tab_lock);
@@ -875,15 +875,20 @@ static int can_pernet_init(struct net *net)
spin_lock_init(&net->can.can_rcvlists_lock);
net->can.can_rx_alldev_list =
kzalloc(sizeof(struct dev_rcv_lists), GFP_KERNEL);
-
+ if (!net->can.can_rx_alldev_list)
+ goto out;
net->can.can_stats = kzalloc(sizeof(struct s_stats), GFP_KERNEL);
+ if (!net->can.can_stats)
+ goto out_free_alldev_list;
net->can.can_pstats = kzalloc(sizeof(struct s_pstats), GFP_KERNEL);
+ if (!net->can.can_pstats)
+ goto out_free_can_stats;
if (IS_ENABLED(CONFIG_PROC_FS)) {
/* the statistics are updated every second (timer triggered) */
if (stats_timer) {
- setup_timer(&net->can.can_stattimer, can_stat_update,
- (unsigned long)net);
+ timer_setup(&net->can.can_stattimer, can_stat_update,
+ 0);
mod_timer(&net->can.can_stattimer,
round_jiffies(jiffies + HZ));
}
@@ -892,6 +897,13 @@ static int can_pernet_init(struct net *net)
}
return 0;
+
+ out_free_can_stats:
+ kfree(net->can.can_stats);
+ out_free_alldev_list:
+ kfree(net->can.can_rx_alldev_list);
+ out:
+ return -ENOMEM;
}
static void can_pernet_exit(struct net *net)
diff --git a/net/can/af_can.h b/net/can/af_can.h
index d0ef45bb2a72..eca6463c6213 100644
--- a/net/can/af_can.h
+++ b/net/can/af_can.h
@@ -113,6 +113,6 @@ struct s_pstats {
/* function prototypes for the CAN networklayer procfs (proc.c) */
void can_init_proc(struct net *net);
void can_remove_proc(struct net *net);
-void can_stat_update(unsigned long data);
+void can_stat_update(struct timer_list *t);
#endif /* AF_CAN_H */
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 47a8748d953a..13690334efa3 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -1493,13 +1493,14 @@ static int bcm_init(struct sock *sk)
static int bcm_release(struct socket *sock)
{
struct sock *sk = sock->sk;
- struct net *net = sock_net(sk);
+ struct net *net;
struct bcm_sock *bo;
struct bcm_op *op, *next;
- if (sk == NULL)
+ if (!sk)
return 0;
+ net = sock_net(sk);
bo = bcm_sk(sk);
/* remove bcm_ops, timer, rx_unregister(), etc. */
diff --git a/net/can/gw.c b/net/can/gw.c
index 29748d844c3f..73a02af4b5d7 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -1031,15 +1031,15 @@ static __init int cgw_module_init(void)
notifier.notifier_call = cgw_notifier;
register_netdevice_notifier(&notifier);
- if (__rtnl_register(PF_CAN, RTM_GETROUTE, NULL, cgw_dump_jobs, NULL)) {
+ if (__rtnl_register(PF_CAN, RTM_GETROUTE, NULL, cgw_dump_jobs, 0)) {
unregister_netdevice_notifier(&notifier);
kmem_cache_destroy(cgw_cache);
return -ENOBUFS;
}
/* Only the first call to __rtnl_register can fail */
- __rtnl_register(PF_CAN, RTM_NEWROUTE, cgw_create_job, NULL, NULL);
- __rtnl_register(PF_CAN, RTM_DELROUTE, cgw_remove_job, NULL, NULL);
+ __rtnl_register(PF_CAN, RTM_NEWROUTE, cgw_create_job, NULL, 0);
+ __rtnl_register(PF_CAN, RTM_DELROUTE, cgw_remove_job, NULL, 0);
return 0;
}
diff --git a/net/can/proc.c b/net/can/proc.c
index 83045f00c63c..d979b3dc49a6 100644
--- a/net/can/proc.c
+++ b/net/can/proc.c
@@ -115,9 +115,9 @@ static unsigned long calc_rate(unsigned long oldjif, unsigned long newjif,
return rate;
}
-void can_stat_update(unsigned long data)
+void can_stat_update(struct timer_list *t)
{
- struct net *net = (struct net *)data;
+ struct net *net = from_timer(net, t, can.can_stattimer);
struct s_stats *can_stats = net->can.can_stats;
unsigned long j = jiffies; /* snapshot */
@@ -221,7 +221,7 @@ static int can_stats_proc_show(struct seq_file *m, void *v)
seq_putc(m, '\n');
- if (net->can.can_stattimer.function == can_stat_update) {
+ if (net->can.can_stattimer.function == (TIMER_FUNC_TYPE)can_stat_update) {
seq_printf(m, " %8ld %% total match ratio (RXMR)\n",
can_stats->total_rx_match_ratio);
@@ -291,7 +291,7 @@ static int can_reset_stats_proc_show(struct seq_file *m, void *v)
user_reset = 1;
- if (net->can.can_stattimer.function == can_stat_update) {
+ if (net->can.can_stattimer.function == (TIMER_FUNC_TYPE)can_stat_update) {
seq_printf(m, "Scheduled statistic reset #%ld.\n",
can_pstats->stats_reset + 1);
} else {
diff --git a/net/ceph/Makefile b/net/ceph/Makefile
index 6a5180903e7b..b4bded4b5396 100644
--- a/net/ceph/Makefile
+++ b/net/ceph/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for CEPH filesystem.
#
diff --git a/net/ceph/armor.c b/net/ceph/armor.c
index 1fc1ee11dfa2..0db8065928df 100644
--- a/net/ceph/armor.c
+++ b/net/ceph/armor.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/errno.h>
diff --git a/net/ceph/auth.c b/net/ceph/auth.c
index 48bb8d95195b..dbde2b3c3c15 100644
--- a/net/ceph/auth.c
+++ b/net/ceph/auth.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/ceph/ceph_debug.h>
#include <linux/module.h>
diff --git a/net/ceph/auth_none.c b/net/ceph/auth_none.c
index df45e467c81f..41d2a0c72236 100644
--- a/net/ceph/auth_none.c
+++ b/net/ceph/auth_none.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/ceph/ceph_debug.h>
diff --git a/net/ceph/auth_none.h b/net/ceph/auth_none.h
index 62021535ae4a..860ed9875791 100644
--- a/net/ceph/auth_none.h
+++ b/net/ceph/auth_none.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _FS_CEPH_AUTH_NONE_H
#define _FS_CEPH_AUTH_NONE_H
diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
index 8757fb87dab8..2f4a1baf5f52 100644
--- a/net/ceph/auth_x.c
+++ b/net/ceph/auth_x.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/ceph/ceph_debug.h>
diff --git a/net/ceph/auth_x.h b/net/ceph/auth_x.h
index 48e9ad41bd2a..454cb54568af 100644
--- a/net/ceph/auth_x.h
+++ b/net/ceph/auth_x.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _FS_CEPH_AUTH_X_H
#define _FS_CEPH_AUTH_X_H
diff --git a/net/ceph/auth_x_protocol.h b/net/ceph/auth_x_protocol.h
index 671d30576c4f..32c13d763b9a 100644
--- a/net/ceph/auth_x_protocol.h
+++ b/net/ceph/auth_x_protocol.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __FS_CEPH_AUTH_X_PROTOCOL
#define __FS_CEPH_AUTH_X_PROTOCOL
diff --git a/net/ceph/buffer.c b/net/ceph/buffer.c
index add5f921a0ff..5622763ad402 100644
--- a/net/ceph/buffer.c
+++ b/net/ceph/buffer.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/ceph/ceph_debug.h>
diff --git a/net/ceph/ceph_fs.c b/net/ceph/ceph_fs.c
index dcbe67ff3e2b..756a2dc10d27 100644
--- a/net/ceph/ceph_fs.c
+++ b/net/ceph/ceph_fs.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Some non-inline ceph helpers
*/
diff --git a/net/ceph/ceph_strings.c b/net/ceph/ceph_strings.c
index 19b7d8aa915c..10e01494993c 100644
--- a/net/ceph/ceph_strings.c
+++ b/net/ceph/ceph_strings.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Ceph string constants
*/
diff --git a/net/ceph/cls_lock_client.c b/net/ceph/cls_lock_client.c
index 08ada893f01e..8d2032b2f225 100644
--- a/net/ceph/cls_lock_client.c
+++ b/net/ceph/cls_lock_client.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/ceph/ceph_debug.h>
#include <linux/types.h>
diff --git a/net/ceph/crush/crush.c b/net/ceph/crush/crush.c
index 4b428f46a8ca..3d70244bc1b6 100644
--- a/net/ceph/crush/crush.c
+++ b/net/ceph/crush/crush.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#ifdef __KERNEL__
# include <linux/slab.h>
# include <linux/crush/crush.h>
diff --git a/net/ceph/crush/hash.c b/net/ceph/crush/hash.c
index ed123af49eba..e5cc603cdb17 100644
--- a/net/ceph/crush/hash.c
+++ b/net/ceph/crush/hash.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#ifdef __KERNEL__
# include <linux/crush/hash.h>
#else
diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c
index 46008d5ac504..489610ac1cdd 100644
--- a/net/ceph/crypto.c
+++ b/net/ceph/crypto.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/ceph/ceph_debug.h>
diff --git a/net/ceph/crypto.h b/net/ceph/crypto.h
index 58d83aa7740f..bb45c7d43739 100644
--- a/net/ceph/crypto.h
+++ b/net/ceph/crypto.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _FS_CEPH_CRYPTO_H
#define _FS_CEPH_CRYPTO_H
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index fa5233e0d01c..1eef6806aa1a 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/ceph/ceph_debug.h>
#include <linux/device.h>
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index a67298c7e0cd..ad93342c90d7 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/ceph/ceph_debug.h>
#include <linux/crc32c.h>
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 875675765531..9ae1bab8c05d 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/ceph/ceph_debug.h>
#include <linux/module.h>
@@ -676,7 +677,8 @@ bad:
/*
* Do a synchronous statfs().
*/
-int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf)
+int ceph_monc_do_statfs(struct ceph_mon_client *monc, u64 data_pool,
+ struct ceph_statfs *buf)
{
struct ceph_mon_generic_request *req;
struct ceph_mon_statfs *h;
@@ -696,6 +698,7 @@ int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf)
goto out;
req->u.st = buf;
+ req->request->hdr.version = cpu_to_le16(2);
mutex_lock(&monc->mutex);
register_generic_request(req);
@@ -705,6 +708,8 @@ int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf)
h->monhdr.session_mon = cpu_to_le16(-1);
h->monhdr.session_mon_tid = 0;
h->fsid = monc->monmap->fsid;
+ h->contains_data_pool = (data_pool != CEPH_NOPOOL);
+ h->data_pool = cpu_to_le64(data_pool);
send_generic_request(monc, req);
mutex_unlock(&monc->mutex);
diff --git a/net/ceph/msgpool.c b/net/ceph/msgpool.c
index aaed59a47b1d..72571535883f 100644
--- a/net/ceph/msgpool.c
+++ b/net/ceph/msgpool.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/ceph/ceph_debug.h>
#include <linux/err.h>
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index dcfbdd74dfd1..2814dba5902d 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/ceph/ceph_debug.h>
@@ -863,8 +864,6 @@ static u32 osd_req_encode_op(struct ceph_osd_op *dst,
dst->cls.method_len = src->cls.method_len;
dst->cls.indata_len = cpu_to_le32(src->cls.indata_len);
break;
- case CEPH_OSD_OP_STARTSYNC:
- break;
case CEPH_OSD_OP_WATCH:
dst->watch.cookie = cpu_to_le64(src->watch.cookie);
dst->watch.ver = cpu_to_le64(0);
@@ -916,9 +915,6 @@ static u32 osd_req_encode_op(struct ceph_osd_op *dst,
* if the file was recently truncated, we include information about its
* old and new size so that the object can be updated appropriately. (we
* avoid synchronously deleting truncated objects because it's slow.)
- *
- * if @do_sync, include a 'startsync' command so that the osd will flush
- * data quickly.
*/
struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
struct ceph_file_layout *layout,
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index f358d0bfa76b..0da27c66349a 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/ceph/ceph_debug.h>
@@ -2445,19 +2446,34 @@ static void apply_upmap(struct ceph_osdmap *osdmap,
pg = lookup_pg_mapping(&osdmap->pg_upmap_items, pgid);
if (pg) {
- for (i = 0; i < raw->size; i++) {
- for (j = 0; j < pg->pg_upmap_items.len; j++) {
- int from = pg->pg_upmap_items.from_to[j][0];
- int to = pg->pg_upmap_items.from_to[j][1];
-
- if (from == raw->osds[i]) {
- if (!(to != CRUSH_ITEM_NONE &&
- to < osdmap->max_osd &&
- osdmap->osd_weight[to] == 0))
- raw->osds[i] = to;
+ /*
+ * Note: this approach does not allow a bidirectional swap,
+ * e.g., [[1,2],[2,1]] applied to [0,1,2] -> [0,2,1].
+ */
+ for (i = 0; i < pg->pg_upmap_items.len; i++) {
+ int from = pg->pg_upmap_items.from_to[i][0];
+ int to = pg->pg_upmap_items.from_to[i][1];
+ int pos = -1;
+ bool exists = false;
+
+ /* make sure replacement doesn't already appear */
+ for (j = 0; j < raw->size; j++) {
+ int osd = raw->osds[j];
+
+ if (osd == to) {
+ exists = true;
break;
}
+ /* ignore mapping if target is marked out */
+ if (osd == from && pos < 0 &&
+ !(to != CRUSH_ITEM_NONE &&
+ to < osdmap->max_osd &&
+ osdmap->osd_weight[to] == 0)) {
+ pos = j;
+ }
}
+ if (!exists && pos >= 0)
+ raw->osds[pos] = to;
}
}
}
diff --git a/net/ceph/pagelist.c b/net/ceph/pagelist.c
index ce09f73be759..2ea0564771d2 100644
--- a/net/ceph/pagelist.c
+++ b/net/ceph/pagelist.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/module.h>
#include <linux/gfp.h>
#include <linux/slab.h>
diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c
index 1a7c9a79a53c..a3d0adc828e6 100644
--- a/net/ceph/pagevec.c
+++ b/net/ceph/pagevec.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/ceph/ceph_debug.h>
#include <linux/module.h>
@@ -24,9 +25,9 @@ struct page **ceph_get_direct_page_vector(const void __user *data,
return ERR_PTR(-ENOMEM);
while (got < num_pages) {
- rc = get_user_pages_unlocked(
+ rc = get_user_pages_fast(
(unsigned long)data + ((unsigned long)got * PAGE_SIZE),
- num_pages - got, pages + got, write_page ? FOLL_WRITE : 0);
+ num_pages - got, write_page, pages + got);
if (rc < 0)
break;
BUG_ON(rc == 0);
diff --git a/net/ceph/string_table.c b/net/ceph/string_table.c
index 22fb96efcf34..3191d9d160a2 100644
--- a/net/ceph/string_table.c
+++ b/net/ceph/string_table.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/slab.h>
#include <linux/gfp.h>
#include <linux/string.h>
diff --git a/net/compat.c b/net/compat.c
index 6ded6c821d7a..22381719718c 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -185,6 +185,13 @@ int cmsghdr_from_user_compat_to_kern(struct msghdr *kmsg, struct sock *sk,
ucmsg = cmsg_compat_nxthdr(kmsg, ucmsg, ucmlen);
}
+ /*
+ * check the length of messages copied in is the same as the
+ * what we get from the first loop
+ */
+ if ((char *)kcmsg - (char *)kcmsg_base != kcmlen)
+ goto Einval;
+
/* Ok, looks like we made it. Hook it up and return success. */
kmsg->msg_control = kcmsg_base;
kmsg->msg_controllen = kcmlen;
diff --git a/net/core/Makefile b/net/core/Makefile
index 79f9479e9658..1fd0a9c88b1b 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the Linux networking core.
#
@@ -9,9 +10,9 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
- sock_diag.o dev_ioctl.o tso.o sock_reuseport.o
+ sock_diag.o dev_ioctl.o tso.o sock_reuseport.o \
+ fib_notifier.o
-obj-$(CONFIG_XFRM) += flow.o
obj-y += net-sysfs.o
obj-$(CONFIG_PROC_FS) += net-procfs.o
obj-$(CONFIG_NET_PKTGEN) += pktgen.o
diff --git a/net/core/datagram.c b/net/core/datagram.c
index ee5647bd91b3..522873ed120b 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* SUCS NET3:
*
@@ -169,20 +170,26 @@ struct sk_buff *__skb_try_recv_from_queue(struct sock *sk,
int *peeked, int *off, int *err,
struct sk_buff **last)
{
+ bool peek_at_off = false;
struct sk_buff *skb;
- int _off = *off;
+ int _off = 0;
+
+ if (unlikely(flags & MSG_PEEK && *off >= 0)) {
+ peek_at_off = true;
+ _off = *off;
+ }
*last = queue->prev;
skb_queue_walk(queue, skb) {
if (flags & MSG_PEEK) {
- if (_off >= skb->len && (skb->len || _off ||
- skb->peeked)) {
+ if (peek_at_off && _off >= skb->len &&
+ (_off || skb->peeked)) {
_off -= skb->len;
continue;
}
if (!skb->len) {
skb = skb_set_peeked(skb);
- if (unlikely(IS_ERR(skb))) {
+ if (IS_ERR(skb)) {
*err = PTR_ERR(skb);
return NULL;
}
@@ -356,7 +363,7 @@ int __sk_queue_drop_skb(struct sock *sk, struct sk_buff_head *sk_queue,
if (flags & MSG_PEEK) {
err = -ENOENT;
spin_lock_bh(&sk_queue->lock);
- if (skb == skb_peek(sk_queue)) {
+ if (skb->next) {
__skb_unlink(skb, sk_queue);
refcount_dec(&skb->users);
if (destructor)
@@ -573,27 +580,12 @@ fault:
}
EXPORT_SYMBOL(skb_copy_datagram_from_iter);
-/**
- * zerocopy_sg_from_iter - Build a zerocopy datagram from an iov_iter
- * @skb: buffer to copy
- * @from: the source to copy from
- *
- * The function will first copy up to headlen, and then pin the userspace
- * pages and build frags through them.
- *
- * Returns 0, -EFAULT or -EMSGSIZE.
- */
-int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
+int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
+ struct iov_iter *from, size_t length)
{
- int len = iov_iter_count(from);
- int copy = min_t(int, skb_headlen(skb), len);
- int frag = 0;
+ int frag = skb_shinfo(skb)->nr_frags;
- /* copy up to skb headlen */
- if (skb_copy_datagram_from_iter(skb, 0, from, copy))
- return -EFAULT;
-
- while (iov_iter_count(from)) {
+ while (length && iov_iter_count(from)) {
struct page *pages[MAX_SKB_FRAGS];
size_t start;
ssize_t copied;
@@ -603,18 +595,24 @@ int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
if (frag == MAX_SKB_FRAGS)
return -EMSGSIZE;
- copied = iov_iter_get_pages(from, pages, ~0U,
+ copied = iov_iter_get_pages(from, pages, length,
MAX_SKB_FRAGS - frag, &start);
if (copied < 0)
return -EFAULT;
iov_iter_advance(from, copied);
+ length -= copied;
truesize = PAGE_ALIGN(copied + start);
skb->data_len += copied;
skb->len += copied;
skb->truesize += truesize;
- refcount_add(truesize, &skb->sk->sk_wmem_alloc);
+ if (sk && sk->sk_type == SOCK_STREAM) {
+ sk->sk_wmem_queued += truesize;
+ sk_mem_charge(sk, truesize);
+ } else {
+ refcount_add(truesize, &skb->sk->sk_wmem_alloc);
+ }
while (copied) {
int size = min_t(int, copied, PAGE_SIZE - start);
skb_fill_page_desc(skb, frag++, pages[n], start, size);
@@ -625,6 +623,28 @@ int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
}
return 0;
}
+EXPORT_SYMBOL(__zerocopy_sg_from_iter);
+
+/**
+ * zerocopy_sg_from_iter - Build a zerocopy datagram from an iov_iter
+ * @skb: buffer to copy
+ * @from: the source to copy from
+ *
+ * The function will first copy up to headlen, and then pin the userspace
+ * pages and build frags through them.
+ *
+ * Returns 0, -EFAULT or -EMSGSIZE.
+ */
+int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
+{
+ int copy = min_t(int, skb_headlen(skb), iov_iter_count(from));
+
+ /* copy up to skb headlen */
+ if (skb_copy_datagram_from_iter(skb, 0, from, copy))
+ return -EFAULT;
+
+ return __zerocopy_sg_from_iter(NULL, skb, from, ~0U);
+}
EXPORT_SYMBOL(zerocopy_sg_from_iter);
static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
diff --git a/net/core/dev.c b/net/core/dev.c
index ce15a06d5558..8ee29f4f5fa9 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -144,6 +144,8 @@
#include <linux/netfilter_ingress.h>
#include <linux/crash_dump.h>
#include <linux/sctp.h>
+#include <net/udp_tunnel.h>
+#include <linux/net_namespace.h>
#include "net-sysfs.h"
@@ -161,7 +163,6 @@ static struct list_head offload_base __read_mostly;
static int netif_rx_internal(struct sk_buff *skb);
static int call_netdevice_notifiers_info(unsigned long val,
- struct net_device *dev,
struct netdev_notifier_info *info);
static struct napi_struct *napi_by_id(unsigned int napi_id);
@@ -187,6 +188,8 @@ static struct napi_struct *napi_by_id(unsigned int napi_id);
DEFINE_RWLOCK(dev_base_lock);
EXPORT_SYMBOL(dev_base_lock);
+static DEFINE_MUTEX(ifalias_mutex);
+
/* protects napi_hash addition/deletion and napi_gen_id */
static DEFINE_SPINLOCK(napi_hash_lock);
@@ -1061,7 +1064,10 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
unsigned long *inuse;
struct net_device *d;
- p = strnchr(name, IFNAMSIZ-1, '%');
+ if (!dev_valid_name(name))
+ return -EINVAL;
+
+ p = strchr(name, '%');
if (p) {
/*
* Verify the string as this thing may have come from
@@ -1092,8 +1098,7 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
free_page((unsigned long) inuse);
}
- if (buf != name)
- snprintf(buf, IFNAMSIZ, name, i);
+ snprintf(buf, IFNAMSIZ, name, i);
if (!__dev_get_by_name(net, buf))
return i;
@@ -1101,7 +1106,21 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
* when the name is long and there isn't enough space left
* for the digits, or if all bits are used.
*/
- return -ENFILE;
+ return p ? -ENFILE : -EEXIST;
+}
+
+static int dev_alloc_name_ns(struct net *net,
+ struct net_device *dev,
+ const char *name)
+{
+ char buf[IFNAMSIZ];
+ int ret;
+
+ BUG_ON(!net);
+ ret = __dev_alloc_name(net, name, buf);
+ if (ret >= 0)
+ strlcpy(dev->name, buf, IFNAMSIZ);
+ return ret;
}
/**
@@ -1120,50 +1139,16 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
int dev_alloc_name(struct net_device *dev, const char *name)
{
- char buf[IFNAMSIZ];
- struct net *net;
- int ret;
-
- BUG_ON(!dev_net(dev));
- net = dev_net(dev);
- ret = __dev_alloc_name(net, name, buf);
- if (ret >= 0)
- strlcpy(dev->name, buf, IFNAMSIZ);
- return ret;
+ return dev_alloc_name_ns(dev_net(dev), dev, name);
}
EXPORT_SYMBOL(dev_alloc_name);
-static int dev_alloc_name_ns(struct net *net,
- struct net_device *dev,
- const char *name)
-{
- char buf[IFNAMSIZ];
- int ret;
-
- ret = __dev_alloc_name(net, name, buf);
- if (ret >= 0)
- strlcpy(dev->name, buf, IFNAMSIZ);
- return ret;
-}
-
-static int dev_get_valid_name(struct net *net,
- struct net_device *dev,
- const char *name)
+int dev_get_valid_name(struct net *net, struct net_device *dev,
+ const char *name)
{
- BUG_ON(!net);
-
- if (!dev_valid_name(name))
- return -EINVAL;
-
- if (strchr(name, '%'))
- return dev_alloc_name_ns(net, dev, name);
- else if (__dev_get_by_name(net, name))
- return -EEXIST;
- else if (dev->name != name)
- strlcpy(dev->name, name, IFNAMSIZ);
-
- return 0;
+ return dev_alloc_name_ns(net, dev, name);
}
+EXPORT_SYMBOL(dev_get_valid_name);
/**
* dev_change_name - change name of a device
@@ -1264,29 +1249,53 @@ rollback:
*/
int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
{
- char *new_ifalias;
-
- ASSERT_RTNL();
+ struct dev_ifalias *new_alias = NULL;
if (len >= IFALIASZ)
return -EINVAL;
- if (!len) {
- kfree(dev->ifalias);
- dev->ifalias = NULL;
- return 0;
+ if (len) {
+ new_alias = kmalloc(sizeof(*new_alias) + len + 1, GFP_KERNEL);
+ if (!new_alias)
+ return -ENOMEM;
+
+ memcpy(new_alias->ifalias, alias, len);
+ new_alias->ifalias[len] = 0;
}
- new_ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL);
- if (!new_ifalias)
- return -ENOMEM;
- dev->ifalias = new_ifalias;
- memcpy(dev->ifalias, alias, len);
- dev->ifalias[len] = 0;
+ mutex_lock(&ifalias_mutex);
+ rcu_swap_protected(dev->ifalias, new_alias,
+ mutex_is_locked(&ifalias_mutex));
+ mutex_unlock(&ifalias_mutex);
+
+ if (new_alias)
+ kfree_rcu(new_alias, rcuhead);
return len;
}
+/**
+ * dev_get_alias - get ifalias of a device
+ * @dev: device
+ * @name: buffer to store name of ifalias
+ * @len: size of buffer
+ *
+ * get ifalias for a device. Caller must make sure dev cannot go
+ * away, e.g. rcu read lock or own a reference count to device.
+ */
+int dev_get_alias(const struct net_device *dev, char *name, size_t len)
+{
+ const struct dev_ifalias *alias;
+ int ret = 0;
+
+ rcu_read_lock();
+ alias = rcu_dereference(dev->ifalias);
+ if (alias)
+ ret = snprintf(name, len, "%s", alias->ifalias);
+ rcu_read_unlock();
+
+ return ret;
+}
/**
* netdev_features_change - device changes features
@@ -1311,10 +1320,11 @@ EXPORT_SYMBOL(netdev_features_change);
void netdev_state_change(struct net_device *dev)
{
if (dev->flags & IFF_UP) {
- struct netdev_notifier_change_info change_info;
+ struct netdev_notifier_change_info change_info = {
+ .info.dev = dev,
+ };
- change_info.flags_changed = 0;
- call_netdevice_notifiers_info(NETDEV_CHANGE, dev,
+ call_netdevice_notifiers_info(NETDEV_CHANGE,
&change_info.info);
rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
}
@@ -1413,7 +1423,7 @@ int dev_open(struct net_device *dev)
}
EXPORT_SYMBOL(dev_open);
-static int __dev_close_many(struct list_head *head)
+static void __dev_close_many(struct list_head *head)
{
struct net_device *dev;
@@ -1455,23 +1465,18 @@ static int __dev_close_many(struct list_head *head)
dev->flags &= ~IFF_UP;
netpoll_poll_enable(dev);
}
-
- return 0;
}
-static int __dev_close(struct net_device *dev)
+static void __dev_close(struct net_device *dev)
{
- int retval;
LIST_HEAD(single);
list_add(&dev->close_list, &single);
- retval = __dev_close_many(&single);
+ __dev_close_many(&single);
list_del(&single);
-
- return retval;
}
-int dev_close_many(struct list_head *head, bool unlink)
+void dev_close_many(struct list_head *head, bool unlink)
{
struct net_device *dev, *tmp;
@@ -1488,8 +1493,6 @@ int dev_close_many(struct list_head *head, bool unlink)
if (unlink)
list_del_init(&dev->close_list);
}
-
- return 0;
}
EXPORT_SYMBOL(dev_close_many);
@@ -1502,7 +1505,7 @@ EXPORT_SYMBOL(dev_close_many);
* is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
* chain.
*/
-int dev_close(struct net_device *dev)
+void dev_close(struct net_device *dev)
{
if (dev->flags & IFF_UP) {
LIST_HEAD(single);
@@ -1511,7 +1514,6 @@ int dev_close(struct net_device *dev)
dev_close_many(&single, true);
list_del(&single);
}
- return 0;
}
EXPORT_SYMBOL(dev_close);
@@ -1543,9 +1545,10 @@ EXPORT_SYMBOL(dev_disable_lro);
static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val,
struct net_device *dev)
{
- struct netdev_notifier_info info;
+ struct netdev_notifier_info info = {
+ .dev = dev,
+ };
- netdev_notifier_info_init(&info, dev);
return nb->notifier_call(nb, val, &info);
}
@@ -1670,11 +1673,9 @@ EXPORT_SYMBOL(unregister_netdevice_notifier);
*/
static int call_netdevice_notifiers_info(unsigned long val,
- struct net_device *dev,
struct netdev_notifier_info *info)
{
ASSERT_RTNL();
- netdev_notifier_info_init(info, dev);
return raw_notifier_call_chain(&netdev_chain, val, info);
}
@@ -1689,9 +1690,11 @@ static int call_netdevice_notifiers_info(unsigned long val,
int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
{
- struct netdev_notifier_info info;
+ struct netdev_notifier_info info = {
+ .dev = dev,
+ };
- return call_netdevice_notifiers_info(val, dev, &info);
+ return call_netdevice_notifiers_info(val, &info);
}
EXPORT_SYMBOL(call_netdevice_notifiers);
@@ -1860,7 +1863,7 @@ static inline int deliver_skb(struct sk_buff *skb,
struct packet_type *pt_prev,
struct net_device *orig_dev)
{
- if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
+ if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC)))
return -ENOMEM;
refcount_inc(&skb->users);
return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
@@ -1955,8 +1958,12 @@ again:
goto again;
}
out_unlock:
- if (pt_prev)
- pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
+ if (pt_prev) {
+ if (!skb_orphan_frags_rx(skb2, GFP_ATOMIC))
+ pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
+ else
+ kfree_skb(skb2);
+ }
rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(dev_queue_xmit_nit);
@@ -2015,6 +2022,7 @@ int netdev_txq_to_tc(struct net_device *dev, unsigned int txq)
return 0;
}
+EXPORT_SYMBOL(netdev_txq_to_tc);
#ifdef CONFIG_XPS
static DEFINE_MUTEX(xps_map_mutex);
@@ -2738,8 +2746,7 @@ EXPORT_SYMBOL(skb_mac_gso_segment);
static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path)
{
if (tx_path)
- return skb->ip_summed != CHECKSUM_PARTIAL &&
- skb->ip_summed != CHECKSUM_UNNECESSARY;
+ return skb->ip_summed != CHECKSUM_PARTIAL;
return skb->ip_summed == CHECKSUM_NONE;
}
@@ -3249,22 +3256,22 @@ EXPORT_SYMBOL(dev_loopback_xmit);
static struct sk_buff *
sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
{
- struct tcf_proto *cl = rcu_dereference_bh(dev->egress_cl_list);
+ struct mini_Qdisc *miniq = rcu_dereference_bh(dev->miniq_egress);
struct tcf_result cl_res;
- if (!cl)
+ if (!miniq)
return skb;
/* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */
- qdisc_bstats_cpu_update(cl->q, skb);
+ mini_qdisc_bstats_cpu_update(miniq, skb);
- switch (tcf_classify(skb, cl, &cl_res, false)) {
+ switch (tcf_classify(skb, miniq->filter_list, &cl_res, false)) {
case TC_ACT_OK:
case TC_ACT_RECLASSIFY:
skb->tc_index = TC_H_MIN(cl_res.classid);
break;
case TC_ACT_SHOT:
- qdisc_qstats_cpu_drop(cl->q);
+ mini_qdisc_qstats_cpu_drop(miniq);
*ret = NET_XMIT_DROP;
kfree_skb(skb);
return NULL;
@@ -3729,7 +3736,7 @@ bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
flow_table = rcu_dereference(rxqueue->rps_flow_table);
if (flow_table && flow_id <= flow_table->mask) {
rflow = &flow_table->flows[flow_id];
- cpu = ACCESS_ONCE(rflow->cpu);
+ cpu = READ_ONCE(rflow->cpu);
if (rflow->filter == filter_id && cpu < nr_cpu_ids &&
((int)(per_cpu(softnet_data, cpu).input_queue_head -
rflow->last_qtail) <
@@ -3865,6 +3872,142 @@ drop:
return NET_RX_DROP;
}
+static u32 netif_receive_generic_xdp(struct sk_buff *skb,
+ struct bpf_prog *xdp_prog)
+{
+ u32 metalen, act = XDP_DROP;
+ struct xdp_buff xdp;
+ void *orig_data;
+ int hlen, off;
+ u32 mac_len;
+
+ /* Reinjected packets coming from act_mirred or similar should
+ * not get XDP generic processing.
+ */
+ if (skb_cloned(skb))
+ return XDP_PASS;
+
+ /* XDP packets must be linear and must have sufficient headroom
+ * of XDP_PACKET_HEADROOM bytes. This is the guarantee that also
+ * native XDP provides, thus we need to do it here as well.
+ */
+ if (skb_is_nonlinear(skb) ||
+ skb_headroom(skb) < XDP_PACKET_HEADROOM) {
+ int hroom = XDP_PACKET_HEADROOM - skb_headroom(skb);
+ int troom = skb->tail + skb->data_len - skb->end;
+
+ /* In case we have to go down the path and also linearize,
+ * then lets do the pskb_expand_head() work just once here.
+ */
+ if (pskb_expand_head(skb,
+ hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0,
+ troom > 0 ? troom + 128 : 0, GFP_ATOMIC))
+ goto do_drop;
+ if (troom > 0 && __skb_linearize(skb))
+ goto do_drop;
+ }
+
+ /* The XDP program wants to see the packet starting at the MAC
+ * header.
+ */
+ mac_len = skb->data - skb_mac_header(skb);
+ hlen = skb_headlen(skb) + mac_len;
+ xdp.data = skb->data - mac_len;
+ xdp.data_meta = xdp.data;
+ xdp.data_end = xdp.data + hlen;
+ xdp.data_hard_start = skb->data - skb_headroom(skb);
+ orig_data = xdp.data;
+
+ act = bpf_prog_run_xdp(xdp_prog, &xdp);
+
+ off = xdp.data - orig_data;
+ if (off > 0)
+ __skb_pull(skb, off);
+ else if (off < 0)
+ __skb_push(skb, -off);
+ skb->mac_header += off;
+
+ switch (act) {
+ case XDP_REDIRECT:
+ case XDP_TX:
+ __skb_push(skb, mac_len);
+ break;
+ case XDP_PASS:
+ metalen = xdp.data - xdp.data_meta;
+ if (metalen)
+ skb_metadata_set(skb, metalen);
+ break;
+ default:
+ bpf_warn_invalid_xdp_action(act);
+ /* fall through */
+ case XDP_ABORTED:
+ trace_xdp_exception(skb->dev, xdp_prog, act);
+ /* fall through */
+ case XDP_DROP:
+ do_drop:
+ kfree_skb(skb);
+ break;
+ }
+
+ return act;
+}
+
+/* When doing generic XDP we have to bypass the qdisc layer and the
+ * network taps in order to match in-driver-XDP behavior.
+ */
+void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog)
+{
+ struct net_device *dev = skb->dev;
+ struct netdev_queue *txq;
+ bool free_skb = true;
+ int cpu, rc;
+
+ txq = netdev_pick_tx(dev, skb, NULL);
+ cpu = smp_processor_id();
+ HARD_TX_LOCK(dev, txq, cpu);
+ if (!netif_xmit_stopped(txq)) {
+ rc = netdev_start_xmit(skb, dev, txq, 0);
+ if (dev_xmit_complete(rc))
+ free_skb = false;
+ }
+ HARD_TX_UNLOCK(dev, txq);
+ if (free_skb) {
+ trace_xdp_exception(dev, xdp_prog, XDP_TX);
+ kfree_skb(skb);
+ }
+}
+EXPORT_SYMBOL_GPL(generic_xdp_tx);
+
+static struct static_key generic_xdp_needed __read_mostly;
+
+int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb)
+{
+ if (xdp_prog) {
+ u32 act = netif_receive_generic_xdp(skb, xdp_prog);
+ int err;
+
+ if (act != XDP_PASS) {
+ switch (act) {
+ case XDP_REDIRECT:
+ err = xdp_do_generic_redirect(skb->dev, skb,
+ xdp_prog);
+ if (err)
+ goto out_redir;
+ /* fallthru to submit skb */
+ case XDP_TX:
+ generic_xdp_tx(skb, xdp_prog);
+ break;
+ }
+ return XDP_DROP;
+ }
+ }
+ return XDP_PASS;
+out_redir:
+ kfree_skb(skb);
+ return XDP_DROP;
+}
+EXPORT_SYMBOL_GPL(do_xdp_generic);
+
static int netif_rx_internal(struct sk_buff *skb)
{
int ret;
@@ -3872,6 +4015,24 @@ static int netif_rx_internal(struct sk_buff *skb)
net_timestamp_check(netdev_tstamp_prequeue, skb);
trace_netif_rx(skb);
+
+ if (static_key_false(&generic_xdp_needed)) {
+ int ret;
+
+ preempt_disable();
+ rcu_read_lock();
+ ret = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog), skb);
+ rcu_read_unlock();
+ preempt_enable();
+
+ /* Consider XDP consuming the packet a success from
+ * the netdev point of view we do not want to count
+ * this as an error.
+ */
+ if (ret != XDP_PASS)
+ return NET_RX_SUCCESS;
+ }
+
#ifdef CONFIG_RPS
if (static_key_false(&rps_needed)) {
struct rps_dev_flow voidflow, *rflow = &voidflow;
@@ -4010,7 +4171,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
struct net_device *orig_dev)
{
#ifdef CONFIG_NET_CLS_ACT
- struct tcf_proto *cl = rcu_dereference_bh(skb->dev->ingress_cl_list);
+ struct mini_Qdisc *miniq = rcu_dereference_bh(skb->dev->miniq_ingress);
struct tcf_result cl_res;
/* If there's at least one ingress present somewhere (so
@@ -4018,8 +4179,9 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
* that are not configured with an ingress qdisc will bail
* out here.
*/
- if (!cl)
+ if (!miniq)
return skb;
+
if (*pt_prev) {
*ret = deliver_skb(skb, *pt_prev, orig_dev);
*pt_prev = NULL;
@@ -4027,15 +4189,15 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
qdisc_skb_cb(skb)->pkt_len = skb->len;
skb->tc_at_ingress = 1;
- qdisc_bstats_cpu_update(cl->q, skb);
+ mini_qdisc_bstats_cpu_update(miniq, skb);
- switch (tcf_classify(skb, cl, &cl_res, false)) {
+ switch (tcf_classify(skb, miniq->filter_list, &cl_res, false)) {
case TC_ACT_OK:
case TC_ACT_RECLASSIFY:
skb->tc_index = TC_H_MIN(cl_res.classid);
break;
case TC_ACT_SHOT:
- qdisc_qstats_cpu_drop(cl->q);
+ mini_qdisc_qstats_cpu_drop(miniq);
kfree_skb(skb);
return NULL;
case TC_ACT_STOLEN:
@@ -4292,7 +4454,7 @@ skip_classify:
}
if (pt_prev) {
- if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
+ if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC)))
goto drop;
else
ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
@@ -4313,6 +4475,33 @@ out:
return ret;
}
+/**
+ * netif_receive_skb_core - special purpose version of netif_receive_skb
+ * @skb: buffer to process
+ *
+ * More direct receive version of netif_receive_skb(). It should
+ * only be used by callers that have a need to skip RPS and Generic XDP.
+ * Caller must also take care of handling if (page_is_)pfmemalloc.
+ *
+ * This function may only be called from softirq context and interrupts
+ * should be enabled.
+ *
+ * Return values (usually ignored):
+ * NET_RX_SUCCESS: no congestion
+ * NET_RX_DROP: packet was dropped
+ */
+int netif_receive_skb_core(struct sk_buff *skb)
+{
+ int ret;
+
+ rcu_read_lock();
+ ret = __netif_receive_skb_core(skb, false);
+ rcu_read_unlock();
+
+ return ret;
+}
+EXPORT_SYMBOL(netif_receive_skb_core);
+
static int __netif_receive_skb(struct sk_buff *skb)
{
int ret;
@@ -4338,9 +4527,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
return ret;
}
-static struct static_key generic_xdp_needed __read_mostly;
-
-static int generic_xdp_install(struct net_device *dev, struct netdev_xdp *xdp)
+static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
{
struct bpf_prog *old = rtnl_dereference(dev->xdp_prog);
struct bpf_prog *new = xdp->prog;
@@ -4373,89 +4560,6 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_xdp *xdp)
return ret;
}
-static u32 netif_receive_generic_xdp(struct sk_buff *skb,
- struct bpf_prog *xdp_prog)
-{
- struct xdp_buff xdp;
- u32 act = XDP_DROP;
- void *orig_data;
- int hlen, off;
- u32 mac_len;
-
- /* Reinjected packets coming from act_mirred or similar should
- * not get XDP generic processing.
- */
- if (skb_cloned(skb))
- return XDP_PASS;
-
- if (skb_linearize(skb))
- goto do_drop;
-
- /* The XDP program wants to see the packet starting at the MAC
- * header.
- */
- mac_len = skb->data - skb_mac_header(skb);
- hlen = skb_headlen(skb) + mac_len;
- xdp.data = skb->data - mac_len;
- xdp.data_end = xdp.data + hlen;
- xdp.data_hard_start = skb->data - skb_headroom(skb);
- orig_data = xdp.data;
-
- act = bpf_prog_run_xdp(xdp_prog, &xdp);
-
- off = xdp.data - orig_data;
- if (off > 0)
- __skb_pull(skb, off);
- else if (off < 0)
- __skb_push(skb, -off);
-
- switch (act) {
- case XDP_TX:
- __skb_push(skb, mac_len);
- /* fall through */
- case XDP_PASS:
- break;
-
- default:
- bpf_warn_invalid_xdp_action(act);
- /* fall through */
- case XDP_ABORTED:
- trace_xdp_exception(skb->dev, xdp_prog, act);
- /* fall through */
- case XDP_DROP:
- do_drop:
- kfree_skb(skb);
- break;
- }
-
- return act;
-}
-
-/* When doing generic XDP we have to bypass the qdisc layer and the
- * network taps in order to match in-driver-XDP behavior.
- */
-static void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog)
-{
- struct net_device *dev = skb->dev;
- struct netdev_queue *txq;
- bool free_skb = true;
- int cpu, rc;
-
- txq = netdev_pick_tx(dev, skb, NULL);
- cpu = smp_processor_id();
- HARD_TX_LOCK(dev, txq, cpu);
- if (!netif_xmit_stopped(txq)) {
- rc = netdev_start_xmit(skb, dev, txq, 0);
- if (dev_xmit_complete(rc))
- free_skb = false;
- }
- HARD_TX_UNLOCK(dev, txq);
- if (free_skb) {
- trace_xdp_exception(dev, xdp_prog, XDP_TX);
- kfree_skb(skb);
- }
-}
-
static int netif_receive_skb_internal(struct sk_buff *skb)
{
int ret;
@@ -4465,23 +4569,20 @@ static int netif_receive_skb_internal(struct sk_buff *skb)
if (skb_defer_rx_timestamp(skb))
return NET_RX_SUCCESS;
- rcu_read_lock();
-
if (static_key_false(&generic_xdp_needed)) {
- struct bpf_prog *xdp_prog = rcu_dereference(skb->dev->xdp_prog);
+ int ret;
- if (xdp_prog) {
- u32 act = netif_receive_generic_xdp(skb, xdp_prog);
+ preempt_disable();
+ rcu_read_lock();
+ ret = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog), skb);
+ rcu_read_unlock();
+ preempt_enable();
- if (act != XDP_PASS) {
- rcu_read_unlock();
- if (act == XDP_TX)
- generic_xdp_tx(skb, xdp_prog);
- return NET_RX_DROP;
- }
- }
+ if (ret != XDP_PASS)
+ return NET_RX_DROP;
}
+ rcu_read_lock();
#ifdef CONFIG_RPS
if (static_key_false(&rps_needed)) {
struct rps_dev_flow voidflow, *rflow = &voidflow;
@@ -4653,6 +4754,7 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
diffs |= p->vlan_tci ^ skb->vlan_tci;
diffs |= skb_metadata_dst_cmp(p, skb);
+ diffs |= skb_metadata_differs(p, skb);
if (maclen == ETH_HLEN)
diffs |= compare_ether_header(skb_mac_header(p),
skb_mac_header(skb));
@@ -5289,6 +5391,7 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock)
* Ideally, a new ndo_busy_poll_stop() could avoid another round.
*/
rc = napi->poll(napi, BUSY_POLL_BUDGET);
+ trace_napi_poll(napi, rc, BUSY_POLL_BUDGET);
netpoll_poll_unlock(have_poll_lock);
if (rc == BUSY_POLL_BUDGET)
__napi_schedule(napi);
@@ -5667,12 +5770,13 @@ EXPORT_SYMBOL(netdev_has_upper_dev_all_rcu);
* Find out if a device is linked to an upper device and return true in case
* it is. The caller must hold the RTNL lock.
*/
-static bool netdev_has_any_upper_dev(struct net_device *dev)
+bool netdev_has_any_upper_dev(struct net_device *dev)
{
ASSERT_RTNL();
return !list_empty(&dev->adj_list.upper);
}
+EXPORT_SYMBOL(netdev_has_any_upper_dev);
/**
* netdev_master_upper_dev_get - Get master upper device
@@ -6184,9 +6288,19 @@ static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev,
static int __netdev_upper_dev_link(struct net_device *dev,
struct net_device *upper_dev, bool master,
- void *upper_priv, void *upper_info)
-{
- struct netdev_notifier_changeupper_info changeupper_info;
+ void *upper_priv, void *upper_info,
+ struct netlink_ext_ack *extack)
+{
+ struct netdev_notifier_changeupper_info changeupper_info = {
+ .info = {
+ .dev = dev,
+ .extack = extack,
+ },
+ .upper_dev = upper_dev,
+ .master = master,
+ .linking = true,
+ .upper_info = upper_info,
+ };
int ret = 0;
ASSERT_RTNL();
@@ -6204,12 +6318,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
if (master && netdev_master_upper_dev_get(dev))
return -EBUSY;
- changeupper_info.upper_dev = upper_dev;
- changeupper_info.master = master;
- changeupper_info.linking = true;
- changeupper_info.upper_info = upper_info;
-
- ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev,
+ ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER,
&changeupper_info.info);
ret = notifier_to_errno(ret);
if (ret)
@@ -6220,7 +6329,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
if (ret)
return ret;
- ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
+ ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER,
&changeupper_info.info);
ret = notifier_to_errno(ret);
if (ret)
@@ -6245,9 +6354,11 @@ rollback:
* returns zero.
*/
int netdev_upper_dev_link(struct net_device *dev,
- struct net_device *upper_dev)
+ struct net_device *upper_dev,
+ struct netlink_ext_ack *extack)
{
- return __netdev_upper_dev_link(dev, upper_dev, false, NULL, NULL);
+ return __netdev_upper_dev_link(dev, upper_dev, false,
+ NULL, NULL, extack);
}
EXPORT_SYMBOL(netdev_upper_dev_link);
@@ -6266,10 +6377,11 @@ EXPORT_SYMBOL(netdev_upper_dev_link);
*/
int netdev_master_upper_dev_link(struct net_device *dev,
struct net_device *upper_dev,
- void *upper_priv, void *upper_info)
+ void *upper_priv, void *upper_info,
+ struct netlink_ext_ack *extack)
{
return __netdev_upper_dev_link(dev, upper_dev, true,
- upper_priv, upper_info);
+ upper_priv, upper_info, extack);
}
EXPORT_SYMBOL(netdev_master_upper_dev_link);
@@ -6284,20 +6396,24 @@ EXPORT_SYMBOL(netdev_master_upper_dev_link);
void netdev_upper_dev_unlink(struct net_device *dev,
struct net_device *upper_dev)
{
- struct netdev_notifier_changeupper_info changeupper_info;
+ struct netdev_notifier_changeupper_info changeupper_info = {
+ .info = {
+ .dev = dev,
+ },
+ .upper_dev = upper_dev,
+ .linking = false,
+ };
ASSERT_RTNL();
- changeupper_info.upper_dev = upper_dev;
changeupper_info.master = netdev_master_upper_dev_get(dev) == upper_dev;
- changeupper_info.linking = false;
- call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev,
+ call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER,
&changeupper_info.info);
__netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
- call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
+ call_netdevice_notifiers_info(NETDEV_CHANGEUPPER,
&changeupper_info.info);
}
EXPORT_SYMBOL(netdev_upper_dev_unlink);
@@ -6313,11 +6429,13 @@ EXPORT_SYMBOL(netdev_upper_dev_unlink);
void netdev_bonding_info_change(struct net_device *dev,
struct netdev_bonding_info *bonding_info)
{
- struct netdev_notifier_bonding_info info;
+ struct netdev_notifier_bonding_info info = {
+ .info.dev = dev,
+ };
memcpy(&info.bonding_info, bonding_info,
sizeof(struct netdev_bonding_info));
- call_netdevice_notifiers_info(NETDEV_BONDING_INFO, dev,
+ call_netdevice_notifiers_info(NETDEV_BONDING_INFO,
&info.info);
}
EXPORT_SYMBOL(netdev_bonding_info_change);
@@ -6443,11 +6561,13 @@ EXPORT_SYMBOL(dev_get_nest_level);
void netdev_lower_state_changed(struct net_device *lower_dev,
void *lower_state_info)
{
- struct netdev_notifier_changelowerstate_info changelowerstate_info;
+ struct netdev_notifier_changelowerstate_info changelowerstate_info = {
+ .info.dev = lower_dev,
+ };
ASSERT_RTNL();
changelowerstate_info.lower_state_info = lower_state_info;
- call_netdevice_notifiers_info(NETDEV_CHANGELOWERSTATE, lower_dev,
+ call_netdevice_notifiers_info(NETDEV_CHANGELOWERSTATE,
&changelowerstate_info.info);
}
EXPORT_SYMBOL(netdev_lower_state_changed);
@@ -6689,8 +6809,12 @@ int __dev_change_flags(struct net_device *dev, unsigned int flags)
*/
ret = 0;
- if ((old_flags ^ flags) & IFF_UP)
- ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev);
+ if ((old_flags ^ flags) & IFF_UP) {
+ if (old_flags & IFF_UP)
+ __dev_close(dev);
+ else
+ ret = __dev_open(dev);
+ }
if ((flags ^ dev->gflags) & IFF_PROMISC) {
int inc = (flags & IFF_PROMISC) ? 1 : -1;
@@ -6734,11 +6858,14 @@ void __dev_notify_flags(struct net_device *dev, unsigned int old_flags,
if (dev->flags & IFF_UP &&
(changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) {
- struct netdev_notifier_change_info change_info;
+ struct netdev_notifier_change_info change_info = {
+ .info = {
+ .dev = dev,
+ },
+ .flags_changed = changes,
+ };
- change_info.flags_changed = changes;
- call_netdevice_notifiers_info(NETDEV_CHANGE, dev,
- &change_info.info);
+ call_netdevice_notifiers_info(NETDEV_CHANGE, &change_info.info);
}
}
@@ -6945,26 +7072,26 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down)
}
EXPORT_SYMBOL(dev_change_proto_down);
-u8 __dev_xdp_attached(struct net_device *dev, xdp_op_t xdp_op, u32 *prog_id)
+u8 __dev_xdp_attached(struct net_device *dev, bpf_op_t bpf_op, u32 *prog_id)
{
- struct netdev_xdp xdp;
+ struct netdev_bpf xdp;
memset(&xdp, 0, sizeof(xdp));
xdp.command = XDP_QUERY_PROG;
/* Query must always succeed. */
- WARN_ON(xdp_op(dev, &xdp) < 0);
+ WARN_ON(bpf_op(dev, &xdp) < 0);
if (prog_id)
*prog_id = xdp.prog_id;
return xdp.prog_attached;
}
-static int dev_xdp_install(struct net_device *dev, xdp_op_t xdp_op,
+static int dev_xdp_install(struct net_device *dev, bpf_op_t bpf_op,
struct netlink_ext_ack *extack, u32 flags,
struct bpf_prog *prog)
{
- struct netdev_xdp xdp;
+ struct netdev_bpf xdp;
memset(&xdp, 0, sizeof(xdp));
if (flags & XDP_FLAGS_HW_MODE)
@@ -6975,7 +7102,7 @@ static int dev_xdp_install(struct net_device *dev, xdp_op_t xdp_op,
xdp.flags = flags;
xdp.prog = prog;
- return xdp_op(dev, &xdp);
+ return bpf_op(dev, &xdp);
}
/**
@@ -6992,32 +7119,36 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
{
const struct net_device_ops *ops = dev->netdev_ops;
struct bpf_prog *prog = NULL;
- xdp_op_t xdp_op, xdp_chk;
+ bpf_op_t bpf_op, bpf_chk;
int err;
ASSERT_RTNL();
- xdp_op = xdp_chk = ops->ndo_xdp;
- if (!xdp_op && (flags & (XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE)))
+ bpf_op = bpf_chk = ops->ndo_bpf;
+ if (!bpf_op && (flags & (XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE)))
return -EOPNOTSUPP;
- if (!xdp_op || (flags & XDP_FLAGS_SKB_MODE))
- xdp_op = generic_xdp_install;
- if (xdp_op == xdp_chk)
- xdp_chk = generic_xdp_install;
+ if (!bpf_op || (flags & XDP_FLAGS_SKB_MODE))
+ bpf_op = generic_xdp_install;
+ if (bpf_op == bpf_chk)
+ bpf_chk = generic_xdp_install;
if (fd >= 0) {
- if (xdp_chk && __dev_xdp_attached(dev, xdp_chk, NULL))
+ if (bpf_chk && __dev_xdp_attached(dev, bpf_chk, NULL))
return -EEXIST;
if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) &&
- __dev_xdp_attached(dev, xdp_op, NULL))
+ __dev_xdp_attached(dev, bpf_op, NULL))
return -EBUSY;
- prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP);
+ if (bpf_op == ops->ndo_bpf)
+ prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP,
+ dev);
+ else
+ prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP);
if (IS_ERR(prog))
return PTR_ERR(prog);
}
- err = dev_xdp_install(dev, xdp_op, extack, flags, prog);
+ err = dev_xdp_install(dev, bpf_op, extack, flags, prog);
if (err < 0 && prog)
bpf_prog_put(prog);
@@ -7109,7 +7240,7 @@ static void rollback_registered_many(struct list_head *head)
if (!dev->rtnl_link_ops ||
dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0,
- GFP_KERNEL);
+ GFP_KERNEL, NULL);
/*
* Flush the unicast and multicast chains
@@ -7235,24 +7366,6 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
features &= ~NETIF_F_GSO;
}
- /* UFO needs SG and checksumming */
- if (features & NETIF_F_UFO) {
- /* maybe split UFO into V4 and V6? */
- if (!(features & NETIF_F_HW_CSUM) &&
- ((features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) !=
- (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM))) {
- netdev_dbg(dev,
- "Dropping NETIF_F_UFO since no checksum offload features.\n");
- features &= ~NETIF_F_UFO;
- }
-
- if (!(features & NETIF_F_SG)) {
- netdev_dbg(dev,
- "Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n");
- features &= ~NETIF_F_UFO;
- }
- }
-
/* GSO partial features require GSO partial be set */
if ((features & dev->gso_partial_features) &&
!(features & NETIF_F_GSO_PARTIAL)) {
@@ -7313,8 +7426,27 @@ sync_lower:
netdev_for_each_lower_dev(dev, lower, iter)
netdev_sync_lower_features(dev, lower, features);
- if (!err)
+ if (!err) {
+ netdev_features_t diff = features ^ dev->features;
+
+ if (diff & NETIF_F_RX_UDP_TUNNEL_PORT) {
+ /* udp_tunnel_{get,drop}_rx_info both need
+ * NETIF_F_RX_UDP_TUNNEL_PORT enabled on the
+ * device, or they won't do anything.
+ * Thus we need to update dev->features
+ * *before* calling udp_tunnel_get_rx_info,
+ * but *after* calling udp_tunnel_drop_rx_info.
+ */
+ if (features & NETIF_F_RX_UDP_TUNNEL_PORT) {
+ dev->features = features;
+ udp_tunnel_get_rx_info(dev);
+ } else {
+ udp_tunnel_drop_rx_info(dev);
+ }
+ }
+
dev->features = features;
+ }
return err < 0 ? 0 : 1;
}
@@ -7516,6 +7648,12 @@ int register_netdevice(struct net_device *dev)
*/
dev->hw_features |= NETIF_F_SOFT_FEATURES;
dev->features |= NETIF_F_SOFT_FEATURES;
+
+ if (dev->netdev_ops->ndo_udp_tunnel_add) {
+ dev->features |= NETIF_F_RX_UDP_TUNNEL_PORT;
+ dev->hw_features |= NETIF_F_RX_UDP_TUNNEL_PORT;
+ }
+
dev->wanted_features = dev->features & dev->hw_features;
if (!(dev->flags & IFF_LOOPBACK))
@@ -7939,7 +8077,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
unsigned int txqs, unsigned int rxqs)
{
struct net_device *dev;
- size_t alloc_size;
+ unsigned int alloc_size;
struct net_device *p;
BUG_ON(strlen(name) >= sizeof(dev->name));
@@ -8189,7 +8327,7 @@ EXPORT_SYMBOL(unregister_netdev);
int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
{
- int err;
+ int err, new_nsid;
ASSERT_RTNL();
@@ -8245,7 +8383,11 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
rcu_barrier();
call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
- rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL);
+ if (dev->rtnl_link_ops && dev->rtnl_link_ops->get_link_net)
+ new_nsid = peernet2id_alloc(dev_net(dev), net);
+ else
+ new_nsid = peernet2id(dev_net(dev), net);
+ rtmsg_ifinfo_newnet(RTM_DELLINK, dev, ~0U, GFP_KERNEL, &new_nsid);
/*
* Flush the unicast and multicast chains
@@ -8507,6 +8649,8 @@ static void __net_exit netdev_exit(struct net *net)
{
kfree(net->dev_name_head);
kfree(net->dev_index_head);
+ if (net != &init_net)
+ WARN_ON_ONCE(!list_empty(&net->dev_base_head));
}
static struct pernet_operations __net_initdata netdev_net_ops = {
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 709a4e6fb447..7e690d0ccd05 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/kmod.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
@@ -303,7 +304,18 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
case SIOCSIFTXQLEN:
if (ifr->ifr_qlen < 0)
return -EINVAL;
- dev->tx_queue_len = ifr->ifr_qlen;
+ if (dev->tx_queue_len ^ ifr->ifr_qlen) {
+ unsigned int orig_len = dev->tx_queue_len;
+
+ dev->tx_queue_len = ifr->ifr_qlen;
+ err = call_netdevice_notifiers(
+ NETDEV_CHANGE_TX_QUEUE_LEN, dev);
+ err = notifier_to_errno(err);
+ if (err) {
+ dev->tx_queue_len = orig_len;
+ return err;
+ }
+ }
return 0;
case SIOCSIFNAME:
diff --git a/net/core/devlink.c b/net/core/devlink.c
index a0adfc31a3fe..7d430c1d9c3e 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -29,6 +29,57 @@
#define CREATE_TRACE_POINTS
#include <trace/events/devlink.h>
+static struct devlink_dpipe_field devlink_dpipe_fields_ethernet[] = {
+ {
+ .name = "destination mac",
+ .id = DEVLINK_DPIPE_FIELD_ETHERNET_DST_MAC,
+ .bitwidth = 48,
+ },
+};
+
+struct devlink_dpipe_header devlink_dpipe_header_ethernet = {
+ .name = "ethernet",
+ .id = DEVLINK_DPIPE_HEADER_ETHERNET,
+ .fields = devlink_dpipe_fields_ethernet,
+ .fields_count = ARRAY_SIZE(devlink_dpipe_fields_ethernet),
+ .global = true,
+};
+EXPORT_SYMBOL(devlink_dpipe_header_ethernet);
+
+static struct devlink_dpipe_field devlink_dpipe_fields_ipv4[] = {
+ {
+ .name = "destination ip",
+ .id = DEVLINK_DPIPE_FIELD_IPV4_DST_IP,
+ .bitwidth = 32,
+ },
+};
+
+struct devlink_dpipe_header devlink_dpipe_header_ipv4 = {
+ .name = "ipv4",
+ .id = DEVLINK_DPIPE_HEADER_IPV4,
+ .fields = devlink_dpipe_fields_ipv4,
+ .fields_count = ARRAY_SIZE(devlink_dpipe_fields_ipv4),
+ .global = true,
+};
+EXPORT_SYMBOL(devlink_dpipe_header_ipv4);
+
+static struct devlink_dpipe_field devlink_dpipe_fields_ipv6[] = {
+ {
+ .name = "destination ip",
+ .id = DEVLINK_DPIPE_FIELD_IPV6_DST_IP,
+ .bitwidth = 128,
+ },
+};
+
+struct devlink_dpipe_header devlink_dpipe_header_ipv6 = {
+ .name = "ipv6",
+ .id = DEVLINK_DPIPE_HEADER_IPV6,
+ .fields = devlink_dpipe_fields_ipv6,
+ .fields_count = ARRAY_SIZE(devlink_dpipe_fields_ipv6),
+ .global = true,
+};
+EXPORT_SYMBOL(devlink_dpipe_header_ipv6);
+
EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwmsg);
static LIST_HEAD(devlink_list);
@@ -1613,13 +1664,15 @@ static int devlink_dpipe_table_put(struct sk_buff *skb,
struct devlink_dpipe_table *table)
{
struct nlattr *table_attr;
+ u64 table_size;
+ table_size = table->table_ops->size_get(table->priv);
table_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_TABLE);
if (!table_attr)
return -EMSGSIZE;
if (nla_put_string(skb, DEVLINK_ATTR_DPIPE_TABLE_NAME, table->name) ||
- nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_SIZE, table->size,
+ nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_SIZE, table_size,
DEVLINK_ATTR_PAD))
goto nla_put_failure;
if (nla_put_u8(skb, DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED,
@@ -1960,6 +2013,28 @@ int devlink_dpipe_entry_ctx_close(struct devlink_dpipe_dump_ctx *dump_ctx)
}
EXPORT_SYMBOL_GPL(devlink_dpipe_entry_ctx_close);
+void devlink_dpipe_entry_clear(struct devlink_dpipe_entry *entry)
+
+{
+ unsigned int value_count, value_index;
+ struct devlink_dpipe_value *value;
+
+ value = entry->action_values;
+ value_count = entry->action_values_count;
+ for (value_index = 0; value_index < value_count; value_index++) {
+ kfree(value[value_index].value);
+ kfree(value[value_index].mask);
+ }
+
+ value = entry->match_values;
+ value_count = entry->match_values_count;
+ for (value_index = 0; value_index < value_count; value_index++) {
+ kfree(value[value_index].value);
+ kfree(value[value_index].mask);
+ }
+}
+EXPORT_SYMBOL(devlink_dpipe_entry_clear);
+
static int devlink_dpipe_entries_fill(struct genl_info *info,
enum devlink_command cmd, int flags,
struct devlink_dpipe_table *table)
@@ -2684,20 +2759,21 @@ EXPORT_SYMBOL_GPL(devlink_dpipe_table_counter_enabled);
* @table_name: table name
* @table_ops: table ops
* @priv: priv
- * @size: size
* @counter_control_extern: external control for counters
*/
int devlink_dpipe_table_register(struct devlink *devlink,
const char *table_name,
struct devlink_dpipe_table_ops *table_ops,
- void *priv, u64 size,
- bool counter_control_extern)
+ void *priv, bool counter_control_extern)
{
struct devlink_dpipe_table *table;
if (devlink_dpipe_table_find(&devlink->dpipe_table_list, table_name))
return -EEXIST;
+ if (WARN_ON(!table_ops->size_get))
+ return -EINVAL;
+
table = kzalloc(sizeof(*table), GFP_KERNEL);
if (!table)
return -ENOMEM;
@@ -2705,7 +2781,6 @@ int devlink_dpipe_table_register(struct devlink *devlink,
table->name = table_name;
table->table_ops = table_ops;
table->priv = priv;
- table->size = size;
table->counter_control_extern = counter_control_extern;
mutex_lock(&devlink_mutex);
diff --git a/net/core/dst.c b/net/core/dst.c
index 00aa972ad1a1..662a2d4a3d19 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -55,7 +55,7 @@ const struct dst_metrics dst_default_metrics = {
* We really want to avoid false sharing on this variable, and catch
* any writes on it.
*/
- .refcnt = ATOMIC_INIT(1),
+ .refcnt = REFCOUNT_INIT(1),
};
void dst_init(struct dst_entry *dst, struct dst_ops *ops,
@@ -213,7 +213,7 @@ u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old)
struct dst_metrics *old_p = (struct dst_metrics *)__DST_METRICS_PTR(old);
unsigned long prev, new;
- atomic_set(&p->refcnt, 1);
+ refcount_set(&p->refcnt, 1);
memcpy(p->metrics, old_p->metrics, sizeof(p->metrics));
new = (unsigned long) p;
@@ -225,7 +225,7 @@ u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old)
if (prev & DST_METRICS_READ_ONLY)
p = NULL;
} else if (prev & DST_METRICS_REFCOUNTED) {
- if (atomic_dec_and_test(&old_p->refcnt))
+ if (refcount_dec_and_test(&old_p->refcnt))
kfree(old_p);
}
}
@@ -299,7 +299,8 @@ EXPORT_SYMBOL_GPL(metadata_dst_alloc);
void metadata_dst_free(struct metadata_dst *md_dst)
{
#ifdef CONFIG_DST_CACHE
- dst_cache_destroy(&md_dst->u.tun_info.dst_cache);
+ if (md_dst->type == METADATA_IP_TUNNEL)
+ dst_cache_destroy(&md_dst->u.tun_info.dst_cache);
#endif
kfree(md_dst);
}
@@ -321,3 +322,19 @@ metadata_dst_alloc_percpu(u8 optslen, enum metadata_type type, gfp_t flags)
return md_dst;
}
EXPORT_SYMBOL_GPL(metadata_dst_alloc_percpu);
+
+void metadata_dst_free_percpu(struct metadata_dst __percpu *md_dst)
+{
+#ifdef CONFIG_DST_CACHE
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ struct metadata_dst *one_md_dst = per_cpu_ptr(md_dst, cpu);
+
+ if (one_md_dst->type == METADATA_IP_TUNNEL)
+ dst_cache_destroy(&one_md_dst->u.tun_info.dst_cache);
+ }
+#endif
+ free_percpu(md_dst);
+}
+EXPORT_SYMBOL_GPL(metadata_dst_free_percpu);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 674b6c9cec18..f8fcf450a36e 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -76,7 +76,6 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
[NETIF_F_LRO_BIT] = "rx-lro",
[NETIF_F_TSO_BIT] = "tx-tcp-segmentation",
- [NETIF_F_UFO_BIT] = "tx-udp-fragmentation",
[NETIF_F_GSO_ROBUST_BIT] = "tx-gso-robust",
[NETIF_F_TSO_ECN_BIT] = "tx-tcp-ecn-segmentation",
[NETIF_F_TSO_MANGLEID_BIT] = "tx-tcp-mangleid-segmentation",
@@ -106,6 +105,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
[NETIF_F_HW_TC_BIT] = "hw-tc-offload",
[NETIF_F_HW_ESP_BIT] = "esp-hw-offload",
[NETIF_F_HW_ESP_TX_CSUM_BIT] = "esp-tx-csum-hw-offload",
+ [NETIF_F_RX_UDP_TUNNEL_PORT_BIT] = "rx-udp_tunnel-port-offload",
};
static const char
@@ -299,9 +299,6 @@ static netdev_features_t ethtool_get_feature_mask(u32 eth_cmd)
case ETHTOOL_GTSO:
case ETHTOOL_STSO:
return NETIF_F_ALL_TSO;
- case ETHTOOL_GUFO:
- case ETHTOOL_SUFO:
- return NETIF_F_UFO;
case ETHTOOL_GGSO:
case ETHTOOL_SGSO:
return NETIF_F_GSO;
@@ -406,6 +403,22 @@ static int __ethtool_set_flags(struct net_device *dev, u32 data)
return 0;
}
+/* Given two link masks, AND them together and save the result in dst. */
+void ethtool_intersect_link_masks(struct ethtool_link_ksettings *dst,
+ struct ethtool_link_ksettings *src)
+{
+ unsigned int size = BITS_TO_LONGS(__ETHTOOL_LINK_MODE_MASK_NBITS);
+ unsigned int idx = 0;
+
+ for (; idx < size; idx++) {
+ dst->link_modes.supported[idx] &=
+ src->link_modes.supported[idx];
+ dst->link_modes.advertising[idx] &=
+ src->link_modes.advertising[idx];
+ }
+}
+EXPORT_SYMBOL(ethtool_intersect_link_masks);
+
void ethtool_convert_legacy_u32_to_link_mode(unsigned long *dst,
u32 legacy_u32)
{
@@ -439,7 +452,7 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32,
EXPORT_SYMBOL(ethtool_convert_link_mode_to_legacy_u32);
/* return false if legacy contained non-0 deprecated fields
- * transceiver/maxtxpkt/maxrxpkt. rest of ksettings always updated
+ * maxtxpkt/maxrxpkt. rest of ksettings always updated
*/
static bool
convert_legacy_settings_to_link_ksettings(
@@ -454,8 +467,7 @@ convert_legacy_settings_to_link_ksettings(
* deprecated legacy fields, and they should not use
* %ETHTOOL_GLINKSETTINGS/%ETHTOOL_SLINKSETTINGS
*/
- if (legacy_settings->transceiver ||
- legacy_settings->maxtxpkt ||
+ if (legacy_settings->maxtxpkt ||
legacy_settings->maxrxpkt)
retval = false;
@@ -528,6 +540,8 @@ convert_link_ksettings_to_legacy_settings(
= link_ksettings->base.eth_tp_mdix;
legacy_settings->eth_tp_mdix_ctrl
= link_ksettings->base.eth_tp_mdix_ctrl;
+ legacy_settings->transceiver
+ = link_ksettings->base.transceiver;
return retval;
}
@@ -2515,6 +2529,33 @@ static int set_phy_tunable(struct net_device *dev, void __user *useraddr)
return ret;
}
+static int ethtool_get_fecparam(struct net_device *dev, void __user *useraddr)
+{
+ struct ethtool_fecparam fecparam = { ETHTOOL_GFECPARAM };
+
+ if (!dev->ethtool_ops->get_fecparam)
+ return -EOPNOTSUPP;
+
+ dev->ethtool_ops->get_fecparam(dev, &fecparam);
+
+ if (copy_to_user(useraddr, &fecparam, sizeof(fecparam)))
+ return -EFAULT;
+ return 0;
+}
+
+static int ethtool_set_fecparam(struct net_device *dev, void __user *useraddr)
+{
+ struct ethtool_fecparam fecparam;
+
+ if (!dev->ethtool_ops->set_fecparam)
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&fecparam, useraddr, sizeof(fecparam)))
+ return -EFAULT;
+
+ return dev->ethtool_ops->set_fecparam(dev, &fecparam);
+}
+
/* The main entry point in this file. Called from net/core/dev_ioctl.c */
int dev_ethtool(struct net *net, struct ifreq *ifr)
@@ -2555,7 +2596,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
case ETHTOOL_GPHYSTATS:
case ETHTOOL_GTSO:
case ETHTOOL_GPERMADDR:
- case ETHTOOL_GUFO:
case ETHTOOL_GGSO:
case ETHTOOL_GGRO:
case ETHTOOL_GFLAGS:
@@ -2574,6 +2614,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
case ETHTOOL_GTUNABLE:
case ETHTOOL_PHY_GTUNABLE:
case ETHTOOL_GLINKSETTINGS:
+ case ETHTOOL_GFECPARAM:
break;
default:
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
@@ -2723,7 +2764,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
case ETHTOOL_GRXCSUM:
case ETHTOOL_GSG:
case ETHTOOL_GTSO:
- case ETHTOOL_GUFO:
case ETHTOOL_GGSO:
case ETHTOOL_GGRO:
rc = ethtool_get_one_feature(dev, useraddr, ethcmd);
@@ -2732,7 +2772,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
case ETHTOOL_SRXCSUM:
case ETHTOOL_SSG:
case ETHTOOL_STSO:
- case ETHTOOL_SUFO:
case ETHTOOL_SGSO:
case ETHTOOL_SGRO:
rc = ethtool_set_one_feature(dev, useraddr, ethcmd);
@@ -2785,6 +2824,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
case ETHTOOL_PHY_STUNABLE:
rc = set_phy_tunable(dev, useraddr);
break;
+ case ETHTOOL_GFECPARAM:
+ rc = ethtool_get_fecparam(dev, useraddr);
+ break;
+ case ETHTOOL_SFECPARAM:
+ rc = ethtool_set_fecparam(dev, useraddr);
+ break;
default:
rc = -EOPNOTSUPP;
}
diff --git a/net/core/fib_notifier.c b/net/core/fib_notifier.c
new file mode 100644
index 000000000000..0c048bdeb016
--- /dev/null
+++ b/net/core/fib_notifier.c
@@ -0,0 +1,181 @@
+#include <linux/rtnetlink.h>
+#include <linux/notifier.h>
+#include <linux/rcupdate.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <net/net_namespace.h>
+#include <net/fib_notifier.h>
+
+static ATOMIC_NOTIFIER_HEAD(fib_chain);
+
+int call_fib_notifier(struct notifier_block *nb, struct net *net,
+ enum fib_event_type event_type,
+ struct fib_notifier_info *info)
+{
+ info->net = net;
+ return nb->notifier_call(nb, event_type, info);
+}
+EXPORT_SYMBOL(call_fib_notifier);
+
+int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
+ struct fib_notifier_info *info)
+{
+ info->net = net;
+ return atomic_notifier_call_chain(&fib_chain, event_type, info);
+}
+EXPORT_SYMBOL(call_fib_notifiers);
+
+static unsigned int fib_seq_sum(void)
+{
+ struct fib_notifier_ops *ops;
+ unsigned int fib_seq = 0;
+ struct net *net;
+
+ rtnl_lock();
+ for_each_net(net) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(ops, &net->fib_notifier_ops, list) {
+ if (!try_module_get(ops->owner))
+ continue;
+ fib_seq += ops->fib_seq_read(net);
+ module_put(ops->owner);
+ }
+ rcu_read_unlock();
+ }
+ rtnl_unlock();
+
+ return fib_seq;
+}
+
+static int fib_net_dump(struct net *net, struct notifier_block *nb)
+{
+ struct fib_notifier_ops *ops;
+
+ list_for_each_entry_rcu(ops, &net->fib_notifier_ops, list) {
+ int err;
+
+ if (!try_module_get(ops->owner))
+ continue;
+ err = ops->fib_dump(net, nb);
+ module_put(ops->owner);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static bool fib_dump_is_consistent(struct notifier_block *nb,
+ void (*cb)(struct notifier_block *nb),
+ unsigned int fib_seq)
+{
+ atomic_notifier_chain_register(&fib_chain, nb);
+ if (fib_seq == fib_seq_sum())
+ return true;
+ atomic_notifier_chain_unregister(&fib_chain, nb);
+ if (cb)
+ cb(nb);
+ return false;
+}
+
+#define FIB_DUMP_MAX_RETRIES 5
+int register_fib_notifier(struct notifier_block *nb,
+ void (*cb)(struct notifier_block *nb))
+{
+ int retries = 0;
+ int err;
+
+ do {
+ unsigned int fib_seq = fib_seq_sum();
+ struct net *net;
+
+ rcu_read_lock();
+ for_each_net_rcu(net) {
+ err = fib_net_dump(net, nb);
+ if (err)
+ goto err_fib_net_dump;
+ }
+ rcu_read_unlock();
+
+ if (fib_dump_is_consistent(nb, cb, fib_seq))
+ return 0;
+ } while (++retries < FIB_DUMP_MAX_RETRIES);
+
+ return -EBUSY;
+
+err_fib_net_dump:
+ rcu_read_unlock();
+ return err;
+}
+EXPORT_SYMBOL(register_fib_notifier);
+
+int unregister_fib_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_unregister(&fib_chain, nb);
+}
+EXPORT_SYMBOL(unregister_fib_notifier);
+
+static int __fib_notifier_ops_register(struct fib_notifier_ops *ops,
+ struct net *net)
+{
+ struct fib_notifier_ops *o;
+
+ list_for_each_entry(o, &net->fib_notifier_ops, list)
+ if (ops->family == o->family)
+ return -EEXIST;
+ list_add_tail_rcu(&ops->list, &net->fib_notifier_ops);
+ return 0;
+}
+
+struct fib_notifier_ops *
+fib_notifier_ops_register(const struct fib_notifier_ops *tmpl, struct net *net)
+{
+ struct fib_notifier_ops *ops;
+ int err;
+
+ ops = kmemdup(tmpl, sizeof(*ops), GFP_KERNEL);
+ if (!ops)
+ return ERR_PTR(-ENOMEM);
+
+ err = __fib_notifier_ops_register(ops, net);
+ if (err)
+ goto err_register;
+
+ return ops;
+
+err_register:
+ kfree(ops);
+ return ERR_PTR(err);
+}
+EXPORT_SYMBOL(fib_notifier_ops_register);
+
+void fib_notifier_ops_unregister(struct fib_notifier_ops *ops)
+{
+ list_del_rcu(&ops->list);
+ kfree_rcu(ops, rcu);
+}
+EXPORT_SYMBOL(fib_notifier_ops_unregister);
+
+static int __net_init fib_notifier_net_init(struct net *net)
+{
+ INIT_LIST_HEAD(&net->fib_notifier_ops);
+ return 0;
+}
+
+static void __net_exit fib_notifier_net_exit(struct net *net)
+{
+ WARN_ON_ONCE(!list_empty(&net->fib_notifier_ops));
+}
+
+static struct pernet_operations fib_notifier_net_ops = {
+ .init = fib_notifier_net_init,
+ .exit = fib_notifier_net_exit,
+};
+
+static int __init fib_notifier_init(void)
+{
+ return register_pernet_subsys(&fib_notifier_net_ops);
+}
+
+subsys_initcall(fib_notifier_init);
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index fdcb1bcd2afa..98e1066c3d55 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -299,6 +299,69 @@ out:
}
EXPORT_SYMBOL_GPL(fib_rules_lookup);
+static int call_fib_rule_notifier(struct notifier_block *nb, struct net *net,
+ enum fib_event_type event_type,
+ struct fib_rule *rule, int family)
+{
+ struct fib_rule_notifier_info info = {
+ .info.family = family,
+ .rule = rule,
+ };
+
+ return call_fib_notifier(nb, net, event_type, &info.info);
+}
+
+static int call_fib_rule_notifiers(struct net *net,
+ enum fib_event_type event_type,
+ struct fib_rule *rule,
+ struct fib_rules_ops *ops,
+ struct netlink_ext_ack *extack)
+{
+ struct fib_rule_notifier_info info = {
+ .info.family = ops->family,
+ .info.extack = extack,
+ .rule = rule,
+ };
+
+ ops->fib_rules_seq++;
+ return call_fib_notifiers(net, event_type, &info.info);
+}
+
+/* Called with rcu_read_lock() */
+int fib_rules_dump(struct net *net, struct notifier_block *nb, int family)
+{
+ struct fib_rules_ops *ops;
+ struct fib_rule *rule;
+
+ ops = lookup_rules_ops(net, family);
+ if (!ops)
+ return -EAFNOSUPPORT;
+ list_for_each_entry_rcu(rule, &ops->rules_list, list)
+ call_fib_rule_notifier(nb, net, FIB_EVENT_RULE_ADD, rule,
+ family);
+ rules_ops_put(ops);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(fib_rules_dump);
+
+unsigned int fib_rules_seq_read(struct net *net, int family)
+{
+ unsigned int fib_rules_seq;
+ struct fib_rules_ops *ops;
+
+ ASSERT_RTNL();
+
+ ops = lookup_rules_ops(net, family);
+ if (!ops)
+ return 0;
+ fib_rules_seq = ops->fib_rules_seq;
+ rules_ops_put(ops);
+
+ return fib_rules_seq;
+}
+EXPORT_SYMBOL_GPL(fib_rules_seq_read);
+
static int validate_rulemsg(struct fib_rule_hdr *frh, struct nlattr **tb,
struct fib_rules_ops *ops)
{
@@ -548,6 +611,7 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
if (rule->tun_id)
ip_tunnel_need_metadata();
+ call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule, ops, extack);
notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).portid);
flush_route_cache(ops);
rules_ops_put(ops);
@@ -687,6 +751,8 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
}
}
+ call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule, ops,
+ NULL);
notify_rule_change(RTM_DELRULE, rule, ops, nlh,
NETLINK_CB(skb).portid);
fib_rule_put(rule);
@@ -956,16 +1022,22 @@ static int __net_init fib_rules_net_init(struct net *net)
return 0;
}
+static void __net_exit fib_rules_net_exit(struct net *net)
+{
+ WARN_ON_ONCE(!list_empty(&net->rules_ops));
+}
+
static struct pernet_operations fib_rules_net_ops = {
.init = fib_rules_net_init,
+ .exit = fib_rules_net_exit,
};
static int __init fib_rules_init(void)
{
int err;
- rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL, NULL);
- rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL, NULL);
- rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule, NULL);
+ rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL, 0);
+ rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL, 0);
+ rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule, 0);
err = register_pernet_subsys(&fib_rules_net_ops);
if (err < 0)
diff --git a/net/core/filter.c b/net/core/filter.c
index 6280a602604c..1afa17935954 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -43,6 +43,7 @@
#include <linux/timer.h>
#include <linux/uaccess.h>
#include <asm/unaligned.h>
+#include <asm/cmpxchg.h>
#include <linux/filter.h>
#include <linux/ratelimit.h>
#include <linux/seccomp.h>
@@ -55,6 +56,7 @@
#include <net/sock_reuseport.h>
#include <net/busy_poll.h>
#include <net/tcp.h>
+#include <linux/bpf_trace.h>
/**
* sk_filter_trim_cap - run a packet through a socket filter
@@ -513,14 +515,27 @@ do_pass:
break;
}
- /* Convert JEQ into JNE when 'jump_true' is next insn. */
- if (fp->jt == 0 && BPF_OP(fp->code) == BPF_JEQ) {
- insn->code = BPF_JMP | BPF_JNE | bpf_src;
+ /* Convert some jumps when 'jump_true' is next insn. */
+ if (fp->jt == 0) {
+ switch (BPF_OP(fp->code)) {
+ case BPF_JEQ:
+ insn->code = BPF_JMP | BPF_JNE | bpf_src;
+ break;
+ case BPF_JGT:
+ insn->code = BPF_JMP | BPF_JLE | bpf_src;
+ break;
+ case BPF_JGE:
+ insn->code = BPF_JMP | BPF_JLT | bpf_src;
+ break;
+ default:
+ goto jmp_rest;
+ }
+
target = i + fp->jf + 1;
BPF_EMIT_JMP;
break;
}
-
+jmp_rest:
/* Other jumps are mapped into two insns: Jxx and JA. */
target = i + fp->jt + 1;
insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
@@ -975,10 +990,14 @@ static bool __sk_filter_charge(struct sock *sk, struct sk_filter *fp)
bool sk_filter_charge(struct sock *sk, struct sk_filter *fp)
{
- bool ret = __sk_filter_charge(sk, fp);
- if (ret)
- refcount_inc(&fp->refcnt);
- return ret;
+ if (!refcount_inc_not_zero(&fp->refcnt))
+ return false;
+
+ if (!__sk_filter_charge(sk, fp)) {
+ sk_filter_release(fp);
+ return false;
+ }
+ return true;
}
static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
@@ -1388,7 +1407,7 @@ static inline int bpf_try_make_writable(struct sk_buff *skb,
{
int err = __bpf_try_make_writable(skb, write_len);
- bpf_compute_data_end(skb);
+ bpf_compute_data_pointers(skb);
return err;
}
@@ -1778,6 +1797,9 @@ static const struct bpf_func_proto bpf_clone_redirect_proto = {
struct redirect_info {
u32 ifindex;
u32 flags;
+ struct bpf_map *map;
+ struct bpf_map *map_to_flush;
+ unsigned long map_owner;
};
static DEFINE_PER_CPU(struct redirect_info, redirect_info);
@@ -1818,6 +1840,47 @@ static const struct bpf_func_proto bpf_redirect_proto = {
.arg2_type = ARG_ANYTHING,
};
+BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
+ struct bpf_map *, map, u32, key, u64, flags)
+{
+ struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
+
+ /* If user passes invalid input drop the packet. */
+ if (unlikely(flags))
+ return SK_DROP;
+
+ tcb->bpf.key = key;
+ tcb->bpf.flags = flags;
+ tcb->bpf.map = map;
+
+ return SK_PASS;
+}
+
+struct sock *do_sk_redirect_map(struct sk_buff *skb)
+{
+ struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
+ struct sock *sk = NULL;
+
+ if (tcb->bpf.map) {
+ sk = __sock_map_lookup_elem(tcb->bpf.map, tcb->bpf.key);
+
+ tcb->bpf.key = 0;
+ tcb->bpf.map = NULL;
+ }
+
+ return sk;
+}
+
+static const struct bpf_func_proto bpf_sk_redirect_map_proto = {
+ .func = bpf_sk_redirect_map,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_CONST_MAP_PTR,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_ANYTHING,
+};
+
BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
{
return task_get_classid(skb);
@@ -1906,7 +1969,7 @@ BPF_CALL_3(bpf_skb_vlan_push, struct sk_buff *, skb, __be16, vlan_proto,
ret = skb_vlan_push(skb, vlan_proto, vlan_tci);
bpf_pull_mac_rcsum(skb);
- bpf_compute_data_end(skb);
+ bpf_compute_data_pointers(skb);
return ret;
}
@@ -1928,7 +1991,7 @@ BPF_CALL_1(bpf_skb_vlan_pop, struct sk_buff *, skb)
ret = skb_vlan_pop(skb);
bpf_pull_mac_rcsum(skb);
- bpf_compute_data_end(skb);
+ bpf_compute_data_pointers(skb);
return ret;
}
@@ -2024,8 +2087,8 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
return ret;
if (skb_is_gso(skb)) {
- /* SKB_GSO_UDP stays as is. SKB_GSO_TCPV4 needs to
- * be changed into SKB_GSO_TCPV6.
+ /* SKB_GSO_TCPV4 needs to be changed into
+ * SKB_GSO_TCPV6.
*/
if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) {
skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV4;
@@ -2060,8 +2123,8 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
return ret;
if (skb_is_gso(skb)) {
- /* SKB_GSO_UDP stays as is. SKB_GSO_TCPV6 needs to
- * be changed into SKB_GSO_TCPV4.
+ /* SKB_GSO_TCPV6 needs to be changed into
+ * SKB_GSO_TCPV4.
*/
if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) {
skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV6;
@@ -2122,7 +2185,7 @@ BPF_CALL_3(bpf_skb_change_proto, struct sk_buff *, skb, __be16, proto,
* need to be verified first.
*/
ret = bpf_skb_proto_xlat(skb, proto);
- bpf_compute_data_end(skb);
+ bpf_compute_data_pointers(skb);
return ret;
}
@@ -2247,7 +2310,7 @@ static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff)
ret = shrink ? bpf_skb_net_shrink(skb, len_diff_abs) :
bpf_skb_net_grow(skb, len_diff_abs);
- bpf_compute_data_end(skb);
+ bpf_compute_data_pointers(skb);
return ret;
}
@@ -2338,7 +2401,7 @@ BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len,
skb_gso_reset(skb);
}
- bpf_compute_data_end(skb);
+ bpf_compute_data_pointers(skb);
return ret;
}
@@ -2378,7 +2441,7 @@ BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room,
skb_reset_mac_header(skb);
}
- bpf_compute_data_end(skb);
+ bpf_compute_data_pointers(skb);
return 0;
}
@@ -2391,14 +2454,26 @@ static const struct bpf_func_proto bpf_skb_change_head_proto = {
.arg3_type = ARG_ANYTHING,
};
+static unsigned long xdp_get_metalen(const struct xdp_buff *xdp)
+{
+ return xdp_data_meta_unsupported(xdp) ? 0 :
+ xdp->data - xdp->data_meta;
+}
+
BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset)
{
+ unsigned long metalen = xdp_get_metalen(xdp);
+ void *data_start = xdp->data_hard_start + metalen;
void *data = xdp->data + offset;
- if (unlikely(data < xdp->data_hard_start ||
+ if (unlikely(data < data_start ||
data > xdp->data_end - ETH_HLEN))
return -EINVAL;
+ if (metalen)
+ memmove(xdp->data_meta + offset,
+ xdp->data_meta, metalen);
+ xdp->data_meta += offset;
xdp->data = data;
return 0;
@@ -2412,6 +2487,326 @@ static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
.arg2_type = ARG_ANYTHING,
};
+BPF_CALL_2(bpf_xdp_adjust_meta, struct xdp_buff *, xdp, int, offset)
+{
+ void *meta = xdp->data_meta + offset;
+ unsigned long metalen = xdp->data - meta;
+
+ if (xdp_data_meta_unsupported(xdp))
+ return -ENOTSUPP;
+ if (unlikely(meta < xdp->data_hard_start ||
+ meta > xdp->data))
+ return -EINVAL;
+ if (unlikely((metalen & (sizeof(__u32) - 1)) ||
+ (metalen > 32)))
+ return -EACCES;
+
+ xdp->data_meta = meta;
+
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = {
+ .func = bpf_xdp_adjust_meta,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+};
+
+static int __bpf_tx_xdp(struct net_device *dev,
+ struct bpf_map *map,
+ struct xdp_buff *xdp,
+ u32 index)
+{
+ int err;
+
+ if (!dev->netdev_ops->ndo_xdp_xmit) {
+ return -EOPNOTSUPP;
+ }
+
+ err = dev->netdev_ops->ndo_xdp_xmit(dev, xdp);
+ if (err)
+ return err;
+ dev->netdev_ops->ndo_xdp_flush(dev);
+ return 0;
+}
+
+static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
+ struct bpf_map *map,
+ struct xdp_buff *xdp,
+ u32 index)
+{
+ int err;
+
+ if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
+ struct net_device *dev = fwd;
+
+ if (!dev->netdev_ops->ndo_xdp_xmit)
+ return -EOPNOTSUPP;
+
+ err = dev->netdev_ops->ndo_xdp_xmit(dev, xdp);
+ if (err)
+ return err;
+ __dev_map_insert_ctx(map, index);
+
+ } else if (map->map_type == BPF_MAP_TYPE_CPUMAP) {
+ struct bpf_cpu_map_entry *rcpu = fwd;
+
+ err = cpu_map_enqueue(rcpu, xdp, dev_rx);
+ if (err)
+ return err;
+ __cpu_map_insert_ctx(map, index);
+ }
+ return 0;
+}
+
+void xdp_do_flush_map(void)
+{
+ struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+ struct bpf_map *map = ri->map_to_flush;
+
+ ri->map_to_flush = NULL;
+ if (map) {
+ switch (map->map_type) {
+ case BPF_MAP_TYPE_DEVMAP:
+ __dev_map_flush(map);
+ break;
+ case BPF_MAP_TYPE_CPUMAP:
+ __cpu_map_flush(map);
+ break;
+ default:
+ break;
+ }
+ }
+}
+EXPORT_SYMBOL_GPL(xdp_do_flush_map);
+
+static void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index)
+{
+ switch (map->map_type) {
+ case BPF_MAP_TYPE_DEVMAP:
+ return __dev_map_lookup_elem(map, index);
+ case BPF_MAP_TYPE_CPUMAP:
+ return __cpu_map_lookup_elem(map, index);
+ default:
+ return NULL;
+ }
+}
+
+static inline bool xdp_map_invalid(const struct bpf_prog *xdp_prog,
+ unsigned long aux)
+{
+ return (unsigned long)xdp_prog->aux != aux;
+}
+
+static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
+ struct bpf_prog *xdp_prog)
+{
+ struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+ unsigned long map_owner = ri->map_owner;
+ struct bpf_map *map = ri->map;
+ u32 index = ri->ifindex;
+ void *fwd = NULL;
+ int err;
+
+ ri->ifindex = 0;
+ ri->map = NULL;
+ ri->map_owner = 0;
+
+ if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) {
+ err = -EFAULT;
+ map = NULL;
+ goto err;
+ }
+
+ fwd = __xdp_map_lookup_elem(map, index);
+ if (!fwd) {
+ err = -EINVAL;
+ goto err;
+ }
+ if (ri->map_to_flush && ri->map_to_flush != map)
+ xdp_do_flush_map();
+
+ err = __bpf_tx_xdp_map(dev, fwd, map, xdp, index);
+ if (unlikely(err))
+ goto err;
+
+ ri->map_to_flush = map;
+ _trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index);
+ return 0;
+err:
+ _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err);
+ return err;
+}
+
+int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
+ struct bpf_prog *xdp_prog)
+{
+ struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+ struct net_device *fwd;
+ u32 index = ri->ifindex;
+ int err;
+
+ if (ri->map)
+ return xdp_do_redirect_map(dev, xdp, xdp_prog);
+
+ fwd = dev_get_by_index_rcu(dev_net(dev), index);
+ ri->ifindex = 0;
+ if (unlikely(!fwd)) {
+ err = -EINVAL;
+ goto err;
+ }
+
+ err = __bpf_tx_xdp(fwd, NULL, xdp, 0);
+ if (unlikely(err))
+ goto err;
+
+ _trace_xdp_redirect(dev, xdp_prog, index);
+ return 0;
+err:
+ _trace_xdp_redirect_err(dev, xdp_prog, index, err);
+ return err;
+}
+EXPORT_SYMBOL_GPL(xdp_do_redirect);
+
+static int __xdp_generic_ok_fwd_dev(struct sk_buff *skb, struct net_device *fwd)
+{
+ unsigned int len;
+
+ if (unlikely(!(fwd->flags & IFF_UP)))
+ return -ENETDOWN;
+
+ len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN;
+ if (skb->len > len)
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+int xdp_do_generic_redirect_map(struct net_device *dev, struct sk_buff *skb,
+ struct bpf_prog *xdp_prog)
+{
+ struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+ unsigned long map_owner = ri->map_owner;
+ struct bpf_map *map = ri->map;
+ struct net_device *fwd = NULL;
+ u32 index = ri->ifindex;
+ int err = 0;
+
+ ri->ifindex = 0;
+ ri->map = NULL;
+ ri->map_owner = 0;
+
+ if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) {
+ err = -EFAULT;
+ map = NULL;
+ goto err;
+ }
+ fwd = __xdp_map_lookup_elem(map, index);
+ if (unlikely(!fwd)) {
+ err = -EINVAL;
+ goto err;
+ }
+
+ if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
+ if (unlikely((err = __xdp_generic_ok_fwd_dev(skb, fwd))))
+ goto err;
+ skb->dev = fwd;
+ } else {
+ /* TODO: Handle BPF_MAP_TYPE_CPUMAP */
+ err = -EBADRQC;
+ goto err;
+ }
+
+ _trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index);
+ return 0;
+err:
+ _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err);
+ return err;
+}
+
+int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
+ struct bpf_prog *xdp_prog)
+{
+ struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+ u32 index = ri->ifindex;
+ struct net_device *fwd;
+ int err = 0;
+
+ if (ri->map)
+ return xdp_do_generic_redirect_map(dev, skb, xdp_prog);
+
+ ri->ifindex = 0;
+ fwd = dev_get_by_index_rcu(dev_net(dev), index);
+ if (unlikely(!fwd)) {
+ err = -EINVAL;
+ goto err;
+ }
+
+ if (unlikely((err = __xdp_generic_ok_fwd_dev(skb, fwd))))
+ goto err;
+
+ skb->dev = fwd;
+ _trace_xdp_redirect(dev, xdp_prog, index);
+ return 0;
+err:
+ _trace_xdp_redirect_err(dev, xdp_prog, index, err);
+ return err;
+}
+EXPORT_SYMBOL_GPL(xdp_do_generic_redirect);
+
+BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags)
+{
+ struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+
+ if (unlikely(flags))
+ return XDP_ABORTED;
+
+ ri->ifindex = ifindex;
+ ri->flags = flags;
+ ri->map = NULL;
+ ri->map_owner = 0;
+
+ return XDP_REDIRECT;
+}
+
+static const struct bpf_func_proto bpf_xdp_redirect_proto = {
+ .func = bpf_xdp_redirect,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_ANYTHING,
+ .arg2_type = ARG_ANYTHING,
+};
+
+BPF_CALL_4(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex, u64, flags,
+ unsigned long, map_owner)
+{
+ struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+
+ if (unlikely(flags))
+ return XDP_ABORTED;
+
+ ri->ifindex = ifindex;
+ ri->flags = flags;
+ ri->map = map;
+ ri->map_owner = map_owner;
+
+ return XDP_REDIRECT;
+}
+
+/* Note, arg4 is hidden from users and populated by the verifier
+ * with the right pointer.
+ */
+static const struct bpf_func_proto bpf_xdp_redirect_map_proto = {
+ .func = bpf_xdp_redirect_map,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_CONST_MAP_PTR,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+};
+
bool bpf_helper_changes_pkt_data(void *func)
{
if (func == bpf_skb_vlan_push ||
@@ -2425,7 +2820,8 @@ bool bpf_helper_changes_pkt_data(void *func)
func == bpf_clone_redirect ||
func == bpf_l3_csum_replace ||
func == bpf_l4_csum_replace ||
- func == bpf_xdp_adjust_head)
+ func == bpf_xdp_adjust_head ||
+ func == bpf_xdp_adjust_meta)
return true;
return false;
@@ -2676,14 +3072,15 @@ static const struct bpf_func_proto *
bpf_get_skb_set_tunnel_proto(enum bpf_func_id which)
{
if (!md_dst) {
- /* Race is not possible, since it's called from verifier
- * that is holding verifier mutex.
- */
- md_dst = metadata_dst_alloc_percpu(IP_TUNNEL_OPTS_MAX,
- METADATA_IP_TUNNEL,
- GFP_KERNEL);
- if (!md_dst)
+ struct metadata_dst __percpu *tmp;
+
+ tmp = metadata_dst_alloc_percpu(IP_TUNNEL_OPTS_MAX,
+ METADATA_IP_TUNNEL,
+ GFP_KERNEL);
+ if (!tmp)
return NULL;
+ if (cmpxchg(&md_dst, NULL, tmp))
+ metadata_dst_free_percpu(tmp);
}
switch (which) {
@@ -2836,15 +3233,12 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
sk->sk_prot->setsockopt == tcp_setsockopt) {
if (optname == TCP_CONGESTION) {
char name[TCP_CA_NAME_MAX];
+ bool reinit = bpf_sock->op > BPF_SOCK_OPS_NEEDS_ECN;
strncpy(name, optval, min_t(long, optlen,
TCP_CA_NAME_MAX-1));
name[TCP_CA_NAME_MAX-1] = 0;
- ret = tcp_set_congestion_control(sk, name, false);
- if (!ret && bpf_sock->op > BPF_SOCK_OPS_NEEDS_ECN)
- /* replacing an existing ca */
- tcp_reinit_congestion_control(sk,
- inet_csk(sk)->icsk_ca_ops);
+ ret = tcp_set_congestion_control(sk, name, false, reinit);
} else {
struct tcp_sock *tp = tcp_sk(sk);
@@ -2872,7 +3266,6 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
ret = -EINVAL;
}
}
- ret = -EINVAL;
#endif
} else {
ret = -EINVAL;
@@ -2882,7 +3275,7 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
static const struct bpf_func_proto bpf_setsockopt_proto = {
.func = bpf_setsockopt,
- .gpl_only = true,
+ .gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_ANYTHING,
@@ -2891,6 +3284,47 @@ static const struct bpf_func_proto bpf_setsockopt_proto = {
.arg5_type = ARG_CONST_SIZE,
};
+BPF_CALL_5(bpf_getsockopt, struct bpf_sock_ops_kern *, bpf_sock,
+ int, level, int, optname, char *, optval, int, optlen)
+{
+ struct sock *sk = bpf_sock->sk;
+
+ if (!sk_fullsock(sk))
+ goto err_clear;
+
+#ifdef CONFIG_INET
+ if (level == SOL_TCP && sk->sk_prot->getsockopt == tcp_getsockopt) {
+ if (optname == TCP_CONGESTION) {
+ struct inet_connection_sock *icsk = inet_csk(sk);
+
+ if (!icsk->icsk_ca_ops || optlen <= 1)
+ goto err_clear;
+ strncpy(optval, icsk->icsk_ca_ops->name, optlen);
+ optval[optlen - 1] = 0;
+ } else {
+ goto err_clear;
+ }
+ } else {
+ goto err_clear;
+ }
+ return 0;
+#endif
+err_clear:
+ memset(optval, 0, optlen);
+ return -EINVAL;
+}
+
+static const struct bpf_func_proto bpf_getsockopt_proto = {
+ .func = bpf_getsockopt,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg5_type = ARG_CONST_SIZE,
+};
+
static const struct bpf_func_proto *
bpf_base_func_proto(enum bpf_func_id func_id)
{
@@ -2920,6 +3354,20 @@ bpf_base_func_proto(enum bpf_func_id func_id)
}
static const struct bpf_func_proto *
+sock_filter_func_proto(enum bpf_func_id func_id)
+{
+ switch (func_id) {
+ /* inet and inet6 sockets are created in a process
+ * context so there is always a valid uid/gid
+ */
+ case BPF_FUNC_get_current_uid_gid:
+ return &bpf_get_current_uid_gid_proto;
+ default:
+ return bpf_base_func_proto(func_id);
+ }
+}
+
+static const struct bpf_func_proto *
sk_filter_func_proto(enum bpf_func_id func_id)
{
switch (func_id) {
@@ -3011,6 +3459,12 @@ xdp_func_proto(enum bpf_func_id func_id)
return &bpf_get_smp_processor_id_proto;
case BPF_FUNC_xdp_adjust_head:
return &bpf_xdp_adjust_head_proto;
+ case BPF_FUNC_xdp_adjust_meta:
+ return &bpf_xdp_adjust_meta_proto;
+ case BPF_FUNC_redirect:
+ return &bpf_xdp_redirect_proto;
+ case BPF_FUNC_redirect_map:
+ return &bpf_xdp_redirect_map_proto;
default:
return bpf_base_func_proto(func_id);
}
@@ -3049,6 +3503,34 @@ static const struct bpf_func_proto *
switch (func_id) {
case BPF_FUNC_setsockopt:
return &bpf_setsockopt_proto;
+ case BPF_FUNC_getsockopt:
+ return &bpf_getsockopt_proto;
+ case BPF_FUNC_sock_map_update:
+ return &bpf_sock_map_update_proto;
+ default:
+ return bpf_base_func_proto(func_id);
+ }
+}
+
+static const struct bpf_func_proto *sk_skb_func_proto(enum bpf_func_id func_id)
+{
+ switch (func_id) {
+ case BPF_FUNC_skb_store_bytes:
+ return &bpf_skb_store_bytes_proto;
+ case BPF_FUNC_skb_load_bytes:
+ return &bpf_skb_load_bytes_proto;
+ case BPF_FUNC_skb_pull_data:
+ return &bpf_skb_pull_data_proto;
+ case BPF_FUNC_skb_change_tail:
+ return &bpf_skb_change_tail_proto;
+ case BPF_FUNC_skb_change_head:
+ return &bpf_skb_change_head_proto;
+ case BPF_FUNC_get_socket_cookie:
+ return &bpf_get_socket_cookie_proto;
+ case BPF_FUNC_get_socket_uid:
+ return &bpf_get_socket_uid_proto;
+ case BPF_FUNC_sk_redirect_map:
+ return &bpf_sk_redirect_map_proto;
default:
return bpf_base_func_proto(func_id);
}
@@ -3106,7 +3588,12 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
if (off + size > offsetofend(struct __sk_buff, cb[4]))
return false;
break;
+ case bpf_ctx_range_till(struct __sk_buff, remote_ip6[0], remote_ip6[3]):
+ case bpf_ctx_range_till(struct __sk_buff, local_ip6[0], local_ip6[3]):
+ case bpf_ctx_range_till(struct __sk_buff, remote_ip4, remote_ip4):
+ case bpf_ctx_range_till(struct __sk_buff, local_ip4, local_ip4):
case bpf_ctx_range(struct __sk_buff, data):
+ case bpf_ctx_range(struct __sk_buff, data_meta):
case bpf_ctx_range(struct __sk_buff, data_end):
if (size != size_default)
return false;
@@ -3133,7 +3620,9 @@ static bool sk_filter_is_valid_access(int off, int size,
switch (off) {
case bpf_ctx_range(struct __sk_buff, tc_classid):
case bpf_ctx_range(struct __sk_buff, data):
+ case bpf_ctx_range(struct __sk_buff, data_meta):
case bpf_ctx_range(struct __sk_buff, data_end):
+ case bpf_ctx_range_till(struct __sk_buff, family, local_port):
return false;
}
@@ -3155,6 +3644,8 @@ static bool lwt_is_valid_access(int off, int size,
{
switch (off) {
case bpf_ctx_range(struct __sk_buff, tc_classid):
+ case bpf_ctx_range_till(struct __sk_buff, family, local_port):
+ case bpf_ctx_range(struct __sk_buff, data_meta):
return false;
}
@@ -3188,6 +3679,8 @@ static bool sock_filter_is_valid_access(int off, int size,
if (type == BPF_WRITE) {
switch (off) {
case offsetof(struct bpf_sock, bound_dev_if):
+ case offsetof(struct bpf_sock, mark):
+ case offsetof(struct bpf_sock, priority):
break;
default:
return false;
@@ -3205,8 +3698,8 @@ static bool sock_filter_is_valid_access(int off, int size,
return true;
}
-static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write,
- const struct bpf_prog *prog)
+static int bpf_unclone_prologue(struct bpf_insn *insn_buf, bool direct_write,
+ const struct bpf_prog *prog, int drop_verdict)
{
struct bpf_insn *insn = insn_buf;
@@ -3233,7 +3726,7 @@ static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write,
* return TC_ACT_SHOT;
*/
*insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2);
- *insn++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, TC_ACT_SHOT);
+ *insn++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, drop_verdict);
*insn++ = BPF_EXIT_INSN();
/* restore: */
@@ -3244,6 +3737,12 @@ static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write,
return insn - insn_buf;
}
+static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write,
+ const struct bpf_prog *prog)
+{
+ return bpf_unclone_prologue(insn_buf, direct_write, prog, TC_ACT_SHOT);
+}
+
static bool tc_cls_act_is_valid_access(int off, int size,
enum bpf_access_type type,
struct bpf_insn_access_aux *info)
@@ -3265,9 +3764,14 @@ static bool tc_cls_act_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, data):
info->reg_type = PTR_TO_PACKET;
break;
+ case bpf_ctx_range(struct __sk_buff, data_meta):
+ info->reg_type = PTR_TO_PACKET_META;
+ break;
case bpf_ctx_range(struct __sk_buff, data_end):
info->reg_type = PTR_TO_PACKET_END;
break;
+ case bpf_ctx_range_till(struct __sk_buff, family, local_port):
+ return false;
}
return bpf_skb_is_valid_access(off, size, type, info);
@@ -3296,6 +3800,9 @@ static bool xdp_is_valid_access(int off, int size,
case offsetof(struct xdp_md, data):
info->reg_type = PTR_TO_PACKET;
break;
+ case offsetof(struct xdp_md, data_meta):
+ info->reg_type = PTR_TO_PACKET_META;
+ break;
case offsetof(struct xdp_md, data_end):
info->reg_type = PTR_TO_PACKET_END;
break;
@@ -3306,7 +3813,11 @@ static bool xdp_is_valid_access(int off, int size,
void bpf_warn_invalid_xdp_action(u32 act)
{
- WARN_ONCE(1, "Illegal XDP return value %u, expect packet loss\n", act);
+ const u32 act_max = XDP_REDIRECT;
+
+ WARN_ONCE(1, "%s XDP return value %u, expect packet loss!\n",
+ act > act_max ? "Illegal" : "Driver unsupported",
+ act);
}
EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
@@ -3340,6 +3851,46 @@ static bool sock_ops_is_valid_access(int off, int size,
return __is_valid_sock_ops_access(off, size);
}
+static int sk_skb_prologue(struct bpf_insn *insn_buf, bool direct_write,
+ const struct bpf_prog *prog)
+{
+ return bpf_unclone_prologue(insn_buf, direct_write, prog, SK_DROP);
+}
+
+static bool sk_skb_is_valid_access(int off, int size,
+ enum bpf_access_type type,
+ struct bpf_insn_access_aux *info)
+{
+ switch (off) {
+ case bpf_ctx_range(struct __sk_buff, tc_classid):
+ case bpf_ctx_range(struct __sk_buff, data_meta):
+ return false;
+ }
+
+ if (type == BPF_WRITE) {
+ switch (off) {
+ case bpf_ctx_range(struct __sk_buff, tc_index):
+ case bpf_ctx_range(struct __sk_buff, priority):
+ break;
+ default:
+ return false;
+ }
+ }
+
+ switch (off) {
+ case bpf_ctx_range(struct __sk_buff, mark):
+ return false;
+ case bpf_ctx_range(struct __sk_buff, data):
+ info->reg_type = PTR_TO_PACKET;
+ break;
+ case bpf_ctx_range(struct __sk_buff, data_end):
+ info->reg_type = PTR_TO_PACKET_END;
+ break;
+ }
+
+ return bpf_skb_is_valid_access(off, size, type, info);
+}
+
static u32 bpf_convert_ctx_access(enum bpf_access_type type,
const struct bpf_insn *si,
struct bpf_insn *insn_buf,
@@ -3485,6 +4036,15 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
offsetof(struct sk_buff, data));
break;
+ case offsetof(struct __sk_buff, data_meta):
+ off = si->off;
+ off -= offsetof(struct __sk_buff, data_meta);
+ off += offsetof(struct sk_buff, cb);
+ off += offsetof(struct bpf_skb_data_end, data_meta);
+ *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
+ si->src_reg, off);
+ break;
+
case offsetof(struct __sk_buff, data_end):
off = si->off;
off -= offsetof(struct __sk_buff, data_end);
@@ -3525,6 +4085,106 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
*insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
#endif
break;
+ case offsetof(struct __sk_buff, family):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_family) != 2);
+
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
+ si->dst_reg, si->src_reg,
+ offsetof(struct sk_buff, sk));
+ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
+ bpf_target_off(struct sock_common,
+ skc_family,
+ 2, target_size));
+ break;
+ case offsetof(struct __sk_buff, remote_ip4):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_daddr) != 4);
+
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
+ si->dst_reg, si->src_reg,
+ offsetof(struct sk_buff, sk));
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
+ bpf_target_off(struct sock_common,
+ skc_daddr,
+ 4, target_size));
+ break;
+ case offsetof(struct __sk_buff, local_ip4):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
+ skc_rcv_saddr) != 4);
+
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
+ si->dst_reg, si->src_reg,
+ offsetof(struct sk_buff, sk));
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
+ bpf_target_off(struct sock_common,
+ skc_rcv_saddr,
+ 4, target_size));
+ break;
+ case offsetof(struct __sk_buff, remote_ip6[0]) ...
+ offsetof(struct __sk_buff, remote_ip6[3]):
+#if IS_ENABLED(CONFIG_IPV6)
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
+ skc_v6_daddr.s6_addr32[0]) != 4);
+
+ off = si->off;
+ off -= offsetof(struct __sk_buff, remote_ip6[0]);
+
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
+ si->dst_reg, si->src_reg,
+ offsetof(struct sk_buff, sk));
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
+ offsetof(struct sock_common,
+ skc_v6_daddr.s6_addr32[0]) +
+ off);
+#else
+ *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
+#endif
+ break;
+ case offsetof(struct __sk_buff, local_ip6[0]) ...
+ offsetof(struct __sk_buff, local_ip6[3]):
+#if IS_ENABLED(CONFIG_IPV6)
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
+ skc_v6_rcv_saddr.s6_addr32[0]) != 4);
+
+ off = si->off;
+ off -= offsetof(struct __sk_buff, local_ip6[0]);
+
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
+ si->dst_reg, si->src_reg,
+ offsetof(struct sk_buff, sk));
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
+ offsetof(struct sock_common,
+ skc_v6_rcv_saddr.s6_addr32[0]) +
+ off);
+#else
+ *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
+#endif
+ break;
+
+ case offsetof(struct __sk_buff, remote_port):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_dport) != 2);
+
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
+ si->dst_reg, si->src_reg,
+ offsetof(struct sk_buff, sk));
+ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
+ bpf_target_off(struct sock_common,
+ skc_dport,
+ 2, target_size));
+#ifndef __BIG_ENDIAN_BITFIELD
+ *insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16);
+#endif
+ break;
+
+ case offsetof(struct __sk_buff, local_port):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_num) != 2);
+
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
+ si->dst_reg, si->src_reg,
+ offsetof(struct sk_buff, sk));
+ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
+ bpf_target_off(struct sock_common,
+ skc_num, 2, target_size));
+ break;
}
return insn - insn_buf;
@@ -3549,6 +4209,28 @@ static u32 sock_filter_convert_ctx_access(enum bpf_access_type type,
offsetof(struct sock, sk_bound_dev_if));
break;
+ case offsetof(struct bpf_sock, mark):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_mark) != 4);
+
+ if (type == BPF_WRITE)
+ *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
+ offsetof(struct sock, sk_mark));
+ else
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
+ offsetof(struct sock, sk_mark));
+ break;
+
+ case offsetof(struct bpf_sock, priority):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_priority) != 4);
+
+ if (type == BPF_WRITE)
+ *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
+ offsetof(struct sock, sk_priority));
+ else
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
+ offsetof(struct sock, sk_priority));
+ break;
+
case offsetof(struct bpf_sock, family):
BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_family) != 2);
@@ -3611,6 +4293,11 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
si->dst_reg, si->src_reg,
offsetof(struct xdp_buff, data));
break;
+ case offsetof(struct xdp_md, data_meta):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_meta),
+ si->dst_reg, si->src_reg,
+ offsetof(struct xdp_buff, data_meta));
+ break;
case offsetof(struct xdp_md, data_end):
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_end),
si->dst_reg, si->src_reg,
@@ -3754,61 +4441,120 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
return insn - insn_buf;
}
-const struct bpf_verifier_ops sk_filter_prog_ops = {
+static u32 sk_skb_convert_ctx_access(enum bpf_access_type type,
+ const struct bpf_insn *si,
+ struct bpf_insn *insn_buf,
+ struct bpf_prog *prog, u32 *target_size)
+{
+ struct bpf_insn *insn = insn_buf;
+ int off;
+
+ switch (si->off) {
+ case offsetof(struct __sk_buff, data_end):
+ off = si->off;
+ off -= offsetof(struct __sk_buff, data_end);
+ off += offsetof(struct sk_buff, cb);
+ off += offsetof(struct tcp_skb_cb, bpf.data_end);
+ *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
+ si->src_reg, off);
+ break;
+ default:
+ return bpf_convert_ctx_access(type, si, insn_buf, prog,
+ target_size);
+ }
+
+ return insn - insn_buf;
+}
+
+const struct bpf_verifier_ops sk_filter_verifier_ops = {
.get_func_proto = sk_filter_func_proto,
.is_valid_access = sk_filter_is_valid_access,
.convert_ctx_access = bpf_convert_ctx_access,
};
-const struct bpf_verifier_ops tc_cls_act_prog_ops = {
+const struct bpf_prog_ops sk_filter_prog_ops = {
+};
+
+const struct bpf_verifier_ops tc_cls_act_verifier_ops = {
.get_func_proto = tc_cls_act_func_proto,
.is_valid_access = tc_cls_act_is_valid_access,
.convert_ctx_access = tc_cls_act_convert_ctx_access,
.gen_prologue = tc_cls_act_prologue,
+};
+
+const struct bpf_prog_ops tc_cls_act_prog_ops = {
.test_run = bpf_prog_test_run_skb,
};
-const struct bpf_verifier_ops xdp_prog_ops = {
+const struct bpf_verifier_ops xdp_verifier_ops = {
.get_func_proto = xdp_func_proto,
.is_valid_access = xdp_is_valid_access,
.convert_ctx_access = xdp_convert_ctx_access,
+};
+
+const struct bpf_prog_ops xdp_prog_ops = {
.test_run = bpf_prog_test_run_xdp,
};
-const struct bpf_verifier_ops cg_skb_prog_ops = {
+const struct bpf_verifier_ops cg_skb_verifier_ops = {
.get_func_proto = sk_filter_func_proto,
.is_valid_access = sk_filter_is_valid_access,
.convert_ctx_access = bpf_convert_ctx_access,
+};
+
+const struct bpf_prog_ops cg_skb_prog_ops = {
.test_run = bpf_prog_test_run_skb,
};
-const struct bpf_verifier_ops lwt_inout_prog_ops = {
+const struct bpf_verifier_ops lwt_inout_verifier_ops = {
.get_func_proto = lwt_inout_func_proto,
.is_valid_access = lwt_is_valid_access,
.convert_ctx_access = bpf_convert_ctx_access,
+};
+
+const struct bpf_prog_ops lwt_inout_prog_ops = {
.test_run = bpf_prog_test_run_skb,
};
-const struct bpf_verifier_ops lwt_xmit_prog_ops = {
+const struct bpf_verifier_ops lwt_xmit_verifier_ops = {
.get_func_proto = lwt_xmit_func_proto,
.is_valid_access = lwt_is_valid_access,
.convert_ctx_access = bpf_convert_ctx_access,
.gen_prologue = tc_cls_act_prologue,
+};
+
+const struct bpf_prog_ops lwt_xmit_prog_ops = {
.test_run = bpf_prog_test_run_skb,
};
-const struct bpf_verifier_ops cg_sock_prog_ops = {
- .get_func_proto = bpf_base_func_proto,
+const struct bpf_verifier_ops cg_sock_verifier_ops = {
+ .get_func_proto = sock_filter_func_proto,
.is_valid_access = sock_filter_is_valid_access,
.convert_ctx_access = sock_filter_convert_ctx_access,
};
-const struct bpf_verifier_ops sock_ops_prog_ops = {
+const struct bpf_prog_ops cg_sock_prog_ops = {
+};
+
+const struct bpf_verifier_ops sock_ops_verifier_ops = {
.get_func_proto = sock_ops_func_proto,
.is_valid_access = sock_ops_is_valid_access,
.convert_ctx_access = sock_ops_convert_ctx_access,
};
+const struct bpf_prog_ops sock_ops_prog_ops = {
+};
+
+const struct bpf_verifier_ops sk_skb_verifier_ops = {
+ .get_func_proto = sk_skb_func_proto,
+ .is_valid_access = sk_skb_is_valid_access,
+ .convert_ctx_access = sk_skb_convert_ctx_access,
+ .gen_prologue = sk_skb_prologue,
+};
+
+const struct bpf_prog_ops sk_skb_prog_ops = {
+};
+
int sk_detach_filter(struct sock *sk)
{
int ret = -ENOENT;
diff --git a/net/core/flow.c b/net/core/flow.c
deleted file mode 100644
index f7f5d1932a27..000000000000
--- a/net/core/flow.c
+++ /dev/null
@@ -1,516 +0,0 @@
-/* flow.c: Generic flow cache.
- *
- * Copyright (C) 2003 Alexey N. Kuznetsov (kuznet@ms2.inr.ac.ru)
- * Copyright (C) 2003 David S. Miller (davem@redhat.com)
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/list.h>
-#include <linux/jhash.h>
-#include <linux/interrupt.h>
-#include <linux/mm.h>
-#include <linux/random.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/smp.h>
-#include <linux/completion.h>
-#include <linux/percpu.h>
-#include <linux/bitops.h>
-#include <linux/notifier.h>
-#include <linux/cpu.h>
-#include <linux/cpumask.h>
-#include <linux/mutex.h>
-#include <net/flow.h>
-#include <linux/atomic.h>
-#include <linux/security.h>
-#include <net/net_namespace.h>
-
-struct flow_cache_entry {
- union {
- struct hlist_node hlist;
- struct list_head gc_list;
- } u;
- struct net *net;
- u16 family;
- u8 dir;
- u32 genid;
- struct flowi key;
- struct flow_cache_object *object;
-};
-
-struct flow_flush_info {
- struct flow_cache *cache;
- atomic_t cpuleft;
- struct completion completion;
-};
-
-static struct kmem_cache *flow_cachep __read_mostly;
-
-#define flow_cache_hash_size(cache) (1U << (cache)->hash_shift)
-#define FLOW_HASH_RND_PERIOD (10 * 60 * HZ)
-
-static void flow_cache_new_hashrnd(unsigned long arg)
-{
- struct flow_cache *fc = (void *) arg;
- int i;
-
- for_each_possible_cpu(i)
- per_cpu_ptr(fc->percpu, i)->hash_rnd_recalc = 1;
-
- fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
- add_timer(&fc->rnd_timer);
-}
-
-static int flow_entry_valid(struct flow_cache_entry *fle,
- struct netns_xfrm *xfrm)
-{
- if (atomic_read(&xfrm->flow_cache_genid) != fle->genid)
- return 0;
- if (fle->object && !fle->object->ops->check(fle->object))
- return 0;
- return 1;
-}
-
-static void flow_entry_kill(struct flow_cache_entry *fle,
- struct netns_xfrm *xfrm)
-{
- if (fle->object)
- fle->object->ops->delete(fle->object);
- kmem_cache_free(flow_cachep, fle);
-}
-
-static void flow_cache_gc_task(struct work_struct *work)
-{
- struct list_head gc_list;
- struct flow_cache_entry *fce, *n;
- struct netns_xfrm *xfrm = container_of(work, struct netns_xfrm,
- flow_cache_gc_work);
-
- INIT_LIST_HEAD(&gc_list);
- spin_lock_bh(&xfrm->flow_cache_gc_lock);
- list_splice_tail_init(&xfrm->flow_cache_gc_list, &gc_list);
- spin_unlock_bh(&xfrm->flow_cache_gc_lock);
-
- list_for_each_entry_safe(fce, n, &gc_list, u.gc_list) {
- flow_entry_kill(fce, xfrm);
- atomic_dec(&xfrm->flow_cache_gc_count);
- }
-}
-
-static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp,
- unsigned int deleted,
- struct list_head *gc_list,
- struct netns_xfrm *xfrm)
-{
- if (deleted) {
- atomic_add(deleted, &xfrm->flow_cache_gc_count);
- fcp->hash_count -= deleted;
- spin_lock_bh(&xfrm->flow_cache_gc_lock);
- list_splice_tail(gc_list, &xfrm->flow_cache_gc_list);
- spin_unlock_bh(&xfrm->flow_cache_gc_lock);
- schedule_work(&xfrm->flow_cache_gc_work);
- }
-}
-
-static void __flow_cache_shrink(struct flow_cache *fc,
- struct flow_cache_percpu *fcp,
- unsigned int shrink_to)
-{
- struct flow_cache_entry *fle;
- struct hlist_node *tmp;
- LIST_HEAD(gc_list);
- unsigned int deleted = 0;
- struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm,
- flow_cache_global);
- unsigned int i;
-
- for (i = 0; i < flow_cache_hash_size(fc); i++) {
- unsigned int saved = 0;
-
- hlist_for_each_entry_safe(fle, tmp,
- &fcp->hash_table[i], u.hlist) {
- if (saved < shrink_to &&
- flow_entry_valid(fle, xfrm)) {
- saved++;
- } else {
- deleted++;
- hlist_del(&fle->u.hlist);
- list_add_tail(&fle->u.gc_list, &gc_list);
- }
- }
- }
-
- flow_cache_queue_garbage(fcp, deleted, &gc_list, xfrm);
-}
-
-static void flow_cache_shrink(struct flow_cache *fc,
- struct flow_cache_percpu *fcp)
-{
- unsigned int shrink_to = fc->low_watermark / flow_cache_hash_size(fc);
-
- __flow_cache_shrink(fc, fcp, shrink_to);
-}
-
-static void flow_new_hash_rnd(struct flow_cache *fc,
- struct flow_cache_percpu *fcp)
-{
- get_random_bytes(&fcp->hash_rnd, sizeof(u32));
- fcp->hash_rnd_recalc = 0;
- __flow_cache_shrink(fc, fcp, 0);
-}
-
-static u32 flow_hash_code(struct flow_cache *fc,
- struct flow_cache_percpu *fcp,
- const struct flowi *key,
- unsigned int keysize)
-{
- const u32 *k = (const u32 *) key;
- const u32 length = keysize * sizeof(flow_compare_t) / sizeof(u32);
-
- return jhash2(k, length, fcp->hash_rnd)
- & (flow_cache_hash_size(fc) - 1);
-}
-
-/* I hear what you're saying, use memcmp. But memcmp cannot make
- * important assumptions that we can here, such as alignment.
- */
-static int flow_key_compare(const struct flowi *key1, const struct flowi *key2,
- unsigned int keysize)
-{
- const flow_compare_t *k1, *k1_lim, *k2;
-
- k1 = (const flow_compare_t *) key1;
- k1_lim = k1 + keysize;
-
- k2 = (const flow_compare_t *) key2;
-
- do {
- if (*k1++ != *k2++)
- return 1;
- } while (k1 < k1_lim);
-
- return 0;
-}
-
-struct flow_cache_object *
-flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
- flow_resolve_t resolver, void *ctx)
-{
- struct flow_cache *fc = &net->xfrm.flow_cache_global;
- struct flow_cache_percpu *fcp;
- struct flow_cache_entry *fle, *tfle;
- struct flow_cache_object *flo;
- unsigned int keysize;
- unsigned int hash;
-
- local_bh_disable();
- fcp = this_cpu_ptr(fc->percpu);
-
- fle = NULL;
- flo = NULL;
-
- keysize = flow_key_size(family);
- if (!keysize)
- goto nocache;
-
- /* Packet really early in init? Making flow_cache_init a
- * pre-smp initcall would solve this. --RR */
- if (!fcp->hash_table)
- goto nocache;
-
- if (fcp->hash_rnd_recalc)
- flow_new_hash_rnd(fc, fcp);
-
- hash = flow_hash_code(fc, fcp, key, keysize);
- hlist_for_each_entry(tfle, &fcp->hash_table[hash], u.hlist) {
- if (tfle->net == net &&
- tfle->family == family &&
- tfle->dir == dir &&
- flow_key_compare(key, &tfle->key, keysize) == 0) {
- fle = tfle;
- break;
- }
- }
-
- if (unlikely(!fle)) {
- if (fcp->hash_count > fc->high_watermark)
- flow_cache_shrink(fc, fcp);
-
- if (atomic_read(&net->xfrm.flow_cache_gc_count) >
- 2 * num_online_cpus() * fc->high_watermark) {
- flo = ERR_PTR(-ENOBUFS);
- goto ret_object;
- }
-
- fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC);
- if (fle) {
- fle->net = net;
- fle->family = family;
- fle->dir = dir;
- memcpy(&fle->key, key, keysize * sizeof(flow_compare_t));
- fle->object = NULL;
- hlist_add_head(&fle->u.hlist, &fcp->hash_table[hash]);
- fcp->hash_count++;
- }
- } else if (likely(fle->genid == atomic_read(&net->xfrm.flow_cache_genid))) {
- flo = fle->object;
- if (!flo)
- goto ret_object;
- flo = flo->ops->get(flo);
- if (flo)
- goto ret_object;
- } else if (fle->object) {
- flo = fle->object;
- flo->ops->delete(flo);
- fle->object = NULL;
- }
-
-nocache:
- flo = NULL;
- if (fle) {
- flo = fle->object;
- fle->object = NULL;
- }
- flo = resolver(net, key, family, dir, flo, ctx);
- if (fle) {
- fle->genid = atomic_read(&net->xfrm.flow_cache_genid);
- if (!IS_ERR(flo))
- fle->object = flo;
- else
- fle->genid--;
- } else {
- if (!IS_ERR_OR_NULL(flo))
- flo->ops->delete(flo);
- }
-ret_object:
- local_bh_enable();
- return flo;
-}
-EXPORT_SYMBOL(flow_cache_lookup);
-
-static void flow_cache_flush_tasklet(unsigned long data)
-{
- struct flow_flush_info *info = (void *)data;
- struct flow_cache *fc = info->cache;
- struct flow_cache_percpu *fcp;
- struct flow_cache_entry *fle;
- struct hlist_node *tmp;
- LIST_HEAD(gc_list);
- unsigned int deleted = 0;
- struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm,
- flow_cache_global);
- unsigned int i;
-
- fcp = this_cpu_ptr(fc->percpu);
- for (i = 0; i < flow_cache_hash_size(fc); i++) {
- hlist_for_each_entry_safe(fle, tmp,
- &fcp->hash_table[i], u.hlist) {
- if (flow_entry_valid(fle, xfrm))
- continue;
-
- deleted++;
- hlist_del(&fle->u.hlist);
- list_add_tail(&fle->u.gc_list, &gc_list);
- }
- }
-
- flow_cache_queue_garbage(fcp, deleted, &gc_list, xfrm);
-
- if (atomic_dec_and_test(&info->cpuleft))
- complete(&info->completion);
-}
-
-/*
- * Return whether a cpu needs flushing. Conservatively, we assume
- * the presence of any entries means the core may require flushing,
- * since the flow_cache_ops.check() function may assume it's running
- * on the same core as the per-cpu cache component.
- */
-static int flow_cache_percpu_empty(struct flow_cache *fc, int cpu)
-{
- struct flow_cache_percpu *fcp;
- unsigned int i;
-
- fcp = per_cpu_ptr(fc->percpu, cpu);
- for (i = 0; i < flow_cache_hash_size(fc); i++)
- if (!hlist_empty(&fcp->hash_table[i]))
- return 0;
- return 1;
-}
-
-static void flow_cache_flush_per_cpu(void *data)
-{
- struct flow_flush_info *info = data;
- struct tasklet_struct *tasklet;
-
- tasklet = &this_cpu_ptr(info->cache->percpu)->flush_tasklet;
- tasklet->data = (unsigned long)info;
- tasklet_schedule(tasklet);
-}
-
-void flow_cache_flush(struct net *net)
-{
- struct flow_flush_info info;
- cpumask_var_t mask;
- int i, self;
-
- /* Track which cpus need flushing to avoid disturbing all cores. */
- if (!alloc_cpumask_var(&mask, GFP_KERNEL))
- return;
- cpumask_clear(mask);
-
- /* Don't want cpus going down or up during this. */
- get_online_cpus();
- mutex_lock(&net->xfrm.flow_flush_sem);
- info.cache = &net->xfrm.flow_cache_global;
- for_each_online_cpu(i)
- if (!flow_cache_percpu_empty(info.cache, i))
- cpumask_set_cpu(i, mask);
- atomic_set(&info.cpuleft, cpumask_weight(mask));
- if (atomic_read(&info.cpuleft) == 0)
- goto done;
-
- init_completion(&info.completion);
-
- local_bh_disable();
- self = cpumask_test_and_clear_cpu(smp_processor_id(), mask);
- on_each_cpu_mask(mask, flow_cache_flush_per_cpu, &info, 0);
- if (self)
- flow_cache_flush_tasklet((unsigned long)&info);
- local_bh_enable();
-
- wait_for_completion(&info.completion);
-
-done:
- mutex_unlock(&net->xfrm.flow_flush_sem);
- put_online_cpus();
- free_cpumask_var(mask);
-}
-
-static void flow_cache_flush_task(struct work_struct *work)
-{
- struct netns_xfrm *xfrm = container_of(work, struct netns_xfrm,
- flow_cache_flush_work);
- struct net *net = container_of(xfrm, struct net, xfrm);
-
- flow_cache_flush(net);
-}
-
-void flow_cache_flush_deferred(struct net *net)
-{
- schedule_work(&net->xfrm.flow_cache_flush_work);
-}
-
-static int flow_cache_cpu_prepare(struct flow_cache *fc, int cpu)
-{
- struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
- unsigned int sz = sizeof(struct hlist_head) * flow_cache_hash_size(fc);
-
- if (!fcp->hash_table) {
- fcp->hash_table = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu));
- if (!fcp->hash_table) {
- pr_err("NET: failed to allocate flow cache sz %u\n", sz);
- return -ENOMEM;
- }
- fcp->hash_rnd_recalc = 1;
- fcp->hash_count = 0;
- tasklet_init(&fcp->flush_tasklet, flow_cache_flush_tasklet, 0);
- }
- return 0;
-}
-
-static int flow_cache_cpu_up_prep(unsigned int cpu, struct hlist_node *node)
-{
- struct flow_cache *fc = hlist_entry_safe(node, struct flow_cache, node);
-
- return flow_cache_cpu_prepare(fc, cpu);
-}
-
-static int flow_cache_cpu_dead(unsigned int cpu, struct hlist_node *node)
-{
- struct flow_cache *fc = hlist_entry_safe(node, struct flow_cache, node);
- struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
-
- __flow_cache_shrink(fc, fcp, 0);
- return 0;
-}
-
-int flow_cache_init(struct net *net)
-{
- int i;
- struct flow_cache *fc = &net->xfrm.flow_cache_global;
-
- if (!flow_cachep)
- flow_cachep = kmem_cache_create("flow_cache",
- sizeof(struct flow_cache_entry),
- 0, SLAB_PANIC, NULL);
- spin_lock_init(&net->xfrm.flow_cache_gc_lock);
- INIT_LIST_HEAD(&net->xfrm.flow_cache_gc_list);
- INIT_WORK(&net->xfrm.flow_cache_gc_work, flow_cache_gc_task);
- INIT_WORK(&net->xfrm.flow_cache_flush_work, flow_cache_flush_task);
- mutex_init(&net->xfrm.flow_flush_sem);
- atomic_set(&net->xfrm.flow_cache_gc_count, 0);
-
- fc->hash_shift = 10;
- fc->low_watermark = 2 * flow_cache_hash_size(fc);
- fc->high_watermark = 4 * flow_cache_hash_size(fc);
-
- fc->percpu = alloc_percpu(struct flow_cache_percpu);
- if (!fc->percpu)
- return -ENOMEM;
-
- if (cpuhp_state_add_instance(CPUHP_NET_FLOW_PREPARE, &fc->node))
- goto err;
-
- setup_timer(&fc->rnd_timer, flow_cache_new_hashrnd,
- (unsigned long) fc);
- fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
- add_timer(&fc->rnd_timer);
-
- return 0;
-
-err:
- for_each_possible_cpu(i) {
- struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, i);
- kfree(fcp->hash_table);
- fcp->hash_table = NULL;
- }
-
- free_percpu(fc->percpu);
- fc->percpu = NULL;
-
- return -ENOMEM;
-}
-EXPORT_SYMBOL(flow_cache_init);
-
-void flow_cache_fini(struct net *net)
-{
- int i;
- struct flow_cache *fc = &net->xfrm.flow_cache_global;
-
- del_timer_sync(&fc->rnd_timer);
-
- cpuhp_state_remove_instance_nocalls(CPUHP_NET_FLOW_PREPARE, &fc->node);
-
- for_each_possible_cpu(i) {
- struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, i);
- kfree(fcp->hash_table);
- fcp->hash_table = NULL;
- }
-
- free_percpu(fc->percpu);
- fc->percpu = NULL;
-}
-EXPORT_SYMBOL(flow_cache_fini);
-
-void __init flow_cache_hp_init(void)
-{
- int ret;
-
- ret = cpuhp_setup_state_multi(CPUHP_NET_FLOW_PREPARE,
- "net/flow:prepare",
- flow_cache_cpu_up_prep,
- flow_cache_cpu_dead);
- WARN_ON(ret < 0);
-}
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index fc5fc4594c90..15ce30063765 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -4,10 +4,13 @@
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/if_vlan.h>
+#include <net/dsa.h>
+#include <net/dst_metadata.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/gre.h>
#include <net/pptp.h>
+#include <net/tipc.h>
#include <linux/igmp.h>
#include <linux/icmp.h>
#include <linux/sctp.h>
@@ -114,11 +117,101 @@ __be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto,
}
EXPORT_SYMBOL(__skb_flow_get_ports);
-enum flow_dissect_ret {
- FLOW_DISSECT_RET_OUT_GOOD,
- FLOW_DISSECT_RET_OUT_BAD,
- FLOW_DISSECT_RET_OUT_PROTO_AGAIN,
-};
+static void
+skb_flow_dissect_set_enc_addr_type(enum flow_dissector_key_id type,
+ struct flow_dissector *flow_dissector,
+ void *target_container)
+{
+ struct flow_dissector_key_control *ctrl;
+
+ if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_CONTROL))
+ return;
+
+ ctrl = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_ENC_CONTROL,
+ target_container);
+ ctrl->addr_type = type;
+}
+
+static void
+__skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
+ struct flow_dissector *flow_dissector,
+ void *target_container)
+{
+ struct ip_tunnel_info *info;
+ struct ip_tunnel_key *key;
+
+ /* A quick check to see if there might be something to do. */
+ if (!dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_ENC_KEYID) &&
+ !dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) &&
+ !dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) &&
+ !dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_ENC_CONTROL) &&
+ !dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_ENC_PORTS))
+ return;
+
+ info = skb_tunnel_info(skb);
+ if (!info)
+ return;
+
+ key = &info->key;
+
+ switch (ip_tunnel_info_af(info)) {
+ case AF_INET:
+ skb_flow_dissect_set_enc_addr_type(FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+ flow_dissector,
+ target_container);
+ if (dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) {
+ struct flow_dissector_key_ipv4_addrs *ipv4;
+
+ ipv4 = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
+ target_container);
+ ipv4->src = key->u.ipv4.src;
+ ipv4->dst = key->u.ipv4.dst;
+ }
+ break;
+ case AF_INET6:
+ skb_flow_dissect_set_enc_addr_type(FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+ flow_dissector,
+ target_container);
+ if (dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) {
+ struct flow_dissector_key_ipv6_addrs *ipv6;
+
+ ipv6 = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
+ target_container);
+ ipv6->src = key->u.ipv6.src;
+ ipv6->dst = key->u.ipv6.dst;
+ }
+ break;
+ }
+
+ if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
+ struct flow_dissector_key_keyid *keyid;
+
+ keyid = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_ENC_KEYID,
+ target_container);
+ keyid->keyid = tunnel_id_to_key32(key->tun_id);
+ }
+
+ if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) {
+ struct flow_dissector_key_ports *tp;
+
+ tp = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_ENC_PORTS,
+ target_container);
+ tp->src = key->tp_src;
+ tp->dst = key->tp_dst;
+ }
+}
static enum flow_dissect_ret
__skb_flow_dissect_mpls(const struct sk_buff *skb,
@@ -340,7 +433,7 @@ __skb_flow_dissect_gre(const struct sk_buff *skb,
if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP)
return FLOW_DISSECT_RET_OUT_GOOD;
- return FLOW_DISSECT_RET_OUT_PROTO_AGAIN;
+ return FLOW_DISSECT_RET_PROTO_AGAIN;
}
static void
@@ -401,6 +494,18 @@ __skb_flow_dissect_ipv6(const struct sk_buff *skb,
key_ip->ttl = iph->hop_limit;
}
+/* Maximum number of protocol headers that can be parsed in
+ * __skb_flow_dissect
+ */
+#define MAX_FLOW_DISSECT_HDRS 15
+
+static bool skb_flow_dissect_allowed(int *num_hdrs)
+{
+ ++*num_hdrs;
+
+ return (*num_hdrs <= MAX_FLOW_DISSECT_HDRS);
+}
+
/**
* __skb_flow_dissect - extract the flow_keys struct and return it
* @skb: sk_buff to extract the flow from, can be NULL if the rest are specified
@@ -430,7 +535,9 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
struct flow_dissector_key_icmp *key_icmp;
struct flow_dissector_key_tags *key_tags;
struct flow_dissector_key_vlan *key_vlan;
+ enum flow_dissect_ret fdret;
bool skip_vlan = false;
+ int num_hdrs = 0;
u8 ip_proto = 0;
bool ret;
@@ -440,6 +547,19 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
skb->vlan_proto : skb->protocol;
nhoff = skb_network_offset(skb);
hlen = skb_headlen(skb);
+#if IS_ENABLED(CONFIG_NET_DSA)
+ if (unlikely(skb->dev && netdev_uses_dsa(skb->dev))) {
+ const struct dsa_device_ops *ops;
+ int offset;
+
+ ops = skb->dev->dsa_ptr->tag_ops;
+ if (ops->flow_dissect &&
+ !ops->flow_dissect(skb, &proto, &offset)) {
+ hlen -= offset;
+ nhoff += offset;
+ }
+ }
+#endif
}
/* It is ensured by skb_flow_dissector_init() that control key will
@@ -456,6 +576,9 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
FLOW_DISSECTOR_KEY_BASIC,
target_container);
+ __skb_flow_dissect_tunnel_info(skb, flow_dissector,
+ target_container);
+
if (dissector_uses_key(flow_dissector,
FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
struct ethhdr *eth = eth_hdr(skb);
@@ -468,14 +591,19 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
}
proto_again:
+ fdret = FLOW_DISSECT_RET_CONTINUE;
+
switch (proto) {
case htons(ETH_P_IP): {
const struct iphdr *iph;
struct iphdr _iph;
-ip:
+
iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph);
- if (!iph || iph->ihl < 5)
- goto out_bad;
+ if (!iph || iph->ihl < 5) {
+ fdret = FLOW_DISSECT_RET_OUT_BAD;
+ break;
+ }
+
nhoff += iph->ihl * 4;
ip_proto = iph->protocol;
@@ -495,19 +623,25 @@ ip:
key_control->flags |= FLOW_DIS_IS_FRAGMENT;
if (iph->frag_off & htons(IP_OFFSET)) {
- goto out_good;
+ fdret = FLOW_DISSECT_RET_OUT_GOOD;
+ break;
} else {
key_control->flags |= FLOW_DIS_FIRST_FRAG;
- if (!(flags & FLOW_DISSECTOR_F_PARSE_1ST_FRAG))
- goto out_good;
+ if (!(flags &
+ FLOW_DISSECTOR_F_PARSE_1ST_FRAG)) {
+ fdret = FLOW_DISSECT_RET_OUT_GOOD;
+ break;
+ }
}
}
__skb_flow_dissect_ipv4(skb, flow_dissector,
target_container, data, iph);
- if (flags & FLOW_DISSECTOR_F_STOP_AT_L3)
- goto out_good;
+ if (flags & FLOW_DISSECTOR_F_STOP_AT_L3) {
+ fdret = FLOW_DISSECT_RET_OUT_GOOD;
+ break;
+ }
break;
}
@@ -515,10 +649,11 @@ ip:
const struct ipv6hdr *iph;
struct ipv6hdr _iph;
-ipv6:
iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph);
- if (!iph)
- goto out_bad;
+ if (!iph) {
+ fdret = FLOW_DISSECT_RET_OUT_BAD;
+ break;
+ }
ip_proto = iph->nexthdr;
nhoff += sizeof(struct ipv6hdr);
@@ -547,15 +682,17 @@ ipv6:
target_container);
key_tags->flow_label = ntohl(flow_label);
}
- if (flags & FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL)
- goto out_good;
+ if (flags & FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL) {
+ fdret = FLOW_DISSECT_RET_OUT_GOOD;
+ break;
+ }
}
__skb_flow_dissect_ipv6(skb, flow_dissector,
target_container, data, iph);
if (flags & FLOW_DISSECTOR_F_STOP_AT_L3)
- goto out_good;
+ fdret = FLOW_DISSECT_RET_OUT_GOOD;
break;
}
@@ -571,12 +708,17 @@ ipv6:
if (!vlan_tag_present || eth_type_vlan(skb->protocol)) {
vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan),
data, hlen, &_vlan);
- if (!vlan)
- goto out_bad;
+ if (!vlan) {
+ fdret = FLOW_DISSECT_RET_OUT_BAD;
+ break;
+ }
+
proto = vlan->h_vlan_encapsulated_proto;
nhoff += sizeof(*vlan);
- if (skip_vlan)
- goto proto_again;
+ if (skip_vlan) {
+ fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
+ break;
+ }
}
skip_vlan = true;
@@ -599,7 +741,8 @@ ipv6:
}
}
- goto proto_again;
+ fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
+ break;
}
case htons(ETH_P_PPP_SES): {
struct {
@@ -607,86 +750,104 @@ ipv6:
__be16 proto;
} *hdr, _hdr;
hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
- if (!hdr)
- goto out_bad;
+ if (!hdr) {
+ fdret = FLOW_DISSECT_RET_OUT_BAD;
+ break;
+ }
+
proto = hdr->proto;
nhoff += PPPOE_SES_HLEN;
switch (proto) {
case htons(PPP_IP):
- goto ip;
+ proto = htons(ETH_P_IP);
+ fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
+ break;
case htons(PPP_IPV6):
- goto ipv6;
+ proto = htons(ETH_P_IPV6);
+ fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
+ break;
default:
- goto out_bad;
+ fdret = FLOW_DISSECT_RET_OUT_BAD;
+ break;
}
+ break;
}
case htons(ETH_P_TIPC): {
- struct {
- __be32 pre[3];
- __be32 srcnode;
- } *hdr, _hdr;
- hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
- if (!hdr)
- goto out_bad;
+ struct tipc_basic_hdr *hdr, _hdr;
+
+ hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr),
+ data, hlen, &_hdr);
+ if (!hdr) {
+ fdret = FLOW_DISSECT_RET_OUT_BAD;
+ break;
+ }
if (dissector_uses_key(flow_dissector,
- FLOW_DISSECTOR_KEY_TIPC_ADDRS)) {
+ FLOW_DISSECTOR_KEY_TIPC)) {
key_addrs = skb_flow_dissector_target(flow_dissector,
- FLOW_DISSECTOR_KEY_TIPC_ADDRS,
+ FLOW_DISSECTOR_KEY_TIPC,
target_container);
- key_addrs->tipcaddrs.srcnode = hdr->srcnode;
- key_control->addr_type = FLOW_DISSECTOR_KEY_TIPC_ADDRS;
+ key_addrs->tipckey.key = tipc_hdr_rps_key(hdr);
+ key_control->addr_type = FLOW_DISSECTOR_KEY_TIPC;
}
- goto out_good;
+ fdret = FLOW_DISSECT_RET_OUT_GOOD;
+ break;
}
case htons(ETH_P_MPLS_UC):
case htons(ETH_P_MPLS_MC):
-mpls:
- switch (__skb_flow_dissect_mpls(skb, flow_dissector,
+ fdret = __skb_flow_dissect_mpls(skb, flow_dissector,
target_container, data,
- nhoff, hlen)) {
- case FLOW_DISSECT_RET_OUT_GOOD:
- goto out_good;
- case FLOW_DISSECT_RET_OUT_BAD:
- default:
- goto out_bad;
- }
+ nhoff, hlen);
+ break;
case htons(ETH_P_FCOE):
- if ((hlen - nhoff) < FCOE_HEADER_LEN)
- goto out_bad;
+ if ((hlen - nhoff) < FCOE_HEADER_LEN) {
+ fdret = FLOW_DISSECT_RET_OUT_BAD;
+ break;
+ }
nhoff += FCOE_HEADER_LEN;
- goto out_good;
+ fdret = FLOW_DISSECT_RET_OUT_GOOD;
+ break;
case htons(ETH_P_ARP):
case htons(ETH_P_RARP):
- switch (__skb_flow_dissect_arp(skb, flow_dissector,
+ fdret = __skb_flow_dissect_arp(skb, flow_dissector,
target_container, data,
- nhoff, hlen)) {
- case FLOW_DISSECT_RET_OUT_GOOD:
- goto out_good;
- case FLOW_DISSECT_RET_OUT_BAD:
- default:
- goto out_bad;
- }
+ nhoff, hlen);
+ break;
+
+ default:
+ fdret = FLOW_DISSECT_RET_OUT_BAD;
+ break;
+ }
+
+ /* Process result of proto processing */
+ switch (fdret) {
+ case FLOW_DISSECT_RET_OUT_GOOD:
+ goto out_good;
+ case FLOW_DISSECT_RET_PROTO_AGAIN:
+ if (skb_flow_dissect_allowed(&num_hdrs))
+ goto proto_again;
+ goto out_good;
+ case FLOW_DISSECT_RET_CONTINUE:
+ case FLOW_DISSECT_RET_IPPROTO_AGAIN:
+ break;
+ case FLOW_DISSECT_RET_OUT_BAD:
default:
goto out_bad;
}
ip_proto_again:
+ fdret = FLOW_DISSECT_RET_CONTINUE;
+
switch (ip_proto) {
case IPPROTO_GRE:
- switch (__skb_flow_dissect_gre(skb, key_control, flow_dissector,
+ fdret = __skb_flow_dissect_gre(skb, key_control, flow_dissector,
target_container, data,
- &proto, &nhoff, &hlen, flags)) {
- case FLOW_DISSECT_RET_OUT_GOOD:
- goto out_good;
- case FLOW_DISSECT_RET_OUT_BAD:
- goto out_bad;
- case FLOW_DISSECT_RET_OUT_PROTO_AGAIN:
- goto proto_again;
- }
+ &proto, &nhoff, &hlen, flags);
+ break;
+
case NEXTHDR_HOP:
case NEXTHDR_ROUTING:
case NEXTHDR_DEST: {
@@ -697,13 +858,16 @@ ip_proto_again:
opthdr = __skb_header_pointer(skb, nhoff, sizeof(_opthdr),
data, hlen, &_opthdr);
- if (!opthdr)
- goto out_bad;
+ if (!opthdr) {
+ fdret = FLOW_DISSECT_RET_OUT_BAD;
+ break;
+ }
ip_proto = opthdr[0];
nhoff += (opthdr[1] + 1) << 3;
- goto ip_proto_again;
+ fdret = FLOW_DISSECT_RET_IPPROTO_AGAIN;
+ break;
}
case NEXTHDR_FRAGMENT: {
struct frag_hdr _fh, *fh;
@@ -714,8 +878,10 @@ ip_proto_again:
fh = __skb_header_pointer(skb, nhoff, sizeof(_fh),
data, hlen, &_fh);
- if (!fh)
- goto out_bad;
+ if (!fh) {
+ fdret = FLOW_DISSECT_RET_OUT_BAD;
+ break;
+ }
key_control->flags |= FLOW_DIS_IS_FRAGMENT;
@@ -724,34 +890,50 @@ ip_proto_again:
if (!(fh->frag_off & htons(IP6_OFFSET))) {
key_control->flags |= FLOW_DIS_FIRST_FRAG;
- if (flags & FLOW_DISSECTOR_F_PARSE_1ST_FRAG)
- goto ip_proto_again;
+ if (flags & FLOW_DISSECTOR_F_PARSE_1ST_FRAG) {
+ fdret = FLOW_DISSECT_RET_IPPROTO_AGAIN;
+ break;
+ }
}
- goto out_good;
+
+ fdret = FLOW_DISSECT_RET_OUT_GOOD;
+ break;
}
case IPPROTO_IPIP:
proto = htons(ETH_P_IP);
key_control->flags |= FLOW_DIS_ENCAPSULATION;
- if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP)
- goto out_good;
+ if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP) {
+ fdret = FLOW_DISSECT_RET_OUT_GOOD;
+ break;
+ }
+
+ fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
+ break;
- goto ip;
case IPPROTO_IPV6:
proto = htons(ETH_P_IPV6);
key_control->flags |= FLOW_DIS_ENCAPSULATION;
- if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP)
- goto out_good;
+ if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP) {
+ fdret = FLOW_DISSECT_RET_OUT_GOOD;
+ break;
+ }
+
+ fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
+ break;
+
- goto ipv6;
case IPPROTO_MPLS:
proto = htons(ETH_P_MPLS_UC);
- goto mpls;
+ fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
+ break;
+
case IPPROTO_TCP:
__skb_flow_dissect_tcp(skb, flow_dissector, target_container,
data, nhoff, hlen);
break;
+
default:
break;
}
@@ -773,6 +955,24 @@ ip_proto_again:
key_icmp->icmp = skb_flow_get_be16(skb, nhoff, data, hlen);
}
+ /* Process result of IP proto processing */
+ switch (fdret) {
+ case FLOW_DISSECT_RET_PROTO_AGAIN:
+ if (skb_flow_dissect_allowed(&num_hdrs))
+ goto proto_again;
+ break;
+ case FLOW_DISSECT_RET_IPPROTO_AGAIN:
+ if (skb_flow_dissect_allowed(&num_hdrs))
+ goto ip_proto_again;
+ break;
+ case FLOW_DISSECT_RET_OUT_GOOD:
+ case FLOW_DISSECT_RET_CONTINUE:
+ break;
+ case FLOW_DISSECT_RET_OUT_BAD:
+ default:
+ goto out_bad;
+ }
+
out_good:
ret = true;
@@ -824,8 +1024,8 @@ static inline size_t flow_keys_hash_length(const struct flow_keys *flow)
case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
diff -= sizeof(flow->addrs.v6addrs);
break;
- case FLOW_DISSECTOR_KEY_TIPC_ADDRS:
- diff -= sizeof(flow->addrs.tipcaddrs);
+ case FLOW_DISSECTOR_KEY_TIPC:
+ diff -= sizeof(flow->addrs.tipckey);
break;
}
return (sizeof(*flow) - diff) / sizeof(u32);
@@ -839,8 +1039,8 @@ __be32 flow_get_u32_src(const struct flow_keys *flow)
case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
return (__force __be32)ipv6_addr_hash(
&flow->addrs.v6addrs.src);
- case FLOW_DISSECTOR_KEY_TIPC_ADDRS:
- return flow->addrs.tipcaddrs.srcnode;
+ case FLOW_DISSECTOR_KEY_TIPC:
+ return flow->addrs.tipckey.key;
default:
return 0;
}
@@ -998,51 +1198,6 @@ __u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb)
}
EXPORT_SYMBOL(skb_get_hash_perturb);
-__u32 __skb_get_hash_flowi6(struct sk_buff *skb, const struct flowi6 *fl6)
-{
- struct flow_keys keys;
-
- memset(&keys, 0, sizeof(keys));
-
- memcpy(&keys.addrs.v6addrs.src, &fl6->saddr,
- sizeof(keys.addrs.v6addrs.src));
- memcpy(&keys.addrs.v6addrs.dst, &fl6->daddr,
- sizeof(keys.addrs.v6addrs.dst));
- keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
- keys.ports.src = fl6->fl6_sport;
- keys.ports.dst = fl6->fl6_dport;
- keys.keyid.keyid = fl6->fl6_gre_key;
- keys.tags.flow_label = (__force u32)fl6->flowlabel;
- keys.basic.ip_proto = fl6->flowi6_proto;
-
- __skb_set_sw_hash(skb, flow_hash_from_keys(&keys),
- flow_keys_have_l4(&keys));
-
- return skb->hash;
-}
-EXPORT_SYMBOL(__skb_get_hash_flowi6);
-
-__u32 __skb_get_hash_flowi4(struct sk_buff *skb, const struct flowi4 *fl4)
-{
- struct flow_keys keys;
-
- memset(&keys, 0, sizeof(keys));
-
- keys.addrs.v4addrs.src = fl4->saddr;
- keys.addrs.v4addrs.dst = fl4->daddr;
- keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
- keys.ports.src = fl4->fl4_sport;
- keys.ports.dst = fl4->fl4_dport;
- keys.keyid.keyid = fl4->fl4_gre_key;
- keys.basic.ip_proto = fl4->flowi4_proto;
-
- __skb_set_sw_hash(skb, flow_hash_from_keys(&keys),
- flow_keys_have_l4(&keys));
-
- return skb->hash;
-}
-EXPORT_SYMBOL(__skb_get_hash_flowi4);
-
u32 __skb_get_poff(const struct sk_buff *skb, void *data,
const struct flow_keys *keys, int hlen)
{
@@ -1166,8 +1321,8 @@ static const struct flow_dissector_key flow_keys_dissector_keys[] = {
.offset = offsetof(struct flow_keys, addrs.v6addrs),
},
{
- .key_id = FLOW_DISSECTOR_KEY_TIPC_ADDRS,
- .offset = offsetof(struct flow_keys, addrs.tipcaddrs),
+ .key_id = FLOW_DISSECTOR_KEY_TIPC,
+ .offset = offsetof(struct flow_keys, addrs.tipckey),
},
{
.key_id = FLOW_DISSECTOR_KEY_PORTS,
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 0385dece1f6f..7c1ffd6f9501 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -83,10 +83,10 @@ static void est_timer(unsigned long arg)
u64 rate, brate;
est_fetch_counters(est, &b);
- brate = (b.bytes - est->last_bytes) << (8 - est->ewma_log);
+ brate = (b.bytes - est->last_bytes) << (10 - est->ewma_log - est->intvl_log);
brate -= (est->avbps >> est->ewma_log);
- rate = (u64)(b.packets - est->last_packets) << (8 - est->ewma_log);
+ rate = (u64)(b.packets - est->last_packets) << (10 - est->ewma_log - est->intvl_log);
rate -= (est->avpps >> est->ewma_log);
write_seqcount_begin(&est->seq);
diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c
index 814e58a3ce8b..4b54e5f107c6 100644
--- a/net/core/gro_cells.c
+++ b/net/core/gro_cells.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/netdevice.h>
diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
index 1307731ddfe4..e7e626fb87bb 100644
--- a/net/core/lwt_bpf.c
+++ b/net/core/lwt_bpf.c
@@ -51,7 +51,7 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt,
*/
preempt_disable();
rcu_read_lock();
- bpf_compute_data_end(skb);
+ bpf_compute_data_pointers(skb);
ret = bpf_prog_run_save_cb(lwt->prog, skb);
rcu_read_unlock();
diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c
index d9cb3532f1dd..0b171756453c 100644
--- a/net/core/lwtunnel.c
+++ b/net/core/lwtunnel.c
@@ -44,6 +44,8 @@ static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type)
return "SEG6";
case LWTUNNEL_ENCAP_BPF:
return "BPF";
+ case LWTUNNEL_ENCAP_SEG6_LOCAL:
+ return "SEG6LOCAL";
case LWTUNNEL_ENCAP_IP6:
case LWTUNNEL_ENCAP_IP:
case LWTUNNEL_ENCAP_NONE:
@@ -65,7 +67,7 @@ struct lwtunnel_state *lwtunnel_state_alloc(int encap_len)
return lws;
}
-EXPORT_SYMBOL(lwtunnel_state_alloc);
+EXPORT_SYMBOL_GPL(lwtunnel_state_alloc);
static const struct lwtunnel_encap_ops __rcu *
lwtun_encaps[LWTUNNEL_ENCAP_MAX + 1] __read_mostly;
@@ -80,7 +82,7 @@ int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *ops,
&lwtun_encaps[num],
NULL, ops) ? 0 : -1;
}
-EXPORT_SYMBOL(lwtunnel_encap_add_ops);
+EXPORT_SYMBOL_GPL(lwtunnel_encap_add_ops);
int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *ops,
unsigned int encap_type)
@@ -99,7 +101,7 @@ int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *ops,
return ret;
}
-EXPORT_SYMBOL(lwtunnel_encap_del_ops);
+EXPORT_SYMBOL_GPL(lwtunnel_encap_del_ops);
int lwtunnel_build_state(u16 encap_type,
struct nlattr *encap, unsigned int family,
@@ -138,7 +140,7 @@ int lwtunnel_build_state(u16 encap_type,
return ret;
}
-EXPORT_SYMBOL(lwtunnel_build_state);
+EXPORT_SYMBOL_GPL(lwtunnel_build_state);
int lwtunnel_valid_encap_type(u16 encap_type, struct netlink_ext_ack *extack)
{
@@ -175,7 +177,7 @@ int lwtunnel_valid_encap_type(u16 encap_type, struct netlink_ext_ack *extack)
return ret;
}
-EXPORT_SYMBOL(lwtunnel_valid_encap_type);
+EXPORT_SYMBOL_GPL(lwtunnel_valid_encap_type);
int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining,
struct netlink_ext_ack *extack)
@@ -205,7 +207,7 @@ int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining,
return 0;
}
-EXPORT_SYMBOL(lwtunnel_valid_encap_type_attr);
+EXPORT_SYMBOL_GPL(lwtunnel_valid_encap_type_attr);
void lwtstate_free(struct lwtunnel_state *lws)
{
@@ -219,7 +221,7 @@ void lwtstate_free(struct lwtunnel_state *lws)
}
module_put(ops->owner);
}
-EXPORT_SYMBOL(lwtstate_free);
+EXPORT_SYMBOL_GPL(lwtstate_free);
int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate)
{
@@ -259,7 +261,7 @@ nla_put_failure:
return (ret == -EOPNOTSUPP ? 0 : ret);
}
-EXPORT_SYMBOL(lwtunnel_fill_encap);
+EXPORT_SYMBOL_GPL(lwtunnel_fill_encap);
int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate)
{
@@ -281,7 +283,7 @@ int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate)
return ret;
}
-EXPORT_SYMBOL(lwtunnel_get_encap_size);
+EXPORT_SYMBOL_GPL(lwtunnel_get_encap_size);
int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b)
{
@@ -309,7 +311,7 @@ int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b)
return ret;
}
-EXPORT_SYMBOL(lwtunnel_cmp_encap);
+EXPORT_SYMBOL_GPL(lwtunnel_cmp_encap);
int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
@@ -343,7 +345,7 @@ drop:
return ret;
}
-EXPORT_SYMBOL(lwtunnel_output);
+EXPORT_SYMBOL_GPL(lwtunnel_output);
int lwtunnel_xmit(struct sk_buff *skb)
{
@@ -378,7 +380,7 @@ drop:
return ret;
}
-EXPORT_SYMBOL(lwtunnel_xmit);
+EXPORT_SYMBOL_GPL(lwtunnel_xmit);
int lwtunnel_input(struct sk_buff *skb)
{
@@ -412,4 +414,4 @@ drop:
return ret;
}
-EXPORT_SYMBOL(lwtunnel_input);
+EXPORT_SYMBOL_GPL(lwtunnel_input);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index d0713627deb6..6ea3a1a7f36a 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -457,7 +457,7 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
const void *pkey)
{
struct neighbour *n;
- int key_len = tbl->key_len;
+ unsigned int key_len = tbl->key_len;
u32 hash_val;
struct neigh_hash_table *nht;
@@ -488,7 +488,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
struct net_device *dev, bool want_ref)
{
u32 hash_val;
- int key_len = tbl->key_len;
+ unsigned int key_len = tbl->key_len;
int error;
struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
struct neigh_hash_table *nht;
@@ -572,7 +572,7 @@ out_neigh_release:
}
EXPORT_SYMBOL(__neigh_create);
-static u32 pneigh_hash(const void *pkey, int key_len)
+static u32 pneigh_hash(const void *pkey, unsigned int key_len)
{
u32 hash_val = *(u32 *)(pkey + key_len - 4);
hash_val ^= (hash_val >> 16);
@@ -585,7 +585,7 @@ static u32 pneigh_hash(const void *pkey, int key_len)
static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
struct net *net,
const void *pkey,
- int key_len,
+ unsigned int key_len,
struct net_device *dev)
{
while (n) {
@@ -601,7 +601,7 @@ static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
struct net *net, const void *pkey, struct net_device *dev)
{
- int key_len = tbl->key_len;
+ unsigned int key_len = tbl->key_len;
u32 hash_val = pneigh_hash(pkey, key_len);
return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
@@ -614,7 +614,7 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
struct net_device *dev, int creat)
{
struct pneigh_entry *n;
- int key_len = tbl->key_len;
+ unsigned int key_len = tbl->key_len;
u32 hash_val = pneigh_hash(pkey, key_len);
read_lock_bh(&tbl->lock);
@@ -659,7 +659,7 @@ int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
struct net_device *dev)
{
struct pneigh_entry *n, **np;
- int key_len = tbl->key_len;
+ unsigned int key_len = tbl->key_len;
u32 hash_val = pneigh_hash(pkey, key_len);
write_lock_bh(&tbl->lock);
@@ -1662,7 +1662,7 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
if (tbl == NULL)
return -EAFNOSUPPORT;
- if (nla_len(dst_attr) < tbl->key_len)
+ if (nla_len(dst_attr) < (int)tbl->key_len)
goto out;
if (ndm->ndm_flags & NTF_PROXY) {
@@ -1730,7 +1730,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
if (tbl == NULL)
return -EAFNOSUPPORT;
- if (nla_len(tb[NDA_DST]) < tbl->key_len)
+ if (nla_len(tb[NDA_DST]) < (int)tbl->key_len)
goto out;
dst = nla_data(tb[NDA_DST]);
lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
@@ -3261,13 +3261,13 @@ EXPORT_SYMBOL(neigh_sysctl_unregister);
static int __init neigh_init(void)
{
- rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
- rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
- rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);
+ rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, 0);
+ rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, 0);
+ rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, 0);
rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
- NULL);
- rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);
+ 0);
+ rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, 0);
return 0;
}
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index 4847964931df..615ccab55f38 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index b4f9922b6f23..799b75268291 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -97,7 +97,8 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr,
return restart_syscall();
if (dev_isalive(netdev)) {
- if ((ret = (*set)(netdev, new)) == 0)
+ ret = (*set)(netdev, new);
+ if (ret == 0)
ret = len;
}
rtnl_unlock();
@@ -160,6 +161,7 @@ static ssize_t broadcast_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct net_device *ndev = to_net_dev(dev);
+
if (dev_isalive(ndev))
return sysfs_format_mac(buf, ndev->broadcast, ndev->addr_len);
return -EINVAL;
@@ -170,7 +172,7 @@ static int change_carrier(struct net_device *dev, unsigned long new_carrier)
{
if (!netif_running(dev))
return -EINVAL;
- return dev_change_carrier(dev, (bool) new_carrier);
+ return dev_change_carrier(dev, (bool)new_carrier);
}
static ssize_t carrier_store(struct device *dev, struct device_attribute *attr,
@@ -183,9 +185,10 @@ static ssize_t carrier_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct net_device *netdev = to_net_dev(dev);
- if (netif_running(netdev)) {
+
+ if (netif_running(netdev))
return sprintf(buf, fmt_dec, !!netif_carrier_ok(netdev));
- }
+
return -EINVAL;
}
static DEVICE_ATTR_RW(carrier);
@@ -290,6 +293,7 @@ static ssize_t carrier_changes_show(struct device *dev,
char *buf)
{
struct net_device *netdev = to_net_dev(dev);
+
return sprintf(buf, fmt_dec,
atomic_read(&netdev->carrier_changes));
}
@@ -299,7 +303,7 @@ static DEVICE_ATTR_RO(carrier_changes);
static int change_mtu(struct net_device *dev, unsigned long new_mtu)
{
- return dev_set_mtu(dev, (int) new_mtu);
+ return dev_set_mtu(dev, (int)new_mtu);
}
static ssize_t mtu_store(struct device *dev, struct device_attribute *attr,
@@ -311,7 +315,7 @@ NETDEVICE_SHOW_RW(mtu, fmt_dec);
static int change_flags(struct net_device *dev, unsigned long new_flags)
{
- return dev_change_flags(dev, (unsigned int) new_flags);
+ return dev_change_flags(dev, (unsigned int)new_flags);
}
static ssize_t flags_store(struct device *dev, struct device_attribute *attr,
@@ -362,8 +366,8 @@ static int change_gro_flush_timeout(struct net_device *dev, unsigned long val)
}
static ssize_t gro_flush_timeout_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t len)
+ struct device_attribute *attr,
+ const char *buf, size_t len)
{
if (!capable(CAP_NET_ADMIN))
return -EPERM;
@@ -378,7 +382,7 @@ static ssize_t ifalias_store(struct device *dev, struct device_attribute *attr,
struct net_device *netdev = to_net_dev(dev);
struct net *net = dev_net(netdev);
size_t count = len;
- ssize_t ret;
+ ssize_t ret = 0;
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
@@ -389,30 +393,37 @@ static ssize_t ifalias_store(struct device *dev, struct device_attribute *attr,
if (!rtnl_trylock())
return restart_syscall();
- ret = dev_set_alias(netdev, buf, count);
+
+ if (dev_isalive(netdev)) {
+ ret = dev_set_alias(netdev, buf, count);
+ if (ret < 0)
+ goto err;
+ ret = len;
+ netdev_state_change(netdev);
+ }
+err:
rtnl_unlock();
- return ret < 0 ? ret : len;
+ return ret;
}
static ssize_t ifalias_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
const struct net_device *netdev = to_net_dev(dev);
+ char tmp[IFALIASZ];
ssize_t ret = 0;
- if (!rtnl_trylock())
- return restart_syscall();
- if (netdev->ifalias)
- ret = sprintf(buf, "%s\n", netdev->ifalias);
- rtnl_unlock();
+ ret = dev_get_alias(netdev, tmp, sizeof(tmp));
+ if (ret > 0)
+ ret = sprintf(buf, "%s\n", tmp);
return ret;
}
static DEVICE_ATTR_RW(ifalias);
static int change_group(struct net_device *dev, unsigned long new_group)
{
- dev_set_group(dev, (int) new_group);
+ dev_set_group(dev, (int)new_group);
return 0;
}
@@ -426,7 +437,7 @@ static DEVICE_ATTR(netdev_group, S_IRUGO | S_IWUSR, group_show, group_store);
static int change_proto_down(struct net_device *dev, unsigned long proto_down)
{
- return dev_change_proto_down(dev, (bool) proto_down);
+ return dev_change_proto_down(dev, (bool)proto_down);
}
static ssize_t proto_down_store(struct device *dev,
@@ -508,7 +519,7 @@ static ssize_t phys_switch_id_show(struct device *dev,
}
static DEVICE_ATTR_RO(phys_switch_id);
-static struct attribute *net_class_attrs[] = {
+static struct attribute *net_class_attrs[] __ro_after_init = {
&dev_attr_netdev_group.attr,
&dev_attr_type.attr,
&dev_attr_dev_id.attr,
@@ -549,14 +560,14 @@ static ssize_t netstat_show(const struct device *d,
ssize_t ret = -EINVAL;
WARN_ON(offset > sizeof(struct rtnl_link_stats64) ||
- offset % sizeof(u64) != 0);
+ offset % sizeof(u64) != 0);
read_lock(&dev_base_lock);
if (dev_isalive(dev)) {
struct rtnl_link_stats64 temp;
const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
- ret = sprintf(buf, fmt_u64, *(u64 *)(((u8 *) stats) + offset));
+ ret = sprintf(buf, fmt_u64, *(u64 *)(((u8 *)stats) + offset));
}
read_unlock(&dev_base_lock);
return ret;
@@ -565,7 +576,7 @@ static ssize_t netstat_show(const struct device *d,
/* generate a read-only statistics attribute */
#define NETSTAT_ENTRY(name) \
static ssize_t name##_show(struct device *d, \
- struct device_attribute *attr, char *buf) \
+ struct device_attribute *attr, char *buf) \
{ \
return netstat_show(d, attr, buf, \
offsetof(struct rtnl_link_stats64, name)); \
@@ -597,7 +608,7 @@ NETSTAT_ENTRY(rx_compressed);
NETSTAT_ENTRY(tx_compressed);
NETSTAT_ENTRY(rx_nohandler);
-static struct attribute *netstat_attrs[] = {
+static struct attribute *netstat_attrs[] __ro_after_init = {
&dev_attr_rx_packets.attr,
&dev_attr_tx_packets.attr,
&dev_attr_rx_bytes.attr,
@@ -625,7 +636,6 @@ static struct attribute *netstat_attrs[] = {
NULL
};
-
static const struct attribute_group netstat_group = {
.name = "statistics",
.attrs = netstat_attrs,
@@ -647,33 +657,33 @@ static const struct attribute_group wireless_group = {
#endif /* CONFIG_SYSFS */
#ifdef CONFIG_SYSFS
-#define to_rx_queue_attr(_attr) container_of(_attr, \
- struct rx_queue_attribute, attr)
+#define to_rx_queue_attr(_attr) \
+ container_of(_attr, struct rx_queue_attribute, attr)
#define to_rx_queue(obj) container_of(obj, struct netdev_rx_queue, kobj)
static ssize_t rx_queue_attr_show(struct kobject *kobj, struct attribute *attr,
char *buf)
{
- struct rx_queue_attribute *attribute = to_rx_queue_attr(attr);
+ const struct rx_queue_attribute *attribute = to_rx_queue_attr(attr);
struct netdev_rx_queue *queue = to_rx_queue(kobj);
if (!attribute->show)
return -EIO;
- return attribute->show(queue, attribute, buf);
+ return attribute->show(queue, buf);
}
static ssize_t rx_queue_attr_store(struct kobject *kobj, struct attribute *attr,
const char *buf, size_t count)
{
- struct rx_queue_attribute *attribute = to_rx_queue_attr(attr);
+ const struct rx_queue_attribute *attribute = to_rx_queue_attr(attr);
struct netdev_rx_queue *queue = to_rx_queue(kobj);
if (!attribute->store)
return -EIO;
- return attribute->store(queue, attribute, buf, count);
+ return attribute->store(queue, buf, count);
}
static const struct sysfs_ops rx_queue_sysfs_ops = {
@@ -682,8 +692,7 @@ static const struct sysfs_ops rx_queue_sysfs_ops = {
};
#ifdef CONFIG_RPS
-static ssize_t show_rps_map(struct netdev_rx_queue *queue,
- struct rx_queue_attribute *attribute, char *buf)
+static ssize_t show_rps_map(struct netdev_rx_queue *queue, char *buf)
{
struct rps_map *map;
cpumask_var_t mask;
@@ -706,8 +715,7 @@ static ssize_t show_rps_map(struct netdev_rx_queue *queue,
}
static ssize_t store_rps_map(struct netdev_rx_queue *queue,
- struct rx_queue_attribute *attribute,
- const char *buf, size_t len)
+ const char *buf, size_t len)
{
struct rps_map *old_map, *map;
cpumask_var_t mask;
@@ -727,8 +735,8 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue,
}
map = kzalloc(max_t(unsigned int,
- RPS_MAP_SIZE(cpumask_weight(mask)), L1_CACHE_BYTES),
- GFP_KERNEL);
+ RPS_MAP_SIZE(cpumask_weight(mask)), L1_CACHE_BYTES),
+ GFP_KERNEL);
if (!map) {
free_cpumask_var(mask);
return -ENOMEM;
@@ -738,9 +746,9 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue,
for_each_cpu_and(cpu, mask, cpu_online_mask)
map->cpus[i++] = cpu;
- if (i)
+ if (i) {
map->len = i;
- else {
+ } else {
kfree(map);
map = NULL;
}
@@ -765,7 +773,6 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue,
}
static ssize_t show_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
- struct rx_queue_attribute *attr,
char *buf)
{
struct rps_dev_flow_table *flow_table;
@@ -788,8 +795,7 @@ static void rps_dev_flow_table_release(struct rcu_head *rcu)
}
static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
- struct rx_queue_attribute *attr,
- const char *buf, size_t len)
+ const char *buf, size_t len)
{
unsigned long mask, count;
struct rps_dev_flow_table *table, *old_table;
@@ -831,8 +837,9 @@ static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
table->mask = mask;
for (count = 0; count <= mask; count++)
table->flows[count].cpu = RPS_NO_CPU;
- } else
+ } else {
table = NULL;
+ }
spin_lock(&rps_dev_flow_lock);
old_table = rcu_dereference_protected(queue->rps_flow_table,
@@ -846,16 +853,15 @@ static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
return len;
}
-static struct rx_queue_attribute rps_cpus_attribute =
- __ATTR(rps_cpus, S_IRUGO | S_IWUSR, show_rps_map, store_rps_map);
-
+static struct rx_queue_attribute rps_cpus_attribute __ro_after_init
+ = __ATTR(rps_cpus, S_IRUGO | S_IWUSR, show_rps_map, store_rps_map);
-static struct rx_queue_attribute rps_dev_flow_table_cnt_attribute =
- __ATTR(rps_flow_cnt, S_IRUGO | S_IWUSR,
- show_rps_dev_flow_table_cnt, store_rps_dev_flow_table_cnt);
+static struct rx_queue_attribute rps_dev_flow_table_cnt_attribute __ro_after_init
+ = __ATTR(rps_flow_cnt, S_IRUGO | S_IWUSR,
+ show_rps_dev_flow_table_cnt, store_rps_dev_flow_table_cnt);
#endif /* CONFIG_RPS */
-static struct attribute *rx_queue_default_attrs[] = {
+static struct attribute *rx_queue_default_attrs[] __ro_after_init = {
#ifdef CONFIG_RPS
&rps_cpus_attribute.attr,
&rps_dev_flow_table_cnt_attribute.attr,
@@ -870,7 +876,6 @@ static void rx_queue_release(struct kobject *kobj)
struct rps_map *map;
struct rps_dev_flow_table *flow_table;
-
map = rcu_dereference_protected(queue->rps_map, 1);
if (map) {
RCU_INIT_POINTER(queue->rps_map, NULL);
@@ -900,7 +905,7 @@ static const void *rx_queue_namespace(struct kobject *kobj)
return ns;
}
-static struct kobj_type rx_queue_ktype = {
+static struct kobj_type rx_queue_ktype __ro_after_init = {
.sysfs_ops = &rx_queue_sysfs_ops,
.release = rx_queue_release,
.default_attrs = rx_queue_default_attrs,
@@ -915,23 +920,22 @@ static int rx_queue_add_kobject(struct net_device *dev, int index)
kobj->kset = dev->queues_kset;
error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL,
- "rx-%u", index);
+ "rx-%u", index);
if (error)
- goto exit;
+ return error;
if (dev->sysfs_rx_queue_group) {
error = sysfs_create_group(kobj, dev->sysfs_rx_queue_group);
- if (error)
- goto exit;
+ if (error) {
+ kobject_put(kobj);
+ return error;
+ }
}
kobject_uevent(kobj, KOBJ_ADD);
dev_hold(queue->dev);
return error;
-exit:
- kobject_put(kobj);
- return error;
}
#endif /* CONFIG_SYSFS */
@@ -976,39 +980,40 @@ net_rx_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
*/
struct netdev_queue_attribute {
struct attribute attr;
- ssize_t (*show)(struct netdev_queue *queue,
- struct netdev_queue_attribute *attr, char *buf);
+ ssize_t (*show)(struct netdev_queue *queue, char *buf);
ssize_t (*store)(struct netdev_queue *queue,
- struct netdev_queue_attribute *attr, const char *buf, size_t len);
+ const char *buf, size_t len);
};
-#define to_netdev_queue_attr(_attr) container_of(_attr, \
- struct netdev_queue_attribute, attr)
+#define to_netdev_queue_attr(_attr) \
+ container_of(_attr, struct netdev_queue_attribute, attr)
#define to_netdev_queue(obj) container_of(obj, struct netdev_queue, kobj)
static ssize_t netdev_queue_attr_show(struct kobject *kobj,
struct attribute *attr, char *buf)
{
- struct netdev_queue_attribute *attribute = to_netdev_queue_attr(attr);
+ const struct netdev_queue_attribute *attribute
+ = to_netdev_queue_attr(attr);
struct netdev_queue *queue = to_netdev_queue(kobj);
if (!attribute->show)
return -EIO;
- return attribute->show(queue, attribute, buf);
+ return attribute->show(queue, buf);
}
static ssize_t netdev_queue_attr_store(struct kobject *kobj,
struct attribute *attr,
const char *buf, size_t count)
{
- struct netdev_queue_attribute *attribute = to_netdev_queue_attr(attr);
+ const struct netdev_queue_attribute *attribute
+ = to_netdev_queue_attr(attr);
struct netdev_queue *queue = to_netdev_queue(kobj);
if (!attribute->store)
return -EIO;
- return attribute->store(queue, attribute, buf, count);
+ return attribute->store(queue, buf, count);
}
static const struct sysfs_ops netdev_queue_sysfs_ops = {
@@ -1016,9 +1021,7 @@ static const struct sysfs_ops netdev_queue_sysfs_ops = {
.store = netdev_queue_attr_store,
};
-static ssize_t show_trans_timeout(struct netdev_queue *queue,
- struct netdev_queue_attribute *attribute,
- char *buf)
+static ssize_t tx_timeout_show(struct netdev_queue *queue, char *buf)
{
unsigned long trans_timeout;
@@ -1040,8 +1043,7 @@ static unsigned int get_netdev_queue_index(struct netdev_queue *queue)
return i;
}
-static ssize_t show_traffic_class(struct netdev_queue *queue,
- struct netdev_queue_attribute *attribute,
+static ssize_t traffic_class_show(struct netdev_queue *queue,
char *buf)
{
struct net_device *dev = queue->dev;
@@ -1055,16 +1057,14 @@ static ssize_t show_traffic_class(struct netdev_queue *queue,
}
#ifdef CONFIG_XPS
-static ssize_t show_tx_maxrate(struct netdev_queue *queue,
- struct netdev_queue_attribute *attribute,
+static ssize_t tx_maxrate_show(struct netdev_queue *queue,
char *buf)
{
return sprintf(buf, "%lu\n", queue->tx_maxrate);
}
-static ssize_t set_tx_maxrate(struct netdev_queue *queue,
- struct netdev_queue_attribute *attribute,
- const char *buf, size_t len)
+static ssize_t tx_maxrate_store(struct netdev_queue *queue,
+ const char *buf, size_t len)
{
struct net_device *dev = queue->dev;
int err, index = get_netdev_queue_index(queue);
@@ -1089,16 +1089,15 @@ static ssize_t set_tx_maxrate(struct netdev_queue *queue,
return err;
}
-static struct netdev_queue_attribute queue_tx_maxrate =
- __ATTR(tx_maxrate, S_IRUGO | S_IWUSR,
- show_tx_maxrate, set_tx_maxrate);
+static struct netdev_queue_attribute queue_tx_maxrate __ro_after_init
+ = __ATTR_RW(tx_maxrate);
#endif
-static struct netdev_queue_attribute queue_trans_timeout =
- __ATTR(tx_timeout, S_IRUGO, show_trans_timeout, NULL);
+static struct netdev_queue_attribute queue_trans_timeout __ro_after_init
+ = __ATTR_RO(tx_timeout);
-static struct netdev_queue_attribute queue_traffic_class =
- __ATTR(traffic_class, S_IRUGO, show_traffic_class, NULL);
+static struct netdev_queue_attribute queue_traffic_class __ro_after_init
+ = __ATTR_RO(traffic_class);
#ifdef CONFIG_BQL
/*
@@ -1115,9 +1114,9 @@ static ssize_t bql_set(const char *buf, const size_t count,
unsigned int value;
int err;
- if (!strcmp(buf, "max") || !strcmp(buf, "max\n"))
+ if (!strcmp(buf, "max") || !strcmp(buf, "max\n")) {
value = DQL_MAX_LIMIT;
- else {
+ } else {
err = kstrtouint(buf, 10, &value);
if (err < 0)
return err;
@@ -1131,7 +1130,6 @@ static ssize_t bql_set(const char *buf, const size_t count,
}
static ssize_t bql_show_hold_time(struct netdev_queue *queue,
- struct netdev_queue_attribute *attr,
char *buf)
{
struct dql *dql = &queue->dql;
@@ -1140,7 +1138,6 @@ static ssize_t bql_show_hold_time(struct netdev_queue *queue,
}
static ssize_t bql_set_hold_time(struct netdev_queue *queue,
- struct netdev_queue_attribute *attribute,
const char *buf, size_t len)
{
struct dql *dql = &queue->dql;
@@ -1156,12 +1153,11 @@ static ssize_t bql_set_hold_time(struct netdev_queue *queue,
return len;
}
-static struct netdev_queue_attribute bql_hold_time_attribute =
- __ATTR(hold_time, S_IRUGO | S_IWUSR, bql_show_hold_time,
- bql_set_hold_time);
+static struct netdev_queue_attribute bql_hold_time_attribute __ro_after_init
+ = __ATTR(hold_time, S_IRUGO | S_IWUSR,
+ bql_show_hold_time, bql_set_hold_time);
static ssize_t bql_show_inflight(struct netdev_queue *queue,
- struct netdev_queue_attribute *attr,
char *buf)
{
struct dql *dql = &queue->dql;
@@ -1169,33 +1165,31 @@ static ssize_t bql_show_inflight(struct netdev_queue *queue,
return sprintf(buf, "%u\n", dql->num_queued - dql->num_completed);
}
-static struct netdev_queue_attribute bql_inflight_attribute =
+static struct netdev_queue_attribute bql_inflight_attribute __ro_after_init =
__ATTR(inflight, S_IRUGO, bql_show_inflight, NULL);
#define BQL_ATTR(NAME, FIELD) \
static ssize_t bql_show_ ## NAME(struct netdev_queue *queue, \
- struct netdev_queue_attribute *attr, \
char *buf) \
{ \
return bql_show(buf, queue->dql.FIELD); \
} \
\
static ssize_t bql_set_ ## NAME(struct netdev_queue *queue, \
- struct netdev_queue_attribute *attr, \
const char *buf, size_t len) \
{ \
return bql_set(buf, len, &queue->dql.FIELD); \
} \
\
-static struct netdev_queue_attribute bql_ ## NAME ## _attribute = \
- __ATTR(NAME, S_IRUGO | S_IWUSR, bql_show_ ## NAME, \
- bql_set_ ## NAME);
+static struct netdev_queue_attribute bql_ ## NAME ## _attribute __ro_after_init \
+ = __ATTR(NAME, S_IRUGO | S_IWUSR, \
+ bql_show_ ## NAME, bql_set_ ## NAME)
-BQL_ATTR(limit, limit)
-BQL_ATTR(limit_max, max_limit)
-BQL_ATTR(limit_min, min_limit)
+BQL_ATTR(limit, limit);
+BQL_ATTR(limit_max, max_limit);
+BQL_ATTR(limit_min, min_limit);
-static struct attribute *dql_attrs[] = {
+static struct attribute *dql_attrs[] __ro_after_init = {
&bql_limit_attribute.attr,
&bql_limit_max_attribute.attr,
&bql_limit_min_attribute.attr,
@@ -1211,8 +1205,8 @@ static const struct attribute_group dql_group = {
#endif /* CONFIG_BQL */
#ifdef CONFIG_XPS
-static ssize_t show_xps_map(struct netdev_queue *queue,
- struct netdev_queue_attribute *attribute, char *buf)
+static ssize_t xps_cpus_show(struct netdev_queue *queue,
+ char *buf)
{
struct net_device *dev = queue->dev;
int cpu, len, num_tc = 1, tc = 0;
@@ -1258,9 +1252,8 @@ static ssize_t show_xps_map(struct netdev_queue *queue,
return len < PAGE_SIZE ? len : -EINVAL;
}
-static ssize_t store_xps_map(struct netdev_queue *queue,
- struct netdev_queue_attribute *attribute,
- const char *buf, size_t len)
+static ssize_t xps_cpus_store(struct netdev_queue *queue,
+ const char *buf, size_t len)
{
struct net_device *dev = queue->dev;
unsigned long index;
@@ -1288,11 +1281,11 @@ static ssize_t store_xps_map(struct netdev_queue *queue,
return err ? : len;
}
-static struct netdev_queue_attribute xps_cpus_attribute =
- __ATTR(xps_cpus, S_IRUGO | S_IWUSR, show_xps_map, store_xps_map);
+static struct netdev_queue_attribute xps_cpus_attribute __ro_after_init
+ = __ATTR_RW(xps_cpus);
#endif /* CONFIG_XPS */
-static struct attribute *netdev_queue_default_attrs[] = {
+static struct attribute *netdev_queue_default_attrs[] __ro_after_init = {
&queue_trans_timeout.attr,
&queue_traffic_class.attr,
#ifdef CONFIG_XPS
@@ -1322,7 +1315,7 @@ static const void *netdev_queue_namespace(struct kobject *kobj)
return ns;
}
-static struct kobj_type netdev_queue_ktype = {
+static struct kobj_type netdev_queue_ktype __ro_after_init = {
.sysfs_ops = &netdev_queue_sysfs_ops,
.release = netdev_queue_release,
.default_attrs = netdev_queue_default_attrs,
@@ -1337,23 +1330,22 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index)
kobj->kset = dev->queues_kset;
error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL,
- "tx-%u", index);
+ "tx-%u", index);
if (error)
- goto exit;
+ return error;
#ifdef CONFIG_BQL
error = sysfs_create_group(kobj, &dql_group);
- if (error)
- goto exit;
+ if (error) {
+ kobject_put(kobj);
+ return error;
+ }
#endif
kobject_uevent(kobj, KOBJ_ADD);
dev_hold(queue->dev);
return 0;
-exit:
- kobject_put(kobj);
- return error;
}
#endif /* CONFIG_SYSFS */
@@ -1395,7 +1387,7 @@ static int register_queue_kobjects(struct net_device *dev)
#ifdef CONFIG_SYSFS
dev->queues_kset = kset_create_and_add("queues",
- NULL, &dev->dev.kobj);
+ NULL, &dev->dev.kobj);
if (!dev->queues_kset)
return -ENOMEM;
real_rx = dev->real_num_rx_queues;
@@ -1463,7 +1455,7 @@ static const void *net_netlink_ns(struct sock *sk)
return sock_net(sk);
}
-struct kobj_ns_type_operations net_ns_type_operations = {
+const struct kobj_ns_type_operations net_ns_type_operations = {
.type = KOBJ_NS_TYPE_NET,
.current_may_mount = net_current_may_mount,
.grab_current_ns = net_grab_current_ns,
@@ -1485,7 +1477,8 @@ static int netdev_uevent(struct device *d, struct kobj_uevent_env *env)
/* pass ifindex to uevent.
* ifindex is useful as it won't change (interface name may change)
- * and is what RtNetlink uses natively. */
+ * and is what RtNetlink uses natively.
+ */
retval = add_uevent_var(env, "IFINDEX=%d", dev->ifindex);
exit:
@@ -1502,7 +1495,10 @@ static void netdev_release(struct device *d)
BUG_ON(dev->reg_state != NETREG_RELEASED);
- kfree(dev->ifalias);
+ /* no need to wait for rcu grace period:
+ * device is dead and about to be freed.
+ */
+ kfree(rcu_access_pointer(dev->ifalias));
netdev_freemem(dev);
}
@@ -1513,7 +1509,7 @@ static const void *net_namespace(struct device *d)
return dev_net(dev);
}
-static struct class net_class = {
+static struct class net_class __ro_after_init = {
.name = "net",
.dev_release = netdev_release,
.dev_groups = net_class_groups,
@@ -1560,7 +1556,7 @@ EXPORT_SYMBOL(of_find_net_device_by_node);
*/
void netdev_unregister_kobject(struct net_device *ndev)
{
- struct device *dev = &(ndev->dev);
+ struct device *dev = &ndev->dev;
if (!atomic_read(&dev_net(ndev)->count))
dev_set_uevent_suppress(dev, 1);
@@ -1577,7 +1573,7 @@ void netdev_unregister_kobject(struct net_device *ndev)
/* Create sysfs entries for network device. */
int netdev_register_kobject(struct net_device *ndev)
{
- struct device *dev = &(ndev->dev);
+ struct device *dev = &ndev->dev;
const struct attribute_group **groups = ndev->sysfs_groups;
int error = 0;
@@ -1620,14 +1616,14 @@ int netdev_register_kobject(struct net_device *ndev)
return error;
}
-int netdev_class_create_file_ns(struct class_attribute *class_attr,
+int netdev_class_create_file_ns(const struct class_attribute *class_attr,
const void *ns)
{
return class_create_file_ns(&net_class, class_attr, ns);
}
EXPORT_SYMBOL(netdev_class_create_file_ns);
-void netdev_class_remove_file_ns(struct class_attribute *class_attr,
+void netdev_class_remove_file_ns(const struct class_attribute *class_attr,
const void *ns)
{
class_remove_file_ns(&net_class, class_attr, ns);
diff --git a/net/core/net-sysfs.h b/net/core/net-sysfs.h
index 2745a1b51e03..006876c7b78d 100644
--- a/net/core/net-sysfs.h
+++ b/net/core/net-sysfs.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __NET_SYSFS_H__
#define __NET_SYSFS_H__
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index 92da5e4ceb4f..380934580fa1 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* consolidates trace point definitions
*
@@ -31,12 +32,23 @@
#include <trace/events/napi.h>
#include <trace/events/sock.h>
#include <trace/events/udp.h>
+#include <trace/events/tcp.h>
#include <trace/events/fib.h>
+#include <trace/events/qdisc.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <trace/events/fib6.h>
EXPORT_TRACEPOINT_SYMBOL_GPL(fib6_table_lookup);
#endif
+#if IS_ENABLED(CONFIG_BRIDGE)
+#include <trace/events/bridge.h>
+EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_add);
+EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_external_learn_add);
+EXPORT_TRACEPOINT_SYMBOL_GPL(fdb_delete);
+EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_update);
+#endif
EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb);
EXPORT_TRACEPOINT_SYMBOL_GPL(napi_poll);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(tcp_send_reset);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 8726d051f31d..b797832565d3 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -234,6 +234,7 @@ int peernet2id_alloc(struct net *net, struct net *peer)
rtnl_net_notifyid(net, RTM_NEWNSID, id);
return id;
}
+EXPORT_SYMBOL_GPL(peernet2id_alloc);
/* This function returns, if assigned, the id of a peer netns. */
int peernet2id(struct net *net, struct net *peer)
@@ -855,9 +856,10 @@ static int __init net_ns_init(void)
register_pernet_subsys(&net_ns_ops);
- rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL, NULL);
+ rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL,
+ RTNL_FLAG_DOIT_UNLOCKED);
rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, rtnl_net_dumpid,
- NULL);
+ RTNL_FLAG_DOIT_UNLOCKED);
return 0;
}
diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c
index 029a61ac6cdd..5e4f04004a49 100644
--- a/net/core/netclassid_cgroup.c
+++ b/net/core/netclassid_cgroup.c
@@ -100,7 +100,7 @@ static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft,
cs->classid = (u32)value;
- css_task_iter_start(css, &it);
+ css_task_iter_start(css, 0, &it);
while ((p = css_task_iter_next(&it))) {
task_lock(p);
iterate_fd(p->files, 0, update_classid_sock,
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 912731bed7b7..57557a6a950c 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -334,7 +334,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
/* It is up to the caller to keep npinfo alive. */
struct netpoll_info *npinfo;
- WARN_ON_ONCE(!irqs_disabled());
+ lockdep_assert_irqs_disabled();
npinfo = rcu_dereference_bh(np->dev->npinfo);
if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) {
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 6e1e10ff433a..f95a15086225 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2165,7 +2165,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
+ pkt_dev->pkt_overhead;
}
- for (i = 0; i < IN6_ADDR_HSIZE; i++)
+ for (i = 0; i < sizeof(struct in6_addr); i++)
if (pkt_dev->cur_in6_saddr.s6_addr[i]) {
set = 1;
break;
@@ -2711,7 +2711,7 @@ static inline __be16 build_tci(unsigned int id, unsigned int cfi,
static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb,
int datalen)
{
- struct timeval timestamp;
+ struct timespec64 timestamp;
struct pktgen_hdr *pgh;
pgh = skb_put(skb, sizeof(*pgh));
@@ -2773,9 +2773,17 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb,
pgh->tv_sec = 0;
pgh->tv_usec = 0;
} else {
- do_gettimeofday(&timestamp);
+ /*
+ * pgh->tv_sec wraps in y2106 when interpreted as unsigned
+ * as done by wireshark, or y2038 when interpreted as signed.
+ * This is probably harmless, but if anyone wants to improve
+ * it, we could introduce a variant that puts 64-bit nanoseconds
+ * into the respective header bytes.
+ * This would also be slightly faster to read.
+ */
+ ktime_get_real_ts64(&timestamp);
pgh->tv_sec = htonl(timestamp.tv_sec);
- pgh->tv_usec = htonl(timestamp.tv_usec);
+ pgh->tv_usec = htonl(timestamp.tv_nsec / NSEC_PER_USEC);
}
}
@@ -3377,7 +3385,7 @@ static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev)
static void pktgen_xmit(struct pktgen_dev *pkt_dev)
{
- unsigned int burst = ACCESS_ONCE(pkt_dev->burst);
+ unsigned int burst = READ_ONCE(pkt_dev->burst);
struct net_device *odev = pkt_dev->odev;
struct netdev_queue *txq;
struct sk_buff *skb;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 9201e3621351..dabba2a91fc8 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -62,7 +62,7 @@
struct rtnl_link {
rtnl_doit_func doit;
rtnl_dumpit_func dumpit;
- rtnl_calcit_func calcit;
+ unsigned int flags;
};
static DEFINE_MUTEX(rtnl_mutex);
@@ -127,7 +127,8 @@ bool lockdep_rtnl_is_held(void)
EXPORT_SYMBOL(lockdep_rtnl_is_held);
#endif /* #ifdef CONFIG_PROVE_LOCKING */
-static struct rtnl_link *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1];
+static struct rtnl_link __rcu *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1];
+static refcount_t rtnl_msg_handlers_ref[RTNL_FAMILY_MAX + 1];
static inline int rtm_msgindex(int msgtype)
{
@@ -143,58 +144,13 @@ static inline int rtm_msgindex(int msgtype)
return msgindex;
}
-static rtnl_doit_func rtnl_get_doit(int protocol, int msgindex)
-{
- struct rtnl_link *tab;
-
- if (protocol <= RTNL_FAMILY_MAX)
- tab = rtnl_msg_handlers[protocol];
- else
- tab = NULL;
-
- if (tab == NULL || tab[msgindex].doit == NULL)
- tab = rtnl_msg_handlers[PF_UNSPEC];
-
- return tab[msgindex].doit;
-}
-
-static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex)
-{
- struct rtnl_link *tab;
-
- if (protocol <= RTNL_FAMILY_MAX)
- tab = rtnl_msg_handlers[protocol];
- else
- tab = NULL;
-
- if (tab == NULL || tab[msgindex].dumpit == NULL)
- tab = rtnl_msg_handlers[PF_UNSPEC];
-
- return tab[msgindex].dumpit;
-}
-
-static rtnl_calcit_func rtnl_get_calcit(int protocol, int msgindex)
-{
- struct rtnl_link *tab;
-
- if (protocol <= RTNL_FAMILY_MAX)
- tab = rtnl_msg_handlers[protocol];
- else
- tab = NULL;
-
- if (tab == NULL || tab[msgindex].calcit == NULL)
- tab = rtnl_msg_handlers[PF_UNSPEC];
-
- return tab[msgindex].calcit;
-}
-
/**
* __rtnl_register - Register a rtnetlink message type
* @protocol: Protocol family or PF_UNSPEC
* @msgtype: rtnetlink message type
* @doit: Function pointer called for each request message
* @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message
- * @calcit: Function pointer to calc size of dump message
+ * @flags: rtnl_link_flags to modifiy behaviour of doit/dumpit functions
*
* Registers the specified function pointers (at least one of them has
* to be non-NULL) to be called whenever a request message for the
@@ -208,7 +164,7 @@ static rtnl_calcit_func rtnl_get_calcit(int protocol, int msgindex)
*/
int __rtnl_register(int protocol, int msgtype,
rtnl_doit_func doit, rtnl_dumpit_func dumpit,
- rtnl_calcit_func calcit)
+ unsigned int flags)
{
struct rtnl_link *tab;
int msgindex;
@@ -216,23 +172,20 @@ int __rtnl_register(int protocol, int msgtype,
BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
msgindex = rtm_msgindex(msgtype);
- tab = rtnl_msg_handlers[protocol];
+ tab = rcu_dereference_raw(rtnl_msg_handlers[protocol]);
if (tab == NULL) {
tab = kcalloc(RTM_NR_MSGTYPES, sizeof(*tab), GFP_KERNEL);
if (tab == NULL)
return -ENOBUFS;
- rtnl_msg_handlers[protocol] = tab;
+ rcu_assign_pointer(rtnl_msg_handlers[protocol], tab);
}
if (doit)
tab[msgindex].doit = doit;
-
if (dumpit)
tab[msgindex].dumpit = dumpit;
-
- if (calcit)
- tab[msgindex].calcit = calcit;
+ tab[msgindex].flags |= flags;
return 0;
}
@@ -249,9 +202,9 @@ EXPORT_SYMBOL_GPL(__rtnl_register);
*/
void rtnl_register(int protocol, int msgtype,
rtnl_doit_func doit, rtnl_dumpit_func dumpit,
- rtnl_calcit_func calcit)
+ unsigned int flags)
{
- if (__rtnl_register(protocol, msgtype, doit, dumpit, calcit) < 0)
+ if (__rtnl_register(protocol, msgtype, doit, dumpit, flags) < 0)
panic("Unable to register rtnetlink message handler, "
"protocol = %d, message type = %d\n",
protocol, msgtype);
@@ -267,17 +220,23 @@ EXPORT_SYMBOL_GPL(rtnl_register);
*/
int rtnl_unregister(int protocol, int msgtype)
{
+ struct rtnl_link *handlers;
int msgindex;
BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
msgindex = rtm_msgindex(msgtype);
- if (rtnl_msg_handlers[protocol] == NULL)
+ rtnl_lock();
+ handlers = rtnl_dereference(rtnl_msg_handlers[protocol]);
+ if (!handlers) {
+ rtnl_unlock();
return -ENOENT;
+ }
- rtnl_msg_handlers[protocol][msgindex].doit = NULL;
- rtnl_msg_handlers[protocol][msgindex].dumpit = NULL;
- rtnl_msg_handlers[protocol][msgindex].calcit = NULL;
+ handlers[msgindex].doit = NULL;
+ handlers[msgindex].dumpit = NULL;
+ handlers[msgindex].flags = 0;
+ rtnl_unlock();
return 0;
}
@@ -292,10 +251,20 @@ EXPORT_SYMBOL_GPL(rtnl_unregister);
*/
void rtnl_unregister_all(int protocol)
{
+ struct rtnl_link *handlers;
+
BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
- kfree(rtnl_msg_handlers[protocol]);
- rtnl_msg_handlers[protocol] = NULL;
+ rtnl_lock();
+ handlers = rtnl_dereference(rtnl_msg_handlers[protocol]);
+ RCU_INIT_POINTER(rtnl_msg_handlers[protocol], NULL);
+ rtnl_unlock();
+
+ synchronize_net();
+
+ while (refcount_read(&rtnl_msg_handlers_ref[protocol]) > 1)
+ schedule();
+ kfree(handlers);
}
EXPORT_SYMBOL_GPL(rtnl_unregister_all);
@@ -433,16 +402,24 @@ static size_t rtnl_link_get_slave_info_data_size(const struct net_device *dev)
{
struct net_device *master_dev;
const struct rtnl_link_ops *ops;
+ size_t size = 0;
- master_dev = netdev_master_upper_dev_get((struct net_device *) dev);
+ rcu_read_lock();
+
+ master_dev = netdev_master_upper_dev_get_rcu((struct net_device *)dev);
if (!master_dev)
- return 0;
+ goto out;
+
ops = master_dev->rtnl_link_ops;
if (!ops || !ops->get_slave_size)
- return 0;
+ goto out;
/* IFLA_INFO_SLAVE_DATA + nested data */
- return nla_total_size(sizeof(struct nlattr)) +
+ size = nla_total_size(sizeof(struct nlattr)) +
ops->get_slave_size(master_dev, dev);
+
+out:
+ rcu_read_unlock();
+ return size;
}
static size_t rtnl_link_get_size(const struct net_device *dev)
@@ -476,7 +453,7 @@ static const struct rtnl_af_ops *rtnl_af_lookup(const int family)
{
const struct rtnl_af_ops *ops;
- list_for_each_entry(ops, &rtnl_af_ops, list) {
+ list_for_each_entry_rcu(ops, &rtnl_af_ops, list) {
if (ops->family == family)
return ops;
}
@@ -493,32 +470,22 @@ static const struct rtnl_af_ops *rtnl_af_lookup(const int family)
void rtnl_af_register(struct rtnl_af_ops *ops)
{
rtnl_lock();
- list_add_tail(&ops->list, &rtnl_af_ops);
+ list_add_tail_rcu(&ops->list, &rtnl_af_ops);
rtnl_unlock();
}
EXPORT_SYMBOL_GPL(rtnl_af_register);
/**
- * __rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink.
- * @ops: struct rtnl_af_ops * to unregister
- *
- * The caller must hold the rtnl_mutex.
- */
-void __rtnl_af_unregister(struct rtnl_af_ops *ops)
-{
- list_del(&ops->list);
-}
-EXPORT_SYMBOL_GPL(__rtnl_af_unregister);
-
-/**
* rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink.
* @ops: struct rtnl_af_ops * to unregister
*/
void rtnl_af_unregister(struct rtnl_af_ops *ops)
{
rtnl_lock();
- __rtnl_af_unregister(ops);
+ list_del_rcu(&ops->list);
rtnl_unlock();
+
+ synchronize_rcu();
}
EXPORT_SYMBOL_GPL(rtnl_af_unregister);
@@ -531,13 +498,15 @@ static size_t rtnl_link_get_af_size(const struct net_device *dev,
/* IFLA_AF_SPEC */
size = nla_total_size(sizeof(struct nlattr));
- list_for_each_entry(af_ops, &rtnl_af_ops, list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) {
if (af_ops->get_link_af_size) {
/* AF_* + nested data */
size += nla_total_size(sizeof(struct nlattr)) +
af_ops->get_link_af_size(dev, ext_filter_mask);
}
}
+ rcu_read_unlock();
return size;
}
@@ -545,11 +514,15 @@ static size_t rtnl_link_get_af_size(const struct net_device *dev,
static bool rtnl_have_link_slave_info(const struct net_device *dev)
{
struct net_device *master_dev;
+ bool ret = false;
- master_dev = netdev_master_upper_dev_get((struct net_device *) dev);
+ rcu_read_lock();
+
+ master_dev = netdev_master_upper_dev_get_rcu((struct net_device *)dev);
if (master_dev && master_dev->rtnl_link_ops)
- return true;
- return false;
+ ret = true;
+ rcu_read_unlock();
+ return ret;
}
static int rtnl_link_slave_info_fill(struct sk_buff *skb,
@@ -946,8 +919,10 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ nla_total_size(IFNAMSIZ) /* IFLA_PHYS_PORT_NAME */
+ rtnl_xdp_size() /* IFLA_XDP */
+ nla_total_size(4) /* IFLA_EVENT */
- + nla_total_size(1); /* IFLA_PROTO_DOWN */
-
+ + nla_total_size(4) /* IFLA_NEW_NETNSID */
+ + nla_total_size(1) /* IFLA_PROTO_DOWN */
+ + nla_total_size(4) /* IFLA_IF_NETNSID */
+ + 0;
}
static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev)
@@ -1234,6 +1209,36 @@ nla_put_vfinfo_failure:
return -EMSGSIZE;
}
+static noinline_for_stack int rtnl_fill_vf(struct sk_buff *skb,
+ struct net_device *dev,
+ u32 ext_filter_mask)
+{
+ struct nlattr *vfinfo;
+ int i, num_vfs;
+
+ if (!dev->dev.parent || ((ext_filter_mask & RTEXT_FILTER_VF) == 0))
+ return 0;
+
+ num_vfs = dev_num_vf(dev->dev.parent);
+ if (nla_put_u32(skb, IFLA_NUM_VF, num_vfs))
+ return -EMSGSIZE;
+
+ if (!dev->netdev_ops->ndo_get_vf_config)
+ return 0;
+
+ vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST);
+ if (!vfinfo)
+ return -EMSGSIZE;
+
+ for (i = 0; i < num_vfs; i++) {
+ if (rtnl_fill_vfinfo(skb, dev, i, vfinfo))
+ return -EMSGSIZE;
+ }
+
+ nla_nest_end(skb, vfinfo);
+ return 0;
+}
+
static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
{
struct rtnl_link_ifmap map;
@@ -1265,10 +1270,10 @@ static u8 rtnl_xdp_attached_mode(struct net_device *dev, u32 *prog_id)
*prog_id = generic_xdp_prog->aux->id;
return XDP_ATTACHED_SKB;
}
- if (!ops->ndo_xdp)
+ if (!ops->ndo_bpf)
return XDP_ATTACHED_NONE;
- return __dev_xdp_attached(dev, ops->ndo_xdp, prog_id);
+ return __dev_xdp_attached(dev, ops->ndo_bpf, prog_id);
}
static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev)
@@ -1330,16 +1335,108 @@ static u32 rtnl_get_event(unsigned long event)
return rtnl_event_type;
}
-static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
+static int put_master_ifindex(struct sk_buff *skb, struct net_device *dev)
+{
+ const struct net_device *upper_dev;
+ int ret = 0;
+
+ rcu_read_lock();
+
+ upper_dev = netdev_master_upper_dev_get_rcu(dev);
+ if (upper_dev)
+ ret = nla_put_u32(skb, IFLA_MASTER, upper_dev->ifindex);
+
+ rcu_read_unlock();
+ return ret;
+}
+
+static int nla_put_iflink(struct sk_buff *skb, const struct net_device *dev)
+{
+ int ifindex = dev_get_iflink(dev);
+
+ if (dev->ifindex == ifindex)
+ return 0;
+
+ return nla_put_u32(skb, IFLA_LINK, ifindex);
+}
+
+static noinline_for_stack int nla_put_ifalias(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ char buf[IFALIASZ];
+ int ret;
+
+ ret = dev_get_alias(dev, buf, sizeof(buf));
+ return ret > 0 ? nla_put_string(skb, IFLA_IFALIAS, buf) : 0;
+}
+
+static int rtnl_fill_link_netnsid(struct sk_buff *skb,
+ const struct net_device *dev,
+ struct net *src_net)
+{
+ if (dev->rtnl_link_ops && dev->rtnl_link_ops->get_link_net) {
+ struct net *link_net = dev->rtnl_link_ops->get_link_net(dev);
+
+ if (!net_eq(dev_net(dev), link_net)) {
+ int id = peernet2id_alloc(src_net, link_net);
+
+ if (nla_put_s32(skb, IFLA_LINK_NETNSID, id))
+ return -EMSGSIZE;
+ }
+ }
+
+ return 0;
+}
+
+static int rtnl_fill_link_af(struct sk_buff *skb,
+ const struct net_device *dev,
+ u32 ext_filter_mask)
+{
+ const struct rtnl_af_ops *af_ops;
+ struct nlattr *af_spec;
+
+ af_spec = nla_nest_start(skb, IFLA_AF_SPEC);
+ if (!af_spec)
+ return -EMSGSIZE;
+
+ list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) {
+ struct nlattr *af;
+ int err;
+
+ if (!af_ops->fill_link_af)
+ continue;
+
+ af = nla_nest_start(skb, af_ops->family);
+ if (!af)
+ return -EMSGSIZE;
+
+ err = af_ops->fill_link_af(skb, dev, ext_filter_mask);
+ /*
+ * Caller may return ENODATA to indicate that there
+ * was no data to be dumped. This is not an error, it
+ * means we should trim the attribute header and
+ * continue.
+ */
+ if (err == -ENODATA)
+ nla_nest_cancel(skb, af);
+ else if (err < 0)
+ return -EMSGSIZE;
+
+ nla_nest_end(skb, af);
+ }
+
+ nla_nest_end(skb, af_spec);
+ return 0;
+}
+
+static int rtnl_fill_ifinfo(struct sk_buff *skb,
+ struct net_device *dev, struct net *src_net,
int type, u32 pid, u32 seq, u32 change,
unsigned int flags, u32 ext_filter_mask,
- u32 event)
+ u32 event, int *new_nsid, int tgt_netnsid)
{
struct ifinfomsg *ifm;
struct nlmsghdr *nlh;
- struct nlattr *af_spec;
- struct rtnl_af_ops *af_ops;
- struct net_device *upper_dev = netdev_master_upper_dev_get(dev);
ASSERT_RTNL();
nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags);
@@ -1354,6 +1451,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
ifm->ifi_flags = dev_get_flags(dev);
ifm->ifi_change = change;
+ if (tgt_netnsid >= 0 && nla_put_s32(skb, IFLA_IF_NETNSID, tgt_netnsid))
+ goto nla_put_failure;
+
if (nla_put_string(skb, IFLA_IFNAME, dev->name) ||
nla_put_u32(skb, IFLA_TXQLEN, dev->tx_queue_len) ||
nla_put_u8(skb, IFLA_OPERSTATE,
@@ -1368,15 +1468,12 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
#ifdef CONFIG_RPS
nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) ||
#endif
- (dev->ifindex != dev_get_iflink(dev) &&
- nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev))) ||
- (upper_dev &&
- nla_put_u32(skb, IFLA_MASTER, upper_dev->ifindex)) ||
+ nla_put_iflink(skb, dev) ||
+ put_master_ifindex(skb, dev) ||
nla_put_u8(skb, IFLA_CARRIER, netif_carrier_ok(dev)) ||
(dev->qdisc &&
nla_put_string(skb, IFLA_QDISC, dev->qdisc->ops->id)) ||
- (dev->ifalias &&
- nla_put_string(skb, IFLA_IFALIAS, dev->ifalias)) ||
+ nla_put_ifalias(skb, dev) ||
nla_put_u32(skb, IFLA_CARRIER_CHANGES,
atomic_read(&dev->carrier_changes)) ||
nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down))
@@ -1408,27 +1505,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
if (rtnl_fill_stats(skb, dev))
goto nla_put_failure;
- if (dev->dev.parent && (ext_filter_mask & RTEXT_FILTER_VF) &&
- nla_put_u32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent)))
+ if (rtnl_fill_vf(skb, dev, ext_filter_mask))
goto nla_put_failure;
- if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent &&
- ext_filter_mask & RTEXT_FILTER_VF) {
- int i;
- struct nlattr *vfinfo;
- int num_vfs = dev_num_vf(dev->dev.parent);
-
- vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST);
- if (!vfinfo)
- goto nla_put_failure;
- for (i = 0; i < num_vfs; i++) {
- if (rtnl_fill_vfinfo(skb, dev, i, vfinfo))
- goto nla_put_failure;
- }
-
- nla_nest_end(skb, vfinfo);
- }
-
if (rtnl_port_fill(skb, dev, ext_filter_mask))
goto nla_put_failure;
@@ -1440,51 +1519,23 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
goto nla_put_failure;
}
- if (dev->rtnl_link_ops &&
- dev->rtnl_link_ops->get_link_net) {
- struct net *link_net = dev->rtnl_link_ops->get_link_net(dev);
-
- if (!net_eq(dev_net(dev), link_net)) {
- int id = peernet2id_alloc(dev_net(dev), link_net);
-
- if (nla_put_s32(skb, IFLA_LINK_NETNSID, id))
- goto nla_put_failure;
- }
- }
-
- if (!(af_spec = nla_nest_start(skb, IFLA_AF_SPEC)))
+ if (rtnl_fill_link_netnsid(skb, dev, src_net))
goto nla_put_failure;
- list_for_each_entry(af_ops, &rtnl_af_ops, list) {
- if (af_ops->fill_link_af) {
- struct nlattr *af;
- int err;
-
- if (!(af = nla_nest_start(skb, af_ops->family)))
- goto nla_put_failure;
-
- err = af_ops->fill_link_af(skb, dev, ext_filter_mask);
-
- /*
- * Caller may return ENODATA to indicate that there
- * was no data to be dumped. This is not an error, it
- * means we should trim the attribute header and
- * continue.
- */
- if (err == -ENODATA)
- nla_nest_cancel(skb, af);
- else if (err < 0)
- goto nla_put_failure;
-
- nla_nest_end(skb, af);
- }
- }
+ if (new_nsid &&
+ nla_put_s32(skb, IFLA_NEW_NETNSID, *new_nsid) < 0)
+ goto nla_put_failure;
- nla_nest_end(skb, af_spec);
+ rcu_read_lock();
+ if (rtnl_fill_link_af(skb, dev, ext_filter_mask))
+ goto nla_put_failure_rcu;
+ rcu_read_unlock();
nlmsg_end(skb, nlh);
return 0;
+nla_put_failure_rcu:
+ rcu_read_unlock();
nla_put_failure:
nlmsg_cancel(skb, nlh);
return -EMSGSIZE;
@@ -1506,7 +1557,10 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_LINKINFO] = { .type = NLA_NESTED },
[IFLA_NET_NS_PID] = { .type = NLA_U32 },
[IFLA_NET_NS_FD] = { .type = NLA_U32 },
- [IFLA_IFALIAS] = { .type = NLA_STRING, .len = IFALIASZ-1 },
+ /* IFLA_IFALIAS is a string, but policy is set to NLA_BINARY to
+ * allow 0-length string (needed to remove an alias).
+ */
+ [IFLA_IFALIAS] = { .type = NLA_BINARY, .len = IFALIASZ - 1 },
[IFLA_VFINFO_LIST] = {. type = NLA_NESTED },
[IFLA_VF_PORTS] = { .type = NLA_NESTED },
[IFLA_PORT_SELF] = { .type = NLA_NESTED },
@@ -1523,6 +1577,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_XDP] = { .type = NLA_NESTED },
[IFLA_EVENT] = { .type = NLA_U32 },
[IFLA_GROUP] = { .type = NLA_U32 },
+ [IFLA_IF_NETNSID] = { .type = NLA_S32 },
};
static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
@@ -1626,9 +1681,28 @@ static bool link_dump_filtered(struct net_device *dev,
return false;
}
+static struct net *get_target_net(struct sk_buff *skb, int netnsid)
+{
+ struct net *net;
+
+ net = get_net_ns_by_id(sock_net(skb->sk), netnsid);
+ if (!net)
+ return ERR_PTR(-EINVAL);
+
+ /* For now, the caller is required to have CAP_NET_ADMIN in
+ * the user namespace owning the target net ns.
+ */
+ if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) {
+ put_net(net);
+ return ERR_PTR(-EACCES);
+ }
+ return net;
+}
+
static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
+ struct net *tgt_net = net;
int h, s_h;
int idx = 0, s_idx;
struct net_device *dev;
@@ -1638,14 +1712,13 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
const struct rtnl_link_ops *kind_ops = NULL;
unsigned int flags = NLM_F_MULTI;
int master_idx = 0;
+ int netnsid = -1;
int err;
int hdrlen;
s_h = cb->args[0];
s_idx = cb->args[1];
- cb->seq = net->dev_base_seq;
-
/* A hack to preserve kernel<->userspace interface.
* The correct header is ifinfomsg. It is consistent with rtnl_getlink.
* However, before Linux v3.9 the code here assumed rtgenmsg and that's
@@ -1658,6 +1731,15 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
if (nlmsg_parse(cb->nlh, hdrlen, tb, IFLA_MAX,
ifla_policy, NULL) >= 0) {
+ if (tb[IFLA_IF_NETNSID]) {
+ netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]);
+ tgt_net = get_target_net(skb, netnsid);
+ if (IS_ERR(tgt_net)) {
+ tgt_net = net;
+ netnsid = -1;
+ }
+ }
+
if (tb[IFLA_EXT_MASK])
ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
@@ -1673,17 +1755,19 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
idx = 0;
- head = &net->dev_index_head[h];
+ head = &tgt_net->dev_index_head[h];
hlist_for_each_entry(dev, head, index_hlist) {
if (link_dump_filtered(dev, master_idx, kind_ops))
goto cont;
if (idx < s_idx)
goto cont;
- err = rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
+ err = rtnl_fill_ifinfo(skb, dev, net,
+ RTM_NEWLINK,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, 0,
flags,
- ext_filter_mask, 0);
+ ext_filter_mask, 0, NULL,
+ netnsid);
if (err < 0) {
if (likely(skb->len))
@@ -1691,8 +1775,6 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
goto out_err;
}
-
- nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
idx++;
}
@@ -1702,6 +1784,10 @@ out:
out_err:
cb->args[1] = idx;
cb->args[0] = h;
+ cb->seq = net->dev_base_seq;
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+ if (netnsid >= 0)
+ put_net(tgt_net);
return err;
}
@@ -1748,17 +1834,27 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) {
const struct rtnl_af_ops *af_ops;
- if (!(af_ops = rtnl_af_lookup(nla_type(af))))
+ rcu_read_lock();
+ af_ops = rtnl_af_lookup(nla_type(af));
+ if (!af_ops) {
+ rcu_read_unlock();
return -EAFNOSUPPORT;
+ }
- if (!af_ops->set_link_af)
+ if (!af_ops->set_link_af) {
+ rcu_read_unlock();
return -EOPNOTSUPP;
+ }
if (af_ops->validate_link_af) {
err = af_ops->validate_link_af(dev, af);
- if (err < 0)
+ if (err < 0) {
+ rcu_read_unlock();
return err;
+ }
}
+
+ rcu_read_unlock();
}
}
@@ -1934,7 +2030,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
return err;
}
-static int do_set_master(struct net_device *dev, int ifindex)
+static int do_set_master(struct net_device *dev, int ifindex,
+ struct netlink_ext_ack *extack)
{
struct net_device *upper_dev = netdev_master_upper_dev_get(dev);
const struct net_device_ops *ops;
@@ -1959,7 +2056,7 @@ static int do_set_master(struct net_device *dev, int ifindex)
return -EINVAL;
ops = upper_dev->netdev_ops;
if (ops->ndo_add_slave) {
- err = ops->ndo_add_slave(upper_dev, dev);
+ err = ops->ndo_add_slave(upper_dev, dev, extack);
if (err)
return err;
} else {
@@ -2092,7 +2189,7 @@ static int do_setlink(const struct sk_buff *skb,
}
if (tb[IFLA_MASTER]) {
- err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]));
+ err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]), extack);
if (err)
goto errout;
status |= DO_SETLINK_MODIFIED;
@@ -2118,7 +2215,7 @@ static int do_setlink(const struct sk_buff *skb,
dev->tx_queue_len = orig_len;
goto errout;
}
- status |= DO_SETLINK_NOTIFY;
+ status |= DO_SETLINK_MODIFIED;
}
}
@@ -2215,13 +2312,17 @@ static int do_setlink(const struct sk_buff *skb,
nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) {
const struct rtnl_af_ops *af_ops;
- if (!(af_ops = rtnl_af_lookup(nla_type(af))))
- BUG();
+ rcu_read_lock();
+
+ BUG_ON(!(af_ops = rtnl_af_lookup(nla_type(af))));
err = af_ops->set_link_af(dev, af);
- if (err < 0)
+ if (err < 0) {
+ rcu_read_unlock();
goto errout;
+ }
+ rcu_read_unlock();
status |= DO_SETLINK_NOTIFY;
}
}
@@ -2273,7 +2374,7 @@ static int do_setlink(const struct sk_buff *skb,
errout:
if (status & DO_SETLINK_MODIFIED) {
- if (status & DO_SETLINK_NOTIFY)
+ if ((status & DO_SETLINK_NOTIFY) == DO_SETLINK_NOTIFY)
netdev_state_change(dev);
if (err < 0)
@@ -2299,6 +2400,9 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err < 0)
goto errout;
+ if (tb[IFLA_IF_NETNSID])
+ return -EOPNOTSUPP;
+
if (tb[IFLA_IFNAME])
nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
else
@@ -2393,6 +2497,9 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err < 0)
return err;
+ if (tb[IFLA_IF_NETNSID])
+ return -EOPNOTSUPP;
+
if (tb[IFLA_IFNAME])
nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
@@ -2524,6 +2631,9 @@ replay:
if (err < 0)
return err;
+ if (tb[IFLA_IF_NETNSID])
+ return -EOPNOTSUPP;
+
if (tb[IFLA_IFNAME])
nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
else
@@ -2601,12 +2711,6 @@ replay:
return err;
slave_data = slave_attr;
}
- if (m_ops->slave_validate) {
- err = m_ops->slave_validate(tb, slave_data,
- extack);
- if (err < 0)
- return err;
- }
}
if (dev) {
@@ -2736,7 +2840,8 @@ replay:
goto out_unregister;
}
if (tb[IFLA_MASTER]) {
- err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]));
+ err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]),
+ extack);
if (err)
goto out_unregister;
}
@@ -2762,11 +2867,13 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
+ struct net *tgt_net = net;
struct ifinfomsg *ifm;
char ifname[IFNAMSIZ];
struct nlattr *tb[IFLA_MAX+1];
struct net_device *dev = NULL;
struct sk_buff *nskb;
+ int netnsid = -1;
int err;
u32 ext_filter_mask = 0;
@@ -2774,35 +2881,50 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err < 0)
return err;
+ if (tb[IFLA_IF_NETNSID]) {
+ netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]);
+ tgt_net = get_target_net(skb, netnsid);
+ if (IS_ERR(tgt_net))
+ return PTR_ERR(tgt_net);
+ }
+
if (tb[IFLA_IFNAME])
nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
if (tb[IFLA_EXT_MASK])
ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
+ err = -EINVAL;
ifm = nlmsg_data(nlh);
if (ifm->ifi_index > 0)
- dev = __dev_get_by_index(net, ifm->ifi_index);
+ dev = __dev_get_by_index(tgt_net, ifm->ifi_index);
else if (tb[IFLA_IFNAME])
- dev = __dev_get_by_name(net, ifname);
+ dev = __dev_get_by_name(tgt_net, ifname);
else
- return -EINVAL;
+ goto out;
+ err = -ENODEV;
if (dev == NULL)
- return -ENODEV;
+ goto out;
+ err = -ENOBUFS;
nskb = nlmsg_new(if_nlmsg_size(dev, ext_filter_mask), GFP_KERNEL);
if (nskb == NULL)
- return -ENOBUFS;
+ goto out;
- err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).portid,
- nlh->nlmsg_seq, 0, 0, ext_filter_mask, 0);
+ err = rtnl_fill_ifinfo(nskb, dev, net,
+ RTM_NEWLINK, NETLINK_CB(skb).portid,
+ nlh->nlmsg_seq, 0, 0, ext_filter_mask,
+ 0, NULL, netnsid);
if (err < 0) {
/* -EMSGSIZE implies BUG in if_nlmsg_size */
WARN_ON(err == -EMSGSIZE);
kfree_skb(nskb);
} else
err = rtnl_unicast(nskb, net, NETLINK_CB(skb).portid);
+out:
+ if (netnsid >= 0)
+ put_net(tgt_net);
return err;
}
@@ -2831,11 +2953,13 @@ static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh)
* traverse the list of net devices and compute the minimum
* buffer size based upon the filter mask.
*/
- list_for_each_entry(dev, &net->dev_base_head, dev_list) {
+ rcu_read_lock();
+ for_each_netdev_rcu(net, dev) {
min_ifinfo_dump_size = max_t(u16, min_ifinfo_dump_size,
if_nlmsg_size(dev,
ext_filter_mask));
}
+ rcu_read_unlock();
return nlmsg_total_size(min_ifinfo_dump_size);
}
@@ -2847,19 +2971,29 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
if (s_idx == 0)
s_idx = 1;
+
for (idx = 1; idx <= RTNL_FAMILY_MAX; idx++) {
int type = cb->nlh->nlmsg_type-RTM_BASE;
+ struct rtnl_link *handlers;
+ rtnl_dumpit_func dumpit;
+
if (idx < s_idx || idx == PF_PACKET)
continue;
- if (rtnl_msg_handlers[idx] == NULL ||
- rtnl_msg_handlers[idx][type].dumpit == NULL)
+
+ handlers = rtnl_dereference(rtnl_msg_handlers[idx]);
+ if (!handlers)
+ continue;
+
+ dumpit = READ_ONCE(handlers[type].dumpit);
+ if (!dumpit)
continue;
+
if (idx > s_idx) {
memset(&cb->args[0], 0, sizeof(cb->args));
cb->prev_seq = 0;
cb->seq = 0;
}
- if (rtnl_msg_handlers[idx][type].dumpit(skb, cb))
+ if (dumpit(skb, cb))
break;
}
cb->family = idx;
@@ -2869,7 +3003,7 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev,
unsigned int change,
- u32 event, gfp_t flags)
+ u32 event, gfp_t flags, int *new_nsid)
{
struct net *net = dev_net(dev);
struct sk_buff *skb;
@@ -2880,7 +3014,9 @@ struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev,
if (skb == NULL)
goto errout;
- err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0, 0, event);
+ err = rtnl_fill_ifinfo(skb, dev, dev_net(dev),
+ type, 0, 0, change, 0, 0, event,
+ new_nsid, -1);
if (err < 0) {
/* -EMSGSIZE implies BUG in if_nlmsg_size() */
WARN_ON(err == -EMSGSIZE);
@@ -2903,14 +3039,14 @@ void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev, gfp_t flags)
static void rtmsg_ifinfo_event(int type, struct net_device *dev,
unsigned int change, u32 event,
- gfp_t flags)
+ gfp_t flags, int *new_nsid)
{
struct sk_buff *skb;
if (dev->reg_state != NETREG_REGISTERED)
return;
- skb = rtmsg_ifinfo_build_skb(type, dev, change, event, flags);
+ skb = rtmsg_ifinfo_build_skb(type, dev, change, event, flags, new_nsid);
if (skb)
rtmsg_ifinfo_send(skb, dev, flags);
}
@@ -2918,9 +3054,15 @@ static void rtmsg_ifinfo_event(int type, struct net_device *dev,
void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change,
gfp_t flags)
{
- rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags);
+ rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags, NULL);
+}
+
+void rtmsg_ifinfo_newnet(int type, struct net_device *dev, unsigned int change,
+ gfp_t flags, int *new_nsid)
+{
+ rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags,
+ new_nsid);
}
-EXPORT_SYMBOL(rtmsg_ifinfo);
static int nlmsg_populate_fdb_fill(struct sk_buff *skb,
struct net_device *dev,
@@ -3027,21 +3169,21 @@ int ndo_dflt_fdb_add(struct ndmsg *ndm,
}
EXPORT_SYMBOL(ndo_dflt_fdb_add);
-static int fdb_vid_parse(struct nlattr *vlan_attr, u16 *p_vid)
+static int fdb_vid_parse(struct nlattr *vlan_attr, u16 *p_vid,
+ struct netlink_ext_ack *extack)
{
u16 vid = 0;
if (vlan_attr) {
if (nla_len(vlan_attr) != sizeof(u16)) {
- pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid vlan\n");
+ NL_SET_ERR_MSG(extack, "invalid vlan attribute size");
return -EINVAL;
}
vid = nla_get_u16(vlan_attr);
if (!vid || vid >= VLAN_VID_MASK) {
- pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid vlan id %d\n",
- vid);
+ NL_SET_ERR_MSG(extack, "invalid vlan id");
return -EINVAL;
}
}
@@ -3066,24 +3208,24 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
ndm = nlmsg_data(nlh);
if (ndm->ndm_ifindex == 0) {
- pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid ifindex\n");
+ NL_SET_ERR_MSG(extack, "invalid ifindex");
return -EINVAL;
}
dev = __dev_get_by_index(net, ndm->ndm_ifindex);
if (dev == NULL) {
- pr_info("PF_BRIDGE: RTM_NEWNEIGH with unknown ifindex\n");
+ NL_SET_ERR_MSG(extack, "unknown ifindex");
return -ENODEV;
}
if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) {
- pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid address\n");
+ NL_SET_ERR_MSG(extack, "invalid address");
return -EINVAL;
}
addr = nla_data(tb[NDA_LLADDR]);
- err = fdb_vid_parse(tb[NDA_VLAN], &vid);
+ err = fdb_vid_parse(tb[NDA_VLAN], &vid, extack);
if (err)
return err;
@@ -3170,24 +3312,24 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
ndm = nlmsg_data(nlh);
if (ndm->ndm_ifindex == 0) {
- pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid ifindex\n");
+ NL_SET_ERR_MSG(extack, "invalid ifindex");
return -EINVAL;
}
dev = __dev_get_by_index(net, ndm->ndm_ifindex);
if (dev == NULL) {
- pr_info("PF_BRIDGE: RTM_DELNEIGH with unknown ifindex\n");
+ NL_SET_ERR_MSG(extack, "unknown ifindex");
return -ENODEV;
}
if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) {
- pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid address\n");
+ NL_SET_ERR_MSG(extack, "invalid address");
return -EINVAL;
}
addr = nla_data(tb[NDA_LLADDR]);
- err = fdb_vid_parse(tb[NDA_VLAN], &vid);
+ err = fdb_vid_parse(tb[NDA_VLAN], &vid, extack);
if (err)
return err;
@@ -3627,7 +3769,7 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
dev = __dev_get_by_index(net, ifm->ifi_index);
if (!dev) {
- pr_info("PF_BRIDGE: RTM_SETLINK with unknown ifindex\n");
+ NL_SET_ERR_MSG(extack, "unknown ifindex");
return -ENODEV;
}
@@ -3702,7 +3844,7 @@ static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
dev = __dev_get_by_index(net, ifm->ifi_index);
if (!dev) {
- pr_info("PF_BRIDGE: RTM_SETLINK with unknown ifindex\n");
+ NL_SET_ERR_MSG(extack, "unknown ifindex");
return -ENODEV;
}
@@ -3867,6 +4009,9 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
return -EMSGSIZE;
ifsm = nlmsg_data(nlh);
+ ifsm->family = PF_UNSPEC;
+ ifsm->pad1 = 0;
+ ifsm->pad2 = 0;
ifsm->ifindex = dev->ifindex;
ifsm->filter_mask = filter_mask;
@@ -3950,25 +4095,30 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
if (!attr)
goto nla_put_failure;
- list_for_each_entry(af_ops, &rtnl_af_ops, list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) {
if (af_ops->fill_stats_af) {
struct nlattr *af;
int err;
af = nla_nest_start(skb, af_ops->family);
- if (!af)
+ if (!af) {
+ rcu_read_unlock();
goto nla_put_failure;
-
+ }
err = af_ops->fill_stats_af(skb, dev);
- if (err == -ENODATA)
+ if (err == -ENODATA) {
nla_nest_cancel(skb, af);
- else if (err < 0)
+ } else if (err < 0) {
+ rcu_read_unlock();
goto nla_put_failure;
+ }
nla_nest_end(skb, af);
}
}
+ rcu_read_unlock();
nla_nest_end(skb, attr);
@@ -4037,7 +4187,8 @@ static size_t if_nlmsg_stats_size(const struct net_device *dev,
/* for IFLA_STATS_AF_SPEC */
size += nla_total_size(0);
- list_for_each_entry(af_ops, &rtnl_af_ops, list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) {
if (af_ops->get_stats_af_size) {
size += nla_total_size(
af_ops->get_stats_af_size(dev));
@@ -4046,6 +4197,7 @@ static size_t if_nlmsg_stats_size(const struct net_device *dev,
size += nla_total_size(0);
}
}
+ rcu_read_unlock();
}
return size;
@@ -4162,11 +4314,13 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
+ struct rtnl_link *handlers;
+ int err = -EOPNOTSUPP;
rtnl_doit_func doit;
+ unsigned int flags;
int kind;
int family;
int type;
- int err;
type = nlh->nlmsg_type;
if (type > RTM_MAX)
@@ -4184,20 +4338,40 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
if (kind != 2 && !netlink_net_capable(skb, CAP_NET_ADMIN))
return -EPERM;
+ if (family >= ARRAY_SIZE(rtnl_msg_handlers))
+ family = PF_UNSPEC;
+
+ rcu_read_lock();
+ handlers = rcu_dereference(rtnl_msg_handlers[family]);
+ if (!handlers) {
+ family = PF_UNSPEC;
+ handlers = rcu_dereference(rtnl_msg_handlers[family]);
+ }
+
if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
struct sock *rtnl;
rtnl_dumpit_func dumpit;
- rtnl_calcit_func calcit;
u16 min_dump_alloc = 0;
- dumpit = rtnl_get_dumpit(family, type);
- if (dumpit == NULL)
- return -EOPNOTSUPP;
- calcit = rtnl_get_calcit(family, type);
- if (calcit)
- min_dump_alloc = calcit(skb, nlh);
+ dumpit = READ_ONCE(handlers[type].dumpit);
+ if (!dumpit) {
+ family = PF_UNSPEC;
+ handlers = rcu_dereference(rtnl_msg_handlers[PF_UNSPEC]);
+ if (!handlers)
+ goto err_unlock;
+
+ dumpit = READ_ONCE(handlers[type].dumpit);
+ if (!dumpit)
+ goto err_unlock;
+ }
+
+ refcount_inc(&rtnl_msg_handlers_ref[family]);
+
+ if (type == RTM_GETLINK - RTM_BASE)
+ min_dump_alloc = rtnl_calcit(skb, nlh);
+
+ rcu_read_unlock();
- __rtnl_unlock();
rtnl = net->rtnl;
{
struct netlink_dump_control c = {
@@ -4206,22 +4380,47 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
};
err = netlink_dump_start(rtnl, skb, nlh, &c);
}
- rtnl_lock();
+ refcount_dec(&rtnl_msg_handlers_ref[family]);
return err;
}
- doit = rtnl_get_doit(family, type);
- if (doit == NULL)
- return -EOPNOTSUPP;
+ doit = READ_ONCE(handlers[type].doit);
+ if (!doit) {
+ family = PF_UNSPEC;
+ handlers = rcu_dereference(rtnl_msg_handlers[family]);
+ }
+
+ flags = READ_ONCE(handlers[type].flags);
+ if (flags & RTNL_FLAG_DOIT_UNLOCKED) {
+ refcount_inc(&rtnl_msg_handlers_ref[family]);
+ doit = READ_ONCE(handlers[type].doit);
+ rcu_read_unlock();
+ if (doit)
+ err = doit(skb, nlh, extack);
+ refcount_dec(&rtnl_msg_handlers_ref[family]);
+ return err;
+ }
+
+ rcu_read_unlock();
+
+ rtnl_lock();
+ handlers = rtnl_dereference(rtnl_msg_handlers[family]);
+ if (handlers) {
+ doit = READ_ONCE(handlers[type].doit);
+ if (doit)
+ err = doit(skb, nlh, extack);
+ }
+ rtnl_unlock();
+ return err;
- return doit(skb, nlh, extack);
+err_unlock:
+ rcu_read_unlock();
+ return -EOPNOTSUPP;
}
static void rtnetlink_rcv(struct sk_buff *skb)
{
- rtnl_lock();
netlink_rcv_skb(skb, &rtnetlink_rcv_msg);
- rtnl_unlock();
}
static int rtnetlink_bind(struct net *net, int group)
@@ -4242,15 +4441,20 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi
switch (event) {
case NETDEV_REBOOT:
+ case NETDEV_CHANGEMTU:
case NETDEV_CHANGEADDR:
case NETDEV_CHANGENAME:
case NETDEV_FEAT_CHANGE:
case NETDEV_BONDING_FAILOVER:
+ case NETDEV_POST_TYPE_CHANGE:
case NETDEV_NOTIFY_PEERS:
+ case NETDEV_CHANGEUPPER:
case NETDEV_RESEND_IGMP:
case NETDEV_CHANGEINFODATA:
+ case NETDEV_CHANGELOWERSTATE:
+ case NETDEV_CHANGE_TX_QUEUE_LEN:
rtmsg_ifinfo_event(RTM_NEWLINK, dev, 0, rtnl_get_event(event),
- GFP_KERNEL);
+ GFP_KERNEL, NULL);
break;
default:
break;
@@ -4294,29 +4498,34 @@ static struct pernet_operations rtnetlink_net_ops = {
void __init rtnetlink_init(void)
{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(rtnl_msg_handlers_ref); i++)
+ refcount_set(&rtnl_msg_handlers_ref[i], 1);
+
if (register_pernet_subsys(&rtnetlink_net_ops))
panic("rtnetlink_init: cannot initialize rtnetlink\n");
register_netdevice_notifier(&rtnetlink_dev_notifier);
rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink,
- rtnl_dump_ifinfo, rtnl_calcit);
- rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL, NULL);
- rtnl_register(PF_UNSPEC, RTM_NEWLINK, rtnl_newlink, NULL, NULL);
- rtnl_register(PF_UNSPEC, RTM_DELLINK, rtnl_dellink, NULL, NULL);
+ rtnl_dump_ifinfo, 0);
+ rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL, 0);
+ rtnl_register(PF_UNSPEC, RTM_NEWLINK, rtnl_newlink, NULL, 0);
+ rtnl_register(PF_UNSPEC, RTM_DELLINK, rtnl_dellink, NULL, 0);
- rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all, NULL);
- rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all, NULL);
- rtnl_register(PF_UNSPEC, RTM_GETNETCONF, NULL, rtnl_dump_all, NULL);
+ rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all, 0);
+ rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all, 0);
+ rtnl_register(PF_UNSPEC, RTM_GETNETCONF, NULL, rtnl_dump_all, 0);
- rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, NULL);
- rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL, NULL);
- rtnl_register(PF_BRIDGE, RTM_GETNEIGH, NULL, rtnl_fdb_dump, NULL);
+ rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, 0);
+ rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL, 0);
+ rtnl_register(PF_BRIDGE, RTM_GETNEIGH, NULL, rtnl_fdb_dump, 0);
- rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, NULL);
- rtnl_register(PF_BRIDGE, RTM_DELLINK, rtnl_bridge_dellink, NULL, NULL);
- rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, NULL);
+ rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, 0);
+ rtnl_register(PF_BRIDGE, RTM_DELLINK, rtnl_bridge_dellink, NULL, 0);
+ rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_GETSTATS, rtnl_stats_get, rtnl_stats_dump,
- NULL);
+ 0);
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index f990eb8b30a9..6b0ff396fa9d 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -41,7 +41,6 @@
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
-#include <linux/kmemcheck.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/in.h>
@@ -158,31 +157,6 @@ out:
*
*/
-struct sk_buff *__alloc_skb_head(gfp_t gfp_mask, int node)
-{
- struct sk_buff *skb;
-
- /* Get the HEAD */
- skb = kmem_cache_alloc_node(skbuff_head_cache,
- gfp_mask & ~__GFP_DMA, node);
- if (!skb)
- goto out;
-
- /*
- * Only clear those fields we need to clear, not those that we will
- * actually initialise below. Hence, don't put any more fields after
- * the tail pointer in struct sk_buff!
- */
- memset(skb, 0, offsetof(struct sk_buff, tail));
- skb->head = NULL;
- skb->truesize = sizeof(struct sk_buff);
- refcount_set(&skb->users, 1);
-
- skb->mac_header = (typeof(skb->mac_header))~0U;
-out:
- return skb;
-}
-
/**
* __alloc_skb - allocate a network buffer
* @size: size to allocate
@@ -259,14 +233,12 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
shinfo = skb_shinfo(skb);
memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
atomic_set(&shinfo->dataref, 1);
- kmemcheck_annotate_variable(shinfo->destructor_arg);
if (flags & SKB_ALLOC_FCLONE) {
struct sk_buff_fclones *fclones;
fclones = container_of(skb, struct sk_buff_fclones, skb1);
- kmemcheck_annotate_bitfield(&fclones->skb2, flags1);
skb->fclone = SKB_FCLONE_ORIG;
refcount_set(&fclones->fclone_ref, 1);
@@ -326,7 +298,6 @@ struct sk_buff *__build_skb(void *data, unsigned int frag_size)
shinfo = skb_shinfo(skb);
memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
atomic_set(&shinfo->dataref, 1);
- kmemcheck_annotate_variable(shinfo->destructor_arg);
return skb;
}
@@ -382,7 +353,7 @@ static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
*/
void *netdev_alloc_frag(unsigned int fragsz)
{
- return __netdev_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD);
+ return __netdev_alloc_frag(fragsz, GFP_ATOMIC);
}
EXPORT_SYMBOL(netdev_alloc_frag);
@@ -395,7 +366,7 @@ static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
void *napi_alloc_frag(unsigned int fragsz)
{
- return __napi_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD);
+ return __napi_alloc_frag(fragsz, GFP_ATOMIC);
}
EXPORT_SYMBOL(napi_alloc_frag);
@@ -592,21 +563,10 @@ static void skb_release_data(struct sk_buff *skb)
for (i = 0; i < shinfo->nr_frags; i++)
__skb_frag_unref(&shinfo->frags[i]);
- /*
- * If skb buf is from userspace, we need to notify the caller
- * the lower device DMA has done;
- */
- if (shinfo->tx_flags & SKBTX_DEV_ZEROCOPY) {
- struct ubuf_info *uarg;
-
- uarg = shinfo->destructor_arg;
- if (uarg->callback)
- uarg->callback(uarg, true);
- }
-
if (shinfo->frag_list)
kfree_skb_list(shinfo->frag_list);
+ skb_zcopy_clear(skb, true);
skb_free_head(skb);
}
@@ -720,14 +680,7 @@ EXPORT_SYMBOL(kfree_skb_list);
*/
void skb_tx_error(struct sk_buff *skb)
{
- if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
- struct ubuf_info *uarg;
-
- uarg = skb_shinfo(skb)->destructor_arg;
- if (uarg->callback)
- uarg->callback(uarg, false);
- skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
- }
+ skb_zcopy_clear(skb, true);
}
EXPORT_SYMBOL(skb_tx_error);
@@ -753,17 +706,13 @@ EXPORT_SYMBOL(consume_skb);
* consume_stateless_skb - free an skbuff, assuming it is stateless
* @skb: buffer to free
*
- * Works like consume_skb(), but this variant assumes that all the head
- * states have been already dropped.
+ * Alike consume_skb(), but this variant assumes that this is the last
+ * skb reference and all the head states have been already dropped
*/
-void consume_stateless_skb(struct sk_buff *skb)
+void __consume_stateless_skb(struct sk_buff *skb)
{
- if (!skb_unref(skb))
- return;
-
trace_consume_skb(skb);
- if (likely(skb->head))
- skb_release_data(skb);
+ skb_release_data(skb);
kfree_skbmem(skb);
}
@@ -941,6 +890,271 @@ struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src)
}
EXPORT_SYMBOL_GPL(skb_morph);
+static int mm_account_pinned_pages(struct mmpin *mmp, size_t size)
+{
+ unsigned long max_pg, num_pg, new_pg, old_pg;
+ struct user_struct *user;
+
+ if (capable(CAP_IPC_LOCK) || !size)
+ return 0;
+
+ num_pg = (size >> PAGE_SHIFT) + 2; /* worst case */
+ max_pg = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+ user = mmp->user ? : current_user();
+
+ do {
+ old_pg = atomic_long_read(&user->locked_vm);
+ new_pg = old_pg + num_pg;
+ if (new_pg > max_pg)
+ return -ENOBUFS;
+ } while (atomic_long_cmpxchg(&user->locked_vm, old_pg, new_pg) !=
+ old_pg);
+
+ if (!mmp->user) {
+ mmp->user = get_uid(user);
+ mmp->num_pg = num_pg;
+ } else {
+ mmp->num_pg += num_pg;
+ }
+
+ return 0;
+}
+
+static void mm_unaccount_pinned_pages(struct mmpin *mmp)
+{
+ if (mmp->user) {
+ atomic_long_sub(mmp->num_pg, &mmp->user->locked_vm);
+ free_uid(mmp->user);
+ }
+}
+
+struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size)
+{
+ struct ubuf_info *uarg;
+ struct sk_buff *skb;
+
+ WARN_ON_ONCE(!in_task());
+
+ if (!sock_flag(sk, SOCK_ZEROCOPY))
+ return NULL;
+
+ skb = sock_omalloc(sk, 0, GFP_KERNEL);
+ if (!skb)
+ return NULL;
+
+ BUILD_BUG_ON(sizeof(*uarg) > sizeof(skb->cb));
+ uarg = (void *)skb->cb;
+ uarg->mmp.user = NULL;
+
+ if (mm_account_pinned_pages(&uarg->mmp, size)) {
+ kfree_skb(skb);
+ return NULL;
+ }
+
+ uarg->callback = sock_zerocopy_callback;
+ uarg->id = ((u32)atomic_inc_return(&sk->sk_zckey)) - 1;
+ uarg->len = 1;
+ uarg->bytelen = size;
+ uarg->zerocopy = 1;
+ refcount_set(&uarg->refcnt, 1);
+ sock_hold(sk);
+
+ return uarg;
+}
+EXPORT_SYMBOL_GPL(sock_zerocopy_alloc);
+
+static inline struct sk_buff *skb_from_uarg(struct ubuf_info *uarg)
+{
+ return container_of((void *)uarg, struct sk_buff, cb);
+}
+
+struct ubuf_info *sock_zerocopy_realloc(struct sock *sk, size_t size,
+ struct ubuf_info *uarg)
+{
+ if (uarg) {
+ const u32 byte_limit = 1 << 19; /* limit to a few TSO */
+ u32 bytelen, next;
+
+ /* realloc only when socket is locked (TCP, UDP cork),
+ * so uarg->len and sk_zckey access is serialized
+ */
+ if (!sock_owned_by_user(sk)) {
+ WARN_ON_ONCE(1);
+ return NULL;
+ }
+
+ bytelen = uarg->bytelen + size;
+ if (uarg->len == USHRT_MAX - 1 || bytelen > byte_limit) {
+ /* TCP can create new skb to attach new uarg */
+ if (sk->sk_type == SOCK_STREAM)
+ goto new_alloc;
+ return NULL;
+ }
+
+ next = (u32)atomic_read(&sk->sk_zckey);
+ if ((u32)(uarg->id + uarg->len) == next) {
+ if (mm_account_pinned_pages(&uarg->mmp, size))
+ return NULL;
+ uarg->len++;
+ uarg->bytelen = bytelen;
+ atomic_set(&sk->sk_zckey, ++next);
+ sock_zerocopy_get(uarg);
+ return uarg;
+ }
+ }
+
+new_alloc:
+ return sock_zerocopy_alloc(sk, size);
+}
+EXPORT_SYMBOL_GPL(sock_zerocopy_realloc);
+
+static bool skb_zerocopy_notify_extend(struct sk_buff *skb, u32 lo, u16 len)
+{
+ struct sock_exterr_skb *serr = SKB_EXT_ERR(skb);
+ u32 old_lo, old_hi;
+ u64 sum_len;
+
+ old_lo = serr->ee.ee_info;
+ old_hi = serr->ee.ee_data;
+ sum_len = old_hi - old_lo + 1ULL + len;
+
+ if (sum_len >= (1ULL << 32))
+ return false;
+
+ if (lo != old_hi + 1)
+ return false;
+
+ serr->ee.ee_data += len;
+ return true;
+}
+
+void sock_zerocopy_callback(struct ubuf_info *uarg, bool success)
+{
+ struct sk_buff *tail, *skb = skb_from_uarg(uarg);
+ struct sock_exterr_skb *serr;
+ struct sock *sk = skb->sk;
+ struct sk_buff_head *q;
+ unsigned long flags;
+ u32 lo, hi;
+ u16 len;
+
+ mm_unaccount_pinned_pages(&uarg->mmp);
+
+ /* if !len, there was only 1 call, and it was aborted
+ * so do not queue a completion notification
+ */
+ if (!uarg->len || sock_flag(sk, SOCK_DEAD))
+ goto release;
+
+ len = uarg->len;
+ lo = uarg->id;
+ hi = uarg->id + len - 1;
+
+ serr = SKB_EXT_ERR(skb);
+ memset(serr, 0, sizeof(*serr));
+ serr->ee.ee_errno = 0;
+ serr->ee.ee_origin = SO_EE_ORIGIN_ZEROCOPY;
+ serr->ee.ee_data = hi;
+ serr->ee.ee_info = lo;
+ if (!success)
+ serr->ee.ee_code |= SO_EE_CODE_ZEROCOPY_COPIED;
+
+ q = &sk->sk_error_queue;
+ spin_lock_irqsave(&q->lock, flags);
+ tail = skb_peek_tail(q);
+ if (!tail || SKB_EXT_ERR(tail)->ee.ee_origin != SO_EE_ORIGIN_ZEROCOPY ||
+ !skb_zerocopy_notify_extend(tail, lo, len)) {
+ __skb_queue_tail(q, skb);
+ skb = NULL;
+ }
+ spin_unlock_irqrestore(&q->lock, flags);
+
+ sk->sk_error_report(sk);
+
+release:
+ consume_skb(skb);
+ sock_put(sk);
+}
+EXPORT_SYMBOL_GPL(sock_zerocopy_callback);
+
+void sock_zerocopy_put(struct ubuf_info *uarg)
+{
+ if (uarg && refcount_dec_and_test(&uarg->refcnt)) {
+ if (uarg->callback)
+ uarg->callback(uarg, uarg->zerocopy);
+ else
+ consume_skb(skb_from_uarg(uarg));
+ }
+}
+EXPORT_SYMBOL_GPL(sock_zerocopy_put);
+
+void sock_zerocopy_put_abort(struct ubuf_info *uarg)
+{
+ if (uarg) {
+ struct sock *sk = skb_from_uarg(uarg)->sk;
+
+ atomic_dec(&sk->sk_zckey);
+ uarg->len--;
+
+ sock_zerocopy_put(uarg);
+ }
+}
+EXPORT_SYMBOL_GPL(sock_zerocopy_put_abort);
+
+extern int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
+ struct iov_iter *from, size_t length);
+
+int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
+ struct msghdr *msg, int len,
+ struct ubuf_info *uarg)
+{
+ struct ubuf_info *orig_uarg = skb_zcopy(skb);
+ struct iov_iter orig_iter = msg->msg_iter;
+ int err, orig_len = skb->len;
+
+ /* An skb can only point to one uarg. This edge case happens when
+ * TCP appends to an skb, but zerocopy_realloc triggered a new alloc.
+ */
+ if (orig_uarg && uarg != orig_uarg)
+ return -EEXIST;
+
+ err = __zerocopy_sg_from_iter(sk, skb, &msg->msg_iter, len);
+ if (err == -EFAULT || (err == -EMSGSIZE && skb->len == orig_len)) {
+ struct sock *save_sk = skb->sk;
+
+ /* Streams do not free skb on error. Reset to prev state. */
+ msg->msg_iter = orig_iter;
+ skb->sk = sk;
+ ___pskb_trim(skb, orig_len);
+ skb->sk = save_sk;
+ return err;
+ }
+
+ skb_zcopy_set(skb, uarg);
+ return skb->len - orig_len;
+}
+EXPORT_SYMBOL_GPL(skb_zerocopy_iter_stream);
+
+static int skb_zerocopy_clone(struct sk_buff *nskb, struct sk_buff *orig,
+ gfp_t gfp_mask)
+{
+ if (skb_zcopy(orig)) {
+ if (skb_zcopy(nskb)) {
+ /* !gfp_mask callers are verified to !skb_zcopy(nskb) */
+ if (!gfp_mask) {
+ WARN_ON_ONCE(1);
+ return -ENOMEM;
+ }
+ if (skb_uarg(nskb) == skb_uarg(orig))
+ return 0;
+ if (skb_copy_ubufs(nskb, GFP_ATOMIC))
+ return -EIO;
+ }
+ skb_zcopy_set(nskb, skb_uarg(orig));
+ }
+ return 0;
+}
+
/**
* skb_copy_ubufs - copy userspace skb frags buffers to kernel
* @skb: the skb to modify
@@ -958,15 +1172,19 @@ EXPORT_SYMBOL_GPL(skb_morph);
*/
int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
{
- int i;
int num_frags = skb_shinfo(skb)->nr_frags;
struct page *page, *head = NULL;
- struct ubuf_info *uarg = skb_shinfo(skb)->destructor_arg;
+ int i, new_frags;
+ u32 d_off;
- for (i = 0; i < num_frags; i++) {
- u8 *vaddr;
- skb_frag_t *f = &skb_shinfo(skb)->frags[i];
+ if (!num_frags)
+ return 0;
+ if (skb_shared(skb) || skb_unclone(skb, gfp_mask))
+ return -EINVAL;
+
+ new_frags = (__skb_pagelen(skb) + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ for (i = 0; i < new_frags; i++) {
page = alloc_page(gfp_mask);
if (!page) {
while (head) {
@@ -976,28 +1194,51 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
}
return -ENOMEM;
}
- vaddr = kmap_atomic(skb_frag_page(f));
- memcpy(page_address(page),
- vaddr + f->page_offset, skb_frag_size(f));
- kunmap_atomic(vaddr);
set_page_private(page, (unsigned long)head);
head = page;
}
+ page = head;
+ d_off = 0;
+ for (i = 0; i < num_frags; i++) {
+ skb_frag_t *f = &skb_shinfo(skb)->frags[i];
+ u32 p_off, p_len, copied;
+ struct page *p;
+ u8 *vaddr;
+
+ skb_frag_foreach_page(f, f->page_offset, skb_frag_size(f),
+ p, p_off, p_len, copied) {
+ u32 copy, done = 0;
+ vaddr = kmap_atomic(p);
+
+ while (done < p_len) {
+ if (d_off == PAGE_SIZE) {
+ d_off = 0;
+ page = (struct page *)page_private(page);
+ }
+ copy = min_t(u32, PAGE_SIZE - d_off, p_len - done);
+ memcpy(page_address(page) + d_off,
+ vaddr + p_off + done, copy);
+ done += copy;
+ d_off += copy;
+ }
+ kunmap_atomic(vaddr);
+ }
+ }
+
/* skb frags release userspace buffers */
for (i = 0; i < num_frags; i++)
skb_frag_unref(skb, i);
- uarg->callback(uarg, false);
-
/* skb frags point to kernel buffers */
- for (i = num_frags - 1; i >= 0; i--) {
- __skb_fill_page_desc(skb, i, head, 0,
- skb_shinfo(skb)->frags[i].size);
+ for (i = 0; i < new_frags - 1; i++) {
+ __skb_fill_page_desc(skb, i, head, 0, PAGE_SIZE);
head = (struct page *)page_private(head);
}
+ __skb_fill_page_desc(skb, new_frags - 1, head, 0, d_off);
+ skb_shinfo(skb)->nr_frags = new_frags;
- skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
+ skb_zcopy_clear(skb, false);
return 0;
}
EXPORT_SYMBOL_GPL(skb_copy_ubufs);
@@ -1038,7 +1279,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
if (!n)
return NULL;
- kmemcheck_annotate_bitfield(n, flags1);
n->fclone = SKB_FCLONE_UNAVAILABLE;
}
@@ -1109,8 +1349,7 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
/* Set the tail pointer and length */
skb_put(n, skb->len);
- if (skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len))
- BUG();
+ BUG_ON(skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len));
copy_skb_header(n, skb);
return n;
@@ -1158,7 +1397,8 @@ struct sk_buff *__pskb_copy_fclone(struct sk_buff *skb, int headroom,
if (skb_shinfo(skb)->nr_frags) {
int i;
- if (skb_orphan_frags(skb, gfp_mask)) {
+ if (skb_orphan_frags(skb, gfp_mask) ||
+ skb_zerocopy_clone(n, skb, gfp_mask)) {
kfree_skb(n);
n = NULL;
goto out;
@@ -1207,8 +1447,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
BUG_ON(nhead < 0);
- if (skb_shared(skb))
- BUG();
+ BUG_ON(skb_shared(skb));
size = SKB_DATA_ALIGN(size);
@@ -1235,9 +1474,10 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
* be since all we did is relocate the values
*/
if (skb_cloned(skb)) {
- /* copy this zero copy skb frags */
if (skb_orphan_frags(skb, gfp_mask))
goto nofrags;
+ if (skb_zcopy(skb))
+ refcount_inc(&skb_uarg(skb)->refcnt);
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
skb_frag_ref(skb, i);
@@ -1266,6 +1506,8 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
skb->nohdr = 0;
atomic_set(&skb_shinfo(skb)->dataref, 1);
+ skb_metadata_clear(skb);
+
/* It is not generally safe to change skb->truesize.
* For the moment, we really care of rx path, or
* when skb is orphaned (not attached to a socket).
@@ -1350,9 +1592,8 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
head_copy_off = newheadroom - head_copy_len;
/* Copy the linear header and data. */
- if (skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off,
- skb->len + head_copy_len))
- BUG();
+ BUG_ON(skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off,
+ skb->len + head_copy_len));
copy_skb_header(n, skb);
@@ -1363,18 +1604,20 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
EXPORT_SYMBOL(skb_copy_expand);
/**
- * skb_pad - zero pad the tail of an skb
+ * __skb_pad - zero pad the tail of an skb
* @skb: buffer to pad
* @pad: space to pad
+ * @free_on_error: free buffer on error
*
* Ensure that a buffer is followed by a padding area that is zero
* filled. Used by network drivers which may DMA or transfer data
* beyond the buffer end onto the wire.
*
- * May return error in out of memory cases. The skb is freed on error.
+ * May return error in out of memory cases. The skb is freed on error
+ * if @free_on_error is true.
*/
-int skb_pad(struct sk_buff *skb, int pad)
+int __skb_pad(struct sk_buff *skb, int pad, bool free_on_error)
{
int err;
int ntail;
@@ -1403,10 +1646,11 @@ int skb_pad(struct sk_buff *skb, int pad)
return 0;
free_skb:
- kfree_skb(skb);
+ if (free_on_error)
+ kfree_skb(skb);
return err;
}
-EXPORT_SYMBOL(skb_pad);
+EXPORT_SYMBOL(__skb_pad);
/**
* pskb_put - add data to the tail of a potentially fragmented buffer
@@ -1630,8 +1874,8 @@ void *__pskb_pull_tail(struct sk_buff *skb, int delta)
return NULL;
}
- if (skb_copy_bits(skb, skb_headlen(skb), skb_tail_pointer(skb), delta))
- BUG();
+ BUG_ON(skb_copy_bits(skb, skb_headlen(skb),
+ skb_tail_pointer(skb), delta));
/* Optimization: no fragments, no reasons to preestimate
* size of pulled pages. Superb.
@@ -1650,7 +1894,7 @@ void *__pskb_pull_tail(struct sk_buff *skb, int delta)
}
/* If we need update frag list, we are in troubles.
- * Certainly, it possible to add an offset to skb data,
+ * Certainly, it is possible to add an offset to skb data,
* but taking into account that pulling is expected to
* be very rare operation, it is worth to fight against
* further bloating skb head and crucify ourselves here instead.
@@ -1719,6 +1963,8 @@ pull_pages:
if (eat) {
skb_shinfo(skb)->frags[k].page_offset += eat;
skb_frag_size_sub(&skb_shinfo(skb)->frags[k], eat);
+ if (!i)
+ goto end;
eat = 0;
}
k++;
@@ -1726,9 +1972,13 @@ pull_pages:
}
skb_shinfo(skb)->nr_frags = k;
+end:
skb->tail += delta;
skb->data_len -= delta;
+ if (!skb->data_len)
+ skb_zcopy_clear(skb, false);
+
return skb_tail_pointer(skb);
}
EXPORT_SYMBOL(__pskb_pull_tail);
@@ -1776,16 +2026,20 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
end = start + skb_frag_size(f);
if ((copy = end - offset) > 0) {
+ u32 p_off, p_len, copied;
+ struct page *p;
u8 *vaddr;
if (copy > len)
copy = len;
- vaddr = kmap_atomic(skb_frag_page(f));
- memcpy(to,
- vaddr + f->page_offset + offset - start,
- copy);
- kunmap_atomic(vaddr);
+ skb_frag_foreach_page(f,
+ f->page_offset + offset - start,
+ copy, p, p_off, p_len, copied) {
+ vaddr = kmap_atomic(p);
+ memcpy(to + copied, vaddr + p_off, p_len);
+ kunmap_atomic(vaddr);
+ }
if ((len -= copy) == 0)
return 0;
@@ -2005,6 +2259,107 @@ int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset,
}
EXPORT_SYMBOL_GPL(skb_splice_bits);
+/* Send skb data on a socket. Socket must be locked. */
+int skb_send_sock_locked(struct sock *sk, struct sk_buff *skb, int offset,
+ int len)
+{
+ unsigned int orig_len = len;
+ struct sk_buff *head = skb;
+ unsigned short fragidx;
+ int slen, ret;
+
+do_frag_list:
+
+ /* Deal with head data */
+ while (offset < skb_headlen(skb) && len) {
+ struct kvec kv;
+ struct msghdr msg;
+
+ slen = min_t(int, len, skb_headlen(skb) - offset);
+ kv.iov_base = skb->data + offset;
+ kv.iov_len = slen;
+ memset(&msg, 0, sizeof(msg));
+
+ ret = kernel_sendmsg_locked(sk, &msg, &kv, 1, slen);
+ if (ret <= 0)
+ goto error;
+
+ offset += ret;
+ len -= ret;
+ }
+
+ /* All the data was skb head? */
+ if (!len)
+ goto out;
+
+ /* Make offset relative to start of frags */
+ offset -= skb_headlen(skb);
+
+ /* Find where we are in frag list */
+ for (fragidx = 0; fragidx < skb_shinfo(skb)->nr_frags; fragidx++) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[fragidx];
+
+ if (offset < frag->size)
+ break;
+
+ offset -= frag->size;
+ }
+
+ for (; len && fragidx < skb_shinfo(skb)->nr_frags; fragidx++) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[fragidx];
+
+ slen = min_t(size_t, len, frag->size - offset);
+
+ while (slen) {
+ ret = kernel_sendpage_locked(sk, frag->page.p,
+ frag->page_offset + offset,
+ slen, MSG_DONTWAIT);
+ if (ret <= 0)
+ goto error;
+
+ len -= ret;
+ offset += ret;
+ slen -= ret;
+ }
+
+ offset = 0;
+ }
+
+ if (len) {
+ /* Process any frag lists */
+
+ if (skb == head) {
+ if (skb_has_frag_list(skb)) {
+ skb = skb_shinfo(skb)->frag_list;
+ goto do_frag_list;
+ }
+ } else if (skb->next) {
+ skb = skb->next;
+ goto do_frag_list;
+ }
+ }
+
+out:
+ return orig_len - len;
+
+error:
+ return orig_len == len ? ret : orig_len - len;
+}
+EXPORT_SYMBOL_GPL(skb_send_sock_locked);
+
+/* Send skb data on a socket. */
+int skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, int len)
+{
+ int ret = 0;
+
+ lock_sock(sk);
+ ret = skb_send_sock_locked(sk, skb, offset, len);
+ release_sock(sk);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(skb_send_sock);
+
/**
* skb_store_bits - store bits from kernel buffer to skb
* @skb: destination buffer
@@ -2044,15 +2399,20 @@ int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len)
end = start + skb_frag_size(frag);
if ((copy = end - offset) > 0) {
+ u32 p_off, p_len, copied;
+ struct page *p;
u8 *vaddr;
if (copy > len)
copy = len;
- vaddr = kmap_atomic(skb_frag_page(frag));
- memcpy(vaddr + frag->page_offset + offset - start,
- from, copy);
- kunmap_atomic(vaddr);
+ skb_frag_foreach_page(frag,
+ frag->page_offset + offset - start,
+ copy, p, p_off, p_len, copied) {
+ vaddr = kmap_atomic(p);
+ memcpy(vaddr + p_off, from + copied, p_len);
+ kunmap_atomic(vaddr);
+ }
if ((len -= copy) == 0)
return 0;
@@ -2117,20 +2477,27 @@ __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len,
end = start + skb_frag_size(frag);
if ((copy = end - offset) > 0) {
+ u32 p_off, p_len, copied;
+ struct page *p;
__wsum csum2;
u8 *vaddr;
if (copy > len)
copy = len;
- vaddr = kmap_atomic(skb_frag_page(frag));
- csum2 = ops->update(vaddr + frag->page_offset +
- offset - start, copy, 0);
- kunmap_atomic(vaddr);
- csum = ops->combine(csum, csum2, pos, copy);
+
+ skb_frag_foreach_page(frag,
+ frag->page_offset + offset - start,
+ copy, p, p_off, p_len, copied) {
+ vaddr = kmap_atomic(p);
+ csum2 = ops->update(vaddr + p_off, p_len, 0);
+ kunmap_atomic(vaddr);
+ csum = ops->combine(csum, csum2, pos, p_len);
+ pos += p_len;
+ }
+
if (!(len -= copy))
return csum;
offset += copy;
- pos += copy;
}
start = end;
}
@@ -2203,24 +2570,31 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]);
if ((copy = end - offset) > 0) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+ u32 p_off, p_len, copied;
+ struct page *p;
__wsum csum2;
u8 *vaddr;
- skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
if (copy > len)
copy = len;
- vaddr = kmap_atomic(skb_frag_page(frag));
- csum2 = csum_partial_copy_nocheck(vaddr +
- frag->page_offset +
- offset - start, to,
- copy, 0);
- kunmap_atomic(vaddr);
- csum = csum_block_add(csum, csum2, pos);
+
+ skb_frag_foreach_page(frag,
+ frag->page_offset + offset - start,
+ copy, p, p_off, p_len, copied) {
+ vaddr = kmap_atomic(p);
+ csum2 = csum_partial_copy_nocheck(vaddr + p_off,
+ to + copied,
+ p_len, 0);
+ kunmap_atomic(vaddr);
+ csum = csum_block_add(csum, csum2, pos);
+ pos += p_len;
+ }
+
if (!(len -= copy))
return csum;
offset += copy;
to += copy;
- pos += copy;
}
start = end;
}
@@ -2360,6 +2734,7 @@ skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen)
skb_tx_error(from);
return -ENOMEM;
}
+ skb_zerocopy_clone(to, from, GFP_ATOMIC);
for (i = 0; i < skb_shinfo(from)->nr_frags; i++) {
if (!len)
@@ -2471,12 +2846,15 @@ EXPORT_SYMBOL(skb_queue_purge);
*/
void skb_rbtree_purge(struct rb_root *root)
{
- struct sk_buff *skb, *next;
+ struct rb_node *p = rb_first(root);
- rbtree_postorder_for_each_entry_safe(skb, next, root, rbnode)
- kfree_skb(skb);
+ while (p) {
+ struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode);
- *root = RB_ROOT;
+ p = rb_next(p);
+ rb_erase(&skb->rbnode, root);
+ kfree_skb(skb);
+ }
}
/**
@@ -2657,6 +3035,7 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
skb_shinfo(skb1)->tx_flags |= skb_shinfo(skb)->tx_flags &
SKBTX_SHARED_FRAG;
+ skb_zerocopy_clone(skb1, skb, 0);
if (len < pos) /* Split line is inside header. */
skb_split_inside_header(skb, skb1, len, pos);
else /* Second chunk has no header, nothing to copy. */
@@ -2700,6 +3079,8 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
if (skb_headlen(skb))
return 0;
+ if (skb_zcopy(tgt) || skb_zcopy(skb))
+ return 0;
todo = shiftlen;
from = 0;
@@ -3273,6 +3654,8 @@ normal:
skb_shinfo(nskb)->tx_flags |= skb_shinfo(head_skb)->tx_flags &
SKBTX_SHARED_FRAG;
+ if (skb_zerocopy_clone(nskb, head_skb, GFP_ATOMIC))
+ goto err;
while (pos < offset + len) {
if (i >= nfrags) {
@@ -4380,6 +4763,7 @@ EXPORT_SYMBOL(kfree_skb_partial);
bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
bool *fragstolen, int *delta_truesize)
{
+ struct skb_shared_info *to_shinfo, *from_shinfo;
int i, delta, len = from->len;
*fragstolen = false;
@@ -4394,15 +4778,19 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
return true;
}
- if (skb_has_frag_list(to) || skb_has_frag_list(from))
+ to_shinfo = skb_shinfo(to);
+ from_shinfo = skb_shinfo(from);
+ if (to_shinfo->frag_list || from_shinfo->frag_list)
+ return false;
+ if (skb_zcopy(to) || skb_zcopy(from))
return false;
if (skb_headlen(from) != 0) {
struct page *page;
unsigned int offset;
- if (skb_shinfo(to)->nr_frags +
- skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS)
+ if (to_shinfo->nr_frags +
+ from_shinfo->nr_frags >= MAX_SKB_FRAGS)
return false;
if (skb_head_is_locked(from))
@@ -4413,12 +4801,12 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
page = virt_to_head_page(from->head);
offset = from->data - (unsigned char *)page_address(page);
- skb_fill_page_desc(to, skb_shinfo(to)->nr_frags,
+ skb_fill_page_desc(to, to_shinfo->nr_frags,
page, offset, skb_headlen(from));
*fragstolen = true;
} else {
- if (skb_shinfo(to)->nr_frags +
- skb_shinfo(from)->nr_frags > MAX_SKB_FRAGS)
+ if (to_shinfo->nr_frags +
+ from_shinfo->nr_frags > MAX_SKB_FRAGS)
return false;
delta = from->truesize - SKB_TRUESIZE(skb_end_offset(from));
@@ -4426,19 +4814,19 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
WARN_ON_ONCE(delta < len);
- memcpy(skb_shinfo(to)->frags + skb_shinfo(to)->nr_frags,
- skb_shinfo(from)->frags,
- skb_shinfo(from)->nr_frags * sizeof(skb_frag_t));
- skb_shinfo(to)->nr_frags += skb_shinfo(from)->nr_frags;
+ memcpy(to_shinfo->frags + to_shinfo->nr_frags,
+ from_shinfo->frags,
+ from_shinfo->nr_frags * sizeof(skb_frag_t));
+ to_shinfo->nr_frags += from_shinfo->nr_frags;
if (!skb_cloned(from))
- skb_shinfo(from)->nr_frags = 0;
+ from_shinfo->nr_frags = 0;
/* if the skb is not cloned this does nothing
* since we set nr_frags to 0.
*/
- for (i = 0; i < skb_shinfo(from)->nr_frags; i++)
- skb_frag_ref(from, i);
+ for (i = 0; i < from_shinfo->nr_frags; i++)
+ __skb_frag_ref(&from_shinfo->frags[i]);
to->truesize += delta;
to->len += len;
@@ -4476,6 +4864,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
if (!xnet)
return;
+ ipvs_reset(skb);
skb_orphan(skb);
skb->mark = 0;
}
diff --git a/net/core/sock.c b/net/core/sock.c
index ac2a404c73eb..c0b5b2f17412 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -307,16 +307,6 @@ static struct lock_class_key af_wlock_keys[AF_MAX];
static struct lock_class_key af_elock_keys[AF_MAX];
static struct lock_class_key af_kern_callback_keys[AF_MAX];
-/* Take into consideration the size of the struct sk_buff overhead in the
- * determination of these values, since that is non-constant across
- * platforms. This makes socket queueing behavior and performance
- * not depend upon such differences.
- */
-#define _SK_MEM_PACKETS 256
-#define _SK_MEM_OVERHEAD SKB_TRUESIZE(256)
-#define SK_WMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
-#define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
-
/* Run time adjustable parameters. */
__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
EXPORT_SYMBOL(sysctl_wmem_max);
@@ -1055,6 +1045,20 @@ set_rcvbuf:
if (val == 1)
dst_negative_advice(sk);
break;
+
+ case SO_ZEROCOPY:
+ if (sk->sk_family != PF_INET && sk->sk_family != PF_INET6)
+ ret = -ENOTSUPP;
+ else if (sk->sk_protocol != IPPROTO_TCP)
+ ret = -ENOTSUPP;
+ else if (sk->sk_state != TCP_CLOSE)
+ ret = -EBUSY;
+ else if (val < 0 || val > 1)
+ ret = -EINVAL;
+ else
+ sock_valbool_flag(sk, SOCK_ZEROCOPY, valbool);
+ break;
+
default:
ret = -ENOPROTOOPT;
break;
@@ -1383,6 +1387,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
v.val64 = sock_gen_cookie(sk);
break;
+ case SO_ZEROCOPY:
+ v.val = sock_flag(sk, SOCK_ZEROCOPY);
+ break;
+
default:
/* We implement the SO_SNDLOWAT etc to not be settable
* (1003.1g 7).
@@ -1461,8 +1469,6 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
sk = kmalloc(prot->obj_size, priority);
if (sk != NULL) {
- kmemcheck_annotate_bitfield(sk, flags);
-
if (security_sk_alloc(sk, family, priority))
goto out_free;
@@ -1646,6 +1652,8 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
sock_copy(newsk, sk);
+ newsk->sk_prot_creator = sk->sk_prot;
+
/* SANITY */
if (likely(newsk->sk_net_refcnt))
get_net(sock_net(newsk));
@@ -1667,19 +1675,28 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
newsk->sk_dst_pending_confirm = 0;
newsk->sk_wmem_queued = 0;
newsk->sk_forward_alloc = 0;
+
+ /* sk->sk_memcg will be populated at accept() time */
+ newsk->sk_memcg = NULL;
+
atomic_set(&newsk->sk_drops, 0);
newsk->sk_send_head = NULL;
newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
+ atomic_set(&newsk->sk_zckey, 0);
sock_reset_flag(newsk, SOCK_DONE);
+ cgroup_sk_alloc(&newsk->sk_cgrp_data);
- filter = rcu_dereference_protected(newsk->sk_filter, 1);
+ rcu_read_lock();
+ filter = rcu_dereference(sk->sk_filter);
if (filter != NULL)
/* though it's an empty new sock, the charging may fail
* if sysctl_optmem_max was changed between creation of
* original socket and cloning
*/
is_charged = sk_filter_charge(newsk, filter);
+ RCU_INIT_POINTER(newsk->sk_filter, filter);
+ rcu_read_unlock();
if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) {
/* We need to make sure that we don't uncharge the new
@@ -1700,9 +1717,6 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
newsk->sk_incoming_cpu = raw_smp_processor_id();
atomic64_set(&newsk->sk_cookie, 0);
- mem_cgroup_sk_alloc(newsk);
- cgroup_sk_alloc(&newsk->sk_cgrp_data);
-
/*
* Before updating sk_refcnt, we must commit prior changes to memory
* (Documentation/RCU/rculist_nulls.txt for details)
@@ -1757,7 +1771,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
sk->sk_route_caps &= ~sk->sk_route_nocaps;
if (sk_can_gso(sk)) {
- if (dst->header_len) {
+ if (dst->header_len && !xfrm_dst_offload_ok(dst)) {
sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
} else {
sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
@@ -1923,6 +1937,33 @@ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
}
EXPORT_SYMBOL(sock_wmalloc);
+static void sock_ofree(struct sk_buff *skb)
+{
+ struct sock *sk = skb->sk;
+
+ atomic_sub(skb->truesize, &sk->sk_omem_alloc);
+}
+
+struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size,
+ gfp_t priority)
+{
+ struct sk_buff *skb;
+
+ /* small safe race: SKB_TRUESIZE may differ from final skb->truesize */
+ if (atomic_read(&sk->sk_omem_alloc) + SKB_TRUESIZE(size) >
+ sysctl_optmem_max)
+ return NULL;
+
+ skb = alloc_skb(size, priority);
+ if (!skb)
+ return NULL;
+
+ atomic_add(skb->truesize, &sk->sk_omem_alloc);
+ skb->sk = sk;
+ skb->destructor = sock_ofree;
+ return skb;
+}
+
/*
* Allocate a memory block from the socket's option memory buffer.
*/
@@ -2303,16 +2344,18 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
/* guarantee minimum buffer size under pressure */
if (kind == SK_MEM_RECV) {
- if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0])
+ if (atomic_read(&sk->sk_rmem_alloc) < sk_get_rmem0(sk, prot))
return 1;
} else { /* SK_MEM_SEND */
+ int wmem0 = sk_get_wmem0(sk, prot);
+
if (sk->sk_type == SOCK_STREAM) {
- if (sk->sk_wmem_queued < prot->sysctl_wmem[0])
+ if (sk->sk_wmem_queued < wmem0)
return 1;
- } else if (refcount_read(&sk->sk_wmem_alloc) <
- prot->sysctl_wmem[0])
+ } else if (refcount_read(&sk->sk_wmem_alloc) < wmem0) {
return 1;
+ }
}
if (sk_has_memory_pressure(sk)) {
@@ -2408,9 +2451,6 @@ EXPORT_SYMBOL(__sk_mem_reclaim);
int sk_set_peek_off(struct sock *sk, int val)
{
- if (val < 0)
- return -EINVAL;
-
sk->sk_peek_off = val;
return 0;
}
@@ -2500,6 +2540,12 @@ int sock_no_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
}
EXPORT_SYMBOL(sock_no_sendmsg);
+int sock_no_sendmsg_locked(struct sock *sk, struct msghdr *m, size_t len)
+{
+ return -EOPNOTSUPP;
+}
+EXPORT_SYMBOL(sock_no_sendmsg_locked);
+
int sock_no_recvmsg(struct socket *sock, struct msghdr *m, size_t len,
int flags)
{
@@ -2528,6 +2574,22 @@ ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, siz
}
EXPORT_SYMBOL(sock_no_sendpage);
+ssize_t sock_no_sendpage_locked(struct sock *sk, struct page *page,
+ int offset, size_t size, int flags)
+{
+ ssize_t res;
+ struct msghdr msg = {.msg_flags = flags};
+ struct kvec iov;
+ char *kaddr = kmap(page);
+
+ iov.iov_base = kaddr + offset;
+ iov.iov_len = size;
+ res = kernel_sendmsg_locked(sk, &msg, &iov, 1, size);
+ kunmap(page);
+ return res;
+}
+EXPORT_SYMBOL(sock_no_sendpage_locked);
+
/*
* Default Socket Callbacks
*/
@@ -2623,7 +2685,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk_init_common(sk);
sk->sk_send_head = NULL;
- init_timer(&sk->sk_timer);
+ timer_setup(&sk->sk_timer, NULL, 0);
sk->sk_allocation = GFP_KERNEL;
sk->sk_rcvbuf = sysctl_rmem_default;
@@ -2673,6 +2735,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
sk->sk_stamp = SK_DEFAULT_STAMP;
+ atomic_set(&sk->sk_zckey, 0);
#ifdef CONFIG_NET_RX_BUSY_POLL
sk->sk_napi_id = 0;
@@ -2681,6 +2744,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_max_pacing_rate = ~0U;
sk->sk_pacing_rate = ~0U;
+ sk->sk_pacing_shift = 10;
sk->sk_incoming_cpu = -1;
/*
* Before updating sk_refcnt, we must commit prior changes to memory
@@ -2979,7 +3043,6 @@ struct prot_inuse {
static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);
-#ifdef CONFIG_NET_NS
void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
{
__this_cpu_add(net->core.inuse->val[prot->inuse_idx], val);
@@ -3023,27 +3086,6 @@ static __init int net_inuse_init(void)
}
core_initcall(net_inuse_init);
-#else
-static DEFINE_PER_CPU(struct prot_inuse, prot_inuse);
-
-void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
-{
- __this_cpu_add(prot_inuse.val[prot->inuse_idx], val);
-}
-EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
-
-int sock_prot_inuse_get(struct net *net, struct proto *prot)
-{
- int cpu, idx = prot->inuse_idx;
- int res = 0;
-
- for_each_possible_cpu(cpu)
- res += per_cpu(prot_inuse, cpu).val[idx];
-
- return res >= 0 ? res : 0;
-}
-EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
-#endif
static void assign_proto_idx(struct proto *prot)
{
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index eed1ebf7f29d..5eeb1d20cc38 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* To speed up listener socket lookup, create an array to store all sockets
* listening on the same port. This allows a decision to be made after finding
@@ -36,9 +37,14 @@ int reuseport_alloc(struct sock *sk)
* soft irq of receive path or setsockopt from process context
*/
spin_lock_bh(&reuseport_lock);
- WARN_ONCE(rcu_dereference_protected(sk->sk_reuseport_cb,
- lockdep_is_held(&reuseport_lock)),
- "multiple allocations for the same socket");
+
+ /* Allocation attempts can occur concurrently via the setsockopt path
+ * and the bind/hash path. Nothing to do when we lose the race.
+ */
+ if (rcu_dereference_protected(sk->sk_reuseport_cb,
+ lockdep_is_held(&reuseport_lock)))
+ goto out;
+
reuse = __reuseport_alloc(INIT_SOCKS);
if (!reuse) {
spin_unlock_bh(&reuseport_lock);
@@ -49,6 +55,7 @@ int reuseport_alloc(struct sock *sk)
reuse->num_socks = 1;
rcu_assign_pointer(sk->sk_reuseport_cb, reuse);
+out:
spin_unlock_bh(&reuseport_lock);
return 0;
diff --git a/net/core/stream.c b/net/core/stream.c
index 20231dbb1da0..1cff9c6270c6 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* SUCS NET3:
*
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index b7cd9aafe99e..cbc3dde4cfcc 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* -*- linux-c -*-
* sysctl_net_core.c: sysctl interface to net core subsystem.
*
diff --git a/net/core/tso.c b/net/core/tso.c
index 5dca7ce8ee9f..43f4eba61933 100644
--- a/net/core/tso.c
+++ b/net/core/tso.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/export.h>
#include <linux/if_vlan.h>
#include <net/ip.h>
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 733f523707ac..bae7d78aa068 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -1938,8 +1938,8 @@ static int __init dcbnl_init(void)
{
INIT_LIST_HEAD(&dcb_app_list);
- rtnl_register(PF_UNSPEC, RTM_GETDCB, dcb_doit, NULL, NULL);
- rtnl_register(PF_UNSPEC, RTM_SETDCB, dcb_doit, NULL, NULL);
+ rtnl_register(PF_UNSPEC, RTM_GETDCB, dcb_doit, NULL, 0);
+ rtnl_register(PF_UNSPEC, RTM_SETDCB, dcb_doit, NULL, 0);
return 0;
}
diff --git a/net/dccp/Makefile b/net/dccp/Makefile
index 5c8362b037ed..2e7b56097bc4 100644
--- a/net/dccp/Makefile
+++ b/net/dccp/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_IP_DCCP) += dccp.o dccp_ipv4.o
dccp-y := ccid.o feat.o input.o minisocks.o options.o output.o proto.o timer.o \
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index e1295d5f2c56..1c75cd1255f6 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -126,10 +126,10 @@ static void ccid2_change_l_seq_window(struct sock *sk, u64 val)
DCCPF_SEQ_WMAX));
}
-static void ccid2_hc_tx_rto_expire(unsigned long data)
+static void ccid2_hc_tx_rto_expire(struct timer_list *t)
{
- struct sock *sk = (struct sock *)data;
- struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
+ struct ccid2_hc_tx_sock *hc = from_timer(hc, t, tx_rtotimer);
+ struct sock *sk = hc->sk;
const bool sender_was_blocked = ccid2_cwnd_network_limited(hc);
bh_lock_sock(sk);
@@ -733,8 +733,8 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
hc->tx_rpdupack = -1;
hc->tx_last_cong = hc->tx_lsndtime = hc->tx_cwnd_stamp = ccid2_jiffies32;
hc->tx_cwnd_used = 0;
- setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire,
- (unsigned long)sk);
+ hc->sk = sk;
+ timer_setup(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire, 0);
INIT_LIST_HEAD(&hc->tx_av_chunks);
return 0;
}
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
index 6e50ef2898fb..1af0116dc6ce 100644
--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -85,6 +85,7 @@ struct ccid2_hc_tx_sock {
tx_rto;
u64 tx_rtt_seq:48;
struct timer_list tx_rtotimer;
+ struct sock *sk;
/* Congestion Window validation (optional, RFC 2861) */
u32 tx_cwnd_used,
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 119c04317d48..8b5ba6dffac7 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -195,10 +195,10 @@ static inline void ccid3_hc_tx_update_win_count(struct ccid3_hc_tx_sock *hc,
}
}
-static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
+static void ccid3_hc_tx_no_feedback_timer(struct timer_list *t)
{
- struct sock *sk = (struct sock *)data;
- struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk);
+ struct ccid3_hc_tx_sock *hc = from_timer(hc, t, tx_no_feedback_timer);
+ struct sock *sk = hc->sk;
unsigned long t_nfb = USEC_PER_SEC / 5;
bh_lock_sock(sk);
@@ -505,8 +505,9 @@ static int ccid3_hc_tx_init(struct ccid *ccid, struct sock *sk)
hc->tx_state = TFRC_SSTATE_NO_SENT;
hc->tx_hist = NULL;
- setup_timer(&hc->tx_no_feedback_timer,
- ccid3_hc_tx_no_feedback_timer, (unsigned long)sk);
+ hc->sk = sk;
+ timer_setup(&hc->tx_no_feedback_timer,
+ ccid3_hc_tx_no_feedback_timer, 0);
return 0;
}
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index 1a9933c29672..813d91c6e1e2 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -106,6 +106,7 @@ struct ccid3_hc_tx_sock {
u8 tx_last_win_count;
ktime_t tx_t_last_win_count;
struct timer_list tx_no_feedback_timer;
+ struct sock *sk;
ktime_t tx_t_ld;
ktime_t tx_t_nom;
struct tfrc_tx_hist_entry *tx_hist;
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index 08df7a3acb3d..876e18592d71 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -149,10 +149,8 @@ static void tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const u8 a, const u8 b)
{
const u8 idx_a = tfrc_rx_hist_index(h, a),
idx_b = tfrc_rx_hist_index(h, b);
- struct tfrc_rx_hist_entry *tmp = h->ring[idx_a];
- h->ring[idx_a] = h->ring[idx_b];
- h->ring[idx_b] = tmp;
+ swap(h->ring[idx_a], h->ring[idx_b]);
}
/*
diff --git a/net/dccp/ccids/lib/tfrc.c b/net/dccp/ccids/lib/tfrc.c
index 62b5828acde0..d7f265e1f50c 100644
--- a/net/dccp/ccids/lib/tfrc.c
+++ b/net/dccp/ccids/lib/tfrc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* TFRC library initialisation
*
diff --git a/net/dccp/input.c b/net/dccp/input.c
index fa6be9750bb4..d28d46bff6ab 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -534,6 +534,7 @@ static int dccp_rcv_respond_partopen_state_process(struct sock *sk,
case DCCP_PKT_DATA:
if (sk->sk_state == DCCP_RESPOND)
break;
+ /* fall through */
case DCCP_PKT_DATAACK:
case DCCP_PKT_ACK:
/*
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 1b202f16531f..e65fcb45c3f6 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -256,7 +256,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
sk = __inet_lookup_established(net, &dccp_hashinfo,
iph->daddr, dh->dccph_dport,
iph->saddr, ntohs(dh->dccph_sport),
- inet_iif(skb));
+ inet_iif(skb), 0);
if (!sk) {
__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
return;
@@ -414,8 +414,7 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk,
sk_daddr_set(newsk, ireq->ir_rmt_addr);
sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
newinet->inet_saddr = ireq->ir_loc_addr;
- newinet->inet_opt = ireq->opt;
- ireq->opt = NULL;
+ RCU_INIT_POINTER(newinet->inet_opt, rcu_dereference(ireq->ireq_opt));
newinet->mc_index = inet_iif(skb);
newinet->mc_ttl = ip_hdr(skb)->ttl;
newinet->inet_id = jiffies;
@@ -430,7 +429,10 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk,
if (__inet_inherit_port(sk, newsk) < 0)
goto put_and_exit;
*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
-
+ if (*own_req)
+ ireq->ireq_opt = NULL;
+ else
+ newinet->inet_opt = NULL;
return newsk;
exit_overflow:
@@ -441,6 +443,7 @@ exit:
__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS);
return NULL;
put_and_exit:
+ newinet->inet_opt = NULL;
inet_csk_prepare_forced_close(newsk);
dccp_done(newsk);
goto exit;
@@ -492,7 +495,7 @@ static int dccp_v4_send_response(const struct sock *sk, struct request_sock *req
ireq->ir_rmt_addr);
err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
ireq->ir_rmt_addr,
- ireq->opt);
+ ireq_opt_deref(ireq));
err = net_xmit_eval(err);
}
@@ -548,7 +551,7 @@ out:
static void dccp_v4_reqsk_destructor(struct request_sock *req)
{
dccp_feat_list_purge(&dccp_rsk(req)->dreq_featneg);
- kfree(inet_rsk(req)->opt);
+ kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1));
}
void dccp_syn_ack_timeout(const struct request_sock *req)
@@ -804,7 +807,7 @@ static int dccp_v4_rcv(struct sk_buff *skb)
lookup:
sk = __inet_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh),
- dh->dccph_sport, dh->dccph_dport, &refcounted);
+ dh->dccph_sport, dh->dccph_dport, 0, &refcounted);
if (!sk) {
dccp_pr_debug("failed to look up flow ID in table and "
"get corresponding socket\n");
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 1b58eac8aad3..5df7857fc0f3 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -16,6 +16,7 @@
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/xfrm.h>
+#include <linux/string.h>
#include <net/addrconf.h>
#include <net/inet_common.h>
@@ -30,6 +31,7 @@
#include <net/ip6_checksum.h>
#include <net/xfrm.h>
#include <net/secure_seq.h>
+#include <net/sock.h>
#include "dccp.h"
#include "ipv6.h"
@@ -89,7 +91,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
sk = __inet6_lookup_established(net, &dccp_hashinfo,
&hdr->daddr, dh->dccph_dport,
&hdr->saddr, ntohs(dh->dccph_sport),
- inet6_iif(skb));
+ inet6_iif(skb), 0);
if (!sk) {
__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
@@ -597,19 +599,13 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
--ANK (980728)
*/
if (np->rxopt.all)
- /*
- * FIXME: Add handling of IPV6_PKTOPTIONS skb. See the comments below
- * (wrt ipv6_pktopions) and net/ipv6/tcp_ipv6.c for an example.
- */
opt_skb = skb_clone(skb, GFP_ATOMIC);
if (sk->sk_state == DCCP_OPEN) { /* Fast path */
if (dccp_rcv_established(sk, skb, dccp_hdr(skb), skb->len))
goto reset;
- if (opt_skb) {
- /* XXX This is where we would goto ipv6_pktoptions. */
- __kfree_skb(opt_skb);
- }
+ if (opt_skb)
+ goto ipv6_pktoptions;
return 0;
}
@@ -640,10 +636,8 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
if (dccp_rcv_state_process(sk, skb, dccp_hdr(skb), skb->len))
goto reset;
- if (opt_skb) {
- /* XXX This is where we would goto ipv6_pktoptions. */
- __kfree_skb(opt_skb);
- }
+ if (opt_skb)
+ goto ipv6_pktoptions;
return 0;
reset:
@@ -653,6 +647,35 @@ discard:
__kfree_skb(opt_skb);
kfree_skb(skb);
return 0;
+
+/* Handling IPV6_PKTOPTIONS skb the similar
+ * way it's done for net/ipv6/tcp_ipv6.c
+ */
+ipv6_pktoptions:
+ if (!((1 << sk->sk_state) & (DCCPF_CLOSED | DCCPF_LISTEN))) {
+ if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
+ np->mcast_oif = inet6_iif(opt_skb);
+ if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
+ np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
+ if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
+ np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
+ if (np->repflow)
+ np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
+ if (ipv6_opt_accepted(sk, opt_skb,
+ &DCCP_SKB_CB(opt_skb)->header.h6)) {
+ skb_set_owner_r(opt_skb, sk);
+ memmove(IP6CB(opt_skb),
+ &DCCP_SKB_CB(opt_skb)->header.h6,
+ sizeof(struct inet6_skb_parm));
+ opt_skb = xchg(&np->pktoptions, opt_skb);
+ } else {
+ __kfree_skb(opt_skb);
+ opt_skb = xchg(&np->pktoptions, NULL);
+ }
+ }
+
+ kfree_skb(opt_skb);
+ return 0;
}
static int dccp_v6_rcv(struct sk_buff *skb)
@@ -687,7 +710,7 @@ static int dccp_v6_rcv(struct sk_buff *skb)
lookup:
sk = __inet6_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh),
dh->dccph_sport, dh->dccph_dport,
- inet6_iif(skb), &refcounted);
+ inet6_iif(skb), 0, &refcounted);
if (!sk) {
dccp_pr_debug("failed to look up flow ID in table and "
"get corresponding socket\n");
diff --git a/net/dccp/options.c b/net/dccp/options.c
index 51cdfc3bd8ca..4e40db017e19 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -227,8 +227,8 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
* Ack vectors are processed by the TX CCID if it is
* interested. The RX CCID need not parse Ack Vectors,
* since it is only interested in clearing old state.
- * Fall through.
*/
+ /* fall through */
case DCCPO_MIN_TX_CCID_SPECIFIC ... DCCPO_MAX_TX_CCID_SPECIFIC:
if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk,
pkt_type, opt, value, len))
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 86bc40ba6ba5..b68168fcc06a 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -24,6 +24,7 @@
#include <net/checksum.h>
#include <net/inet_sock.h>
+#include <net/inet_common.h>
#include <net/sock.h>
#include <net/xfrm.h>
@@ -170,6 +171,15 @@ const char *dccp_packet_name(const int type)
EXPORT_SYMBOL_GPL(dccp_packet_name);
+static void dccp_sk_destruct(struct sock *sk)
+{
+ struct dccp_sock *dp = dccp_sk(sk);
+
+ ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
+ dp->dccps_hc_tx_ccid = NULL;
+ inet_sock_destruct(sk);
+}
+
int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
{
struct dccp_sock *dp = dccp_sk(sk);
@@ -179,6 +189,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
icsk->icsk_syn_retries = sysctl_dccp_request_retries;
sk->sk_state = DCCP_CLOSED;
sk->sk_write_space = dccp_write_space;
+ sk->sk_destruct = dccp_sk_destruct;
icsk->icsk_sync_mss = dccp_sync_mss;
dp->dccps_mss_cache = 536;
dp->dccps_rate_last = jiffies;
@@ -219,8 +230,7 @@ void dccp_destroy_sock(struct sock *sk)
dp->dccps_hc_rx_ackvec = NULL;
}
ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
- ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
- dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
+ dp->dccps_hc_rx_ccid = NULL;
/* clean up feature negotiation state */
dccp_feat_list_purge(&dp->dccps_featneg);
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index 3a2c34027758..b50a8732ff43 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -125,10 +125,11 @@ static void dccp_retransmit_timer(struct sock *sk)
__sk_dst_reset(sk);
}
-static void dccp_write_timer(unsigned long data)
+static void dccp_write_timer(struct timer_list *t)
{
- struct sock *sk = (struct sock *)data;
- struct inet_connection_sock *icsk = inet_csk(sk);
+ struct inet_connection_sock *icsk =
+ from_timer(icsk, t, icsk_retransmit_timer);
+ struct sock *sk = &icsk->icsk_inet.sk;
int event = 0;
bh_lock_sock(sk);
@@ -161,19 +162,20 @@ out:
sock_put(sk);
}
-static void dccp_keepalive_timer(unsigned long data)
+static void dccp_keepalive_timer(struct timer_list *t)
{
- struct sock *sk = (struct sock *)data;
+ struct sock *sk = from_timer(sk, t, sk_timer);
pr_err("dccp should not use a keepalive timer !\n");
sock_put(sk);
}
/* This is the same as tcp_delack_timer, sans prequeue & mem_reclaim stuff */
-static void dccp_delack_timer(unsigned long data)
+static void dccp_delack_timer(struct timer_list *t)
{
- struct sock *sk = (struct sock *)data;
- struct inet_connection_sock *icsk = inet_csk(sk);
+ struct inet_connection_sock *icsk =
+ from_timer(icsk, t, icsk_delack_timer);
+ struct sock *sk = &icsk->icsk_inet.sk;
bh_lock_sock(sk);
if (sock_owned_by_user(sk)) {
@@ -232,10 +234,13 @@ static void dccp_write_xmitlet(unsigned long data)
bh_unlock_sock(sk);
}
-static void dccp_write_xmit_timer(unsigned long data)
+static void dccp_write_xmit_timer(struct timer_list *t)
{
- dccp_write_xmitlet(data);
- sock_put((struct sock *)data);
+ struct dccp_sock *dp = from_timer(dp, t, dccps_xmit_timer);
+ struct sock *sk = &dp->dccps_inet_connection.icsk_inet.sk;
+
+ dccp_write_xmitlet((unsigned long)sk);
+ sock_put(sk);
}
void dccp_init_xmit_timers(struct sock *sk)
@@ -243,8 +248,7 @@ void dccp_init_xmit_timers(struct sock *sk)
struct dccp_sock *dp = dccp_sk(sk);
tasklet_init(&dp->dccps_xmitlet, dccp_write_xmitlet, (unsigned long)sk);
- setup_timer(&dp->dccps_xmit_timer, dccp_write_xmit_timer,
- (unsigned long)sk);
+ timer_setup(&dp->dccps_xmit_timer, dccp_write_xmit_timer, 0);
inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer,
&dccp_keepalive_timer);
}
diff --git a/net/decnet/Makefile b/net/decnet/Makefile
index e44003af71f6..9e38122d942b 100644
--- a/net/decnet/Makefile
+++ b/net/decnet/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_DECNET) += decnet.o
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 73a0399dc7a2..518cea17b811 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -533,10 +533,6 @@ static struct sock *dn_alloc_sock(struct net *net, struct socket *sock, gfp_t gf
scp->keepalive = 10 * HZ;
scp->keepalive_fxn = dn_keepalive;
- init_timer(&scp->delack_timer);
- scp->delack_pending = 0;
- scp->delack_fxn = dn_nsp_delayed_ack;
-
dn_start_slow_timer(sk);
out:
return sk;
@@ -634,10 +630,12 @@ static void dn_destroy_sock(struct sock *sk)
goto disc_reject;
case DN_RUN:
scp->state = DN_DI;
+ /* fall through */
case DN_DI:
case DN_DR:
disc_reject:
dn_nsp_send_disc(sk, NSP_DISCINIT, 0, sk->sk_allocation);
+ /* fall through */
case DN_NC:
case DN_NR:
case DN_RJ:
@@ -651,6 +649,7 @@ disc_reject:
break;
default:
printk(KERN_DEBUG "DECnet: dn_destroy_sock passed socket in invalid state\n");
+ /* fall through */
case DN_O:
dn_stop_slow_timer(sk);
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index fa0110b57ca1..9153247dad28 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* DECnet An implementation of the DECnet protocol suite for the LINUX
* operating system. DECnet is implemented using the BSD Socket
@@ -1038,14 +1039,14 @@ static void dn_eth_down(struct net_device *dev)
static void dn_dev_set_timer(struct net_device *dev);
-static void dn_dev_timer_func(unsigned long arg)
+static void dn_dev_timer_func(struct timer_list *t)
{
- struct net_device *dev = (struct net_device *)arg;
- struct dn_dev *dn_db;
+ struct dn_dev *dn_db = from_timer(dn_db, t, timer);
+ struct net_device *dev;
struct dn_ifaddr *ifa;
rcu_read_lock();
- dn_db = rcu_dereference(dev->dn_ptr);
+ dev = dn_db->dev;
if (dn_db->t3 <= dn_db->parms.t2) {
if (dn_db->parms.timer3) {
for (ifa = rcu_dereference(dn_db->ifa_list);
@@ -1070,8 +1071,6 @@ static void dn_dev_set_timer(struct net_device *dev)
if (dn_db->parms.t2 > dn_db->parms.t3)
dn_db->parms.t2 = dn_db->parms.t3;
- dn_db->timer.data = (unsigned long)dev;
- dn_db->timer.function = dn_dev_timer_func;
dn_db->timer.expires = jiffies + (dn_db->parms.t2 * HZ);
add_timer(&dn_db->timer);
@@ -1100,7 +1099,7 @@ static struct dn_dev *dn_dev_create(struct net_device *dev, int *err)
rcu_assign_pointer(dev->dn_ptr, dn_db);
dn_db->dev = dev;
- init_timer(&dn_db->timer);
+ timer_setup(&dn_db->timer, dn_dev_timer_func, 0);
dn_db->uptime = jiffies;
@@ -1419,9 +1418,9 @@ void __init dn_dev_init(void)
dn_dev_devices_on();
- rtnl_register(PF_DECnet, RTM_NEWADDR, dn_nl_newaddr, NULL, NULL);
- rtnl_register(PF_DECnet, RTM_DELADDR, dn_nl_deladdr, NULL, NULL);
- rtnl_register(PF_DECnet, RTM_GETADDR, NULL, dn_nl_dump_ifaddr, NULL);
+ rtnl_register(PF_DECnet, RTM_NEWADDR, dn_nl_newaddr, NULL, 0);
+ rtnl_register(PF_DECnet, RTM_DELADDR, dn_nl_deladdr, NULL, 0);
+ rtnl_register(PF_DECnet, RTM_GETADDR, NULL, dn_nl_dump_ifaddr, 0);
proc_create("decnet_dev", S_IRUGO, init_net.proc_net, &dn_dev_seq_fops);
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index f9f6fb3f3c5b..b37a1b833c77 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* DECnet An implementation of the DECnet protocol suite for the LINUX
* operating system. DECnet is implemented using the BSD Socket
@@ -791,8 +792,8 @@ void __init dn_fib_init(void)
register_dnaddr_notifier(&dn_fib_dnaddr_notifier);
- rtnl_register(PF_DECnet, RTM_NEWROUTE, dn_fib_rtm_newroute, NULL, NULL);
- rtnl_register(PF_DECnet, RTM_DELROUTE, dn_fib_rtm_delroute, NULL, NULL);
+ rtnl_register(PF_DECnet, RTM_NEWROUTE, dn_fib_rtm_newroute, NULL, 0);
+ rtnl_register(PF_DECnet, RTM_DELROUTE, dn_fib_rtm_delroute, NULL, 0);
}
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index 21dedf6fd0f7..528119a5618e 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* DECnet An implementation of the DECnet protocol suite for the LINUX
* operating system. DECnet is implemented using the BSD Socket
@@ -94,7 +95,7 @@ struct neigh_table dn_neigh_table = {
[NEIGH_VAR_BASE_REACHABLE_TIME] = 30 * HZ,
[NEIGH_VAR_DELAY_PROBE_TIME] = 5 * HZ,
[NEIGH_VAR_GC_STALETIME] = 60 * HZ,
- [NEIGH_VAR_QUEUE_LEN_BYTES] = 64*1024,
+ [NEIGH_VAR_QUEUE_LEN_BYTES] = SK_WMEM_MAX,
[NEIGH_VAR_PROXY_QLEN] = 0,
[NEIGH_VAR_ANYCAST_DELAY] = 0,
[NEIGH_VAR_PROXY_DELAY] = 0,
diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c
index 7ac086d5c0c0..1b2120645730 100644
--- a/net/decnet/dn_nsp_in.c
+++ b/net/decnet/dn_nsp_in.c
@@ -776,12 +776,8 @@ static int dn_nsp_rx_packet(struct net *net, struct sock *sk2,
* Swap src & dst and look up in the normal way.
*/
if (unlikely(cb->rt_flags & DN_RT_F_RTS)) {
- __le16 tmp = cb->dst_port;
- cb->dst_port = cb->src_port;
- cb->src_port = tmp;
- tmp = cb->dst;
- cb->dst = cb->src;
- cb->src = tmp;
+ swap(cb->dst_port, cb->src_port);
+ swap(cb->dst, cb->src);
}
/*
diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c
index 66f035e476ea..56a52a004c56 100644
--- a/net/decnet/dn_nsp_out.c
+++ b/net/decnet/dn_nsp_out.c
@@ -313,11 +313,8 @@ static __le16 *dn_mk_ack_header(struct sock *sk, struct sk_buff *skb, unsigned c
ackcrs |= 0x8000;
/* If this is an "other data/ack" message, swap acknum and ackcrs */
- if (other) {
- unsigned short tmp = acknum;
- acknum = ackcrs;
- ackcrs = tmp;
- }
+ if (other)
+ swap(acknum, ackcrs);
/* Set "cross subchannel" bit in ackcrs */
ackcrs |= 0x2000;
@@ -491,17 +488,6 @@ void dn_send_conn_ack (struct sock *sk)
dn_nsp_send(skb);
}
-void dn_nsp_delayed_ack(struct sock *sk)
-{
- struct dn_scp *scp = DN_SK(sk);
-
- if (scp->ackxmt_oth != scp->numoth_rcv)
- dn_nsp_send_oth_ack(sk);
-
- if (scp->ackxmt_dat != scp->numdat_rcv)
- dn_nsp_send_data_ack(sk);
-}
-
static int dn_nsp_retrans_conn_conf(struct sock *sk)
{
struct dn_scp *scp = DN_SK(sk);
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index bcbe548f8854..b36dceab0dc1 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -131,7 +131,7 @@ static struct dn_rt_hash_bucket *dn_rt_hash_table;
static unsigned int dn_rt_hash_mask;
static struct timer_list dn_route_timer;
-static DEFINE_TIMER(dn_rt_flush_timer, dn_run_flush, 0, 0);
+static DEFINE_TIMER(dn_rt_flush_timer, dn_run_flush);
int decnet_dst_gc_interval = 2;
static struct dst_ops dn_dst_ops = {
@@ -338,7 +338,7 @@ static int dn_insert_route(struct dn_route *rt, unsigned int hash, struct dn_rou
dn_rt_hash_table[hash].chain);
rcu_assign_pointer(dn_rt_hash_table[hash].chain, rth);
- dst_use(&rth->dst, now);
+ dst_hold_and_use(&rth->dst, now);
spin_unlock_bh(&dn_rt_hash_table[hash].lock);
dst_release_immediate(&rt->dst);
@@ -351,7 +351,7 @@ static int dn_insert_route(struct dn_route *rt, unsigned int hash, struct dn_rou
rcu_assign_pointer(rt->dst.dn_next, dn_rt_hash_table[hash].chain);
rcu_assign_pointer(dn_rt_hash_table[hash].chain, rt);
- dst_use(&rt->dst, now);
+ dst_hold_and_use(&rt->dst, now);
spin_unlock_bh(&dn_rt_hash_table[hash].lock);
*rp = rt;
return 0;
@@ -1258,7 +1258,7 @@ static int __dn_route_output_key(struct dst_entry **pprt, const struct flowidn *
(flp->flowidn_mark == rt->fld.flowidn_mark) &&
dn_is_output_route(rt) &&
(rt->fld.flowidn_oif == flp->flowidn_oif)) {
- dst_use(&rt->dst, jiffies);
+ dst_hold_and_use(&rt->dst, jiffies);
rcu_read_unlock_bh();
*pprt = &rt->dst;
return 0;
@@ -1535,7 +1535,7 @@ static int dn_route_input(struct sk_buff *skb)
(rt->fld.flowidn_oif == 0) &&
(rt->fld.flowidn_mark == skb->mark) &&
(rt->fld.flowidn_iif == cb->iif)) {
- dst_use(&rt->dst, jiffies);
+ dst_hold_and_use(&rt->dst, jiffies);
rcu_read_unlock();
skb_dst_set(skb, (struct dst_entry *)rt);
return 0;
@@ -1922,10 +1922,10 @@ void __init dn_route_init(void)
#ifdef CONFIG_DECNET_ROUTER
rtnl_register(PF_DECnet, RTM_GETROUTE, dn_cache_getroute,
- dn_fib_dump, NULL);
+ dn_fib_dump, 0);
#else
rtnl_register(PF_DECnet, RTM_GETROUTE, dn_cache_getroute,
- dn_cache_dump, NULL);
+ dn_cache_dump, 0);
#endif
}
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index 295bbd6a56f2..c795c3f509c9 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* DECnet An implementation of the DECnet protocol suite for the LINUX
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index 232675480756..f0710b5d037d 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* DECnet An implementation of the DECnet protocol suite for the LINUX
* operating system. DECnet is implemented using the BSD Socket
@@ -155,6 +156,7 @@ static void dn_rehash_zone(struct dn_zone *dz)
default:
printk(KERN_DEBUG "DECnet: dn_rehash_zone: BUG! %d\n",
old_divisor);
+ /* fall through */
case 256:
new_divisor = 1024;
new_hashmask = 0x3FF;
diff --git a/net/decnet/dn_timer.c b/net/decnet/dn_timer.c
index 1d330fd43dc7..f430daed24a0 100644
--- a/net/decnet/dn_timer.c
+++ b/net/decnet/dn_timer.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* DECnet An implementation of the DECnet protocol suite for the LINUX
* operating system. DECnet is implemented using the BSD Socket
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
index aa8ffecc46a4..ab395e55cd78 100644
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -115,7 +115,7 @@ static inline void dnrmg_receive_user_skb(struct sk_buff *skb)
RCV_SKB_FAIL(-EINVAL);
}
-static struct nf_hook_ops dnrmg_ops __read_mostly = {
+static const struct nf_hook_ops dnrmg_ops = {
.hook = dnrmg_hook,
.pf = NFPROTO_DECNET,
.hooknum = NF_DN_ROUTE,
diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c
index 6c7da6c29bf0..55bf64a22b59 100644
--- a/net/decnet/sysctl_net_decnet.c
+++ b/net/decnet/sysctl_net_decnet.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* DECnet An implementation of the DECnet protocol suite for the LINUX
* operating system. DECnet is implemented using the BSD Socket
diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c
index 8737412c7b27..e1d4d898a007 100644
--- a/net/dns_resolver/dns_key.c
+++ b/net/dns_resolver/dns_key.c
@@ -224,7 +224,7 @@ static int dns_resolver_match_preparse(struct key_match_data *match_data)
static void dns_resolver_describe(const struct key *key, struct seq_file *m)
{
seq_puts(m, key->description);
- if (key_is_instantiated(key)) {
+ if (key_is_positive(key)) {
int err = PTR_ERR(key->payload.data[dns_key_error]);
if (err)
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index cc5f8f971689..03c3bdf25468 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -7,6 +7,7 @@ config HAVE_NET_DSA
config NET_DSA
tristate "Distributed Switch Architecture"
depends on HAVE_NET_DSA && MAY_USE_DEVLINK
+ depends on BRIDGE || BRIDGE=n
select NET_SWITCHDEV
select PHYLIB
---help---
@@ -19,6 +20,9 @@ if NET_DSA
config NET_DSA_TAG_BRCM
bool
+config NET_DSA_TAG_BRCM_PREPEND
+ bool
+
config NET_DSA_TAG_DSA
bool
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index fcce25da937c..0e13c1f95d13 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -1,9 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
# the core
obj-$(CONFIG_NET_DSA) += dsa_core.o
-dsa_core-y += dsa.o dsa2.o legacy.o port.o slave.o switch.o
+dsa_core-y += dsa.o dsa2.o legacy.o master.o port.o slave.o switch.o
# tagging formats
dsa_core-$(CONFIG_NET_DSA_TAG_BRCM) += tag_brcm.o
+dsa_core-$(CONFIG_NET_DSA_TAG_BRCM_PREPEND) += tag_brcm.o
dsa_core-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o
dsa_core-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o
dsa_core-$(CONFIG_NET_DSA_TAG_KSZ) += tag_ksz.o
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 416ac4ef9ba9..6a9d0f50fbee 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -14,6 +14,7 @@
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/module.h>
+#include <linux/notifier.h>
#include <linux/of.h>
#include <linux/of_mdio.h>
#include <linux/of_platform.h>
@@ -43,6 +44,9 @@ const struct dsa_device_ops *dsa_device_ops[DSA_TAG_LAST] = {
#ifdef CONFIG_NET_DSA_TAG_BRCM
[DSA_TAG_PROTO_BRCM] = &brcm_netdev_ops,
#endif
+#ifdef CONFIG_NET_DSA_TAG_BRCM_PREPEND
+ [DSA_TAG_PROTO_BRCM_PREPEND] = &brcm_prepend_netdev_ops,
+#endif
#ifdef CONFIG_NET_DSA_TAG_DSA
[DSA_TAG_PROTO_DSA] = &dsa_netdev_ops,
#endif
@@ -67,37 +71,6 @@ const struct dsa_device_ops *dsa_device_ops[DSA_TAG_LAST] = {
[DSA_TAG_PROTO_NONE] = &none_ops,
};
-int dsa_cpu_dsa_setup(struct dsa_switch *ds, struct device *dev,
- struct dsa_port *dport, int port)
-{
- struct device_node *port_dn = dport->dn;
- struct phy_device *phydev;
- int ret, mode;
-
- if (of_phy_is_fixed_link(port_dn)) {
- ret = of_phy_register_fixed_link(port_dn);
- if (ret) {
- dev_err(dev, "failed to register fixed PHY\n");
- return ret;
- }
- phydev = of_phy_find_device(port_dn);
-
- mode = of_get_phy_mode(port_dn);
- if (mode < 0)
- mode = PHY_INTERFACE_MODE_NA;
- phydev->interface = mode;
-
- genphy_config_init(phydev);
- genphy_read_status(phydev);
- if (ds->ops->adjust_link)
- ds->ops->adjust_link(ds, port, phydev);
-
- put_device(&phydev->mdio.dev);
- }
-
- return 0;
-}
-
const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol)
{
const struct dsa_device_ops *ops;
@@ -112,42 +85,6 @@ const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol)
return ops;
}
-int dsa_cpu_port_ethtool_setup(struct dsa_port *cpu_dp)
-{
- struct dsa_switch *ds = cpu_dp->ds;
- struct net_device *master;
- struct ethtool_ops *cpu_ops;
-
- master = cpu_dp->netdev;
-
- cpu_ops = devm_kzalloc(ds->dev, sizeof(*cpu_ops), GFP_KERNEL);
- if (!cpu_ops)
- return -ENOMEM;
-
- memcpy(&cpu_dp->ethtool_ops, master->ethtool_ops,
- sizeof(struct ethtool_ops));
- cpu_dp->orig_ethtool_ops = master->ethtool_ops;
- memcpy(cpu_ops, &cpu_dp->ethtool_ops,
- sizeof(struct ethtool_ops));
- dsa_cpu_port_ethtool_init(cpu_ops);
- master->ethtool_ops = cpu_ops;
-
- return 0;
-}
-
-void dsa_cpu_port_ethtool_restore(struct dsa_port *cpu_dp)
-{
- cpu_dp->netdev->ethtool_ops = cpu_dp->orig_ethtool_ops;
-}
-
-void dsa_cpu_dsa_destroy(struct dsa_port *port)
-{
- struct device_node *port_dn = port->dn;
-
- if (of_phy_is_fixed_link(port_dn))
- of_phy_deregister_fixed_link(port_dn);
-}
-
static int dev_is_class(struct device *dev, void *class)
{
if (dev->class != NULL && !strcmp(dev->class->name, class))
@@ -186,12 +123,14 @@ struct net_device *dsa_dev_to_net_device(struct device *dev)
EXPORT_SYMBOL_GPL(dsa_dev_to_net_device);
static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt, struct net_device *orig_dev)
+ struct packet_type *pt, struct net_device *unused)
{
- struct dsa_switch_tree *dst = dev->dsa_ptr;
+ struct dsa_port *cpu_dp = dev->dsa_ptr;
struct sk_buff *nskb = NULL;
+ struct pcpu_sw_netstats *s;
+ struct dsa_slave_priv *p;
- if (unlikely(dst == NULL)) {
+ if (unlikely(!cpu_dp)) {
kfree_skb(skb);
return 0;
}
@@ -200,19 +139,23 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
if (!skb)
return 0;
- nskb = dst->rcv(skb, dev, pt, orig_dev);
+ nskb = cpu_dp->rcv(skb, dev, pt);
if (!nskb) {
kfree_skb(skb);
return 0;
}
skb = nskb;
+ p = netdev_priv(skb->dev);
skb_push(skb, ETH_HLEN);
skb->pkt_type = PACKET_HOST;
skb->protocol = eth_type_trans(skb, skb->dev);
- skb->dev->stats.rx_packets++;
- skb->dev->stats.rx_bytes += skb->len;
+ s = this_cpu_ptr(p->stats64);
+ u64_stats_update_begin(&s->syncp);
+ s->rx_packets++;
+ s->rx_bytes += skb->len;
+ u64_stats_update_end(&s->syncp);
netif_receive_skb(skb);
@@ -220,6 +163,11 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
}
#ifdef CONFIG_PM_SLEEP
+static bool dsa_is_port_initialized(struct dsa_switch *ds, int p)
+{
+ return dsa_is_user_port(ds, p) && ds->ports[p].slave;
+}
+
int dsa_switch_suspend(struct dsa_switch *ds)
{
int i, ret = 0;
@@ -229,7 +177,7 @@ int dsa_switch_suspend(struct dsa_switch *ds)
if (!dsa_is_port_initialized(ds, i))
continue;
- ret = dsa_slave_suspend(ds->ports[i].netdev);
+ ret = dsa_slave_suspend(ds->ports[i].slave);
if (ret)
return ret;
}
@@ -256,7 +204,7 @@ int dsa_switch_resume(struct dsa_switch *ds)
if (!dsa_is_port_initialized(ds, i))
continue;
- ret = dsa_slave_resume(ds->ports[i].netdev);
+ ret = dsa_slave_resume(ds->ports[i].slave);
if (ret)
return ret;
}
@@ -271,10 +219,44 @@ static struct packet_type dsa_pack_type __read_mostly = {
.func = dsa_switch_rcv,
};
+static struct workqueue_struct *dsa_owq;
+
+bool dsa_schedule_work(struct work_struct *work)
+{
+ return queue_work(dsa_owq, work);
+}
+
+static ATOMIC_NOTIFIER_HEAD(dsa_notif_chain);
+
+int register_dsa_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_register(&dsa_notif_chain, nb);
+}
+EXPORT_SYMBOL_GPL(register_dsa_notifier);
+
+int unregister_dsa_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_unregister(&dsa_notif_chain, nb);
+}
+EXPORT_SYMBOL_GPL(unregister_dsa_notifier);
+
+int call_dsa_notifiers(unsigned long val, struct net_device *dev,
+ struct dsa_notifier_info *info)
+{
+ info->dev = dev;
+ return atomic_notifier_call_chain(&dsa_notif_chain, val, info);
+}
+EXPORT_SYMBOL_GPL(call_dsa_notifiers);
+
static int __init dsa_init_module(void)
{
int rc;
+ dsa_owq = alloc_ordered_workqueue("dsa_ordered",
+ WQ_MEM_RECLAIM);
+ if (!dsa_owq)
+ return -ENOMEM;
+
rc = dsa_slave_register_notifier();
if (rc)
return rc;
@@ -294,6 +276,7 @@ static void __exit dsa_cleanup_module(void)
dsa_slave_unregister_notifier();
dev_remove_pack(&dsa_pack_type);
dsa_legacy_unregister();
+ destroy_workqueue(dsa_owq);
}
module_exit(dsa_cleanup_module);
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index c442051d5a55..44e3fb7dec8c 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -21,293 +21,297 @@
#include "dsa_priv.h"
-static LIST_HEAD(dsa_switch_trees);
+static LIST_HEAD(dsa_tree_list);
static DEFINE_MUTEX(dsa2_mutex);
static const struct devlink_ops dsa_devlink_ops = {
};
-static struct dsa_switch_tree *dsa_get_dst(u32 tree)
+static struct dsa_switch_tree *dsa_tree_find(int index)
{
struct dsa_switch_tree *dst;
- list_for_each_entry(dst, &dsa_switch_trees, list)
- if (dst->tree == tree) {
- kref_get(&dst->refcount);
+ list_for_each_entry(dst, &dsa_tree_list, list)
+ if (dst->index == index)
return dst;
- }
+
return NULL;
}
-static void dsa_free_dst(struct kref *ref)
+static struct dsa_switch_tree *dsa_tree_alloc(int index)
{
- struct dsa_switch_tree *dst = container_of(ref, struct dsa_switch_tree,
- refcount);
+ struct dsa_switch_tree *dst;
- list_del(&dst->list);
- kfree(dst);
+ dst = kzalloc(sizeof(*dst), GFP_KERNEL);
+ if (!dst)
+ return NULL;
+
+ dst->index = index;
+
+ INIT_LIST_HEAD(&dst->list);
+ list_add_tail(&dsa_tree_list, &dst->list);
+
+ /* Initialize the reference counter to the number of switches, not 1 */
+ kref_init(&dst->refcount);
+ refcount_set(&dst->refcount.refcount, 0);
+
+ return dst;
}
-static void dsa_put_dst(struct dsa_switch_tree *dst)
+static void dsa_tree_free(struct dsa_switch_tree *dst)
{
- kref_put(&dst->refcount, dsa_free_dst);
+ list_del(&dst->list);
+ kfree(dst);
}
-static struct dsa_switch_tree *dsa_add_dst(u32 tree)
+static struct dsa_switch_tree *dsa_tree_touch(int index)
{
struct dsa_switch_tree *dst;
- dst = kzalloc(sizeof(*dst), GFP_KERNEL);
+ dst = dsa_tree_find(index);
if (!dst)
- return NULL;
- dst->tree = tree;
- INIT_LIST_HEAD(&dst->list);
- list_add_tail(&dsa_switch_trees, &dst->list);
- kref_init(&dst->refcount);
+ dst = dsa_tree_alloc(index);
return dst;
}
-static void dsa_dst_add_ds(struct dsa_switch_tree *dst,
- struct dsa_switch *ds, u32 index)
+static void dsa_tree_get(struct dsa_switch_tree *dst)
{
kref_get(&dst->refcount);
- dst->ds[index] = ds;
}
-static void dsa_dst_del_ds(struct dsa_switch_tree *dst,
- struct dsa_switch *ds, u32 index)
+static void dsa_tree_release(struct kref *ref)
{
- dst->ds[index] = NULL;
- kref_put(&dst->refcount, dsa_free_dst);
+ struct dsa_switch_tree *dst;
+
+ dst = container_of(ref, struct dsa_switch_tree, refcount);
+
+ dsa_tree_free(dst);
}
-/* For platform data configurations, we need to have a valid name argument to
- * differentiate a disabled port from an enabled one
- */
-static bool dsa_port_is_valid(struct dsa_port *port)
+static void dsa_tree_put(struct dsa_switch_tree *dst)
{
- return !!(port->dn || port->name);
+ kref_put(&dst->refcount, dsa_tree_release);
}
static bool dsa_port_is_dsa(struct dsa_port *port)
{
- if (port->name && !strcmp(port->name, "dsa"))
- return true;
- else
- return !!of_parse_phandle(port->dn, "link", 0);
+ return port->type == DSA_PORT_TYPE_DSA;
}
static bool dsa_port_is_cpu(struct dsa_port *port)
{
- if (port->name && !strcmp(port->name, "cpu"))
- return true;
- else
- return !!of_parse_phandle(port->dn, "ethernet", 0);
+ return port->type == DSA_PORT_TYPE_CPU;
}
-static bool dsa_ds_find_port_dn(struct dsa_switch *ds,
- struct device_node *port)
+static bool dsa_port_is_user(struct dsa_port *dp)
{
- u32 index;
-
- for (index = 0; index < ds->num_ports; index++)
- if (ds->ports[index].dn == port)
- return true;
- return false;
+ return dp->type == DSA_PORT_TYPE_USER;
}
-static struct dsa_switch *dsa_dst_find_port_dn(struct dsa_switch_tree *dst,
- struct device_node *port)
+static struct dsa_port *dsa_tree_find_port_by_node(struct dsa_switch_tree *dst,
+ struct device_node *dn)
{
struct dsa_switch *ds;
- u32 index;
+ struct dsa_port *dp;
+ int device, port;
- for (index = 0; index < DSA_MAX_SWITCHES; index++) {
- ds = dst->ds[index];
+ for (device = 0; device < DSA_MAX_SWITCHES; device++) {
+ ds = dst->ds[device];
if (!ds)
continue;
- if (dsa_ds_find_port_dn(ds, port))
- return ds;
+ for (port = 0; port < ds->num_ports; port++) {
+ dp = &ds->ports[port];
+
+ if (dp->dn == dn)
+ return dp;
+ }
}
return NULL;
}
-static int dsa_port_complete(struct dsa_switch_tree *dst,
- struct dsa_switch *src_ds,
- struct dsa_port *port,
- u32 src_port)
+static bool dsa_port_setup_routing_table(struct dsa_port *dp)
{
- struct device_node *link;
- int index;
- struct dsa_switch *dst_ds;
-
- for (index = 0;; index++) {
- link = of_parse_phandle(port->dn, "link", index);
- if (!link)
- break;
-
- dst_ds = dsa_dst_find_port_dn(dst, link);
- of_node_put(link);
+ struct dsa_switch *ds = dp->ds;
+ struct dsa_switch_tree *dst = ds->dst;
+ struct device_node *dn = dp->dn;
+ struct of_phandle_iterator it;
+ struct dsa_port *link_dp;
+ int err;
- if (!dst_ds)
- return 1;
+ of_for_each_phandle(&it, err, dn, "link", NULL, 0) {
+ link_dp = dsa_tree_find_port_by_node(dst, it.node);
+ if (!link_dp) {
+ of_node_put(it.node);
+ return false;
+ }
- src_ds->rtable[dst_ds->index] = src_port;
+ ds->rtable[link_dp->ds->index] = dp->index;
}
- return 0;
+ return true;
}
-/* A switch is complete if all the DSA ports phandles point to ports
- * known in the tree. A return value of 1 means the tree is not
- * complete. This is not an error condition. A value of 0 is
- * success.
- */
-static int dsa_ds_complete(struct dsa_switch_tree *dst, struct dsa_switch *ds)
+static bool dsa_switch_setup_routing_table(struct dsa_switch *ds)
{
- struct dsa_port *port;
- u32 index;
- int err;
-
- for (index = 0; index < ds->num_ports; index++) {
- port = &ds->ports[index];
- if (!dsa_port_is_valid(port))
- continue;
+ bool complete = true;
+ struct dsa_port *dp;
+ int i;
- if (!dsa_port_is_dsa(port))
- continue;
+ for (i = 0; i < DSA_MAX_SWITCHES; i++)
+ ds->rtable[i] = DSA_RTABLE_NONE;
- err = dsa_port_complete(dst, ds, port, index);
- if (err != 0)
- return err;
+ for (i = 0; i < ds->num_ports; i++) {
+ dp = &ds->ports[i];
- ds->dsa_port_mask |= BIT(index);
+ if (dsa_port_is_dsa(dp)) {
+ complete = dsa_port_setup_routing_table(dp);
+ if (!complete)
+ break;
+ }
}
- return 0;
+ return complete;
}
-/* A tree is complete if all the DSA ports phandles point to ports
- * known in the tree. A return value of 1 means the tree is not
- * complete. This is not an error condition. A value of 0 is
- * success.
- */
-static int dsa_dst_complete(struct dsa_switch_tree *dst)
+static bool dsa_tree_setup_routing_table(struct dsa_switch_tree *dst)
{
struct dsa_switch *ds;
- u32 index;
- int err;
+ bool complete = true;
+ int device;
- for (index = 0; index < DSA_MAX_SWITCHES; index++) {
- ds = dst->ds[index];
+ for (device = 0; device < DSA_MAX_SWITCHES; device++) {
+ ds = dst->ds[device];
if (!ds)
continue;
- err = dsa_ds_complete(dst, ds);
- if (err != 0)
- return err;
+ complete = dsa_switch_setup_routing_table(ds);
+ if (!complete)
+ break;
}
- return 0;
+ return complete;
}
-static int dsa_dsa_port_apply(struct dsa_port *port)
+static struct dsa_port *dsa_tree_find_first_cpu(struct dsa_switch_tree *dst)
{
- struct dsa_switch *ds = port->ds;
- int err;
+ struct dsa_switch *ds;
+ struct dsa_port *dp;
+ int device, port;
- err = dsa_cpu_dsa_setup(ds, ds->dev, port, port->index);
- if (err) {
- dev_warn(ds->dev, "Failed to setup dsa port %d: %d\n",
- port->index, err);
- return err;
- }
+ for (device = 0; device < DSA_MAX_SWITCHES; device++) {
+ ds = dst->ds[device];
+ if (!ds)
+ continue;
- memset(&port->devlink_port, 0, sizeof(port->devlink_port));
+ for (port = 0; port < ds->num_ports; port++) {
+ dp = &ds->ports[port];
- return devlink_port_register(ds->devlink, &port->devlink_port,
- port->index);
-}
+ if (dsa_port_is_cpu(dp))
+ return dp;
+ }
+ }
-static void dsa_dsa_port_unapply(struct dsa_port *port)
-{
- devlink_port_unregister(&port->devlink_port);
- dsa_cpu_dsa_destroy(port);
+ return NULL;
}
-static int dsa_cpu_port_apply(struct dsa_port *port)
+static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst)
{
- struct dsa_switch *ds = port->ds;
- int err;
+ struct dsa_switch *ds;
+ struct dsa_port *dp;
+ int device, port;
- err = dsa_cpu_dsa_setup(ds, ds->dev, port, port->index);
- if (err) {
- dev_warn(ds->dev, "Failed to setup cpu port %d: %d\n",
- port->index, err);
- return err;
+ /* DSA currently only supports a single CPU port */
+ dst->cpu_dp = dsa_tree_find_first_cpu(dst);
+ if (!dst->cpu_dp) {
+ pr_warn("Tree has no master device\n");
+ return -EINVAL;
}
- memset(&port->devlink_port, 0, sizeof(port->devlink_port));
- err = devlink_port_register(ds->devlink, &port->devlink_port,
- port->index);
- return err;
+ /* Assign the default CPU port to all ports of the fabric */
+ for (device = 0; device < DSA_MAX_SWITCHES; device++) {
+ ds = dst->ds[device];
+ if (!ds)
+ continue;
+
+ for (port = 0; port < ds->num_ports; port++) {
+ dp = &ds->ports[port];
+
+ if (dsa_port_is_user(dp))
+ dp->cpu_dp = dst->cpu_dp;
+ }
+ }
+
+ return 0;
}
-static void dsa_cpu_port_unapply(struct dsa_port *port)
+static void dsa_tree_teardown_default_cpu(struct dsa_switch_tree *dst)
{
- devlink_port_unregister(&port->devlink_port);
- dsa_cpu_dsa_destroy(port);
- port->ds->cpu_port_mask &= ~BIT(port->index);
-
+ /* DSA currently only supports a single CPU port */
+ dst->cpu_dp = NULL;
}
-static int dsa_user_port_apply(struct dsa_port *port)
+static int dsa_port_setup(struct dsa_port *dp)
{
- struct dsa_switch *ds = port->ds;
- const char *name = port->name;
+ struct dsa_switch *ds = dp->ds;
int err;
- if (port->dn)
- name = of_get_property(port->dn, "label", NULL);
- if (!name)
- name = "eth%d";
+ memset(&dp->devlink_port, 0, sizeof(dp->devlink_port));
- err = dsa_slave_create(ds, ds->dev, port->index, name);
- if (err) {
- dev_warn(ds->dev, "Failed to create slave %d: %d\n",
- port->index, err);
- port->netdev = NULL;
- return err;
- }
-
- memset(&port->devlink_port, 0, sizeof(port->devlink_port));
- err = devlink_port_register(ds->devlink, &port->devlink_port,
- port->index);
+ err = devlink_port_register(ds->devlink, &dp->devlink_port, dp->index);
if (err)
return err;
- devlink_port_type_eth_set(&port->devlink_port, port->netdev);
+ switch (dp->type) {
+ case DSA_PORT_TYPE_UNUSED:
+ break;
+ case DSA_PORT_TYPE_CPU:
+ case DSA_PORT_TYPE_DSA:
+ err = dsa_port_fixed_link_register_of(dp);
+ if (err) {
+ dev_err(ds->dev, "failed to register fixed link for port %d.%d\n",
+ ds->index, dp->index);
+ return err;
+ }
+
+ break;
+ case DSA_PORT_TYPE_USER:
+ err = dsa_slave_create(dp);
+ if (err)
+ dev_err(ds->dev, "failed to create slave for port %d.%d\n",
+ ds->index, dp->index);
+ else
+ devlink_port_type_eth_set(&dp->devlink_port, dp->slave);
+ break;
+ }
return 0;
}
-static void dsa_user_port_unapply(struct dsa_port *port)
+static void dsa_port_teardown(struct dsa_port *dp)
{
- devlink_port_unregister(&port->devlink_port);
- if (port->netdev) {
- dsa_slave_destroy(port->netdev);
- port->netdev = NULL;
- port->ds->enabled_port_mask &= ~(1 << port->index);
+ devlink_port_unregister(&dp->devlink_port);
+
+ switch (dp->type) {
+ case DSA_PORT_TYPE_UNUSED:
+ break;
+ case DSA_PORT_TYPE_CPU:
+ case DSA_PORT_TYPE_DSA:
+ dsa_port_fixed_link_unregister_of(dp);
+ break;
+ case DSA_PORT_TYPE_USER:
+ if (dp->slave) {
+ dsa_slave_destroy(dp->slave);
+ dp->slave = NULL;
+ }
+ break;
}
}
-static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds)
+static int dsa_switch_setup(struct dsa_switch *ds)
{
- struct dsa_port *port;
- u32 index;
int err;
/* Initialize ds->phys_mii_mask before registering the slave MDIO bus
@@ -315,7 +319,7 @@ static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds)
* the slave MDIO bus driver rely on these values for probing PHY
* devices or not
*/
- ds->phys_mii_mask = ds->enabled_port_mask;
+ ds->phys_mii_mask |= dsa_user_ports(ds);
/* Add the switch to devlink before calling setup, so that setup can
* add dpipe tables
@@ -336,12 +340,6 @@ static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds)
if (err)
return err;
- if (ds->ops->set_addr) {
- err = ds->ops->set_addr(ds, dst->cpu_dp->netdev->dev_addr);
- if (err < 0)
- return err;
- }
-
if (!ds->slave_mii_bus && ds->ops->phy_read) {
ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev);
if (!ds->slave_mii_bus)
@@ -354,56 +352,11 @@ static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds)
return err;
}
- for (index = 0; index < ds->num_ports; index++) {
- port = &ds->ports[index];
- if (!dsa_port_is_valid(port))
- continue;
-
- if (dsa_port_is_dsa(port)) {
- err = dsa_dsa_port_apply(port);
- if (err)
- return err;
- continue;
- }
-
- if (dsa_port_is_cpu(port)) {
- err = dsa_cpu_port_apply(port);
- if (err)
- return err;
- continue;
- }
-
- err = dsa_user_port_apply(port);
- if (err)
- continue;
- }
-
return 0;
}
-static void dsa_ds_unapply(struct dsa_switch_tree *dst, struct dsa_switch *ds)
+static void dsa_switch_teardown(struct dsa_switch *ds)
{
- struct dsa_port *port;
- u32 index;
-
- for (index = 0; index < ds->num_ports; index++) {
- port = &ds->ports[index];
- if (!dsa_port_is_valid(port))
- continue;
-
- if (dsa_port_is_dsa(port)) {
- dsa_dsa_port_unapply(port);
- continue;
- }
-
- if (dsa_port_is_cpu(port)) {
- dsa_cpu_port_unapply(port);
- continue;
- }
-
- dsa_user_port_unapply(port);
- }
-
if (ds->slave_mii_bus && ds->ops->phy_read)
mdiobus_unregister(ds->slave_mii_bus);
@@ -417,198 +370,228 @@ static void dsa_ds_unapply(struct dsa_switch_tree *dst, struct dsa_switch *ds)
}
-static int dsa_dst_apply(struct dsa_switch_tree *dst)
+static int dsa_tree_setup_switches(struct dsa_switch_tree *dst)
{
struct dsa_switch *ds;
- u32 index;
+ struct dsa_port *dp;
+ int device, port;
int err;
- for (index = 0; index < DSA_MAX_SWITCHES; index++) {
- ds = dst->ds[index];
+ for (device = 0; device < DSA_MAX_SWITCHES; device++) {
+ ds = dst->ds[device];
if (!ds)
continue;
- err = dsa_ds_apply(dst, ds);
+ err = dsa_switch_setup(ds);
if (err)
return err;
- }
- if (dst->cpu_dp) {
- err = dsa_cpu_port_ethtool_setup(dst->cpu_dp);
- if (err)
- return err;
- }
+ for (port = 0; port < ds->num_ports; port++) {
+ dp = &ds->ports[port];
- /* If we use a tagging format that doesn't have an ethertype
- * field, make sure that all packets from this point on get
- * sent to the tag format's receive function.
- */
- wmb();
- dst->cpu_dp->netdev->dsa_ptr = dst;
- dst->applied = true;
+ err = dsa_port_setup(dp);
+ if (err)
+ return err;
+ }
+ }
return 0;
}
-static void dsa_dst_unapply(struct dsa_switch_tree *dst)
+static void dsa_tree_teardown_switches(struct dsa_switch_tree *dst)
{
struct dsa_switch *ds;
- u32 index;
-
- if (!dst->applied)
- return;
-
- dst->cpu_dp->netdev->dsa_ptr = NULL;
-
- /* If we used a tagging format that doesn't have an ethertype
- * field, make sure that all packets from this point get sent
- * without the tag and go through the regular receive path.
- */
- wmb();
+ struct dsa_port *dp;
+ int device, port;
- for (index = 0; index < DSA_MAX_SWITCHES; index++) {
- ds = dst->ds[index];
+ for (device = 0; device < DSA_MAX_SWITCHES; device++) {
+ ds = dst->ds[device];
if (!ds)
continue;
- dsa_ds_unapply(dst, ds);
- }
+ for (port = 0; port < ds->num_ports; port++) {
+ dp = &ds->ports[port];
+
+ dsa_port_teardown(dp);
+ }
- if (dst->cpu_dp) {
- dsa_cpu_port_ethtool_restore(dst->cpu_dp);
- dst->cpu_dp = NULL;
+ dsa_switch_teardown(ds);
}
+}
+
+static int dsa_tree_setup_master(struct dsa_switch_tree *dst)
+{
+ struct dsa_port *cpu_dp = dst->cpu_dp;
+ struct net_device *master = cpu_dp->master;
- pr_info("DSA: tree %d unapplied\n", dst->tree);
- dst->applied = false;
+ /* DSA currently supports a single pair of CPU port and master device */
+ return dsa_master_setup(master, cpu_dp);
}
-static int dsa_cpu_parse(struct dsa_port *port, u32 index,
- struct dsa_switch_tree *dst,
- struct dsa_switch *ds)
+static void dsa_tree_teardown_master(struct dsa_switch_tree *dst)
{
- enum dsa_tag_protocol tag_protocol;
- struct net_device *ethernet_dev;
- struct device_node *ethernet;
+ struct dsa_port *cpu_dp = dst->cpu_dp;
+ struct net_device *master = cpu_dp->master;
- if (port->dn) {
- ethernet = of_parse_phandle(port->dn, "ethernet", 0);
- if (!ethernet)
- return -EINVAL;
- ethernet_dev = of_find_net_device_by_node(ethernet);
- } else {
- ethernet_dev = dsa_dev_to_net_device(ds->cd->netdev[index]);
- dev_put(ethernet_dev);
- }
+ return dsa_master_teardown(master);
+}
- if (!ethernet_dev)
- return -EPROBE_DEFER;
+static int dsa_tree_setup(struct dsa_switch_tree *dst)
+{
+ bool complete;
+ int err;
- if (!dst->cpu_dp) {
- dst->cpu_dp = port;
- dst->cpu_dp->netdev = ethernet_dev;
+ if (dst->setup) {
+ pr_err("DSA: tree %d already setup! Disjoint trees?\n",
+ dst->index);
+ return -EEXIST;
}
- /* Initialize cpu_port_mask now for drv->setup()
- * to have access to a correct value, just like what
- * net/dsa/dsa.c::dsa_switch_setup_one does.
- */
- ds->cpu_port_mask |= BIT(index);
+ complete = dsa_tree_setup_routing_table(dst);
+ if (!complete)
+ return 0;
- tag_protocol = ds->ops->get_tag_protocol(ds);
- dst->tag_ops = dsa_resolve_tag_protocol(tag_protocol);
- if (IS_ERR(dst->tag_ops)) {
- dev_warn(ds->dev, "No tagger for this switch\n");
- ds->cpu_port_mask &= ~BIT(index);
- return PTR_ERR(dst->tag_ops);
- }
+ err = dsa_tree_setup_default_cpu(dst);
+ if (err)
+ return err;
- dst->rcv = dst->tag_ops->rcv;
+ err = dsa_tree_setup_switches(dst);
+ if (err)
+ return err;
+
+ err = dsa_tree_setup_master(dst);
+ if (err)
+ return err;
+
+ dst->setup = true;
+
+ pr_info("DSA: tree %d setup\n", dst->index);
return 0;
}
-static int dsa_ds_parse(struct dsa_switch_tree *dst, struct dsa_switch *ds)
+static void dsa_tree_teardown(struct dsa_switch_tree *dst)
{
- struct dsa_port *port;
- u32 index;
+ if (!dst->setup)
+ return;
+
+ dsa_tree_teardown_master(dst);
+
+ dsa_tree_teardown_switches(dst);
+
+ dsa_tree_teardown_default_cpu(dst);
+
+ pr_info("DSA: tree %d torn down\n", dst->index);
+
+ dst->setup = false;
+}
+
+static void dsa_tree_remove_switch(struct dsa_switch_tree *dst,
+ unsigned int index)
+{
+ dsa_tree_teardown(dst);
+
+ dst->ds[index] = NULL;
+ dsa_tree_put(dst);
+}
+
+static int dsa_tree_add_switch(struct dsa_switch_tree *dst,
+ struct dsa_switch *ds)
+{
+ unsigned int index = ds->index;
int err;
- for (index = 0; index < ds->num_ports; index++) {
- port = &ds->ports[index];
- if (!dsa_port_is_valid(port) ||
- dsa_port_is_dsa(port))
- continue;
+ if (dst->ds[index])
+ return -EBUSY;
- if (dsa_port_is_cpu(port)) {
- err = dsa_cpu_parse(port, index, dst, ds);
- if (err)
- return err;
- } else {
- /* Initialize enabled_port_mask now for drv->setup()
- * to have access to a correct value, just like what
- * net/dsa/dsa.c::dsa_switch_setup_one does.
- */
- ds->enabled_port_mask |= BIT(index);
- }
+ dsa_tree_get(dst);
+ dst->ds[index] = ds;
- }
+ err = dsa_tree_setup(dst);
+ if (err)
+ dsa_tree_remove_switch(dst, index);
- pr_info("DSA: switch %d %d parsed\n", dst->tree, ds->index);
+ return err;
+}
+
+static int dsa_port_parse_user(struct dsa_port *dp, const char *name)
+{
+ if (!name)
+ name = "eth%d";
+
+ dp->type = DSA_PORT_TYPE_USER;
+ dp->name = name;
return 0;
}
-static int dsa_dst_parse(struct dsa_switch_tree *dst)
+static int dsa_port_parse_dsa(struct dsa_port *dp)
{
- struct dsa_switch *ds;
- struct dsa_port *dp;
- u32 index;
- int port;
- int err;
+ dp->type = DSA_PORT_TYPE_DSA;
- for (index = 0; index < DSA_MAX_SWITCHES; index++) {
- ds = dst->ds[index];
- if (!ds)
- continue;
+ return 0;
+}
- err = dsa_ds_parse(dst, ds);
- if (err)
- return err;
- }
+static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master)
+{
+ struct dsa_switch *ds = dp->ds;
+ struct dsa_switch_tree *dst = ds->dst;
+ const struct dsa_device_ops *tag_ops;
+ enum dsa_tag_protocol tag_protocol;
- if (!dst->cpu_dp->netdev) {
- pr_warn("Tree has no master device\n");
- return -EINVAL;
+ tag_protocol = ds->ops->get_tag_protocol(ds, dp->index);
+ tag_ops = dsa_resolve_tag_protocol(tag_protocol);
+ if (IS_ERR(tag_ops)) {
+ dev_warn(ds->dev, "No tagger for this switch\n");
+ return PTR_ERR(tag_ops);
}
- /* Assign the default CPU port to all ports of the fabric */
- for (index = 0; index < DSA_MAX_SWITCHES; index++) {
- ds = dst->ds[index];
- if (!ds)
- continue;
+ dp->type = DSA_PORT_TYPE_CPU;
+ dp->rcv = tag_ops->rcv;
+ dp->tag_ops = tag_ops;
+ dp->master = master;
+ dp->dst = dst;
- for (port = 0; port < ds->num_ports; port++) {
- dp = &ds->ports[port];
- if (!dsa_port_is_valid(dp) ||
- dsa_port_is_dsa(dp) ||
- dsa_port_is_cpu(dp))
- continue;
+ return 0;
+}
- dp->cpu_dp = dst->cpu_dp;
- }
+static int dsa_port_parse_of(struct dsa_port *dp, struct device_node *dn)
+{
+ struct device_node *ethernet = of_parse_phandle(dn, "ethernet", 0);
+ const char *name = of_get_property(dn, "label", NULL);
+ bool link = of_property_read_bool(dn, "link");
+
+ dp->dn = dn;
+
+ if (ethernet) {
+ struct net_device *master;
+
+ master = of_find_net_device_by_node(ethernet);
+ if (!master)
+ return -EPROBE_DEFER;
+
+ return dsa_port_parse_cpu(dp, master);
}
- pr_info("DSA: tree %d parsed\n", dst->tree);
+ if (link)
+ return dsa_port_parse_dsa(dp);
- return 0;
+ return dsa_port_parse_user(dp, name);
}
-static int dsa_parse_ports_dn(struct device_node *ports, struct dsa_switch *ds)
+static int dsa_switch_parse_ports_of(struct dsa_switch *ds,
+ struct device_node *dn)
{
- struct device_node *port;
- int err;
+ struct device_node *ports, *port;
+ struct dsa_port *dp;
u32 reg;
+ int err;
+
+ ports = of_get_child_by_name(dn, "ports");
+ if (!ports) {
+ dev_err(ds->dev, "no ports child node found\n");
+ return -EINVAL;
+ }
for_each_available_child_of_node(ports, port) {
err = of_property_read_u32(port, "reg", &reg);
@@ -618,174 +601,140 @@ static int dsa_parse_ports_dn(struct device_node *ports, struct dsa_switch *ds)
if (reg >= ds->num_ports)
return -EINVAL;
- ds->ports[reg].dn = port;
+ dp = &ds->ports[reg];
+
+ err = dsa_port_parse_of(dp, port);
+ if (err)
+ return err;
}
return 0;
}
-static int dsa_parse_ports(struct dsa_chip_data *cd, struct dsa_switch *ds)
+static int dsa_switch_parse_member_of(struct dsa_switch *ds,
+ struct device_node *dn)
{
- bool valid_name_found = false;
- unsigned int i;
+ u32 m[2] = { 0, 0 };
+ int sz;
- for (i = 0; i < DSA_MAX_PORTS; i++) {
- if (!cd->port_names[i])
- continue;
+ /* Don't error out if this optional property isn't found */
+ sz = of_property_read_variable_u32_array(dn, "dsa,member", m, 2, 2);
+ if (sz < 0 && sz != -EINVAL)
+ return sz;
- ds->ports[i].name = cd->port_names[i];
- valid_name_found = true;
- }
-
- if (!valid_name_found && i == DSA_MAX_PORTS)
+ ds->index = m[1];
+ if (ds->index >= DSA_MAX_SWITCHES)
return -EINVAL;
+ ds->dst = dsa_tree_touch(m[0]);
+ if (!ds->dst)
+ return -ENOMEM;
+
return 0;
}
-static int dsa_parse_member_dn(struct device_node *np, u32 *tree, u32 *index)
+static int dsa_switch_parse_of(struct dsa_switch *ds, struct device_node *dn)
{
int err;
- *tree = *index = 0;
-
- err = of_property_read_u32_index(np, "dsa,member", 0, tree);
- if (err) {
- /* Does not exist, but it is optional */
- if (err == -EINVAL)
- return 0;
- return err;
- }
-
- err = of_property_read_u32_index(np, "dsa,member", 1, index);
+ err = dsa_switch_parse_member_of(ds, dn);
if (err)
return err;
- if (*index >= DSA_MAX_SWITCHES)
- return -EINVAL;
-
- return 0;
+ return dsa_switch_parse_ports_of(ds, dn);
}
-static int dsa_parse_member(struct dsa_chip_data *pd, u32 *tree, u32 *index)
+static int dsa_port_parse(struct dsa_port *dp, const char *name,
+ struct device *dev)
{
- if (!pd)
- return -ENODEV;
+ if (!strcmp(name, "cpu")) {
+ struct net_device *master;
- /* We do not support complex trees with dsa_chip_data */
- *tree = 0;
- *index = 0;
+ master = dsa_dev_to_net_device(dev);
+ if (!master)
+ return -EPROBE_DEFER;
- return 0;
-}
-
-static struct device_node *dsa_get_ports(struct dsa_switch *ds,
- struct device_node *np)
-{
- struct device_node *ports;
+ dev_put(master);
- ports = of_get_child_by_name(np, "ports");
- if (!ports) {
- dev_err(ds->dev, "no ports child node found\n");
- return ERR_PTR(-EINVAL);
+ return dsa_port_parse_cpu(dp, master);
}
- return ports;
+ if (!strcmp(name, "dsa"))
+ return dsa_port_parse_dsa(dp);
+
+ return dsa_port_parse_user(dp, name);
}
-static int _dsa_register_switch(struct dsa_switch *ds)
+static int dsa_switch_parse_ports(struct dsa_switch *ds,
+ struct dsa_chip_data *cd)
{
- struct dsa_chip_data *pdata = ds->dev->platform_data;
- struct device_node *np = ds->dev->of_node;
- struct dsa_switch_tree *dst;
- struct device_node *ports;
- u32 tree, index;
- int i, err;
-
- if (np) {
- err = dsa_parse_member_dn(np, &tree, &index);
- if (err)
- return err;
+ bool valid_name_found = false;
+ struct dsa_port *dp;
+ struct device *dev;
+ const char *name;
+ unsigned int i;
+ int err;
- ports = dsa_get_ports(ds, np);
- if (IS_ERR(ports))
- return PTR_ERR(ports);
+ for (i = 0; i < DSA_MAX_PORTS; i++) {
+ name = cd->port_names[i];
+ dev = cd->netdev[i];
+ dp = &ds->ports[i];
- err = dsa_parse_ports_dn(ports, ds);
- if (err)
- return err;
- } else {
- err = dsa_parse_member(pdata, &tree, &index);
- if (err)
- return err;
+ if (!name)
+ continue;
- err = dsa_parse_ports(pdata, ds);
+ err = dsa_port_parse(dp, name, dev);
if (err)
return err;
- }
- dst = dsa_get_dst(tree);
- if (!dst) {
- dst = dsa_add_dst(tree);
- if (!dst)
- return -ENOMEM;
- }
-
- if (dst->ds[index]) {
- err = -EBUSY;
- goto out;
+ valid_name_found = true;
}
- ds->dst = dst;
- ds->index = index;
- ds->cd = pdata;
-
- /* Initialize the routing table */
- for (i = 0; i < DSA_MAX_SWITCHES; ++i)
- ds->rtable[i] = DSA_RTABLE_NONE;
+ if (!valid_name_found && i == DSA_MAX_PORTS)
+ return -EINVAL;
- dsa_dst_add_ds(dst, ds, index);
+ return 0;
+}
- err = dsa_dst_complete(dst);
- if (err < 0)
- goto out_del_dst;
+static int dsa_switch_parse(struct dsa_switch *ds, struct dsa_chip_data *cd)
+{
+ ds->cd = cd;
- if (err == 1) {
- /* Not all switches registered yet */
- err = 0;
- goto out;
- }
+ /* We don't support interconnected switches nor multiple trees via
+ * platform data, so this is the unique switch of the tree.
+ */
+ ds->index = 0;
+ ds->dst = dsa_tree_touch(0);
+ if (!ds->dst)
+ return -ENOMEM;
- if (dst->applied) {
- pr_info("DSA: Disjoint trees?\n");
- return -EINVAL;
- }
+ return dsa_switch_parse_ports(ds, cd);
+}
- err = dsa_dst_parse(dst);
- if (err) {
- if (err == -EPROBE_DEFER) {
- dsa_dst_del_ds(dst, ds, ds->index);
- return err;
- }
+static int dsa_switch_add(struct dsa_switch *ds)
+{
+ struct dsa_switch_tree *dst = ds->dst;
- goto out_del_dst;
- }
+ return dsa_tree_add_switch(dst, ds);
+}
- err = dsa_dst_apply(dst);
- if (err) {
- dsa_dst_unapply(dst);
- goto out_del_dst;
- }
+static int dsa_switch_probe(struct dsa_switch *ds)
+{
+ struct dsa_chip_data *pdata = ds->dev->platform_data;
+ struct device_node *np = ds->dev->of_node;
+ int err;
- dsa_put_dst(dst);
- return 0;
+ if (np)
+ err = dsa_switch_parse_of(ds, np);
+ else if (pdata)
+ err = dsa_switch_parse(ds, pdata);
+ else
+ err = -ENODEV;
-out_del_dst:
- dsa_dst_del_ds(dst, ds, ds->index);
-out:
- dsa_put_dst(dst);
+ if (err)
+ return err;
- return err;
+ return dsa_switch_add(ds);
}
struct dsa_switch *dsa_switch_alloc(struct device *dev, size_t n)
@@ -815,26 +764,25 @@ int dsa_register_switch(struct dsa_switch *ds)
int err;
mutex_lock(&dsa2_mutex);
- err = _dsa_register_switch(ds);
+ err = dsa_switch_probe(ds);
mutex_unlock(&dsa2_mutex);
return err;
}
EXPORT_SYMBOL_GPL(dsa_register_switch);
-static void _dsa_unregister_switch(struct dsa_switch *ds)
+static void dsa_switch_remove(struct dsa_switch *ds)
{
struct dsa_switch_tree *dst = ds->dst;
+ unsigned int index = ds->index;
- dsa_dst_unapply(dst);
-
- dsa_dst_del_ds(dst, ds, ds->index);
+ dsa_tree_remove_switch(dst, index);
}
void dsa_unregister_switch(struct dsa_switch *ds)
{
mutex_lock(&dsa2_mutex);
- _dsa_unregister_switch(ds);
+ dsa_switch_remove(ds);
mutex_unlock(&dsa2_mutex);
}
EXPORT_SYMBOL_GPL(dsa_unregister_switch);
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 55982cc39b24..7d036696e8c4 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -43,10 +43,10 @@ struct dsa_notifier_bridge_info {
/* DSA_NOTIFIER_FDB_* */
struct dsa_notifier_fdb_info {
- const struct switchdev_obj_port_fdb *fdb;
- struct switchdev_trans *trans;
int sw_index;
int port;
+ const unsigned char *addr;
+ u16 vid;
};
/* DSA_NOTIFIER_MDB_* */
@@ -65,18 +65,13 @@ struct dsa_notifier_vlan_info {
int port;
};
-struct dsa_device_ops {
- struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev);
- struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt,
- struct net_device *orig_dev);
-};
-
struct dsa_slave_priv {
- /* Copy of dp->ds->dst->tag_ops->xmit for faster access in hot path */
+ /* Copy of CPU port xmit for faster access in slave transmit hot path */
struct sk_buff * (*xmit)(struct sk_buff *skb,
struct net_device *dev);
+ struct pcpu_sw_netstats *stats64;
+
/* DSA port data, such as switch, port index, etc. */
struct dsa_port *dp;
@@ -84,7 +79,6 @@ struct dsa_slave_priv {
* The phylib phy_device pointer for the PHY connected
* to this port.
*/
- struct phy_device *phy;
phy_interface_t phy_interface;
int old_link;
int old_pause;
@@ -99,68 +93,105 @@ struct dsa_slave_priv {
};
/* dsa.c */
-int dsa_cpu_dsa_setup(struct dsa_switch *ds, struct device *dev,
- struct dsa_port *dport, int port);
-void dsa_cpu_dsa_destroy(struct dsa_port *dport);
const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol);
-int dsa_cpu_port_ethtool_setup(struct dsa_port *cpu_dp);
-void dsa_cpu_port_ethtool_restore(struct dsa_port *cpu_dp);
+bool dsa_schedule_work(struct work_struct *work);
/* legacy.c */
int dsa_legacy_register(void);
void dsa_legacy_unregister(void);
+int dsa_legacy_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
+ struct net_device *dev,
+ const unsigned char *addr, u16 vid,
+ u16 flags);
+int dsa_legacy_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
+ struct net_device *dev,
+ const unsigned char *addr, u16 vid);
+
+/* master.c */
+int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp);
+void dsa_master_teardown(struct net_device *dev);
+
+static inline struct net_device *dsa_master_find_slave(struct net_device *dev,
+ int device, int port)
+{
+ struct dsa_port *cpu_dp = dev->dsa_ptr;
+ struct dsa_switch_tree *dst = cpu_dp->dst;
+ struct dsa_switch *ds;
+
+ if (device < 0 || device >= DSA_MAX_SWITCHES)
+ return NULL;
+
+ ds = dst->ds[device];
+ if (!ds)
+ return NULL;
+
+ if (port < 0 || port >= ds->num_ports)
+ return NULL;
+
+ return ds->ports[port].slave;
+}
/* port.c */
int dsa_port_set_state(struct dsa_port *dp, u8 state,
struct switchdev_trans *trans);
-void dsa_port_set_state_now(struct dsa_port *dp, u8 state);
+int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy);
+void dsa_port_disable(struct dsa_port *dp, struct phy_device *phy);
int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br);
void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br);
int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
struct switchdev_trans *trans);
int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock,
struct switchdev_trans *trans);
-int dsa_port_fdb_add(struct dsa_port *dp,
- const struct switchdev_obj_port_fdb *fdb,
- struct switchdev_trans *trans);
-int dsa_port_fdb_del(struct dsa_port *dp,
- const struct switchdev_obj_port_fdb *fdb);
-int dsa_port_fdb_dump(struct dsa_port *dp, struct switchdev_obj_port_fdb *fdb,
- switchdev_obj_dump_cb_t *cb);
-int dsa_port_mdb_add(struct dsa_port *dp,
+int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr,
+ u16 vid);
+int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr,
+ u16 vid);
+int dsa_port_fdb_dump(struct dsa_port *dp, dsa_fdb_dump_cb_t *cb, void *data);
+int dsa_port_mdb_add(const struct dsa_port *dp,
const struct switchdev_obj_port_mdb *mdb,
struct switchdev_trans *trans);
-int dsa_port_mdb_del(struct dsa_port *dp,
+int dsa_port_mdb_del(const struct dsa_port *dp,
const struct switchdev_obj_port_mdb *mdb);
-int dsa_port_mdb_dump(struct dsa_port *dp, struct switchdev_obj_port_mdb *mdb,
- switchdev_obj_dump_cb_t *cb);
int dsa_port_vlan_add(struct dsa_port *dp,
const struct switchdev_obj_port_vlan *vlan,
struct switchdev_trans *trans);
int dsa_port_vlan_del(struct dsa_port *dp,
const struct switchdev_obj_port_vlan *vlan);
-int dsa_port_vlan_dump(struct dsa_port *dp,
- struct switchdev_obj_port_vlan *vlan,
- switchdev_obj_dump_cb_t *cb);
+int dsa_port_fixed_link_register_of(struct dsa_port *dp);
+void dsa_port_fixed_link_unregister_of(struct dsa_port *dp);
/* slave.c */
extern const struct dsa_device_ops notag_netdev_ops;
void dsa_slave_mii_bus_init(struct dsa_switch *ds);
-void dsa_cpu_port_ethtool_init(struct ethtool_ops *ops);
-int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
- int port, const char *name);
+int dsa_slave_create(struct dsa_port *dp);
void dsa_slave_destroy(struct net_device *slave_dev);
int dsa_slave_suspend(struct net_device *slave_dev);
int dsa_slave_resume(struct net_device *slave_dev);
int dsa_slave_register_notifier(void);
void dsa_slave_unregister_notifier(void);
+static inline struct dsa_port *dsa_slave_to_port(const struct net_device *dev)
+{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+
+ return p->dp;
+}
+
+static inline struct net_device *
+dsa_slave_to_master(const struct net_device *dev)
+{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+
+ return dp->cpu_dp->master;
+}
+
/* switch.c */
int dsa_switch_register_notifier(struct dsa_switch *ds);
void dsa_switch_unregister_notifier(struct dsa_switch *ds);
/* tag_brcm.c */
extern const struct dsa_device_ops brcm_netdev_ops;
+extern const struct dsa_device_ops brcm_prepend_netdev_ops;
/* tag_dsa.c */
extern const struct dsa_device_ops dsa_netdev_ops;
@@ -183,14 +214,4 @@ extern const struct dsa_device_ops qca_netdev_ops;
/* tag_trailer.c */
extern const struct dsa_device_ops trailer_netdev_ops;
-static inline struct net_device *dsa_master_netdev(struct dsa_slave_priv *p)
-{
- return p->dp->cpu_dp->netdev;
-}
-
-static inline struct dsa_port *dsa_get_cpu_port(struct dsa_switch_tree *dst)
-{
- return dst->cpu_dp;
-}
-
#endif
diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c
index 1d7a3282f2a7..84611d7fcfa2 100644
--- a/net/dsa/legacy.c
+++ b/net/dsa/legacy.c
@@ -78,31 +78,30 @@ dsa_switch_probe(struct device *parent, struct device *host_dev, int sw_addr,
}
/* basic switch operations **************************************************/
-static int dsa_cpu_dsa_setups(struct dsa_switch *ds, struct device *dev)
+static int dsa_cpu_dsa_setups(struct dsa_switch *ds)
{
- struct dsa_port *dport;
int ret, port;
for (port = 0; port < ds->num_ports; port++) {
if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)))
continue;
- dport = &ds->ports[port];
- ret = dsa_cpu_dsa_setup(ds, dev, dport, port);
+ ret = dsa_port_fixed_link_register_of(&ds->ports[port]);
if (ret)
return ret;
}
return 0;
}
-static int dsa_switch_setup_one(struct dsa_switch *ds, struct net_device *master,
- struct device *parent)
+static int dsa_switch_setup_one(struct dsa_switch *ds,
+ struct net_device *master)
{
const struct dsa_switch_ops *ops = ds->ops;
struct dsa_switch_tree *dst = ds->dst;
struct dsa_chip_data *cd = ds->cd;
bool valid_name_found = false;
int index = ds->index;
+ struct dsa_port *dp;
int i, ret;
/*
@@ -111,9 +110,12 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct net_device *master
for (i = 0; i < ds->num_ports; i++) {
char *name;
+ dp = &ds->ports[i];
+
name = cd->port_names[i];
if (name == NULL)
continue;
+ dp->name = name;
if (!strcmp(name, "cpu")) {
if (dst->cpu_dp) {
@@ -122,12 +124,12 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct net_device *master
return -EINVAL;
}
dst->cpu_dp = &ds->ports[i];
- dst->cpu_dp->netdev = master;
- ds->cpu_port_mask |= 1 << i;
+ dst->cpu_dp->master = master;
+ dp->type = DSA_PORT_TYPE_CPU;
} else if (!strcmp(name, "dsa")) {
- ds->dsa_port_mask |= 1 << i;
+ dp->type = DSA_PORT_TYPE_DSA;
} else {
- ds->enabled_port_mask |= 1 << i;
+ dp->type = DSA_PORT_TYPE_USER;
}
valid_name_found = true;
}
@@ -138,7 +140,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct net_device *master
/* Make the built-in MII bus mask match the number of ports,
* switch drivers can override this later
*/
- ds->phys_mii_mask = ds->enabled_port_mask;
+ ds->phys_mii_mask |= dsa_user_ports(ds);
/*
* If the CPU connects to this switch, set the switch tree
@@ -146,14 +148,19 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct net_device *master
* switch.
*/
if (dst->cpu_dp->ds == ds) {
+ const struct dsa_device_ops *tag_ops;
enum dsa_tag_protocol tag_protocol;
- tag_protocol = ops->get_tag_protocol(ds);
- dst->tag_ops = dsa_resolve_tag_protocol(tag_protocol);
- if (IS_ERR(dst->tag_ops))
- return PTR_ERR(dst->tag_ops);
+ tag_protocol = ops->get_tag_protocol(ds, dst->cpu_dp->index);
+ tag_ops = dsa_resolve_tag_protocol(tag_protocol);
+ if (IS_ERR(tag_ops))
+ return PTR_ERR(tag_ops);
+
+ dst->cpu_dp->tag_ops = tag_ops;
- dst->rcv = dst->tag_ops->rcv;
+ /* Few copies for faster access in master receive hot path */
+ dst->cpu_dp->rcv = dst->cpu_dp->tag_ops->rcv;
+ dst->cpu_dp->dst = dst;
}
memcpy(ds->rtable, cd->rtable, sizeof(ds->rtable));
@@ -169,14 +176,8 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct net_device *master
if (ret)
return ret;
- if (ops->set_addr) {
- ret = ops->set_addr(ds, master->dev_addr);
- if (ret < 0)
- return ret;
- }
-
if (!ds->slave_mii_bus && ops->phy_read) {
- ds->slave_mii_bus = devm_mdiobus_alloc(parent);
+ ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev);
if (!ds->slave_mii_bus)
return -ENOMEM;
dsa_slave_mii_bus_init(ds);
@@ -193,25 +194,21 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct net_device *master
ds->ports[i].dn = cd->port_dn[i];
ds->ports[i].cpu_dp = dst->cpu_dp;
- if (!(ds->enabled_port_mask & (1 << i)))
+ if (dsa_is_user_port(ds, i))
continue;
- ret = dsa_slave_create(ds, parent, i, cd->port_names[i]);
+ ret = dsa_slave_create(&ds->ports[i]);
if (ret < 0)
netdev_err(master, "[%d]: can't create dsa slave device for port %d(%s): %d\n",
index, i, cd->port_names[i], ret);
}
/* Perform configuration of the CPU and DSA ports */
- ret = dsa_cpu_dsa_setups(ds, parent);
+ ret = dsa_cpu_dsa_setups(ds);
if (ret < 0)
netdev_err(master, "[%d] : can't configure CPU and DSA ports\n",
index);
- ret = dsa_cpu_port_ethtool_setup(ds->dst->cpu_dp);
- if (ret)
- return ret;
-
return 0;
}
@@ -252,7 +249,7 @@ dsa_switch_setup(struct dsa_switch_tree *dst, struct net_device *master,
ds->ops = ops;
ds->priv = priv;
- ret = dsa_switch_setup_one(ds, master, parent);
+ ret = dsa_switch_setup_one(ds, master);
if (ret)
return ERR_PTR(ret);
@@ -265,24 +262,20 @@ static void dsa_switch_destroy(struct dsa_switch *ds)
/* Destroy network devices for physical switch ports. */
for (port = 0; port < ds->num_ports; port++) {
- if (!(ds->enabled_port_mask & (1 << port)))
+ if (!dsa_is_user_port(ds, port))
continue;
- if (!ds->ports[port].netdev)
+ if (!ds->ports[port].slave)
continue;
- dsa_slave_destroy(ds->ports[port].netdev);
+ dsa_slave_destroy(ds->ports[port].slave);
}
/* Disable configuration of the CPU and DSA ports */
for (port = 0; port < ds->num_ports; port++) {
if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)))
continue;
- dsa_cpu_dsa_destroy(&ds->ports[port]);
-
- /* Clearing a bit which is not set does no harm */
- ds->cpu_port_mask |= ~(1 << port);
- ds->dsa_port_mask |= ~(1 << port);
+ dsa_port_fixed_link_unregister_of(&ds->ports[port]);
}
if (ds->slave_mii_bus && ds->ops->phy_read)
@@ -600,15 +593,7 @@ static int dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev,
if (!configured)
return -EPROBE_DEFER;
- /*
- * If we use a tagging format that doesn't have an ethertype
- * field, make sure that all packets from this point on get
- * sent to the tag format's receive function.
- */
- wmb();
- dev->dsa_ptr = dst;
-
- return 0;
+ return dsa_master_setup(dst->cpu_dp->master, dst->cpu_dp);
}
static int dsa_probe(struct platform_device *pdev)
@@ -673,13 +658,7 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst)
{
int i;
- dst->cpu_dp->netdev->dsa_ptr = NULL;
-
- /* If we used a tagging format that doesn't have an ethertype
- * field, make sure that all packets from this point get sent
- * without the tag and go through the regular receive path.
- */
- wmb();
+ dsa_master_teardown(dst->cpu_dp->master);
for (i = 0; i < dst->pd->nr_chips; i++) {
struct dsa_switch *ds = dst->ds[i];
@@ -688,9 +667,7 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst)
dsa_switch_destroy(ds);
}
- dsa_cpu_port_ethtool_restore(dst->cpu_dp);
-
- dev_put(dst->cpu_dp->netdev);
+ dev_put(dst->cpu_dp->master);
}
static int dsa_remove(struct platform_device *pdev)
@@ -741,6 +718,26 @@ static int dsa_resume(struct device *d)
}
#endif
+/* legacy way, bypassing the bridge *****************************************/
+int dsa_legacy_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
+ struct net_device *dev,
+ const unsigned char *addr, u16 vid,
+ u16 flags)
+{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+
+ return dsa_port_fdb_add(dp, addr, vid);
+}
+
+int dsa_legacy_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
+ struct net_device *dev,
+ const unsigned char *addr, u16 vid)
+{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+
+ return dsa_port_fdb_del(dp, addr, vid);
+}
+
static SIMPLE_DEV_PM_OPS(dsa_pm_ops, dsa_suspend, dsa_resume);
static const struct of_device_id dsa_of_match_table[] = {
diff --git a/net/dsa/master.c b/net/dsa/master.c
new file mode 100644
index 000000000000..00589147f042
--- /dev/null
+++ b/net/dsa/master.c
@@ -0,0 +1,143 @@
+/*
+ * Handling of a master device, switching frames via its switch fabric CPU port
+ *
+ * Copyright (c) 2017 Savoir-faire Linux Inc.
+ * Vivien Didelot <vivien.didelot@savoirfairelinux.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include "dsa_priv.h"
+
+static void dsa_master_get_ethtool_stats(struct net_device *dev,
+ struct ethtool_stats *stats,
+ uint64_t *data)
+{
+ struct dsa_port *cpu_dp = dev->dsa_ptr;
+ const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops;
+ struct dsa_switch *ds = cpu_dp->ds;
+ int port = cpu_dp->index;
+ int count = 0;
+
+ if (ops && ops->get_sset_count && ops->get_ethtool_stats) {
+ count = ops->get_sset_count(dev, ETH_SS_STATS);
+ ops->get_ethtool_stats(dev, stats, data);
+ }
+
+ if (ds->ops->get_ethtool_stats)
+ ds->ops->get_ethtool_stats(ds, port, data + count);
+}
+
+static int dsa_master_get_sset_count(struct net_device *dev, int sset)
+{
+ struct dsa_port *cpu_dp = dev->dsa_ptr;
+ const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops;
+ struct dsa_switch *ds = cpu_dp->ds;
+ int count = 0;
+
+ if (ops && ops->get_sset_count)
+ count += ops->get_sset_count(dev, sset);
+
+ if (sset == ETH_SS_STATS && ds->ops->get_sset_count)
+ count += ds->ops->get_sset_count(ds);
+
+ return count;
+}
+
+static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset,
+ uint8_t *data)
+{
+ struct dsa_port *cpu_dp = dev->dsa_ptr;
+ const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops;
+ struct dsa_switch *ds = cpu_dp->ds;
+ int port = cpu_dp->index;
+ int len = ETH_GSTRING_LEN;
+ int mcount = 0, count;
+ unsigned int i;
+ uint8_t pfx[4];
+ uint8_t *ndata;
+
+ snprintf(pfx, sizeof(pfx), "p%.2d", port);
+ /* We do not want to be NULL-terminated, since this is a prefix */
+ pfx[sizeof(pfx) - 1] = '_';
+
+ if (ops && ops->get_sset_count && ops->get_strings) {
+ mcount = ops->get_sset_count(dev, ETH_SS_STATS);
+ ops->get_strings(dev, stringset, data);
+ }
+
+ if (stringset == ETH_SS_STATS && ds->ops->get_strings) {
+ ndata = data + mcount * len;
+ /* This function copies ETH_GSTRINGS_LEN bytes, we will mangle
+ * the output after to prepend our CPU port prefix we
+ * constructed earlier
+ */
+ ds->ops->get_strings(ds, port, ndata);
+ count = ds->ops->get_sset_count(ds);
+ for (i = 0; i < count; i++) {
+ memmove(ndata + (i * len + sizeof(pfx)),
+ ndata + i * len, len - sizeof(pfx));
+ memcpy(ndata + i * len, pfx, sizeof(pfx));
+ }
+ }
+}
+
+static int dsa_master_ethtool_setup(struct net_device *dev)
+{
+ struct dsa_port *cpu_dp = dev->dsa_ptr;
+ struct dsa_switch *ds = cpu_dp->ds;
+ struct ethtool_ops *ops;
+
+ ops = devm_kzalloc(ds->dev, sizeof(*ops), GFP_KERNEL);
+ if (!ops)
+ return -ENOMEM;
+
+ cpu_dp->orig_ethtool_ops = dev->ethtool_ops;
+ if (cpu_dp->orig_ethtool_ops)
+ memcpy(ops, cpu_dp->orig_ethtool_ops, sizeof(*ops));
+
+ ops->get_sset_count = dsa_master_get_sset_count;
+ ops->get_ethtool_stats = dsa_master_get_ethtool_stats;
+ ops->get_strings = dsa_master_get_strings;
+
+ dev->ethtool_ops = ops;
+
+ return 0;
+}
+
+static void dsa_master_ethtool_teardown(struct net_device *dev)
+{
+ struct dsa_port *cpu_dp = dev->dsa_ptr;
+
+ dev->ethtool_ops = cpu_dp->orig_ethtool_ops;
+ cpu_dp->orig_ethtool_ops = NULL;
+}
+
+int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp)
+{
+ /* If we use a tagging format that doesn't have an ethertype
+ * field, make sure that all packets from this point on get
+ * sent to the tag format's receive function.
+ */
+ wmb();
+
+ dev->dsa_ptr = cpu_dp;
+
+ return dsa_master_ethtool_setup(dev);
+}
+
+void dsa_master_teardown(struct net_device *dev)
+{
+ dsa_master_ethtool_teardown(dev);
+
+ dev->dsa_ptr = NULL;
+
+ /* If we used a tagging format that doesn't have an ethertype
+ * field, make sure that all packets from this point get sent
+ * without the tag and go through the regular receive path.
+ */
+ wmb();
+}
diff --git a/net/dsa/port.c b/net/dsa/port.c
index efc3bce3a89d..bb4be2679904 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -12,10 +12,12 @@
#include <linux/if_bridge.h>
#include <linux/notifier.h>
+#include <linux/of_mdio.h>
+#include <linux/of_net.h>
#include "dsa_priv.h"
-static int dsa_port_notify(struct dsa_port *dp, unsigned long e, void *v)
+static int dsa_port_notify(const struct dsa_port *dp, unsigned long e, void *v)
{
struct raw_notifier_head *nh = &dp->ds->dst->nh;
int err;
@@ -56,7 +58,7 @@ int dsa_port_set_state(struct dsa_port *dp, u8 state,
return 0;
}
-void dsa_port_set_state_now(struct dsa_port *dp, u8 state)
+static void dsa_port_set_state_now(struct dsa_port *dp, u8 state)
{
int err;
@@ -65,6 +67,35 @@ void dsa_port_set_state_now(struct dsa_port *dp, u8 state)
pr_err("DSA: failed to set STP state %u (%d)\n", state, err);
}
+int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy)
+{
+ u8 stp_state = dp->bridge_dev ? BR_STATE_BLOCKING : BR_STATE_FORWARDING;
+ struct dsa_switch *ds = dp->ds;
+ int port = dp->index;
+ int err;
+
+ if (ds->ops->port_enable) {
+ err = ds->ops->port_enable(ds, port, phy);
+ if (err)
+ return err;
+ }
+
+ dsa_port_set_state_now(dp, stp_state);
+
+ return 0;
+}
+
+void dsa_port_disable(struct dsa_port *dp, struct phy_device *phy)
+{
+ struct dsa_switch *ds = dp->ds;
+ int port = dp->index;
+
+ dsa_port_set_state_now(dp, BR_STATE_DISABLED);
+
+ if (ds->ops->port_disable)
+ ds->ops->port_disable(ds, port, phy);
+}
+
int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br)
{
struct dsa_notifier_bridge_info info = {
@@ -146,44 +177,45 @@ int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock,
return dsa_port_notify(dp, DSA_NOTIFIER_AGEING_TIME, &info);
}
-int dsa_port_fdb_add(struct dsa_port *dp,
- const struct switchdev_obj_port_fdb *fdb,
- struct switchdev_trans *trans)
+int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr,
+ u16 vid)
{
struct dsa_notifier_fdb_info info = {
.sw_index = dp->ds->index,
.port = dp->index,
- .trans = trans,
- .fdb = fdb,
+ .addr = addr,
+ .vid = vid,
};
return dsa_port_notify(dp, DSA_NOTIFIER_FDB_ADD, &info);
}
-int dsa_port_fdb_del(struct dsa_port *dp,
- const struct switchdev_obj_port_fdb *fdb)
+int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr,
+ u16 vid)
{
struct dsa_notifier_fdb_info info = {
.sw_index = dp->ds->index,
.port = dp->index,
- .fdb = fdb,
+ .addr = addr,
+ .vid = vid,
+
};
return dsa_port_notify(dp, DSA_NOTIFIER_FDB_DEL, &info);
}
-int dsa_port_fdb_dump(struct dsa_port *dp, struct switchdev_obj_port_fdb *fdb,
- switchdev_obj_dump_cb_t *cb)
+int dsa_port_fdb_dump(struct dsa_port *dp, dsa_fdb_dump_cb_t *cb, void *data)
{
struct dsa_switch *ds = dp->ds;
+ int port = dp->index;
- if (ds->ops->port_fdb_dump)
- return ds->ops->port_fdb_dump(ds, dp->index, fdb, cb);
+ if (!ds->ops->port_fdb_dump)
+ return -EOPNOTSUPP;
- return -EOPNOTSUPP;
+ return ds->ops->port_fdb_dump(ds, port, cb, data);
}
-int dsa_port_mdb_add(struct dsa_port *dp,
+int dsa_port_mdb_add(const struct dsa_port *dp,
const struct switchdev_obj_port_mdb *mdb,
struct switchdev_trans *trans)
{
@@ -197,7 +229,7 @@ int dsa_port_mdb_add(struct dsa_port *dp,
return dsa_port_notify(dp, DSA_NOTIFIER_MDB_ADD, &info);
}
-int dsa_port_mdb_del(struct dsa_port *dp,
+int dsa_port_mdb_del(const struct dsa_port *dp,
const struct switchdev_obj_port_mdb *mdb)
{
struct dsa_notifier_mdb_info info = {
@@ -209,17 +241,6 @@ int dsa_port_mdb_del(struct dsa_port *dp,
return dsa_port_notify(dp, DSA_NOTIFIER_MDB_DEL, &info);
}
-int dsa_port_mdb_dump(struct dsa_port *dp, struct switchdev_obj_port_mdb *mdb,
- switchdev_obj_dump_cb_t *cb)
-{
- struct dsa_switch *ds = dp->ds;
-
- if (ds->ops->port_mdb_dump)
- return ds->ops->port_mdb_dump(ds, dp->index, mdb, cb);
-
- return -EOPNOTSUPP;
-}
-
int dsa_port_vlan_add(struct dsa_port *dp,
const struct switchdev_obj_port_vlan *vlan,
struct switchdev_trans *trans)
@@ -231,7 +252,10 @@ int dsa_port_vlan_add(struct dsa_port *dp,
.vlan = vlan,
};
- return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_ADD, &info);
+ if (br_vlan_enabled(dp->bridge_dev))
+ return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_ADD, &info);
+
+ return 0;
}
int dsa_port_vlan_del(struct dsa_port *dp,
@@ -243,17 +267,53 @@ int dsa_port_vlan_del(struct dsa_port *dp,
.vlan = vlan,
};
- return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_DEL, &info);
+ if (br_vlan_enabled(dp->bridge_dev))
+ return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_DEL, &info);
+
+ return 0;
}
-int dsa_port_vlan_dump(struct dsa_port *dp,
- struct switchdev_obj_port_vlan *vlan,
- switchdev_obj_dump_cb_t *cb)
+int dsa_port_fixed_link_register_of(struct dsa_port *dp)
{
+ struct device_node *dn = dp->dn;
struct dsa_switch *ds = dp->ds;
+ struct phy_device *phydev;
+ int port = dp->index;
+ int mode;
+ int err;
- if (ds->ops->port_vlan_dump)
- return ds->ops->port_vlan_dump(ds, dp->index, vlan, cb);
+ if (of_phy_is_fixed_link(dn)) {
+ err = of_phy_register_fixed_link(dn);
+ if (err) {
+ dev_err(ds->dev,
+ "failed to register the fixed PHY of port %d\n",
+ port);
+ return err;
+ }
+
+ phydev = of_phy_find_device(dn);
+
+ mode = of_get_phy_mode(dn);
+ if (mode < 0)
+ mode = PHY_INTERFACE_MODE_NA;
+ phydev->interface = mode;
+
+ genphy_config_init(phydev);
+ genphy_read_status(phydev);
+
+ if (ds->ops->adjust_link)
+ ds->ops->adjust_link(ds, port, phydev);
+
+ put_device(&phydev->mdio.dev);
+ }
+
+ return 0;
+}
+
+void dsa_port_fixed_link_unregister_of(struct dsa_port *dp)
+{
+ struct device_node *dn = dp->dn;
- return -EOPNOTSUPP;
+ if (of_phy_is_fixed_link(dn))
+ of_phy_deregister_fixed_link(dn);
}
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 9507bd38cf04..d6e7a642493b 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -55,7 +55,7 @@ void dsa_slave_mii_bus_init(struct dsa_switch *ds)
ds->slave_mii_bus->read = dsa_slave_phy_read;
ds->slave_mii_bus->write = dsa_slave_phy_write;
snprintf(ds->slave_mii_bus->id, MII_BUS_ID_SIZE, "dsa-%d.%d",
- ds->dst->tree, ds->index);
+ ds->dst->index, ds->index);
ds->slave_mii_bus->parent = ds->dev;
ds->slave_mii_bus->phy_mask = ~ds->phys_mii_mask;
}
@@ -64,18 +64,13 @@ void dsa_slave_mii_bus_init(struct dsa_switch *ds)
/* slave device handling ****************************************************/
static int dsa_slave_get_iflink(const struct net_device *dev)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
-
- return dsa_master_netdev(p)->ifindex;
+ return dsa_slave_to_master(dev)->ifindex;
}
static int dsa_slave_open(struct net_device *dev)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_port *dp = p->dp;
- struct dsa_switch *ds = dp->ds;
- struct net_device *master = dsa_master_netdev(p);
- u8 stp_state = dp->bridge_dev ? BR_STATE_BLOCKING : BR_STATE_FORWARDING;
+ struct net_device *master = dsa_slave_to_master(dev);
+ struct dsa_port *dp = dsa_slave_to_port(dev);
int err;
if (!(master->flags & IFF_UP))
@@ -98,16 +93,12 @@ static int dsa_slave_open(struct net_device *dev)
goto clear_allmulti;
}
- if (ds->ops->port_enable) {
- err = ds->ops->port_enable(ds, p->dp->index, p->phy);
- if (err)
- goto clear_promisc;
- }
-
- dsa_port_set_state_now(p->dp, stp_state);
+ err = dsa_port_enable(dp, dev->phydev);
+ if (err)
+ goto clear_promisc;
- if (p->phy)
- phy_start(p->phy);
+ if (dev->phydev)
+ phy_start(dev->phydev);
return 0;
@@ -126,12 +117,13 @@ out:
static int dsa_slave_close(struct net_device *dev)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct net_device *master = dsa_master_netdev(p);
- struct dsa_switch *ds = p->dp->ds;
+ struct net_device *master = dsa_slave_to_master(dev);
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+
+ if (dev->phydev)
+ phy_stop(dev->phydev);
- if (p->phy)
- phy_stop(p->phy);
+ dsa_port_disable(dp, dev->phydev);
dev_mc_unsync(master, dev);
dev_uc_unsync(master, dev);
@@ -143,18 +135,12 @@ static int dsa_slave_close(struct net_device *dev)
if (!ether_addr_equal(dev->dev_addr, master->dev_addr))
dev_uc_del(master, dev->dev_addr);
- if (ds->ops->port_disable)
- ds->ops->port_disable(ds, p->dp->index, p->phy);
-
- dsa_port_set_state_now(p->dp, BR_STATE_DISABLED);
-
return 0;
}
static void dsa_slave_change_rx_flags(struct net_device *dev, int change)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct net_device *master = dsa_master_netdev(p);
+ struct net_device *master = dsa_slave_to_master(dev);
if (change & IFF_ALLMULTI)
dev_set_allmulti(master, dev->flags & IFF_ALLMULTI ? 1 : -1);
@@ -164,8 +150,7 @@ static void dsa_slave_change_rx_flags(struct net_device *dev, int change)
static void dsa_slave_set_rx_mode(struct net_device *dev)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct net_device *master = dsa_master_netdev(p);
+ struct net_device *master = dsa_slave_to_master(dev);
dev_mc_sync(master, dev);
dev_uc_sync(master, dev);
@@ -173,8 +158,7 @@ static void dsa_slave_set_rx_mode(struct net_device *dev)
static int dsa_slave_set_mac_address(struct net_device *dev, void *a)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct net_device *master = dsa_master_netdev(p);
+ struct net_device *master = dsa_slave_to_master(dev);
struct sockaddr *addr = a;
int err;
@@ -199,22 +183,90 @@ out:
return 0;
}
-static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+struct dsa_slave_dump_ctx {
+ struct net_device *dev;
+ struct sk_buff *skb;
+ struct netlink_callback *cb;
+ int idx;
+};
+
+static int
+dsa_slave_port_fdb_do_dump(const unsigned char *addr, u16 vid,
+ bool is_static, void *data)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_slave_dump_ctx *dump = data;
+ u32 portid = NETLINK_CB(dump->cb->skb).portid;
+ u32 seq = dump->cb->nlh->nlmsg_seq;
+ struct nlmsghdr *nlh;
+ struct ndmsg *ndm;
+
+ if (dump->idx < dump->cb->args[2])
+ goto skip;
+
+ nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH,
+ sizeof(*ndm), NLM_F_MULTI);
+ if (!nlh)
+ return -EMSGSIZE;
+
+ ndm = nlmsg_data(nlh);
+ ndm->ndm_family = AF_BRIDGE;
+ ndm->ndm_pad1 = 0;
+ ndm->ndm_pad2 = 0;
+ ndm->ndm_flags = NTF_SELF;
+ ndm->ndm_type = 0;
+ ndm->ndm_ifindex = dump->dev->ifindex;
+ ndm->ndm_state = is_static ? NUD_NOARP : NUD_REACHABLE;
+
+ if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, addr))
+ goto nla_put_failure;
+
+ if (vid && nla_put_u16(dump->skb, NDA_VLAN, vid))
+ goto nla_put_failure;
+
+ nlmsg_end(dump->skb, nlh);
+
+skip:
+ dump->idx++;
+ return 0;
- if (p->phy != NULL)
- return phy_mii_ioctl(p->phy, ifr, cmd);
+nla_put_failure:
+ nlmsg_cancel(dump->skb, nlh);
+ return -EMSGSIZE;
+}
- return -EOPNOTSUPP;
+static int
+dsa_slave_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
+ struct net_device *dev, struct net_device *filter_dev,
+ int *idx)
+{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_slave_dump_ctx dump = {
+ .dev = dev,
+ .skb = skb,
+ .cb = cb,
+ .idx = *idx,
+ };
+ int err;
+
+ err = dsa_port_fdb_dump(dp, dsa_slave_port_fdb_do_dump, &dump);
+ *idx = dump.idx;
+
+ return err;
+}
+
+static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+ if (!dev->phydev)
+ return -ENODEV;
+
+ return phy_mii_ioctl(dev->phydev, ifr, cmd);
}
static int dsa_slave_port_attr_set(struct net_device *dev,
const struct switchdev_attr *attr,
struct switchdev_trans *trans)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_port *dp = p->dp;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
int ret;
switch (attr->id) {
@@ -240,8 +292,7 @@ static int dsa_slave_port_obj_add(struct net_device *dev,
const struct switchdev_obj *obj,
struct switchdev_trans *trans)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_port *dp = p->dp;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
int err;
/* For the prepare phase, ensure the full set of changes is feasable in
@@ -250,12 +301,16 @@ static int dsa_slave_port_obj_add(struct net_device *dev,
*/
switch (obj->id) {
- case SWITCHDEV_OBJ_ID_PORT_FDB:
- err = dsa_port_fdb_add(dp, SWITCHDEV_OBJ_PORT_FDB(obj), trans);
- break;
case SWITCHDEV_OBJ_ID_PORT_MDB:
err = dsa_port_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj), trans);
break;
+ case SWITCHDEV_OBJ_ID_HOST_MDB:
+ /* DSA can directly translate this to a normal MDB add,
+ * but on the CPU port.
+ */
+ err = dsa_port_mdb_add(dp->cpu_dp, SWITCHDEV_OBJ_PORT_MDB(obj),
+ trans);
+ break;
case SWITCHDEV_OBJ_ID_PORT_VLAN:
err = dsa_port_vlan_add(dp, SWITCHDEV_OBJ_PORT_VLAN(obj),
trans);
@@ -271,45 +326,21 @@ static int dsa_slave_port_obj_add(struct net_device *dev,
static int dsa_slave_port_obj_del(struct net_device *dev,
const struct switchdev_obj *obj)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_port *dp = p->dp;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
int err;
switch (obj->id) {
- case SWITCHDEV_OBJ_ID_PORT_FDB:
- err = dsa_port_fdb_del(dp, SWITCHDEV_OBJ_PORT_FDB(obj));
- break;
case SWITCHDEV_OBJ_ID_PORT_MDB:
err = dsa_port_mdb_del(dp, SWITCHDEV_OBJ_PORT_MDB(obj));
break;
- case SWITCHDEV_OBJ_ID_PORT_VLAN:
- err = dsa_port_vlan_del(dp, SWITCHDEV_OBJ_PORT_VLAN(obj));
- break;
- default:
- err = -EOPNOTSUPP;
- break;
- }
-
- return err;
-}
-
-static int dsa_slave_port_obj_dump(struct net_device *dev,
- struct switchdev_obj *obj,
- switchdev_obj_dump_cb_t *cb)
-{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_port *dp = p->dp;
- int err;
-
- switch (obj->id) {
- case SWITCHDEV_OBJ_ID_PORT_FDB:
- err = dsa_port_fdb_dump(dp, SWITCHDEV_OBJ_PORT_FDB(obj), cb);
- break;
- case SWITCHDEV_OBJ_ID_PORT_MDB:
- err = dsa_port_mdb_dump(dp, SWITCHDEV_OBJ_PORT_MDB(obj), cb);
+ case SWITCHDEV_OBJ_ID_HOST_MDB:
+ /* DSA can directly translate this to a normal MDB add,
+ * but on the CPU port.
+ */
+ err = dsa_port_mdb_del(dp->cpu_dp, SWITCHDEV_OBJ_PORT_MDB(obj));
break;
case SWITCHDEV_OBJ_ID_PORT_VLAN:
- err = dsa_port_vlan_dump(dp, SWITCHDEV_OBJ_PORT_VLAN(obj), cb);
+ err = dsa_port_vlan_del(dp, SWITCHDEV_OBJ_PORT_VLAN(obj));
break;
default:
err = -EOPNOTSUPP;
@@ -322,13 +353,17 @@ static int dsa_slave_port_obj_dump(struct net_device *dev,
static int dsa_slave_port_attr_get(struct net_device *dev,
struct switchdev_attr *attr)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
+ struct dsa_switch_tree *dst = ds->dst;
switch (attr->id) {
case SWITCHDEV_ATTR_ID_PORT_PARENT_ID:
- attr->u.ppid.id_len = sizeof(ds->index);
- memcpy(&attr->u.ppid.id, &ds->index, attr->u.ppid.id_len);
+ attr->u.ppid.id_len = sizeof(dst->index);
+ memcpy(&attr->u.ppid.id, &dst->index, attr->u.ppid.id_len);
+ break;
+ case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT:
+ attr->u.brport_flags_support = 0;
break;
default:
return -EOPNOTSUPP;
@@ -337,10 +372,12 @@ static int dsa_slave_port_attr_get(struct net_device *dev,
return 0;
}
-static inline netdev_tx_t dsa_netpoll_send_skb(struct dsa_slave_priv *p,
- struct sk_buff *skb)
+static inline netdev_tx_t dsa_slave_netpoll_send_skb(struct net_device *dev,
+ struct sk_buff *skb)
{
#ifdef CONFIG_NET_POLL_CONTROLLER
+ struct dsa_slave_priv *p = netdev_priv(dev);
+
if (p->netpoll)
netpoll_send_skb(p->netpoll, skb);
#else
@@ -352,10 +389,14 @@ static inline netdev_tx_t dsa_netpoll_send_skb(struct dsa_slave_priv *p,
static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct dsa_slave_priv *p = netdev_priv(dev);
+ struct pcpu_sw_netstats *s;
struct sk_buff *nskb;
- dev->stats.tx_packets++;
- dev->stats.tx_bytes += skb->len;
+ s = this_cpu_ptr(p->stats64);
+ u64_stats_update_begin(&s->syncp);
+ s->tx_packets++;
+ s->tx_bytes += skb->len;
+ u64_stats_update_end(&s->syncp);
/* Transmit function may have to reallocate the original SKB,
* in which case it must have freed it. Only free it here on error.
@@ -370,43 +411,18 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
* tag to be successfully transmitted
*/
if (unlikely(netpoll_tx_running(dev)))
- return dsa_netpoll_send_skb(p, nskb);
+ return dsa_slave_netpoll_send_skb(dev, nskb);
/* Queue the SKB for transmission on the parent interface, but
* do not modify its EtherType
*/
- nskb->dev = dsa_master_netdev(p);
+ nskb->dev = dsa_slave_to_master(dev);
dev_queue_xmit(nskb);
return NETDEV_TX_OK;
}
/* ethtool operations *******************************************************/
-static int
-dsa_slave_get_link_ksettings(struct net_device *dev,
- struct ethtool_link_ksettings *cmd)
-{
- struct dsa_slave_priv *p = netdev_priv(dev);
-
- if (!p->phy)
- return -EOPNOTSUPP;
-
- phy_ethtool_ksettings_get(p->phy, cmd);
-
- return 0;
-}
-
-static int
-dsa_slave_set_link_ksettings(struct net_device *dev,
- const struct ethtool_link_ksettings *cmd)
-{
- struct dsa_slave_priv *p = netdev_priv(dev);
-
- if (p->phy != NULL)
- return phy_ethtool_ksettings_set(p->phy, cmd);
-
- return -EOPNOTSUPP;
-}
static void dsa_slave_get_drvinfo(struct net_device *dev,
struct ethtool_drvinfo *drvinfo)
@@ -418,11 +434,11 @@ static void dsa_slave_get_drvinfo(struct net_device *dev,
static int dsa_slave_get_regs_len(struct net_device *dev)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
if (ds->ops->get_regs_len)
- return ds->ops->get_regs_len(ds, p->dp->index);
+ return ds->ops->get_regs_len(ds, dp->index);
return -EOPNOTSUPP;
}
@@ -430,39 +446,27 @@ static int dsa_slave_get_regs_len(struct net_device *dev)
static void
dsa_slave_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *_p)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
if (ds->ops->get_regs)
- ds->ops->get_regs(ds, p->dp->index, regs, _p);
-}
-
-static int dsa_slave_nway_reset(struct net_device *dev)
-{
- struct dsa_slave_priv *p = netdev_priv(dev);
-
- if (p->phy != NULL)
- return genphy_restart_aneg(p->phy);
-
- return -EOPNOTSUPP;
+ ds->ops->get_regs(ds, dp->index, regs, _p);
}
static u32 dsa_slave_get_link(struct net_device *dev)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
+ if (!dev->phydev)
+ return -ENODEV;
- if (p->phy != NULL) {
- genphy_update_link(p->phy);
- return p->phy->link;
- }
+ genphy_update_link(dev->phydev);
- return -EOPNOTSUPP;
+ return dev->phydev->link;
}
static int dsa_slave_get_eeprom_len(struct net_device *dev)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
if (ds->cd && ds->cd->eeprom_len)
return ds->cd->eeprom_len;
@@ -476,8 +480,8 @@ static int dsa_slave_get_eeprom_len(struct net_device *dev)
static int dsa_slave_get_eeprom(struct net_device *dev,
struct ethtool_eeprom *eeprom, u8 *data)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
if (ds->ops->get_eeprom)
return ds->ops->get_eeprom(ds, eeprom, data);
@@ -488,8 +492,8 @@ static int dsa_slave_get_eeprom(struct net_device *dev,
static int dsa_slave_set_eeprom(struct net_device *dev,
struct ethtool_eeprom *eeprom, u8 *data)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
if (ds->ops->set_eeprom)
return ds->ops->set_eeprom(ds, eeprom, data);
@@ -500,8 +504,8 @@ static int dsa_slave_set_eeprom(struct net_device *dev,
static void dsa_slave_get_strings(struct net_device *dev,
uint32_t stringset, uint8_t *data)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
if (stringset == ETH_SS_STATS) {
int len = ETH_GSTRING_LEN;
@@ -511,80 +515,7 @@ static void dsa_slave_get_strings(struct net_device *dev,
strncpy(data + 2 * len, "rx_packets", len);
strncpy(data + 3 * len, "rx_bytes", len);
if (ds->ops->get_strings)
- ds->ops->get_strings(ds, p->dp->index, data + 4 * len);
- }
-}
-
-static void dsa_cpu_port_get_ethtool_stats(struct net_device *dev,
- struct ethtool_stats *stats,
- uint64_t *data)
-{
- struct dsa_switch_tree *dst = dev->dsa_ptr;
- struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
- struct dsa_switch *ds = cpu_dp->ds;
- s8 cpu_port = cpu_dp->index;
- int count = 0;
-
- if (cpu_dp->ethtool_ops.get_sset_count) {
- count = cpu_dp->ethtool_ops.get_sset_count(dev, ETH_SS_STATS);
- cpu_dp->ethtool_ops.get_ethtool_stats(dev, stats, data);
- }
-
- if (ds->ops->get_ethtool_stats)
- ds->ops->get_ethtool_stats(ds, cpu_port, data + count);
-}
-
-static int dsa_cpu_port_get_sset_count(struct net_device *dev, int sset)
-{
- struct dsa_switch_tree *dst = dev->dsa_ptr;
- struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
- struct dsa_switch *ds = cpu_dp->ds;
- int count = 0;
-
- if (cpu_dp->ethtool_ops.get_sset_count)
- count += cpu_dp->ethtool_ops.get_sset_count(dev, sset);
-
- if (sset == ETH_SS_STATS && ds->ops->get_sset_count)
- count += ds->ops->get_sset_count(ds);
-
- return count;
-}
-
-static void dsa_cpu_port_get_strings(struct net_device *dev,
- uint32_t stringset, uint8_t *data)
-{
- struct dsa_switch_tree *dst = dev->dsa_ptr;
- struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
- struct dsa_switch *ds = cpu_dp->ds;
- s8 cpu_port = cpu_dp->index;
- int len = ETH_GSTRING_LEN;
- int mcount = 0, count;
- unsigned int i;
- uint8_t pfx[4];
- uint8_t *ndata;
-
- snprintf(pfx, sizeof(pfx), "p%.2d", cpu_port);
- /* We do not want to be NULL-terminated, since this is a prefix */
- pfx[sizeof(pfx) - 1] = '_';
-
- if (cpu_dp->ethtool_ops.get_sset_count) {
- mcount = cpu_dp->ethtool_ops.get_sset_count(dev, ETH_SS_STATS);
- cpu_dp->ethtool_ops.get_strings(dev, stringset, data);
- }
-
- if (stringset == ETH_SS_STATS && ds->ops->get_strings) {
- ndata = data + mcount * len;
- /* This function copies ETH_GSTRINGS_LEN bytes, we will mangle
- * the output after to prepend our CPU port prefix we
- * constructed earlier
- */
- ds->ops->get_strings(ds, cpu_port, ndata);
- count = ds->ops->get_sset_count(ds);
- for (i = 0; i < count; i++) {
- memmove(ndata + (i * len + sizeof(pfx)),
- ndata + i * len, len - sizeof(pfx));
- memcpy(ndata + i * len, pfx, sizeof(pfx));
- }
+ ds->ops->get_strings(ds, dp->index, data + 4 * len);
}
}
@@ -592,21 +523,37 @@ static void dsa_slave_get_ethtool_stats(struct net_device *dev,
struct ethtool_stats *stats,
uint64_t *data)
{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
-
- data[0] = dev->stats.tx_packets;
- data[1] = dev->stats.tx_bytes;
- data[2] = dev->stats.rx_packets;
- data[3] = dev->stats.rx_bytes;
+ struct dsa_switch *ds = dp->ds;
+ struct pcpu_sw_netstats *s;
+ unsigned int start;
+ int i;
+
+ for_each_possible_cpu(i) {
+ u64 tx_packets, tx_bytes, rx_packets, rx_bytes;
+
+ s = per_cpu_ptr(p->stats64, i);
+ do {
+ start = u64_stats_fetch_begin_irq(&s->syncp);
+ tx_packets = s->tx_packets;
+ tx_bytes = s->tx_bytes;
+ rx_packets = s->rx_packets;
+ rx_bytes = s->rx_bytes;
+ } while (u64_stats_fetch_retry_irq(&s->syncp, start));
+ data[0] += tx_packets;
+ data[1] += tx_bytes;
+ data[2] += rx_packets;
+ data[3] += rx_bytes;
+ }
if (ds->ops->get_ethtool_stats)
- ds->ops->get_ethtool_stats(ds, p->dp->index, data + 4);
+ ds->ops->get_ethtool_stats(ds, dp->index, data + 4);
}
static int dsa_slave_get_sset_count(struct net_device *dev, int sset)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
if (sset == ETH_SS_STATS) {
int count;
@@ -623,69 +570,77 @@ static int dsa_slave_get_sset_count(struct net_device *dev, int sset)
static void dsa_slave_get_wol(struct net_device *dev, struct ethtool_wolinfo *w)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
if (ds->ops->get_wol)
- ds->ops->get_wol(ds, p->dp->index, w);
+ ds->ops->get_wol(ds, dp->index, w);
}
static int dsa_slave_set_wol(struct net_device *dev, struct ethtool_wolinfo *w)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
int ret = -EOPNOTSUPP;
if (ds->ops->set_wol)
- ret = ds->ops->set_wol(ds, p->dp->index, w);
+ ret = ds->ops->set_wol(ds, dp->index, w);
return ret;
}
static int dsa_slave_set_eee(struct net_device *dev, struct ethtool_eee *e)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
int ret;
- if (!ds->ops->set_eee)
+ /* Port's PHY and MAC both need to be EEE capable */
+ if (!dev->phydev)
+ return -ENODEV;
+
+ if (!ds->ops->set_mac_eee)
return -EOPNOTSUPP;
- ret = ds->ops->set_eee(ds, p->dp->index, p->phy, e);
+ ret = ds->ops->set_mac_eee(ds, dp->index, e);
if (ret)
return ret;
- if (p->phy)
- ret = phy_ethtool_set_eee(p->phy, e);
+ if (e->eee_enabled) {
+ ret = phy_init_eee(dev->phydev, 0);
+ if (ret)
+ return ret;
+ }
- return ret;
+ return phy_ethtool_set_eee(dev->phydev, e);
}
static int dsa_slave_get_eee(struct net_device *dev, struct ethtool_eee *e)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
int ret;
- if (!ds->ops->get_eee)
+ /* Port's PHY and MAC both need to be EEE capable */
+ if (!dev->phydev)
+ return -ENODEV;
+
+ if (!ds->ops->get_mac_eee)
return -EOPNOTSUPP;
- ret = ds->ops->get_eee(ds, p->dp->index, e);
+ ret = ds->ops->get_mac_eee(ds, dp->index, e);
if (ret)
return ret;
- if (p->phy)
- ret = phy_ethtool_get_eee(p->phy, e);
-
- return ret;
+ return phy_ethtool_get_eee(dev->phydev, e);
}
#ifdef CONFIG_NET_POLL_CONTROLLER
static int dsa_slave_netpoll_setup(struct net_device *dev,
struct netpoll_info *ni)
{
+ struct net_device *master = dsa_slave_to_master(dev);
struct dsa_slave_priv *p = netdev_priv(dev);
- struct net_device *master = dsa_master_netdev(p);
struct netpoll *netpoll;
int err = 0;
@@ -725,18 +680,18 @@ static void dsa_slave_poll_controller(struct net_device *dev)
static int dsa_slave_get_phys_port_name(struct net_device *dev,
char *name, size_t len)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_port *dp = dsa_slave_to_port(dev);
- if (snprintf(name, len, "p%d", p->dp->index) >= len)
+ if (snprintf(name, len, "p%d", dp->index) >= len)
return -EINVAL;
return 0;
}
static struct dsa_mall_tc_entry *
-dsa_slave_mall_tc_entry_find(struct dsa_slave_priv *p,
- unsigned long cookie)
+dsa_slave_mall_tc_entry_find(struct net_device *dev, unsigned long cookie)
{
+ struct dsa_slave_priv *p = netdev_priv(dev);
struct dsa_mall_tc_entry *mall_tc_entry;
list_for_each_entry(mall_tc_entry, &p->mall_tc_list, list)
@@ -747,17 +702,18 @@ dsa_slave_mall_tc_entry_find(struct dsa_slave_priv *p,
}
static int dsa_slave_add_cls_matchall(struct net_device *dev,
- __be16 protocol,
struct tc_cls_matchall_offload *cls,
bool ingress)
{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
struct dsa_slave_priv *p = netdev_priv(dev);
struct dsa_mall_tc_entry *mall_tc_entry;
- struct dsa_switch *ds = p->dp->ds;
+ __be16 protocol = cls->common.protocol;
struct net *net = dev_net(dev);
- struct dsa_slave_priv *to_p;
+ struct dsa_switch *ds = dp->ds;
struct net_device *to_dev;
const struct tc_action *a;
+ struct dsa_port *to_dp;
int err = -EOPNOTSUPP;
LIST_HEAD(actions);
int ifindex;
@@ -765,7 +721,7 @@ static int dsa_slave_add_cls_matchall(struct net_device *dev,
if (!ds->ops->port_mirror_add)
return err;
- if (!tc_single_action(cls->exts))
+ if (!tcf_exts_has_one_action(cls->exts))
return err;
tcf_exts_to_list(cls->exts, &actions);
@@ -790,13 +746,12 @@ static int dsa_slave_add_cls_matchall(struct net_device *dev,
mall_tc_entry->type = DSA_PORT_MALL_MIRROR;
mirror = &mall_tc_entry->mirror;
- to_p = netdev_priv(to_dev);
+ to_dp = dsa_slave_to_port(to_dev);
- mirror->to_local_port = to_p->dp->index;
+ mirror->to_local_port = to_dp->index;
mirror->ingress = ingress;
- err = ds->ops->port_mirror_add(ds, p->dp->index, mirror,
- ingress);
+ err = ds->ops->port_mirror_add(ds, dp->index, mirror, ingress);
if (err) {
kfree(mall_tc_entry);
return err;
@@ -811,14 +766,14 @@ static int dsa_slave_add_cls_matchall(struct net_device *dev,
static void dsa_slave_del_cls_matchall(struct net_device *dev,
struct tc_cls_matchall_offload *cls)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_port *dp = dsa_slave_to_port(dev);
struct dsa_mall_tc_entry *mall_tc_entry;
- struct dsa_switch *ds = p->dp->ds;
+ struct dsa_switch *ds = dp->ds;
if (!ds->ops->port_mirror_del)
return;
- mall_tc_entry = dsa_slave_mall_tc_entry_find(p, cls->cookie);
+ mall_tc_entry = dsa_slave_mall_tc_entry_find(dev, cls->cookie);
if (!mall_tc_entry)
return;
@@ -826,8 +781,7 @@ static void dsa_slave_del_cls_matchall(struct net_device *dev,
switch (mall_tc_entry->type) {
case DSA_PORT_MALL_MIRROR:
- ds->ops->port_mirror_del(ds, p->dp->index,
- &mall_tc_entry->mirror);
+ ds->ops->port_mirror_del(ds, dp->index, &mall_tc_entry->mirror);
break;
default:
WARN_ON(1);
@@ -836,67 +790,143 @@ static void dsa_slave_del_cls_matchall(struct net_device *dev,
kfree(mall_tc_entry);
}
-static int dsa_slave_setup_tc(struct net_device *dev, u32 handle,
- u32 chain_index, __be16 protocol,
- struct tc_to_netdev *tc)
+static int dsa_slave_setup_tc_cls_matchall(struct net_device *dev,
+ struct tc_cls_matchall_offload *cls,
+ bool ingress)
+{
+ if (cls->common.chain_index)
+ return -EOPNOTSUPP;
+
+ switch (cls->command) {
+ case TC_CLSMATCHALL_REPLACE:
+ return dsa_slave_add_cls_matchall(dev, cls, ingress);
+ case TC_CLSMATCHALL_DESTROY:
+ dsa_slave_del_cls_matchall(dev, cls);
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int dsa_slave_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+ void *cb_priv, bool ingress)
{
- bool ingress = TC_H_MAJ(handle) == TC_H_MAJ(TC_H_INGRESS);
+ struct net_device *dev = cb_priv;
- if (chain_index)
+ if (!tc_can_offload(dev))
return -EOPNOTSUPP;
- switch (tc->type) {
- case TC_SETUP_MATCHALL:
- switch (tc->cls_mall->command) {
- case TC_CLSMATCHALL_REPLACE:
- return dsa_slave_add_cls_matchall(dev, protocol,
- tc->cls_mall,
- ingress);
- case TC_CLSMATCHALL_DESTROY:
- dsa_slave_del_cls_matchall(dev, tc->cls_mall);
- return 0;
- }
+ switch (type) {
+ case TC_SETUP_CLSMATCHALL:
+ return dsa_slave_setup_tc_cls_matchall(dev, type_data, ingress);
default:
return -EOPNOTSUPP;
}
}
-void dsa_cpu_port_ethtool_init(struct ethtool_ops *ops)
+static int dsa_slave_setup_tc_block_cb_ig(enum tc_setup_type type,
+ void *type_data, void *cb_priv)
+{
+ return dsa_slave_setup_tc_block_cb(type, type_data, cb_priv, true);
+}
+
+static int dsa_slave_setup_tc_block_cb_eg(enum tc_setup_type type,
+ void *type_data, void *cb_priv)
{
- ops->get_sset_count = dsa_cpu_port_get_sset_count;
- ops->get_ethtool_stats = dsa_cpu_port_get_ethtool_stats;
- ops->get_strings = dsa_cpu_port_get_strings;
+ return dsa_slave_setup_tc_block_cb(type, type_data, cb_priv, false);
+}
+
+static int dsa_slave_setup_tc_block(struct net_device *dev,
+ struct tc_block_offload *f)
+{
+ tc_setup_cb_t *cb;
+
+ if (f->binder_type == TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+ cb = dsa_slave_setup_tc_block_cb_ig;
+ else if (f->binder_type == TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS)
+ cb = dsa_slave_setup_tc_block_cb_eg;
+ else
+ return -EOPNOTSUPP;
+
+ switch (f->command) {
+ case TC_BLOCK_BIND:
+ return tcf_block_cb_register(f->block, cb, dev, dev);
+ case TC_BLOCK_UNBIND:
+ tcf_block_cb_unregister(f->block, cb, dev);
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int dsa_slave_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ void *type_data)
+{
+ switch (type) {
+ case TC_SETUP_BLOCK:
+ return dsa_slave_setup_tc_block(dev, type_data);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static void dsa_slave_get_stats64(struct net_device *dev,
+ struct rtnl_link_stats64 *stats)
+{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ struct pcpu_sw_netstats *s;
+ unsigned int start;
+ int i;
+
+ netdev_stats_to_stats64(stats, &dev->stats);
+ for_each_possible_cpu(i) {
+ u64 tx_packets, tx_bytes, rx_packets, rx_bytes;
+
+ s = per_cpu_ptr(p->stats64, i);
+ do {
+ start = u64_stats_fetch_begin_irq(&s->syncp);
+ tx_packets = s->tx_packets;
+ tx_bytes = s->tx_bytes;
+ rx_packets = s->rx_packets;
+ rx_bytes = s->rx_bytes;
+ } while (u64_stats_fetch_retry_irq(&s->syncp, start));
+
+ stats->tx_packets += tx_packets;
+ stats->tx_bytes += tx_bytes;
+ stats->rx_packets += rx_packets;
+ stats->rx_bytes += rx_bytes;
+ }
}
static int dsa_slave_get_rxnfc(struct net_device *dev,
struct ethtool_rxnfc *nfc, u32 *rule_locs)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
if (!ds->ops->get_rxnfc)
return -EOPNOTSUPP;
- return ds->ops->get_rxnfc(ds, p->dp->index, nfc, rule_locs);
+ return ds->ops->get_rxnfc(ds, dp->index, nfc, rule_locs);
}
static int dsa_slave_set_rxnfc(struct net_device *dev,
struct ethtool_rxnfc *nfc)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
if (!ds->ops->set_rxnfc)
return -EOPNOTSUPP;
- return ds->ops->set_rxnfc(ds, p->dp->index, nfc);
+ return ds->ops->set_rxnfc(ds, dp->index, nfc);
}
static const struct ethtool_ops dsa_slave_ethtool_ops = {
.get_drvinfo = dsa_slave_get_drvinfo,
.get_regs_len = dsa_slave_get_regs_len,
.get_regs = dsa_slave_get_regs,
- .nway_reset = dsa_slave_nway_reset,
+ .nway_reset = phy_ethtool_nway_reset,
.get_link = dsa_slave_get_link,
.get_eeprom_len = dsa_slave_get_eeprom_len,
.get_eeprom = dsa_slave_get_eeprom,
@@ -908,8 +938,8 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = {
.get_wol = dsa_slave_get_wol,
.set_eee = dsa_slave_set_eee,
.get_eee = dsa_slave_get_eee,
- .get_link_ksettings = dsa_slave_get_link_ksettings,
- .set_link_ksettings = dsa_slave_set_link_ksettings,
+ .get_link_ksettings = phy_ethtool_get_link_ksettings,
+ .set_link_ksettings = phy_ethtool_set_link_ksettings,
.get_rxnfc = dsa_slave_get_rxnfc,
.set_rxnfc = dsa_slave_set_rxnfc,
};
@@ -921,9 +951,9 @@ static const struct net_device_ops dsa_slave_netdev_ops = {
.ndo_change_rx_flags = dsa_slave_change_rx_flags,
.ndo_set_rx_mode = dsa_slave_set_rx_mode,
.ndo_set_mac_address = dsa_slave_set_mac_address,
- .ndo_fdb_add = switchdev_port_fdb_add,
- .ndo_fdb_del = switchdev_port_fdb_del,
- .ndo_fdb_dump = switchdev_port_fdb_dump,
+ .ndo_fdb_add = dsa_legacy_fdb_add,
+ .ndo_fdb_del = dsa_legacy_fdb_del,
+ .ndo_fdb_dump = dsa_slave_fdb_dump,
.ndo_do_ioctl = dsa_slave_ioctl,
.ndo_get_iflink = dsa_slave_get_iflink,
#ifdef CONFIG_NET_POLL_CONTROLLER
@@ -931,11 +961,9 @@ static const struct net_device_ops dsa_slave_netdev_ops = {
.ndo_netpoll_cleanup = dsa_slave_netpoll_cleanup,
.ndo_poll_controller = dsa_slave_poll_controller,
#endif
- .ndo_bridge_getlink = switchdev_port_bridge_getlink,
- .ndo_bridge_setlink = switchdev_port_bridge_setlink,
- .ndo_bridge_dellink = switchdev_port_bridge_dellink,
.ndo_get_phys_port_name = dsa_slave_get_phys_port_name,
.ndo_setup_tc = dsa_slave_setup_tc,
+ .ndo_get_stats64 = dsa_slave_get_stats64,
};
static const struct switchdev_ops dsa_slave_switchdev_ops = {
@@ -943,7 +971,6 @@ static const struct switchdev_ops dsa_slave_switchdev_ops = {
.switchdev_port_attr_set = dsa_slave_port_attr_set,
.switchdev_port_obj_add = dsa_slave_port_obj_add,
.switchdev_port_obj_del = dsa_slave_port_obj_del,
- .switchdev_port_obj_dump = dsa_slave_port_obj_dump,
};
static struct device_type dsa_type = {
@@ -952,78 +979,81 @@ static struct device_type dsa_type = {
static void dsa_slave_adjust_link(struct net_device *dev)
{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
+ struct dsa_switch *ds = dp->ds;
unsigned int status_changed = 0;
- if (p->old_link != p->phy->link) {
+ if (p->old_link != dev->phydev->link) {
status_changed = 1;
- p->old_link = p->phy->link;
+ p->old_link = dev->phydev->link;
}
- if (p->old_duplex != p->phy->duplex) {
+ if (p->old_duplex != dev->phydev->duplex) {
status_changed = 1;
- p->old_duplex = p->phy->duplex;
+ p->old_duplex = dev->phydev->duplex;
}
- if (p->old_pause != p->phy->pause) {
+ if (p->old_pause != dev->phydev->pause) {
status_changed = 1;
- p->old_pause = p->phy->pause;
+ p->old_pause = dev->phydev->pause;
}
if (ds->ops->adjust_link && status_changed)
- ds->ops->adjust_link(ds, p->dp->index, p->phy);
+ ds->ops->adjust_link(ds, dp->index, dev->phydev);
if (status_changed)
- phy_print_status(p->phy);
+ phy_print_status(dev->phydev);
}
static int dsa_slave_fixed_link_update(struct net_device *dev,
struct fixed_phy_status *status)
{
- struct dsa_slave_priv *p;
struct dsa_switch *ds;
+ struct dsa_port *dp;
if (dev) {
- p = netdev_priv(dev);
- ds = p->dp->ds;
+ dp = dsa_slave_to_port(dev);
+ ds = dp->ds;
if (ds->ops->fixed_link_update)
- ds->ops->fixed_link_update(ds, p->dp->index, status);
+ ds->ops->fixed_link_update(ds, dp->index, status);
}
return 0;
}
/* slave device setup *******************************************************/
-static int dsa_slave_phy_connect(struct dsa_slave_priv *p,
- struct net_device *slave_dev,
- int addr)
+static int dsa_slave_phy_connect(struct net_device *slave_dev, int addr)
{
- struct dsa_switch *ds = p->dp->ds;
+ struct dsa_port *dp = dsa_slave_to_port(slave_dev);
+ struct dsa_slave_priv *p = netdev_priv(slave_dev);
+ struct dsa_switch *ds = dp->ds;
- p->phy = mdiobus_get_phy(ds->slave_mii_bus, addr);
- if (!p->phy) {
+ slave_dev->phydev = mdiobus_get_phy(ds->slave_mii_bus, addr);
+ if (!slave_dev->phydev) {
netdev_err(slave_dev, "no phy at %d\n", addr);
return -ENODEV;
}
/* Use already configured phy mode */
if (p->phy_interface == PHY_INTERFACE_MODE_NA)
- p->phy_interface = p->phy->interface;
- return phy_connect_direct(slave_dev, p->phy, dsa_slave_adjust_link,
- p->phy_interface);
+ p->phy_interface = slave_dev->phydev->interface;
+
+ return phy_connect_direct(slave_dev, slave_dev->phydev,
+ dsa_slave_adjust_link, p->phy_interface);
}
-static int dsa_slave_phy_setup(struct dsa_slave_priv *p,
- struct net_device *slave_dev)
+static int dsa_slave_phy_setup(struct net_device *slave_dev)
{
- struct dsa_switch *ds = p->dp->ds;
- struct device_node *phy_dn, *port_dn;
+ struct dsa_port *dp = dsa_slave_to_port(slave_dev);
+ struct dsa_slave_priv *p = netdev_priv(slave_dev);
+ struct device_node *port_dn = dp->dn;
+ struct dsa_switch *ds = dp->ds;
+ struct device_node *phy_dn;
bool phy_is_fixed = false;
u32 phy_flags = 0;
int mode, ret;
- port_dn = p->dp->dn;
mode = of_get_phy_mode(port_dn);
if (mode < 0)
mode = PHY_INTERFACE_MODE_NA;
@@ -1044,52 +1074,35 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p,
}
if (ds->ops->get_phy_flags)
- phy_flags = ds->ops->get_phy_flags(ds, p->dp->index);
+ phy_flags = ds->ops->get_phy_flags(ds, dp->index);
if (phy_dn) {
- int phy_id = of_mdio_parse_addr(&slave_dev->dev, phy_dn);
-
- /* If this PHY address is part of phys_mii_mask, which means
- * that we need to divert reads and writes to/from it, then we
- * want to bind this device using the slave MII bus created by
- * DSA to make that happen.
- */
- if (!phy_is_fixed && phy_id >= 0 &&
- (ds->phys_mii_mask & (1 << phy_id))) {
- ret = dsa_slave_phy_connect(p, slave_dev, phy_id);
- if (ret) {
- netdev_err(slave_dev, "failed to connect to phy%d: %d\n", phy_id, ret);
- of_node_put(phy_dn);
- return ret;
- }
- } else {
- p->phy = of_phy_connect(slave_dev, phy_dn,
- dsa_slave_adjust_link,
- phy_flags,
- p->phy_interface);
- }
-
+ slave_dev->phydev = of_phy_connect(slave_dev, phy_dn,
+ dsa_slave_adjust_link,
+ phy_flags,
+ p->phy_interface);
of_node_put(phy_dn);
}
- if (p->phy && phy_is_fixed)
- fixed_phy_set_link_update(p->phy, dsa_slave_fixed_link_update);
+ if (slave_dev->phydev && phy_is_fixed)
+ fixed_phy_set_link_update(slave_dev->phydev,
+ dsa_slave_fixed_link_update);
/* We could not connect to a designated PHY, so use the switch internal
* MDIO bus instead
*/
- if (!p->phy) {
- ret = dsa_slave_phy_connect(p, slave_dev, p->dp->index);
+ if (!slave_dev->phydev) {
+ ret = dsa_slave_phy_connect(slave_dev, dp->index);
if (ret) {
netdev_err(slave_dev, "failed to connect to port %d: %d\n",
- p->dp->index, ret);
+ dp->index, ret);
if (phy_is_fixed)
of_phy_deregister_fixed_link(port_dn);
return ret;
}
}
- phy_attached_info(p->phy);
+ phy_attached_info(slave_dev->phydev);
return 0;
}
@@ -1109,12 +1122,12 @@ int dsa_slave_suspend(struct net_device *slave_dev)
netif_device_detach(slave_dev);
- if (p->phy) {
- phy_stop(p->phy);
+ if (slave_dev->phydev) {
+ phy_stop(slave_dev->phydev);
p->old_pause = -1;
p->old_link = -1;
p->old_duplex = -1;
- phy_suspend(p->phy);
+ phy_suspend(slave_dev->phydev);
}
return 0;
@@ -1122,33 +1135,46 @@ int dsa_slave_suspend(struct net_device *slave_dev)
int dsa_slave_resume(struct net_device *slave_dev)
{
- struct dsa_slave_priv *p = netdev_priv(slave_dev);
-
netif_device_attach(slave_dev);
- if (p->phy) {
- phy_resume(p->phy);
- phy_start(p->phy);
+ if (slave_dev->phydev) {
+ phy_resume(slave_dev->phydev);
+ phy_start(slave_dev->phydev);
}
return 0;
}
-int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
- int port, const char *name)
+static void dsa_slave_notify(struct net_device *dev, unsigned long val)
{
- struct dsa_switch_tree *dst = ds->dst;
- struct net_device *master;
+ struct net_device *master = dsa_slave_to_master(dev);
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_notifier_register_info rinfo = {
+ .switch_number = dp->ds->index,
+ .port_number = dp->index,
+ .master = master,
+ .info.dev = dev,
+ };
+
+ call_dsa_notifiers(val, dev, &rinfo.info);
+}
+
+int dsa_slave_create(struct dsa_port *port)
+{
+ const struct dsa_port *cpu_dp = port->cpu_dp;
+ struct net_device *master = cpu_dp->master;
+ struct dsa_switch *ds = port->ds;
+ const char *name = port->name;
struct net_device *slave_dev;
struct dsa_slave_priv *p;
- struct dsa_port *cpu_dp;
int ret;
- cpu_dp = ds->dst->cpu_dp;
- master = cpu_dp->netdev;
+ if (!ds->num_tx_queues)
+ ds->num_tx_queues = 1;
- slave_dev = alloc_netdev(sizeof(struct dsa_slave_priv), name,
- NET_NAME_UNKNOWN, ether_setup);
+ slave_dev = alloc_netdev_mqs(sizeof(struct dsa_slave_priv), name,
+ NET_NAME_UNKNOWN, ether_setup,
+ ds->num_tx_queues, 1);
if (slave_dev == NULL)
return -ENOMEM;
@@ -1166,57 +1192,72 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
netdev_for_each_tx_queue(slave_dev, dsa_slave_set_lockdep_class_one,
NULL);
- SET_NETDEV_DEV(slave_dev, parent);
- slave_dev->dev.of_node = ds->ports[port].dn;
+ SET_NETDEV_DEV(slave_dev, port->ds->dev);
+ slave_dev->dev.of_node = port->dn;
slave_dev->vlan_features = master->vlan_features;
p = netdev_priv(slave_dev);
- p->dp = &ds->ports[port];
+ p->stats64 = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
+ if (!p->stats64) {
+ free_netdev(slave_dev);
+ return -ENOMEM;
+ }
+ p->dp = port;
INIT_LIST_HEAD(&p->mall_tc_list);
- p->xmit = dst->tag_ops->xmit;
+ p->xmit = cpu_dp->tag_ops->xmit;
p->old_pause = -1;
p->old_link = -1;
p->old_duplex = -1;
- ds->ports[port].netdev = slave_dev;
- ret = register_netdev(slave_dev);
- if (ret) {
- netdev_err(master, "error %d registering interface %s\n",
- ret, slave_dev->name);
- ds->ports[port].netdev = NULL;
- free_netdev(slave_dev);
- return ret;
- }
+ port->slave = slave_dev;
netif_carrier_off(slave_dev);
- ret = dsa_slave_phy_setup(p, slave_dev);
+ ret = dsa_slave_phy_setup(slave_dev);
if (ret) {
netdev_err(master, "error %d setting up slave phy\n", ret);
- unregister_netdev(slave_dev);
- free_netdev(slave_dev);
- return ret;
+ goto out_free;
+ }
+
+ dsa_slave_notify(slave_dev, DSA_PORT_REGISTER);
+
+ ret = register_netdev(slave_dev);
+ if (ret) {
+ netdev_err(master, "error %d registering interface %s\n",
+ ret, slave_dev->name);
+ goto out_phy;
}
return 0;
+
+out_phy:
+ phy_disconnect(slave_dev->phydev);
+ if (of_phy_is_fixed_link(port->dn))
+ of_phy_deregister_fixed_link(port->dn);
+out_free:
+ free_percpu(p->stats64);
+ free_netdev(slave_dev);
+ port->slave = NULL;
+ return ret;
}
void dsa_slave_destroy(struct net_device *slave_dev)
{
+ struct dsa_port *dp = dsa_slave_to_port(slave_dev);
struct dsa_slave_priv *p = netdev_priv(slave_dev);
- struct device_node *port_dn;
-
- port_dn = p->dp->dn;
+ struct device_node *port_dn = dp->dn;
netif_carrier_off(slave_dev);
- if (p->phy) {
- phy_disconnect(p->phy);
+ if (slave_dev->phydev) {
+ phy_disconnect(slave_dev->phydev);
if (of_phy_is_fixed_link(port_dn))
of_phy_deregister_fixed_link(port_dn);
}
+ dsa_slave_notify(slave_dev, DSA_PORT_UNREGISTER);
unregister_netdev(slave_dev);
+ free_percpu(p->stats64);
free_netdev(slave_dev);
}
@@ -1228,8 +1269,7 @@ static bool dsa_slave_dev_check(struct net_device *dev)
static int dsa_slave_changeupper(struct net_device *dev,
struct netdev_notifier_changeupper_info *info)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_port *dp = p->dp;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
int err = NOTIFY_DONE;
if (netif_is_bridge_master(info->upper_dev)) {
@@ -1250,7 +1290,7 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb,
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
- if (dev->netdev_ops != &dsa_slave_netdev_ops)
+ if (!dsa_slave_dev_check(dev))
return NOTIFY_DONE;
if (event == NETDEV_CHANGEUPPER)
@@ -1259,19 +1299,142 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb,
return NOTIFY_DONE;
}
+struct dsa_switchdev_event_work {
+ struct work_struct work;
+ struct switchdev_notifier_fdb_info fdb_info;
+ struct net_device *dev;
+ unsigned long event;
+};
+
+static void dsa_slave_switchdev_event_work(struct work_struct *work)
+{
+ struct dsa_switchdev_event_work *switchdev_work =
+ container_of(work, struct dsa_switchdev_event_work, work);
+ struct net_device *dev = switchdev_work->dev;
+ struct switchdev_notifier_fdb_info *fdb_info;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ int err;
+
+ rtnl_lock();
+ switch (switchdev_work->event) {
+ case SWITCHDEV_FDB_ADD_TO_DEVICE:
+ fdb_info = &switchdev_work->fdb_info;
+ err = dsa_port_fdb_add(dp, fdb_info->addr, fdb_info->vid);
+ if (err) {
+ netdev_dbg(dev, "fdb add failed err=%d\n", err);
+ break;
+ }
+ call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED, dev,
+ &fdb_info->info);
+ break;
+
+ case SWITCHDEV_FDB_DEL_TO_DEVICE:
+ fdb_info = &switchdev_work->fdb_info;
+ err = dsa_port_fdb_del(dp, fdb_info->addr, fdb_info->vid);
+ if (err) {
+ netdev_dbg(dev, "fdb del failed err=%d\n", err);
+ dev_close(dev);
+ }
+ break;
+ }
+ rtnl_unlock();
+
+ kfree(switchdev_work->fdb_info.addr);
+ kfree(switchdev_work);
+ dev_put(dev);
+}
+
+static int
+dsa_slave_switchdev_fdb_work_init(struct dsa_switchdev_event_work *
+ switchdev_work,
+ const struct switchdev_notifier_fdb_info *
+ fdb_info)
+{
+ memcpy(&switchdev_work->fdb_info, fdb_info,
+ sizeof(switchdev_work->fdb_info));
+ switchdev_work->fdb_info.addr = kzalloc(ETH_ALEN, GFP_ATOMIC);
+ if (!switchdev_work->fdb_info.addr)
+ return -ENOMEM;
+ ether_addr_copy((u8 *)switchdev_work->fdb_info.addr,
+ fdb_info->addr);
+ return 0;
+}
+
+/* Called under rcu_read_lock() */
+static int dsa_slave_switchdev_event(struct notifier_block *unused,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
+ struct dsa_switchdev_event_work *switchdev_work;
+
+ if (!dsa_slave_dev_check(dev))
+ return NOTIFY_DONE;
+
+ switchdev_work = kzalloc(sizeof(*switchdev_work), GFP_ATOMIC);
+ if (!switchdev_work)
+ return NOTIFY_BAD;
+
+ INIT_WORK(&switchdev_work->work,
+ dsa_slave_switchdev_event_work);
+ switchdev_work->dev = dev;
+ switchdev_work->event = event;
+
+ switch (event) {
+ case SWITCHDEV_FDB_ADD_TO_DEVICE: /* fall through */
+ case SWITCHDEV_FDB_DEL_TO_DEVICE:
+ if (dsa_slave_switchdev_fdb_work_init(switchdev_work,
+ ptr))
+ goto err_fdb_work_init;
+ dev_hold(dev);
+ break;
+ default:
+ kfree(switchdev_work);
+ return NOTIFY_DONE;
+ }
+
+ dsa_schedule_work(&switchdev_work->work);
+ return NOTIFY_OK;
+
+err_fdb_work_init:
+ kfree(switchdev_work);
+ return NOTIFY_BAD;
+}
+
static struct notifier_block dsa_slave_nb __read_mostly = {
- .notifier_call = dsa_slave_netdevice_event,
+ .notifier_call = dsa_slave_netdevice_event,
+};
+
+static struct notifier_block dsa_slave_switchdev_notifier = {
+ .notifier_call = dsa_slave_switchdev_event,
};
int dsa_slave_register_notifier(void)
{
- return register_netdevice_notifier(&dsa_slave_nb);
+ int err;
+
+ err = register_netdevice_notifier(&dsa_slave_nb);
+ if (err)
+ return err;
+
+ err = register_switchdev_notifier(&dsa_slave_switchdev_notifier);
+ if (err)
+ goto err_switchdev_nb;
+
+ return 0;
+
+err_switchdev_nb:
+ unregister_netdevice_notifier(&dsa_slave_nb);
+ return err;
}
void dsa_slave_unregister_notifier(void)
{
int err;
+ err = unregister_switchdev_notifier(&dsa_slave_switchdev_notifier);
+ if (err)
+ pr_err("DSA: failed to unregister switchdev notifier (%d)\n", err);
+
err = unregister_netdevice_notifier(&dsa_slave_nb);
if (err)
pr_err("DSA: failed to unregister slave notifier (%d)\n", err);
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index 97e2e9c8cf3f..29608d087a7c 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -83,30 +83,20 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds,
static int dsa_switch_fdb_add(struct dsa_switch *ds,
struct dsa_notifier_fdb_info *info)
{
- const struct switchdev_obj_port_fdb *fdb = info->fdb;
- struct switchdev_trans *trans = info->trans;
-
/* Do not care yet about other switch chips of the fabric */
if (ds->index != info->sw_index)
return 0;
- if (switchdev_trans_ph_prepare(trans)) {
- if (!ds->ops->port_fdb_prepare || !ds->ops->port_fdb_add)
- return -EOPNOTSUPP;
-
- return ds->ops->port_fdb_prepare(ds, info->port, fdb, trans);
- }
-
- ds->ops->port_fdb_add(ds, info->port, fdb, trans);
+ if (!ds->ops->port_fdb_add)
+ return -EOPNOTSUPP;
- return 0;
+ return ds->ops->port_fdb_add(ds, info->port, info->addr,
+ info->vid);
}
static int dsa_switch_fdb_del(struct dsa_switch *ds,
struct dsa_notifier_fdb_info *info)
{
- const struct switchdev_obj_port_fdb *fdb = info->fdb;
-
/* Do not care yet about other switch chips of the fabric */
if (ds->index != info->sw_index)
return 0;
@@ -114,7 +104,8 @@ static int dsa_switch_fdb_del(struct dsa_switch *ds,
if (!ds->ops->port_fdb_del)
return -EOPNOTSUPP;
- return ds->ops->port_fdb_del(ds, info->port, fdb);
+ return ds->ops->port_fdb_del(ds, info->port, info->addr,
+ info->vid);
}
static int dsa_switch_mdb_add(struct dsa_switch *ds,
@@ -130,7 +121,7 @@ static int dsa_switch_mdb_add(struct dsa_switch *ds,
if (ds->index == info->sw_index)
set_bit(info->port, group);
for (port = 0; port < ds->num_ports; port++)
- if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))
+ if (dsa_is_dsa_port(ds, port))
set_bit(port, group);
if (switchdev_trans_ph_prepare(trans)) {
@@ -142,6 +133,8 @@ static int dsa_switch_mdb_add(struct dsa_switch *ds,
if (err)
return err;
}
+
+ return 0;
}
for_each_set_bit(port, group, ds->num_ports)
@@ -189,6 +182,8 @@ static int dsa_switch_vlan_add(struct dsa_switch *ds,
if (err)
return err;
}
+
+ return 0;
}
for_each_set_bit(port, members, ds->num_ports)
diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c
index c697d9815177..e6e0b7b6025c 100644
--- a/net/dsa/tag_brcm.c
+++ b/net/dsa/tag_brcm.c
@@ -59,9 +59,12 @@
#define BRCM_EG_TC_MASK 0x7
#define BRCM_EG_PID_MASK 0x1f
-static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb, struct net_device *dev)
+static struct sk_buff *brcm_tag_xmit_ll(struct sk_buff *skb,
+ struct net_device *dev,
+ unsigned int offset)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ u16 queue = skb_get_queue_mapping(skb);
u8 *brcm_tag;
if (skb_cow_head(skb, BRCM_TAG_LEN) < 0)
@@ -69,40 +72,42 @@ static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb, struct net_device *dev
skb_push(skb, BRCM_TAG_LEN);
- memmove(skb->data, skb->data + BRCM_TAG_LEN, 2 * ETH_ALEN);
+ if (offset)
+ memmove(skb->data, skb->data + BRCM_TAG_LEN, offset);
- /* Build the tag after the MAC Source Address */
- brcm_tag = skb->data + 2 * ETH_ALEN;
+ brcm_tag = skb->data + offset;
/* Set the ingress opcode, traffic class, tag enforcment is
* deprecated
*/
brcm_tag[0] = (1 << BRCM_OPCODE_SHIFT) |
- ((skb->priority << BRCM_IG_TC_SHIFT) & BRCM_IG_TC_MASK);
+ ((queue & BRCM_IG_TC_MASK) << BRCM_IG_TC_SHIFT);
brcm_tag[1] = 0;
brcm_tag[2] = 0;
- if (p->dp->index == 8)
+ if (dp->index == 8)
brcm_tag[2] = BRCM_IG_DSTMAP2_MASK;
- brcm_tag[3] = (1 << p->dp->index) & BRCM_IG_DSTMAP1_MASK;
+ brcm_tag[3] = (1 << dp->index) & BRCM_IG_DSTMAP1_MASK;
+
+ /* Now tell the master network device about the desired output queue
+ * as well
+ */
+ skb_set_queue_mapping(skb, BRCM_TAG_SET_PORT_QUEUE(dp->index, queue));
return skb;
}
-static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt,
- struct net_device *orig_dev)
+static struct sk_buff *brcm_tag_rcv_ll(struct sk_buff *skb,
+ struct net_device *dev,
+ struct packet_type *pt,
+ unsigned int offset)
{
- struct dsa_switch_tree *dst = dev->dsa_ptr;
- struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
- struct dsa_switch *ds = cpu_dp->ds;
int source_port;
u8 *brcm_tag;
if (unlikely(!pskb_may_pull(skb, BRCM_TAG_LEN)))
return NULL;
- /* skb->data points to the EtherType, the tag is right before it */
- brcm_tag = skb->data - 2;
+ brcm_tag = skb->data - offset;
/* The opcode should never be different than 0b000 */
if (unlikely((brcm_tag[0] >> BRCM_OPCODE_SHIFT) & BRCM_OPCODE_MASK))
@@ -117,24 +122,67 @@ static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev,
/* Locate which port this is coming from */
source_port = brcm_tag[3] & BRCM_EG_PID_MASK;
- /* Validate port against switch setup, either the port is totally */
- if (source_port >= ds->num_ports || !ds->ports[source_port].netdev)
+ skb->dev = dsa_master_find_slave(dev, 0, source_port);
+ if (!skb->dev)
return NULL;
/* Remove Broadcom tag and update checksum */
skb_pull_rcsum(skb, BRCM_TAG_LEN);
+ return skb;
+}
+
+#ifdef CONFIG_NET_DSA_TAG_BRCM
+static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ /* Build the tag after the MAC Source Address */
+ return brcm_tag_xmit_ll(skb, dev, 2 * ETH_ALEN);
+}
+
+
+static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt)
+{
+ struct sk_buff *nskb;
+
+ /* skb->data points to the EtherType, the tag is right before it */
+ nskb = brcm_tag_rcv_ll(skb, dev, pt, 2);
+ if (!nskb)
+ return nskb;
+
/* Move the Ethernet DA and SA */
- memmove(skb->data - ETH_HLEN,
- skb->data - ETH_HLEN - BRCM_TAG_LEN,
+ memmove(nskb->data - ETH_HLEN,
+ nskb->data - ETH_HLEN - BRCM_TAG_LEN,
2 * ETH_ALEN);
- skb->dev = ds->ports[source_port].netdev;
-
- return skb;
+ return nskb;
}
const struct dsa_device_ops brcm_netdev_ops = {
.xmit = brcm_tag_xmit,
.rcv = brcm_tag_rcv,
};
+#endif
+
+#ifdef CONFIG_NET_DSA_TAG_BRCM_PREPEND
+static struct sk_buff *brcm_tag_xmit_prepend(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ /* tag is prepended to the packet */
+ return brcm_tag_xmit_ll(skb, dev, 0);
+}
+
+static struct sk_buff *brcm_tag_rcv_prepend(struct sk_buff *skb,
+ struct net_device *dev,
+ struct packet_type *pt)
+{
+ /* tag is prepended to the packet */
+ return brcm_tag_rcv_ll(skb, dev, pt, ETH_HLEN);
+}
+
+const struct dsa_device_ops brcm_prepend_netdev_ops = {
+ .xmit = brcm_tag_xmit_prepend,
+ .rcv = brcm_tag_rcv_prepend,
+};
+#endif
diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c
index 12867a4b458f..cd13cfc542ce 100644
--- a/net/dsa/tag_dsa.c
+++ b/net/dsa/tag_dsa.c
@@ -18,7 +18,7 @@
static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_port *dp = dsa_slave_to_port(dev);
u8 *dsa_header;
/*
@@ -34,8 +34,8 @@ static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev)
* Construct tagged FROM_CPU DSA tag from 802.1q tag.
*/
dsa_header = skb->data + 2 * ETH_ALEN;
- dsa_header[0] = 0x60 | p->dp->ds->index;
- dsa_header[1] = p->dp->index << 3;
+ dsa_header[0] = 0x60 | dp->ds->index;
+ dsa_header[1] = dp->index << 3;
/*
* Move CFI field from byte 2 to byte 1.
@@ -55,8 +55,8 @@ static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev)
* Construct untagged FROM_CPU DSA tag.
*/
dsa_header = skb->data + 2 * ETH_ALEN;
- dsa_header[0] = 0x40 | p->dp->ds->index;
- dsa_header[1] = p->dp->index << 3;
+ dsa_header[0] = 0x40 | dp->ds->index;
+ dsa_header[1] = dp->index << 3;
dsa_header[2] = 0x00;
dsa_header[3] = 0x00;
}
@@ -65,11 +65,8 @@ static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev)
}
static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt,
- struct net_device *orig_dev)
+ struct packet_type *pt)
{
- struct dsa_switch_tree *dst = dev->dsa_ptr;
- struct dsa_switch *ds;
u8 *dsa_header;
int source_device;
int source_port;
@@ -94,18 +91,8 @@ static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev,
source_device = dsa_header[0] & 0x1f;
source_port = (dsa_header[1] >> 3) & 0x1f;
- /*
- * Check that the source device exists and that the source
- * port is a registered DSA port.
- */
- if (source_device >= DSA_MAX_SWITCHES)
- return NULL;
-
- ds = dst->ds[source_device];
- if (!ds)
- return NULL;
-
- if (source_port >= ds->num_ports || !ds->ports[source_port].netdev)
+ skb->dev = dsa_master_find_slave(dev, source_device, source_port);
+ if (!skb->dev)
return NULL;
/*
@@ -154,7 +141,7 @@ static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev,
2 * ETH_ALEN);
}
- skb->dev = ds->ports[source_port].netdev;
+ skb->offload_fwd_mark = 1;
return skb;
}
diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c
index 67a9d26f9075..4083326b806e 100644
--- a/net/dsa/tag_edsa.c
+++ b/net/dsa/tag_edsa.c
@@ -19,7 +19,7 @@
static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_port *dp = dsa_slave_to_port(dev);
u8 *edsa_header;
/*
@@ -43,8 +43,8 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev)
edsa_header[1] = ETH_P_EDSA & 0xff;
edsa_header[2] = 0x00;
edsa_header[3] = 0x00;
- edsa_header[4] = 0x60 | p->dp->ds->index;
- edsa_header[5] = p->dp->index << 3;
+ edsa_header[4] = 0x60 | dp->ds->index;
+ edsa_header[5] = dp->index << 3;
/*
* Move CFI field from byte 6 to byte 5.
@@ -68,8 +68,8 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev)
edsa_header[1] = ETH_P_EDSA & 0xff;
edsa_header[2] = 0x00;
edsa_header[3] = 0x00;
- edsa_header[4] = 0x40 | p->dp->ds->index;
- edsa_header[5] = p->dp->index << 3;
+ edsa_header[4] = 0x40 | dp->ds->index;
+ edsa_header[5] = dp->index << 3;
edsa_header[6] = 0x00;
edsa_header[7] = 0x00;
}
@@ -78,11 +78,8 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev)
}
static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt,
- struct net_device *orig_dev)
+ struct packet_type *pt)
{
- struct dsa_switch_tree *dst = dev->dsa_ptr;
- struct dsa_switch *ds;
u8 *edsa_header;
int source_device;
int source_port;
@@ -107,18 +104,8 @@ static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev,
source_device = edsa_header[0] & 0x1f;
source_port = (edsa_header[1] >> 3) & 0x1f;
- /*
- * Check that the source device exists and that the source
- * port is a registered DSA port.
- */
- if (source_device >= DSA_MAX_SWITCHES)
- return NULL;
-
- ds = dst->ds[source_device];
- if (!ds)
- return NULL;
-
- if (source_port >= ds->num_ports || !ds->ports[source_port].netdev)
+ skb->dev = dsa_master_find_slave(dev, source_device, source_port);
+ if (!skb->dev)
return NULL;
/*
@@ -173,7 +160,7 @@ static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev,
2 * ETH_ALEN);
}
- skb->dev = ds->ports[source_port].netdev;
+ skb->offload_fwd_mark = 1;
return skb;
}
diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c
index de66ca8e6201..0f62effad88f 100644
--- a/net/dsa/tag_ksz.c
+++ b/net/dsa/tag_ksz.c
@@ -34,7 +34,7 @@
static struct sk_buff *ksz_xmit(struct sk_buff *skb, struct net_device *dev)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_port *dp = dsa_slave_to_port(dev);
struct sk_buff *nskb;
int padlen;
u8 *tag;
@@ -42,7 +42,8 @@ static struct sk_buff *ksz_xmit(struct sk_buff *skb, struct net_device *dev)
padlen = (skb->len >= ETH_ZLEN) ? 0 : ETH_ZLEN - skb->len;
if (skb_tailroom(skb) >= padlen + KSZ_INGRESS_TAG_LEN) {
- if (skb_put_padto(skb, skb->len + padlen))
+ /* Let dsa_slave_xmit() free skb */
+ if (__skb_put_padto(skb, skb->len + padlen, false))
return NULL;
nskb = skb;
@@ -60,41 +61,38 @@ static struct sk_buff *ksz_xmit(struct sk_buff *skb, struct net_device *dev)
skb_transport_header(skb) - skb->head);
skb_copy_and_csum_dev(skb, skb_put(nskb, skb->len));
- if (skb_put_padto(nskb, nskb->len + padlen)) {
- kfree_skb(nskb);
+ /* Let skb_put_padto() free nskb, and let dsa_slave_xmit() free
+ * skb
+ */
+ if (skb_put_padto(nskb, nskb->len + padlen))
return NULL;
- }
- kfree_skb(skb);
+ consume_skb(skb);
}
tag = skb_put(nskb, KSZ_INGRESS_TAG_LEN);
tag[0] = 0;
- tag[1] = 1 << p->dp->index; /* destination port */
+ tag[1] = 1 << dp->index; /* destination port */
return nskb;
}
static struct sk_buff *ksz_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt,
- struct net_device *orig_dev)
+ struct packet_type *pt)
{
- struct dsa_switch_tree *dst = dev->dsa_ptr;
- struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
- struct dsa_switch *ds = cpu_dp->ds;
u8 *tag;
int source_port;
tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN;
source_port = tag[0] & 7;
- if (source_port >= ds->num_ports || !ds->ports[source_port].netdev)
+
+ skb->dev = dsa_master_find_slave(dev, 0, source_port);
+ if (!skb->dev)
return NULL;
pskb_trim_rcsum(skb, skb->len - KSZ_EGRESS_TAG_LEN);
- skb->dev = ds->ports[source_port].netdev;
-
return skb;
}
diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c
index 247774d149f9..548c00254c07 100644
--- a/net/dsa/tag_lan9303.c
+++ b/net/dsa/tag_lan9303.c
@@ -11,6 +11,7 @@
* GNU General Public License for more details.
*
*/
+#include <linux/dsa/lan9303.h>
#include <linux/etherdevice.h>
#include <linux/list.h>
#include <linux/slab.h>
@@ -39,11 +40,30 @@
*/
#define LAN9303_TAG_LEN 4
-#define LAN9303_MAX_PORTS 3
+# define LAN9303_TAG_TX_USE_ALR BIT(3)
+# define LAN9303_TAG_TX_STP_OVERRIDE BIT(4)
+# define LAN9303_TAG_RX_IGMP BIT(3)
+# define LAN9303_TAG_RX_STP BIT(4)
+# define LAN9303_TAG_RX_TRAPPED_TO_CPU (LAN9303_TAG_RX_IGMP | \
+ LAN9303_TAG_RX_STP)
+
+/* Decide whether to transmit using ALR lookup, or transmit directly to
+ * port using tag. ALR learning is performed only when using ALR lookup.
+ * If the two external ports are bridged and the frame is unicast,
+ * then use ALR lookup to allow ALR learning on CPU port.
+ * Otherwise transmit directly to port with STP state override.
+ * See also: lan9303_separate_ports() and lan9303.pdf 6.4.10.1
+ */
+static int lan9303_xmit_use_arl(struct dsa_port *dp, u8 *dest_addr)
+{
+ struct lan9303 *chip = dp->ds->priv;
+
+ return chip->is_bridged && !is_multicast_ether_addr(dest_addr);
+}
static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_port *dp = dsa_slave_to_port(dev);
u16 *lan9303_tag;
/* insert a special VLAN tag between the MAC addresses
@@ -63,26 +83,21 @@ static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev)
lan9303_tag = (u16 *)(skb->data + 2 * ETH_ALEN);
lan9303_tag[0] = htons(ETH_P_8021Q);
- lan9303_tag[1] = htons(p->dp->index | BIT(4));
+ lan9303_tag[1] = lan9303_xmit_use_arl(dp, skb->data) ?
+ LAN9303_TAG_TX_USE_ALR :
+ dp->index | LAN9303_TAG_TX_STP_OVERRIDE;
+ lan9303_tag[1] = htons(lan9303_tag[1]);
return skb;
}
static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt, struct net_device *orig_dev)
+ struct packet_type *pt)
{
u16 *lan9303_tag;
- struct dsa_switch_tree *dst = dev->dsa_ptr;
- struct dsa_switch *ds;
+ u16 lan9303_tag1;
unsigned int source_port;
- ds = dst->ds[0];
-
- if (unlikely(!ds)) {
- dev_warn_ratelimited(&dev->dev, "Dropping packet, due to missing DSA switch device\n");
- return NULL;
- }
-
if (unlikely(!pskb_may_pull(skb, LAN9303_TAG_LEN))) {
dev_warn_ratelimited(&dev->dev,
"Dropping packet, cannot pull\n");
@@ -102,27 +117,22 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev,
return NULL;
}
- source_port = ntohs(lan9303_tag[1]) & 0x3;
+ lan9303_tag1 = ntohs(lan9303_tag[1]);
+ source_port = lan9303_tag1 & 0x3;
- if (source_port >= LAN9303_MAX_PORTS) {
+ skb->dev = dsa_master_find_slave(dev, 0, source_port);
+ if (!skb->dev) {
dev_warn_ratelimited(&dev->dev, "Dropping packet due to invalid source port\n");
return NULL;
}
- if (!ds->ports[source_port].netdev) {
- dev_warn_ratelimited(&dev->dev, "Dropping packet due to invalid netdev or device\n");
- return NULL;
- }
-
/* remove the special VLAN tag between the MAC addresses
* and the current ethertype field.
*/
skb_pull_rcsum(skb, 2 + 2);
memmove(skb->data - ETH_HLEN, skb->data - (ETH_HLEN + LAN9303_TAG_LEN),
2 * ETH_ALEN);
-
- /* forward the packet to the dedicated interface */
- skb->dev = ds->ports[source_port].netdev;
+ skb->offload_fwd_mark = !(lan9303_tag1 & LAN9303_TAG_RX_TRAPPED_TO_CPU);
return skb;
}
diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c
index 2f32b7ea3365..8475434af7d5 100644
--- a/net/dsa/tag_mtk.c
+++ b/net/dsa/tag_mtk.c
@@ -23,7 +23,7 @@
static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
struct net_device *dev)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_port *dp = dsa_slave_to_port(dev);
u8 *mtk_tag;
if (skb_cow_head(skb, MTK_HDR_LEN) < 0)
@@ -36,7 +36,7 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
/* Build the tag after the MAC Source Address */
mtk_tag = skb->data + 2 * ETH_ALEN;
mtk_tag[0] = 0;
- mtk_tag[1] = (1 << p->dp->index) & MTK_HDR_XMIT_DP_BIT_MASK;
+ mtk_tag[1] = (1 << dp->index) & MTK_HDR_XMIT_DP_BIT_MASK;
mtk_tag[2] = 0;
mtk_tag[3] = 0;
@@ -44,11 +44,8 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
}
static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt,
- struct net_device *orig_dev)
+ struct packet_type *pt)
{
- struct dsa_switch_tree *dst = dev->dsa_ptr;
- struct dsa_switch *ds;
int port;
__be16 *phdr, hdr;
@@ -69,25 +66,27 @@ static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev,
skb->data - ETH_HLEN - MTK_HDR_LEN,
2 * ETH_ALEN);
- /* This protocol doesn't support cascading multiple
- * switches so it's safe to assume the switch is first
- * in the tree.
- */
- ds = dst->ds[0];
- if (!ds)
- return NULL;
-
/* Get source port information */
port = (hdr & MTK_HDR_RECV_SOURCE_PORT_MASK);
- if (!ds->ports[port].netdev)
- return NULL;
- skb->dev = ds->ports[port].netdev;
+ skb->dev = dsa_master_find_slave(dev, 0, port);
+ if (!skb->dev)
+ return NULL;
return skb;
}
+static int mtk_tag_flow_dissect(const struct sk_buff *skb, __be16 *proto,
+ int *offset)
+{
+ *offset = 4;
+ *proto = ((__be16 *)skb->data)[1];
+
+ return 0;
+}
+
const struct dsa_device_ops mtk_netdev_ops = {
- .xmit = mtk_tag_xmit,
- .rcv = mtk_tag_rcv,
+ .xmit = mtk_tag_xmit,
+ .rcv = mtk_tag_rcv,
+ .flow_dissect = mtk_tag_flow_dissect,
};
diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c
index 1867a3d11f28..613f4ee97771 100644
--- a/net/dsa/tag_qca.c
+++ b/net/dsa/tag_qca.c
@@ -38,7 +38,7 @@
static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_port *dp = dsa_slave_to_port(dev);
u16 *phdr, hdr;
dev->stats.tx_packets++;
@@ -54,8 +54,7 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev)
/* Set the version field, and set destination port information */
hdr = QCA_HDR_VERSION << QCA_HDR_XMIT_VERSION_S |
- QCA_HDR_XMIT_FROM_CPU |
- BIT(p->dp->index);
+ QCA_HDR_XMIT_FROM_CPU | BIT(dp->index);
*phdr = htons(hdr);
@@ -63,12 +62,8 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev)
}
static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt,
- struct net_device *orig_dev)
+ struct packet_type *pt)
{
- struct dsa_switch_tree *dst = dev->dsa_ptr;
- struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
- struct dsa_switch *ds;
u8 ver;
int port;
__be16 *phdr, hdr;
@@ -93,20 +88,12 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev,
memmove(skb->data - ETH_HLEN, skb->data - ETH_HLEN - QCA_HDR_LEN,
ETH_HLEN - QCA_HDR_LEN);
- /* This protocol doesn't support cascading multiple switches so it's
- * safe to assume the switch is first in the tree
- */
- ds = cpu_dp->ds;
- if (!ds)
- return NULL;
-
/* Get source port information */
port = (hdr & QCA_HDR_RECV_SOURCE_PORT_MASK);
- if (!ds->ports[port].netdev)
- return NULL;
- /* Update skb & forward the frame accordingly */
- skb->dev = ds->ports[port].netdev;
+ skb->dev = dsa_master_find_slave(dev, 0, port);
+ if (!skb->dev)
+ return NULL;
return skb;
}
diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c
index b09e56214005..7d20e1f3de28 100644
--- a/net/dsa/tag_trailer.c
+++ b/net/dsa/tag_trailer.c
@@ -16,7 +16,7 @@
static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_port *dp = dsa_slave_to_port(dev);
struct sk_buff *nskb;
int padlen;
u8 *trailer;
@@ -40,7 +40,7 @@ static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev)
skb_set_network_header(nskb, skb_network_header(skb) - skb->head);
skb_set_transport_header(nskb, skb_transport_header(skb) - skb->head);
skb_copy_and_csum_dev(skb, skb_put(nskb, skb->len));
- kfree_skb(skb);
+ consume_skb(skb);
if (padlen) {
skb_put_zero(nskb, padlen);
@@ -48,7 +48,7 @@ static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev)
trailer = skb_put(nskb, 4);
trailer[0] = 0x80;
- trailer[1] = 1 << p->dp->index;
+ trailer[1] = 1 << dp->index;
trailer[2] = 0x10;
trailer[3] = 0x00;
@@ -56,12 +56,8 @@ static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev)
}
static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt,
- struct net_device *orig_dev)
+ struct packet_type *pt)
{
- struct dsa_switch_tree *dst = dev->dsa_ptr;
- struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
- struct dsa_switch *ds = cpu_dp->ds;
u8 *trailer;
int source_port;
@@ -74,13 +70,13 @@ static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev,
return NULL;
source_port = trailer[1] & 7;
- if (source_port >= ds->num_ports || !ds->ports[source_port].netdev)
+
+ skb->dev = dsa_master_find_slave(dev, 0, source_port);
+ if (!skb->dev)
return NULL;
pskb_trim_rcsum(skb, skb->len - 4);
- skb->dev = ds->ports[source_port].netdev;
-
return skb;
}
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index 4e7bdb213cd0..b8cd43c9ed5b 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -314,7 +314,8 @@ static void send_hsr_supervision_frame(struct hsr_port *master,
hsr_sp = skb_put(skb, sizeof(struct hsr_sup_payload));
ether_addr_copy(hsr_sp->MacAddressA, master->dev->dev_addr);
- skb_put_padto(skb, ETH_ZLEN + HSR_HLEN);
+ if (skb_put_padto(skb, ETH_ZLEN + HSR_HLEN))
+ return;
hsr_forward_skb(skb, master);
return;
@@ -327,12 +328,12 @@ out:
/* Announce (supervision frame) timer function
*/
-static void hsr_announce(unsigned long data)
+static void hsr_announce(struct timer_list *t)
{
struct hsr_priv *hsr;
struct hsr_port *master;
- hsr = (struct hsr_priv *) data;
+ hsr = from_timer(hsr, t, announce_timer);
rcu_read_lock();
master = hsr_port_get_hsr(hsr, HSR_PT_MASTER);
@@ -462,9 +463,8 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
hsr->sequence_nr = HSR_SEQNR_START;
hsr->sup_sequence_nr = HSR_SUP_SEQNR_START;
- setup_timer(&hsr->announce_timer, hsr_announce, (unsigned long)hsr);
-
- setup_timer(&hsr->prune_timer, hsr_prune_nodes, (unsigned long)hsr);
+ timer_setup(&hsr->announce_timer, hsr_announce, 0);
+ timer_setup(&hsr->prune_timer, hsr_prune_nodes, 0);
ether_addr_copy(hsr->sup_multicast_addr, def_multicast_addr);
hsr->sup_multicast_addr[ETH_ALEN - 1] = multicast_spec;
diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
index 284a9b820df8..286ceb41ac0c 100644
--- a/net/hsr/hsr_framereg.c
+++ b/net/hsr/hsr_framereg.c
@@ -365,16 +365,14 @@ static struct hsr_port *get_late_port(struct hsr_priv *hsr,
/* Remove stale sequence_nr records. Called by timer every
* HSR_LIFE_CHECK_INTERVAL (two seconds or so).
*/
-void hsr_prune_nodes(unsigned long data)
+void hsr_prune_nodes(struct timer_list *t)
{
- struct hsr_priv *hsr;
+ struct hsr_priv *hsr = from_timer(hsr, t, prune_timer);
struct hsr_node *node;
struct hsr_port *port;
unsigned long timestamp;
unsigned long time_a, time_b;
- hsr = (struct hsr_priv *) data;
-
rcu_read_lock();
list_for_each_entry_rcu(node, &hsr->node_db, mac_list) {
/* Shorthand */
diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h
index 4e04f0e868e9..370b45998121 100644
--- a/net/hsr/hsr_framereg.h
+++ b/net/hsr/hsr_framereg.h
@@ -33,7 +33,7 @@ void hsr_register_frame_in(struct hsr_node *node, struct hsr_port *port,
int hsr_register_frame_out(struct hsr_port *port, struct hsr_node *node,
u16 sequence_nr);
-void hsr_prune_nodes(unsigned long data);
+void hsr_prune_nodes(struct timer_list *t);
int hsr_create_self_node(struct list_head *self_node_db,
unsigned char addr_a[ETH_ALEN],
diff --git a/net/ieee802154/6lowpan/6lowpan_i.h b/net/ieee802154/6lowpan/6lowpan_i.h
index ac7c96b73ad5..d8de3bcfb103 100644
--- a/net/ieee802154/6lowpan/6lowpan_i.h
+++ b/net/ieee802154/6lowpan/6lowpan_i.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __IEEE802154_6LOWPAN_I_H__
#define __IEEE802154_6LOWPAN_I_H__
diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c
index de2661cd0328..974765b7d92a 100644
--- a/net/ieee802154/6lowpan/core.c
+++ b/net/ieee802154/6lowpan/core.c
@@ -54,7 +54,7 @@
static int open_count;
-static struct header_ops lowpan_header_ops = {
+static const struct header_ops lowpan_header_ops = {
.create = lowpan_header_create,
};
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index 30d875dff6b5..85bf86ad6b18 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -80,12 +80,13 @@ static void lowpan_frag_init(struct inet_frag_queue *q, const void *a)
fq->daddr = *arg->dst;
}
-static void lowpan_frag_expire(unsigned long data)
+static void lowpan_frag_expire(struct timer_list *t)
{
+ struct inet_frag_queue *frag = from_timer(frag, t, timer);
struct frag_queue *fq;
struct net *net;
- fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q);
+ fq = container_of(frag, struct frag_queue, q);
net = container_of(fq->q.net, struct net, ieee802154_lowpan.frags);
spin_lock(&fq->q.lock);
@@ -580,19 +581,14 @@ static int __net_init lowpan_frags_init_net(struct net *net)
{
struct netns_ieee802154_lowpan *ieee802154_lowpan =
net_ieee802154_lowpan(net);
- int res;
ieee802154_lowpan->frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
ieee802154_lowpan->frags.low_thresh = IPV6_FRAG_LOW_THRESH;
ieee802154_lowpan->frags.timeout = IPV6_FRAG_TIMEOUT;
- res = inet_frags_init_net(&ieee802154_lowpan->frags);
- if (res)
- return res;
- res = lowpan_frags_ns_sysctl_register(net);
- if (res)
- inet_frags_uninit_net(&ieee802154_lowpan->frags);
- return res;
+ inet_frags_init_net(&ieee802154_lowpan->frags);
+
+ return lowpan_frags_ns_sysctl_register(net);
}
static void __net_exit lowpan_frags_exit_net(struct net *net)
diff --git a/net/ieee802154/Makefile b/net/ieee802154/Makefile
index 9b92ade687a3..f05b7bdae2aa 100644
--- a/net/ieee802154/Makefile
+++ b/net/ieee802154/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_IEEE802154) += ieee802154.o
obj-$(CONFIG_IEEE802154_SOCKET) += ieee802154_socket.o
obj-y += 6lowpan/
diff --git a/net/ieee802154/core.h b/net/ieee802154/core.h
index 81141f58d079..1c19f575d574 100644
--- a/net/ieee802154/core.h
+++ b/net/ieee802154/core.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __IEEE802154_CORE_H
#define __IEEE802154_CORE_H
diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c
index 6bde9e5a5503..96636e3b7aa9 100644
--- a/net/ieee802154/netlink.c
+++ b/net/ieee802154/netlink.c
@@ -89,7 +89,7 @@ int ieee802154_nl_reply(struct sk_buff *msg, struct genl_info *info)
return genlmsg_reply(msg, info);
}
-static const struct genl_ops ieee8021154_ops[] = {
+static const struct genl_ops ieee802154_ops[] = {
/* see nl-phy.c */
IEEE802154_DUMP(IEEE802154_LIST_PHY, ieee802154_list_phy,
ieee802154_dump_phy),
@@ -137,8 +137,8 @@ struct genl_family nl802154_family __ro_after_init = {
.version = 1,
.maxattr = IEEE802154_ATTR_MAX,
.module = THIS_MODULE,
- .ops = ieee8021154_ops,
- .n_ops = ARRAY_SIZE(ieee8021154_ops),
+ .ops = ieee802154_ops,
+ .n_ops = ARRAY_SIZE(ieee802154_ops),
.mcgrps = ieee802154_mcgrps,
.n_mcgrps = ARRAY_SIZE(ieee802154_mcgrps),
};
diff --git a/net/ieee802154/nl802154.h b/net/ieee802154/nl802154.h
index 3846a89d0958..8c4b6d08954c 100644
--- a/net/ieee802154/nl802154.h
+++ b/net/ieee802154/nl802154.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __IEEE802154_NL802154_H
#define __IEEE802154_NL802154_H
diff --git a/net/ieee802154/rdev-ops.h b/net/ieee802154/rdev-ops.h
index 4441c63b3ea6..598f5af49775 100644
--- a/net/ieee802154/rdev-ops.h
+++ b/net/ieee802154/rdev-ops.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __CFG802154_RDEV_OPS
#define __CFG802154_RDEV_OPS
diff --git a/net/ieee802154/sysfs.h b/net/ieee802154/sysfs.h
index aa42e39ecbec..337545b639e9 100644
--- a/net/ieee802154/sysfs.h
+++ b/net/ieee802154/sysfs.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __IEEE802154_SYSFS_H
#define __IEEE802154_SYSFS_H
diff --git a/net/ieee802154/trace.h b/net/ieee802154/trace.h
index 9a471e41ec73..19c2e5d60e76 100644
--- a/net/ieee802154/trace.h
+++ b/net/ieee802154/trace.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Based on net/wireless/trace.h */
#undef TRACE_SYSTEM
diff --git a/net/ife/ife.c b/net/ife/ife.c
index f360341c72eb..7d1ec76e7f43 100644
--- a/net/ife/ife.c
+++ b/net/ife/ife.c
@@ -137,6 +137,6 @@ int ife_tlv_meta_encode(void *skbdata, u16 attrtype, u16 dlen, const void *dval)
EXPORT_SYMBOL_GPL(ife_tlv_meta_encode);
MODULE_AUTHOR("Jamal Hadi Salim <jhs@mojatatu.com>");
-MODULE_AUTHOR("Yotam Gigi <yotamg@mellanox.com>");
+MODULE_AUTHOR("Yotam Gigi <yotam.gi@gmail.com>");
MODULE_DESCRIPTION("Inter-FE LFB action");
MODULE_LICENSE("GPL");
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 91a2557942fa..f48fe6fc7e8c 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -70,11 +70,9 @@ config IP_MULTIPLE_TABLES
address into account. Furthermore, the TOS (Type-Of-Service) field
of the packet can be used for routing decisions as well.
- If you are interested in this, please see the preliminary
- documentation at <http://www.compendium.com.ar/policy-routing.txt>
- and <ftp://post.tepkom.ru/pub/vol2/Linux/docs/advanced-routing.tex>.
- You will need supporting software from
- <ftp://ftp.tux.org/pub/net/ip-routing/>.
+ If you need more information, see the Linux Advanced
+ Routing and Traffic Control documentation at
+ <http://lartc.org/howto/lartc.rpdb.html>
If unsure, say N.
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index afcb435adfbe..c6c8ad1d4b6d 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the Linux TCP/IP (INET) layer.
#
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 2e548eca3489..ce4aa827be05 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -195,7 +195,7 @@ int inet_listen(struct socket *sock, int backlog)
{
struct sock *sk = sock->sk;
unsigned char old_state;
- int err;
+ int err, tcp_fastopen;
lock_sock(sk);
@@ -217,11 +217,12 @@ int inet_listen(struct socket *sock, int backlog)
* because the socket was in TCP_LISTEN state previously but
* was shutdown() rather than close().
*/
- if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) &&
- (sysctl_tcp_fastopen & TFO_SERVER_ENABLE) &&
+ tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen;
+ if ((tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) &&
+ (tcp_fastopen & TFO_SERVER_ENABLE) &&
!inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) {
fastopen_queue_tune(sk, backlog);
- tcp_fastopen_init_key_once(true);
+ tcp_fastopen_init_key_once(sock_net(sk));
}
err = inet_csk_listen_start(sk, backlog);
@@ -826,6 +827,7 @@ int inet_shutdown(struct socket *sock, int how)
err = -ENOTCONN;
/* Hack to wake up other listeners, who can poll for
POLLHUP, even on eg. unconnected UDP sockets -- RR */
+ /* fall through */
default:
sk->sk_shutdown |= how;
if (sk->sk_prot->shutdown)
@@ -839,7 +841,7 @@ int inet_shutdown(struct socket *sock, int how)
case TCP_LISTEN:
if (!(how & RCV_SHUTDOWN))
break;
- /* Fall through */
+ /* fall through */
case TCP_SYN_SENT:
err = sk->sk_prot->disconnect(sk, O_NONBLOCK);
sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;
@@ -944,6 +946,8 @@ const struct proto_ops inet_stream_ops = {
.sendpage = inet_sendpage,
.splice_read = tcp_splice_read,
.read_sock = tcp_read_sock,
+ .sendmsg_locked = tcp_sendmsg_locked,
+ .sendpage_locked = tcp_sendpage_locked,
.peek_len = tcp_peek_len,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_sock_common_setsockopt,
@@ -1219,10 +1223,9 @@ EXPORT_SYMBOL(inet_sk_rebuild_header);
struct sk_buff *inet_gso_segment(struct sk_buff *skb,
netdev_features_t features)
{
- bool udpfrag = false, fixedid = false, gso_partial, encap;
+ bool fixedid = false, gso_partial, encap;
struct sk_buff *segs = ERR_PTR(-EINVAL);
const struct net_offload *ops;
- unsigned int offset = 0;
struct iphdr *iph;
int proto, tot_len;
int nhoff;
@@ -1257,7 +1260,6 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb,
segs = ERR_PTR(-EPROTONOSUPPORT);
if (!skb->encapsulation || encap) {
- udpfrag = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP);
fixedid = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID);
/* fixed ID is invalid if DF bit is not set */
@@ -1277,13 +1279,7 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb,
skb = segs;
do {
iph = (struct iphdr *)(skb_mac_header(skb) + nhoff);
- if (udpfrag) {
- iph->frag_off = htons(offset >> 3);
- if (skb->next)
- iph->frag_off |= htons(IP_MF);
- offset += skb->len - nhoff - ihl;
- tot_len = skb->len - nhoff;
- } else if (skb_is_gso(skb)) {
+ if (skb_is_gso(skb)) {
if (!fixedid) {
iph->id = htons(id);
id += skb_shinfo(skb)->gso_segs;
@@ -1602,6 +1598,9 @@ static const struct net_protocol igmp_protocol = {
};
#endif
+/* thinking of making this const? Don't.
+ * early_demux can change based on sysctl.
+ */
static struct net_protocol tcp_protocol = {
.early_demux = tcp_v4_early_demux,
.early_demux_handler = tcp_v4_early_demux,
@@ -1612,6 +1611,9 @@ static struct net_protocol tcp_protocol = {
.icmp_strict_tag_validation = 1,
};
+/* thinking of making this const? Don't.
+ * early_demux can change based on sysctl.
+ */
static struct net_protocol udp_protocol = {
.early_demux = udp_v4_early_demux,
.early_demux_handler = udp_v4_early_demux,
@@ -1778,6 +1780,11 @@ static const struct net_offload ipip_offload = {
},
};
+static int __init ipip_offload_init(void)
+{
+ return inet_add_offload(&ipip_offload, IPPROTO_IPIP);
+}
+
static int __init ipv4_offload_init(void)
{
/*
@@ -1787,9 +1794,10 @@ static int __init ipv4_offload_init(void)
pr_crit("%s: Cannot add UDP protocol offload\n", __func__);
if (tcpv4_offload_init() < 0)
pr_crit("%s: Cannot add TCP protocol offload\n", __func__);
+ if (ipip_offload_init() < 0)
+ pr_crit("%s: Cannot add IPIP protocol offload\n", __func__);
dev_add_offload(&ip_packet_offload);
- inet_add_offload(&ipip_offload, IPPROTO_IPIP);
return 0;
}
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 37db44f60718..4dd95cdd8070 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -240,7 +240,7 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb)
if (err == -EINPROGRESS)
goto out;
- if (err == -EBUSY)
+ if (err == -ENOSPC)
err = NET_XMIT_DROP;
goto out_free;
}
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 8b52179ddc6e..a8d7c5a9fb05 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -171,7 +171,7 @@ struct neigh_table arp_tbl = {
[NEIGH_VAR_BASE_REACHABLE_TIME] = 30 * HZ,
[NEIGH_VAR_DELAY_PROBE_TIME] = 5 * HZ,
[NEIGH_VAR_GC_STALETIME] = 60 * HZ,
- [NEIGH_VAR_QUEUE_LEN_BYTES] = 64 * 1024,
+ [NEIGH_VAR_QUEUE_LEN_BYTES] = SK_WMEM_MAX,
[NEIGH_VAR_PROXY_QLEN] = 64,
[NEIGH_VAR_ANYCAST_DELAY] = 1 * HZ,
[NEIGH_VAR_PROXY_DELAY] = (8 * HZ) / 10,
@@ -1180,6 +1180,7 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg)
case SIOCSARP:
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
+ /* fall through */
case SIOCGARP:
err = copy_from_user(&r, arg, sizeof(struct arpreq));
if (err)
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 2ae8f54cb321..82178cc69c96 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -1951,7 +1951,7 @@ int cipso_v4_req_setattr(struct request_sock *req,
buf = NULL;
req_inet = inet_rsk(req);
- opt = xchg(&req_inet->opt, opt);
+ opt = xchg((__force struct ip_options_rcu **)&req_inet->ireq_opt, opt);
if (opt)
kfree_rcu(opt, rcu);
@@ -1973,11 +1973,13 @@ req_setattr_failure:
* values on failure.
*
*/
-static int cipso_v4_delopt(struct ip_options_rcu **opt_ptr)
+static int cipso_v4_delopt(struct ip_options_rcu __rcu **opt_ptr)
{
+ struct ip_options_rcu *opt = rcu_dereference_protected(*opt_ptr, 1);
int hdr_delta = 0;
- struct ip_options_rcu *opt = *opt_ptr;
+ if (!opt || opt->opt.cipso == 0)
+ return 0;
if (opt->opt.srr || opt->opt.rr || opt->opt.ts || opt->opt.router_alert) {
u8 cipso_len;
u8 cipso_off;
@@ -2039,14 +2041,10 @@ static int cipso_v4_delopt(struct ip_options_rcu **opt_ptr)
*/
void cipso_v4_sock_delattr(struct sock *sk)
{
- int hdr_delta;
- struct ip_options_rcu *opt;
struct inet_sock *sk_inet;
+ int hdr_delta;
sk_inet = inet_sk(sk);
- opt = rcu_dereference_protected(sk_inet->inet_opt, 1);
- if (!opt || opt->opt.cipso == 0)
- return;
hdr_delta = cipso_v4_delopt(&sk_inet->inet_opt);
if (sk_inet->is_icsk && hdr_delta > 0) {
@@ -2066,15 +2064,7 @@ void cipso_v4_sock_delattr(struct sock *sk)
*/
void cipso_v4_req_delattr(struct request_sock *req)
{
- struct ip_options_rcu *opt;
- struct inet_request_sock *req_inet;
-
- req_inet = inet_rsk(req);
- opt = req_inet->opt;
- if (!opt || opt->opt.cipso == 0)
- return;
-
- cipso_v4_delopt(&req_inet->opt);
+ cipso_v4_delopt(&inet_rsk(req)->ireq_opt);
}
/**
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 38d9af9b917c..a4573bccd6da 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -137,22 +137,12 @@ static void inet_hash_remove(struct in_ifaddr *ifa)
*/
struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
{
- u32 hash = inet_addr_hash(net, addr);
struct net_device *result = NULL;
struct in_ifaddr *ifa;
rcu_read_lock();
- hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
- if (ifa->ifa_local == addr) {
- struct net_device *dev = ifa->ifa_dev->dev;
-
- if (!net_eq(dev_net(dev), net))
- continue;
- result = dev;
- break;
- }
- }
- if (!result) {
+ ifa = inet_lookup_ifaddr_rcu(net, addr);
+ if (!ifa) {
struct flowi4 fl4 = { .daddr = addr };
struct fib_result res = { 0 };
struct fib_table *local;
@@ -165,6 +155,8 @@ struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
!fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
res.type == RTN_LOCAL)
result = FIB_RES_DEV(res);
+ } else {
+ result = ifa->ifa_dev->dev;
}
if (result && devref)
dev_hold(result);
@@ -173,6 +165,20 @@ struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
}
EXPORT_SYMBOL(__ip_dev_find);
+/* called under RCU lock */
+struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
+{
+ u32 hash = inet_addr_hash(net, addr);
+ struct in_ifaddr *ifa;
+
+ hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
+ if (ifa->ifa_local == addr &&
+ net_eq(dev_net(ifa->ifa_dev->dev), net))
+ return ifa;
+
+ return NULL;
+}
+
static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
@@ -438,7 +444,7 @@ static void check_lifetime(struct work_struct *work);
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
- u32 portid)
+ u32 portid, struct netlink_ext_ack *extack)
{
struct in_device *in_dev = ifa->ifa_dev;
struct in_ifaddr *ifa1, **ifap, **last_primary;
@@ -483,6 +489,7 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
*/
ivi.ivi_addr = ifa->ifa_address;
ivi.ivi_dev = ifa->ifa_dev;
+ ivi.extack = extack;
ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
NETDEV_UP, &ivi);
ret = notifier_to_errno(ret);
@@ -515,7 +522,7 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
static int inet_insert_ifa(struct in_ifaddr *ifa)
{
- return __inet_insert_ifa(ifa, NULL, 0);
+ return __inet_insert_ifa(ifa, NULL, 0, NULL);
}
static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
@@ -896,7 +903,8 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
return ret;
}
}
- return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
+ return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
+ extack);
} else {
inet_free_ifa(ifa);
@@ -1516,6 +1524,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
if (inetdev_valid_mtu(dev->mtu))
break;
/* disable IP when MTU is not enough */
+ /* fall through */
case NETDEV_UNREGISTER:
inetdev_destroy(in_dev);
break;
@@ -1751,7 +1760,7 @@ static int inet_validate_link_af(const struct net_device *dev,
struct nlattr *a, *tb[IFLA_INET_MAX+1];
int err, rem;
- if (dev && !__in_dev_get_rtnl(dev))
+ if (dev && !__in_dev_get_rcu(dev))
return -EAFNOSUPPORT;
err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy, NULL);
@@ -1775,7 +1784,7 @@ static int inet_validate_link_af(const struct net_device *dev,
static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
{
- struct in_device *in_dev = __in_dev_get_rtnl(dev);
+ struct in_device *in_dev = __in_dev_get_rcu(dev);
struct nlattr *a, *tb[IFLA_INET_MAX+1];
int rem;
@@ -2491,9 +2500,9 @@ void __init devinet_init(void)
rtnl_af_register(&inet_af_ops);
- rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
- rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
- rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
+ rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
+ rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
+ rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
- inet_netconf_dump_devconf, NULL);
+ inet_netconf_dump_devconf, 0);
}
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 0cbee0a666ff..d57aa64fa7c7 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -258,7 +258,7 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
esp_output_udp_encap(x, skb, esp);
if (!skb_cloned(skb)) {
- if (tailen <= skb_availroom(skb)) {
+ if (tailen <= skb_tailroom(skb)) {
nfrags = 1;
trailer = skb;
tail = skb_tail_pointer(trailer);
@@ -292,8 +292,6 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
kunmap_atomic(vaddr);
- spin_unlock_bh(&x->lock);
-
nfrags = skb_shinfo(skb)->nr_frags;
__skb_fill_page_desc(skb, nfrags, page, pfrag->offset,
@@ -301,6 +299,9 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
skb_shinfo(skb)->nr_frags = ++nfrags;
pfrag->offset = pfrag->offset + allocsize;
+
+ spin_unlock_bh(&x->lock);
+
nfrags++;
skb->len += tailen;
@@ -381,7 +382,7 @@ int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
(unsigned char *)esph - skb->data,
assoclen + ivlen + esp->clen + alen);
if (unlikely(err < 0))
- goto error;
+ goto error_free;
if (!esp->inplace) {
int allocsize;
@@ -392,7 +393,7 @@ int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
spin_lock_bh(&x->lock);
if (unlikely(!skb_page_frag_refill(allocsize, pfrag, GFP_ATOMIC))) {
spin_unlock_bh(&x->lock);
- goto error;
+ goto error_free;
}
skb_shinfo(skb)->nr_frags = 1;
@@ -409,7 +410,7 @@ int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
(unsigned char *)esph - skb->data,
assoclen + ivlen + esp->clen + alen);
if (unlikely(err < 0))
- goto error;
+ goto error_free;
}
if ((x->props.flags & XFRM_STATE_ESN))
@@ -431,7 +432,7 @@ int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
case -EINPROGRESS:
goto error;
- case -EBUSY:
+ case -ENOSPC:
err = NET_XMIT_DROP;
break;
@@ -442,8 +443,9 @@ int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
if (sg != dsg)
esp_ssg_unref(x, tmp);
- kfree(tmp);
+error_free:
+ kfree(tmp);
error:
return err;
}
@@ -499,18 +501,59 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
return esp_output_tail(x, skb, &esp);
}
+static inline int esp_remove_trailer(struct sk_buff *skb)
+{
+ struct xfrm_state *x = xfrm_input_state(skb);
+ struct xfrm_offload *xo = xfrm_offload(skb);
+ struct crypto_aead *aead = x->data;
+ int alen, hlen, elen;
+ int padlen, trimlen;
+ __wsum csumdiff;
+ u8 nexthdr[2];
+ int ret;
+
+ alen = crypto_aead_authsize(aead);
+ hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead);
+ elen = skb->len - hlen;
+
+ if (xo && (xo->flags & XFRM_ESP_NO_TRAILER)) {
+ ret = xo->proto;
+ goto out;
+ }
+
+ if (skb_copy_bits(skb, skb->len - alen - 2, nexthdr, 2))
+ BUG();
+
+ ret = -EINVAL;
+ padlen = nexthdr[0];
+ if (padlen + 2 + alen >= elen) {
+ net_dbg_ratelimited("ipsec esp packet is garbage padlen=%d, elen=%d\n",
+ padlen + 2, elen - alen);
+ goto out;
+ }
+
+ trimlen = alen + padlen + 2;
+ if (skb->ip_summed == CHECKSUM_COMPLETE) {
+ csumdiff = skb_checksum(skb, skb->len - trimlen, trimlen, 0);
+ skb->csum = csum_block_sub(skb->csum, csumdiff,
+ skb->len - trimlen);
+ }
+ pskb_trim(skb, skb->len - trimlen);
+
+ ret = nexthdr[1];
+
+out:
+ return ret;
+}
+
int esp_input_done2(struct sk_buff *skb, int err)
{
const struct iphdr *iph;
struct xfrm_state *x = xfrm_input_state(skb);
struct xfrm_offload *xo = xfrm_offload(skb);
struct crypto_aead *aead = x->data;
- int alen = crypto_aead_authsize(aead);
int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead);
- int elen = skb->len - hlen;
int ihl;
- u8 nexthdr[2];
- int padlen;
if (!xo || (xo && !(xo->flags & CRYPTO_DONE)))
kfree(ESP_SKB_CB(skb)->tmp);
@@ -518,16 +561,10 @@ int esp_input_done2(struct sk_buff *skb, int err)
if (unlikely(err))
goto out;
- if (skb_copy_bits(skb, skb->len-alen-2, nexthdr, 2))
- BUG();
-
- err = -EINVAL;
- padlen = nexthdr[0];
- if (padlen + 2 + alen >= elen)
+ err = esp_remove_trailer(skb);
+ if (unlikely(err < 0))
goto out;
- /* ... check padding bits here. Silly. :-) */
-
iph = ip_hdr(skb);
ihl = iph->ihl * 4;
@@ -568,15 +605,12 @@ int esp_input_done2(struct sk_buff *skb, int err)
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
- pskb_trim(skb, skb->len - alen - padlen - 2);
- __skb_pull(skb, hlen);
+ skb_pull_rcsum(skb, hlen);
if (x->props.mode == XFRM_MODE_TUNNEL)
skb_reset_transport_header(skb);
else
skb_set_transport_header(skb, -ihl);
- err = nexthdr[1];
-
/* RFC4303: Drop dummy packets without any error */
if (err == IPPROTO_NONE)
err = -EINVAL;
@@ -695,8 +729,10 @@ skip_cow:
sg_init_table(sg, nfrags);
err = skb_to_sgvec(skb, sg, 0, skb->len);
- if (unlikely(err < 0))
+ if (unlikely(err < 0)) {
+ kfree(tmp);
goto out;
+ }
skb->ip_summed = CHECKSUM_NONE;
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index e0666016a764..f8b918c766b0 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -182,11 +182,13 @@ out:
static int esp_input_tail(struct xfrm_state *x, struct sk_buff *skb)
{
struct crypto_aead *aead = x->data;
+ struct xfrm_offload *xo = xfrm_offload(skb);
if (!pskb_may_pull(skb, sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead)))
return -EINVAL;
- skb->ip_summed = CHECKSUM_NONE;
+ if (!(xo->flags & CRYPTO_DONE))
+ skb->ip_summed = CHECKSUM_NONE;
return esp_input_done2(skb, 0);
}
@@ -257,7 +259,7 @@ static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_
esp.seqno = cpu_to_be64(xo->seq.low + ((u64)xo->seq.hi << 32));
err = esp_output_tail(x, skb, &esp);
- if (err < 0)
+ if (err)
return err;
secpath_reset(skb);
@@ -303,3 +305,4 @@ module_init(esp4_offload_init);
module_exit(esp4_offload_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Steffen Klassert <steffen.klassert@secunet.com>");
+MODULE_ALIAS_XFRM_OFFLOAD_TYPE(AF_INET, XFRM_PROTO_ESP);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 044d2a159a3c..f52d27a422c3 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -73,6 +73,11 @@ fail:
fib_free_table(main_table);
return -ENOMEM;
}
+
+static bool fib4_has_custom_rules(struct net *net)
+{
+ return false;
+}
#else
struct fib_table *fib_new_table(struct net *net, u32 id)
@@ -128,6 +133,11 @@ struct fib_table *fib_get_table(struct net *net, u32 id)
}
return NULL;
}
+
+static bool fib4_has_custom_rules(struct net *net)
+{
+ return net->ipv4.fib_has_custom_rules;
+}
#endif /* CONFIG_IP_MULTIPLE_TABLES */
static void fib_replace_table(struct net *net, struct fib_table *old,
@@ -345,9 +355,6 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
if (res.type != RTN_UNICAST &&
(res.type != RTN_LOCAL || !IN_DEV_ACCEPT_LOCAL(idev)))
goto e_inval;
- if (!rpf && !fib_num_tclassid_users(net) &&
- (dev->ifindex != oif || !IN_DEV_TX_REDIRECTS(idev)))
- goto last_resort;
fib_combine_itag(itag, &res);
dev_match = false;
@@ -402,13 +409,28 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
struct in_device *idev, u32 *itag)
{
int r = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(idev);
+ struct net *net = dev_net(dev);
- if (!r && !fib_num_tclassid_users(dev_net(dev)) &&
- IN_DEV_ACCEPT_LOCAL(idev) &&
+ if (!r && !fib_num_tclassid_users(net) &&
(dev->ifindex != oif || !IN_DEV_TX_REDIRECTS(idev))) {
+ if (IN_DEV_ACCEPT_LOCAL(idev))
+ goto ok;
+ /* with custom local routes in place, checking local addresses
+ * only will be too optimistic, with custom rules, checking
+ * local addresses only can be too strict, e.g. due to vrf
+ */
+ if (net->ipv4.fib_has_custom_local_routes ||
+ fib4_has_custom_rules(net))
+ goto full_check;
+ if (inet_lookup_ifaddr_rcu(net, src))
+ return -EINVAL;
+
+ok:
*itag = 0;
return 0;
}
+
+full_check:
return __fib_validate_source(skb, src, dst, tos, oif, dev, r, idev, itag);
}
@@ -759,6 +781,8 @@ static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
}
err = fib_table_insert(net, tb, &cfg, extack);
+ if (!err && cfg.fc_type == RTN_LOCAL)
+ net->ipv4.fib_has_custom_local_routes = true;
errout:
return err;
}
@@ -1247,22 +1271,28 @@ static int __net_init ip_fib_net_init(struct net *net)
int err;
size_t size = sizeof(struct hlist_head) * FIB_TABLE_HASHSZ;
- net->ipv4.fib_seq = 0;
+ err = fib4_notifier_init(net);
+ if (err)
+ return err;
/* Avoid false sharing : Use at least a full cache line */
size = max_t(size_t, size, L1_CACHE_BYTES);
net->ipv4.fib_table_hash = kzalloc(size, GFP_KERNEL);
- if (!net->ipv4.fib_table_hash)
- return -ENOMEM;
+ if (!net->ipv4.fib_table_hash) {
+ err = -ENOMEM;
+ goto err_table_hash_alloc;
+ }
err = fib4_rules_init(net);
if (err < 0)
- goto fail;
+ goto err_rules_init;
return 0;
-fail:
+err_rules_init:
kfree(net->ipv4.fib_table_hash);
+err_table_hash_alloc:
+ fib4_notifier_exit(net);
return err;
}
@@ -1292,6 +1322,7 @@ static void ip_fib_net_exit(struct net *net)
#endif
rtnl_unlock();
kfree(net->ipv4.fib_table_hash);
+ fib4_notifier_exit(net);
}
static int __net_init fib_net_init(struct net *net)
@@ -1341,7 +1372,7 @@ void __init ip_fib_init(void)
register_netdevice_notifier(&fib_netdev_notifier);
register_inetaddr_notifier(&fib_inetaddr_notifier);
- rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL);
- rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, NULL);
- rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL);
+ rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, 0);
+ rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, 0);
+ rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, 0);
}
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index 769ab87ebc4b..e6ff282bb7f4 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _FIB_LOOKUP_H
#define _FIB_LOOKUP_H
@@ -32,6 +33,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
struct netlink_ext_ack *extack);
int fib_nh_match(struct fib_config *cfg, struct fib_info *fi,
struct netlink_ext_ack *extack);
+bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi);
int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, u32 tb_id,
u8 type, __be32 dst, int dst_len, u8 tos, struct fib_info *fi,
unsigned int);
diff --git a/net/ipv4/fib_notifier.c b/net/ipv4/fib_notifier.c
index e0714d975947..b804ccbdb241 100644
--- a/net/ipv4/fib_notifier.c
+++ b/net/ipv4/fib_notifier.c
@@ -1,86 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/rtnetlink.h>
#include <linux/notifier.h>
-#include <linux/rcupdate.h>
+#include <linux/socket.h>
#include <linux/kernel.h>
+#include <linux/export.h>
#include <net/net_namespace.h>
+#include <net/fib_notifier.h>
#include <net/netns/ipv4.h>
#include <net/ip_fib.h>
-static ATOMIC_NOTIFIER_HEAD(fib_chain);
-
-int call_fib_notifier(struct notifier_block *nb, struct net *net,
- enum fib_event_type event_type,
- struct fib_notifier_info *info)
+int call_fib4_notifier(struct notifier_block *nb, struct net *net,
+ enum fib_event_type event_type,
+ struct fib_notifier_info *info)
{
- info->net = net;
- return nb->notifier_call(nb, event_type, info);
+ info->family = AF_INET;
+ return call_fib_notifier(nb, net, event_type, info);
}
-int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
- struct fib_notifier_info *info)
+int call_fib4_notifiers(struct net *net, enum fib_event_type event_type,
+ struct fib_notifier_info *info)
{
+ ASSERT_RTNL();
+
+ info->family = AF_INET;
net->ipv4.fib_seq++;
- info->net = net;
- return atomic_notifier_call_chain(&fib_chain, event_type, info);
+ return call_fib_notifiers(net, event_type, info);
}
-static unsigned int fib_seq_sum(void)
+static unsigned int fib4_seq_read(struct net *net)
{
- unsigned int fib_seq = 0;
- struct net *net;
-
- rtnl_lock();
- for_each_net(net)
- fib_seq += net->ipv4.fib_seq;
- rtnl_unlock();
+ ASSERT_RTNL();
- return fib_seq;
+ return net->ipv4.fib_seq + fib4_rules_seq_read(net);
}
-static bool fib_dump_is_consistent(struct notifier_block *nb,
- void (*cb)(struct notifier_block *nb),
- unsigned int fib_seq)
+static int fib4_dump(struct net *net, struct notifier_block *nb)
{
- atomic_notifier_chain_register(&fib_chain, nb);
- if (fib_seq == fib_seq_sum())
- return true;
- atomic_notifier_chain_unregister(&fib_chain, nb);
- if (cb)
- cb(nb);
- return false;
+ int err;
+
+ err = fib4_rules_dump(net, nb);
+ if (err)
+ return err;
+
+ fib_notify(net, nb);
+
+ return 0;
}
-#define FIB_DUMP_MAX_RETRIES 5
-int register_fib_notifier(struct notifier_block *nb,
- void (*cb)(struct notifier_block *nb))
-{
- int retries = 0;
+static const struct fib_notifier_ops fib4_notifier_ops_template = {
+ .family = AF_INET,
+ .fib_seq_read = fib4_seq_read,
+ .fib_dump = fib4_dump,
+ .owner = THIS_MODULE,
+};
- do {
- unsigned int fib_seq = fib_seq_sum();
- struct net *net;
+int __net_init fib4_notifier_init(struct net *net)
+{
+ struct fib_notifier_ops *ops;
- /* Mutex semantics guarantee that every change done to
- * FIB tries before we read the change sequence counter
- * is now visible to us.
- */
- rcu_read_lock();
- for_each_net_rcu(net) {
- fib_rules_notify(net, nb);
- fib_notify(net, nb);
- }
- rcu_read_unlock();
+ net->ipv4.fib_seq = 0;
- if (fib_dump_is_consistent(nb, cb, fib_seq))
- return 0;
- } while (++retries < FIB_DUMP_MAX_RETRIES);
+ ops = fib_notifier_ops_register(&fib4_notifier_ops_template, net);
+ if (IS_ERR(ops))
+ return PTR_ERR(ops);
+ net->ipv4.notifier_ops = ops;
- return -EBUSY;
+ return 0;
}
-EXPORT_SYMBOL(register_fib_notifier);
-int unregister_fib_notifier(struct notifier_block *nb)
+void __net_exit fib4_notifier_exit(struct net *net)
{
- return atomic_notifier_chain_unregister(&fib_chain, nb);
+ fib_notifier_ops_unregister(net->ipv4.notifier_ops);
}
-EXPORT_SYMBOL(unregister_fib_notifier);
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 778ecf977eb2..35d646a62ad4 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -68,6 +68,16 @@ bool fib4_rule_default(const struct fib_rule *rule)
}
EXPORT_SYMBOL_GPL(fib4_rule_default);
+int fib4_rules_dump(struct net *net, struct notifier_block *nb)
+{
+ return fib_rules_dump(net, nb, AF_INET);
+}
+
+unsigned int fib4_rules_seq_read(struct net *net)
+{
+ return fib_rules_seq_read(net, AF_INET);
+}
+
int __fib_lookup(struct net *net, struct flowi4 *flp,
struct fib_result *res, unsigned int flags)
{
@@ -185,38 +195,6 @@ static struct fib_table *fib_empty_table(struct net *net)
return NULL;
}
-static int call_fib_rule_notifier(struct notifier_block *nb, struct net *net,
- enum fib_event_type event_type,
- struct fib_rule *rule)
-{
- struct fib_rule_notifier_info info = {
- .rule = rule,
- };
-
- return call_fib_notifier(nb, net, event_type, &info.info);
-}
-
-static int call_fib_rule_notifiers(struct net *net,
- enum fib_event_type event_type,
- struct fib_rule *rule)
-{
- struct fib_rule_notifier_info info = {
- .rule = rule,
- };
-
- return call_fib_notifiers(net, event_type, &info.info);
-}
-
-/* Called with rcu_read_lock() */
-void fib_rules_notify(struct net *net, struct notifier_block *nb)
-{
- struct fib_rules_ops *ops = net->ipv4.rules_ops;
- struct fib_rule *rule;
-
- list_for_each_entry_rcu(rule, &ops->rules_list, list)
- call_fib_rule_notifier(nb, net, FIB_EVENT_RULE_ADD, rule);
-}
-
static const struct nla_policy fib4_rule_policy[FRA_MAX+1] = {
FRA_GENERIC_POLICY,
[FRA_FLOW] = { .type = NLA_U32 },
@@ -273,7 +251,6 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
rule4->tos = frh->tos;
net->ipv4.fib_has_custom_rules = true;
- call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule);
err = 0;
errout:
@@ -295,7 +272,6 @@ static int fib4_rule_delete(struct fib_rule *rule)
net->ipv4.fib_num_tclassid_users--;
#endif
net->ipv4.fib_has_custom_rules = true;
- call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule);
errout:
return err;
}
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index ec3a9ce281a6..f04d944f8abe 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -44,6 +44,7 @@
#include <net/netlink.h>
#include <net/nexthop.h>
#include <net/lwtunnel.h>
+#include <net/fib_notifier.h>
#include "fib_lookup.h"
@@ -219,7 +220,7 @@ static void free_fib_info_rcu(struct rcu_head *head)
} endfor_nexthops(fi);
m = fi->fib_metrics;
- if (m != &dst_default_metrics && atomic_dec_and_test(&m->refcnt))
+ if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt))
kfree(m);
kfree(fi);
}
@@ -600,17 +601,9 @@ static void fib_rebalance(struct fib_info *fi)
atomic_set(&nexthop_nh->nh_upper_bound, upper_bound);
} endfor_nexthops(fi);
}
-
-static inline void fib_add_weight(struct fib_info *fi,
- const struct fib_nh *nh)
-{
- fi->fib_weight += nh->nh_weight;
-}
-
#else /* CONFIG_IP_ROUTE_MULTIPATH */
#define fib_rebalance(fi) do { } while (0)
-#define fib_add_weight(fi, nh) do { } while (0)
#endif /* CONFIG_IP_ROUTE_MULTIPATH */
@@ -695,6 +688,40 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi,
return 0;
}
+bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
+{
+ struct nlattr *nla;
+ int remaining;
+
+ if (!cfg->fc_mx)
+ return true;
+
+ nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
+ int type = nla_type(nla);
+ u32 val;
+
+ if (!type)
+ continue;
+ if (type > RTAX_MAX)
+ return false;
+
+ if (type == RTAX_CC_ALGO) {
+ char tmp[TCP_CA_NAME_MAX];
+ bool ecn_ca = false;
+
+ nla_strlcpy(tmp, nla, sizeof(tmp));
+ val = tcp_ca_get_key_by_name(fi->fib_net, tmp, &ecn_ca);
+ } else {
+ val = nla_get_u32(nla);
+ }
+
+ if (fi->fib_metrics->metrics[type - 1] != val)
+ return false;
+ }
+
+ return true;
+}
+
/*
* Picture
@@ -739,8 +766,8 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi,
* |
* |-> {local prefix} (terminal node)
*/
-static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
- struct fib_nh *nh, struct netlink_ext_ack *extack)
+static int fib_check_nh(struct fib_config *cfg, struct fib_nh *nh,
+ struct netlink_ext_ack *extack)
{
int err = 0;
struct net *net;
@@ -1003,7 +1030,7 @@ fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg)
char tmp[TCP_CA_NAME_MAX];
nla_strlcpy(tmp, nla, sizeof(tmp));
- val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
+ val = tcp_ca_get_key_by_name(fi->fib_net, tmp, &ecn_ca);
if (val == TCP_CA_UNSPEC)
return -EINVAL;
} else {
@@ -1089,7 +1116,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
kfree(fi);
return ERR_PTR(err);
}
- atomic_set(&fi->fib_metrics->refcnt, 1);
+ refcount_set(&fi->fib_metrics->refcnt, 1);
} else {
fi->fib_metrics = (struct dst_metrics *)&dst_default_metrics;
}
@@ -1223,7 +1250,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
int linkdown = 0;
change_nexthops(fi) {
- err = fib_check_nh(cfg, fi, nexthop_nh, extack);
+ err = fib_check_nh(cfg, nexthop_nh, extack);
if (err != 0)
goto failure;
if (nexthop_nh->nh_flags & RTNH_F_LINKDOWN)
@@ -1240,7 +1267,6 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
change_nexthops(fi) {
fib_info_update_nh_saddr(net, nexthop_nh);
- fib_add_weight(fi, nexthop_nh);
} endfor_nexthops(fi)
fib_rebalance(fi);
@@ -1330,8 +1356,6 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
nla_put_in_addr(skb, RTA_PREFSRC, fi->fib_prefsrc))
goto nla_put_failure;
if (fi->fib_nhs == 1) {
- struct in_device *in_dev;
-
if (fi->fib_nh->nh_gw &&
nla_put_in_addr(skb, RTA_GATEWAY, fi->fib_nh->nh_gw))
goto nla_put_failure;
@@ -1339,11 +1363,17 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
nla_put_u32(skb, RTA_OIF, fi->fib_nh->nh_oif))
goto nla_put_failure;
if (fi->fib_nh->nh_flags & RTNH_F_LINKDOWN) {
- in_dev = __in_dev_get_rtnl(fi->fib_nh->nh_dev);
+ struct in_device *in_dev;
+
+ rcu_read_lock();
+ in_dev = __in_dev_get_rcu(fi->fib_nh->nh_dev);
if (in_dev &&
IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev))
rtm->rtm_flags |= RTNH_F_DEAD;
+ rcu_read_unlock();
}
+ if (fi->fib_nh->nh_flags & RTNH_F_OFFLOAD)
+ rtm->rtm_flags |= RTNH_F_OFFLOAD;
#ifdef CONFIG_IP_ROUTE_CLASSID
if (fi->fib_nh[0].nh_tclassid &&
nla_put_u32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid))
@@ -1363,18 +1393,20 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
goto nla_put_failure;
for_nexthops(fi) {
- struct in_device *in_dev;
-
rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
if (!rtnh)
goto nla_put_failure;
rtnh->rtnh_flags = nh->nh_flags & 0xFF;
if (nh->nh_flags & RTNH_F_LINKDOWN) {
- in_dev = __in_dev_get_rtnl(nh->nh_dev);
+ struct in_device *in_dev;
+
+ rcu_read_lock();
+ in_dev = __in_dev_get_rcu(nh->nh_dev);
if (in_dev &&
IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev))
rtnh->rtnh_flags |= RTNH_F_DEAD;
+ rcu_read_unlock();
}
rtnh->rtnh_hops = nh->nh_weight - 1;
rtnh->rtnh_ifindex = nh->nh_oif;
@@ -1451,14 +1483,14 @@ static int call_fib_nh_notifiers(struct fib_nh *fib_nh,
if (IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
fib_nh->nh_flags & RTNH_F_LINKDOWN)
break;
- return call_fib_notifiers(dev_net(fib_nh->nh_dev), event_type,
- &info.info);
+ return call_fib4_notifiers(dev_net(fib_nh->nh_dev), event_type,
+ &info.info);
case FIB_EVENT_NH_DEL:
if ((in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
fib_nh->nh_flags & RTNH_F_LINKDOWN) ||
(fib_nh->nh_flags & RTNH_F_DEAD))
- return call_fib_notifiers(dev_net(fib_nh->nh_dev),
- event_type, &info.info);
+ return call_fib4_notifiers(dev_net(fib_nh->nh_dev),
+ event_type, &info.info);
default:
break;
}
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 64668c69dda6..5ddc4aefff12 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -81,39 +81,40 @@
#include <net/tcp.h>
#include <net/sock.h>
#include <net/ip_fib.h>
+#include <net/fib_notifier.h>
#include <trace/events/fib.h>
#include "fib_lookup.h"
static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net,
enum fib_event_type event_type, u32 dst,
- int dst_len, struct fib_info *fi,
- u8 tos, u8 type, u32 tb_id)
+ int dst_len, struct fib_alias *fa)
{
struct fib_entry_notifier_info info = {
.dst = dst,
.dst_len = dst_len,
- .fi = fi,
- .tos = tos,
- .type = type,
- .tb_id = tb_id,
+ .fi = fa->fa_info,
+ .tos = fa->fa_tos,
+ .type = fa->fa_type,
+ .tb_id = fa->tb_id,
};
- return call_fib_notifier(nb, net, event_type, &info.info);
+ return call_fib4_notifier(nb, net, event_type, &info.info);
}
static int call_fib_entry_notifiers(struct net *net,
enum fib_event_type event_type, u32 dst,
- int dst_len, struct fib_info *fi,
- u8 tos, u8 type, u32 tb_id)
+ int dst_len, struct fib_alias *fa,
+ struct netlink_ext_ack *extack)
{
struct fib_entry_notifier_info info = {
+ .info.extack = extack,
.dst = dst,
.dst_len = dst_len,
- .fi = fi,
- .tos = tos,
- .type = type,
- .tb_id = tb_id,
+ .fi = fa->fa_info,
+ .tos = fa->fa_tos,
+ .type = fa->fa_type,
+ .tb_id = fa->tb_id,
};
- return call_fib_notifiers(net, event_type, &info.info);
+ return call_fib4_notifiers(net, event_type, &info.info);
}
#define MAX_STAT_DEPTH 32
@@ -1215,9 +1216,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
new_fa->fa_default = -1;
call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
- key, plen, fi,
- new_fa->fa_tos, cfg->fc_type,
- tb->tb_id);
+ key, plen, new_fa, extack);
rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen,
tb->tb_id, &cfg->fc_nlinfo, nlflags);
@@ -1272,8 +1271,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
tb->tb_num_default++;
rt_cache_flush(cfg->fc_nlinfo.nl_net);
- call_fib_entry_notifiers(net, event, key, plen, fi, tos, cfg->fc_type,
- tb->tb_id);
+ call_fib_entry_notifiers(net, event, key, plen, new_fa, extack);
rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, new_fa->tb_id,
&cfg->fc_nlinfo, nlflags);
succeeded:
@@ -1562,7 +1560,8 @@ int fib_table_delete(struct net *net, struct fib_table *tb,
fi->fib_prefsrc == cfg->fc_prefsrc) &&
(!cfg->fc_protocol ||
fi->fib_protocol == cfg->fc_protocol) &&
- fib_nh_match(cfg, fi, extack) == 0) {
+ fib_nh_match(cfg, fi, extack) == 0 &&
+ fib_metrics_match(cfg, fi)) {
fa_to_delete = fa;
break;
}
@@ -1572,8 +1571,7 @@ int fib_table_delete(struct net *net, struct fib_table *tb,
return -ESRCH;
call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, key, plen,
- fa_to_delete->fa_info, tos,
- fa_to_delete->fa_type, tb->tb_id);
+ fa_to_delete, extack);
rtmsg_fib(RTM_DELROUTE, htonl(key), fa_to_delete, plen, tb->tb_id,
&cfg->fc_nlinfo, 0);
@@ -1890,9 +1888,8 @@ int fib_table_flush(struct net *net, struct fib_table *tb)
call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL,
n->key,
- KEYLENGTH - fa->fa_slen,
- fi, fa->fa_tos, fa->fa_type,
- tb->tb_id);
+ KEYLENGTH - fa->fa_slen, fa,
+ NULL);
hlist_del_rcu(&fa->fa_list);
fib_release_info(fa->fa_info);
alias_free_mem_rcu(fa);
@@ -1930,8 +1927,7 @@ static void fib_leaf_notify(struct net *net, struct key_vector *l,
continue;
call_fib_entry_notifier(nb, net, FIB_EVENT_ENTRY_ADD, l->key,
- KEYLENGTH - fa->fa_slen, fi, fa->fa_tos,
- fa->fa_type, fa->tb_id);
+ KEYLENGTH - fa->fa_slen, fa);
}
}
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index d5cac99170b1..1859c473b21a 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -24,7 +24,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
__be16 protocol = skb->protocol;
u16 mac_len = skb->mac_len;
int gre_offset, outer_hlen;
- bool need_csum, ufo, gso_partial;
+ bool need_csum, gso_partial;
if (!skb->encapsulation)
goto out;
@@ -47,20 +47,8 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_GRE_CSUM);
skb->encap_hdr_csum = need_csum;
- ufo = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP);
-
features &= skb->dev->hw_enc_features;
- /* The only checksum offload we care about from here on out is the
- * outer one so strip the existing checksum feature flags based
- * on the fact that we will be computing our checksum in software.
- */
- if (ufo) {
- features &= ~NETIF_F_CSUM_MASK;
- if (!need_csum)
- features |= NETIF_F_HW_CSUM;
- }
-
/* segment inner packet. */
segs = skb_mac_gso_segment(skb, features);
if (IS_ERR_OR_NULL(segs)) {
@@ -98,7 +86,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
greh = (struct gre_base_hdr *)skb_transport_header(skb);
pcsum = (__sum16 *)(greh + 1);
- if (gso_partial) {
+ if (gso_partial && skb_is_gso(skb)) {
unsigned int partial_adj;
/* Adjust checksum to account for the fact that
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index c2be26b98b5f..1617604c9284 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -412,7 +412,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
int type = icmp_param->data.icmph.type;
int code = icmp_param->data.icmph.code;
- if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb))
+ if (ip_options_echo(net, &icmp_param->replyopts.opt.opt, skb))
return;
/* Needed by both icmp_global_allow and icmp_xmit_lock */
@@ -694,7 +694,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
iph->tos;
mark = IP4_REPLY_MARK(net, skb_in->mark);
- if (ip_options_echo(&icmp_param.replyopts.opt.opt, skb_in))
+ if (ip_options_echo(net, &icmp_param.replyopts.opt.opt, skb_in))
goto out_unlock;
@@ -782,7 +782,7 @@ static bool icmp_tag_validation(int proto)
}
/*
- * Handle ICMP_DEST_UNREACH, ICMP_TIME_EXCEED, ICMP_QUENCH, and
+ * Handle ICMP_DEST_UNREACH, ICMP_TIME_EXCEEDED, ICMP_QUENCH, and
* ICMP_PARAMETERPROB.
*/
@@ -810,7 +810,8 @@ static bool icmp_unreach(struct sk_buff *skb)
if (iph->ihl < 5) /* Mangled header, drop. */
goto out_err;
- if (icmph->type == ICMP_DEST_UNREACH) {
+ switch (icmph->type) {
+ case ICMP_DEST_UNREACH:
switch (icmph->code & 15) {
case ICMP_NET_UNREACH:
case ICMP_HOST_UNREACH:
@@ -846,8 +847,16 @@ static bool icmp_unreach(struct sk_buff *skb)
}
if (icmph->code > NR_ICMP_UNREACH)
goto out;
- } else if (icmph->type == ICMP_PARAMETERPROB)
+ break;
+ case ICMP_PARAMETERPROB:
info = ntohl(icmph->un.gateway) >> 24;
+ break;
+ case ICMP_TIME_EXCEEDED:
+ __ICMP_INC_STATS(net, ICMP_MIB_INTIMEEXCDS);
+ if (icmph->code == ICMP_EXC_FRAGTIME)
+ goto out;
+ break;
+ }
/*
* Throw it at our lower layers
@@ -959,8 +968,9 @@ static bool icmp_timestamp(struct sk_buff *skb)
*/
icmp_param.data.times[1] = inet_current_timestamp();
icmp_param.data.times[2] = icmp_param.data.times[1];
- if (skb_copy_bits(skb, 0, &icmp_param.data.times[0], 4))
- BUG();
+
+ BUG_ON(skb_copy_bits(skb, 0, &icmp_param.data.times[0], 4));
+
icmp_param.data.icmph = *icmp_hdr(skb);
icmp_param.data.icmph.type = ICMP_TIMESTAMPREPLY;
icmp_param.data.icmph.code = 0;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 498706b072fb..ab183af0b5b6 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1007,10 +1007,18 @@ int igmp_rcv(struct sk_buff *skb)
{
/* This basically follows the spec line by line -- see RFC1112 */
struct igmphdr *ih;
- struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
+ struct net_device *dev = skb->dev;
+ struct in_device *in_dev;
int len = skb->len;
bool dropped = true;
+ if (netif_is_l3_master(dev)) {
+ dev = dev_get_by_index_rcu(dev_net(dev), IPCB(skb)->iif);
+ if (!dev)
+ goto drop;
+ }
+
+ in_dev = __in_dev_get_rcu(dev);
if (!in_dev)
goto drop;
@@ -2549,7 +2557,8 @@ done:
/*
* check if a multicast source filter allows delivery for a given <src,dst,intf>
*/
-int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif)
+int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr,
+ int dif, int sdif)
{
struct inet_sock *inet = inet_sk(sk);
struct ip_mc_socklist *pmc;
@@ -2564,7 +2573,8 @@ int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif)
rcu_read_lock();
for_each_pmc_rcu(inet, pmc) {
if (pmc->multi.imr_multiaddr.s_addr == loc_addr &&
- pmc->multi.imr_ifindex == dif)
+ (pmc->multi.imr_ifindex == dif ||
+ (sdif && pmc->multi.imr_ifindex == sdif)))
break;
}
ret = inet->mc_all;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 4089c013cb03..4ca46dc08e63 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -39,11 +39,11 @@ EXPORT_SYMBOL(inet_csk_timer_bug_msg);
* IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY,
* and 0.0.0.0 equals to 0.0.0.0 only
*/
-static int ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6,
- const struct in6_addr *sk2_rcv_saddr6,
- __be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr,
- bool sk1_ipv6only, bool sk2_ipv6only,
- bool match_wildcard)
+static bool ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6,
+ const struct in6_addr *sk2_rcv_saddr6,
+ __be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr,
+ bool sk1_ipv6only, bool sk2_ipv6only,
+ bool match_wildcard)
{
int addr_type = ipv6_addr_type(sk1_rcv_saddr6);
int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
@@ -52,29 +52,29 @@ static int ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6,
if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) {
if (!sk2_ipv6only) {
if (sk1_rcv_saddr == sk2_rcv_saddr)
- return 1;
+ return true;
if (!sk1_rcv_saddr || !sk2_rcv_saddr)
return match_wildcard;
}
- return 0;
+ return false;
}
if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY)
- return 1;
+ return true;
if (addr_type2 == IPV6_ADDR_ANY && match_wildcard &&
!(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
- return 1;
+ return true;
if (addr_type == IPV6_ADDR_ANY && match_wildcard &&
!(sk1_ipv6only && addr_type2 == IPV6_ADDR_MAPPED))
- return 1;
+ return true;
if (sk2_rcv_saddr6 &&
ipv6_addr_equal(sk1_rcv_saddr6, sk2_rcv_saddr6))
- return 1;
+ return true;
- return 0;
+ return false;
}
#endif
@@ -82,20 +82,20 @@ static int ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6,
* match_wildcard == false: addresses must be exactly the same, i.e.
* 0.0.0.0 only equals to 0.0.0.0
*/
-static int ipv4_rcv_saddr_equal(__be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr,
- bool sk2_ipv6only, bool match_wildcard)
+static bool ipv4_rcv_saddr_equal(__be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr,
+ bool sk2_ipv6only, bool match_wildcard)
{
if (!sk2_ipv6only) {
if (sk1_rcv_saddr == sk2_rcv_saddr)
- return 1;
+ return true;
if (!sk1_rcv_saddr || !sk2_rcv_saddr)
return match_wildcard;
}
- return 0;
+ return false;
}
-int inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
- bool match_wildcard)
+bool inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
+ bool match_wildcard)
{
#if IS_ENABLED(CONFIG_IPV6)
if (sk->sk_family == AF_INET6)
@@ -266,7 +266,7 @@ static inline int sk_reuseport_match(struct inet_bind_bucket *tb,
#if IS_ENABLED(CONFIG_IPV6)
if (tb->fast_sk_family == AF_INET6)
return ipv6_rcv_saddr_equal(&tb->fast_v6_rcv_saddr,
- &sk->sk_v6_rcv_saddr,
+ inet6_rcv_saddr(sk),
tb->fast_rcv_saddr,
sk->sk_rcv_saddr,
tb->fast_ipv6_only,
@@ -321,13 +321,14 @@ tb_found:
goto fail_unlock;
}
success:
- if (!hlist_empty(&tb->owners)) {
+ if (hlist_empty(&tb->owners)) {
tb->fastreuse = reuse;
if (sk->sk_reuseport) {
tb->fastreuseport = FASTREUSEPORT_ANY;
tb->fastuid = uid;
tb->fast_rcv_saddr = sk->sk_rcv_saddr;
tb->fast_ipv6_only = ipv6_only_sock(sk);
+ tb->fast_sk_family = sk->sk_family;
#if IS_ENABLED(CONFIG_IPV6)
tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
#endif
@@ -354,6 +355,7 @@ success:
tb->fastuid = uid;
tb->fast_rcv_saddr = sk->sk_rcv_saddr;
tb->fast_ipv6_only = ipv6_only_sock(sk);
+ tb->fast_sk_family = sk->sk_family;
#if IS_ENABLED(CONFIG_IPV6)
tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
#endif
@@ -473,6 +475,7 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern)
}
spin_unlock_bh(&queue->fastopenq.lock);
}
+ mem_cgroup_sk_alloc(newsk);
out:
release_sock(sk);
if (req)
@@ -492,17 +495,15 @@ EXPORT_SYMBOL(inet_csk_accept);
* to optimize.
*/
void inet_csk_init_xmit_timers(struct sock *sk,
- void (*retransmit_handler)(unsigned long),
- void (*delack_handler)(unsigned long),
- void (*keepalive_handler)(unsigned long))
+ void (*retransmit_handler)(struct timer_list *t),
+ void (*delack_handler)(struct timer_list *t),
+ void (*keepalive_handler)(struct timer_list *t))
{
struct inet_connection_sock *icsk = inet_csk(sk);
- setup_timer(&icsk->icsk_retransmit_timer, retransmit_handler,
- (unsigned long)sk);
- setup_timer(&icsk->icsk_delack_timer, delack_handler,
- (unsigned long)sk);
- setup_timer(&sk->sk_timer, keepalive_handler, (unsigned long)sk);
+ timer_setup(&icsk->icsk_retransmit_timer, retransmit_handler, 0);
+ timer_setup(&icsk->icsk_delack_timer, delack_handler, 0);
+ timer_setup(&sk->sk_timer, keepalive_handler, 0);
icsk->icsk_pending = icsk->icsk_ack.pending = 0;
}
EXPORT_SYMBOL(inet_csk_init_xmit_timers);
@@ -537,9 +538,11 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk,
{
const struct inet_request_sock *ireq = inet_rsk(req);
struct net *net = read_pnet(&ireq->ireq_net);
- struct ip_options_rcu *opt = ireq->opt;
+ struct ip_options_rcu *opt;
struct rtable *rt;
+ opt = ireq_opt_deref(ireq);
+
flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
sk->sk_protocol, inet_sk_flowi_flags(sk),
@@ -573,10 +576,9 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
struct flowi4 *fl4;
struct rtable *rt;
+ opt = rcu_dereference(ireq->ireq_opt);
fl4 = &newinet->cork.fl.u.ip4;
- rcu_read_lock();
- opt = rcu_dereference(newinet->inet_opt);
flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
sk->sk_protocol, inet_sk_flowi_flags(sk),
@@ -589,13 +591,11 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
goto no_route;
if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway)
goto route_err;
- rcu_read_unlock();
return &rt->dst;
route_err:
ip_rt_put(rt);
no_route:
- rcu_read_unlock();
__IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
return NULL;
}
@@ -674,9 +674,9 @@ void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req
}
EXPORT_SYMBOL(inet_csk_reqsk_queue_drop_and_put);
-static void reqsk_timer_handler(unsigned long data)
+static void reqsk_timer_handler(struct timer_list *t)
{
- struct request_sock *req = (struct request_sock *)data;
+ struct request_sock *req = from_timer(req, t, rsk_timer);
struct sock *sk_listener = req->rsk_listener;
struct net *net = sock_net(sk_listener);
struct inet_connection_sock *icsk = inet_csk(sk_listener);
@@ -747,8 +747,7 @@ static void reqsk_queue_hash_req(struct request_sock *req,
req->num_timeout = 0;
req->sk = NULL;
- setup_pinned_timer(&req->rsk_timer, reqsk_timer_handler,
- (unsigned long)req);
+ timer_setup(&req->rsk_timer, reqsk_timer_handler, TIMER_PINNED);
mod_timer(&req->rsk_timer, jiffies + timeout);
inet_ehash_insert(req_to_sk(req), NULL);
@@ -916,7 +915,6 @@ static void inet_child_forget(struct sock *sk, struct request_sock *req,
tcp_sk(child)->fastopen_rsk = NULL;
}
inet_csk_destroy_sock(child);
- reqsk_put(req);
}
struct sock *inet_csk_reqsk_queue_add(struct sock *sk,
@@ -987,6 +985,7 @@ void inet_csk_listen_stop(struct sock *sk)
sock_hold(child);
inet_child_forget(sk, req, child);
+ reqsk_put(req);
bh_unlock_sock(child);
local_bh_enable();
sock_put(child);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 3828b3a805cd..c9c35b61a027 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -93,8 +93,17 @@ void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk)
}
EXPORT_SYMBOL_GPL(inet_diag_msg_common_fill);
-static size_t inet_sk_attr_size(void)
+static size_t inet_sk_attr_size(struct sock *sk,
+ const struct inet_diag_req_v2 *req,
+ bool net_admin)
{
+ const struct inet_diag_handler *handler;
+ size_t aux = 0;
+
+ handler = inet_diag_table[req->sdiag_protocol];
+ if (handler && handler->idiag_get_aux_size)
+ aux = handler->idiag_get_aux_size(sk, net_admin);
+
return nla_total_size(sizeof(struct tcp_info))
+ nla_total_size(1) /* INET_DIAG_SHUTDOWN */
+ nla_total_size(1) /* INET_DIAG_TOS */
@@ -105,6 +114,7 @@ static size_t inet_sk_attr_size(void)
+ nla_total_size(SK_MEMINFO_VARS * sizeof(u32))
+ nla_total_size(TCP_CA_NAME_MAX)
+ nla_total_size(sizeof(struct tcpvegas_info))
+ + aux
+ 64;
}
@@ -260,6 +270,10 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
handler->idiag_get_info(sk, r, info);
+ if (ext & (1 << (INET_DIAG_INFO - 1)) && handler->idiag_get_aux)
+ if (handler->idiag_get_aux(sk, net_admin, skb) < 0)
+ goto errout;
+
if (sk->sk_state < TCP_TIME_WAIT) {
union tcp_cc_info info;
size_t sz = 0;
@@ -274,6 +288,17 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
goto errout;
}
+ if (ext & (1 << (INET_DIAG_CLASS_ID - 1))) {
+ u32 classid = 0;
+
+#ifdef CONFIG_SOCK_CGROUP_DATA
+ classid = sock_cgroup_classid(&sk->sk_cgrp_data);
+#endif
+
+ if (nla_put_u32(skb, INET_DIAG_CLASS_ID, classid))
+ goto errout;
+ }
+
out:
nlmsg_end(skb, nlh);
return 0;
@@ -438,6 +463,7 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
const struct nlmsghdr *nlh,
const struct inet_diag_req_v2 *req)
{
+ bool net_admin = netlink_net_capable(in_skb, CAP_NET_ADMIN);
struct net *net = sock_net(in_skb->sk);
struct sk_buff *rep;
struct sock *sk;
@@ -447,7 +473,7 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
if (IS_ERR(sk))
return PTR_ERR(sk);
- rep = nlmsg_new(inet_sk_attr_size(), GFP_KERNEL);
+ rep = nlmsg_new(inet_sk_attr_size(sk, req, net_admin), GFP_KERNEL);
if (!rep) {
err = -ENOMEM;
goto out;
@@ -456,8 +482,7 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
err = sk_diag_fill(sk, rep, req,
sk_user_ns(NETLINK_CB(in_skb).sk),
NETLINK_CB(in_skb).portid,
- nlh->nlmsg_seq, 0, nlh,
- netlink_net_capable(in_skb, CAP_NET_ADMIN));
+ nlh->nlmsg_seq, 0, nlh, net_admin);
if (err < 0) {
WARN_ON(err == -EMSGSIZE);
nlmsg_free(rep);
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 96e95e83cc61..26a3d0315728 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -147,7 +147,7 @@ inet_evict_bucket(struct inet_frags *f, struct inet_frag_bucket *hb)
spin_unlock(&hb->chain_lock);
hlist_for_each_entry_safe(fq, n, &expired, list_evictor)
- f->frag_expire((unsigned long) fq);
+ f->frag_expire(&fq->timer);
return evicted;
}
@@ -164,7 +164,7 @@ static void inet_frag_worker(struct work_struct *work)
local_bh_disable();
- for (i = ACCESS_ONCE(f->next_bucket); budget; --budget) {
+ for (i = READ_ONCE(f->next_bucket); budget; --budget) {
evicted += inet_evict_bucket(f, &f->hash[i]);
i = (i + 1) & (INETFRAGS_HASHSZ - 1);
if (evicted > INETFRAGS_EVICT_MAX)
@@ -234,10 +234,8 @@ evict_again:
cond_resched();
if (read_seqretry(&f->rnd_seqlock, seq) ||
- percpu_counter_sum(&nf->mem))
+ sum_frag_mem_limit(nf))
goto evict_again;
-
- percpu_counter_destroy(&nf->mem);
}
EXPORT_SYMBOL(inet_frags_exit_net);
@@ -368,7 +366,7 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
f->constructor(q, arg);
add_frag_mem_limit(nf, f->qsize);
- setup_timer(&q->timer, f->frag_expire, (unsigned long)q);
+ timer_setup(&q->timer, f->frag_expire, 0);
spin_lock_init(&q->lock);
refcount_set(&q->refcnt, 1);
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 2e3389d614d1..e7d15fb0d94d 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -170,7 +170,7 @@ EXPORT_SYMBOL_GPL(__inet_inherit_port);
static inline int compute_score(struct sock *sk, struct net *net,
const unsigned short hnum, const __be32 daddr,
- const int dif, bool exact_dif)
+ const int dif, const int sdif, bool exact_dif)
{
int score = -1;
struct inet_sock *inet = inet_sk(sk);
@@ -185,9 +185,13 @@ static inline int compute_score(struct sock *sk, struct net *net,
score += 4;
}
if (sk->sk_bound_dev_if || exact_dif) {
- if (sk->sk_bound_dev_if != dif)
+ bool dev_match = (sk->sk_bound_dev_if == dif ||
+ sk->sk_bound_dev_if == sdif);
+
+ if (exact_dif && !dev_match)
return -1;
- score += 4;
+ if (sk->sk_bound_dev_if && dev_match)
+ score += 4;
}
if (sk->sk_incoming_cpu == raw_smp_processor_id())
score++;
@@ -208,7 +212,7 @@ struct sock *__inet_lookup_listener(struct net *net,
struct sk_buff *skb, int doff,
const __be32 saddr, __be16 sport,
const __be32 daddr, const unsigned short hnum,
- const int dif)
+ const int dif, const int sdif)
{
unsigned int hash = inet_lhashfn(net, hnum);
struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
@@ -218,7 +222,8 @@ struct sock *__inet_lookup_listener(struct net *net,
u32 phash = 0;
sk_for_each_rcu(sk, &ilb->head) {
- score = compute_score(sk, net, hnum, daddr, dif, exact_dif);
+ score = compute_score(sk, net, hnum, daddr,
+ dif, sdif, exact_dif);
if (score > hiscore) {
reuseport = sk->sk_reuseport;
if (reuseport) {
@@ -268,7 +273,7 @@ struct sock *__inet_lookup_established(struct net *net,
struct inet_hashinfo *hashinfo,
const __be32 saddr, const __be16 sport,
const __be32 daddr, const u16 hnum,
- const int dif)
+ const int dif, const int sdif)
{
INET_ADDR_COOKIE(acookie, saddr, daddr);
const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
@@ -286,11 +291,12 @@ begin:
if (sk->sk_hash != hash)
continue;
if (likely(INET_MATCH(sk, net, acookie,
- saddr, daddr, ports, dif))) {
+ saddr, daddr, ports, dif, sdif))) {
if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
goto out;
if (unlikely(!INET_MATCH(sk, net, acookie,
- saddr, daddr, ports, dif))) {
+ saddr, daddr, ports,
+ dif, sdif))) {
sock_gen_put(sk);
goto begin;
}
@@ -321,9 +327,10 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
__be32 daddr = inet->inet_rcv_saddr;
__be32 saddr = inet->inet_daddr;
int dif = sk->sk_bound_dev_if;
+ struct net *net = sock_net(sk);
+ int sdif = l3mdev_master_ifindex_by_index(net, dif);
INET_ADDR_COOKIE(acookie, saddr, daddr);
const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport);
- struct net *net = sock_net(sk);
unsigned int hash = inet_ehashfn(net, daddr, lport,
saddr, inet->inet_dport);
struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
@@ -339,7 +346,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
continue;
if (likely(INET_MATCH(sk2, net, acookie,
- saddr, daddr, ports, dif))) {
+ saddr, daddr, ports, dif, sdif))) {
if (sk2->sk_state == TCP_TIME_WAIT) {
tw = inet_twsk(sk2);
if (twsk_unique(sk, sk2, twp))
@@ -449,10 +456,7 @@ static int inet_reuseport_add_sock(struct sock *sk,
return reuseport_add_sock(sk, sk2);
}
- /* Initial allocation may have already happened via setsockopt */
- if (!rcu_access_pointer(sk->sk_reuseport_cb))
- return reuseport_alloc(sk);
- return 0;
+ return reuseport_alloc(sk);
}
int __inet_hash(struct sock *sk, struct sock *osk)
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 5b039159e67a..c690cd0d9b3f 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -9,7 +9,6 @@
*/
#include <linux/kernel.h>
-#include <linux/kmemcheck.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <net/inet_hashtables.h>
@@ -142,9 +141,9 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
}
EXPORT_SYMBOL_GPL(__inet_twsk_hashdance);
-static void tw_timer_handler(unsigned long data)
+static void tw_timer_handler(struct timer_list *t)
{
- struct inet_timewait_sock *tw = (struct inet_timewait_sock *)data;
+ struct inet_timewait_sock *tw = from_timer(tw, t, tw_timer);
if (tw->tw_kill)
__NET_INC_STATS(twsk_net(tw), LINUX_MIB_TIMEWAITKILLED);
@@ -167,8 +166,6 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,
if (tw) {
const struct inet_sock *inet = inet_sk(sk);
- kmemcheck_annotate_bitfield(tw, flags);
-
tw->tw_dr = dr;
/* Give us an identity. */
tw->tw_daddr = inet->inet_daddr;
@@ -188,8 +185,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,
tw->tw_prot = sk->sk_prot_creator;
atomic64_set(&tw->tw_cookie, atomic64_read(&sk->sk_cookie));
twsk_net_set(tw, sock_net(sk));
- setup_pinned_timer(&tw->tw_timer, tw_timer_handler,
- (unsigned long)tw);
+ timer_setup(&tw->tw_timer, tw_timer_handler, TIMER_PINNED);
/*
* Because we use RCU lookups, we should not set tw_refcnt
* to a non null value before everything is setup for this
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index c5a117cc6619..914d56928578 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -33,7 +33,7 @@
* also be removed if the pool is overloaded i.e. if the total amount of
* entries is greater-or-equal than the threshold.
*
- * Node pool is organised as an AVL tree.
+ * Node pool is organised as an RB tree.
* Such an implementation has been chosen not just for fun. It's a way to
* prevent easy and efficient DoS attacks by creating hash collisions. A huge
* amount of long living nodes in a single hash slot would significantly delay
@@ -45,7 +45,7 @@
* AND reference count being 0.
* 3. Global variable peer_total is modified under the pool lock.
* 4. struct inet_peer fields modification:
- * avl_left, avl_right, avl_parent, avl_height: pool lock
+ * rb_node: pool lock
* refcnt: atomically against modifications on other CPU;
* usually under some other lock to prevent node disappearing
* daddr: unchangeable
@@ -53,30 +53,15 @@
static struct kmem_cache *peer_cachep __read_mostly;
-static LIST_HEAD(gc_list);
-static const int gc_delay = 60 * HZ;
-static struct delayed_work gc_work;
-static DEFINE_SPINLOCK(gc_lock);
-
-#define node_height(x) x->avl_height
-
-#define peer_avl_empty ((struct inet_peer *)&peer_fake_node)
-#define peer_avl_empty_rcu ((struct inet_peer __rcu __force *)&peer_fake_node)
-static const struct inet_peer peer_fake_node = {
- .avl_left = peer_avl_empty_rcu,
- .avl_right = peer_avl_empty_rcu,
- .avl_height = 0
-};
-
void inet_peer_base_init(struct inet_peer_base *bp)
{
- bp->root = peer_avl_empty_rcu;
+ bp->rb_root = RB_ROOT;
seqlock_init(&bp->lock);
bp->total = 0;
}
EXPORT_SYMBOL_GPL(inet_peer_base_init);
-#define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */
+#define PEER_MAX_GC 32
/* Exported for sysctl_net_ipv4. */
int inet_peer_threshold __read_mostly = 65536 + 128; /* start to throw entries more
@@ -84,53 +69,6 @@ int inet_peer_threshold __read_mostly = 65536 + 128; /* start to throw entries m
int inet_peer_minttl __read_mostly = 120 * HZ; /* TTL under high load: 120 sec */
int inet_peer_maxttl __read_mostly = 10 * 60 * HZ; /* usual time to live: 10 min */
-static void inetpeer_gc_worker(struct work_struct *work)
-{
- struct inet_peer *p, *n, *c;
- struct list_head list;
-
- spin_lock_bh(&gc_lock);
- list_replace_init(&gc_list, &list);
- spin_unlock_bh(&gc_lock);
-
- if (list_empty(&list))
- return;
-
- list_for_each_entry_safe(p, n, &list, gc_list) {
-
- if (need_resched())
- cond_resched();
-
- c = rcu_dereference_protected(p->avl_left, 1);
- if (c != peer_avl_empty) {
- list_add_tail(&c->gc_list, &list);
- p->avl_left = peer_avl_empty_rcu;
- }
-
- c = rcu_dereference_protected(p->avl_right, 1);
- if (c != peer_avl_empty) {
- list_add_tail(&c->gc_list, &list);
- p->avl_right = peer_avl_empty_rcu;
- }
-
- n = list_entry(p->gc_list.next, struct inet_peer, gc_list);
-
- if (refcount_read(&p->refcnt) == 1) {
- list_del(&p->gc_list);
- kmem_cache_free(peer_cachep, p);
- }
- }
-
- if (list_empty(&list))
- return;
-
- spin_lock_bh(&gc_lock);
- list_splice(&list, &gc_list);
- spin_unlock_bh(&gc_lock);
-
- schedule_delayed_work(&gc_work, gc_delay);
-}
-
/* Called from ip_output.c:ip_init */
void __init inet_initpeers(void)
{
@@ -153,225 +91,65 @@ void __init inet_initpeers(void)
sizeof(struct inet_peer),
0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
NULL);
-
- INIT_DEFERRABLE_WORK(&gc_work, inetpeer_gc_worker);
}
-#define rcu_deref_locked(X, BASE) \
- rcu_dereference_protected(X, lockdep_is_held(&(BASE)->lock.lock))
-
-/*
- * Called with local BH disabled and the pool lock held.
- */
-#define lookup(_daddr, _stack, _base) \
-({ \
- struct inet_peer *u; \
- struct inet_peer __rcu **v; \
- \
- stackptr = _stack; \
- *stackptr++ = &_base->root; \
- for (u = rcu_deref_locked(_base->root, _base); \
- u != peer_avl_empty;) { \
- int cmp = inetpeer_addr_cmp(_daddr, &u->daddr); \
- if (cmp == 0) \
- break; \
- if (cmp == -1) \
- v = &u->avl_left; \
- else \
- v = &u->avl_right; \
- *stackptr++ = v; \
- u = rcu_deref_locked(*v, _base); \
- } \
- u; \
-})
-
-/*
- * Called with rcu_read_lock()
- * Because we hold no lock against a writer, its quite possible we fall
- * in an endless loop.
- * But every pointer we follow is guaranteed to be valid thanks to RCU.
- * We exit from this function if number of links exceeds PEER_MAXDEPTH
- */
-static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr,
- struct inet_peer_base *base)
+/* Called with rcu_read_lock() or base->lock held */
+static struct inet_peer *lookup(const struct inetpeer_addr *daddr,
+ struct inet_peer_base *base,
+ unsigned int seq,
+ struct inet_peer *gc_stack[],
+ unsigned int *gc_cnt,
+ struct rb_node **parent_p,
+ struct rb_node ***pp_p)
{
- struct inet_peer *u = rcu_dereference(base->root);
- int count = 0;
+ struct rb_node **pp, *parent, *next;
+ struct inet_peer *p;
- while (u != peer_avl_empty) {
- int cmp = inetpeer_addr_cmp(daddr, &u->daddr);
+ pp = &base->rb_root.rb_node;
+ parent = NULL;
+ while (1) {
+ int cmp;
+
+ next = rcu_dereference_raw(*pp);
+ if (!next)
+ break;
+ parent = next;
+ p = rb_entry(parent, struct inet_peer, rb_node);
+ cmp = inetpeer_addr_cmp(daddr, &p->daddr);
if (cmp == 0) {
- /* Before taking a reference, check if this entry was
- * deleted (refcnt=0)
- */
- if (!refcount_inc_not_zero(&u->refcnt)) {
- u = NULL;
- }
- return u;
+ if (!refcount_inc_not_zero(&p->refcnt))
+ break;
+ return p;
+ }
+ if (gc_stack) {
+ if (*gc_cnt < PEER_MAX_GC)
+ gc_stack[(*gc_cnt)++] = p;
+ } else if (unlikely(read_seqretry(&base->lock, seq))) {
+ break;
}
if (cmp == -1)
- u = rcu_dereference(u->avl_left);
+ pp = &next->rb_left;
else
- u = rcu_dereference(u->avl_right);
- if (unlikely(++count == PEER_MAXDEPTH))
- break;
+ pp = &next->rb_right;
}
+ *parent_p = parent;
+ *pp_p = pp;
return NULL;
}
-/* Called with local BH disabled and the pool lock held. */
-#define lookup_rightempty(start, base) \
-({ \
- struct inet_peer *u; \
- struct inet_peer __rcu **v; \
- *stackptr++ = &start->avl_left; \
- v = &start->avl_left; \
- for (u = rcu_deref_locked(*v, base); \
- u->avl_right != peer_avl_empty_rcu;) { \
- v = &u->avl_right; \
- *stackptr++ = v; \
- u = rcu_deref_locked(*v, base); \
- } \
- u; \
-})
-
-/* Called with local BH disabled and the pool lock held.
- * Variable names are the proof of operation correctness.
- * Look into mm/map_avl.c for more detail description of the ideas.
- */
-static void peer_avl_rebalance(struct inet_peer __rcu **stack[],
- struct inet_peer __rcu ***stackend,
- struct inet_peer_base *base)
-{
- struct inet_peer __rcu **nodep;
- struct inet_peer *node, *l, *r;
- int lh, rh;
-
- while (stackend > stack) {
- nodep = *--stackend;
- node = rcu_deref_locked(*nodep, base);
- l = rcu_deref_locked(node->avl_left, base);
- r = rcu_deref_locked(node->avl_right, base);
- lh = node_height(l);
- rh = node_height(r);
- if (lh > rh + 1) { /* l: RH+2 */
- struct inet_peer *ll, *lr, *lrl, *lrr;
- int lrh;
- ll = rcu_deref_locked(l->avl_left, base);
- lr = rcu_deref_locked(l->avl_right, base);
- lrh = node_height(lr);
- if (lrh <= node_height(ll)) { /* ll: RH+1 */
- RCU_INIT_POINTER(node->avl_left, lr); /* lr: RH or RH+1 */
- RCU_INIT_POINTER(node->avl_right, r); /* r: RH */
- node->avl_height = lrh + 1; /* RH+1 or RH+2 */
- RCU_INIT_POINTER(l->avl_left, ll); /* ll: RH+1 */
- RCU_INIT_POINTER(l->avl_right, node); /* node: RH+1 or RH+2 */
- l->avl_height = node->avl_height + 1;
- RCU_INIT_POINTER(*nodep, l);
- } else { /* ll: RH, lr: RH+1 */
- lrl = rcu_deref_locked(lr->avl_left, base);/* lrl: RH or RH-1 */
- lrr = rcu_deref_locked(lr->avl_right, base);/* lrr: RH or RH-1 */
- RCU_INIT_POINTER(node->avl_left, lrr); /* lrr: RH or RH-1 */
- RCU_INIT_POINTER(node->avl_right, r); /* r: RH */
- node->avl_height = rh + 1; /* node: RH+1 */
- RCU_INIT_POINTER(l->avl_left, ll); /* ll: RH */
- RCU_INIT_POINTER(l->avl_right, lrl); /* lrl: RH or RH-1 */
- l->avl_height = rh + 1; /* l: RH+1 */
- RCU_INIT_POINTER(lr->avl_left, l); /* l: RH+1 */
- RCU_INIT_POINTER(lr->avl_right, node); /* node: RH+1 */
- lr->avl_height = rh + 2;
- RCU_INIT_POINTER(*nodep, lr);
- }
- } else if (rh > lh + 1) { /* r: LH+2 */
- struct inet_peer *rr, *rl, *rlr, *rll;
- int rlh;
- rr = rcu_deref_locked(r->avl_right, base);
- rl = rcu_deref_locked(r->avl_left, base);
- rlh = node_height(rl);
- if (rlh <= node_height(rr)) { /* rr: LH+1 */
- RCU_INIT_POINTER(node->avl_right, rl); /* rl: LH or LH+1 */
- RCU_INIT_POINTER(node->avl_left, l); /* l: LH */
- node->avl_height = rlh + 1; /* LH+1 or LH+2 */
- RCU_INIT_POINTER(r->avl_right, rr); /* rr: LH+1 */
- RCU_INIT_POINTER(r->avl_left, node); /* node: LH+1 or LH+2 */
- r->avl_height = node->avl_height + 1;
- RCU_INIT_POINTER(*nodep, r);
- } else { /* rr: RH, rl: RH+1 */
- rlr = rcu_deref_locked(rl->avl_right, base);/* rlr: LH or LH-1 */
- rll = rcu_deref_locked(rl->avl_left, base);/* rll: LH or LH-1 */
- RCU_INIT_POINTER(node->avl_right, rll); /* rll: LH or LH-1 */
- RCU_INIT_POINTER(node->avl_left, l); /* l: LH */
- node->avl_height = lh + 1; /* node: LH+1 */
- RCU_INIT_POINTER(r->avl_right, rr); /* rr: LH */
- RCU_INIT_POINTER(r->avl_left, rlr); /* rlr: LH or LH-1 */
- r->avl_height = lh + 1; /* r: LH+1 */
- RCU_INIT_POINTER(rl->avl_right, r); /* r: LH+1 */
- RCU_INIT_POINTER(rl->avl_left, node); /* node: LH+1 */
- rl->avl_height = lh + 2;
- RCU_INIT_POINTER(*nodep, rl);
- }
- } else {
- node->avl_height = (lh > rh ? lh : rh) + 1;
- }
- }
-}
-
-/* Called with local BH disabled and the pool lock held. */
-#define link_to_pool(n, base) \
-do { \
- n->avl_height = 1; \
- n->avl_left = peer_avl_empty_rcu; \
- n->avl_right = peer_avl_empty_rcu; \
- /* lockless readers can catch us now */ \
- rcu_assign_pointer(**--stackptr, n); \
- peer_avl_rebalance(stack, stackptr, base); \
-} while (0)
-
static void inetpeer_free_rcu(struct rcu_head *head)
{
kmem_cache_free(peer_cachep, container_of(head, struct inet_peer, rcu));
}
-static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base,
- struct inet_peer __rcu **stack[PEER_MAXDEPTH])
-{
- struct inet_peer __rcu ***stackptr, ***delp;
-
- if (lookup(&p->daddr, stack, base) != p)
- BUG();
- delp = stackptr - 1; /* *delp[0] == p */
- if (p->avl_left == peer_avl_empty_rcu) {
- *delp[0] = p->avl_right;
- --stackptr;
- } else {
- /* look for a node to insert instead of p */
- struct inet_peer *t;
- t = lookup_rightempty(p, base);
- BUG_ON(rcu_deref_locked(*stackptr[-1], base) != t);
- **--stackptr = t->avl_left;
- /* t is removed, t->daddr > x->daddr for any
- * x in p->avl_left subtree.
- * Put t in the old place of p. */
- RCU_INIT_POINTER(*delp[0], t);
- t->avl_left = p->avl_left;
- t->avl_right = p->avl_right;
- t->avl_height = p->avl_height;
- BUG_ON(delp[1] != &p->avl_left);
- delp[1] = &t->avl_left; /* was &p->avl_left */
- }
- peer_avl_rebalance(stack, stackptr, base);
- base->total--;
- call_rcu(&p->rcu, inetpeer_free_rcu);
-}
-
/* perform garbage collect on all items stacked during a lookup */
-static int inet_peer_gc(struct inet_peer_base *base,
- struct inet_peer __rcu **stack[PEER_MAXDEPTH],
- struct inet_peer __rcu ***stackptr)
+static void inet_peer_gc(struct inet_peer_base *base,
+ struct inet_peer *gc_stack[],
+ unsigned int gc_cnt)
{
- struct inet_peer *p, *gchead = NULL;
+ struct inet_peer *p;
__u32 delta, ttl;
- int cnt = 0;
+ int i;
if (base->total >= inet_peer_threshold)
ttl = 0; /* be aggressive */
@@ -379,43 +157,38 @@ static int inet_peer_gc(struct inet_peer_base *base,
ttl = inet_peer_maxttl
- (inet_peer_maxttl - inet_peer_minttl) / HZ *
base->total / inet_peer_threshold * HZ;
- stackptr--; /* last stack slot is peer_avl_empty */
- while (stackptr > stack) {
- stackptr--;
- p = rcu_deref_locked(**stackptr, base);
- if (refcount_read(&p->refcnt) == 1) {
- smp_rmb();
- delta = (__u32)jiffies - p->dtime;
- if (delta >= ttl && refcount_dec_if_one(&p->refcnt)) {
- p->gc_next = gchead;
- gchead = p;
- }
- }
+ for (i = 0; i < gc_cnt; i++) {
+ p = gc_stack[i];
+ delta = (__u32)jiffies - p->dtime;
+ if (delta < ttl || !refcount_dec_if_one(&p->refcnt))
+ gc_stack[i] = NULL;
}
- while ((p = gchead) != NULL) {
- gchead = p->gc_next;
- cnt++;
- unlink_from_pool(p, base, stack);
+ for (i = 0; i < gc_cnt; i++) {
+ p = gc_stack[i];
+ if (p) {
+ rb_erase(&p->rb_node, &base->rb_root);
+ base->total--;
+ call_rcu(&p->rcu, inetpeer_free_rcu);
+ }
}
- return cnt;
}
struct inet_peer *inet_getpeer(struct inet_peer_base *base,
const struct inetpeer_addr *daddr,
int create)
{
- struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr;
- struct inet_peer *p;
- unsigned int sequence;
- int invalidated, gccnt = 0;
+ struct inet_peer *p, *gc_stack[PEER_MAX_GC];
+ struct rb_node **pp, *parent;
+ unsigned int gc_cnt, seq;
+ int invalidated;
/* Attempt a lockless lookup first.
* Because of a concurrent writer, we might not find an existing entry.
*/
rcu_read_lock();
- sequence = read_seqbegin(&base->lock);
- p = lookup_rcu(daddr, base);
- invalidated = read_seqretry(&base->lock, sequence);
+ seq = read_seqbegin(&base->lock);
+ p = lookup(daddr, base, seq, NULL, &gc_cnt, &parent, &pp);
+ invalidated = read_seqretry(&base->lock, seq);
rcu_read_unlock();
if (p)
@@ -428,36 +201,31 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base,
/* retry an exact lookup, taking the lock before.
* At least, nodes should be hot in our cache.
*/
+ parent = NULL;
write_seqlock_bh(&base->lock);
-relookup:
- p = lookup(daddr, stack, base);
- if (p != peer_avl_empty) {
- refcount_inc(&p->refcnt);
- write_sequnlock_bh(&base->lock);
- return p;
- }
- if (!gccnt) {
- gccnt = inet_peer_gc(base, stack, stackptr);
- if (gccnt && create)
- goto relookup;
- }
- p = create ? kmem_cache_alloc(peer_cachep, GFP_ATOMIC) : NULL;
- if (p) {
- p->daddr = *daddr;
- refcount_set(&p->refcnt, 2);
- atomic_set(&p->rid, 0);
- p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
- p->rate_tokens = 0;
- /* 60*HZ is arbitrary, but chosen enough high so that the first
- * calculation of tokens is at its maximum.
- */
- p->rate_last = jiffies - 60*HZ;
- INIT_LIST_HEAD(&p->gc_list);
-
- /* Link the node. */
- link_to_pool(p, base);
- base->total++;
+
+ gc_cnt = 0;
+ p = lookup(daddr, base, seq, gc_stack, &gc_cnt, &parent, &pp);
+ if (!p && create) {
+ p = kmem_cache_alloc(peer_cachep, GFP_ATOMIC);
+ if (p) {
+ p->daddr = *daddr;
+ refcount_set(&p->refcnt, 2);
+ atomic_set(&p->rid, 0);
+ p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
+ p->rate_tokens = 0;
+ /* 60*HZ is arbitrary, but chosen enough high so that the first
+ * calculation of tokens is at its maximum.
+ */
+ p->rate_last = jiffies - 60*HZ;
+
+ rb_link_node(&p->rb_node, parent, pp);
+ rb_insert_color(&p->rb_node, &base->rb_root);
+ base->total++;
+ }
}
+ if (gc_cnt)
+ inet_peer_gc(base, gc_stack, gc_cnt);
write_sequnlock_bh(&base->lock);
return p;
@@ -467,8 +235,9 @@ EXPORT_SYMBOL_GPL(inet_getpeer);
void inet_putpeer(struct inet_peer *p)
{
p->dtime = (__u32)jiffies;
- smp_mb__before_atomic();
- refcount_dec(&p->refcnt);
+
+ if (refcount_dec_and_test(&p->refcnt))
+ call_rcu(&p->rcu, inetpeer_free_rcu);
}
EXPORT_SYMBOL_GPL(inet_putpeer);
@@ -513,30 +282,19 @@ bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout)
}
EXPORT_SYMBOL(inet_peer_xrlim_allow);
-static void inetpeer_inval_rcu(struct rcu_head *head)
-{
- struct inet_peer *p = container_of(head, struct inet_peer, gc_rcu);
-
- spin_lock_bh(&gc_lock);
- list_add_tail(&p->gc_list, &gc_list);
- spin_unlock_bh(&gc_lock);
-
- schedule_delayed_work(&gc_work, gc_delay);
-}
-
void inetpeer_invalidate_tree(struct inet_peer_base *base)
{
- struct inet_peer *root;
+ struct rb_node *p = rb_first(&base->rb_root);
- write_seqlock_bh(&base->lock);
+ while (p) {
+ struct inet_peer *peer = rb_entry(p, struct inet_peer, rb_node);
- root = rcu_deref_locked(base->root, base);
- if (root != peer_avl_empty) {
- base->root = peer_avl_empty_rcu;
- base->total = 0;
- call_rcu(&root->gc_rcu, inetpeer_inval_rcu);
+ p = rb_next(p);
+ rb_erase(&peer->rb_node, &base->rb_root);
+ inet_putpeer(peer);
+ cond_resched();
}
- write_sequnlock_bh(&base->lock);
+ base->total = 0;
}
EXPORT_SYMBOL(inetpeer_invalidate_tree);
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 9f0a7b96646f..2dd21c3281a1 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 9a8cfac503dc..bbf1b94942c0 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
@@ -190,12 +191,13 @@ static bool frag_expire_skip_icmp(u32 user)
/*
* Oops, a fragment queue timed out. Kill it and send an ICMP reply.
*/
-static void ip_expire(unsigned long arg)
+static void ip_expire(struct timer_list *t)
{
+ struct inet_frag_queue *frag = from_timer(frag, t, timer);
struct ipq *qp;
struct net *net;
- qp = container_of((struct inet_frag_queue *) arg, struct ipq, q);
+ qp = container_of(frag, struct ipq, q);
net = container_of(qp->q.net, struct net, ipv4.frags);
rcu_read_lock();
@@ -844,8 +846,6 @@ static void __init ip4_frags_ctl_register(void)
static int __net_init ipv4_frags_init_net(struct net *net)
{
- int res;
-
/* Fragment cache limits.
*
* The fragment memory accounting code, (tries to) account for
@@ -871,13 +871,9 @@ static int __net_init ipv4_frags_init_net(struct net *net)
net->ipv4.frags.max_dist = 64;
- res = inet_frags_init_net(&net->ipv4.frags);
- if (res)
- return res;
- res = ip4_frags_ns_ctl_register(net);
- if (res)
- inet_frags_uninit_net(&net->ipv4.frags);
- return res;
+ inet_frags_init_net(&net->ipv4.frags);
+
+ return ip4_frags_ns_ctl_register(net);
}
static void __net_exit ipv4_frags_exit_net(struct net *net)
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 7a7829e839c2..bb6239169b1a 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -48,6 +48,7 @@
#include <net/rtnetlink.h>
#include <net/gre.h>
#include <net/dst_metadata.h>
+#include <net/erspan.h>
/*
Problems & solutions
@@ -112,9 +113,12 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static int ipgre_tunnel_init(struct net_device *dev);
+static void erspan_build_header(struct sk_buff *skb,
+ __be32 id, u32 index, bool truncate);
static unsigned int ipgre_net_id __read_mostly;
static unsigned int gre_tap_net_id __read_mostly;
+static unsigned int erspan_net_id __read_mostly;
static void ipgre_err(struct sk_buff *skb, u32 info,
const struct tnl_ptk_info *tpi)
@@ -246,6 +250,79 @@ static void gre_err(struct sk_buff *skb, u32 info)
ipgre_err(skb, info, &tpi);
}
+static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
+ int gre_hdr_len)
+{
+ struct net *net = dev_net(skb->dev);
+ struct metadata_dst *tun_dst = NULL;
+ struct ip_tunnel_net *itn;
+ struct ip_tunnel *tunnel;
+ struct erspanhdr *ershdr;
+ const struct iphdr *iph;
+ __be32 index;
+ int len;
+
+ itn = net_generic(net, erspan_net_id);
+ len = gre_hdr_len + sizeof(*ershdr);
+
+ if (unlikely(!pskb_may_pull(skb, len)))
+ return -ENOMEM;
+
+ iph = ip_hdr(skb);
+ ershdr = (struct erspanhdr *)(skb->data + gre_hdr_len);
+
+ /* The original GRE header does not have key field,
+ * Use ERSPAN 10-bit session ID as key.
+ */
+ tpi->key = cpu_to_be32(ntohs(ershdr->session_id) & ID_MASK);
+ index = ershdr->md.index;
+ tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
+ tpi->flags | TUNNEL_KEY,
+ iph->saddr, iph->daddr, tpi->key);
+
+ if (tunnel) {
+ if (__iptunnel_pull_header(skb,
+ gre_hdr_len + sizeof(*ershdr),
+ htons(ETH_P_TEB),
+ false, false) < 0)
+ goto drop;
+
+ if (tunnel->collect_md) {
+ struct ip_tunnel_info *info;
+ struct erspan_metadata *md;
+ __be64 tun_id;
+ __be16 flags;
+
+ tpi->flags |= TUNNEL_KEY;
+ flags = tpi->flags;
+ tun_id = key32_to_tunnel_id(tpi->key);
+
+ tun_dst = ip_tun_rx_dst(skb, flags,
+ tun_id, sizeof(*md));
+ if (!tun_dst)
+ return PACKET_REJECT;
+
+ md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
+ if (!md)
+ return PACKET_REJECT;
+
+ md->index = index;
+ info = &tun_dst->u.tun_info;
+ info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
+ info->options_len = sizeof(*md);
+ } else {
+ tunnel->index = ntohl(index);
+ }
+
+ skb_reset_mac_header(skb);
+ ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
+ return PACKET_RCVD;
+ }
+drop:
+ kfree_skb(skb);
+ return PACKET_RCVD;
+}
+
static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
struct ip_tunnel_net *itn, int hdr_len, bool raw_proto)
{
@@ -328,6 +405,11 @@ static int gre_rcv(struct sk_buff *skb)
if (hdr_len < 0)
goto drop;
+ if (unlikely(tpi.proto == htons(ETH_P_ERSPAN))) {
+ if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
+ return 0;
+ }
+
if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
return 0;
@@ -376,39 +458,33 @@ static struct rtable *gre_get_rt(struct sk_buff *skb,
return ip_route_output_key(net, fl);
}
-static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
- __be16 proto)
+static struct rtable *prepare_fb_xmit(struct sk_buff *skb,
+ struct net_device *dev,
+ struct flowi4 *fl,
+ int tunnel_hlen)
{
struct ip_tunnel_info *tun_info;
const struct ip_tunnel_key *key;
struct rtable *rt = NULL;
- struct flowi4 fl;
int min_headroom;
- int tunnel_hlen;
- __be16 df, flags;
bool use_cache;
int err;
tun_info = skb_tunnel_info(skb);
- if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
- ip_tunnel_info_af(tun_info) != AF_INET))
- goto err_free_skb;
-
key = &tun_info->key;
use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
+
if (use_cache)
- rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl.saddr);
+ rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl->saddr);
if (!rt) {
- rt = gre_get_rt(skb, dev, &fl, key);
+ rt = gre_get_rt(skb, dev, fl, key);
if (IS_ERR(rt))
- goto err_free_skb;
+ goto err_free_skb;
if (use_cache)
dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
- fl.saddr);
+ fl->saddr);
}
- tunnel_hlen = gre_calc_hlen(key->tun_flags);
-
min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
+ tunnel_hlen + sizeof(struct iphdr);
if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
@@ -420,6 +496,37 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
if (unlikely(err))
goto err_free_rt;
}
+ return rt;
+
+err_free_rt:
+ ip_rt_put(rt);
+err_free_skb:
+ kfree_skb(skb);
+ dev->stats.tx_dropped++;
+ return NULL;
+}
+
+static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
+ __be16 proto)
+{
+ struct ip_tunnel_info *tun_info;
+ const struct ip_tunnel_key *key;
+ struct rtable *rt = NULL;
+ struct flowi4 fl;
+ int tunnel_hlen;
+ __be16 df, flags;
+
+ tun_info = skb_tunnel_info(skb);
+ if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
+ ip_tunnel_info_af(tun_info) != AF_INET))
+ goto err_free_skb;
+
+ key = &tun_info->key;
+ tunnel_hlen = gre_calc_hlen(key->tun_flags);
+
+ rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
+ if (!rt)
+ return;
/* Push Tunnel header. */
if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
@@ -442,6 +549,64 @@ err_free_skb:
dev->stats.tx_dropped++;
}
+static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
+ __be16 proto)
+{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ struct ip_tunnel_info *tun_info;
+ const struct ip_tunnel_key *key;
+ struct erspan_metadata *md;
+ struct rtable *rt = NULL;
+ bool truncate = false;
+ struct flowi4 fl;
+ int tunnel_hlen;
+ __be16 df;
+
+ tun_info = skb_tunnel_info(skb);
+ if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
+ ip_tunnel_info_af(tun_info) != AF_INET))
+ goto err_free_skb;
+
+ key = &tun_info->key;
+
+ /* ERSPAN has fixed 8 byte GRE header */
+ tunnel_hlen = 8 + sizeof(struct erspanhdr);
+
+ rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
+ if (!rt)
+ return;
+
+ if (gre_handle_offloads(skb, false))
+ goto err_free_rt;
+
+ if (skb->len > dev->mtu + dev->hard_header_len) {
+ pskb_trim(skb, dev->mtu + dev->hard_header_len);
+ truncate = true;
+ }
+
+ md = ip_tunnel_info_opts(tun_info);
+ if (!md)
+ goto err_free_rt;
+
+ erspan_build_header(skb, tunnel_id_to_key32(key->tun_id),
+ ntohl(md->index), truncate);
+
+ gre_build_header(skb, 8, TUNNEL_SEQ,
+ htons(ETH_P_ERSPAN), 0, htonl(tunnel->o_seqno++));
+
+ df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
+
+ iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
+ key->tos, key->ttl, df, false);
+ return;
+
+err_free_rt:
+ ip_rt_put(rt);
+err_free_skb:
+ kfree_skb(skb);
+ dev->stats.tx_dropped++;
+}
+
static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
{
struct ip_tunnel_info *info = skb_tunnel_info(skb);
@@ -503,6 +668,86 @@ free_skb:
return NETDEV_TX_OK;
}
+static inline u8 tos_to_cos(u8 tos)
+{
+ u8 dscp, cos;
+
+ dscp = tos >> 2;
+ cos = dscp >> 3;
+ return cos;
+}
+
+static void erspan_build_header(struct sk_buff *skb,
+ __be32 id, u32 index, bool truncate)
+{
+ struct iphdr *iphdr = ip_hdr(skb);
+ struct ethhdr *eth = eth_hdr(skb);
+ enum erspan_encap_type enc_type;
+ struct erspanhdr *ershdr;
+ struct qtag_prefix {
+ __be16 eth_type;
+ __be16 tci;
+ } *qp;
+ u16 vlan_tci = 0;
+
+ enc_type = ERSPAN_ENCAP_NOVLAN;
+
+ /* If mirrored packet has vlan tag, extract tci and
+ * perserve vlan header in the mirrored frame.
+ */
+ if (eth->h_proto == htons(ETH_P_8021Q)) {
+ qp = (struct qtag_prefix *)(skb->data + 2 * ETH_ALEN);
+ vlan_tci = ntohs(qp->tci);
+ enc_type = ERSPAN_ENCAP_INFRAME;
+ }
+
+ skb_push(skb, sizeof(*ershdr));
+ ershdr = (struct erspanhdr *)skb->data;
+ memset(ershdr, 0, sizeof(*ershdr));
+
+ ershdr->ver_vlan = htons((vlan_tci & VLAN_MASK) |
+ (ERSPAN_VERSION << VER_OFFSET));
+ ershdr->session_id = htons((u16)(ntohl(id) & ID_MASK) |
+ ((tos_to_cos(iphdr->tos) << COS_OFFSET) & COS_MASK) |
+ (enc_type << EN_OFFSET & EN_MASK) |
+ ((truncate << T_OFFSET) & T_MASK));
+ ershdr->md.index = htonl(index & INDEX_MASK);
+}
+
+static netdev_tx_t erspan_xmit(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ bool truncate = false;
+
+ if (tunnel->collect_md) {
+ erspan_fb_xmit(skb, dev, skb->protocol);
+ return NETDEV_TX_OK;
+ }
+
+ if (gre_handle_offloads(skb, false))
+ goto free_skb;
+
+ if (skb_cow_head(skb, dev->needed_headroom))
+ goto free_skb;
+
+ if (skb->len > dev->mtu + dev->hard_header_len) {
+ pskb_trim(skb, dev->mtu + dev->hard_header_len);
+ truncate = true;
+ }
+
+ /* Push ERSPAN header */
+ erspan_build_header(skb, tunnel->parms.o_key, tunnel->index, truncate);
+ tunnel->parms.o_flags &= ~TUNNEL_KEY;
+ __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_ERSPAN));
+ return NETDEV_TX_OK;
+
+free_skb:
+ kfree_skb(skb);
+ dev->stats.tx_dropped++;
+ return NETDEV_TX_OK;
+}
+
static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
struct net_device *dev)
{
@@ -528,20 +773,46 @@ free_skb:
return NETDEV_TX_OK;
}
+static void ipgre_link_update(struct net_device *dev, bool set_mtu)
+{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ int len;
+
+ len = tunnel->tun_hlen;
+ tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
+ len = tunnel->tun_hlen - len;
+ tunnel->hlen = tunnel->hlen + len;
+
+ dev->needed_headroom = dev->needed_headroom + len;
+ if (set_mtu)
+ dev->mtu = max_t(int, dev->mtu - len, 68);
+
+ if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
+ if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
+ tunnel->encap.type == TUNNEL_ENCAP_NONE) {
+ dev->features |= NETIF_F_GSO_SOFTWARE;
+ dev->hw_features |= NETIF_F_GSO_SOFTWARE;
+ }
+ dev->features |= NETIF_F_LLTX;
+ }
+}
+
static int ipgre_tunnel_ioctl(struct net_device *dev,
struct ifreq *ifr, int cmd)
{
- int err;
struct ip_tunnel_parm p;
+ int err;
if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
return -EFAULT;
+
if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
- p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
- ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
+ p.iph.ihl != 5 || (p.iph.frag_off & htons(~IP_DF)) ||
+ ((p.i_flags | p.o_flags) & (GRE_VERSION | GRE_ROUTING)))
return -EINVAL;
}
+
p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
p.o_flags = gre_flags_to_tnl_flags(p.o_flags);
@@ -549,11 +820,22 @@ static int ipgre_tunnel_ioctl(struct net_device *dev,
if (err)
return err;
+ if (cmd == SIOCCHGTUNNEL) {
+ struct ip_tunnel *t = netdev_priv(dev);
+
+ t->parms.i_flags = p.i_flags;
+ t->parms.o_flags = p.o_flags;
+
+ if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
+ ipgre_link_update(dev, true);
+ }
+
p.i_flags = gre_tnl_flags_to_gre_flags(p.i_flags);
p.o_flags = gre_tnl_flags_to_gre_flags(p.o_flags);
if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
return -EFAULT;
+
return 0;
}
@@ -766,15 +1048,14 @@ static int __net_init ipgre_init_net(struct net *net)
return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
}
-static void __net_exit ipgre_exit_net(struct net *net)
+static void __net_exit ipgre_exit_batch_net(struct list_head *list_net)
{
- struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id);
- ip_tunnel_delete_net(itn, &ipgre_link_ops);
+ ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops);
}
static struct pernet_operations ipgre_net_ops = {
.init = ipgre_init_net,
- .exit = ipgre_exit_net,
+ .exit_batch = ipgre_exit_batch_net,
.id = &ipgre_net_id,
.size = sizeof(struct ip_tunnel_net),
};
@@ -828,6 +1109,42 @@ out:
return ipgre_tunnel_validate(tb, data, extack);
}
+static int erspan_validate(struct nlattr *tb[], struct nlattr *data[],
+ struct netlink_ext_ack *extack)
+{
+ __be16 flags = 0;
+ int ret;
+
+ if (!data)
+ return 0;
+
+ ret = ipgre_tap_validate(tb, data, extack);
+ if (ret)
+ return ret;
+
+ /* ERSPAN should only have GRE sequence and key flag */
+ if (data[IFLA_GRE_OFLAGS])
+ flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
+ if (data[IFLA_GRE_IFLAGS])
+ flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
+ if (!data[IFLA_GRE_COLLECT_METADATA] &&
+ flags != (GRE_SEQ | GRE_KEY))
+ return -EINVAL;
+
+ /* ERSPAN Session ID only has 10-bit. Since we reuse
+ * 32-bit key field as ID, check it's range.
+ */
+ if (data[IFLA_GRE_IKEY] &&
+ (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK))
+ return -EINVAL;
+
+ if (data[IFLA_GRE_OKEY] &&
+ (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK))
+ return -EINVAL;
+
+ return 0;
+}
+
static int ipgre_netlink_parms(struct net_device *dev,
struct nlattr *data[],
struct nlattr *tb[],
@@ -892,6 +1209,13 @@ static int ipgre_netlink_parms(struct net_device *dev,
if (data[IFLA_GRE_FWMARK])
*fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);
+ if (data[IFLA_GRE_ERSPAN_INDEX]) {
+ t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
+
+ if (t->index & ~INDEX_MASK)
+ return -EINVAL;
+ }
+
return 0;
}
@@ -933,6 +1257,7 @@ static int gre_tap_init(struct net_device *dev)
{
__gre_tunnel_init(dev);
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+ netif_keep_dst(dev);
return ip_tunnel_init(dev);
}
@@ -949,6 +1274,39 @@ static const struct net_device_ops gre_tap_netdev_ops = {
.ndo_fill_metadata_dst = gre_fill_metadata_dst,
};
+static int erspan_tunnel_init(struct net_device *dev)
+{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ int t_hlen;
+
+ tunnel->tun_hlen = 8;
+ tunnel->parms.iph.protocol = IPPROTO_GRE;
+ tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
+ sizeof(struct erspanhdr);
+ t_hlen = tunnel->hlen + sizeof(struct iphdr);
+
+ dev->needed_headroom = LL_MAX_HEADER + t_hlen + 4;
+ dev->mtu = ETH_DATA_LEN - t_hlen - 4;
+ dev->features |= GRE_FEATURES;
+ dev->hw_features |= GRE_FEATURES;
+ dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+ netif_keep_dst(dev);
+
+ return ip_tunnel_init(dev);
+}
+
+static const struct net_device_ops erspan_netdev_ops = {
+ .ndo_init = erspan_tunnel_init,
+ .ndo_uninit = ip_tunnel_uninit,
+ .ndo_start_xmit = erspan_xmit,
+ .ndo_set_mac_address = eth_mac_addr,
+ .ndo_validate_addr = eth_validate_addr,
+ .ndo_change_mtu = ip_tunnel_change_mtu,
+ .ndo_get_stats64 = ip_tunnel_get_stats64,
+ .ndo_get_iflink = ip_tunnel_get_iflink,
+ .ndo_fill_metadata_dst = gre_fill_metadata_dst,
+};
+
static void ipgre_tap_setup(struct net_device *dev)
{
ether_setup(dev);
@@ -986,9 +1344,9 @@ static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
struct netlink_ext_ack *extack)
{
struct ip_tunnel *t = netdev_priv(dev);
- struct ip_tunnel_parm p;
struct ip_tunnel_encap ipencap;
__u32 fwmark = t->fwmark;
+ struct ip_tunnel_parm p;
int err;
if (ipgre_netlink_encap_parms(data, &ipencap)) {
@@ -1001,7 +1359,18 @@ static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
if (err < 0)
return err;
- return ip_tunnel_changelink(dev, tb, &p, fwmark);
+
+ err = ip_tunnel_changelink(dev, tb, &p, fwmark);
+ if (err < 0)
+ return err;
+
+ t->parms.i_flags = p.i_flags;
+ t->parms.o_flags = p.o_flags;
+
+ if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
+ ipgre_link_update(dev, !tb[IFLA_MTU]);
+
+ return 0;
}
static size_t ipgre_get_size(const struct net_device *dev)
@@ -1041,6 +1410,8 @@ static size_t ipgre_get_size(const struct net_device *dev)
nla_total_size(1) +
/* IFLA_GRE_FWMARK */
nla_total_size(4) +
+ /* IFLA_GRE_ERSPAN_INDEX */
+ nla_total_size(4) +
0;
}
@@ -1083,12 +1454,25 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
goto nla_put_failure;
}
+ if (t->index)
+ if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
+ goto nla_put_failure;
+
return 0;
nla_put_failure:
return -EMSGSIZE;
}
+static void erspan_setup(struct net_device *dev)
+{
+ ether_setup(dev);
+ dev->netdev_ops = &erspan_netdev_ops;
+ dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+ dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+ ip_tunnel_setup(dev, erspan_net_id);
+}
+
static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
[IFLA_GRE_LINK] = { .type = NLA_U32 },
[IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
@@ -1107,6 +1491,7 @@ static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
[IFLA_GRE_COLLECT_METADATA] = { .type = NLA_FLAG },
[IFLA_GRE_IGNORE_DF] = { .type = NLA_U8 },
[IFLA_GRE_FWMARK] = { .type = NLA_U32 },
+ [IFLA_GRE_ERSPAN_INDEX] = { .type = NLA_U32 },
};
static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
@@ -1139,6 +1524,21 @@ static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
.get_link_net = ip_tunnel_get_link_net,
};
+static struct rtnl_link_ops erspan_link_ops __read_mostly = {
+ .kind = "erspan",
+ .maxtype = IFLA_GRE_MAX,
+ .policy = ipgre_policy,
+ .priv_size = sizeof(struct ip_tunnel),
+ .setup = erspan_setup,
+ .validate = erspan_validate,
+ .newlink = ipgre_newlink,
+ .changelink = ipgre_changelink,
+ .dellink = ip_tunnel_dellink,
+ .get_size = ipgre_get_size,
+ .fill_info = ipgre_fill_info,
+ .get_link_net = ip_tunnel_get_link_net,
+};
+
struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
u8 name_assign_type)
{
@@ -1189,19 +1589,36 @@ static int __net_init ipgre_tap_init_net(struct net *net)
return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
}
-static void __net_exit ipgre_tap_exit_net(struct net *net)
+static void __net_exit ipgre_tap_exit_batch_net(struct list_head *list_net)
{
- struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id);
- ip_tunnel_delete_net(itn, &ipgre_tap_ops);
+ ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops);
}
static struct pernet_operations ipgre_tap_net_ops = {
.init = ipgre_tap_init_net,
- .exit = ipgre_tap_exit_net,
+ .exit_batch = ipgre_tap_exit_batch_net,
.id = &gre_tap_net_id,
.size = sizeof(struct ip_tunnel_net),
};
+static int __net_init erspan_init_net(struct net *net)
+{
+ return ip_tunnel_init_net(net, erspan_net_id,
+ &erspan_link_ops, "erspan0");
+}
+
+static void __net_exit erspan_exit_batch_net(struct list_head *net_list)
+{
+ ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops);
+}
+
+static struct pernet_operations erspan_net_ops = {
+ .init = erspan_init_net,
+ .exit_batch = erspan_exit_batch_net,
+ .id = &erspan_net_id,
+ .size = sizeof(struct ip_tunnel_net),
+};
+
static int __init ipgre_init(void)
{
int err;
@@ -1214,7 +1631,11 @@ static int __init ipgre_init(void)
err = register_pernet_device(&ipgre_tap_net_ops);
if (err < 0)
- goto pnet_tap_faied;
+ goto pnet_tap_failed;
+
+ err = register_pernet_device(&erspan_net_ops);
+ if (err < 0)
+ goto pnet_erspan_failed;
err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
if (err < 0) {
@@ -1230,15 +1651,23 @@ static int __init ipgre_init(void)
if (err < 0)
goto tap_ops_failed;
+ err = rtnl_link_register(&erspan_link_ops);
+ if (err < 0)
+ goto erspan_link_failed;
+
return 0;
+erspan_link_failed:
+ rtnl_link_unregister(&ipgre_tap_ops);
tap_ops_failed:
rtnl_link_unregister(&ipgre_link_ops);
rtnl_link_failed:
gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
add_proto_failed:
+ unregister_pernet_device(&erspan_net_ops);
+pnet_erspan_failed:
unregister_pernet_device(&ipgre_tap_net_ops);
-pnet_tap_faied:
+pnet_tap_failed:
unregister_pernet_device(&ipgre_net_ops);
return err;
}
@@ -1247,9 +1676,11 @@ static void __exit ipgre_fini(void)
{
rtnl_link_unregister(&ipgre_tap_ops);
rtnl_link_unregister(&ipgre_link_ops);
+ rtnl_link_unregister(&erspan_link_ops);
gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
unregister_pernet_device(&ipgre_tap_net_ops);
unregister_pernet_device(&ipgre_net_ops);
+ unregister_pernet_device(&erspan_net_ops);
}
module_init(ipgre_init);
@@ -1257,5 +1688,7 @@ module_exit(ipgre_fini);
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("gre");
MODULE_ALIAS_RTNL_LINK("gretap");
+MODULE_ALIAS_RTNL_LINK("erspan");
MODULE_ALIAS_NETDEV("gre0");
MODULE_ALIAS_NETDEV("gretap0");
+MODULE_ALIAS_NETDEV("erspan0");
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index fa2dc8f692c6..57fc13c6ab2b 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -311,9 +311,10 @@ drop:
static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
const struct iphdr *iph = ip_hdr(skb);
- struct rtable *rt;
+ int (*edemux)(struct sk_buff *skb);
struct net_device *dev = skb->dev;
- void (*edemux)(struct sk_buff *skb);
+ struct rtable *rt;
+ int err;
/* if ingress device is enslaved to an L3 master device pass the
* skb to its handler for processing
@@ -331,7 +332,9 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
ipprot = rcu_dereference(inet_protos[protocol]);
if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) {
- edemux(skb);
+ err = edemux(skb);
+ if (unlikely(err))
+ goto drop_error;
/* must reload iph, skb->head might have changed */
iph = ip_hdr(skb);
}
@@ -342,13 +345,10 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
* how the packet travels inside Linux networking.
*/
if (!skb_valid_dst(skb)) {
- int err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
- iph->tos, dev);
- if (unlikely(err)) {
- if (err == -EXDEV)
- __NET_INC_STATS(net, LINUX_MIB_IPRPFILTER);
- goto drop;
- }
+ err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
+ iph->tos, dev);
+ if (unlikely(err))
+ goto drop_error;
}
#ifdef CONFIG_IP_ROUTE_CLASSID
@@ -399,6 +399,11 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
drop:
kfree_skb(skb);
return NET_RX_DROP;
+
+drop_error:
+ if (err == -EXDEV)
+ __NET_INC_STATS(net, LINUX_MIB_IPRPFILTER);
+ goto drop;
}
/*
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 93157f2f4758..ed194d46c00e 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
@@ -86,8 +87,8 @@ void ip_options_build(struct sk_buff *skb, struct ip_options *opt,
* NOTE: dopt cannot point to skb.
*/
-int __ip_options_echo(struct ip_options *dopt, struct sk_buff *skb,
- const struct ip_options *sopt)
+int __ip_options_echo(struct net *net, struct ip_options *dopt,
+ struct sk_buff *skb, const struct ip_options *sopt)
{
unsigned char *sptr, *dptr;
int soffset, doffset;
@@ -140,7 +141,7 @@ int __ip_options_echo(struct ip_options *dopt, struct sk_buff *skb,
__be32 addr;
memcpy(&addr, dptr+soffset-1, 4);
- if (inet_addr_type(dev_net(skb_dst(skb)->dev), addr) != RTN_UNICAST) {
+ if (inet_addr_type(net, addr) != RTN_UNICAST) {
dopt->ts_needtime = 1;
soffset += 8;
}
@@ -174,9 +175,6 @@ int __ip_options_echo(struct ip_options *dopt, struct sk_buff *skb,
doffset -= 4;
}
if (doffset > 3) {
- __be32 daddr = fib_compute_spec_dst(skb);
-
- memcpy(&start[doffset-1], &daddr, 4);
dopt->faddr = faddr;
dptr[0] = start[0];
dptr[1] = doffset+3;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index e153c40c2436..e8e675be60ec 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -853,61 +853,6 @@ csum_page(struct page *page, int offset, int copy)
return csum;
}
-static inline int ip_ufo_append_data(struct sock *sk,
- struct sk_buff_head *queue,
- int getfrag(void *from, char *to, int offset, int len,
- int odd, struct sk_buff *skb),
- void *from, int length, int hh_len, int fragheaderlen,
- int transhdrlen, int maxfraglen, unsigned int flags)
-{
- struct sk_buff *skb;
- int err;
-
- /* There is support for UDP fragmentation offload by network
- * device, so create one single skb packet containing complete
- * udp datagram
- */
- skb = skb_peek_tail(queue);
- if (!skb) {
- skb = sock_alloc_send_skb(sk,
- hh_len + fragheaderlen + transhdrlen + 20,
- (flags & MSG_DONTWAIT), &err);
-
- if (!skb)
- return err;
-
- /* reserve space for Hardware header */
- skb_reserve(skb, hh_len);
-
- /* create space for UDP/IP header */
- skb_put(skb, fragheaderlen + transhdrlen);
-
- /* initialize network header pointer */
- skb_reset_network_header(skb);
-
- /* initialize protocol header pointer */
- skb->transport_header = skb->network_header + fragheaderlen;
-
- skb->csum = 0;
-
- if (flags & MSG_CONFIRM)
- skb_set_dst_pending_confirm(skb, 1);
-
- __skb_queue_tail(queue, skb);
- } else if (skb_is_gso(skb)) {
- goto append;
- }
-
- skb->ip_summed = CHECKSUM_PARTIAL;
- /* specify the length of each IP datagram fragment */
- skb_shinfo(skb)->gso_size = maxfraglen - fragheaderlen;
- skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
-
-append:
- return skb_append_datato_frags(sk, skb, getfrag, from,
- (length - transhdrlen));
-}
-
static int __ip_append_data(struct sock *sk,
struct flowi4 *fl4,
struct sk_buff_head *queue,
@@ -965,19 +910,6 @@ static int __ip_append_data(struct sock *sk,
csummode = CHECKSUM_PARTIAL;
cork->length += length;
- if ((skb && skb_is_gso(skb)) ||
- (((length + (skb ? skb->len : fragheaderlen)) > mtu) &&
- (skb_queue_len(queue) <= 1) &&
- (sk->sk_protocol == IPPROTO_UDP) &&
- (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) &&
- (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx)) {
- err = ip_ufo_append_data(sk, queue, getfrag, from, length,
- hh_len, fragheaderlen, transhdrlen,
- maxfraglen, flags);
- if (err)
- goto error;
- return 0;
- }
/* So, what's going on in the loop below?
*
@@ -1288,28 +1220,14 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
if (!skb)
return -EINVAL;
- if ((size + skb->len > mtu) &&
- (skb_queue_len(&sk->sk_write_queue) == 1) &&
- (sk->sk_protocol == IPPROTO_UDP) &&
- (rt->dst.dev->features & NETIF_F_UFO)) {
- if (skb->ip_summed != CHECKSUM_PARTIAL)
- return -EOPNOTSUPP;
-
- skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
- skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
- }
cork->length += size;
while (size > 0) {
- if (skb_is_gso(skb)) {
- len = size;
- } else {
+ /* Check if the remaining data fits into current packet. */
+ len = mtu - skb->len;
+ if (len < size)
+ len = maxfraglen - skb->len;
- /* Check if the remaining data fits into current packet. */
- len = mtu - skb->len;
- if (len < size)
- len = maxfraglen - skb->len;
- }
if (len <= 0) {
struct sk_buff *skb_prev;
int alloclen;
@@ -1603,7 +1521,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
int err;
int oif;
- if (__ip_options_echo(&replyopts.opt.opt, skb, sopt))
+ if (__ip_options_echo(net, &replyopts.opt.opt, skb, sopt))
return;
ipc.addr = daddr;
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index ecc4b4a2413e..60fb1eb7d7d8 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
@@ -80,7 +81,8 @@ static void ip_cmsg_recv_opts(struct msghdr *msg, struct sk_buff *skb)
}
-static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb)
+static void ip_cmsg_recv_retopts(struct net *net, struct msghdr *msg,
+ struct sk_buff *skb)
{
unsigned char optbuf[sizeof(struct ip_options) + 40];
struct ip_options *opt = (struct ip_options *)optbuf;
@@ -88,7 +90,7 @@ static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb)
if (IPCB(skb)->opt.optlen == 0)
return;
- if (ip_options_echo(opt, skb)) {
+ if (ip_options_echo(net, opt, skb)) {
msg->msg_flags |= MSG_CTRUNC;
return;
}
@@ -204,7 +206,7 @@ void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk,
}
if (flags & IP_CMSG_RETOPTS) {
- ip_cmsg_recv_retopts(msg, skb);
+ ip_cmsg_recv_retopts(sock_net(sk), msg, skb);
flags &= ~IP_CMSG_RETOPTS;
if (!flags)
@@ -1219,22 +1221,20 @@ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb)
* (e.g., process binds socket to eth0 for Tx which is
* redirected to loopback in the rtable/dst).
*/
+ struct rtable *rt = skb_rtable(skb);
+ bool l3slave = ipv4_l3mdev_skb(IPCB(skb)->flags);
+
if (pktinfo->ipi_ifindex == LOOPBACK_IFINDEX)
pktinfo->ipi_ifindex = inet_iif(skb);
+ else if (l3slave && rt && rt->rt_iif)
+ pktinfo->ipi_ifindex = rt->rt_iif;
pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb);
} else {
pktinfo->ipi_ifindex = 0;
pktinfo->ipi_spec_dst.s_addr = 0;
}
- /* We need to keep the dst for __ip_options_echo()
- * We could restrict the test to opt.ts_needtime || opt.srr,
- * but the following is good enough as IP options are not often used.
- */
- if (unlikely(IPCB(skb)->opt.optlen))
- skb_dst_force(skb);
- else
- skb_dst_drop(skb);
+ skb_dst_drop(skb);
}
int ip_setsockopt(struct sock *sk, int level,
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 129d1a3616f8..fe6fee728ce4 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -176,7 +176,7 @@ skip_key_lookup:
return cand;
t = rcu_dereference(itn->collect_md_tun);
- if (t)
+ if (t && t->dev->flags & IFF_UP)
return t;
if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
@@ -618,8 +618,8 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto)
ip_rt_put(rt);
goto tx_dropped;
}
- iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, key->tos,
- key->ttl, df, !net_eq(tunnel->net, dev_net(dev)));
+ iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
+ df, !net_eq(tunnel->net, dev_net(dev)));
return;
tx_error:
dev->stats.tx_errors++;
@@ -1061,16 +1061,22 @@ static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
}
}
-void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
+void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
+ struct rtnl_link_ops *ops)
{
+ struct ip_tunnel_net *itn;
+ struct net *net;
LIST_HEAD(list);
rtnl_lock();
- ip_tunnel_destroy(itn, &list, ops);
+ list_for_each_entry(net, net_list, exit_list) {
+ itn = net_generic(net, id);
+ ip_tunnel_destroy(itn, &list, ops);
+ }
unregister_netdevice_many(&list);
rtnl_unlock();
}
-EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
+EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets);
int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
struct ip_tunnel_parm *p, __u32 fwmark)
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 0192c255e508..949f432a5f04 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -168,6 +168,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
struct ip_tunnel_parm *parms = &tunnel->parms;
struct dst_entry *dst = skb_dst(skb);
struct net_device *tdev; /* Device to other host */
+ int pkt_len = skb->len;
int err;
int mtu;
@@ -197,15 +198,6 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
goto tx_error;
}
- if (tunnel->err_count > 0) {
- if (time_before(jiffies,
- tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
- tunnel->err_count--;
- dst_link_failure(skb);
- } else
- tunnel->err_count = 0;
- }
-
mtu = dst_mtu(dst);
if (skb->len > mtu) {
skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
@@ -229,7 +221,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
err = dst_output(tunnel->net, skb->sk, skb);
if (net_xmit_eval(err) == 0)
- err = skb->len;
+ err = pkt_len;
iptunnel_xmit_stats(dev, err);
return NETDEV_TX_OK;
@@ -452,15 +444,14 @@ static int __net_init vti_init_net(struct net *net)
return 0;
}
-static void __net_exit vti_exit_net(struct net *net)
+static void __net_exit vti_exit_batch_net(struct list_head *list_net)
{
- struct ip_tunnel_net *itn = net_generic(net, vti_net_id);
- ip_tunnel_delete_net(itn, &vti_link_ops);
+ ip_tunnel_delete_nets(list_net, vti_net_id, &vti_link_ops);
}
static struct pernet_operations vti_net_ops = {
.init = vti_init_net,
- .exit = vti_exit_net,
+ .exit_batch = vti_exit_batch_net,
.id = &vti_net_id,
.size = sizeof(struct ip_tunnel_net),
};
@@ -584,33 +575,6 @@ static struct rtnl_link_ops vti_link_ops __read_mostly = {
.get_link_net = ip_tunnel_get_link_net,
};
-static bool is_vti_tunnel(const struct net_device *dev)
-{
- return dev->netdev_ops == &vti_netdev_ops;
-}
-
-static int vti_device_event(struct notifier_block *unused,
- unsigned long event, void *ptr)
-{
- struct net_device *dev = netdev_notifier_info_to_dev(ptr);
- struct ip_tunnel *tunnel = netdev_priv(dev);
-
- if (!is_vti_tunnel(dev))
- return NOTIFY_DONE;
-
- switch (event) {
- case NETDEV_DOWN:
- if (!net_eq(tunnel->net, dev_net(dev)))
- xfrm_garbage_collect(tunnel->net);
- break;
- }
- return NOTIFY_DONE;
-}
-
-static struct notifier_block vti_notifier_block __read_mostly = {
- .notifier_call = vti_device_event,
-};
-
static int __init vti_init(void)
{
const char *msg;
@@ -618,8 +582,6 @@ static int __init vti_init(void)
pr_info("IPv4 over IPsec tunneling driver\n");
- register_netdevice_notifier(&vti_notifier_block);
-
msg = "tunnel device";
err = register_pernet_device(&vti_net_ops);
if (err < 0)
@@ -652,7 +614,6 @@ xfrm_proto_ah_failed:
xfrm_proto_esp_failed:
unregister_pernet_device(&vti_net_ops);
pernet_dev_failed:
- unregister_netdevice_notifier(&vti_notifier_block);
pr_err("vti init: failed to register %s\n", msg);
return err;
}
@@ -664,7 +625,6 @@ static void __exit vti_fini(void)
xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH);
xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP);
unregister_pernet_device(&vti_net_ops);
- unregister_netdevice_notifier(&vti_notifier_block);
}
module_init(vti_init);
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 4c5dfe6bd34d..abdebca848c9 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Automatic Configuration of IP -- use DHCP, BOOTP, RARP, or
* user-supplied information to configure own IP address and routes.
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index fb1ad22b5e29..c891235b4966 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -128,43 +128,68 @@ static struct rtnl_link_ops ipip_link_ops __read_mostly;
static int ipip_err(struct sk_buff *skb, u32 info)
{
-
-/* All the routers (except for Linux) return only
- 8 bytes of packet payload. It means, that precise relaying of
- ICMP in the real Internet is absolutely infeasible.
- */
+ /* All the routers (except for Linux) return only
+ * 8 bytes of packet payload. It means, that precise relaying of
+ * ICMP in the real Internet is absolutely infeasible.
+ */
struct net *net = dev_net(skb->dev);
struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
const struct iphdr *iph = (const struct iphdr *)skb->data;
- struct ip_tunnel *t;
- int err;
const int type = icmp_hdr(skb)->type;
const int code = icmp_hdr(skb)->code;
+ struct ip_tunnel *t;
+ int err = 0;
+
+ switch (type) {
+ case ICMP_DEST_UNREACH:
+ switch (code) {
+ case ICMP_SR_FAILED:
+ /* Impossible event. */
+ goto out;
+ default:
+ /* All others are translated to HOST_UNREACH.
+ * rfc2003 contains "deep thoughts" about NET_UNREACH,
+ * I believe they are just ether pollution. --ANK
+ */
+ break;
+ }
+ break;
+
+ case ICMP_TIME_EXCEEDED:
+ if (code != ICMP_EXC_TTL)
+ goto out;
+ break;
+
+ case ICMP_REDIRECT:
+ break;
+
+ default:
+ goto out;
+ }
- err = -ENOENT;
t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
iph->daddr, iph->saddr, 0);
- if (!t)
+ if (!t) {
+ err = -ENOENT;
goto out;
+ }
if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
- ipv4_update_pmtu(skb, dev_net(skb->dev), info,
- t->parms.link, 0, iph->protocol, 0);
- err = 0;
+ ipv4_update_pmtu(skb, net, info, t->parms.link, 0,
+ iph->protocol, 0);
goto out;
}
if (type == ICMP_REDIRECT) {
- ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
- iph->protocol, 0);
- err = 0;
+ ipv4_redirect(skb, net, t->parms.link, 0, iph->protocol, 0);
goto out;
}
- if (t->parms.iph.daddr == 0)
+ if (t->parms.iph.daddr == 0) {
+ err = -ENOENT;
goto out;
+ }
- err = 0;
if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
goto out;
@@ -634,15 +659,14 @@ static int __net_init ipip_init_net(struct net *net)
return ip_tunnel_init_net(net, ipip_net_id, &ipip_link_ops, "tunl0");
}
-static void __net_exit ipip_exit_net(struct net *net)
+static void __net_exit ipip_exit_batch_net(struct list_head *list_net)
{
- struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
- ip_tunnel_delete_net(itn, &ipip_link_ops);
+ ip_tunnel_delete_nets(list_net, ipip_net_id, &ipip_link_ops);
}
static struct pernet_operations ipip_net_ops = {
.init = ipip_init_net,
- .exit = ipip_exit_net,
+ .exit_batch = ipip_exit_batch_net,
.id = &ipip_net_id,
.size = sizeof(struct ip_tunnel_net),
};
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 06863ea3fc5b..40a43ad294cb 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -67,6 +67,7 @@
#include <net/fib_rules.h>
#include <linux/netconf.h>
#include <net/nexthop.h>
+#include <net/switchdev.h>
struct ipmr_rule {
struct fib_rule common;
@@ -264,6 +265,22 @@ static void __net_exit ipmr_rules_exit(struct net *net)
fib_rules_unregister(net->ipv4.mr_rules_ops);
rtnl_unlock();
}
+
+static int ipmr_rules_dump(struct net *net, struct notifier_block *nb)
+{
+ return fib_rules_dump(net, nb, RTNL_FAMILY_IPMR);
+}
+
+static unsigned int ipmr_rules_seq_read(struct net *net)
+{
+ return fib_rules_seq_read(net, RTNL_FAMILY_IPMR);
+}
+
+bool ipmr_rule_default(const struct fib_rule *rule)
+{
+ return fib_rule_matchall(rule) && rule->table == RT_TABLE_DEFAULT;
+}
+EXPORT_SYMBOL(ipmr_rule_default);
#else
#define ipmr_for_each_table(mrt, net) \
for (mrt = net->ipv4.mrt; mrt; mrt = NULL)
@@ -298,6 +315,22 @@ static void __net_exit ipmr_rules_exit(struct net *net)
net->ipv4.mrt = NULL;
rtnl_unlock();
}
+
+static int ipmr_rules_dump(struct net *net, struct notifier_block *nb)
+{
+ return 0;
+}
+
+static unsigned int ipmr_rules_seq_read(struct net *net)
+{
+ return 0;
+}
+
+bool ipmr_rule_default(const struct fib_rule *rule)
+{
+ return true;
+}
+EXPORT_SYMBOL(ipmr_rule_default);
#endif
static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg,
@@ -587,6 +620,82 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
}
#endif
+static int call_ipmr_vif_entry_notifier(struct notifier_block *nb,
+ struct net *net,
+ enum fib_event_type event_type,
+ struct vif_device *vif,
+ vifi_t vif_index, u32 tb_id)
+{
+ struct vif_entry_notifier_info info = {
+ .info = {
+ .family = RTNL_FAMILY_IPMR,
+ .net = net,
+ },
+ .dev = vif->dev,
+ .vif_index = vif_index,
+ .vif_flags = vif->flags,
+ .tb_id = tb_id,
+ };
+
+ return call_fib_notifier(nb, net, event_type, &info.info);
+}
+
+static int call_ipmr_vif_entry_notifiers(struct net *net,
+ enum fib_event_type event_type,
+ struct vif_device *vif,
+ vifi_t vif_index, u32 tb_id)
+{
+ struct vif_entry_notifier_info info = {
+ .info = {
+ .family = RTNL_FAMILY_IPMR,
+ .net = net,
+ },
+ .dev = vif->dev,
+ .vif_index = vif_index,
+ .vif_flags = vif->flags,
+ .tb_id = tb_id,
+ };
+
+ ASSERT_RTNL();
+ net->ipv4.ipmr_seq++;
+ return call_fib_notifiers(net, event_type, &info.info);
+}
+
+static int call_ipmr_mfc_entry_notifier(struct notifier_block *nb,
+ struct net *net,
+ enum fib_event_type event_type,
+ struct mfc_cache *mfc, u32 tb_id)
+{
+ struct mfc_entry_notifier_info info = {
+ .info = {
+ .family = RTNL_FAMILY_IPMR,
+ .net = net,
+ },
+ .mfc = mfc,
+ .tb_id = tb_id
+ };
+
+ return call_fib_notifier(nb, net, event_type, &info.info);
+}
+
+static int call_ipmr_mfc_entry_notifiers(struct net *net,
+ enum fib_event_type event_type,
+ struct mfc_cache *mfc, u32 tb_id)
+{
+ struct mfc_entry_notifier_info info = {
+ .info = {
+ .family = RTNL_FAMILY_IPMR,
+ .net = net,
+ },
+ .mfc = mfc,
+ .tb_id = tb_id
+ };
+
+ ASSERT_RTNL();
+ net->ipv4.ipmr_seq++;
+ return call_fib_notifiers(net, event_type, &info.info);
+}
+
/**
* vif_delete - Delete a VIF entry
* @notify: Set to 1, if the caller is a notifier_call
@@ -594,6 +703,7 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
static int vif_delete(struct mr_table *mrt, int vifi, int notify,
struct list_head *head)
{
+ struct net *net = read_pnet(&mrt->net);
struct vif_device *v;
struct net_device *dev;
struct in_device *in_dev;
@@ -603,6 +713,10 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify,
v = &mrt->vif_table[vifi];
+ if (VIF_EXISTS(mrt, vifi))
+ call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_DEL, v, vifi,
+ mrt->id);
+
write_lock_bh(&mrt_lock);
dev = v->dev;
v->dev = NULL;
@@ -652,10 +766,11 @@ static void ipmr_cache_free_rcu(struct rcu_head *head)
kmem_cache_free(mrt_cachep, c);
}
-static inline void ipmr_cache_free(struct mfc_cache *c)
+void ipmr_cache_free(struct mfc_cache *c)
{
call_rcu(&c->rcu, ipmr_cache_free_rcu);
}
+EXPORT_SYMBOL(ipmr_cache_free);
/* Destroy an unresolved cache entry, killing queued skbs
* and reporting error to netlink readers.
@@ -754,6 +869,9 @@ static int vif_add(struct net *net, struct mr_table *mrt,
struct vifctl *vifc, int mrtsock)
{
int vifi = vifc->vifc_vifi;
+ struct switchdev_attr attr = {
+ .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
+ };
struct vif_device *v = &mrt->vif_table[vifi];
struct net_device *dev;
struct in_device *in_dev;
@@ -828,6 +946,13 @@ static int vif_add(struct net *net, struct mr_table *mrt,
/* Fill in the VIF structures */
+ attr.orig_dev = dev;
+ if (!switchdev_port_attr_get(dev, &attr)) {
+ memcpy(v->dev_parent_id.id, attr.u.ppid.id, attr.u.ppid.id_len);
+ v->dev_parent_id.id_len = attr.u.ppid.id_len;
+ } else {
+ v->dev_parent_id.id_len = 0;
+ }
v->rate_limit = vifc->vifc_rate_limit;
v->local = vifc->vifc_lcl_addr.s_addr;
v->remote = vifc->vifc_rmt_addr.s_addr;
@@ -851,6 +976,7 @@ static int vif_add(struct net *net, struct mr_table *mrt,
if (vifi+1 > mrt->maxvif)
mrt->maxvif = vifi+1;
write_unlock_bh(&mrt_lock);
+ call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD, v, vifi, mrt->id);
return 0;
}
@@ -949,6 +1075,7 @@ static struct mfc_cache *ipmr_cache_alloc(void)
if (c) {
c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
c->mfc_un.res.minvif = MAXVIFS;
+ refcount_set(&c->mfc_un.res.refcount, 1);
}
return c;
}
@@ -1150,6 +1277,7 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent)
{
+ struct net *net = read_pnet(&mrt->net);
struct mfc_cache *c;
/* The entries are added/deleted only under RTNL */
@@ -1161,8 +1289,9 @@ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent)
return -ENOENT;
rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
list_del_rcu(&c->list);
+ call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c, mrt->id);
mroute_netlink_event(mrt, c, RTM_DELROUTE);
- ipmr_cache_free(c);
+ ipmr_cache_put(c);
return 0;
}
@@ -1189,6 +1318,8 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
if (!mrtsock)
c->mfc_flags |= MFC_STATIC;
write_unlock_bh(&mrt_lock);
+ call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, c,
+ mrt->id);
mroute_netlink_event(mrt, c, RTM_NEWROUTE);
return 0;
}
@@ -1238,6 +1369,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
ipmr_cache_resolve(net, mrt, uc, c);
ipmr_cache_free(uc);
}
+ call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD, c, mrt->id);
mroute_netlink_event(mrt, c, RTM_NEWROUTE);
return 0;
}
@@ -1245,6 +1377,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
/* Close the multicast socket, and clear the vif tables etc */
static void mroute_clean_tables(struct mr_table *mrt, bool all)
{
+ struct net *net = read_pnet(&mrt->net);
struct mfc_cache *c, *tmp;
LIST_HEAD(list);
int i;
@@ -1263,8 +1396,10 @@ static void mroute_clean_tables(struct mr_table *mrt, bool all)
continue;
rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
list_del_rcu(&c->list);
+ call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c,
+ mrt->id);
mroute_netlink_event(mrt, c, RTM_DELROUTE);
- ipmr_cache_free(c);
+ ipmr_cache_put(c);
}
if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
@@ -1393,6 +1528,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
case MRT_ADD_MFC:
case MRT_DEL_MFC:
parent = -1;
+ /* fall through */
case MRT_ADD_MFC_PROXY:
case MRT_DEL_MFC_PROXY:
if (optlen != sizeof(mfc)) {
@@ -1724,10 +1860,33 @@ static inline int ipmr_forward_finish(struct net *net, struct sock *sk,
return dst_output(net, sk, skb);
}
+#ifdef CONFIG_NET_SWITCHDEV
+static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt,
+ int in_vifi, int out_vifi)
+{
+ struct vif_device *out_vif = &mrt->vif_table[out_vifi];
+ struct vif_device *in_vif = &mrt->vif_table[in_vifi];
+
+ if (!skb->offload_mr_fwd_mark)
+ return false;
+ if (!out_vif->dev_parent_id.id_len || !in_vif->dev_parent_id.id_len)
+ return false;
+ return netdev_phys_item_id_same(&out_vif->dev_parent_id,
+ &in_vif->dev_parent_id);
+}
+#else
+static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt,
+ int in_vifi, int out_vifi)
+{
+ return false;
+}
+#endif
+
/* Processing handlers for ipmr_forward */
static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
- struct sk_buff *skb, struct mfc_cache *c, int vifi)
+ int in_vifi, struct sk_buff *skb,
+ struct mfc_cache *c, int vifi)
{
const struct iphdr *iph = ip_hdr(skb);
struct vif_device *vif = &mrt->vif_table[vifi];
@@ -1748,6 +1907,9 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
goto out_free;
}
+ if (ipmr_forward_offloaded(skb, mrt, in_vifi, vifi))
+ goto out_free;
+
if (vif->flags & VIFF_TUNNEL) {
rt = ip_route_output_ports(net, &fl4, NULL,
vif->remote, vif->local,
@@ -1925,8 +2087,8 @@ forward:
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
if (skb2)
- ipmr_queue_xmit(net, mrt, skb2, cache,
- psend);
+ ipmr_queue_xmit(net, mrt, true_vifi,
+ skb2, cache, psend);
}
psend = ct;
}
@@ -1937,9 +2099,10 @@ last_forward:
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
if (skb2)
- ipmr_queue_xmit(net, mrt, skb2, cache, psend);
+ ipmr_queue_xmit(net, mrt, true_vifi, skb2,
+ cache, psend);
} else {
- ipmr_queue_xmit(net, mrt, skb, cache, psend);
+ ipmr_queue_xmit(net, mrt, true_vifi, skb, cache, psend);
return;
}
}
@@ -2156,6 +2319,9 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
nla_put_u32(skb, RTA_IIF, mrt->vif_table[c->mfc_parent].dev->ifindex) < 0)
return -EMSGSIZE;
+ if (c->mfc_flags & MFC_OFFLOAD)
+ rtm->rtm_flags |= RTNH_F_OFFLOAD;
+
if (!(mp_attr = nla_nest_start(skb, RTA_MULTIPATH)))
return -EMSGSIZE;
@@ -3048,14 +3214,87 @@ static const struct net_protocol pim_protocol = {
};
#endif
+static unsigned int ipmr_seq_read(struct net *net)
+{
+ ASSERT_RTNL();
+
+ return net->ipv4.ipmr_seq + ipmr_rules_seq_read(net);
+}
+
+static int ipmr_dump(struct net *net, struct notifier_block *nb)
+{
+ struct mr_table *mrt;
+ int err;
+
+ err = ipmr_rules_dump(net, nb);
+ if (err)
+ return err;
+
+ ipmr_for_each_table(mrt, net) {
+ struct vif_device *v = &mrt->vif_table[0];
+ struct mfc_cache *mfc;
+ int vifi;
+
+ /* Notifiy on table VIF entries */
+ read_lock(&mrt_lock);
+ for (vifi = 0; vifi < mrt->maxvif; vifi++, v++) {
+ if (!v->dev)
+ continue;
+
+ call_ipmr_vif_entry_notifier(nb, net, FIB_EVENT_VIF_ADD,
+ v, vifi, mrt->id);
+ }
+ read_unlock(&mrt_lock);
+
+ /* Notify on table MFC entries */
+ list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
+ call_ipmr_mfc_entry_notifier(nb, net,
+ FIB_EVENT_ENTRY_ADD, mfc,
+ mrt->id);
+ }
+
+ return 0;
+}
+
+static const struct fib_notifier_ops ipmr_notifier_ops_template = {
+ .family = RTNL_FAMILY_IPMR,
+ .fib_seq_read = ipmr_seq_read,
+ .fib_dump = ipmr_dump,
+ .owner = THIS_MODULE,
+};
+
+static int __net_init ipmr_notifier_init(struct net *net)
+{
+ struct fib_notifier_ops *ops;
+
+ net->ipv4.ipmr_seq = 0;
+
+ ops = fib_notifier_ops_register(&ipmr_notifier_ops_template, net);
+ if (IS_ERR(ops))
+ return PTR_ERR(ops);
+ net->ipv4.ipmr_notifier_ops = ops;
+
+ return 0;
+}
+
+static void __net_exit ipmr_notifier_exit(struct net *net)
+{
+ fib_notifier_ops_unregister(net->ipv4.ipmr_notifier_ops);
+ net->ipv4.ipmr_notifier_ops = NULL;
+}
+
/* Setup for IP multicast routing */
static int __net_init ipmr_net_init(struct net *net)
{
int err;
+ err = ipmr_notifier_init(net);
+ if (err)
+ goto ipmr_notifier_fail;
+
err = ipmr_rules_init(net);
if (err < 0)
- goto fail;
+ goto ipmr_rules_fail;
#ifdef CONFIG_PROC_FS
err = -ENOMEM;
@@ -3072,7 +3311,9 @@ proc_cache_fail:
proc_vif_fail:
ipmr_rules_exit(net);
#endif
-fail:
+ipmr_rules_fail:
+ ipmr_notifier_exit(net);
+ipmr_notifier_fail:
return err;
}
@@ -3082,6 +3323,7 @@ static void __net_exit ipmr_net_exit(struct net *net)
remove_proc_entry("ip_mr_cache", net->proc_net);
remove_proc_entry("ip_mr_vif", net->proc_net);
#endif
+ ipmr_notifier_exit(net);
ipmr_rules_exit(net);
}
@@ -3114,14 +3356,14 @@ int __init ip_mr_init(void)
}
#endif
rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE,
- ipmr_rtm_getroute, ipmr_rtm_dumproute, NULL);
+ ipmr_rtm_getroute, ipmr_rtm_dumproute, 0);
rtnl_register(RTNL_FAMILY_IPMR, RTM_NEWROUTE,
- ipmr_rtm_route, NULL, NULL);
+ ipmr_rtm_route, NULL, 0);
rtnl_register(RTNL_FAMILY_IPMR, RTM_DELROUTE,
- ipmr_rtm_route, NULL, NULL);
+ ipmr_rtm_route, NULL, 0);
rtnl_register(RTNL_FAMILY_IPMR, RTM_GETLINK,
- NULL, ipmr_rtm_dumplink, NULL);
+ NULL, ipmr_rtm_dumplink, 0);
return 0;
#ifdef CONFIG_IP_PIMSM_V2
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index f462fee66ac8..adcdae358365 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the netfilter modules on top of IPv4.
#
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 0bc3c3d73e61..f88221aebc9d 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -268,14 +268,14 @@ unsigned int arpt_do_table(struct sk_buff *skb,
acpar.targinfo = t->data;
verdict = t->u.kernel.target->target(skb, &acpar);
- /* Target might have changed stuff. */
- arp = arp_hdr(skb);
-
- if (verdict == XT_CONTINUE)
+ if (verdict == XT_CONTINUE) {
+ /* Target might have changed stuff. */
+ arp = arp_hdr(skb);
e = arpt_next_entry(e);
- else
+ } else {
/* Verdict */
break;
+ }
} while (!acpar.hotdrop);
xt_write_recseq_end(addend);
local_bh_enable();
@@ -629,7 +629,27 @@ static void get_counters(const struct xt_table_info *t,
ADD_COUNTER(counters[i], bcnt, pcnt);
++i;
+ cond_resched();
+ }
+ }
+}
+
+static void get_old_counters(const struct xt_table_info *t,
+ struct xt_counters counters[])
+{
+ struct arpt_entry *iter;
+ unsigned int cpu, i;
+
+ for_each_possible_cpu(cpu) {
+ i = 0;
+ xt_entry_foreach(iter, t->entries, t->size) {
+ struct xt_counters *tmp;
+
+ tmp = xt_get_per_cpu_counter(&iter->counters, cpu);
+ ADD_COUNTER(counters[i], tmp->bcnt, tmp->pcnt);
+ ++i;
}
+ cond_resched();
}
}
@@ -909,8 +929,7 @@ static int __do_replace(struct net *net, const char *name,
(newinfo->number <= oldinfo->initial_entries))
module_put(t->me);
- /* Get the old counters, and synchronize with replace */
- get_counters(oldinfo, counters);
+ get_old_counters(oldinfo, counters);
/* Decrease module usage counts and free resource */
loc_cpu_old_entry = oldinfo->entries;
@@ -1117,7 +1136,6 @@ compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr,
struct xt_table_info *newinfo, unsigned char *base)
{
struct xt_entry_target *t;
- struct xt_target *target;
struct arpt_entry *de;
unsigned int origsize;
int h;
@@ -1132,7 +1150,6 @@ compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr,
de->target_offset = e->target_offset - (origsize - *size);
t = compat_arpt_get_target(e);
- target = t->u.kernel.target;
xt_compat_target_from_user(t, dstptr, size);
de->next_offset = e->next_offset - (origsize - *size);
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 2a55a40211cb..4cbe5e80f3bf 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -35,12 +35,6 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
MODULE_DESCRIPTION("IPv4 packet filter");
-#ifdef CONFIG_NETFILTER_DEBUG
-#define IP_NF_ASSERT(x) WARN_ON(!(x))
-#else
-#define IP_NF_ASSERT(x)
-#endif
-
void *ipt_alloc_initial_table(const struct xt_table *info)
{
return xt_alloc_initial_table(ipt, IPT);
@@ -151,7 +145,7 @@ static const char *const comments[] = {
[NF_IP_TRACE_COMMENT_POLICY] = "policy",
};
-static struct nf_loginfo trace_loginfo = {
+static const struct nf_loginfo trace_loginfo = {
.type = NF_LOG_TYPE_LOG,
.u = {
.log = {
@@ -263,7 +257,7 @@ ipt_do_table(struct sk_buff *skb,
acpar.hotdrop = false;
acpar.state = state;
- IP_NF_ASSERT(table->valid_hooks & (1 << hook));
+ WARN_ON(!(table->valid_hooks & (1 << hook)));
local_bh_disable();
addend = xt_write_recseq_begin();
private = table->private;
@@ -293,7 +287,7 @@ ipt_do_table(struct sk_buff *skb,
const struct xt_entry_match *ematch;
struct xt_counters *counter;
- IP_NF_ASSERT(e);
+ WARN_ON(!e);
if (!ip_packet_match(ip, indev, outdev,
&e->ip, acpar.fragoff)) {
no_match:
@@ -312,7 +306,7 @@ ipt_do_table(struct sk_buff *skb,
ADD_COUNTER(*counter, skb->len, 1);
t = ipt_get_target(e);
- IP_NF_ASSERT(t->u.kernel.target);
+ WARN_ON(!t->u.kernel.target);
#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
/* The packet is traced: log it */
@@ -352,13 +346,14 @@ ipt_do_table(struct sk_buff *skb,
acpar.targinfo = t->data;
verdict = t->u.kernel.target->target(skb, &acpar);
- /* Target might have changed stuff. */
- ip = ip_hdr(skb);
- if (verdict == XT_CONTINUE)
+ if (verdict == XT_CONTINUE) {
+ /* Target might have changed stuff. */
+ ip = ip_hdr(skb);
e = ipt_next_entry(e);
- else
+ } else {
/* Verdict */
break;
+ }
} while (!acpar.hotdrop);
xt_write_recseq_end(addend);
@@ -781,10 +776,31 @@ get_counters(const struct xt_table_info *t,
ADD_COUNTER(counters[i], bcnt, pcnt);
++i; /* macro does multi eval of i */
+ cond_resched();
}
}
}
+static void get_old_counters(const struct xt_table_info *t,
+ struct xt_counters counters[])
+{
+ struct ipt_entry *iter;
+ unsigned int cpu, i;
+
+ for_each_possible_cpu(cpu) {
+ i = 0;
+ xt_entry_foreach(iter, t->entries, t->size) {
+ const struct xt_counters *tmp;
+
+ tmp = xt_get_per_cpu_counter(&iter->counters, cpu);
+ ADD_COUNTER(counters[i], tmp->bcnt, tmp->pcnt);
+ ++i; /* macro does multi eval of i */
+ }
+
+ cond_resched();
+ }
+}
+
static struct xt_counters *alloc_counters(const struct xt_table *table)
{
unsigned int countersize;
@@ -1074,8 +1090,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
(newinfo->number <= oldinfo->initial_entries))
module_put(t->me);
- /* Get the old counters, and synchronize with replace */
- get_counters(oldinfo, counters);
+ get_old_counters(oldinfo, counters);
/* Decrease module usage counts and free resource */
xt_entry_foreach(iter, oldinfo->entries, oldinfo->size)
@@ -1355,7 +1370,6 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr,
struct xt_table_info *newinfo, unsigned char *base)
{
struct xt_entry_target *t;
- struct xt_target *target;
struct ipt_entry *de;
unsigned int origsize;
int h;
@@ -1374,7 +1388,6 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr,
de->target_offset = e->target_offset - (origsize - *size);
t = compat_ipt_get_target(e);
- target = t->u.kernel.target;
xt_compat_target_from_user(t, dstptr, size);
de->next_offset = e->next_offset - (origsize - *size);
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 7d72decb80f9..17b4ca562944 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -117,7 +117,8 @@ clusterip_config_entry_put(struct net *net, struct clusterip_config *c)
* functions are also incrementing the refcount on their own,
* so it's safe to remove the entry even if it's in use. */
#ifdef CONFIG_PROC_FS
- proc_remove(c->pde);
+ if (cn->procdir)
+ proc_remove(c->pde);
#endif
return;
}
@@ -624,7 +625,7 @@ arp_mangle(void *priv,
return NF_ACCEPT;
}
-static struct nf_hook_ops cip_arp_ops __read_mostly = {
+static const struct nf_hook_ops cip_arp_ops = {
.hook = arp_mangle,
.pf = NFPROTO_ARP,
.hooknum = NF_ARP_OUT,
@@ -815,6 +816,7 @@ static void clusterip_net_exit(struct net *net)
#ifdef CONFIG_PROC_FS
struct clusterip_net *cn = net_generic(net, clusterip_net_id);
proc_remove(cn->procdir);
+ cn->procdir = NULL;
#endif
nf_unregister_net_hook(net, &cip_arp_ops);
}
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c
index f1528f7175a8..f75fc6b53115 100644
--- a/net/ipv4/netfilter/ipt_SYNPROXY.c
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -330,7 +330,8 @@ static unsigned int ipv4_synproxy_hook(void *priv,
if (synproxy == NULL)
return NF_ACCEPT;
- if (nf_is_loopback_packet(skb))
+ if (nf_is_loopback_packet(skb) ||
+ ip_hdr(skb)->protocol != IPPROTO_TCP)
return NF_ACCEPT;
thoff = ip_hdrlen(skb);
@@ -416,7 +417,7 @@ static unsigned int ipv4_synproxy_hook(void *priv,
return NF_ACCEPT;
}
-static struct nf_hook_ops ipv4_synproxy_ops[] __read_mostly = {
+static const struct nf_hook_ops ipv4_synproxy_ops[] = {
{
.hook = ipv4_synproxy_hook,
.pf = NFPROTO_IPV4,
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index 138a24bc76ad..a1a07b338ccf 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -67,7 +67,7 @@ static unsigned int iptable_nat_ipv4_local_fn(void *priv,
return nf_nat_ipv4_local_fn(priv, skb, state, iptable_nat_do_chain);
}
-static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = {
+static const struct nf_hook_ops nf_nat_ipv4_ops[] = {
/* Before packet filtering, change destination */
{
.hook = iptable_nat_ipv4_in,
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 2e14ed11a35c..89af9d88ca21 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -63,13 +63,6 @@ static bool ipv4_invert_tuple(struct nf_conntrack_tuple *tuple,
return true;
}
-static void ipv4_print_tuple(struct seq_file *s,
- const struct nf_conntrack_tuple *tuple)
-{
- seq_printf(s, "src=%pI4 dst=%pI4 ",
- &tuple->src.u3.ip, &tuple->dst.u3.ip);
-}
-
static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
unsigned int *dataoff, u_int8_t *protonum)
{
@@ -174,7 +167,7 @@ static unsigned int ipv4_conntrack_local(void *priv,
/* Connection tracking may drop packets, but never alters them, so
make it the first hook. */
-static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = {
+static const struct nf_hook_ops ipv4_conntrack_ops[] = {
{
.hook = ipv4_conntrack_in,
.pf = NFPROTO_IPV4,
@@ -303,11 +296,6 @@ static int ipv4_nlattr_to_tuple(struct nlattr *tb[],
return 0;
}
-
-static int ipv4_nlattr_tuple_size(void)
-{
- return nla_policy_len(ipv4_nla_policy, CTA_IP_MAX + 1);
-}
#endif
static struct nf_sockopt_ops so_getorigdst = {
@@ -356,18 +344,17 @@ static void ipv4_hooks_unregister(struct net *net)
mutex_unlock(&register_ipv4_hooks);
}
-struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = {
+const struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 = {
.l3proto = PF_INET,
- .name = "ipv4",
.pkt_to_tuple = ipv4_pkt_to_tuple,
.invert_tuple = ipv4_invert_tuple,
- .print_tuple = ipv4_print_tuple,
.get_l4proto = ipv4_get_l4proto,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.tuple_to_nlattr = ipv4_tuple_to_nlattr,
- .nlattr_tuple_size = ipv4_nlattr_tuple_size,
.nlattr_to_tuple = ipv4_nlattr_to_tuple,
.nla_policy = ipv4_nla_policy,
+ .nla_size = NLA_ALIGN(NLA_HDRLEN + sizeof(u32)) + /* CTA_IP_V4_SRC */
+ NLA_ALIGN(NLA_HDRLEN + sizeof(u32)), /* CTA_IP_V4_DST */
#endif
.net_ns_get = ipv4_hooks_register,
.net_ns_put = ipv4_hooks_unregister,
@@ -398,24 +385,12 @@ static struct nf_conntrack_l4proto *builtin_l4proto4[] = {
static int ipv4_net_init(struct net *net)
{
- int ret = 0;
-
- ret = nf_ct_l4proto_pernet_register(net, builtin_l4proto4,
- ARRAY_SIZE(builtin_l4proto4));
- if (ret < 0)
- return ret;
- ret = nf_ct_l3proto_pernet_register(net, &nf_conntrack_l3proto_ipv4);
- if (ret < 0) {
- pr_err("nf_conntrack_ipv4: pernet registration failed\n");
- nf_ct_l4proto_pernet_unregister(net, builtin_l4proto4,
- ARRAY_SIZE(builtin_l4proto4));
- }
- return ret;
+ return nf_ct_l4proto_pernet_register(net, builtin_l4proto4,
+ ARRAY_SIZE(builtin_l4proto4));
}
static void ipv4_net_exit(struct net *net)
{
- nf_ct_l3proto_pernet_unregister(net, &nf_conntrack_l3proto_ipv4);
nf_ct_l4proto_pernet_unregister(net, builtin_l4proto4,
ARRAY_SIZE(builtin_l4proto4));
}
@@ -433,6 +408,11 @@ static int __init nf_conntrack_l3proto_ipv4_init(void)
need_conntrack();
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+ if (WARN_ON(nla_policy_len(ipv4_nla_policy, CTA_IP_MAX + 1) !=
+ nf_conntrack_l3proto_ipv4.nla_size))
+ return -EINVAL;
+#endif
ret = nf_register_sockopt(&so_getorigdst);
if (ret < 0) {
pr_err("Unable to register netfilter socket option\n");
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 73c591d8a9a8..1849fedd9b81 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -71,16 +71,6 @@ static bool icmp_invert_tuple(struct nf_conntrack_tuple *tuple,
return true;
}
-/* Print out the per-protocol part of the tuple. */
-static void icmp_print_tuple(struct seq_file *s,
- const struct nf_conntrack_tuple *tuple)
-{
- seq_printf(s, "type=%u code=%u id=%u ",
- tuple->dst.u.icmp.type,
- tuple->dst.u.icmp.code,
- ntohs(tuple->src.u.icmp.id));
-}
-
static unsigned int *icmp_get_timeouts(struct net *net)
{
return &icmp_pernet(net)->timeout;
@@ -91,8 +81,6 @@ static int icmp_packet(struct nf_conn *ct,
const struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
- u_int8_t pf,
- unsigned int hooknum,
unsigned int *timeout)
{
/* Do not immediately delete the connection after the first
@@ -137,7 +125,7 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
enum ip_conntrack_info ctinfo;
struct nf_conntrack_zone tmp;
- NF_CT_ASSERT(!skb_nfct(skb));
+ WARN_ON(skb_nfct(skb));
zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
/* Are they talking about one of our connections? */
@@ -176,6 +164,12 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
return NF_ACCEPT;
}
+static void icmp_error_log(const struct sk_buff *skb, struct net *net,
+ u8 pf, const char *msg)
+{
+ nf_l4proto_log_invalid(skb, net, pf, IPPROTO_ICMP, "%s", msg);
+}
+
/* Small and modified version of icmp_rcv */
static int
icmp_error(struct net *net, struct nf_conn *tmpl,
@@ -188,18 +182,14 @@ icmp_error(struct net *net, struct nf_conn *tmpl,
/* Not enough header? */
icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih);
if (icmph == NULL) {
- if (LOG_INVALID(net, IPPROTO_ICMP))
- nf_log_packet(net, PF_INET, 0, skb, NULL, NULL,
- NULL, "nf_ct_icmp: short packet ");
+ icmp_error_log(skb, net, pf, "short packet");
return -NF_ACCEPT;
}
/* See ip_conntrack_proto_tcp.c */
if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
nf_ip_checksum(skb, hooknum, dataoff, 0)) {
- if (LOG_INVALID(net, IPPROTO_ICMP))
- nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, NULL,
- "nf_ct_icmp: bad HW ICMP checksum ");
+ icmp_error_log(skb, net, pf, "bad hw icmp checksum");
return -NF_ACCEPT;
}
@@ -210,9 +200,7 @@ icmp_error(struct net *net, struct nf_conn *tmpl,
* discarded.
*/
if (icmph->type > NR_ICMP_TYPES) {
- if (LOG_INVALID(net, IPPROTO_ICMP))
- nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, NULL,
- "nf_ct_icmp: invalid ICMP type ");
+ icmp_error_log(skb, net, pf, "invalid icmp type");
return -NF_ACCEPT;
}
@@ -270,9 +258,14 @@ static int icmp_nlattr_to_tuple(struct nlattr *tb[],
return 0;
}
-static int icmp_nlattr_tuple_size(void)
+static unsigned int icmp_nlattr_tuple_size(void)
{
- return nla_policy_len(icmp_nla_policy, CTA_PROTO_MAX + 1);
+ static unsigned int size __read_mostly;
+
+ if (!size)
+ size = nla_policy_len(icmp_nla_policy, CTA_PROTO_MAX + 1);
+
+ return size;
}
#endif
@@ -362,10 +355,8 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly =
{
.l3proto = PF_INET,
.l4proto = IPPROTO_ICMP,
- .name = "icmp",
.pkt_to_tuple = icmp_pkt_to_tuple,
.invert_tuple = icmp_invert_tuple,
- .print_tuple = icmp_print_tuple,
.packet = icmp_packet,
.get_timeouts = icmp_get_timeouts,
.new = icmp_new,
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index 346bf7ccac08..37fe1616ca0b 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -90,7 +90,7 @@ static unsigned int ipv4_conntrack_defrag(void *priv,
return NF_ACCEPT;
}
-static struct nf_hook_ops ipv4_defrag_ops[] = {
+static const struct nf_hook_ops ipv4_defrag_ops[] = {
{
.hook = ipv4_conntrack_defrag,
.pf = NFPROTO_IPV4,
diff --git a/net/ipv4/netfilter/nf_log_arp.c b/net/ipv4/netfilter/nf_log_arp.c
index 2f3895ddc275..df5c2a2061a4 100644
--- a/net/ipv4/netfilter/nf_log_arp.c
+++ b/net/ipv4/netfilter/nf_log_arp.c
@@ -25,7 +25,7 @@
#include <linux/netfilter/xt_LOG.h>
#include <net/netfilter/nf_log.h>
-static struct nf_loginfo default_loginfo = {
+static const struct nf_loginfo default_loginfo = {
.type = NF_LOG_TYPE_LOG,
.u = {
.log = {
diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c
index c83a9963269b..4388de0e5380 100644
--- a/net/ipv4/netfilter/nf_log_ipv4.c
+++ b/net/ipv4/netfilter/nf_log_ipv4.c
@@ -24,7 +24,7 @@
#include <linux/netfilter/xt_LOG.h>
#include <net/netfilter/nf_log.h>
-static struct nf_loginfo default_loginfo = {
+static const struct nf_loginfo default_loginfo = {
.type = NF_LOG_TYPE_LOG,
.u = {
.log = {
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index 574f7ebba0b6..ac8342dcb55e 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -252,16 +252,16 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
if (set_h245_addr(skb, protoff, data, dataoff, taddr,
&ct->tuplehash[!dir].tuple.dst.u3,
htons((port & htons(1)) ? nated_port + 1 :
- nated_port)) == 0) {
- /* Save ports */
- info->rtp_port[i][dir] = rtp_port;
- info->rtp_port[i][!dir] = htons(nated_port);
- } else {
+ nated_port))) {
nf_ct_unexpect_related(rtp_exp);
nf_ct_unexpect_related(rtcp_exp);
return -1;
}
+ /* Save ports */
+ info->rtp_port[i][dir] = rtp_port;
+ info->rtp_port[i][!dir] = htons(nated_port);
+
/* Success */
pr_debug("nf_nat_h323: expect RTP %pI4:%hu->%pI4:%hu\n",
&rtp_exp->tuple.src.u3.ip,
@@ -370,15 +370,15 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct,
/* Modify signal */
if (set_h225_addr(skb, protoff, data, dataoff, taddr,
&ct->tuplehash[!dir].tuple.dst.u3,
- htons(nated_port)) == 0) {
- /* Save ports */
- info->sig_port[dir] = port;
- info->sig_port[!dir] = htons(nated_port);
- } else {
+ htons(nated_port))) {
nf_ct_unexpect_related(exp);
return -1;
}
+ /* Save ports */
+ info->sig_port[dir] = port;
+ info->sig_port[!dir] = htons(nated_port);
+
pr_debug("nf_nat_q931: expect H.245 %pI4:%hu->%pI4:%hu\n",
&exp->tuple.src.u3.ip,
ntohs(exp->tuple.src.u.tcp.port),
@@ -462,24 +462,27 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct,
/* Modify signal */
if (set_h225_addr(skb, protoff, data, 0, &taddr[idx],
&ct->tuplehash[!dir].tuple.dst.u3,
- htons(nated_port)) == 0) {
- /* Save ports */
- info->sig_port[dir] = port;
- info->sig_port[!dir] = htons(nated_port);
-
- /* Fix for Gnomemeeting */
- if (idx > 0 &&
- get_h225_addr(ct, *data, &taddr[0], &addr, &port) &&
- (ntohl(addr.ip) & 0xff000000) == 0x7f000000) {
- set_h225_addr(skb, protoff, data, 0, &taddr[0],
- &ct->tuplehash[!dir].tuple.dst.u3,
- info->sig_port[!dir]);
- }
- } else {
+ htons(nated_port))) {
nf_ct_unexpect_related(exp);
return -1;
}
+ /* Save ports */
+ info->sig_port[dir] = port;
+ info->sig_port[!dir] = htons(nated_port);
+
+ /* Fix for Gnomemeeting */
+ if (idx > 0 &&
+ get_h225_addr(ct, *data, &taddr[0], &addr, &port) &&
+ (ntohl(addr.ip) & 0xff000000) == 0x7f000000) {
+ if (set_h225_addr(skb, protoff, data, 0, &taddr[0],
+ &ct->tuplehash[!dir].tuple.dst.u3,
+ info->sig_port[!dir])) {
+ nf_ct_unexpect_related(exp);
+ return -1;
+ }
+ }
+
/* Success */
pr_debug("nf_nat_ras: expect Q.931 %pI4:%hu->%pI4:%hu\n",
&exp->tuple.src.u3.ip,
@@ -550,9 +553,9 @@ static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct,
}
/* Modify signal */
- if (!set_h225_addr(skb, protoff, data, dataoff, taddr,
- &ct->tuplehash[!dir].tuple.dst.u3,
- htons(nated_port)) == 0) {
+ if (set_h225_addr(skb, protoff, data, dataoff, taddr,
+ &ct->tuplehash[!dir].tuple.dst.u3,
+ htons(nated_port))) {
nf_ct_unexpect_related(exp);
return -1;
}
diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
index feedd759ca80..0443ca4120b0 100644
--- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
@@ -190,7 +190,7 @@ int nf_nat_icmp_reply_translation(struct sk_buff *skb,
struct nf_conntrack_tuple target;
unsigned long statusbit;
- NF_CT_ASSERT(ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY);
+ WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY);
if (!skb_make_writable(skb, hdrlen + sizeof(*inside)))
return 0;
@@ -276,7 +276,8 @@ nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
else
return NF_ACCEPT;
}
- /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
+ /* Only ICMPs can be IP_CT_IS_REPLY: */
+ /* fall through */
case IP_CT_NEW:
/* Seen it before? This can happen for loopback, retrans,
* or local packets.
@@ -306,8 +307,8 @@ nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
default:
/* ESTABLISHED */
- NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
- ctinfo == IP_CT_ESTABLISHED_REPLY);
+ WARN_ON(ctinfo != IP_CT_ESTABLISHED &&
+ ctinfo != IP_CT_ESTABLISHED_REPLY);
if (nf_nat_oif_changed(state->hook, ctinfo, nat, state->out))
goto oif_changed;
}
diff --git a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
index f39037fca923..0c366aad89cb 100644
--- a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
@@ -34,12 +34,12 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
const struct rtable *rt;
__be32 newsrc, nh;
- NF_CT_ASSERT(hooknum == NF_INET_POST_ROUTING);
+ WARN_ON(hooknum != NF_INET_POST_ROUTING);
ct = nf_ct_get(skb, &ctinfo);
- NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
- ctinfo == IP_CT_RELATED_REPLY));
+ WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
+ ctinfo == IP_CT_RELATED_REPLY)));
/* Source address is 0.0.0.0 - locally generated packet that is
* probably not supposed to be masqueraded.
@@ -96,7 +96,7 @@ static int masq_device_event(struct notifier_block *this,
* conntracks which were associated with that device,
* and forget them.
*/
- NF_CT_ASSERT(dev->ifindex != 0);
+ WARN_ON(dev->ifindex == 0);
nf_ct_iterate_cleanup_net(net, device_cmp,
(void *)(long)dev->ifindex, 0, 0);
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index eeacbdaf7cdf..5cd06ba3535d 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -132,6 +132,8 @@ void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook)
if (ip_route_me_harder(net, nskb, RTN_UNSPEC))
goto free_nskb;
+ niph = ip_hdr(nskb);
+
/* "Never happens" */
if (nskb->len > dst_mtu(skb_dst(nskb)))
goto free_nskb;
diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c
index de3681df2ce7..e50976e3c213 100644
--- a/net/ipv4/netfilter/nft_fib_ipv4.c
+++ b/net/ipv4/netfilter/nft_fib_ipv4.c
@@ -32,9 +32,10 @@ void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_fib *priv = nft_expr_priv(expr);
+ int noff = skb_network_offset(pkt->skb);
u32 *dst = &regs->data[priv->dreg];
const struct net_device *dev = NULL;
- const struct iphdr *iph;
+ struct iphdr *iph, _iph;
__be32 addr;
if (priv->flags & NFTA_FIB_F_IIF)
@@ -42,7 +43,12 @@ void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
else if (priv->flags & NFTA_FIB_F_OIF)
dev = nft_out(pkt);
- iph = ip_hdr(pkt->skb);
+ iph = skb_header_pointer(pkt->skb, noff, sizeof(_iph), &_iph);
+ if (!iph) {
+ regs->verdict.code = NFT_BREAK;
+ return;
+ }
+
if (priv->flags & NFTA_FIB_F_DADDR)
addr = iph->daddr;
else
@@ -61,8 +67,9 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_fib *priv = nft_expr_priv(expr);
+ int noff = skb_network_offset(pkt->skb);
u32 *dest = &regs->data[priv->dreg];
- const struct iphdr *iph;
+ struct iphdr *iph, _iph;
struct fib_result res;
struct flowi4 fl4 = {
.flowi4_scope = RT_SCOPE_UNIVERSE,
@@ -95,7 +102,12 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
return;
}
- iph = ip_hdr(pkt->skb);
+ iph = skb_header_pointer(pkt->skb, noff, sizeof(_iph), &_iph);
+ if (!iph) {
+ regs->verdict.code = NFT_BREAK;
+ return;
+ }
+
if (ipv4_is_zeronet(iph->saddr)) {
if (ipv4_is_lbcast(iph->daddr) ||
ipv4_is_local_multicast(iph->daddr)) {
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 43eb6567b3a0..9f37c4727861 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -206,18 +206,12 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("DelayedACKLost", LINUX_MIB_DELAYEDACKLOST),
SNMP_MIB_ITEM("ListenOverflows", LINUX_MIB_LISTENOVERFLOWS),
SNMP_MIB_ITEM("ListenDrops", LINUX_MIB_LISTENDROPS),
- SNMP_MIB_ITEM("TCPPrequeued", LINUX_MIB_TCPPREQUEUED),
- SNMP_MIB_ITEM("TCPDirectCopyFromBacklog", LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG),
- SNMP_MIB_ITEM("TCPDirectCopyFromPrequeue", LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE),
- SNMP_MIB_ITEM("TCPPrequeueDropped", LINUX_MIB_TCPPREQUEUEDROPPED),
SNMP_MIB_ITEM("TCPHPHits", LINUX_MIB_TCPHPHITS),
- SNMP_MIB_ITEM("TCPHPHitsToUser", LINUX_MIB_TCPHPHITSTOUSER),
SNMP_MIB_ITEM("TCPPureAcks", LINUX_MIB_TCPPUREACKS),
SNMP_MIB_ITEM("TCPHPAcks", LINUX_MIB_TCPHPACKS),
SNMP_MIB_ITEM("TCPRenoRecovery", LINUX_MIB_TCPRENORECOVERY),
SNMP_MIB_ITEM("TCPSackRecovery", LINUX_MIB_TCPSACKRECOVERY),
SNMP_MIB_ITEM("TCPSACKReneging", LINUX_MIB_TCPSACKRENEGING),
- SNMP_MIB_ITEM("TCPFACKReorder", LINUX_MIB_TCPFACKREORDER),
SNMP_MIB_ITEM("TCPSACKReorder", LINUX_MIB_TCPSACKREORDER),
SNMP_MIB_ITEM("TCPRenoReorder", LINUX_MIB_TCPRENOREORDER),
SNMP_MIB_ITEM("TCPTSReorder", LINUX_MIB_TCPTSREORDER),
@@ -230,14 +224,12 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TCPSackFailures", LINUX_MIB_TCPSACKFAILURES),
SNMP_MIB_ITEM("TCPLossFailures", LINUX_MIB_TCPLOSSFAILURES),
SNMP_MIB_ITEM("TCPFastRetrans", LINUX_MIB_TCPFASTRETRANS),
- SNMP_MIB_ITEM("TCPForwardRetrans", LINUX_MIB_TCPFORWARDRETRANS),
SNMP_MIB_ITEM("TCPSlowStartRetrans", LINUX_MIB_TCPSLOWSTARTRETRANS),
SNMP_MIB_ITEM("TCPTimeouts", LINUX_MIB_TCPTIMEOUTS),
SNMP_MIB_ITEM("TCPLossProbes", LINUX_MIB_TCPLOSSPROBES),
SNMP_MIB_ITEM("TCPLossProbeRecovery", LINUX_MIB_TCPLOSSPROBERECOVERY),
SNMP_MIB_ITEM("TCPRenoRecoveryFail", LINUX_MIB_TCPRENORECOVERYFAIL),
SNMP_MIB_ITEM("TCPSackRecoveryFail", LINUX_MIB_TCPSACKRECOVERYFAIL),
- SNMP_MIB_ITEM("TCPSchedulerFailed", LINUX_MIB_TCPSCHEDULERFAILED),
SNMP_MIB_ITEM("TCPRcvCollapsed", LINUX_MIB_TCPRCVCOLLAPSED),
SNMP_MIB_ITEM("TCPDSACKOldSent", LINUX_MIB_TCPDSACKOLDSENT),
SNMP_MIB_ITEM("TCPDSACKOfoSent", LINUX_MIB_TCPDSACKOFOSENT),
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index b0bb5d0a30bd..33b70bfd1122 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -122,7 +122,8 @@ void raw_unhash_sk(struct sock *sk)
EXPORT_SYMBOL_GPL(raw_unhash_sk);
struct sock *__raw_v4_lookup(struct net *net, struct sock *sk,
- unsigned short num, __be32 raddr, __be32 laddr, int dif)
+ unsigned short num, __be32 raddr, __be32 laddr,
+ int dif, int sdif)
{
sk_for_each_from(sk) {
struct inet_sock *inet = inet_sk(sk);
@@ -130,7 +131,8 @@ struct sock *__raw_v4_lookup(struct net *net, struct sock *sk,
if (net_eq(sock_net(sk), net) && inet->inet_num == num &&
!(inet->inet_daddr && inet->inet_daddr != raddr) &&
!(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) &&
- !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
+ !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif &&
+ sk->sk_bound_dev_if != sdif))
goto found; /* gotcha */
}
sk = NULL;
@@ -171,6 +173,7 @@ static int icmp_filter(const struct sock *sk, const struct sk_buff *skb)
*/
static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash)
{
+ int sdif = inet_sdif(skb);
struct sock *sk;
struct hlist_head *head;
int delivered = 0;
@@ -184,13 +187,13 @@ static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash)
net = dev_net(skb->dev);
sk = __raw_v4_lookup(net, __sk_head(head), iph->protocol,
iph->saddr, iph->daddr,
- skb->dev->ifindex);
+ skb->dev->ifindex, sdif);
while (sk) {
delivered = 1;
if ((iph->protocol != IPPROTO_ICMP || !icmp_filter(sk, skb)) &&
ip_mc_sf_allow(sk, iph->daddr, iph->saddr,
- skb->dev->ifindex)) {
+ skb->dev->ifindex, sdif)) {
struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);
/* Not releasing hash table! */
@@ -199,7 +202,7 @@ static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash)
}
sk = __raw_v4_lookup(net, sk_next(sk), iph->protocol,
iph->saddr, iph->daddr,
- skb->dev->ifindex);
+ skb->dev->ifindex, sdif);
}
out:
read_unlock(&raw_v4_hashinfo.lock);
@@ -297,12 +300,15 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info)
read_lock(&raw_v4_hashinfo.lock);
raw_sk = sk_head(&raw_v4_hashinfo.ht[hash]);
if (raw_sk) {
+ int dif = skb->dev->ifindex;
+ int sdif = inet_sdif(skb);
+
iph = (const struct iphdr *)skb->data;
net = dev_net(skb->dev);
while ((raw_sk = __raw_v4_lookup(net, raw_sk, protocol,
iph->daddr, iph->saddr,
- skb->dev->ifindex)) != NULL) {
+ dif, sdif)) != NULL) {
raw_err(raw_sk, skb, info);
raw_sk = sk_next(raw_sk);
iph = (const struct iphdr *)skb->data;
diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c
index e1a51ca68d23..c200065ef9a5 100644
--- a/net/ipv4/raw_diag.c
+++ b/net/ipv4/raw_diag.c
@@ -46,13 +46,13 @@ static struct sock *raw_lookup(struct net *net, struct sock *from,
sk = __raw_v4_lookup(net, from, r->sdiag_raw_protocol,
r->id.idiag_dst[0],
r->id.idiag_src[0],
- r->id.idiag_if);
+ r->id.idiag_if, 0);
#if IS_ENABLED(CONFIG_IPV6)
else
sk = __raw_v6_lookup(net, from, r->sdiag_raw_protocol,
(const struct in6_addr *)r->id.idiag_src,
(const struct in6_addr *)r->id.idiag_dst,
- r->id.idiag_if);
+ r->id.idiag_if, 0);
#endif
return sk;
}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 7effa62beed3..43b69af242e1 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -495,7 +495,7 @@ u32 ip_idents_reserve(u32 hash, int segs)
{
u32 *p_tstamp = ip_tstamps + hash % IP_IDENTS_SZ;
atomic_t *p_id = ip_idents + hash % IP_IDENTS_SZ;
- u32 old = ACCESS_ONCE(*p_tstamp);
+ u32 old = READ_ONCE(*p_tstamp);
u32 now = (u32)jiffies;
u32 new, delta = 0;
@@ -651,9 +651,12 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
struct fnhe_hash_bucket *hash;
struct fib_nh_exception *fnhe;
struct rtable *rt;
+ u32 genid, hval;
unsigned int i;
int depth;
- u32 hval = fnhe_hashfun(daddr);
+
+ genid = fnhe_genid(dev_net(nh->nh_dev));
+ hval = fnhe_hashfun(daddr);
spin_lock_bh(&fnhe_lock);
@@ -676,12 +679,13 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
}
if (fnhe) {
+ if (fnhe->fnhe_genid != genid)
+ fnhe->fnhe_genid = genid;
if (gw)
fnhe->fnhe_gw = gw;
- if (pmtu) {
+ if (pmtu)
fnhe->fnhe_pmtu = pmtu;
- fnhe->fnhe_expires = max(1UL, expires);
- }
+ fnhe->fnhe_expires = max(1UL, expires);
/* Update all cached dsts too */
rt = rcu_dereference(fnhe->fnhe_rth_input);
if (rt)
@@ -700,7 +704,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
fnhe->fnhe_next = hash->chain;
rcu_assign_pointer(hash->chain, fnhe);
}
- fnhe->fnhe_genid = fnhe_genid(dev_net(nh->nh_dev));
+ fnhe->fnhe_genid = genid;
fnhe->fnhe_daddr = daddr;
fnhe->fnhe_gw = gw;
fnhe->fnhe_pmtu = pmtu;
@@ -1250,7 +1254,7 @@ static void set_class_tag(struct rtable *rt, u32 tag)
static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
{
unsigned int header_size = sizeof(struct tcphdr) + sizeof(struct iphdr);
- unsigned int advmss = max_t(unsigned int, dst->dev->mtu - header_size,
+ unsigned int advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size,
ip_rt_min_advmss);
return min(advmss, IPV4_MAX_PMTU - header_size);
@@ -1267,7 +1271,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
if (mtu)
return mtu;
- mtu = dst->dev->mtu;
+ mtu = READ_ONCE(dst->dev->mtu);
if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
if (rt->rt_uses_gateway && mtu > 576)
@@ -1398,7 +1402,7 @@ static void ipv4_dst_destroy(struct dst_entry *dst)
struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst);
struct rtable *rt = (struct rtable *) dst;
- if (p != &dst_default_metrics && atomic_dec_and_test(&p->refcnt))
+ if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt))
kfree(p);
if (!list_empty(&rt->rt_uncached)) {
@@ -1456,7 +1460,7 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
dst_init_metrics(&rt->dst, fi->fib_metrics->metrics, true);
if (fi->fib_metrics != &dst_default_metrics) {
rt->dst._metrics |= DST_METRICS_REFCOUNTED;
- atomic_inc(&fi->fib_metrics->refcnt);
+ refcount_inc(&fi->fib_metrics->refcnt);
}
#ifdef CONFIG_IP_ROUTE_CLASSID
rt->dst.tclassid = nh->nh_tclassid;
@@ -1520,43 +1524,56 @@ struct rtable *rt_dst_alloc(struct net_device *dev,
EXPORT_SYMBOL(rt_dst_alloc);
/* called in rcu_read_lock() section */
-static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
- u8 tos, struct net_device *dev, int our)
+int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+ u8 tos, struct net_device *dev,
+ struct in_device *in_dev, u32 *itag)
{
- struct rtable *rth;
- struct in_device *in_dev = __in_dev_get_rcu(dev);
- unsigned int flags = RTCF_MULTICAST;
- u32 itag = 0;
int err;
/* Primary sanity checks. */
-
if (!in_dev)
return -EINVAL;
if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
skb->protocol != htons(ETH_P_IP))
- goto e_inval;
+ return -EINVAL;
if (ipv4_is_loopback(saddr) && !IN_DEV_ROUTE_LOCALNET(in_dev))
- goto e_inval;
+ return -EINVAL;
if (ipv4_is_zeronet(saddr)) {
if (!ipv4_is_local_multicast(daddr))
- goto e_inval;
+ return -EINVAL;
} else {
err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
- in_dev, &itag);
+ in_dev, itag);
if (err < 0)
- goto e_err;
+ return err;
}
+ return 0;
+}
+
+/* called in rcu_read_lock() section */
+static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+ u8 tos, struct net_device *dev, int our)
+{
+ struct in_device *in_dev = __in_dev_get_rcu(dev);
+ unsigned int flags = RTCF_MULTICAST;
+ struct rtable *rth;
+ u32 itag = 0;
+ int err;
+
+ err = ip_mc_validate_source(skb, daddr, saddr, tos, dev, in_dev, &itag);
+ if (err)
+ return err;
+
if (our)
flags |= RTCF_LOCAL;
rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST,
IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false);
if (!rth)
- goto e_nobufs;
+ return -ENOBUFS;
#ifdef CONFIG_IP_ROUTE_CLASSID
rth->dst.tclassid = itag;
@@ -1572,13 +1589,6 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
skb_dst_set(skb, &rth->dst);
return 0;
-
-e_nobufs:
- return -ENOBUFS;
-e_inval:
- return -EINVAL;
-e_err:
- return err;
}
@@ -2236,7 +2246,7 @@ add:
if (!rth)
return ERR_PTR(-ENOBUFS);
- rth->rt_iif = orig_oif ? : 0;
+ rth->rt_iif = orig_oif;
if (res->table)
rth->rt_table_id = res->table->tb_id;
@@ -2439,6 +2449,12 @@ struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4,
/* L3 master device is the loopback for that domain */
dev_out = l3mdev_master_dev_rcu(FIB_RES_DEV(*res)) ? :
net->loopback_dev;
+
+ /* make sure orig_oif points to fib result device even
+ * though packet rx/tx happens over loopback or l3mdev
+ */
+ orig_oif = FIB_RES_OIF(*res);
+
fl4->flowi4_oif = dev_out->ifindex;
flags |= RTCF_LOCAL;
goto make_route;
@@ -2501,7 +2517,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
struct rtable *ort = (struct rtable *) dst_orig;
struct rtable *rt;
- rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_NONE, 0);
+ rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_DEAD, 0);
if (rt) {
struct dst_entry *new = &rt->dst;
@@ -2763,14 +2779,21 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE)
table_id = rt->rt_table_id;
- if (rtm->rtm_flags & RTM_F_FIB_MATCH)
+ if (rtm->rtm_flags & RTM_F_FIB_MATCH) {
+ if (!res.fi) {
+ err = fib_props[res.type].error;
+ if (!err)
+ err = -EHOSTUNREACH;
+ goto errout_free;
+ }
err = fib_dump_info(skb, NETLINK_CB(in_skb).portid,
nlh->nlmsg_seq, RTM_NEWROUTE, table_id,
rt->rt_type, res.prefix, res.prefixlen,
fl4.flowi4_tos, res.fi, 0);
- else
+ } else {
err = rt_fill_info(net, dst, src, table_id, &fl4, skb,
NETLINK_CB(in_skb).portid, nlh->nlmsg_seq);
+ }
if (err < 0)
goto errout_free;
@@ -3019,7 +3042,6 @@ struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
int __init ip_rt_init(void)
{
- int rc = 0;
int cpu;
ip_idents = kmalloc(IP_IDENTS_SZ * sizeof(*ip_idents), GFP_KERNEL);
@@ -3068,14 +3090,15 @@ int __init ip_rt_init(void)
xfrm_init();
xfrm4_init();
#endif
- rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, NULL);
+ rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL,
+ RTNL_FLAG_DOIT_UNLOCKED);
#ifdef CONFIG_SYSCTL
register_pernet_subsys(&sysctl_route_ops);
#endif
register_pernet_subsys(&rt_genid_ops);
register_pernet_subsys(&ipv4_inetpeer_ops);
- return rc;
+ return 0;
}
#ifdef CONFIG_SYSCTL
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 03ad8778c395..fda37f2862c9 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -355,7 +355,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
/* We throwed the options of the initial SYN away, so we hope
* the ACK carries the same options again (see RFC1122 4.2.3.8)
*/
- ireq->opt = tcp_v4_save_options(skb);
+ RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(sock_net(sk), skb));
if (security_inet_conn_request(sk, skb, req)) {
reqsk_free(req);
@@ -385,7 +385,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
/* Try to redo what tcp_v4_send_synack did. */
req->rsk_window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW);
- tcp_select_initial_window(tcp_full_space(sk), req->mss,
+ tcp_select_initial_window(sk, tcp_full_space(sk), req->mss,
&req->rsk_rcv_wnd, &req->rsk_window_clamp,
ireq->wscale_ok, &rcv_wscale,
dst_metric(&rt->dst, RTAX_INITRWND));
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 9bf809726066..93e172118a94 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* sysctl_net_ipv4.c: sysctl interface to net IPV4 subsystem.
*
@@ -25,6 +26,7 @@
#include <net/inet_frag.h>
#include <net/ping.h>
#include <net/protocol.h>
+#include <net/netevent.h>
static int zero;
static int one = 1;
@@ -45,6 +47,9 @@ static int tcp_syn_retries_max = MAX_TCP_SYNCNT;
static int ip_ping_group_range_min[] = { 0, 0 };
static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
+/* obsolete */
+static int sysctl_tcp_low_latency __read_mostly;
+
/* Update system visible IP port range */
static void set_local_port_range(struct net *net, int range[2])
{
@@ -196,6 +201,8 @@ static int ipv4_ping_group_range(struct ctl_table *table, int write,
static int proc_tcp_congestion_control(struct ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
+ struct net *net = container_of(ctl->data, struct net,
+ ipv4.tcp_congestion_control);
char val[TCP_CA_NAME_MAX];
struct ctl_table tbl = {
.data = val,
@@ -203,11 +210,11 @@ static int proc_tcp_congestion_control(struct ctl_table *ctl, int write,
};
int ret;
- tcp_get_default_congestion_control(val);
+ tcp_get_default_congestion_control(net, val);
ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
if (write && ret == 0)
- ret = tcp_set_default_congestion_control(val);
+ ret = tcp_set_default_congestion_control(net, val);
return ret;
}
@@ -248,10 +255,12 @@ static int proc_allowed_congestion_control(struct ctl_table *ctl,
return ret;
}
-static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write,
+static int proc_tcp_fastopen_key(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)
{
+ struct net *net = container_of(table->data, struct net,
+ ipv4.sysctl_tcp_fastopen);
struct ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) };
struct tcp_fastopen_context *ctxt;
int ret;
@@ -262,7 +271,7 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write,
return -ENOMEM;
rcu_read_lock();
- ctxt = rcu_dereference(tcp_fastopen_ctx);
+ ctxt = rcu_dereference(net->ipv4.tcp_fastopen_ctx);
if (ctxt)
memcpy(user_key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH);
else
@@ -279,12 +288,8 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write,
ret = -EINVAL;
goto bad_key;
}
- /* Generate a dummy secret but don't publish it. This
- * is needed so we don't regenerate a new key on the
- * first invocation of tcp_fastopen_cookie_gen
- */
- tcp_fastopen_init_key_once(false);
- tcp_fastopen_reset_cipher(user_key, TCP_FASTOPEN_KEY_LENGTH);
+ tcp_fastopen_reset_cipher(net, NULL, user_key,
+ TCP_FASTOPEN_KEY_LENGTH);
}
bad_key:
@@ -355,11 +360,13 @@ static int proc_tfo_blackhole_detect_timeout(struct ctl_table *table,
void __user *buffer,
size_t *lenp, loff_t *ppos)
{
+ struct net *net = container_of(table->data, struct net,
+ ipv4.sysctl_tcp_fastopen_blackhole_timeout);
int ret;
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (write && ret == 0)
- tcp_fastopen_active_timeout_reset();
+ atomic_set(&net->ipv4.tfo_active_disable_times, 0);
return ret;
}
@@ -382,15 +389,25 @@ static int proc_tcp_available_ulp(struct ctl_table *ctl,
return ret;
}
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+static int proc_fib_multipath_hash_policy(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ struct net *net = container_of(table->data, struct net,
+ ipv4.sysctl_fib_multipath_hash_policy);
+ int ret;
+
+ ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+ if (write && ret == 0)
+ call_netevent_notifiers(NETEVENT_MULTIPATH_HASH_UPDATE, net);
+
+ return ret;
+}
+#endif
+
static struct ctl_table ipv4_table[] = {
{
- .procname = "tcp_retrans_collapse",
- .data = &sysctl_tcp_retrans_collapse,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
.procname = "tcp_max_orphans",
.data = &sysctl_tcp_max_orphans,
.maxlen = sizeof(int),
@@ -398,48 +415,6 @@ static struct ctl_table ipv4_table[] = {
.proc_handler = proc_dointvec
},
{
- .procname = "tcp_fastopen",
- .data = &sysctl_tcp_fastopen,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "tcp_fastopen_key",
- .mode = 0600,
- .maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10),
- .proc_handler = proc_tcp_fastopen_key,
- },
- {
- .procname = "tcp_fastopen_blackhole_timeout_sec",
- .data = &sysctl_tcp_fastopen_blackhole_timeout,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_tfo_blackhole_detect_timeout,
- .extra1 = &zero,
- },
- {
- .procname = "tcp_abort_on_overflow",
- .data = &sysctl_tcp_abort_on_overflow,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
- .procname = "tcp_stdurg",
- .data = &sysctl_tcp_stdurg,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
- .procname = "tcp_rfc1337",
- .data = &sysctl_tcp_rfc1337,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
.procname = "inet_peer_threshold",
.data = &inet_peer_threshold,
.maxlen = sizeof(int),
@@ -461,34 +436,6 @@ static struct ctl_table ipv4_table[] = {
.proc_handler = proc_dointvec_jiffies,
},
{
- .procname = "tcp_fack",
- .data = &sysctl_tcp_fack,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
- .procname = "tcp_recovery",
- .data = &sysctl_tcp_recovery,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "tcp_max_reordering",
- .data = &sysctl_tcp_max_reordering,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
- .procname = "tcp_dsack",
- .data = &sysctl_tcp_dsack,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
.procname = "tcp_mem",
.maxlen = sizeof(sysctl_tcp_mem),
.data = &sysctl_tcp_mem,
@@ -496,113 +443,12 @@ static struct ctl_table ipv4_table[] = {
.proc_handler = proc_doulongvec_minmax,
},
{
- .procname = "tcp_wmem",
- .data = &sysctl_tcp_wmem,
- .maxlen = sizeof(sysctl_tcp_wmem),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &one,
- },
- {
- .procname = "tcp_rmem",
- .data = &sysctl_tcp_rmem,
- .maxlen = sizeof(sysctl_tcp_rmem),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &one,
- },
- {
- .procname = "tcp_app_win",
- .data = &sysctl_tcp_app_win,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
- .procname = "tcp_adv_win_scale",
- .data = &sysctl_tcp_adv_win_scale,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &tcp_adv_win_scale_min,
- .extra2 = &tcp_adv_win_scale_max,
- },
- {
- .procname = "tcp_frto",
- .data = &sysctl_tcp_frto,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
- .procname = "tcp_min_rtt_wlen",
- .data = &sysctl_tcp_min_rtt_wlen,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
.procname = "tcp_low_latency",
.data = &sysctl_tcp_low_latency,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
- {
- .procname = "tcp_no_metrics_save",
- .data = &sysctl_tcp_nometrics_save,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "tcp_moderate_rcvbuf",
- .data = &sysctl_tcp_moderate_rcvbuf,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "tcp_tso_win_divisor",
- .data = &sysctl_tcp_tso_win_divisor,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "tcp_congestion_control",
- .mode = 0644,
- .maxlen = TCP_CA_NAME_MAX,
- .proc_handler = proc_tcp_congestion_control,
- },
- {
- .procname = "tcp_workaround_signed_windows",
- .data = &sysctl_tcp_workaround_signed_windows,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
- .procname = "tcp_limit_output_bytes",
- .data = &sysctl_tcp_limit_output_bytes,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
- .procname = "tcp_challenge_ack_limit",
- .data = &sysctl_tcp_challenge_ack_limit,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
- .procname = "tcp_slow_start_after_idle",
- .data = &sysctl_tcp_slow_start_after_idle,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
#ifdef CONFIG_NETLABEL
{
.procname = "cipso_cache_enable",
@@ -646,65 +492,6 @@ static struct ctl_table ipv4_table[] = {
.proc_handler = proc_allowed_congestion_control,
},
{
- .procname = "tcp_thin_linear_timeouts",
- .data = &sysctl_tcp_thin_linear_timeouts,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
- .procname = "tcp_early_retrans",
- .data = &sysctl_tcp_early_retrans,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &four,
- },
- {
- .procname = "tcp_min_tso_segs",
- .data = &sysctl_tcp_min_tso_segs,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &one,
- .extra2 = &gso_max_segs,
- },
- {
- .procname = "tcp_pacing_ss_ratio",
- .data = &sysctl_tcp_pacing_ss_ratio,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &thousand,
- },
- {
- .procname = "tcp_pacing_ca_ratio",
- .data = &sysctl_tcp_pacing_ca_ratio,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &thousand,
- },
- {
- .procname = "tcp_autocorking",
- .data = &sysctl_tcp_autocorking,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &one,
- },
- {
- .procname = "tcp_invalid_ratelimit",
- .data = &sysctl_tcp_invalid_ratelimit,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_ms_jiffies,
- },
- {
.procname = "tcp_available_ulp",
.maxlen = TCP_ULP_BUF_MAX,
.mode = 0444,
@@ -973,6 +760,13 @@ static struct ctl_table ipv4_net_table[] = {
},
#endif
{
+ .procname = "tcp_congestion_control",
+ .data = &init_net.ipv4.tcp_congestion_control,
+ .mode = 0644,
+ .maxlen = TCP_CA_NAME_MAX,
+ .proc_handler = proc_tcp_congestion_control,
+ },
+ {
.procname = "tcp_keepalive_time",
.data = &init_net.ipv4.sysctl_tcp_keepalive_time,
.maxlen = sizeof(int),
@@ -1082,6 +876,28 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
+ {
+ .procname = "tcp_fastopen",
+ .data = &init_net.ipv4.sysctl_tcp_fastopen,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "tcp_fastopen_key",
+ .mode = 0600,
+ .data = &init_net.ipv4.sysctl_tcp_fastopen,
+ .maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10),
+ .proc_handler = proc_tcp_fastopen_key,
+ },
+ {
+ .procname = "tcp_fastopen_blackhole_timeout_sec",
+ .data = &init_net.ipv4.sysctl_tcp_fastopen_blackhole_timeout,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_tfo_blackhole_detect_timeout,
+ .extra1 = &zero,
+ },
#ifdef CONFIG_IP_ROUTE_MULTIPATH
{
.procname = "fib_multipath_use_neigh",
@@ -1097,7 +913,7 @@ static struct ctl_table ipv4_net_table[] = {
.data = &init_net.ipv4.sysctl_fib_multipath_hash_policy,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_fib_multipath_hash_policy,
.extra1 = &zero,
.extra2 = &one,
},
@@ -1141,6 +957,216 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
+ {
+ .procname = "tcp_early_retrans",
+ .data = &init_net.ipv4.sysctl_tcp_early_retrans,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &four,
+ },
+ {
+ .procname = "tcp_recovery",
+ .data = &init_net.ipv4.sysctl_tcp_recovery,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "tcp_thin_linear_timeouts",
+ .data = &init_net.ipv4.sysctl_tcp_thin_linear_timeouts,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "tcp_slow_start_after_idle",
+ .data = &init_net.ipv4.sysctl_tcp_slow_start_after_idle,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "tcp_retrans_collapse",
+ .data = &init_net.ipv4.sysctl_tcp_retrans_collapse,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "tcp_stdurg",
+ .data = &init_net.ipv4.sysctl_tcp_stdurg,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "tcp_rfc1337",
+ .data = &init_net.ipv4.sysctl_tcp_rfc1337,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "tcp_abort_on_overflow",
+ .data = &init_net.ipv4.sysctl_tcp_abort_on_overflow,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "tcp_fack",
+ .data = &init_net.ipv4.sysctl_tcp_fack,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "tcp_max_reordering",
+ .data = &init_net.ipv4.sysctl_tcp_max_reordering,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "tcp_dsack",
+ .data = &init_net.ipv4.sysctl_tcp_dsack,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "tcp_app_win",
+ .data = &init_net.ipv4.sysctl_tcp_app_win,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "tcp_adv_win_scale",
+ .data = &init_net.ipv4.sysctl_tcp_adv_win_scale,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &tcp_adv_win_scale_min,
+ .extra2 = &tcp_adv_win_scale_max,
+ },
+ {
+ .procname = "tcp_frto",
+ .data = &init_net.ipv4.sysctl_tcp_frto,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "tcp_no_metrics_save",
+ .data = &init_net.ipv4.sysctl_tcp_nometrics_save,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "tcp_moderate_rcvbuf",
+ .data = &init_net.ipv4.sysctl_tcp_moderate_rcvbuf,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "tcp_tso_win_divisor",
+ .data = &init_net.ipv4.sysctl_tcp_tso_win_divisor,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "tcp_workaround_signed_windows",
+ .data = &init_net.ipv4.sysctl_tcp_workaround_signed_windows,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "tcp_limit_output_bytes",
+ .data = &init_net.ipv4.sysctl_tcp_limit_output_bytes,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "tcp_challenge_ack_limit",
+ .data = &init_net.ipv4.sysctl_tcp_challenge_ack_limit,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "tcp_min_tso_segs",
+ .data = &init_net.ipv4.sysctl_tcp_min_tso_segs,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &one,
+ .extra2 = &gso_max_segs,
+ },
+ {
+ .procname = "tcp_min_rtt_wlen",
+ .data = &init_net.ipv4.sysctl_tcp_min_rtt_wlen,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "tcp_autocorking",
+ .data = &init_net.ipv4.sysctl_tcp_autocorking,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
+ {
+ .procname = "tcp_invalid_ratelimit",
+ .data = &init_net.ipv4.sysctl_tcp_invalid_ratelimit,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_ms_jiffies,
+ },
+ {
+ .procname = "tcp_pacing_ss_ratio",
+ .data = &init_net.ipv4.sysctl_tcp_pacing_ss_ratio,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &thousand,
+ },
+ {
+ .procname = "tcp_pacing_ca_ratio",
+ .data = &init_net.ipv4.sysctl_tcp_pacing_ca_ratio,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &thousand,
+ },
+ {
+ .procname = "tcp_wmem",
+ .data = &init_net.ipv4.sysctl_tcp_wmem,
+ .maxlen = sizeof(init_net.ipv4.sysctl_tcp_wmem),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &one,
+ },
+ {
+ .procname = "tcp_rmem",
+ .data = &init_net.ipv4.sysctl_tcp_rmem,
+ .maxlen = sizeof(init_net.ipv4.sysctl_tcp_rmem),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &one,
+ },
{ }
};
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 71ce33decd97..bf97317e6c97 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -269,6 +269,8 @@
#include <linux/err.h>
#include <linux/time.h>
#include <linux/slab.h>
+#include <linux/errqueue.h>
+#include <linux/static_key.h>
#include <net/icmp.h>
#include <net/inet_common.h>
@@ -281,24 +283,22 @@
#include <asm/ioctls.h>
#include <net/busy_poll.h>
-int sysctl_tcp_min_tso_segs __read_mostly = 2;
-
-int sysctl_tcp_autocorking __read_mostly = 1;
+#include <trace/events/tcp.h>
struct percpu_counter tcp_orphan_count;
EXPORT_SYMBOL_GPL(tcp_orphan_count);
long sysctl_tcp_mem[3] __read_mostly;
-int sysctl_tcp_wmem[3] __read_mostly;
-int sysctl_tcp_rmem[3] __read_mostly;
-
EXPORT_SYMBOL(sysctl_tcp_mem);
-EXPORT_SYMBOL(sysctl_tcp_rmem);
-EXPORT_SYMBOL(sysctl_tcp_wmem);
atomic_long_t tcp_memory_allocated; /* Current allocated memory. */
EXPORT_SYMBOL(tcp_memory_allocated);
+#if IS_ENABLED(CONFIG_SMC)
+DEFINE_STATIC_KEY_FALSE(tcp_have_smc);
+EXPORT_SYMBOL(tcp_have_smc);
+#endif
+
/*
* Current number of TCP sockets.
*/
@@ -388,6 +388,19 @@ static int retrans_to_secs(u8 retrans, int timeout, int rto_max)
return period;
}
+static u64 tcp_compute_delivery_rate(const struct tcp_sock *tp)
+{
+ u32 rate = READ_ONCE(tp->rate_delivered);
+ u32 intv = READ_ONCE(tp->rate_interval_us);
+ u64 rate64 = 0;
+
+ if (rate && intv) {
+ rate64 = (u64)rate * tp->mss_cache * USEC_PER_SEC;
+ do_div(rate64, intv);
+ }
+ return rate64;
+}
+
/* Address-family independent initialization for a tcp_sock.
*
* NOTE: A lot of things set to zero explicitly by call to
@@ -399,9 +412,10 @@ void tcp_init_sock(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
tp->out_of_order_queue = RB_ROOT;
+ sk->tcp_rtx_queue = RB_ROOT;
tcp_init_xmit_timers(sk);
- tcp_prequeue_init(tp);
INIT_LIST_HEAD(&tp->tsq_node);
+ INIT_LIST_HEAD(&tp->tsorted_sent_queue);
icsk->icsk_rto = TCP_TIMEOUT_INIT;
tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
@@ -428,6 +442,7 @@ void tcp_init_sock(struct sock *sk)
tcp_assign_congestion_control(sk);
tp->tsoffset = 0;
+ tp->rack.reo_wnd_steps = 1;
sk->sk_state = TCP_CLOSE;
@@ -436,15 +451,29 @@ void tcp_init_sock(struct sock *sk)
icsk->icsk_sync_mss = tcp_sync_mss;
- sk->sk_sndbuf = sysctl_tcp_wmem[1];
- sk->sk_rcvbuf = sysctl_tcp_rmem[1];
+ sk->sk_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[1];
+ sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
sk_sockets_allocated_inc(sk);
}
EXPORT_SYMBOL(tcp_init_sock);
-static void tcp_tx_timestamp(struct sock *sk, u16 tsflags, struct sk_buff *skb)
+void tcp_init_transfer(struct sock *sk, int bpf_op)
{
+ struct inet_connection_sock *icsk = inet_csk(sk);
+
+ tcp_mtup_init(sk);
+ icsk->icsk_af_ops->rebuild_header(sk);
+ tcp_init_metrics(sk);
+ tcp_call_bpf(sk, bpf_op);
+ tcp_init_congestion_control(sk);
+ tcp_init_buffer_space(sk);
+}
+
+static void tcp_tx_timestamp(struct sock *sk, u16 tsflags)
+{
+ struct sk_buff *skb = tcp_write_queue_tail(sk);
+
if (tsflags && skb) {
struct skb_shared_info *shinfo = skb_shinfo(skb);
struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
@@ -662,7 +691,7 @@ static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb,
int size_goal)
{
return skb->len < size_goal &&
- sysctl_tcp_autocorking &&
+ sock_net(sk)->ipv4.sysctl_tcp_autocorking &&
skb != tcp_write_queue_head(sk) &&
refcount_read(&sk->sk_wmem_alloc) > skb->truesize;
}
@@ -673,10 +702,9 @@ static void tcp_push(struct sock *sk, int flags, int mss_now,
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
- if (!tcp_send_head(sk))
- return;
-
skb = tcp_write_queue_tail(sk);
+ if (!skb)
+ return;
if (!(flags & MSG_MORE) || forced_push(tp))
tcp_mark_push(tp, skb);
@@ -856,6 +884,7 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
* available to the caller, no more, no less.
*/
skb->reserved_tailroom = skb->end - skb->tail - size;
+ INIT_LIST_HEAD(&skb->tcp_tsorted_anchor);
return skb;
}
__kfree_skb(skb);
@@ -935,14 +964,14 @@ ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
int copy, i;
bool can_coalesce;
- if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0 ||
+ if (!skb || (copy = size_goal - skb->len) <= 0 ||
!tcp_skb_can_collapse_to(skb)) {
new_segment:
if (!sk_stream_memory_free(sk))
goto wait_for_sndbuf;
skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation,
- skb_queue_empty(&sk->sk_write_queue));
+ tcp_rtx_and_write_queues_empty(sk));
if (!skb)
goto wait_for_memory;
@@ -1014,7 +1043,7 @@ wait_for_memory:
out:
if (copied) {
- tcp_tx_timestamp(sk, sk->sk_tsflags, tcp_write_queue_tail(sk));
+ tcp_tx_timestamp(sk, sk->sk_tsflags);
if (!(flags & MSG_SENDPAGE_NOTLAST))
tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
}
@@ -1034,23 +1063,29 @@ out_err:
}
EXPORT_SYMBOL_GPL(do_tcp_sendpages);
-int tcp_sendpage(struct sock *sk, struct page *page, int offset,
- size_t size, int flags)
+int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset,
+ size_t size, int flags)
{
- ssize_t res;
-
if (!(sk->sk_route_caps & NETIF_F_SG) ||
!sk_check_csum_caps(sk))
- return sock_no_sendpage(sk->sk_socket, page, offset, size,
- flags);
-
- lock_sock(sk);
+ return sock_no_sendpage_locked(sk, page, offset, size, flags);
tcp_rate_check_app_limited(sk); /* is sending application-limited? */
- res = do_tcp_sendpages(sk, page, offset, size, flags);
+ return do_tcp_sendpages(sk, page, offset, size, flags);
+}
+EXPORT_SYMBOL_GPL(tcp_sendpage_locked);
+
+int tcp_sendpage(struct sock *sk, struct page *page, int offset,
+ size_t size, int flags)
+{
+ int ret;
+
+ lock_sock(sk);
+ ret = tcp_sendpage_locked(sk, page, offset, size, flags);
release_sock(sk);
- return res;
+
+ return ret;
}
EXPORT_SYMBOL(tcp_sendpage);
@@ -1107,7 +1142,7 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
struct sockaddr *uaddr = msg->msg_name;
int err, flags;
- if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) ||
+ if (!(sock_net(sk)->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) ||
(uaddr && msg->msg_namelen >= sizeof(uaddr->sa_family) &&
uaddr->sa_family == AF_UNSPEC))
return -EOPNOTSUPP;
@@ -1144,9 +1179,10 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
return err;
}
-int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
+int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
{
struct tcp_sock *tp = tcp_sk(sk);
+ struct ubuf_info *uarg = NULL;
struct sk_buff *skb;
struct sockcm_cookie sockc;
int flags, err, copied = 0;
@@ -1155,9 +1191,25 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
bool sg;
long timeo;
- lock_sock(sk);
-
flags = msg->msg_flags;
+
+ if (flags & MSG_ZEROCOPY && size) {
+ if (sk->sk_state != TCP_ESTABLISHED) {
+ err = -EINVAL;
+ goto out_err;
+ }
+
+ skb = tcp_write_queue_tail(sk);
+ uarg = sock_zerocopy_realloc(sk, size, skb_zcopy(skb));
+ if (!uarg) {
+ err = -ENOBUFS;
+ goto out_err;
+ }
+
+ if (!(sk_check_csum_caps(sk) && sk->sk_route_caps & NETIF_F_SG))
+ uarg->zerocopy = 0;
+ }
+
if (unlikely(flags & MSG_FASTOPEN || inet_sk(sk)->defer_connect)) {
err = tcp_sendmsg_fastopen(sk, msg, &copied_syn, size);
if (err == -EINPROGRESS && copied_syn > 0)
@@ -1223,7 +1275,7 @@ restart:
int max = size_goal;
skb = tcp_write_queue_tail(sk);
- if (tcp_send_head(sk)) {
+ if (skb) {
if (skb->ip_summed == CHECKSUM_NONE)
max = mss_now;
copy = max - skb->len;
@@ -1243,7 +1295,7 @@ new_segment:
process_backlog = false;
goto restart;
}
- first_skb = skb_queue_empty(&sk->sk_write_queue);
+ first_skb = tcp_rtx_and_write_queues_empty(sk);
skb = sk_stream_alloc_skb(sk,
select_size(sk, sg, first_skb),
sk->sk_allocation,
@@ -1281,7 +1333,7 @@ new_segment:
err = skb_add_data_nocache(sk, skb, &msg->msg_iter, copy);
if (err)
goto do_fault;
- } else {
+ } else if (!uarg || !uarg->zerocopy) {
bool merge = true;
int i = skb_shinfo(skb)->nr_frags;
struct page_frag *pfrag = sk_page_frag(sk);
@@ -1319,6 +1371,13 @@ new_segment:
page_ref_inc(pfrag->page);
}
pfrag->offset += copy;
+ } else {
+ err = skb_zerocopy_iter_stream(sk, skb, msg, copy, uarg);
+ if (err == -EMSGSIZE || err == -EEXIST)
+ goto new_segment;
+ if (err < 0)
+ goto do_error;
+ copy = err;
}
if (!copied)
@@ -1361,11 +1420,11 @@ wait_for_memory:
out:
if (copied) {
- tcp_tx_timestamp(sk, sockc.tsflags, tcp_write_queue_tail(sk));
+ tcp_tx_timestamp(sk, sockc.tsflags);
tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
}
out_nopush:
- release_sock(sk);
+ sock_zerocopy_put(uarg);
return copied + copied_syn;
do_fault:
@@ -1382,6 +1441,7 @@ do_error:
if (copied + copied_syn)
goto out;
out_err:
+ sock_zerocopy_put_abort(uarg);
err = sk_stream_error(sk, flags, err);
/* make sure we wake any epoll edge trigger waiter */
if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 &&
@@ -1389,9 +1449,20 @@ out_err:
sk->sk_write_space(sk);
tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
}
- release_sock(sk);
return err;
}
+EXPORT_SYMBOL_GPL(tcp_sendmsg_locked);
+
+int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
+{
+ int ret;
+
+ lock_sock(sk);
+ ret = tcp_sendmsg_locked(sk, msg, size);
+ release_sock(sk);
+
+ return ret;
+}
EXPORT_SYMBOL(tcp_sendmsg);
/*
@@ -1450,6 +1521,13 @@ static int tcp_peek_sndq(struct sock *sk, struct msghdr *msg, int len)
/* XXX -- need to support SO_PEEK_OFF */
+ skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
+ err = skb_copy_datagram_msg(skb, 0, msg, skb->len);
+ if (err)
+ return err;
+ copied += skb->len;
+ }
+
skb_queue_walk(&sk->sk_write_queue, skb) {
err = skb_copy_datagram_msg(skb, 0, msg, skb->len);
if (err)
@@ -1525,20 +1603,6 @@ static void tcp_cleanup_rbuf(struct sock *sk, int copied)
tcp_send_ack(sk);
}
-static void tcp_prequeue_process(struct sock *sk)
-{
- struct sk_buff *skb;
- struct tcp_sock *tp = tcp_sk(sk);
-
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPREQUEUED);
-
- while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
- sk_backlog_rcv(sk, skb);
-
- /* Clear memory counter. */
- tp->ucopy.memory = 0;
-}
-
static struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
{
struct sk_buff *skb;
@@ -1652,6 +1716,61 @@ int tcp_peek_len(struct socket *sock)
}
EXPORT_SYMBOL(tcp_peek_len);
+static void tcp_update_recv_tstamps(struct sk_buff *skb,
+ struct scm_timestamping *tss)
+{
+ if (skb->tstamp)
+ tss->ts[0] = ktime_to_timespec(skb->tstamp);
+ else
+ tss->ts[0] = (struct timespec) {0};
+
+ if (skb_hwtstamps(skb)->hwtstamp)
+ tss->ts[2] = ktime_to_timespec(skb_hwtstamps(skb)->hwtstamp);
+ else
+ tss->ts[2] = (struct timespec) {0};
+}
+
+/* Similar to __sock_recv_timestamp, but does not require an skb */
+void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk,
+ struct scm_timestamping *tss)
+{
+ struct timeval tv;
+ bool has_timestamping = false;
+
+ if (tss->ts[0].tv_sec || tss->ts[0].tv_nsec) {
+ if (sock_flag(sk, SOCK_RCVTSTAMP)) {
+ if (sock_flag(sk, SOCK_RCVTSTAMPNS)) {
+ put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
+ sizeof(tss->ts[0]), &tss->ts[0]);
+ } else {
+ tv.tv_sec = tss->ts[0].tv_sec;
+ tv.tv_usec = tss->ts[0].tv_nsec / 1000;
+
+ put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
+ sizeof(tv), &tv);
+ }
+ }
+
+ if (sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE)
+ has_timestamping = true;
+ else
+ tss->ts[0] = (struct timespec) {0};
+ }
+
+ if (tss->ts[2].tv_sec || tss->ts[2].tv_nsec) {
+ if (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE)
+ has_timestamping = true;
+ else
+ tss->ts[2] = (struct timespec) {0};
+ }
+
+ if (has_timestamping) {
+ tss->ts[1] = (struct timespec) {0};
+ put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING,
+ sizeof(*tss), tss);
+ }
+}
+
/*
* This routine copies from a sock struct into the user buffer.
*
@@ -1671,9 +1790,10 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
int err;
int target; /* Read at least this many bytes */
long timeo;
- struct task_struct *user_recv = NULL;
struct sk_buff *skb, *last;
u32 urg_hole = 0;
+ struct scm_timestamping tss;
+ bool has_tss = false;
if (unlikely(flags & MSG_ERRQUEUE))
return inet_recv_error(sk, msg, len, addr_len);
@@ -1806,51 +1926,6 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
tcp_cleanup_rbuf(sk, copied);
- if (!sysctl_tcp_low_latency && tp->ucopy.task == user_recv) {
- /* Install new reader */
- if (!user_recv && !(flags & (MSG_TRUNC | MSG_PEEK))) {
- user_recv = current;
- tp->ucopy.task = user_recv;
- tp->ucopy.msg = msg;
- }
-
- tp->ucopy.len = len;
-
- WARN_ON(tp->copied_seq != tp->rcv_nxt &&
- !(flags & (MSG_PEEK | MSG_TRUNC)));
-
- /* Ugly... If prequeue is not empty, we have to
- * process it before releasing socket, otherwise
- * order will be broken at second iteration.
- * More elegant solution is required!!!
- *
- * Look: we have the following (pseudo)queues:
- *
- * 1. packets in flight
- * 2. backlog
- * 3. prequeue
- * 4. receive_queue
- *
- * Each queue can be processed only if the next ones
- * are empty. At this point we have empty receive_queue.
- * But prequeue _can_ be not empty after 2nd iteration,
- * when we jumped to start of loop because backlog
- * processing added something to receive_queue.
- * We cannot release_sock(), because backlog contains
- * packets arrived _after_ prequeued ones.
- *
- * Shortly, algorithm is clear --- to process all
- * the queues in order. We could make it more directly,
- * requeueing packets from backlog to prequeue, if
- * is not empty. It is more elegant, but eats cycles,
- * unfortunately.
- */
- if (!skb_queue_empty(&tp->ucopy.prequeue))
- goto do_prequeue;
-
- /* __ Set realtime policy in scheduler __ */
- }
-
if (copied >= target) {
/* Do not sleep, just process backlog. */
release_sock(sk);
@@ -1859,31 +1934,6 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
sk_wait_data(sk, &timeo, last);
}
- if (user_recv) {
- int chunk;
-
- /* __ Restore normal policy in scheduler __ */
-
- chunk = len - tp->ucopy.len;
- if (chunk != 0) {
- NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk);
- len -= chunk;
- copied += chunk;
- }
-
- if (tp->rcv_nxt == tp->copied_seq &&
- !skb_queue_empty(&tp->ucopy.prequeue)) {
-do_prequeue:
- tcp_prequeue_process(sk);
-
- chunk = len - tp->ucopy.len;
- if (chunk != 0) {
- NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
- len -= chunk;
- copied += chunk;
- }
- }
- }
if ((flags & MSG_PEEK) &&
(peek_seq - copied - urg_hole != tp->copied_seq)) {
net_dbg_ratelimited("TCP(%s:%d): Application bug, race in MSG_PEEK\n",
@@ -1941,6 +1991,10 @@ skip_copy:
if (used + offset < skb->len)
continue;
+ if (TCP_SKB_CB(skb)->has_rxtstamp) {
+ tcp_update_recv_tstamps(skb, &tss);
+ has_tss = true;
+ }
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
goto found_fin_ok;
if (!(flags & MSG_PEEK))
@@ -1955,29 +2009,13 @@ skip_copy:
break;
} while (len > 0);
- if (user_recv) {
- if (!skb_queue_empty(&tp->ucopy.prequeue)) {
- int chunk;
-
- tp->ucopy.len = copied > 0 ? len : 0;
-
- tcp_prequeue_process(sk);
-
- if (copied > 0 && (chunk = len - tp->ucopy.len) != 0) {
- NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
- len -= chunk;
- copied += chunk;
- }
- }
-
- tp->ucopy.task = NULL;
- tp->ucopy.len = 0;
- }
-
/* According to UNIX98, msg_name/msg_namelen are ignored
* on connected socket. I was just happy when found this 8) --ANK
*/
+ if (has_tss)
+ tcp_recv_timestamp(msg, sk, &tss);
+
/* Clean up data we have read: This will do ACK frames. */
tcp_cleanup_rbuf(sk, copied);
@@ -2002,6 +2040,8 @@ void tcp_set_state(struct sock *sk, int state)
{
int oldstate = sk->sk_state;
+ trace_tcp_set_state(sk, oldstate, state);
+
switch (state) {
case TCP_ESTABLISHED:
if (oldstate != TCP_ESTABLISHED)
@@ -2289,6 +2329,37 @@ static inline bool tcp_need_reset(int state)
TCPF_FIN_WAIT2 | TCPF_SYN_RECV);
}
+static void tcp_rtx_queue_purge(struct sock *sk)
+{
+ struct rb_node *p = rb_first(&sk->tcp_rtx_queue);
+
+ while (p) {
+ struct sk_buff *skb = rb_to_skb(p);
+
+ p = rb_next(p);
+ /* Since we are deleting whole queue, no need to
+ * list_del(&skb->tcp_tsorted_anchor)
+ */
+ tcp_rtx_queue_unlink(skb, sk);
+ sk_wmem_free_skb(sk, skb);
+ }
+}
+
+void tcp_write_queue_purge(struct sock *sk)
+{
+ struct sk_buff *skb;
+
+ tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
+ while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
+ tcp_skb_tsorted_anchor_cleanup(skb);
+ sk_wmem_free_skb(sk, skb);
+ }
+ tcp_rtx_queue_purge(sk);
+ INIT_LIST_HEAD(&tcp_sk(sk)->tsorted_sent_queue);
+ sk_mem_reclaim(sk);
+ tcp_clear_all_retrans_hints(tcp_sk(sk));
+}
+
int tcp_disconnect(struct sock *sk, int flags)
{
struct inet_sock *inet = inet_sk(sk);
@@ -2347,7 +2418,6 @@ int tcp_disconnect(struct sock *sk, int flags)
* issue in __tcp_select_window()
*/
icsk->icsk_ack.rcv_mss = TCP_MIN_MSS;
- tcp_init_send_head(sk);
memset(&tp->rx_opt, 0, sizeof(tp->rx_opt));
__sk_dst_reset(sk);
dst_release(sk->sk_rx_dst);
@@ -2439,8 +2509,6 @@ static int tcp_repair_options_est(struct sock *sk,
return -EINVAL;
tp->rx_opt.sack_ok |= TCP_SACK_SEEN;
- if (sysctl_tcp_fack)
- tcp_enable_fack(tp);
break;
case TCPOPT_TIMESTAMP:
if (opt.opt_val != 0)
@@ -2481,7 +2549,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
name[val] = 0;
lock_sock(sk);
- err = tcp_set_congestion_control(sk, name, true);
+ err = tcp_set_congestion_control(sk, name, true, true);
release_sock(sk);
return err;
}
@@ -2503,6 +2571,17 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
release_sock(sk);
return err;
}
+ case TCP_FASTOPEN_KEY: {
+ __u8 key[TCP_FASTOPEN_KEY_LENGTH];
+
+ if (optlen != sizeof(key))
+ return -EINVAL;
+
+ if (copy_from_user(key, optval, optlen))
+ return -EFAULT;
+
+ return tcp_fastopen_reset_cipher(net, sk, key, sizeof(key));
+ }
default:
/* fallthru */
break;
@@ -2734,7 +2813,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
case TCP_FASTOPEN:
if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE |
TCPF_LISTEN))) {
- tcp_fastopen_init_key_once(true);
+ tcp_fastopen_init_key_once(net);
fastopen_queue_tune(sk, val);
} else {
@@ -2744,7 +2823,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
case TCP_FASTOPEN_CONNECT:
if (val > 1 || val < 0) {
err = -EINVAL;
- } else if (sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) {
+ } else if (net->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) {
if (sk->sk_state == TCP_CLOSE)
tp->fastopen_connect = val;
else
@@ -2753,6 +2832,14 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
err = -EOPNOTSUPP;
}
break;
+ case TCP_FASTOPEN_NO_COOKIE:
+ if (val > 1 || val < 0)
+ err = -EINVAL;
+ else if (!((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
+ err = -EINVAL;
+ else
+ tp->fastopen_no_cookie = val;
+ break;
case TCP_TIMESTAMP:
if (!tp->repair)
err = -EPERM;
@@ -2823,7 +2910,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
{
const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */
const struct inet_connection_sock *icsk = inet_csk(sk);
- u32 now, intv;
+ u32 now;
u64 rate64;
bool slow;
u32 rate;
@@ -2890,7 +2977,6 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
info->tcpi_lost = tp->lost_out;
info->tcpi_retrans = tp->retrans_out;
- info->tcpi_fackets = tp->fackets_out;
now = tcp_jiffies32;
info->tcpi_last_data_sent = jiffies_to_msecs(now - tp->lsndtime);
@@ -2922,13 +3008,9 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
info->tcpi_data_segs_out = tp->data_segs_out;
info->tcpi_delivery_rate_app_limited = tp->rate_app_limited ? 1 : 0;
- rate = READ_ONCE(tp->rate_delivered);
- intv = READ_ONCE(tp->rate_interval_us);
- if (rate && intv) {
- rate64 = (u64)rate * tp->mss_cache * USEC_PER_SEC;
- do_div(rate64, intv);
+ rate64 = tcp_compute_delivery_rate(tp);
+ if (rate64)
info->tcpi_delivery_rate = rate64;
- }
unlock_sock_fast(sk, slow);
}
EXPORT_SYMBOL_GPL(tcp_get_info);
@@ -2938,8 +3020,12 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
const struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *stats;
struct tcp_info info;
+ u64 rate64;
+ u32 rate;
- stats = alloc_skb(5 * nla_total_size_64bit(sizeof(u64)), GFP_ATOMIC);
+ stats = alloc_skb(7 * nla_total_size_64bit(sizeof(u64)) +
+ 3 * nla_total_size(sizeof(u32)) +
+ 2 * nla_total_size(sizeof(u8)), GFP_ATOMIC);
if (!stats)
return NULL;
@@ -2954,6 +3040,20 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
tp->data_segs_out, TCP_NLA_PAD);
nla_put_u64_64bit(stats, TCP_NLA_TOTAL_RETRANS,
tp->total_retrans, TCP_NLA_PAD);
+
+ rate = READ_ONCE(sk->sk_pacing_rate);
+ rate64 = rate != ~0U ? rate : ~0ULL;
+ nla_put_u64_64bit(stats, TCP_NLA_PACING_RATE, rate64, TCP_NLA_PAD);
+
+ rate64 = tcp_compute_delivery_rate(tp);
+ nla_put_u64_64bit(stats, TCP_NLA_DELIVERY_RATE, rate64, TCP_NLA_PAD);
+
+ nla_put_u32(stats, TCP_NLA_SND_CWND, tp->snd_cwnd);
+ nla_put_u32(stats, TCP_NLA_REORDERING, tp->reordering);
+ nla_put_u32(stats, TCP_NLA_MIN_RTT, tcp_min_rtt(tp));
+
+ nla_put_u8(stats, TCP_NLA_RECUR_RETRANS, inet_csk(sk)->icsk_retransmits);
+ nla_put_u8(stats, TCP_NLA_DELIVERY_RATE_APP_LMT, !!tp->rate_app_limited);
return stats;
}
@@ -3075,6 +3175,28 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
return -EFAULT;
return 0;
+ case TCP_FASTOPEN_KEY: {
+ __u8 key[TCP_FASTOPEN_KEY_LENGTH];
+ struct tcp_fastopen_context *ctx;
+
+ if (get_user(len, optlen))
+ return -EFAULT;
+
+ rcu_read_lock();
+ ctx = rcu_dereference(icsk->icsk_accept_queue.fastopenq.ctx);
+ if (ctx)
+ memcpy(key, ctx->key, sizeof(key));
+ else
+ len = 0;
+ rcu_read_unlock();
+
+ len = min_t(unsigned int, len, sizeof(key));
+ if (put_user(len, optlen))
+ return -EFAULT;
+ if (copy_to_user(optval, key, len))
+ return -EFAULT;
+ return 0;
+ }
case TCP_THIN_LINEAR_TIMEOUTS:
val = tp->thin_lto;
break;
@@ -3137,6 +3259,10 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
val = tp->fastopen_connect;
break;
+ case TCP_FASTOPEN_NO_COOKIE:
+ val = tp->fastopen_no_cookie;
+ break;
+
case TCP_TIMESTAMP:
val = tcp_time_stamp_raw() + tp->tsoffset;
break;
@@ -3502,13 +3628,13 @@ void __init tcp_init(void)
max_wshare = min(4UL*1024*1024, limit);
max_rshare = min(6UL*1024*1024, limit);
- sysctl_tcp_wmem[0] = SK_MEM_QUANTUM;
- sysctl_tcp_wmem[1] = 16*1024;
- sysctl_tcp_wmem[2] = max(64*1024, max_wshare);
+ init_net.ipv4.sysctl_tcp_wmem[0] = SK_MEM_QUANTUM;
+ init_net.ipv4.sysctl_tcp_wmem[1] = 16*1024;
+ init_net.ipv4.sysctl_tcp_wmem[2] = max(64*1024, max_wshare);
- sysctl_tcp_rmem[0] = SK_MEM_QUANTUM;
- sysctl_tcp_rmem[1] = 87380;
- sysctl_tcp_rmem[2] = max(87380, max_rshare);
+ init_net.ipv4.sysctl_tcp_rmem[0] = SK_MEM_QUANTUM;
+ init_net.ipv4.sysctl_tcp_rmem[1] = 87380;
+ init_net.ipv4.sysctl_tcp_rmem[2] = max(87380, max_rshare);
pr_info("Hash tables configured (established %u bind %u)\n",
tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size);
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index 609965f0e298..fc3614377413 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -49,7 +49,6 @@ MODULE_PARM_DESC(smooth_part, "log(B/(B*Smin))/log(B/(B-1))+B, # of RTT from Wma
struct bictcp {
u32 cnt; /* increase cwnd by 1 after ACKs */
u32 last_max_cwnd; /* last maximum snd_cwnd */
- u32 loss_cwnd; /* congestion window at last loss */
u32 last_cwnd; /* the last snd_cwnd */
u32 last_time; /* time when updated last_cwnd */
u32 epoch_start; /* beginning of an epoch */
@@ -72,7 +71,6 @@ static void bictcp_init(struct sock *sk)
struct bictcp *ca = inet_csk_ca(sk);
bictcp_reset(ca);
- ca->loss_cwnd = 0;
if (initial_ssthresh)
tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
@@ -172,22 +170,12 @@ static u32 bictcp_recalc_ssthresh(struct sock *sk)
else
ca->last_max_cwnd = tp->snd_cwnd;
- ca->loss_cwnd = tp->snd_cwnd;
-
if (tp->snd_cwnd <= low_window)
return max(tp->snd_cwnd >> 1U, 2U);
else
return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U);
}
-static u32 bictcp_undo_cwnd(struct sock *sk)
-{
- const struct tcp_sock *tp = tcp_sk(sk);
- const struct bictcp *ca = inet_csk_ca(sk);
-
- return max(tp->snd_cwnd, ca->loss_cwnd);
-}
-
static void bictcp_state(struct sock *sk, u8 new_state)
{
if (new_state == TCP_CA_Loss)
@@ -214,7 +202,7 @@ static struct tcp_congestion_ops bictcp __read_mostly = {
.ssthresh = bictcp_recalc_ssthresh,
.cong_avoid = bictcp_cong_avoid,
.set_state = bictcp_state,
- .undo_cwnd = bictcp_undo_cwnd,
+ .undo_cwnd = tcp_reno_undo_cwnd,
.pkts_acked = bictcp_acked,
.owner = THIS_MODULE,
.name = "bic",
diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c
index 50a0f3e51d5b..06fbe102a425 100644
--- a/net/ipv4/tcp_cdg.c
+++ b/net/ipv4/tcp_cdg.c
@@ -85,7 +85,6 @@ struct cdg {
u8 state;
u8 delack;
u32 rtt_seq;
- u32 undo_cwnd;
u32 shadow_wnd;
u16 backoff_cnt;
u16 sample_cnt;
@@ -330,8 +329,6 @@ static u32 tcp_cdg_ssthresh(struct sock *sk)
struct cdg *ca = inet_csk_ca(sk);
struct tcp_sock *tp = tcp_sk(sk);
- ca->undo_cwnd = tp->snd_cwnd;
-
if (ca->state == CDG_BACKOFF)
return max(2U, (tp->snd_cwnd * min(1024U, backoff_beta)) >> 10);
@@ -344,13 +341,6 @@ static u32 tcp_cdg_ssthresh(struct sock *sk)
return max(2U, tp->snd_cwnd >> 1);
}
-static u32 tcp_cdg_undo_cwnd(struct sock *sk)
-{
- struct cdg *ca = inet_csk_ca(sk);
-
- return max(tcp_sk(sk)->snd_cwnd, ca->undo_cwnd);
-}
-
static void tcp_cdg_cwnd_event(struct sock *sk, const enum tcp_ca_event ev)
{
struct cdg *ca = inet_csk_ca(sk);
@@ -399,11 +389,11 @@ static void tcp_cdg_release(struct sock *sk)
kfree(ca->gradients);
}
-struct tcp_congestion_ops tcp_cdg __read_mostly = {
+static struct tcp_congestion_ops tcp_cdg __read_mostly = {
.cong_avoid = tcp_cdg_cong_avoid,
.cwnd_event = tcp_cdg_cwnd_event,
.pkts_acked = tcp_cdg_acked,
- .undo_cwnd = tcp_cdg_undo_cwnd,
+ .undo_cwnd = tcp_reno_undo_cwnd,
.ssthresh = tcp_cdg_ssthresh,
.release = tcp_cdg_release,
.init = tcp_cdg_init,
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index fde983f6376b..bc6c02f16243 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -33,9 +33,11 @@ static struct tcp_congestion_ops *tcp_ca_find(const char *name)
}
/* Must be called with rcu lock held */
-static const struct tcp_congestion_ops *__tcp_ca_find_autoload(const char *name)
+static struct tcp_congestion_ops *tcp_ca_find_autoload(struct net *net,
+ const char *name)
{
- const struct tcp_congestion_ops *ca = tcp_ca_find(name);
+ struct tcp_congestion_ops *ca = tcp_ca_find(name);
+
#ifdef CONFIG_MODULES
if (!ca && capable(CAP_NET_ADMIN)) {
rcu_read_unlock();
@@ -115,7 +117,7 @@ void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca)
}
EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control);
-u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca)
+u32 tcp_ca_get_key_by_name(struct net *net, const char *name, bool *ecn_ca)
{
const struct tcp_congestion_ops *ca;
u32 key = TCP_CA_UNSPEC;
@@ -123,7 +125,7 @@ u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca)
might_sleep();
rcu_read_lock();
- ca = __tcp_ca_find_autoload(name);
+ ca = tcp_ca_find_autoload(net, name);
if (ca) {
key = ca->key;
*ecn_ca = ca->flags & TCP_CONG_NEEDS_ECN;
@@ -153,23 +155,18 @@ EXPORT_SYMBOL_GPL(tcp_ca_get_name_by_key);
/* Assign choice of congestion control. */
void tcp_assign_congestion_control(struct sock *sk)
{
+ struct net *net = sock_net(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
- struct tcp_congestion_ops *ca;
+ const struct tcp_congestion_ops *ca;
rcu_read_lock();
- list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
- if (likely(try_module_get(ca->owner))) {
- icsk->icsk_ca_ops = ca;
- goto out;
- }
- /* Fallback to next available. The last really
- * guaranteed fallback is Reno from this list.
- */
- }
-out:
+ ca = rcu_dereference(net->ipv4.tcp_congestion_control);
+ if (unlikely(!try_module_get(ca->owner)))
+ ca = &tcp_reno;
+ icsk->icsk_ca_ops = ca;
rcu_read_unlock();
- memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
+ memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
if (ca->flags & TCP_CONG_NEEDS_ECN)
INET_ECN_xmit(sk);
else
@@ -189,8 +186,8 @@ void tcp_init_congestion_control(struct sock *sk)
INET_ECN_dontxmit(sk);
}
-void tcp_reinit_congestion_control(struct sock *sk,
- const struct tcp_congestion_ops *ca)
+static void tcp_reinit_congestion_control(struct sock *sk,
+ const struct tcp_congestion_ops *ca)
{
struct inet_connection_sock *icsk = inet_csk(sk);
@@ -214,29 +211,27 @@ void tcp_cleanup_congestion_control(struct sock *sk)
}
/* Used by sysctl to change default congestion control */
-int tcp_set_default_congestion_control(const char *name)
+int tcp_set_default_congestion_control(struct net *net, const char *name)
{
struct tcp_congestion_ops *ca;
- int ret = -ENOENT;
-
- spin_lock(&tcp_cong_list_lock);
- ca = tcp_ca_find(name);
-#ifdef CONFIG_MODULES
- if (!ca && capable(CAP_NET_ADMIN)) {
- spin_unlock(&tcp_cong_list_lock);
+ const struct tcp_congestion_ops *prev;
+ int ret;
- request_module("tcp_%s", name);
- spin_lock(&tcp_cong_list_lock);
- ca = tcp_ca_find(name);
- }
-#endif
+ rcu_read_lock();
+ ca = tcp_ca_find_autoload(net, name);
+ if (!ca) {
+ ret = -ENOENT;
+ } else if (!try_module_get(ca->owner)) {
+ ret = -EBUSY;
+ } else {
+ prev = xchg(&net->ipv4.tcp_congestion_control, ca);
+ if (prev)
+ module_put(prev->owner);
- if (ca) {
- ca->flags |= TCP_CONG_NON_RESTRICTED; /* default is always allowed */
- list_move(&ca->list, &tcp_cong_list);
+ ca->flags |= TCP_CONG_NON_RESTRICTED;
ret = 0;
}
- spin_unlock(&tcp_cong_list_lock);
+ rcu_read_unlock();
return ret;
}
@@ -244,7 +239,8 @@ int tcp_set_default_congestion_control(const char *name)
/* Set default value from kernel configuration at bootup */
static int __init tcp_congestion_default(void)
{
- return tcp_set_default_congestion_control(CONFIG_DEFAULT_TCP_CONG);
+ return tcp_set_default_congestion_control(&init_net,
+ CONFIG_DEFAULT_TCP_CONG);
}
late_initcall(tcp_congestion_default);
@@ -264,14 +260,12 @@ void tcp_get_available_congestion_control(char *buf, size_t maxlen)
}
/* Get current default congestion control */
-void tcp_get_default_congestion_control(char *name)
+void tcp_get_default_congestion_control(struct net *net, char *name)
{
- struct tcp_congestion_ops *ca;
- /* We will always have reno... */
- BUG_ON(list_empty(&tcp_cong_list));
+ const struct tcp_congestion_ops *ca;
rcu_read_lock();
- ca = list_entry(tcp_cong_list.next, struct tcp_congestion_ops, list);
+ ca = rcu_dereference(net->ipv4.tcp_congestion_control);
strncpy(name, ca->name, TCP_CA_NAME_MAX);
rcu_read_unlock();
}
@@ -338,7 +332,7 @@ out:
* tcp_reinit_congestion_control (if the current congestion control was
* already initialized.
*/
-int tcp_set_congestion_control(struct sock *sk, const char *name, bool load)
+int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, bool reinit)
{
struct inet_connection_sock *icsk = inet_csk(sk);
const struct tcp_congestion_ops *ca;
@@ -351,18 +345,29 @@ int tcp_set_congestion_control(struct sock *sk, const char *name, bool load)
if (!load)
ca = tcp_ca_find(name);
else
- ca = __tcp_ca_find_autoload(name);
+ ca = tcp_ca_find_autoload(sock_net(sk), name);
+
/* No change asking for existing value */
if (ca == icsk->icsk_ca_ops) {
icsk->icsk_ca_setsockopt = 1;
goto out;
}
+
if (!ca) {
err = -ENOENT;
} else if (!load) {
- icsk->icsk_ca_ops = ca;
- if (!try_module_get(ca->owner))
+ const struct tcp_congestion_ops *old_ca = icsk->icsk_ca_ops;
+
+ if (try_module_get(ca->owner)) {
+ if (reinit) {
+ tcp_reinit_congestion_control(sk, ca);
+ } else {
+ icsk->icsk_ca_ops = ca;
+ module_put(old_ca->owner);
+ }
+ } else {
err = -EBUSY;
+ }
} else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) ||
ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))) {
err = -EPERM;
@@ -456,7 +461,7 @@ u32 tcp_reno_undo_cwnd(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
- return max(tp->snd_cwnd, tp->snd_ssthresh << 1);
+ return max(tp->snd_cwnd, tp->prior_cwnd);
}
EXPORT_SYMBOL_GPL(tcp_reno_undo_cwnd);
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 57ae5b5ae643..78bfadfcf342 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -83,7 +83,6 @@ MODULE_PARM_DESC(hystart_ack_delta, "spacing between ack's indicating train (mse
struct bictcp {
u32 cnt; /* increase cwnd by 1 after ACKs */
u32 last_max_cwnd; /* last maximum snd_cwnd */
- u32 loss_cwnd; /* congestion window at last loss */
u32 last_cwnd; /* the last snd_cwnd */
u32 last_time; /* time when updated last_cwnd */
u32 bic_origin_point;/* origin point of bic function */
@@ -142,7 +141,6 @@ static void bictcp_init(struct sock *sk)
struct bictcp *ca = inet_csk_ca(sk);
bictcp_reset(ca);
- ca->loss_cwnd = 0;
if (hystart)
bictcp_hystart_reset(sk);
@@ -366,18 +364,9 @@ static u32 bictcp_recalc_ssthresh(struct sock *sk)
else
ca->last_max_cwnd = tp->snd_cwnd;
- ca->loss_cwnd = tp->snd_cwnd;
-
return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U);
}
-static u32 bictcp_undo_cwnd(struct sock *sk)
-{
- struct bictcp *ca = inet_csk_ca(sk);
-
- return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
-}
-
static void bictcp_state(struct sock *sk, u8 new_state)
{
if (new_state == TCP_CA_Loss) {
@@ -470,7 +459,7 @@ static struct tcp_congestion_ops cubictcp __read_mostly = {
.ssthresh = bictcp_recalc_ssthresh,
.cong_avoid = bictcp_cong_avoid,
.set_state = bictcp_state,
- .undo_cwnd = bictcp_undo_cwnd,
+ .undo_cwnd = tcp_reno_undo_cwnd,
.cwnd_event = bictcp_cwnd_event,
.pkts_acked = bictcp_acked,
.owner = THIS_MODULE,
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index a748c74aa8b7..abbf0edcf6c2 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -16,6 +16,7 @@
#include <linux/tcp.h>
+#include <net/netlink.h>
#include <net/tcp.h>
static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
@@ -36,6 +37,100 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
tcp_get_info(sk, info);
}
+#ifdef CONFIG_TCP_MD5SIG
+static void tcp_diag_md5sig_fill(struct tcp_diag_md5sig *info,
+ const struct tcp_md5sig_key *key)
+{
+ info->tcpm_family = key->family;
+ info->tcpm_prefixlen = key->prefixlen;
+ info->tcpm_keylen = key->keylen;
+ memcpy(info->tcpm_key, key->key, key->keylen);
+
+ if (key->family == AF_INET)
+ info->tcpm_addr[0] = key->addr.a4.s_addr;
+ #if IS_ENABLED(CONFIG_IPV6)
+ else if (key->family == AF_INET6)
+ memcpy(&info->tcpm_addr, &key->addr.a6,
+ sizeof(info->tcpm_addr));
+ #endif
+}
+
+static int tcp_diag_put_md5sig(struct sk_buff *skb,
+ const struct tcp_md5sig_info *md5sig)
+{
+ const struct tcp_md5sig_key *key;
+ struct tcp_diag_md5sig *info;
+ struct nlattr *attr;
+ int md5sig_count = 0;
+
+ hlist_for_each_entry_rcu(key, &md5sig->head, node)
+ md5sig_count++;
+ if (md5sig_count == 0)
+ return 0;
+
+ attr = nla_reserve(skb, INET_DIAG_MD5SIG,
+ md5sig_count * sizeof(struct tcp_diag_md5sig));
+ if (!attr)
+ return -EMSGSIZE;
+
+ info = nla_data(attr);
+ memset(info, 0, md5sig_count * sizeof(struct tcp_diag_md5sig));
+ hlist_for_each_entry_rcu(key, &md5sig->head, node) {
+ tcp_diag_md5sig_fill(info++, key);
+ if (--md5sig_count == 0)
+ break;
+ }
+
+ return 0;
+}
+#endif
+
+static int tcp_diag_get_aux(struct sock *sk, bool net_admin,
+ struct sk_buff *skb)
+{
+#ifdef CONFIG_TCP_MD5SIG
+ if (net_admin) {
+ struct tcp_md5sig_info *md5sig;
+ int err = 0;
+
+ rcu_read_lock();
+ md5sig = rcu_dereference(tcp_sk(sk)->md5sig_info);
+ if (md5sig)
+ err = tcp_diag_put_md5sig(skb, md5sig);
+ rcu_read_unlock();
+ if (err < 0)
+ return err;
+ }
+#endif
+
+ return 0;
+}
+
+static size_t tcp_diag_get_aux_size(struct sock *sk, bool net_admin)
+{
+ size_t size = 0;
+
+#ifdef CONFIG_TCP_MD5SIG
+ if (net_admin && sk_fullsock(sk)) {
+ const struct tcp_md5sig_info *md5sig;
+ const struct tcp_md5sig_key *key;
+ size_t md5sig_count = 0;
+
+ rcu_read_lock();
+ md5sig = rcu_dereference(tcp_sk(sk)->md5sig_info);
+ if (md5sig) {
+ hlist_for_each_entry_rcu(key, &md5sig->head, node)
+ md5sig_count++;
+ }
+ rcu_read_unlock();
+ size += nla_total_size(md5sig_count *
+ sizeof(struct tcp_diag_md5sig));
+ }
+#endif
+
+ return size;
+}
+
static void tcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
const struct inet_diag_req_v2 *r, struct nlattr *bc)
{
@@ -68,13 +163,15 @@ static int tcp_diag_destroy(struct sk_buff *in_skb,
#endif
static const struct inet_diag_handler tcp_diag_handler = {
- .dump = tcp_diag_dump,
- .dump_one = tcp_diag_dump_one,
- .idiag_get_info = tcp_diag_get_info,
- .idiag_type = IPPROTO_TCP,
- .idiag_info_size = sizeof(struct tcp_info),
+ .dump = tcp_diag_dump,
+ .dump_one = tcp_diag_dump_one,
+ .idiag_get_info = tcp_diag_get_info,
+ .idiag_get_aux = tcp_diag_get_aux,
+ .idiag_get_aux_size = tcp_diag_get_aux_size,
+ .idiag_type = IPPROTO_TCP,
+ .idiag_info_size = sizeof(struct tcp_info),
#ifdef CONFIG_INET_DIAG_DESTROY
- .destroy = tcp_diag_destroy,
+ .destroy = tcp_diag_destroy,
#endif
};
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index ce9c7fef200f..78c192ee03a4 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/crypto.h>
#include <linux/err.h>
#include <linux/init.h>
@@ -9,15 +10,18 @@
#include <net/inetpeer.h>
#include <net/tcp.h>
-int sysctl_tcp_fastopen __read_mostly = TFO_CLIENT_ENABLE;
-
-struct tcp_fastopen_context __rcu *tcp_fastopen_ctx;
-
-static DEFINE_SPINLOCK(tcp_fastopen_ctx_lock);
-
-void tcp_fastopen_init_key_once(bool publish)
+void tcp_fastopen_init_key_once(struct net *net)
{
- static u8 key[TCP_FASTOPEN_KEY_LENGTH];
+ u8 key[TCP_FASTOPEN_KEY_LENGTH];
+ struct tcp_fastopen_context *ctxt;
+
+ rcu_read_lock();
+ ctxt = rcu_dereference(net->ipv4.tcp_fastopen_ctx);
+ if (ctxt) {
+ rcu_read_unlock();
+ return;
+ }
+ rcu_read_unlock();
/* tcp_fastopen_reset_cipher publishes the new context
* atomically, so we allow this race happening here.
@@ -25,8 +29,8 @@ void tcp_fastopen_init_key_once(bool publish)
* All call sites of tcp_fastopen_cookie_gen also check
* for a valid cookie, so this is an acceptable risk.
*/
- if (net_get_random_once(key, sizeof(key)) && publish)
- tcp_fastopen_reset_cipher(key, sizeof(key));
+ get_random_bytes(key, sizeof(key));
+ tcp_fastopen_reset_cipher(net, NULL, key, sizeof(key));
}
static void tcp_fastopen_ctx_free(struct rcu_head *head)
@@ -37,10 +41,37 @@ static void tcp_fastopen_ctx_free(struct rcu_head *head)
kfree(ctx);
}
-int tcp_fastopen_reset_cipher(void *key, unsigned int len)
+void tcp_fastopen_destroy_cipher(struct sock *sk)
+{
+ struct tcp_fastopen_context *ctx;
+
+ ctx = rcu_dereference_protected(
+ inet_csk(sk)->icsk_accept_queue.fastopenq.ctx, 1);
+ if (ctx)
+ call_rcu(&ctx->rcu, tcp_fastopen_ctx_free);
+}
+
+void tcp_fastopen_ctx_destroy(struct net *net)
+{
+ struct tcp_fastopen_context *ctxt;
+
+ spin_lock(&net->ipv4.tcp_fastopen_ctx_lock);
+
+ ctxt = rcu_dereference_protected(net->ipv4.tcp_fastopen_ctx,
+ lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock));
+ rcu_assign_pointer(net->ipv4.tcp_fastopen_ctx, NULL);
+ spin_unlock(&net->ipv4.tcp_fastopen_ctx_lock);
+
+ if (ctxt)
+ call_rcu(&ctxt->rcu, tcp_fastopen_ctx_free);
+}
+
+int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk,
+ void *key, unsigned int len)
{
- int err;
struct tcp_fastopen_context *ctx, *octx;
+ struct fastopen_queue *q;
+ int err;
ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
if (!ctx)
@@ -61,26 +92,37 @@ error: kfree(ctx);
}
memcpy(ctx->key, key, len);
- spin_lock(&tcp_fastopen_ctx_lock);
- octx = rcu_dereference_protected(tcp_fastopen_ctx,
- lockdep_is_held(&tcp_fastopen_ctx_lock));
- rcu_assign_pointer(tcp_fastopen_ctx, ctx);
- spin_unlock(&tcp_fastopen_ctx_lock);
+ spin_lock(&net->ipv4.tcp_fastopen_ctx_lock);
+ if (sk) {
+ q = &inet_csk(sk)->icsk_accept_queue.fastopenq;
+ octx = rcu_dereference_protected(q->ctx,
+ lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock));
+ rcu_assign_pointer(q->ctx, ctx);
+ } else {
+ octx = rcu_dereference_protected(net->ipv4.tcp_fastopen_ctx,
+ lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock));
+ rcu_assign_pointer(net->ipv4.tcp_fastopen_ctx, ctx);
+ }
+ spin_unlock(&net->ipv4.tcp_fastopen_ctx_lock);
if (octx)
call_rcu(&octx->rcu, tcp_fastopen_ctx_free);
return err;
}
-static bool __tcp_fastopen_cookie_gen(const void *path,
+static bool __tcp_fastopen_cookie_gen(struct sock *sk, const void *path,
struct tcp_fastopen_cookie *foc)
{
struct tcp_fastopen_context *ctx;
bool ok = false;
rcu_read_lock();
- ctx = rcu_dereference(tcp_fastopen_ctx);
+
+ ctx = rcu_dereference(inet_csk(sk)->icsk_accept_queue.fastopenq.ctx);
+ if (!ctx)
+ ctx = rcu_dereference(sock_net(sk)->ipv4.tcp_fastopen_ctx);
+
if (ctx) {
crypto_cipher_encrypt_one(ctx->tfm, foc->val, path);
foc->len = TCP_FASTOPEN_COOKIE_SIZE;
@@ -96,7 +138,8 @@ static bool __tcp_fastopen_cookie_gen(const void *path,
*
* XXX (TFO) - refactor when TCP_FASTOPEN_COOKIE_SIZE != AES_BLOCK_SIZE.
*/
-static bool tcp_fastopen_cookie_gen(struct request_sock *req,
+static bool tcp_fastopen_cookie_gen(struct sock *sk,
+ struct request_sock *req,
struct sk_buff *syn,
struct tcp_fastopen_cookie *foc)
{
@@ -104,7 +147,7 @@ static bool tcp_fastopen_cookie_gen(struct request_sock *req,
const struct iphdr *iph = ip_hdr(syn);
__be32 path[4] = { iph->saddr, iph->daddr, 0, 0 };
- return __tcp_fastopen_cookie_gen(path, foc);
+ return __tcp_fastopen_cookie_gen(sk, path, foc);
}
#if IS_ENABLED(CONFIG_IPV6)
@@ -112,13 +155,13 @@ static bool tcp_fastopen_cookie_gen(struct request_sock *req,
const struct ipv6hdr *ip6h = ipv6_hdr(syn);
struct tcp_fastopen_cookie tmp;
- if (__tcp_fastopen_cookie_gen(&ip6h->saddr, &tmp)) {
+ if (__tcp_fastopen_cookie_gen(sk, &ip6h->saddr, &tmp)) {
struct in6_addr *buf = &tmp.addr;
int i;
for (i = 0; i < 4; i++)
buf->s6_addr32[i] ^= ip6h->daddr.s6_addr32[i];
- return __tcp_fastopen_cookie_gen(buf, foc);
+ return __tcp_fastopen_cookie_gen(sk, buf, foc);
}
}
#endif
@@ -171,7 +214,6 @@ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb)
static struct sock *tcp_fastopen_create_child(struct sock *sk,
struct sk_buff *skb,
- struct dst_entry *dst,
struct request_sock *req)
{
struct tcp_sock *tp;
@@ -217,12 +259,7 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
refcount_set(&req->rsk_refcnt, 2);
/* Now finish processing the fastopen child socket. */
- inet_csk(child)->icsk_af_ops->rebuild_header(child);
- tcp_init_congestion_control(child);
- tcp_mtup_init(child);
- tcp_init_metrics(child);
- tcp_call_bpf(child, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
- tcp_init_buffer_space(child);
+ tcp_init_transfer(child, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
@@ -272,6 +309,15 @@ static bool tcp_fastopen_queue_check(struct sock *sk)
return true;
}
+static bool tcp_fastopen_no_cookie(const struct sock *sk,
+ const struct dst_entry *dst,
+ int flag)
+{
+ return (sock_net(sk)->ipv4.sysctl_tcp_fastopen & flag) ||
+ tcp_sk(sk)->fastopen_no_cookie ||
+ (dst && dst_metric(dst, RTAX_FASTOPEN_NO_COOKIE));
+}
+
/* Returns true if we should perform Fast Open on the SYN. The cookie (foc)
* may be updated and return the client in the SYN-ACK later. E.g., Fast Open
* cookie request (foc->len == 0).
@@ -279,27 +325,29 @@ static bool tcp_fastopen_queue_check(struct sock *sk)
struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
struct tcp_fastopen_cookie *foc,
- struct dst_entry *dst)
+ const struct dst_entry *dst)
{
- struct tcp_fastopen_cookie valid_foc = { .len = -1 };
bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1;
+ int tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen;
+ struct tcp_fastopen_cookie valid_foc = { .len = -1 };
struct sock *child;
if (foc->len == 0) /* Client requests a cookie */
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENCOOKIEREQD);
- if (!((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) &&
+ if (!((tcp_fastopen & TFO_SERVER_ENABLE) &&
(syn_data || foc->len >= 0) &&
tcp_fastopen_queue_check(sk))) {
foc->len = -1;
return NULL;
}
- if (syn_data && (sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD))
+ if (syn_data &&
+ tcp_fastopen_no_cookie(sk, dst, TFO_SERVER_COOKIE_NOT_REQD))
goto fastopen;
if (foc->len >= 0 && /* Client presents or requests a cookie */
- tcp_fastopen_cookie_gen(req, skb, &valid_foc) &&
+ tcp_fastopen_cookie_gen(sk, req, skb, &valid_foc) &&
foc->len == TCP_FASTOPEN_COOKIE_SIZE &&
foc->len == valid_foc.len &&
!memcmp(foc->val, valid_foc.val, foc->len)) {
@@ -312,7 +360,7 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
* data in SYN_RECV state.
*/
fastopen:
- child = tcp_fastopen_create_child(sk, skb, dst, req);
+ child = tcp_fastopen_create_child(sk, skb, req);
if (child) {
foc->len = -1;
NET_INC_STATS(sock_net(sk),
@@ -332,6 +380,7 @@ bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
struct tcp_fastopen_cookie *cookie)
{
unsigned long last_syn_loss = 0;
+ const struct dst_entry *dst;
int syn_loss = 0;
tcp_fastopen_cache_get(sk, mss, cookie, &syn_loss, &last_syn_loss);
@@ -349,7 +398,9 @@ bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
return false;
}
- if (sysctl_tcp_fastopen & TFO_CLIENT_NO_COOKIE) {
+ dst = __sk_dst_get(sk);
+
+ if (tcp_fastopen_no_cookie(sk, dst, TFO_CLIENT_NO_COOKIE)) {
cookie->len = -1;
return true;
}
@@ -403,25 +454,16 @@ EXPORT_SYMBOL(tcp_fastopen_defer_connect);
* TFO connection with data exchanges.
*/
-/* Default to 1hr */
-unsigned int sysctl_tcp_fastopen_blackhole_timeout __read_mostly = 60 * 60;
-static atomic_t tfo_active_disable_times __read_mostly = ATOMIC_INIT(0);
-static unsigned long tfo_active_disable_stamp __read_mostly;
-
/* Disable active TFO and record current jiffies and
* tfo_active_disable_times
*/
void tcp_fastopen_active_disable(struct sock *sk)
{
- atomic_inc(&tfo_active_disable_times);
- tfo_active_disable_stamp = jiffies;
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENBLACKHOLE);
-}
+ struct net *net = sock_net(sk);
-/* Reset tfo_active_disable_times to 0 */
-void tcp_fastopen_active_timeout_reset(void)
-{
- atomic_set(&tfo_active_disable_times, 0);
+ atomic_inc(&net->ipv4.tfo_active_disable_times);
+ net->ipv4.tfo_active_disable_stamp = jiffies;
+ NET_INC_STATS(net, LINUX_MIB_TCPFASTOPENBLACKHOLE);
}
/* Calculate timeout for tfo active disable
@@ -430,17 +472,18 @@ void tcp_fastopen_active_timeout_reset(void)
*/
bool tcp_fastopen_active_should_disable(struct sock *sk)
{
- int tfo_da_times = atomic_read(&tfo_active_disable_times);
- int multiplier;
+ unsigned int tfo_bh_timeout = sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout;
+ int tfo_da_times = atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times);
unsigned long timeout;
+ int multiplier;
if (!tfo_da_times)
return false;
/* Limit timout to max: 2^6 * initial timeout */
multiplier = 1 << min(tfo_da_times - 1, 6);
- timeout = multiplier * sysctl_tcp_fastopen_blackhole_timeout * HZ;
- if (time_before(jiffies, tfo_active_disable_stamp + timeout))
+ timeout = multiplier * tfo_bh_timeout * HZ;
+ if (time_before(jiffies, sock_net(sk)->ipv4.tfo_active_disable_stamp + timeout))
return true;
/* Mark check bit so we can check for successful active TFO
@@ -459,27 +502,25 @@ bool tcp_fastopen_active_should_disable(struct sock *sk)
void tcp_fastopen_active_disable_ofo_check(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct rb_node *p;
- struct sk_buff *skb;
struct dst_entry *dst;
+ struct sk_buff *skb;
if (!tp->syn_fastopen)
return;
if (!tp->data_segs_in) {
- p = rb_first(&tp->out_of_order_queue);
- if (p && !rb_next(p)) {
- skb = rb_entry(p, struct sk_buff, rbnode);
+ skb = skb_rb_first(&tp->out_of_order_queue);
+ if (skb && !skb_rb_next(skb)) {
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) {
tcp_fastopen_active_disable(sk);
return;
}
}
} else if (tp->syn_fastopen_ch &&
- atomic_read(&tfo_active_disable_times)) {
+ atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times)) {
dst = sk_dst_get(sk);
if (!(dst && dst->dev && (dst->dev->flags & IFF_LOOPBACK)))
- tcp_fastopen_active_timeout_reset();
+ atomic_set(&sock_net(sk)->ipv4.tfo_active_disable_times, 0);
dst_release(dst);
}
}
diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c
index 6d9879e93648..d1c33c91eadc 100644
--- a/net/ipv4/tcp_highspeed.c
+++ b/net/ipv4/tcp_highspeed.c
@@ -94,7 +94,6 @@ static const struct hstcp_aimd_val {
struct hstcp {
u32 ai;
- u32 loss_cwnd;
};
static void hstcp_init(struct sock *sk)
@@ -153,22 +152,14 @@ static u32 hstcp_ssthresh(struct sock *sk)
const struct tcp_sock *tp = tcp_sk(sk);
struct hstcp *ca = inet_csk_ca(sk);
- ca->loss_cwnd = tp->snd_cwnd;
/* Do multiplicative decrease */
return max(tp->snd_cwnd - ((tp->snd_cwnd * hstcp_aimd_vals[ca->ai].md) >> 8), 2U);
}
-static u32 hstcp_cwnd_undo(struct sock *sk)
-{
- const struct hstcp *ca = inet_csk_ca(sk);
-
- return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
-}
-
static struct tcp_congestion_ops tcp_highspeed __read_mostly = {
.init = hstcp_init,
.ssthresh = hstcp_ssthresh,
- .undo_cwnd = hstcp_cwnd_undo,
+ .undo_cwnd = tcp_reno_undo_cwnd,
.cong_avoid = hstcp_cong_avoid,
.owner = THIS_MODULE,
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index 3eb78cde6ff0..082d479462fa 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -66,7 +66,6 @@ static inline void htcp_reset(struct htcp *ca)
static u32 htcp_cwnd_undo(struct sock *sk)
{
- const struct tcp_sock *tp = tcp_sk(sk);
struct htcp *ca = inet_csk_ca(sk);
if (ca->undo_last_cong) {
@@ -76,7 +75,7 @@ static u32 htcp_cwnd_undo(struct sock *sk)
ca->undo_last_cong = 0;
}
- return max(tp->snd_cwnd, (tp->snd_ssthresh << 7) / ca->beta);
+ return tcp_reno_undo_cwnd(sk);
}
static inline void measure_rtt(struct sock *sk, u32 srtt)
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index 60352ff4f5a8..7c843578f233 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -48,7 +48,6 @@ struct illinois {
u32 end_seq; /* right edge of current RTT */
u32 alpha; /* Additive increase */
u32 beta; /* Muliplicative decrease */
- u32 loss_cwnd; /* cwnd on loss */
u16 acked; /* # packets acked by current ACK */
u8 rtt_above; /* average rtt has gone above threshold */
u8 rtt_low; /* # of rtts measurements below threshold */
@@ -297,18 +296,10 @@ static u32 tcp_illinois_ssthresh(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
struct illinois *ca = inet_csk_ca(sk);
- ca->loss_cwnd = tp->snd_cwnd;
/* Multiplicative decrease */
return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->beta) >> BETA_SHIFT), 2U);
}
-static u32 tcp_illinois_cwnd_undo(struct sock *sk)
-{
- const struct illinois *ca = inet_csk_ca(sk);
-
- return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
-}
-
/* Extract info for Tcp socket info provided via netlink. */
static size_t tcp_illinois_info(struct sock *sk, u32 ext, int *attr,
union tcp_cc_info *info)
@@ -336,7 +327,7 @@ static size_t tcp_illinois_info(struct sock *sk, u32 ext, int *attr,
static struct tcp_congestion_ops tcp_illinois __read_mostly = {
.init = tcp_illinois_init,
.ssthresh = tcp_illinois_ssthresh,
- .undo_cwnd = tcp_illinois_cwnd_undo,
+ .undo_cwnd = tcp_reno_undo_cwnd,
.cong_avoid = tcp_illinois_cong_avoid,
.set_state = tcp_illinois_state,
.get_info = tcp_illinois_info,
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 53de1424c13c..f844c06c0676 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
@@ -75,25 +76,10 @@
#include <linux/ipsec.h>
#include <asm/unaligned.h>
#include <linux/errqueue.h>
+#include <trace/events/tcp.h>
+#include <linux/static_key.h>
-int sysctl_tcp_fack __read_mostly;
-int sysctl_tcp_max_reordering __read_mostly = 300;
-int sysctl_tcp_dsack __read_mostly = 1;
-int sysctl_tcp_app_win __read_mostly = 31;
-int sysctl_tcp_adv_win_scale __read_mostly = 1;
-EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
-
-/* rfc5961 challenge ack rate limiting */
-int sysctl_tcp_challenge_ack_limit = 1000;
-
-int sysctl_tcp_stdurg __read_mostly;
-int sysctl_tcp_rfc1337 __read_mostly;
int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
-int sysctl_tcp_frto __read_mostly = 2;
-int sysctl_tcp_min_rtt_wlen __read_mostly = 300;
-int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
-int sysctl_tcp_early_retrans __read_mostly = 3;
-int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2;
#define FLAG_DATA 0x01 /* Incoming frame contained data. */
#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */
@@ -114,7 +100,7 @@ int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2;
#define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED)
#define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
-#define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE)
+#define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE|FLAG_DSACKING_ACK)
#define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED)
#define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
@@ -334,7 +320,7 @@ static void tcp_sndbuf_expand(struct sock *sk)
sndmem *= nr_segs * per_mss;
if (sk->sk_sndbuf < sndmem)
- sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
+ sk->sk_sndbuf = min(sndmem, sock_net(sk)->ipv4.sysctl_tcp_wmem[2]);
}
/* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
@@ -367,8 +353,8 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
/* Optimize this! */
- int truesize = tcp_win_from_space(skb->truesize) >> 1;
- int window = tcp_win_from_space(sysctl_tcp_rmem[2]) >> 1;
+ int truesize = tcp_win_from_space(sk, skb->truesize) >> 1;
+ int window = tcp_win_from_space(sk, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;
while (tp->rcv_ssthresh <= window) {
if (truesize <= skb->len)
@@ -393,7 +379,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
/* Check #2. Increase window, if skb with such overhead
* will fit to rcvbuf in future.
*/
- if (tcp_win_from_space(skb->truesize) <= skb->len)
+ if (tcp_win_from_space(sk, skb->truesize) <= skb->len)
incr = 2 * tp->advmss;
else
incr = __tcp_grow_window(sk, skb);
@@ -419,11 +405,11 @@ static void tcp_fixup_rcvbuf(struct sock *sk)
/* Dynamic Right Sizing (DRS) has 2 to 3 RTT latency
* Allow enough cushion so that sender is not limited by our window
*/
- if (sysctl_tcp_moderate_rcvbuf)
+ if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf)
rcvmem <<= 2;
if (sk->sk_rcvbuf < rcvmem)
- sk->sk_rcvbuf = min(rcvmem, sysctl_tcp_rmem[2]);
+ sk->sk_rcvbuf = min(rcvmem, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
}
/* 4. Try to fixup all. It is made immediately after connection enters
@@ -431,6 +417,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk)
*/
void tcp_init_buffer_space(struct sock *sk)
{
+ int tcp_app_win = sock_net(sk)->ipv4.sysctl_tcp_app_win;
struct tcp_sock *tp = tcp_sk(sk);
int maxwin;
@@ -449,14 +436,14 @@ void tcp_init_buffer_space(struct sock *sk)
if (tp->window_clamp >= maxwin) {
tp->window_clamp = maxwin;
- if (sysctl_tcp_app_win && maxwin > 4 * tp->advmss)
+ if (tcp_app_win && maxwin > 4 * tp->advmss)
tp->window_clamp = max(maxwin -
- (maxwin >> sysctl_tcp_app_win),
+ (maxwin >> tcp_app_win),
4 * tp->advmss);
}
/* Force reservation of one segment. */
- if (sysctl_tcp_app_win &&
+ if (tcp_app_win &&
tp->window_clamp > 2 * tp->advmss &&
tp->window_clamp + tp->advmss > maxwin)
tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss);
@@ -470,15 +457,16 @@ static void tcp_clamp_window(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
+ struct net *net = sock_net(sk);
icsk->icsk_ack.quick = 0;
- if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
+ if (sk->sk_rcvbuf < net->ipv4.sysctl_tcp_rmem[2] &&
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
!tcp_under_memory_pressure(sk) &&
sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
- sysctl_tcp_rmem[2]);
+ net->ipv4.sysctl_tcp_rmem[2]);
}
if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
@@ -609,7 +597,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
* <prev RTT . ><current RTT .. ><next RTT .... >
*/
- if (sysctl_tcp_moderate_rcvbuf &&
+ if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf &&
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
int rcvwin, rcvmem, rcvbuf;
@@ -633,10 +621,11 @@ void tcp_rcv_space_adjust(struct sock *sk)
}
rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
- while (tcp_win_from_space(rcvmem) < tp->advmss)
+ while (tcp_win_from_space(sk, rcvmem) < tp->advmss)
rcvmem += 128;
- rcvbuf = min(rcvwin / tp->advmss * rcvmem, sysctl_tcp_rmem[2]);
+ rcvbuf = min(rcvwin / tp->advmss * rcvmem,
+ sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
if (rcvbuf > sk->sk_rcvbuf) {
sk->sk_rcvbuf = rcvbuf;
@@ -780,15 +769,6 @@ static void tcp_rtt_estimator(struct sock *sk, long mrtt_us)
tp->srtt_us = max(1U, srtt);
}
-/* Set the sk_pacing_rate to allow proper sizing of TSO packets.
- * Note: TCP stack does not yet implement pacing.
- * FQ packet scheduler can be used to implement cheap but effective
- * TCP pacing, to smooth the burst on large writes when packets
- * in flight is significantly lower than cwnd (or rwin)
- */
-int sysctl_tcp_pacing_ss_ratio __read_mostly = 200;
-int sysctl_tcp_pacing_ca_ratio __read_mostly = 120;
-
static void tcp_update_pacing_rate(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
@@ -806,21 +786,21 @@ static void tcp_update_pacing_rate(struct sock *sk)
* end of slow start and should slow down.
*/
if (tp->snd_cwnd < tp->snd_ssthresh / 2)
- rate *= sysctl_tcp_pacing_ss_ratio;
+ rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio;
else
- rate *= sysctl_tcp_pacing_ca_ratio;
+ rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio;
rate *= max(tp->snd_cwnd, tp->packets_out);
if (likely(tp->srtt_us))
do_div(rate, tp->srtt_us);
- /* ACCESS_ONCE() is needed because sch_fq fetches sk_pacing_rate
+ /* WRITE_ONCE() is needed because sch_fq fetches sk_pacing_rate
* without any lock. We want to make sure compiler wont store
* intermediate values in this location.
*/
- ACCESS_ONCE(sk->sk_pacing_rate) = min_t(u64, rate,
- sk->sk_max_pacing_rate);
+ WRITE_ONCE(sk->sk_pacing_rate, min_t(u64, rate,
+ sk->sk_max_pacing_rate));
}
/* Calculate rto without backoff. This is the second half of Van Jacobson's
@@ -862,60 +842,46 @@ __u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst)
return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
}
-/*
- * Packet counting of FACK is based on in-order assumptions, therefore TCP
- * disables it when reordering is detected
- */
-void tcp_disable_fack(struct tcp_sock *tp)
-{
- /* RFC3517 uses different metric in lost marker => reset on change */
- if (tcp_is_fack(tp))
- tp->lost_skb_hint = NULL;
- tp->rx_opt.sack_ok &= ~TCP_FACK_ENABLED;
-}
-
/* Take a notice that peer is sending D-SACKs */
static void tcp_dsack_seen(struct tcp_sock *tp)
{
tp->rx_opt.sack_ok |= TCP_DSACK_SEEN;
+ tp->rack.dsack_seen = 1;
}
-static void tcp_update_reordering(struct sock *sk, const int metric,
- const int ts)
+/* It's reordering when higher sequence was delivered (i.e. sacked) before
+ * some lower never-retransmitted sequence ("low_seq"). The maximum reordering
+ * distance is approximated in full-mss packet distance ("reordering").
+ */
+static void tcp_check_sack_reordering(struct sock *sk, const u32 low_seq,
+ const int ts)
{
struct tcp_sock *tp = tcp_sk(sk);
- int mib_idx;
+ const u32 mss = tp->mss_cache;
+ u32 fack, metric;
- if (WARN_ON_ONCE(metric < 0))
+ fack = tcp_highest_sack_seq(tp);
+ if (!before(low_seq, fack))
return;
- if (metric > tp->reordering) {
- tp->reordering = min(sysctl_tcp_max_reordering, metric);
-
+ metric = fack - low_seq;
+ if ((metric > tp->reordering * mss) && mss) {
#if FASTRETRANS_DEBUG > 1
pr_debug("Disorder%d %d %u f%u s%u rr%d\n",
tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state,
tp->reordering,
- tp->fackets_out,
+ 0,
tp->sacked_out,
tp->undo_marker ? tp->undo_retrans : 0);
#endif
- tcp_disable_fack(tp);
+ tp->reordering = min_t(u32, (metric + mss - 1) / mss,
+ sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
}
tp->rack.reord = 1;
-
/* This exciting event is worth to be remembered. 8) */
- if (ts)
- mib_idx = LINUX_MIB_TCPTSREORDER;
- else if (tcp_is_reno(tp))
- mib_idx = LINUX_MIB_TCPRENOREORDER;
- else if (tcp_is_fack(tp))
- mib_idx = LINUX_MIB_TCPFACKREORDER;
- else
- mib_idx = LINUX_MIB_TCPSACKREORDER;
-
- NET_INC_STATS(sock_net(sk), mib_idx);
+ NET_INC_STATS(sock_net(sk),
+ ts ? LINUX_MIB_TCPTSREORDER : LINUX_MIB_TCPSACKREORDER);
}
/* This must be called before lost_out is incremented */
@@ -989,7 +955,6 @@ void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb)
* 3. Loss detection event of two flavors:
* A. Scoreboard estimator decided the packet is lost.
* A'. Reno "three dupacks" marks head of queue lost.
- * A''. Its FACK modification, head until snd.fack is lost.
* B. SACK arrives sacking SND.NXT at the moment, when the
* segment was retransmitted.
* 4. D-SACK added new rule: D-SACK changes any tag to S.
@@ -1132,8 +1097,7 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
}
struct tcp_sacktag_state {
- int reord;
- int fack_count;
+ u32 reord;
/* Timestamps for earliest and latest never-retransmitted segment
* that was SACKed. RTO needs the earliest RTT to stay conservative,
* but congestion control should still get an accurate delay signal.
@@ -1142,6 +1106,7 @@ struct tcp_sacktag_state {
u64 last_sackt;
struct rate_sample *rate;
int flag;
+ unsigned int mss_now;
};
/* Check if skb is fully within the SACK block. In presence of GSO skbs,
@@ -1191,7 +1156,8 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
if (pkt_len >= skb->len && !in_sack)
return 0;
- err = tcp_fragment(sk, skb, pkt_len, mss, GFP_ATOMIC);
+ err = tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
+ pkt_len, mss, GFP_ATOMIC);
if (err < 0)
return err;
}
@@ -1207,15 +1173,15 @@ static u8 tcp_sacktag_one(struct sock *sk,
u64 xmit_time)
{
struct tcp_sock *tp = tcp_sk(sk);
- int fack_count = state->fack_count;
/* Account D-SACK for retransmitted packet. */
if (dup_sack && (sacked & TCPCB_RETRANS)) {
if (tp->undo_marker && tp->undo_retrans > 0 &&
after(end_seq, tp->undo_marker))
tp->undo_retrans--;
- if (sacked & TCPCB_SACKED_ACKED)
- state->reord = min(fack_count, state->reord);
+ if ((sacked & TCPCB_SACKED_ACKED) &&
+ before(start_seq, state->reord))
+ state->reord = start_seq;
}
/* Nothing to do; acked frame is about to be dropped (was ACKed). */
@@ -1241,9 +1207,10 @@ static u8 tcp_sacktag_one(struct sock *sk,
* which was in hole. It is reordering.
*/
if (before(start_seq,
- tcp_highest_sack_seq(tp)))
- state->reord = min(fack_count,
- state->reord);
+ tcp_highest_sack_seq(tp)) &&
+ before(start_seq, state->reord))
+ state->reord = start_seq;
+
if (!after(end_seq, tp->high_seq))
state->flag |= FLAG_ORIG_SACK_ACKED;
if (state->first_sackt == 0)
@@ -1262,15 +1229,10 @@ static u8 tcp_sacktag_one(struct sock *sk,
tp->sacked_out += pcount;
tp->delivered += pcount; /* Out-of-order packets delivered */
- fack_count += pcount;
-
/* Lost marker hint past SACKed? Tweak RFC3517 cnt */
- if (!tcp_is_fack(tp) && tp->lost_skb_hint &&
+ if (tp->lost_skb_hint &&
before(start_seq, TCP_SKB_CB(tp->lost_skb_hint)->seq))
tp->lost_cnt_hint += pcount;
-
- if (fack_count > tp->fackets_out)
- tp->fackets_out = fack_count;
}
/* D-SACK. We can detect redundant retransmission in S|R and plain R
@@ -1288,13 +1250,13 @@ static u8 tcp_sacktag_one(struct sock *sk,
/* Shift newly-SACKed bytes from this skb to the immediately previous
* already-SACKed sk_buff. Mark the newly-SACKed bytes as such.
*/
-static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
+static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *prev,
+ struct sk_buff *skb,
struct tcp_sacktag_state *state,
unsigned int pcount, int shifted, int mss,
bool dup_sack)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct sk_buff *prev = tcp_write_queue_prev(sk, skb);
u32 start_seq = TCP_SKB_CB(skb)->seq; /* start of newly-SACKed */
u32 end_seq = start_seq + shifted; /* end of newly-SACKed */
@@ -1363,8 +1325,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
if (unlikely(TCP_SKB_CB(prev)->tx.delivered_mstamp))
TCP_SKB_CB(prev)->tx.delivered_mstamp = 0;
- tcp_unlink_write_queue(skb, sk);
- sk_wmem_free_skb(sk, skb);
+ tcp_rtx_queue_unlink_and_free(skb, sk);
NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKMERGED);
@@ -1414,9 +1375,9 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
goto fallback;
/* Can only happen with delayed DSACK + discard craziness */
- if (unlikely(skb == tcp_write_queue_head(sk)))
+ prev = skb_rb_prev(skb);
+ if (!prev)
goto fallback;
- prev = tcp_write_queue_prev(sk, skb);
if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED)
goto fallback;
@@ -1495,18 +1456,17 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
if (!skb_shift(prev, skb, len))
goto fallback;
- if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack))
+ if (!tcp_shifted_skb(sk, prev, skb, state, pcount, len, mss, dup_sack))
goto out;
/* Hole filled allows collapsing with the next as well, this is very
* useful when hole on every nth skb pattern happens
*/
- if (prev == tcp_write_queue_tail(sk))
+ skb = skb_rb_next(prev);
+ if (!skb)
goto out;
- skb = tcp_write_queue_next(sk, prev);
if (!skb_can_shift(skb) ||
- (skb == tcp_send_head(sk)) ||
((TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) ||
(mss != tcp_skb_seglen(skb)))
goto out;
@@ -1514,11 +1474,11 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
len = skb->len;
if (skb_shift(prev, skb, len)) {
pcount += tcp_skb_pcount(skb);
- tcp_shifted_skb(sk, skb, state, tcp_skb_pcount(skb), len, mss, 0);
+ tcp_shifted_skb(sk, prev, skb, state, tcp_skb_pcount(skb),
+ len, mss, 0);
}
out:
- state->fack_count += pcount;
return prev;
noop:
@@ -1538,13 +1498,10 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *tmp;
- tcp_for_write_queue_from(skb, sk) {
+ skb_rbtree_walk_from(skb) {
int in_sack = 0;
bool dup_sack = dup_sack_in;
- if (skb == tcp_send_head(sk))
- break;
-
/* queue is in-order => we can short-circuit the walk early */
if (!before(TCP_SKB_CB(skb)->seq, end_seq))
break;
@@ -1593,34 +1550,48 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
tcp_skb_pcount(skb),
skb->skb_mstamp);
tcp_rate_skb_delivered(sk, skb, state->rate);
+ if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
+ list_del_init(&skb->tcp_tsorted_anchor);
if (!before(TCP_SKB_CB(skb)->seq,
tcp_highest_sack_seq(tp)))
tcp_advance_highest_sack(sk, skb);
}
-
- state->fack_count += tcp_skb_pcount(skb);
}
return skb;
}
-/* Avoid all extra work that is being done by sacktag while walking in
- * a normal way
- */
+static struct sk_buff *tcp_sacktag_bsearch(struct sock *sk,
+ struct tcp_sacktag_state *state,
+ u32 seq)
+{
+ struct rb_node *parent, **p = &sk->tcp_rtx_queue.rb_node;
+ struct sk_buff *skb;
+
+ while (*p) {
+ parent = *p;
+ skb = rb_to_skb(parent);
+ if (before(seq, TCP_SKB_CB(skb)->seq)) {
+ p = &parent->rb_left;
+ continue;
+ }
+ if (!before(seq, TCP_SKB_CB(skb)->end_seq)) {
+ p = &parent->rb_right;
+ continue;
+ }
+ return skb;
+ }
+ return NULL;
+}
+
static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
struct tcp_sacktag_state *state,
u32 skip_to_seq)
{
- tcp_for_write_queue_from(skb, sk) {
- if (skb == tcp_send_head(sk))
- break;
-
- if (after(TCP_SKB_CB(skb)->end_seq, skip_to_seq))
- break;
+ if (skb && after(TCP_SKB_CB(skb)->seq, skip_to_seq))
+ return skb;
- state->fack_count += tcp_skb_pcount(skb);
- }
- return skb;
+ return tcp_sacktag_bsearch(sk, state, skip_to_seq);
}
static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
@@ -1665,13 +1636,10 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
int first_sack_index;
state->flag = 0;
- state->reord = tp->packets_out;
+ state->reord = tp->snd_nxt;
- if (!tp->sacked_out) {
- if (WARN_ON(tp->fackets_out))
- tp->fackets_out = 0;
+ if (!tp->sacked_out)
tcp_highest_sack_reset(sk);
- }
found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
num_sacks, prior_snd_una);
@@ -1742,8 +1710,8 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
}
}
- skb = tcp_write_queue_head(sk);
- state->fack_count = 0;
+ state->mss_now = tcp_current_mss(sk);
+ skb = NULL;
i = 0;
if (!tp->sacked_out) {
@@ -1800,7 +1768,6 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
skb = tcp_highest_sack(sk);
if (!skb)
break;
- state->fack_count = tp->fackets_out;
cache++;
goto walk;
}
@@ -1815,7 +1782,6 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
skb = tcp_highest_sack(sk);
if (!skb)
break;
- state->fack_count = tp->fackets_out;
}
skb = tcp_sacktag_skip(skb, sk, state, start_seq);
@@ -1835,9 +1801,8 @@ advance_sp:
for (j = 0; j < used_sacks; j++)
tp->recv_sack_cache[i++] = sp[j];
- if ((state->reord < tp->fackets_out) &&
- ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker))
- tcp_update_reordering(sk, tp->fackets_out - state->reord, 0);
+ if (inet_csk(sk)->icsk_ca_state != TCP_CA_Loss || tp->undo_marker)
+ tcp_check_sack_reordering(sk, state->reord, 0);
tcp_verify_left_out(tp);
out:
@@ -1875,8 +1840,13 @@ static bool tcp_limit_reno_sacked(struct tcp_sock *tp)
static void tcp_check_reno_reordering(struct sock *sk, const int addend)
{
struct tcp_sock *tp = tcp_sk(sk);
- if (tcp_limit_reno_sacked(tp))
- tcp_update_reordering(sk, tp->packets_out + addend, 0);
+
+ if (!tcp_limit_reno_sacked(tp))
+ return;
+
+ tp->reordering = min_t(u32, tp->packets_out + addend,
+ sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRENOREORDER);
}
/* Emulate SACKs for SACKless connection: account for a new dupack. */
@@ -1922,7 +1892,6 @@ void tcp_clear_retrans(struct tcp_sock *tp)
tp->lost_out = 0;
tp->undo_marker = 0;
tp->undo_retrans = -1;
- tp->fackets_out = 0;
tp->sacked_out = 0;
}
@@ -1952,6 +1921,7 @@ void tcp_enter_loss(struct sock *sk)
!after(tp->high_seq, tp->snd_una) ||
(icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
tp->prior_ssthresh = tcp_current_ssthresh(sk);
+ tp->prior_cwnd = tp->snd_cwnd;
tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
tcp_ca_event(sk, CA_EVENT_LOSS);
tcp_init_undo(tp);
@@ -1966,19 +1936,15 @@ void tcp_enter_loss(struct sock *sk)
if (tcp_is_reno(tp))
tcp_reset_reno_sack(tp);
- skb = tcp_write_queue_head(sk);
+ skb = tcp_rtx_queue_head(sk);
is_reneg = skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED);
if (is_reneg) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
tp->sacked_out = 0;
- tp->fackets_out = 0;
}
tcp_clear_all_retrans_hints(tp);
- tcp_for_write_queue(skb, sk) {
- if (skb == tcp_send_head(sk))
- break;
-
+ skb_rbtree_walk_from(skb) {
mark_lost = (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) ||
is_reneg);
if (mark_lost)
@@ -2012,7 +1978,7 @@ void tcp_enter_loss(struct sock *sk)
* falsely raise the receive window, which results in repeated
* timeouts and stop-and-go behavior.
*/
- tp->frto = sysctl_tcp_frto &&
+ tp->frto = net->ipv4.sysctl_tcp_frto &&
(new_recovery || icsk->icsk_retransmits) &&
!inet_csk(sk)->icsk_mtup.probe_size;
}
@@ -2041,19 +2007,10 @@ static bool tcp_check_sack_reneging(struct sock *sk, int flag)
return false;
}
-static inline int tcp_fackets_out(const struct tcp_sock *tp)
-{
- return tcp_is_reno(tp) ? tp->sacked_out + 1 : tp->fackets_out;
-}
-
/* Heurestics to calculate number of duplicate ACKs. There's no dupACKs
* counter when SACK is enabled (without SACK, sacked_out is used for
* that purpose).
*
- * Instead, with FACK TCP uses fackets_out that includes both SACKed
- * segments up to the highest received SACK block so far and holes in
- * between them.
- *
* With reordering, holes may still be in flight, so RFC3517 recovery
* uses pure sacked_out (total number of SACKed segments) even though
* it violates the RFC that uses duplicate ACKs, often these are equal
@@ -2063,10 +2020,10 @@ static inline int tcp_fackets_out(const struct tcp_sock *tp)
*/
static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
{
- return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1;
+ return tp->sacked_out + 1;
}
-/* Linux NewReno/SACK/FACK/ECN state machine.
+/* Linux NewReno/SACK/ECN state machine.
* --------------------------------------
*
* "Open" Normal state, no dubious events, fast path.
@@ -2131,16 +2088,6 @@ static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
* dynamically measured and adjusted. This is implemented in
* tcp_rack_mark_lost.
*
- * FACK (Disabled by default. Subsumbed by RACK):
- * It is the simplest heuristics. As soon as we decided
- * that something is lost, we decide that _all_ not SACKed
- * packets until the most forward SACK are lost. I.e.
- * lost_out = fackets_out - sacked_out and left_out = fackets_out.
- * It is absolutely correct estimate, if network does not reorder
- * packets. And it loses any connection to reality when reordering
- * takes place. We use FACK by default until reordering
- * is suspected on the path to this destination.
- *
* If the receiver does not support SACK:
*
* NewReno (RFC6582): in Recovery we assume that one segment
@@ -2189,7 +2136,7 @@ static bool tcp_time_to_recover(struct sock *sk, int flag)
}
/* Detect loss in event "A" above by marking head of queue up as lost.
- * For FACK or non-SACK(Reno) senders, the first "packets" number of segments
+ * For non-SACK(Reno) senders, the first "packets" number of segments
* are considered lost. For RFC3517 SACK, a segment is considered lost if it
* has at least tp->reordering SACKed seqments above it; "packets" refers to
* the maximum SACKed segments to pass before reaching this limit.
@@ -2204,20 +2151,18 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
const u32 loss_high = tcp_is_sack(tp) ? tp->snd_nxt : tp->high_seq;
WARN_ON(packets > tp->packets_out);
- if (tp->lost_skb_hint) {
- skb = tp->lost_skb_hint;
- cnt = tp->lost_cnt_hint;
+ skb = tp->lost_skb_hint;
+ if (skb) {
/* Head already handled? */
- if (mark_head && skb != tcp_write_queue_head(sk))
+ if (mark_head && after(TCP_SKB_CB(skb)->seq, tp->snd_una))
return;
+ cnt = tp->lost_cnt_hint;
} else {
- skb = tcp_write_queue_head(sk);
+ skb = tcp_rtx_queue_head(sk);
cnt = 0;
}
- tcp_for_write_queue_from(skb, sk) {
- if (skb == tcp_send_head(sk))
- break;
+ skb_rbtree_walk_from(skb) {
/* TODO: do this better */
/* this is not the most efficient way to do this... */
tp->lost_skb_hint = skb;
@@ -2227,12 +2172,12 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
break;
oldcnt = cnt;
- if (tcp_is_fack(tp) || tcp_is_reno(tp) ||
+ if (tcp_is_reno(tp) ||
(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
cnt += tcp_skb_pcount(skb);
if (cnt > packets) {
- if ((tcp_is_sack(tp) && !tcp_is_fack(tp)) ||
+ if (tcp_is_sack(tp) ||
(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) ||
(oldcnt >= packets))
break;
@@ -2241,7 +2186,8 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
/* If needed, chop off the prefix to mark as lost. */
lost = (packets - oldcnt) * mss;
if (lost < skb->len &&
- tcp_fragment(sk, skb, lost, mss, GFP_ATOMIC) < 0)
+ tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
+ lost, mss, GFP_ATOMIC) < 0)
break;
cnt = packets;
}
@@ -2262,11 +2208,6 @@ static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
if (tcp_is_reno(tp)) {
tcp_mark_head_lost(sk, 1, 1);
- } else if (tcp_is_fack(tp)) {
- int lost = tp->fackets_out - tp->reordering;
- if (lost <= 0)
- lost = 1;
- tcp_mark_head_lost(sk, lost, 0);
} else {
int sacked_upto = tp->sacked_out - tp->reordering;
if (sacked_upto >= 0)
@@ -2325,16 +2266,16 @@ static bool tcp_any_retrans_done(const struct sock *sk)
if (tp->retrans_out)
return true;
- skb = tcp_write_queue_head(sk);
+ skb = tcp_rtx_queue_head(sk);
if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS))
return true;
return false;
}
-#if FASTRETRANS_DEBUG > 1
static void DBGUNDO(struct sock *sk, const char *msg)
{
+#if FASTRETRANS_DEBUG > 1
struct tcp_sock *tp = tcp_sk(sk);
struct inet_sock *inet = inet_sk(sk);
@@ -2356,10 +2297,8 @@ static void DBGUNDO(struct sock *sk, const char *msg)
tp->packets_out);
}
#endif
-}
-#else
-#define DBGUNDO(x...) do { } while (0)
#endif
+}
static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
{
@@ -2368,9 +2307,7 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
if (unmark_loss) {
struct sk_buff *skb;
- tcp_for_write_queue(skb, sk) {
- if (skb == tcp_send_head(sk))
- break;
+ skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
}
tp->lost_out = 0;
@@ -2415,6 +2352,8 @@ static bool tcp_try_undo_recovery(struct sock *sk)
mib_idx = LINUX_MIB_TCPFULLUNDO;
NET_INC_STATS(sock_net(sk), mib_idx);
+ } else if (tp->rack.reo_wnd_persist) {
+ tp->rack.reo_wnd_persist--;
}
if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) {
/* Hold old state until something *above* high_seq
@@ -2434,6 +2373,8 @@ static bool tcp_try_undo_dsack(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
if (tp->undo_marker && !tp->undo_retrans) {
+ tp->rack.reo_wnd_persist = min(TCP_RACK_RECOVERY_THRESH,
+ tp->rack.reo_wnd_persist + 1);
DBGUNDO(sk, "D-SACK");
tcp_undo_cwnd_reduction(sk, false);
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKUNDO);
@@ -2613,11 +2554,8 @@ void tcp_simple_retransmit(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
unsigned int mss = tcp_current_mss(sk);
- u32 prior_lost = tp->lost_out;
- tcp_for_write_queue(skb, sk) {
- if (skb == tcp_send_head(sk))
- break;
+ skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
if (tcp_skb_seglen(skb) > mss &&
!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
@@ -2630,7 +2568,7 @@ void tcp_simple_retransmit(struct sock *sk)
tcp_clear_retrans_hints_partial(tp);
- if (prior_lost == tp->lost_out)
+ if (!tp->lost_out)
return;
if (tcp_is_reno(tp))
@@ -2711,7 +2649,7 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack,
* is updated in tcp_ack()). Otherwise fall back to
* the conventional recovery.
*/
- if (tcp_send_head(sk) &&
+ if (!tcp_write_queue_empty(sk) &&
after(tcp_wnd_end(tp), tp->snd_nxt)) {
*rexmit = REXMIT_NEW;
return;
@@ -2738,15 +2676,15 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack,
}
/* Undo during fast recovery after partial ACK. */
-static bool tcp_try_undo_partial(struct sock *sk, const int acked)
+static bool tcp_try_undo_partial(struct sock *sk, u32 prior_snd_una)
{
struct tcp_sock *tp = tcp_sk(sk);
if (tp->undo_marker && tcp_packet_delayed(tp)) {
/* Plain luck! Hole if filled with delayed
- * packet, rather than with a retransmit.
+ * packet, rather than with a retransmit. Check reordering.
*/
- tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1);
+ tcp_check_sack_reordering(sk, prior_snd_una, 1);
/* We are getting evidence that the reordering degree is higher
* than we realized. If there are no retransmits out then we
@@ -2773,7 +2711,7 @@ static void tcp_rack_identify_loss(struct sock *sk, int *ack_flag)
struct tcp_sock *tp = tcp_sk(sk);
/* Use RACK to detect loss */
- if (sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION) {
+ if (sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION) {
u32 prior_retrans = tp->retrans_out;
tcp_rack_mark_lost(sk);
@@ -2782,6 +2720,14 @@ static void tcp_rack_identify_loss(struct sock *sk, int *ack_flag)
}
}
+static bool tcp_force_fast_retransmit(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ return after(tcp_highest_sack_seq(tp),
+ tp->snd_una + tp->reordering * tp->mss_cache);
+}
+
/* Process an event, which can update packets-in-flight not trivially.
* Main goal of this function is to calculate new estimate for left_out,
* taking into account both packets sitting in receiver's buffer and
@@ -2794,19 +2740,17 @@ static void tcp_rack_identify_loss(struct sock *sk, int *ack_flag)
* It does _not_ decide what to send, it is made in function
* tcp_xmit_retransmit_queue().
*/
-static void tcp_fastretrans_alert(struct sock *sk, const int acked,
+static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
bool is_dupack, int *ack_flag, int *rexmit)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
int fast_rexmit = 0, flag = *ack_flag;
bool do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
- (tcp_fackets_out(tp) > tp->reordering));
+ tcp_force_fast_retransmit(sk));
- if (WARN_ON(!tp->packets_out && tp->sacked_out))
+ if (!tp->packets_out && tp->sacked_out)
tp->sacked_out = 0;
- if (WARN_ON(!tp->sacked_out && tp->fackets_out))
- tp->fackets_out = 0;
/* Now state machine starts.
* A. ECE, hence prohibit cwnd undoing, the reduction is required. */
@@ -2853,11 +2797,11 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
if (tcp_is_reno(tp) && is_dupack)
tcp_add_reno_sack(sk);
} else {
- if (tcp_try_undo_partial(sk, acked))
+ if (tcp_try_undo_partial(sk, prior_snd_una))
return;
/* Partial ACK arrived. Force fast retransmit. */
do_lost = tcp_is_reno(tp) ||
- tcp_fackets_out(tp) > tp->reordering;
+ tcp_force_fast_retransmit(sk);
}
if (tcp_try_undo_dsack(sk)) {
tcp_try_keep_open(sk);
@@ -2872,6 +2816,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
(*ack_flag & FLAG_LOST_RETRANS)))
return;
/* Change state if cwnd is undone or retransmits are lost */
+ /* fall through */
default:
if (tcp_is_reno(tp)) {
if (flag & FLAG_SND_UNA_ADVANCED)
@@ -2912,8 +2857,8 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us)
{
+ u32 wlen = sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen * HZ;
struct tcp_sock *tp = tcp_sk(sk);
- u32 wlen = sysctl_tcp_min_rtt_wlen * HZ;
minmax_running_min(&tp->rtt_min, wlen, tcp_jiffies32,
rtt_us ? : jiffies_to_usecs(1));
@@ -3009,8 +2954,7 @@ void tcp_rearm_rto(struct sock *sk)
/* delta_us may not be positive if the socket is locked
* when the retrans timer fires and is rescheduled.
*/
- if (delta_us > 0)
- rto = usecs_to_jiffies(delta_us);
+ rto = usecs_to_jiffies(max_t(int, delta_us, 1));
}
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto,
TCP_RTO_MAX);
@@ -3056,28 +3000,31 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
shinfo = skb_shinfo(skb);
if (!before(shinfo->tskey, prior_snd_una) &&
- before(shinfo->tskey, tcp_sk(sk)->snd_una))
- __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
+ before(shinfo->tskey, tcp_sk(sk)->snd_una)) {
+ tcp_skb_tsorted_save(skb) {
+ __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
+ } tcp_skb_tsorted_restore(skb);
+ }
}
/* Remove acknowledged frames from the retransmission queue. If our packet
* is before the ack sequence we can discard it as it's confirmed to have
* arrived at the other end.
*/
-static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
- u32 prior_snd_una, int *acked,
+static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
+ u32 prior_snd_una,
struct tcp_sacktag_state *sack)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
u64 first_ackt, last_ackt;
struct tcp_sock *tp = tcp_sk(sk);
u32 prior_sacked = tp->sacked_out;
- u32 reord = tp->packets_out;
+ u32 reord = tp->snd_nxt; /* lowest acked un-retx un-sacked seq */
+ struct sk_buff *skb, *next;
bool fully_acked = true;
long sack_rtt_us = -1L;
long seq_rtt_us = -1L;
long ca_rtt_us = -1L;
- struct sk_buff *skb;
u32 pkts_acked = 0;
u32 last_in_flight = 0;
bool rtt_update;
@@ -3085,8 +3032,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
first_ackt = 0;
- while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
+ for (skb = skb_rb_first(&sk->tcp_rtx_queue); skb; skb = next) {
struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
+ const u32 start_seq = scb->seq;
u8 sacked = scb->sacked;
u32 acked_pcount;
@@ -3103,8 +3051,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
break;
fully_acked = false;
} else {
- /* Speedup tcp_unlink_write_queue() and next loop */
- prefetchw(skb->next);
acked_pcount = tcp_skb_pcount(skb);
}
@@ -3119,7 +3065,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
first_ackt = last_ackt;
last_in_flight = TCP_SKB_CB(skb)->tx.in_flight;
- reord = min(pkts_acked, reord);
+ if (before(start_seq, reord))
+ reord = start_seq;
if (!after(scb->end_seq, tp->high_seq))
flag |= FLAG_ORIG_SACK_ACKED;
}
@@ -3156,12 +3103,12 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
if (!fully_acked)
break;
- tcp_unlink_write_queue(skb, sk);
- sk_wmem_free_skb(sk, skb);
+ next = skb_rb_next(skb);
if (unlikely(skb == tp->retransmit_skb_hint))
tp->retransmit_skb_hint = NULL;
if (unlikely(skb == tp->lost_skb_hint))
tp->lost_skb_hint = NULL;
+ tcp_rtx_queue_unlink_and_free(skb, sk);
}
if (!skb)
@@ -3197,16 +3144,12 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
int delta;
/* Non-retransmitted hole got filled? That's reordering */
- if (reord < prior_fackets && reord <= tp->fackets_out)
- tcp_update_reordering(sk, tp->fackets_out - reord, 0);
+ if (before(reord, prior_fack))
+ tcp_check_sack_reordering(sk, reord, 0);
- delta = tcp_is_fack(tp) ? pkts_acked :
- prior_sacked - tp->sacked_out;
+ delta = prior_sacked - tp->sacked_out;
tp->lost_cnt_hint -= min(tp->lost_cnt_hint, delta);
}
-
- tp->fackets_out -= min(pkts_acked, tp->fackets_out);
-
} else if (skb && rtt_update && sack_rtt_us >= 0 &&
sack_rtt_us > tcp_stamp_us_delta(tp->tcp_mstamp, skb->skb_mstamp)) {
/* Do not re-arm RTO if the sack RTT is measured from data sent
@@ -3247,18 +3190,19 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
}
}
#endif
- *acked = pkts_acked;
return flag;
}
static void tcp_ack_probe(struct sock *sk)
{
- const struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
+ struct sk_buff *head = tcp_send_head(sk);
+ const struct tcp_sock *tp = tcp_sk(sk);
/* Was it a usable window open? */
-
- if (!after(TCP_SKB_CB(tcp_send_head(sk))->end_seq, tcp_wnd_end(tp))) {
+ if (!head)
+ return;
+ if (!after(TCP_SKB_CB(head)->end_seq, tcp_wnd_end(tp))) {
icsk->icsk_backoff = 0;
inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0);
/* Socket must be waked up by subsequent tcp_data_snd_check().
@@ -3378,7 +3322,7 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32
tp->pred_flags = 0;
tcp_fast_path_check(sk);
- if (tcp_send_head(sk))
+ if (!tcp_write_queue_empty(sk))
tcp_slow_start_after_idle_check(sk);
if (nwin > tp->max_window) {
@@ -3399,7 +3343,7 @@ static bool __tcp_oow_rate_limited(struct net *net, int mib_idx,
if (*last_oow_ack_time) {
s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time);
- if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) {
+ if (0 <= elapsed && elapsed < net->ipv4.sysctl_tcp_invalid_ratelimit) {
NET_INC_STATS(net, mib_idx);
return true; /* rate-limited: don't send yet! */
}
@@ -3435,10 +3379,11 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
static u32 challenge_timestamp;
static unsigned int challenge_count;
struct tcp_sock *tp = tcp_sk(sk);
+ struct net *net = sock_net(sk);
u32 count, now;
/* First check our per-socket dupack rate limit. */
- if (__tcp_oow_rate_limited(sock_net(sk),
+ if (__tcp_oow_rate_limited(net,
LINUX_MIB_TCPACKSKIPPEDCHALLENGE,
&tp->last_oow_ack_time))
return;
@@ -3446,16 +3391,16 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
/* Then check host-wide RFC 5961 rate limit. */
now = jiffies / HZ;
if (now != challenge_timestamp) {
- u32 half = (sysctl_tcp_challenge_ack_limit + 1) >> 1;
+ u32 ack_limit = net->ipv4.sysctl_tcp_challenge_ack_limit;
+ u32 half = (ack_limit + 1) >> 1;
challenge_timestamp = now;
- WRITE_ONCE(challenge_count, half +
- prandom_u32_max(sysctl_tcp_challenge_ack_limit));
+ WRITE_ONCE(challenge_count, half + prandom_u32_max(ack_limit));
}
count = READ_ONCE(challenge_count);
if (count > 0) {
WRITE_ONCE(challenge_count, count - 1);
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK);
+ NET_INC_STATS(net, LINUX_MIB_TCPCHALLENGEACK);
tcp_send_ack(sk);
}
}
@@ -3553,18 +3498,17 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
u32 ack_seq = TCP_SKB_CB(skb)->seq;
u32 ack = TCP_SKB_CB(skb)->ack_seq;
bool is_dupack = false;
- u32 prior_fackets;
int prior_packets = tp->packets_out;
u32 delivered = tp->delivered;
u32 lost = tp->lost;
- int acked = 0; /* Number of packets newly acked */
int rexmit = REXMIT_NONE; /* Flag to (re)transmit to recover losses */
+ u32 prior_fack;
sack_state.first_sackt = 0;
sack_state.rate = &rs;
- /* We very likely will need to access write queue head. */
- prefetchw(sk->sk_write_queue.next);
+ /* We very likely will need to access rtx queue. */
+ prefetch(sk->tcp_rtx_queue.rb_node);
/* If the ack is older than previous acks
* then we can probably ignore it.
@@ -3590,7 +3534,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
icsk->icsk_retransmits = 0;
}
- prior_fackets = tp->fackets_out;
+ prior_fack = tcp_is_sack(tp) ? tcp_highest_sack_seq(tp) : tp->snd_una;
rs.prior_in_flight = tcp_packets_in_flight(tp);
/* ts_recent update must be made after we are sure that the packet
@@ -3646,8 +3590,9 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
goto no_queue;
/* See if we can take anything off of the retransmit queue. */
- flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, &acked,
- &sack_state);
+ flag |= tcp_clean_rtx_queue(sk, prior_fack, prior_snd_una, &sack_state);
+
+ tcp_rack_update_reo_wnd(sk, &rs);
if (tp->tlp_high_seq)
tcp_process_tlp_ack(sk, ack, flag);
@@ -3657,7 +3602,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
if (tcp_ack_is_dubious(sk, flag)) {
is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
- tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
+ tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag,
+ &rexmit);
}
if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP))
@@ -3673,13 +3619,13 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
no_queue:
/* If data was DSACKed, see if we can undo a cwnd reduction. */
if (flag & FLAG_DSACKING_ACK)
- tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
+ tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag,
+ &rexmit);
/* If this ack opens up a zero window, clear backoff. It was
* being used to time the probes, and is probably far higher than
* it needs to be for normal retransmission.
*/
- if (tcp_send_head(sk))
- tcp_ack_probe(sk);
+ tcp_ack_probe(sk);
if (tp->tlp_high_seq)
tcp_process_tlp_ack(sk, ack, flag);
@@ -3696,7 +3642,8 @@ old_ack:
if (TCP_SKB_CB(skb)->sacked) {
flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
&sack_state);
- tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
+ tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag,
+ &rexmit);
tcp_xmit_recovery(sk, rexmit);
}
@@ -3721,6 +3668,21 @@ static void tcp_parse_fastopen_option(int len, const unsigned char *cookie,
foc->exp = exp_opt;
}
+static void smc_parse_options(const struct tcphdr *th,
+ struct tcp_options_received *opt_rx,
+ const unsigned char *ptr,
+ int opsize)
+{
+#if IS_ENABLED(CONFIG_SMC)
+ if (static_branch_unlikely(&tcp_have_smc)) {
+ if (th->syn && !(opsize & 1) &&
+ opsize >= TCPOLEN_EXP_SMC_BASE &&
+ get_unaligned_be32(ptr) == TCPOPT_SMC_MAGIC)
+ opt_rx->smc_ok = 1;
+ }
+#endif
+}
+
/* Look for tcp options. Normally only called on SYN and SYNACK packets.
* But, this can also be called on packets in the established flow when
* the fast version below fails.
@@ -3828,6 +3790,9 @@ void tcp_parse_options(const struct net *net,
tcp_parse_fastopen_option(opsize -
TCPOLEN_EXP_FASTOPEN_BASE,
ptr + 2, th->syn, foc, true);
+ else
+ smc_parse_options(th, opt_rx, ptr,
+ opsize);
break;
}
@@ -3995,6 +3960,8 @@ static inline bool tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq)
/* When we get a reset we do this. */
void tcp_reset(struct sock *sk)
{
+ trace_tcp_receive_reset(sk);
+
/* We want the right error as BSD sees it (and indeed as we do). */
switch (sk->sk_state) {
case TCP_SYN_SENT:
@@ -4117,7 +4084,7 @@ static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
{
struct tcp_sock *tp = tcp_sk(sk);
- if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
+ if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
int mib_idx;
if (before(seq, tp->rcv_nxt))
@@ -4152,7 +4119,7 @@ static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
tcp_enter_quickack_mode(sk);
- if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
+ if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
u32 end_seq = TCP_SKB_CB(skb)->end_seq;
if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
@@ -4271,6 +4238,7 @@ static void tcp_sack_remove(struct tcp_sock *tp)
/**
* tcp_try_coalesce - try to merge skb to prior one
* @sk: socket
+ * @dest: destination queue
* @to: prior buffer
* @from: buffer to add in queue
* @fragstolen: pointer to boolean
@@ -4303,6 +4271,12 @@ static bool tcp_try_coalesce(struct sock *sk,
TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq;
TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq;
TCP_SKB_CB(to)->tcp_flags |= TCP_SKB_CB(from)->tcp_flags;
+
+ if (TCP_SKB_CB(from)->has_rxtstamp) {
+ TCP_SKB_CB(to)->has_rxtstamp = true;
+ to->tstamp = from->tstamp;
+ }
+
return true;
}
@@ -4325,7 +4299,7 @@ static void tcp_ofo_queue(struct sock *sk)
p = rb_first(&tp->out_of_order_queue);
while (p) {
- skb = rb_entry(p, struct sk_buff, rbnode);
+ skb = rb_to_skb(p);
if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
break;
@@ -4389,7 +4363,7 @@ static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb,
static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct rb_node **p, *q, *parent;
+ struct rb_node **p, *parent;
struct sk_buff *skb1;
u32 seq, end_seq;
bool fragstolen;
@@ -4429,7 +4403,8 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
/* In the typical case, we are adding an skb to the end of the list.
* Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup.
*/
- if (tcp_try_coalesce(sk, tp->ooo_last_skb, skb, &fragstolen)) {
+ if (tcp_try_coalesce(sk, tp->ooo_last_skb,
+ skb, &fragstolen)) {
coalesce_done:
tcp_grow_window(sk, skb);
kfree_skb_partial(skb, fragstolen);
@@ -4447,7 +4422,7 @@ coalesce_done:
parent = NULL;
while (*p) {
parent = *p;
- skb1 = rb_entry(parent, struct sk_buff, rbnode);
+ skb1 = rb_to_skb(parent);
if (before(seq, TCP_SKB_CB(skb1)->seq)) {
p = &parent->rb_left;
continue;
@@ -4479,7 +4454,8 @@ coalesce_done:
__kfree_skb(skb1);
goto merge_right;
}
- } else if (tcp_try_coalesce(sk, skb1, skb, &fragstolen)) {
+ } else if (tcp_try_coalesce(sk, skb1,
+ skb, &fragstolen)) {
goto coalesce_done;
}
p = &parent->rb_right;
@@ -4491,9 +4467,7 @@ insert:
merge_right:
/* Remove other segments covered by skb. */
- while ((q = rb_next(&skb->rbnode)) != NULL) {
- skb1 = rb_entry(q, struct sk_buff, rbnode);
-
+ while ((skb1 = skb_rb_next(skb)) != NULL) {
if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
break;
if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
@@ -4508,7 +4482,7 @@ merge_right:
tcp_drop(sk, skb1);
}
/* If there is no skb after us, we are the last_skb ! */
- if (!q)
+ if (!skb1)
tp->ooo_last_skb = skb;
add_sack:
@@ -4530,7 +4504,8 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int
__skb_pull(skb, hdrlen);
eaten = (tail &&
- tcp_try_coalesce(sk, tail, skb, fragstolen)) ? 1 : 0;
+ tcp_try_coalesce(sk, tail,
+ skb, fragstolen)) ? 1 : 0;
tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq);
if (!eaten) {
__skb_queue_tail(&sk->sk_receive_queue, skb);
@@ -4592,8 +4567,8 @@ err:
static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
- bool fragstolen = false;
- int eaten = -1;
+ bool fragstolen;
+ int eaten;
if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) {
__kfree_skb(skb);
@@ -4615,32 +4590,13 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
goto out_of_window;
/* Ok. In sequence. In window. */
- if (tp->ucopy.task == current &&
- tp->copied_seq == tp->rcv_nxt && tp->ucopy.len &&
- sock_owned_by_user(sk) && !tp->urg_data) {
- int chunk = min_t(unsigned int, skb->len,
- tp->ucopy.len);
-
- __set_current_state(TASK_RUNNING);
-
- if (!skb_copy_datagram_msg(skb, 0, tp->ucopy.msg, chunk)) {
- tp->ucopy.len -= chunk;
- tp->copied_seq += chunk;
- eaten = (chunk == skb->len);
- tcp_rcv_space_adjust(sk);
- }
- }
-
- if (eaten <= 0) {
queue_and_out:
- if (eaten < 0) {
- if (skb_queue_len(&sk->sk_receive_queue) == 0)
- sk_forced_mem_schedule(sk, skb->truesize);
- else if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
- goto drop;
- }
- eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
- }
+ if (skb_queue_len(&sk->sk_receive_queue) == 0)
+ sk_forced_mem_schedule(sk, skb->truesize);
+ else if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
+ goto drop;
+
+ eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
if (skb->len)
tcp_event_data_recv(sk, skb);
@@ -4712,7 +4668,7 @@ static struct sk_buff *tcp_skb_next(struct sk_buff *skb, struct sk_buff_head *li
if (list)
return !skb_queue_is_last(list, skb) ? skb->next : NULL;
- return rb_entry_safe(rb_next(&skb->rbnode), struct sk_buff, rbnode);
+ return skb_rb_next(skb);
}
static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
@@ -4733,7 +4689,7 @@ static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
}
/* Insert skb into rb tree, ordered by TCP_SKB_CB(skb)->seq */
-static void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb)
+void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb)
{
struct rb_node **p = &root->rb_node;
struct rb_node *parent = NULL;
@@ -4741,7 +4697,7 @@ static void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb)
while (*p) {
parent = *p;
- skb1 = rb_entry(parent, struct sk_buff, rbnode);
+ skb1 = rb_to_skb(parent);
if (before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb1)->seq))
p = &parent->rb_left;
else
@@ -4788,7 +4744,7 @@ restart:
* overlaps to the next one.
*/
if (!(TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)) &&
- (tcp_win_from_space(skb->truesize) > skb->len ||
+ (tcp_win_from_space(sk, skb->truesize) > skb->len ||
before(TCP_SKB_CB(skb)->seq, start))) {
end_of_skbs = false;
break;
@@ -4860,26 +4816,19 @@ static void tcp_collapse_ofo_queue(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb, *head;
- struct rb_node *p;
u32 start, end;
- p = rb_first(&tp->out_of_order_queue);
- skb = rb_entry_safe(p, struct sk_buff, rbnode);
+ skb = skb_rb_first(&tp->out_of_order_queue);
new_range:
if (!skb) {
- p = rb_last(&tp->out_of_order_queue);
- /* Note: This is possible p is NULL here. We do not
- * use rb_entry_safe(), as ooo_last_skb is valid only
- * if rbtree is not empty.
- */
- tp->ooo_last_skb = rb_entry(p, struct sk_buff, rbnode);
+ tp->ooo_last_skb = skb_rb_last(&tp->out_of_order_queue);
return;
}
start = TCP_SKB_CB(skb)->seq;
end = TCP_SKB_CB(skb)->end_seq;
for (head = skb;;) {
- skb = tcp_skb_next(skb, NULL);
+ skb = skb_rb_next(skb);
/* Range is terminated when we see a gap or when
* we are at the queue end.
@@ -4922,14 +4871,14 @@ static bool tcp_prune_ofo_queue(struct sock *sk)
do {
prev = rb_prev(node);
rb_erase(node, &tp->out_of_order_queue);
- tcp_drop(sk, rb_entry(node, struct sk_buff, rbnode));
+ tcp_drop(sk, rb_to_skb(node));
sk_mem_reclaim(sk);
if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
!tcp_under_memory_pressure(sk))
break;
node = prev;
} while (node);
- tp->ooo_last_skb = rb_entry(prev, struct sk_buff, rbnode);
+ tp->ooo_last_skb = rb_to_skb(prev);
/* Reset SACK state. A conforming SACK implementation will
* do the same at a timeout based retransmit. When a connection
@@ -5104,7 +5053,7 @@ static void tcp_check_urg(struct sock *sk, const struct tcphdr *th)
struct tcp_sock *tp = tcp_sk(sk);
u32 ptr = ntohs(th->urg_ptr);
- if (ptr && !sysctl_tcp_stdurg)
+ if (ptr && !sock_net(sk)->ipv4.sysctl_tcp_stdurg)
ptr--;
ptr += ntohl(th->seq);
@@ -5190,26 +5139,6 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *t
}
}
-static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
-{
- struct tcp_sock *tp = tcp_sk(sk);
- int chunk = skb->len - hlen;
- int err;
-
- if (skb_csum_unnecessary(skb))
- err = skb_copy_datagram_msg(skb, hlen, tp->ucopy.msg, chunk);
- else
- err = skb_copy_and_csum_datagram_msg(skb, hlen, tp->ucopy.msg);
-
- if (!err) {
- tp->ucopy.len -= chunk;
- tp->copied_seq += chunk;
- tcp_rcv_space_adjust(sk);
- }
-
- return err;
-}
-
/* Accept RST for rcv_nxt - 1 after a FIN.
* When tcp connections are abruptly terminated from Mac OSX (via ^C), a
* FIN is sent followed by a RST packet. The RST is sent with the same
@@ -5362,8 +5291,9 @@ discard:
* tcp_data_queue when everything is OK.
*/
void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
- const struct tcphdr *th, unsigned int len)
+ const struct tcphdr *th)
{
+ unsigned int len = skb->len;
struct tcp_sock *tp = tcp_sk(sk);
tcp_mstamp_refresh(tp);
@@ -5449,56 +5379,28 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
int eaten = 0;
bool fragstolen = false;
- if (tp->ucopy.task == current &&
- tp->copied_seq == tp->rcv_nxt &&
- len - tcp_header_len <= tp->ucopy.len &&
- sock_owned_by_user(sk)) {
- __set_current_state(TASK_RUNNING);
-
- if (!tcp_copy_to_iovec(sk, skb, tcp_header_len)) {
- /* Predicted packet is in window by definition.
- * seq == rcv_nxt and rcv_wup <= rcv_nxt.
- * Hence, check seq<=rcv_wup reduces to:
- */
- if (tcp_header_len ==
- (sizeof(struct tcphdr) +
- TCPOLEN_TSTAMP_ALIGNED) &&
- tp->rcv_nxt == tp->rcv_wup)
- tcp_store_ts_recent(tp);
-
- tcp_rcv_rtt_measure_ts(sk, skb);
-
- __skb_pull(skb, tcp_header_len);
- tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
- NET_INC_STATS(sock_net(sk),
- LINUX_MIB_TCPHPHITSTOUSER);
- eaten = 1;
- }
- }
- if (!eaten) {
- if (tcp_checksum_complete(skb))
- goto csum_error;
+ if (tcp_checksum_complete(skb))
+ goto csum_error;
- if ((int)skb->truesize > sk->sk_forward_alloc)
- goto step5;
+ if ((int)skb->truesize > sk->sk_forward_alloc)
+ goto step5;
- /* Predicted packet is in window by definition.
- * seq == rcv_nxt and rcv_wup <= rcv_nxt.
- * Hence, check seq<=rcv_wup reduces to:
- */
- if (tcp_header_len ==
- (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
- tp->rcv_nxt == tp->rcv_wup)
- tcp_store_ts_recent(tp);
+ /* Predicted packet is in window by definition.
+ * seq == rcv_nxt and rcv_wup <= rcv_nxt.
+ * Hence, check seq<=rcv_wup reduces to:
+ */
+ if (tcp_header_len ==
+ (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
+ tp->rcv_nxt == tp->rcv_wup)
+ tcp_store_ts_recent(tp);
- tcp_rcv_rtt_measure_ts(sk, skb);
+ tcp_rcv_rtt_measure_ts(sk, skb);
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS);
- /* Bulk data transfer: receiver */
- eaten = tcp_queue_rcv(sk, skb, tcp_header_len,
- &fragstolen);
- }
+ /* Bulk data transfer: receiver */
+ eaten = tcp_queue_rcv(sk, skb, tcp_header_len,
+ &fragstolen);
tcp_event_data_recv(sk, skb);
@@ -5571,20 +5473,13 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
security_inet_conn_established(sk, skb);
}
- /* Make sure socket is routed, for correct metrics. */
- icsk->icsk_af_ops->rebuild_header(sk);
-
- tcp_init_metrics(sk);
- tcp_call_bpf(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB);
- tcp_init_congestion_control(sk);
+ tcp_init_transfer(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB);
/* Prevent spurious tcp_cwnd_restart() on first data
* packet.
*/
tp->lsndtime = tcp_jiffies32;
- tcp_init_buffer_space(sk);
-
if (sock_flag(sk, SOCK_KEEPOPEN))
inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));
@@ -5592,14 +5487,13 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
__tcp_fast_path_on(tp, tp->snd_wnd);
else
tp->pred_flags = 0;
-
}
static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
struct tcp_fastopen_cookie *cookie)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct sk_buff *data = tp->syn_data ? tcp_write_queue_head(sk) : NULL;
+ struct sk_buff *data = tp->syn_data ? tcp_rtx_queue_head(sk) : NULL;
u16 mss = tp->rx_opt.mss_clamp, try_exp = 0;
bool syn_drop = false;
@@ -5634,9 +5528,8 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
tcp_fastopen_cache_set(sk, mss, cookie, syn_drop, try_exp);
if (data) { /* Retransmit unacked data in SYN */
- tcp_for_write_queue_from(data, sk) {
- if (data == tcp_send_head(sk) ||
- __tcp_retransmit_skb(sk, data, 1))
+ skb_rbtree_walk_from(data) {
+ if (__tcp_retransmit_skb(sk, data, 1))
break;
}
tcp_rearm_rto(sk);
@@ -5654,6 +5547,16 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
return false;
}
+static void smc_check_reset_syn(struct tcp_sock *tp)
+{
+#if IS_ENABLED(CONFIG_SMC)
+ if (static_branch_unlikely(&tcp_have_smc)) {
+ if (tp->syn_smc && !tp->rx_opt.smc_ok)
+ tp->syn_smc = 0;
+ }
+#endif
+}
+
static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
const struct tcphdr *th)
{
@@ -5749,10 +5652,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
tp->tcp_header_len = sizeof(struct tcphdr);
}
- if (tcp_is_sack(tp) && sysctl_tcp_fack)
- tcp_enable_fack(tp);
-
- tcp_mtup_init(sk);
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
tcp_initialize_rcv_mss(sk);
@@ -5761,6 +5660,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
* is initialized. */
tp->copied_seq = tp->rcv_nxt;
+ smc_check_reset_syn(tp);
+
smp_mb();
tcp_finish_connect(sk, skb);
@@ -5978,15 +5879,18 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
if (req) {
inet_csk(sk)->icsk_retransmits = 0;
reqsk_fastopen_remove(sk, req, false);
+ /* Re-arm the timer because data may have been sent out.
+ * This is similar to the regular data transmission case
+ * when new data has just been ack'ed.
+ *
+ * (TFO) - we could try to be more aggressive and
+ * retransmitting any data sooner based on when they
+ * are sent out.
+ */
+ tcp_rearm_rto(sk);
} else {
- /* Make sure socket is routed, for correct metrics. */
- icsk->icsk_af_ops->rebuild_header(sk);
- tcp_call_bpf(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
- tcp_init_congestion_control(sk);
-
- tcp_mtup_init(sk);
+ tcp_init_transfer(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
tp->copied_seq = tp->rcv_nxt;
- tcp_init_buffer_space(sk);
}
smp_mb();
tcp_set_state(sk, TCP_ESTABLISHED);
@@ -6006,19 +5910,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
if (tp->rx_opt.tstamp_ok)
tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
- if (req) {
- /* Re-arm the timer because data may have been sent out.
- * This is similar to the regular data transmission case
- * when new data has just been ack'ed.
- *
- * (TFO) - we could try to be more aggressive and
- * retransmitting any data sooner based on when they
- * are sent out.
- */
- tcp_rearm_rto(sk);
- } else
- tcp_init_metrics(sk);
-
if (!inet_csk(sk)->icsk_ca_ops->cong_control)
tcp_update_pacing_rate(sk);
@@ -6115,6 +6006,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
case TCP_LAST_ACK:
if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
break;
+ /* fall through */
case TCP_FIN_WAIT1:
case TCP_FIN_WAIT2:
/* RFC 793 says to queue data in these states,
@@ -6223,6 +6115,9 @@ static void tcp_openreq_init(struct request_sock *req,
ireq->ir_rmt_port = tcp_hdr(skb)->source;
ireq->ir_num = ntohs(tcp_hdr(skb)->dest);
ireq->ir_mark = inet_request_mark(sk, skb);
+#if IS_ENABLED(CONFIG_SMC)
+ ireq->smc_ok = rx_opt->smc_ok;
+#endif
}
struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
@@ -6235,8 +6130,7 @@ struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
if (req) {
struct inet_request_sock *ireq = inet_rsk(req);
- kmemcheck_annotate_bitfield(ireq, flags);
- ireq->opt = NULL;
+ ireq->ireq_opt = NULL;
#if IS_ENABLED(CONFIG_IPV6)
ireq->pktopts = NULL;
#endif
@@ -6307,9 +6201,9 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
struct tcp_sock *tp = tcp_sk(sk);
struct net *net = sock_net(sk);
struct sock *fastopen_sk = NULL;
- struct dst_entry *dst = NULL;
struct request_sock *req;
bool want_cookie = false;
+ struct dst_entry *dst;
struct flowi fl;
/* TW buckets are converted to open requests without
@@ -6359,6 +6253,10 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
if (tmp_opt.tstamp_ok)
tcp_rsk(req)->ts_off = af_ops->init_ts_off(net, skb);
+ dst = af_ops->route_req(sk, &fl, req);
+ if (!dst)
+ goto drop_and_free;
+
if (!want_cookie && !isn) {
/* Kill the following clause, if you dislike this way. */
if (!net->ipv4.sysctl_tcp_syncookies &&
@@ -6379,11 +6277,6 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
isn = af_ops->init_seq(skb);
}
- if (!dst) {
- dst = af_ops->route_req(sk, &fl, req);
- if (!dst)
- goto drop_and_free;
- }
tcp_ecn_create_request(req, skb, sk, dst);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index e9252c7df809..c6bc0c4d19c6 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -85,7 +85,7 @@
#include <crypto/hash.h>
#include <linux/scatterlist.h>
-int sysctl_tcp_low_latency __read_mostly;
+#include <trace/events/tcp.h>
#ifdef CONFIG_TCP_MD5SIG
static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
@@ -385,7 +385,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
sk = __inet_lookup_established(net, &tcp_hashinfo, iph->daddr,
th->dest, iph->saddr, ntohs(th->source),
- inet_iif(icmp_skb));
+ inet_iif(icmp_skb), 0);
if (!sk) {
__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
return;
@@ -482,7 +482,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
TCP_TIMEOUT_INIT;
icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);
- skb = tcp_write_queue_head(sk);
+ skb = tcp_rtx_queue_head(sk);
BUG_ON(!skb);
tcp_mstamp_refresh(tp);
@@ -661,7 +661,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
sk1 = __inet_lookup_listener(net, &tcp_hashinfo, NULL, 0,
ip_hdr(skb)->saddr,
th->source, ip_hdr(skb)->daddr,
- ntohs(th->source), inet_iif(skb));
+ ntohs(th->source), inet_iif(skb),
+ tcp_v4_sdif(skb));
/* don't send rst if it can't find key */
if (!sk1)
goto out;
@@ -702,8 +703,10 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
* routing might fail in this case. No choice here, if we choose to force
* input interface, we will misroute in case of asymmetric route.
*/
- if (sk)
+ if (sk) {
arg.bound_dev_if = sk->sk_bound_dev_if;
+ trace_tcp_send_reset(sk, skb);
+ }
BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) !=
offsetof(struct inet_timewait_sock, tw_bound_dev_if));
@@ -878,7 +881,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
ireq->ir_rmt_addr,
- ireq->opt);
+ ireq_opt_deref(ireq));
err = net_xmit_eval(err);
}
@@ -890,7 +893,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
*/
static void tcp_v4_reqsk_destructor(struct request_sock *req)
{
- kfree(inet_rsk(req)->opt);
+ kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1));
}
#ifdef CONFIG_TCP_MD5SIG
@@ -1266,10 +1269,11 @@ static void tcp_v4_init_req(struct request_sock *req,
struct sk_buff *skb)
{
struct inet_request_sock *ireq = inet_rsk(req);
+ struct net *net = sock_net(sk_listener);
sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
- ireq->opt = tcp_v4_save_options(skb);
+ RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(net, skb));
}
static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
@@ -1356,10 +1360,9 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
sk_daddr_set(newsk, ireq->ir_rmt_addr);
sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
newsk->sk_bound_dev_if = ireq->ir_iif;
- newinet->inet_saddr = ireq->ir_loc_addr;
- inet_opt = ireq->opt;
- rcu_assign_pointer(newinet->inet_opt, inet_opt);
- ireq->opt = NULL;
+ newinet->inet_saddr = ireq->ir_loc_addr;
+ inet_opt = rcu_dereference(ireq->ireq_opt);
+ RCU_INIT_POINTER(newinet->inet_opt, inet_opt);
newinet->mc_index = inet_iif(skb);
newinet->mc_ttl = ip_hdr(skb)->ttl;
newinet->rcv_tos = ip_hdr(skb)->tos;
@@ -1404,9 +1407,12 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
if (__inet_inherit_port(sk, newsk) < 0)
goto put_and_exit;
*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
- if (*own_req)
+ if (likely(*own_req)) {
tcp_move_syn(newtp, req);
-
+ ireq->ireq_opt = NULL;
+ } else {
+ newinet->inet_opt = NULL;
+ }
return newsk;
exit_overflow:
@@ -1417,6 +1423,7 @@ exit:
tcp_listendrop(sk);
return NULL;
put_and_exit:
+ newinet->inet_opt = NULL;
inet_csk_prepare_forced_close(newsk);
tcp_done(newsk);
goto exit;
@@ -1458,7 +1465,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
sk->sk_rx_dst = NULL;
}
}
- tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
+ tcp_rcv_established(sk, skb, tcp_hdr(skb));
return 0;
}
@@ -1504,28 +1511,28 @@ csum_err:
}
EXPORT_SYMBOL(tcp_v4_do_rcv);
-void tcp_v4_early_demux(struct sk_buff *skb)
+int tcp_v4_early_demux(struct sk_buff *skb)
{
const struct iphdr *iph;
const struct tcphdr *th;
struct sock *sk;
if (skb->pkt_type != PACKET_HOST)
- return;
+ return 0;
if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
- return;
+ return 0;
iph = ip_hdr(skb);
th = tcp_hdr(skb);
if (th->doff < sizeof(struct tcphdr) / 4)
- return;
+ return 0;
sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
iph->saddr, th->source,
iph->daddr, ntohs(th->dest),
- skb->skb_iif);
+ skb->skb_iif, inet_sdif(skb));
if (sk) {
skb->sk = sk;
skb->destructor = sock_edemux;
@@ -1539,63 +1546,9 @@ void tcp_v4_early_demux(struct sk_buff *skb)
skb_dst_set_noref(skb, dst);
}
}
+ return 0;
}
-/* Packet is added to VJ-style prequeue for processing in process
- * context, if a reader task is waiting. Apparently, this exciting
- * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93)
- * failed somewhere. Latency? Burstiness? Well, at least now we will
- * see, why it failed. 8)8) --ANK
- *
- */
-bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
-{
- struct tcp_sock *tp = tcp_sk(sk);
-
- if (sysctl_tcp_low_latency || !tp->ucopy.task)
- return false;
-
- if (skb->len <= tcp_hdrlen(skb) &&
- skb_queue_len(&tp->ucopy.prequeue) == 0)
- return false;
-
- /* Before escaping RCU protected region, we need to take care of skb
- * dst. Prequeue is only enabled for established sockets.
- * For such sockets, we might need the skb dst only to set sk->sk_rx_dst
- * Instead of doing full sk_rx_dst validity here, let's perform
- * an optimistic check.
- */
- if (likely(sk->sk_rx_dst))
- skb_dst_drop(skb);
- else
- skb_dst_force_safe(skb);
-
- __skb_queue_tail(&tp->ucopy.prequeue, skb);
- tp->ucopy.memory += skb->truesize;
- if (skb_queue_len(&tp->ucopy.prequeue) >= 32 ||
- tp->ucopy.memory + atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) {
- struct sk_buff *skb1;
-
- BUG_ON(sock_owned_by_user(sk));
- __NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPPREQUEUEDROPPED,
- skb_queue_len(&tp->ucopy.prequeue));
-
- while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
- sk_backlog_rcv(sk, skb1);
-
- tp->ucopy.memory = 0;
- } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
- wake_up_interruptible_sync_poll(sk_sleep(sk),
- POLLIN | POLLRDNORM | POLLRDBAND);
- if (!inet_csk_ack_scheduled(sk))
- inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
- (3 * tcp_rto_min(sk)) / 4,
- TCP_RTO_MAX);
- }
- return true;
-}
-EXPORT_SYMBOL(tcp_prequeue);
-
bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
{
u32 limit = sk->sk_rcvbuf + sk->sk_sndbuf;
@@ -1645,6 +1598,7 @@ EXPORT_SYMBOL(tcp_filter);
int tcp_v4_rcv(struct sk_buff *skb)
{
struct net *net = dev_net(skb->dev);
+ int sdif = inet_sdif(skb);
const struct iphdr *iph;
const struct tcphdr *th;
bool refcounted;
@@ -1692,10 +1646,12 @@ int tcp_v4_rcv(struct sk_buff *skb)
TCP_SKB_CB(skb)->tcp_tw_isn = 0;
TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
TCP_SKB_CB(skb)->sacked = 0;
+ TCP_SKB_CB(skb)->has_rxtstamp =
+ skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
lookup:
sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
- th->dest, &refcounted);
+ th->dest, sdif, &refcounted);
if (!sk)
goto no_tcp_socket;
@@ -1722,9 +1678,9 @@ process:
*/
sock_hold(sk);
refcounted = true;
- if (tcp_filter(sk, skb))
- goto discard_and_relse;
- nsk = tcp_check_req(sk, skb, req, false);
+ nsk = NULL;
+ if (!tcp_filter(sk, skb))
+ nsk = tcp_check_req(sk, skb, req, false);
if (!nsk) {
reqsk_put(req);
goto discard_and_relse;
@@ -1770,8 +1726,7 @@ process:
tcp_segs_in(tcp_sk(sk), skb);
ret = 0;
if (!sock_owned_by_user(sk)) {
- if (!tcp_prequeue(sk, skb))
- ret = tcp_v4_do_rcv(sk, skb);
+ ret = tcp_v4_do_rcv(sk, skb);
} else if (tcp_add_backlog(sk, skb)) {
goto discard_and_relse;
}
@@ -1824,15 +1779,17 @@ do_time_wait:
__tcp_hdrlen(th),
iph->saddr, th->source,
iph->daddr, th->dest,
- inet_iif(skb));
+ inet_iif(skb),
+ sdif);
if (sk2) {
inet_twsk_deschedule_put(inet_twsk(sk));
sk = sk2;
refcounted = false;
goto process;
}
- /* Fall through to ACK */
}
+ /* to ACK */
+ /* fall through */
case TCP_TW_ACK:
tcp_v4_timewait_ack(sk, skb);
break;
@@ -1912,6 +1869,8 @@ void tcp_v4_destroy_sock(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
+ trace_tcp_destroy_sock(sk);
+
tcp_clear_xmit_timers(sk);
tcp_cleanup_congestion_control(sk);
@@ -1936,9 +1895,6 @@ void tcp_v4_destroy_sock(struct sock *sk)
}
#endif
- /* Clean prequeue, it must be empty really */
- __skb_queue_purge(&tp->ucopy.prequeue);
-
/* Clean up a referenced TCP bind bucket. */
if (inet_csk(sk)->icsk_bind_hash)
inet_put_port(sk);
@@ -1947,6 +1903,7 @@ void tcp_v4_destroy_sock(struct sock *sk)
/* If socket is aborted during connect operation */
tcp_free_fastopen_req(tp);
+ tcp_fastopen_destroy_cipher(sk);
tcp_saved_syn_free(tp);
sk_sockets_allocated_dec(sk);
@@ -2452,8 +2409,8 @@ struct proto tcp_prot = {
.memory_allocated = &tcp_memory_allocated,
.memory_pressure = &tcp_memory_pressure,
.sysctl_mem = sysctl_tcp_mem,
- .sysctl_wmem = sysctl_tcp_wmem,
- .sysctl_rmem = sysctl_tcp_rmem,
+ .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem),
+ .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),
.max_header = MAX_TCP_HEADER,
.obj_size = sizeof(struct tcp_sock),
.slab_flags = SLAB_TYPESAFE_BY_RCU,
@@ -2473,6 +2430,8 @@ static void __net_exit tcp_sk_exit(struct net *net)
{
int cpu;
+ module_put(net->ipv4.tcp_congestion_control->owner);
+
for_each_possible_cpu(cpu)
inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu));
free_percpu(net->ipv4.tcp_sk);
@@ -2527,6 +2486,50 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_sack = 1;
net->ipv4.sysctl_tcp_window_scaling = 1;
net->ipv4.sysctl_tcp_timestamps = 1;
+ net->ipv4.sysctl_tcp_early_retrans = 3;
+ net->ipv4.sysctl_tcp_recovery = TCP_RACK_LOSS_DETECTION;
+ net->ipv4.sysctl_tcp_slow_start_after_idle = 1; /* By default, RFC2861 behavior. */
+ net->ipv4.sysctl_tcp_retrans_collapse = 1;
+ net->ipv4.sysctl_tcp_max_reordering = 300;
+ net->ipv4.sysctl_tcp_dsack = 1;
+ net->ipv4.sysctl_tcp_app_win = 31;
+ net->ipv4.sysctl_tcp_adv_win_scale = 1;
+ net->ipv4.sysctl_tcp_frto = 2;
+ net->ipv4.sysctl_tcp_moderate_rcvbuf = 1;
+ /* This limits the percentage of the congestion window which we
+ * will allow a single TSO frame to consume. Building TSO frames
+ * which are too large can cause TCP streams to be bursty.
+ */
+ net->ipv4.sysctl_tcp_tso_win_divisor = 3;
+ /* Default TSQ limit of four TSO segments */
+ net->ipv4.sysctl_tcp_limit_output_bytes = 262144;
+ /* rfc5961 challenge ack rate limiting */
+ net->ipv4.sysctl_tcp_challenge_ack_limit = 1000;
+ net->ipv4.sysctl_tcp_min_tso_segs = 2;
+ net->ipv4.sysctl_tcp_min_rtt_wlen = 300;
+ net->ipv4.sysctl_tcp_autocorking = 1;
+ net->ipv4.sysctl_tcp_invalid_ratelimit = HZ/2;
+ net->ipv4.sysctl_tcp_pacing_ss_ratio = 200;
+ net->ipv4.sysctl_tcp_pacing_ca_ratio = 120;
+ if (net != &init_net) {
+ memcpy(net->ipv4.sysctl_tcp_rmem,
+ init_net.ipv4.sysctl_tcp_rmem,
+ sizeof(init_net.ipv4.sysctl_tcp_rmem));
+ memcpy(net->ipv4.sysctl_tcp_wmem,
+ init_net.ipv4.sysctl_tcp_wmem,
+ sizeof(init_net.ipv4.sysctl_tcp_wmem));
+ }
+ net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
+ spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);
+ net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60;
+ atomic_set(&net->ipv4.tfo_active_disable_times, 0);
+
+ /* Reno is always built in */
+ if (!net_eq(net, &init_net) &&
+ try_module_get(init_net.ipv4.tcp_congestion_control->owner))
+ net->ipv4.tcp_congestion_control = init_net.ipv4.tcp_congestion_control;
+ else
+ net->ipv4.tcp_congestion_control = &tcp_reno;
return 0;
fail:
@@ -2537,7 +2540,12 @@ fail:
static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
{
+ struct net *net;
+
inet_twsk_purge(&tcp_hashinfo, AF_INET);
+
+ list_for_each_entry(net, net_exit_list, exit_list)
+ tcp_fastopen_ctx_destroy(net);
}
static struct pernet_operations __net_initdata tcp_sk_ops = {
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 102b2c90bb80..7097f92d16e5 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/rcupdate.h>
#include <linux/spinlock.h>
#include <linux/jiffies.h>
@@ -20,8 +21,6 @@
#include <net/tcp.h>
#include <net/genetlink.h>
-int sysctl_tcp_nometrics_save __read_mostly;
-
static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *saddr,
const struct inetpeer_addr *daddr,
struct net *net, unsigned int hash);
@@ -330,7 +329,7 @@ void tcp_update_metrics(struct sock *sk)
int m;
sk_dst_confirm(sk);
- if (sysctl_tcp_nometrics_save || !dst)
+ if (net->ipv4.sysctl_tcp_nometrics_save || !dst)
return;
rcu_read_lock();
@@ -471,10 +470,8 @@ void tcp_init_metrics(struct sock *sk)
tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
}
val = tcp_metric_get(tm, TCP_METRIC_REORDERING);
- if (val && tp->reordering != val) {
- tcp_disable_fack(tp);
+ if (val && tp->reordering != val)
tp->reordering = val;
- }
crtt = tcp_metric_get(tm, TCP_METRIC_RTT);
rcu_read_unlock();
@@ -892,10 +889,14 @@ static void tcp_metrics_flush_all(struct net *net)
for (row = 0; row < max_rows; row++, hb++) {
struct tcp_metrics_block __rcu **pp;
+ bool match;
+
spin_lock_bh(&tcp_metrics_lock);
pp = &hb->chain;
for (tm = deref_locked(*pp); tm; tm = deref_locked(*pp)) {
- if (net_eq(tm_net(tm), net)) {
+ match = net ? net_eq(tm_net(tm), net) :
+ !atomic_read(&tm_net(tm)->count);
+ if (match) {
*pp = tm->tcpm_next;
kfree_rcu(tm, rcu_head);
} else {
@@ -1018,14 +1019,14 @@ static int __net_init tcp_net_metrics_init(struct net *net)
return 0;
}
-static void __net_exit tcp_net_metrics_exit(struct net *net)
+static void __net_exit tcp_net_metrics_exit_batch(struct list_head *net_exit_list)
{
- tcp_metrics_flush_all(net);
+ tcp_metrics_flush_all(NULL);
}
static __net_initdata struct pernet_operations tcp_net_metrics_ops = {
- .init = tcp_net_metrics_init,
- .exit = tcp_net_metrics_exit,
+ .init = tcp_net_metrics_init,
+ .exit_batch = tcp_net_metrics_exit_batch,
};
void __init tcp_metrics_init(void)
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 0ff83c1637d8..e36eff0403f4 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -23,13 +23,12 @@
#include <linux/slab.h>
#include <linux/sysctl.h>
#include <linux/workqueue.h>
+#include <linux/static_key.h>
#include <net/tcp.h>
#include <net/inet_common.h>
#include <net/xfrm.h>
#include <net/busy_poll.h>
-int sysctl_tcp_abort_on_overflow __read_mostly;
-
static bool tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win)
{
if (seq == s_win)
@@ -180,7 +179,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
* Oh well... nobody has a sufficient solution to this
* protocol bug yet.
*/
- if (sysctl_tcp_rfc1337 == 0) {
+ if (twsk_net(tw)->ipv4.sysctl_tcp_rfc1337 == 0) {
kill:
inet_twsk_deschedule_put(tw);
return TCP_TW_SUCCESS;
@@ -298,8 +297,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
key = tp->af_specific->md5_lookup(sk, sk);
if (key) {
tcptw->tw_md5_key = kmemdup(key, sizeof(*key), GFP_ATOMIC);
- if (tcptw->tw_md5_key && !tcp_alloc_md5sig_pool())
- BUG();
+ BUG_ON(tcptw->tw_md5_key && !tcp_alloc_md5sig_pool());
}
} while (0);
#endif
@@ -371,7 +369,7 @@ void tcp_openreq_init_rwin(struct request_sock *req,
full_space = rcv_wnd * mss;
/* tcp_full_space because it is guaranteed to be the first packet */
- tcp_select_initial_window(full_space,
+ tcp_select_initial_window(sk_listener, full_space,
mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
&req->rsk_rcv_wnd,
&req->rsk_window_clamp,
@@ -417,6 +415,21 @@ void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst)
}
EXPORT_SYMBOL_GPL(tcp_ca_openreq_child);
+static void smc_check_reset_syn_req(struct tcp_sock *oldtp,
+ struct request_sock *req,
+ struct tcp_sock *newtp)
+{
+#if IS_ENABLED(CONFIG_SMC)
+ struct inet_request_sock *ireq;
+
+ if (static_branch_unlikely(&tcp_have_smc)) {
+ ireq = inet_rsk(req);
+ if (oldtp->syn_smc && !ireq->smc_ok)
+ newtp->syn_smc = 0;
+ }
+#endif
+}
+
/* This is not only more efficient than what we used to do, it eliminates
* a lot of code duplication between IPv4/IPv6 SYN recv processing. -DaveM
*
@@ -434,6 +447,9 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
struct tcp_request_sock *treq = tcp_rsk(req);
struct inet_connection_sock *newicsk = inet_csk(newsk);
struct tcp_sock *newtp = tcp_sk(newsk);
+ struct tcp_sock *oldtp = tcp_sk(sk);
+
+ smc_check_reset_syn_req(oldtp, req, newtp);
/* Now setup tcp_sock */
newtp->pred_flags = 0;
@@ -445,8 +461,8 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newtp->snd_sml = newtp->snd_una =
newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1;
- tcp_prequeue_init(newtp);
INIT_LIST_HEAD(&newtp->tsq_node);
+ INIT_LIST_HEAD(&newtp->tsorted_sent_queue);
tcp_init_wl(newtp, treq->rcv_isn);
@@ -459,7 +475,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newtp->packets_out = 0;
newtp->retrans_out = 0;
newtp->sacked_out = 0;
- newtp->fackets_out = 0;
newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
newtp->tlp_high_seq = 0;
newtp->lsndtime = tcp_jiffies32;
@@ -493,10 +508,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
keepalive_time_when(newtp));
newtp->rx_opt.tstamp_ok = ireq->tstamp_ok;
- if ((newtp->rx_opt.sack_ok = ireq->sack_ok) != 0) {
- if (sysctl_tcp_fack)
- tcp_enable_fack(newtp);
- }
+ newtp->rx_opt.sack_ok = ireq->sack_ok;
newtp->window_clamp = req->rsk_window_clamp;
newtp->rcv_ssthresh = req->rsk_rcv_wnd;
newtp->rcv_wnd = req->rsk_rcv_wnd;
@@ -535,6 +547,10 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newtp->syn_data_acked = 0;
newtp->rack.mstamp = 0;
newtp->rack.advanced = 0;
+ newtp->rack.reo_wnd_steps = 1;
+ newtp->rack.last_delivered = 0;
+ newtp->rack.reo_wnd_persist = 0;
+ newtp->rack.dsack_seen = 0;
__TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS);
}
@@ -765,7 +781,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
return inet_csk_complete_hashdance(sk, child, req, own_req);
listen_overflow:
- if (!sysctl_tcp_abort_on_overflow) {
+ if (!sock_net(sk)->ipv4.sysctl_tcp_abort_on_overflow) {
inet_rsk(req)->acked = 1;
return NULL;
}
diff --git a/net/ipv4/tcp_nv.c b/net/ipv4/tcp_nv.c
index 6d650ed3cb59..0b5a05bd82e3 100644
--- a/net/ipv4/tcp_nv.c
+++ b/net/ipv4/tcp_nv.c
@@ -39,7 +39,7 @@
* nv_cong_dec_mult Decrease cwnd by X% (30%) of congestion when detected
* nv_ssthresh_factor On congestion set ssthresh to this * <desired cwnd> / 8
* nv_rtt_factor RTT averaging factor
- * nv_loss_dec_factor Decrease cwnd by this (50%) when losses occur
+ * nv_loss_dec_factor Decrease cwnd to this (80%) when losses occur
* nv_dec_eval_min_calls Wait this many RTT measurements before dec cwnd
* nv_inc_eval_min_calls Wait this many RTT measurements before inc cwnd
* nv_ssthresh_eval_min_calls Wait this many RTT measurements before stopping
@@ -61,7 +61,7 @@ static int nv_min_cwnd __read_mostly = 2;
static int nv_cong_dec_mult __read_mostly = 30 * 128 / 100; /* = 30% */
static int nv_ssthresh_factor __read_mostly = 8; /* = 1 */
static int nv_rtt_factor __read_mostly = 128; /* = 1/2*old + 1/2*new */
-static int nv_loss_dec_factor __read_mostly = 512; /* => 50% */
+static int nv_loss_dec_factor __read_mostly = 819; /* => 80% */
static int nv_cwnd_growth_rate_neg __read_mostly = 8;
static int nv_cwnd_growth_rate_pos __read_mostly; /* 0 => fixed like Reno */
static int nv_dec_eval_min_calls __read_mostly = 60;
@@ -86,7 +86,6 @@ struct tcpnv {
* < 0 => less than 1 packet/RTT */
u8 available8;
u16 available16;
- u32 loss_cwnd; /* cwnd at last loss */
u8 nv_allow_cwnd_growth:1, /* whether cwnd can grow */
nv_reset:1, /* whether to reset values */
nv_catchup:1; /* whether we are growing because
@@ -102,6 +101,11 @@ struct tcpnv {
u32 nv_last_rtt; /* last rtt */
u32 nv_min_rtt; /* active min rtt. Used to determine slope */
u32 nv_min_rtt_new; /* min rtt for future use */
+ u32 nv_base_rtt; /* If non-zero it represents the threshold for
+ * congestion */
+ u32 nv_lower_bound_rtt; /* Used in conjunction with nv_base_rtt. It is
+ * set to 80% of nv_base_rtt. It helps reduce
+ * unfairness between flows */
u32 nv_rtt_max_rate; /* max rate seen during current RTT */
u32 nv_rtt_start_seq; /* current RTT ends when packet arrives
* acking beyond nv_rtt_start_seq */
@@ -121,7 +125,6 @@ static inline void tcpnv_reset(struct tcpnv *ca, struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
ca->nv_reset = 0;
- ca->loss_cwnd = 0;
ca->nv_no_cong_cnt = 0;
ca->nv_rtt_cnt = 0;
ca->nv_last_rtt = 0;
@@ -134,9 +137,24 @@ static inline void tcpnv_reset(struct tcpnv *ca, struct sock *sk)
static void tcpnv_init(struct sock *sk)
{
struct tcpnv *ca = inet_csk_ca(sk);
+ int base_rtt;
tcpnv_reset(ca, sk);
+ /* See if base_rtt is available from socket_ops bpf program.
+ * It is meant to be used in environments, such as communication
+ * within a datacenter, where we have reasonable estimates of
+ * RTTs
+ */
+ base_rtt = tcp_call_bpf(sk, BPF_SOCK_OPS_BASE_RTT);
+ if (base_rtt > 0) {
+ ca->nv_base_rtt = base_rtt;
+ ca->nv_lower_bound_rtt = (base_rtt * 205) >> 8; /* 80% */
+ } else {
+ ca->nv_base_rtt = 0;
+ ca->nv_lower_bound_rtt = 0;
+ }
+
ca->nv_allow_cwnd_growth = 1;
ca->nv_min_rtt_reset_jiffies = jiffies + 2 * HZ;
ca->nv_min_rtt = NV_INIT_RTT;
@@ -146,6 +164,19 @@ static void tcpnv_init(struct sock *sk)
ca->cwnd_growth_factor = 0;
}
+/* If provided, apply upper (base_rtt) and lower (lower_bound_rtt)
+ * bounds to RTT.
+ */
+inline u32 nv_get_bounded_rtt(struct tcpnv *ca, u32 val)
+{
+ if (ca->nv_lower_bound_rtt > 0 && val < ca->nv_lower_bound_rtt)
+ return ca->nv_lower_bound_rtt;
+ else if (ca->nv_base_rtt > 0 && val > ca->nv_base_rtt)
+ return ca->nv_base_rtt;
+ else
+ return val;
+}
+
static void tcpnv_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -177,19 +208,10 @@ static void tcpnv_cong_avoid(struct sock *sk, u32 ack, u32 acked)
static u32 tcpnv_recalc_ssthresh(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
- struct tcpnv *ca = inet_csk_ca(sk);
- ca->loss_cwnd = tp->snd_cwnd;
return max((tp->snd_cwnd * nv_loss_dec_factor) >> 10, 2U);
}
-static u32 tcpnv_undo_cwnd(struct sock *sk)
-{
- struct tcpnv *ca = inet_csk_ca(sk);
-
- return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
-}
-
static void tcpnv_state(struct sock *sk, u8 new_state)
{
struct tcpnv *ca = inet_csk_ca(sk);
@@ -220,7 +242,7 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample)
struct tcp_sock *tp = tcp_sk(sk);
struct tcpnv *ca = inet_csk_ca(sk);
unsigned long now = jiffies;
- s64 rate64 = 0;
+ u64 rate64;
u32 rate, max_win, cwnd_by_slope;
u32 avg_rtt;
u32 bytes_acked = 0;
@@ -262,8 +284,9 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample)
}
/* rate in 100's bits per second */
- rate64 = ((u64)sample->in_flight) * 8000000;
- rate = (u32)div64_u64(rate64, (u64)(avg_rtt * 100));
+ rate64 = ((u64)sample->in_flight) * 80000;
+ do_div(rate64, avg_rtt ?: 1);
+ rate = (u32)rate64;
/* Remember the maximum rate seen during this RTT
* Note: It may be more than one RTT. This function should be
@@ -276,6 +299,9 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample)
if (ca->nv_eval_call_cnt < 255)
ca->nv_eval_call_cnt++;
+ /* Apply bounds to rtt. Only used to update min_rtt */
+ avg_rtt = nv_get_bounded_rtt(ca, avg_rtt);
+
/* update min rtt if necessary */
if (avg_rtt < ca->nv_min_rtt)
ca->nv_min_rtt = avg_rtt;
@@ -446,7 +472,7 @@ static struct tcp_congestion_ops tcpnv __read_mostly = {
.ssthresh = tcpnv_recalc_ssthresh,
.cong_avoid = tcpnv_cong_avoid,
.set_state = tcpnv_state,
- .undo_cwnd = tcpnv_undo_cwnd,
+ .undo_cwnd = tcp_reno_undo_cwnd,
.pkts_acked = tcpnv_acked,
.get_info = tcpnv_get_info,
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 11f69bbf9307..b6a2aa1dcf56 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -149,11 +149,19 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
* is freed by GSO engine
*/
if (copy_destructor) {
+ int delta;
+
swap(gso_skb->sk, skb->sk);
swap(gso_skb->destructor, skb->destructor);
sum_truesize += skb->truesize;
- refcount_add(sum_truesize - gso_skb->truesize,
- &skb->sk->sk_wmem_alloc);
+ delta = sum_truesize - gso_skb->truesize;
+ /* In some pathological cases, delta can be negative.
+ * We need to either use refcount_add() or refcount_sub_and_test()
+ */
+ if (likely(delta >= 0))
+ refcount_add(delta, &skb->sk->sk_wmem_alloc);
+ else
+ WARN_ON_ONCE(refcount_sub_and_test(-delta, &skb->sk->sk_wmem_alloc));
}
delta = htonl(oldlen + (skb_tail_pointer(skb) -
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index b7661a68d498..540b7d92cc70 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -41,40 +41,25 @@
#include <linux/compiler.h>
#include <linux/gfp.h>
#include <linux/module.h>
+#include <linux/static_key.h>
-/* People can turn this off for buggy TCP's found in printers etc. */
-int sysctl_tcp_retrans_collapse __read_mostly = 1;
-
-/* People can turn this on to work with those rare, broken TCPs that
- * interpret the window field as a signed quantity.
- */
-int sysctl_tcp_workaround_signed_windows __read_mostly = 0;
-
-/* Default TSQ limit of four TSO segments */
-int sysctl_tcp_limit_output_bytes __read_mostly = 262144;
-
-/* This limits the percentage of the congestion window which we
- * will allow a single TSO frame to consume. Building TSO frames
- * which are too large can cause TCP streams to be bursty.
- */
-int sysctl_tcp_tso_win_divisor __read_mostly = 3;
-
-/* By default, RFC2861 behavior. */
-int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
+#include <trace/events/tcp.h>
static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
int push_one, gfp_t gfp);
/* Account for new data that has been sent to the network. */
-static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
+static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
unsigned int prior_packets = tp->packets_out;
- tcp_advance_send_head(sk, skb);
tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
+ __skb_unlink(skb, &sk->sk_write_queue);
+ tcp_rbtree_insert(&sk->tcp_rtx_queue, skb);
+
tp->packets_out += tcp_skb_pcount(skb);
if (!prior_packets || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
tcp_rearm_rto(sk);
@@ -203,7 +188,7 @@ u32 tcp_default_init_rwnd(u32 mss)
* be a multiple of mss if possible. We assume here that mss >= 1.
* This MUST be enforced by all callers.
*/
-void tcp_select_initial_window(int __space, __u32 mss,
+void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
__u32 *rcv_wnd, __u32 *window_clamp,
int wscale_ok, __u8 *rcv_wscale,
__u32 init_rcv_wnd)
@@ -227,7 +212,7 @@ void tcp_select_initial_window(int __space, __u32 mss,
* which we interpret as a sign the remote TCP is not
* misinterpreting the window field as a signed quantity.
*/
- if (sysctl_tcp_workaround_signed_windows)
+ if (sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows)
(*rcv_wnd) = min(space, MAX_TCP_WINDOW);
else
(*rcv_wnd) = space;
@@ -235,7 +220,7 @@ void tcp_select_initial_window(int __space, __u32 mss,
(*rcv_wscale) = 0;
if (wscale_ok) {
/* Set window scaling on max possible window */
- space = max_t(u32, space, sysctl_tcp_rmem[2]);
+ space = max_t(u32, space, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
space = max_t(u32, space, sysctl_rmem_max);
space = min_t(u32, space, *window_clamp);
while (space > U16_MAX && (*rcv_wscale) < TCP_MAX_WSCALE) {
@@ -287,7 +272,8 @@ static u16 tcp_select_window(struct sock *sk)
/* Make sure we do not exceed the maximum possible
* scaled window.
*/
- if (!tp->rx_opt.rcv_wscale && sysctl_tcp_workaround_signed_windows)
+ if (!tp->rx_opt.rcv_wscale &&
+ sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows)
new_win = min(new_win, MAX_TCP_WINDOW);
else
new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale));
@@ -395,7 +381,6 @@ static void tcp_ecn_send(struct sock *sk, struct sk_buff *skb,
static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
{
skb->ip_summed = CHECKSUM_PARTIAL;
- skb->csum = 0;
TCP_SKB_CB(skb)->tcp_flags = flags;
TCP_SKB_CB(skb)->sacked = 0;
@@ -418,6 +403,22 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp)
#define OPTION_MD5 (1 << 2)
#define OPTION_WSCALE (1 << 3)
#define OPTION_FAST_OPEN_COOKIE (1 << 8)
+#define OPTION_SMC (1 << 9)
+
+static void smc_options_write(__be32 *ptr, u16 *options)
+{
+#if IS_ENABLED(CONFIG_SMC)
+ if (static_branch_unlikely(&tcp_have_smc)) {
+ if (unlikely(OPTION_SMC & *options)) {
+ *ptr++ = htonl((TCPOPT_NOP << 24) |
+ (TCPOPT_NOP << 16) |
+ (TCPOPT_EXP << 8) |
+ (TCPOLEN_EXP_SMC_BASE));
+ *ptr++ = htonl(TCPOPT_SMC_MAGIC);
+ }
+ }
+#endif
+}
struct tcp_out_options {
u16 options; /* bit field of OPTION_* */
@@ -536,6 +537,41 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
}
ptr += (len + 3) >> 2;
}
+
+ smc_options_write(ptr, &options);
+}
+
+static void smc_set_option(const struct tcp_sock *tp,
+ struct tcp_out_options *opts,
+ unsigned int *remaining)
+{
+#if IS_ENABLED(CONFIG_SMC)
+ if (static_branch_unlikely(&tcp_have_smc)) {
+ if (tp->syn_smc) {
+ if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
+ opts->options |= OPTION_SMC;
+ *remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
+ }
+ }
+ }
+#endif
+}
+
+static void smc_set_option_cond(const struct tcp_sock *tp,
+ const struct inet_request_sock *ireq,
+ struct tcp_out_options *opts,
+ unsigned int *remaining)
+{
+#if IS_ENABLED(CONFIG_SMC)
+ if (static_branch_unlikely(&tcp_have_smc)) {
+ if (tp->syn_smc && ireq->smc_ok) {
+ if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
+ opts->options |= OPTION_SMC;
+ *remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
+ }
+ }
+ }
+#endif
}
/* Compute TCP options for SYN packets. This is not the final
@@ -603,11 +639,14 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
}
}
+ smc_set_option(tp, opts, &remaining);
+
return MAX_TCP_OPTION_SPACE - remaining;
}
/* Set up TCP options for SYN-ACKs. */
-static unsigned int tcp_synack_options(struct request_sock *req,
+static unsigned int tcp_synack_options(const struct sock *sk,
+ struct request_sock *req,
unsigned int mss, struct sk_buff *skb,
struct tcp_out_options *opts,
const struct tcp_md5sig_key *md5,
@@ -663,6 +702,8 @@ static unsigned int tcp_synack_options(struct request_sock *req,
}
}
+ smc_set_option_cond(tcp_sk(sk), ireq, opts, &remaining);
+
return MAX_TCP_OPTION_SPACE - remaining;
}
@@ -739,8 +780,10 @@ static void tcp_tsq_handler(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
if (tp->lost_out > tp->retrans_out &&
- tp->snd_cwnd > tcp_packets_in_flight(tp))
+ tp->snd_cwnd > tcp_packets_in_flight(tp)) {
+ tcp_mstamp_refresh(tp);
tcp_xmit_retransmit_queue(sk);
+ }
tcp_write_xmit(sk, tcp_current_mss(sk), tp->nonagle,
0, GFP_ATOMIC);
@@ -971,6 +1014,12 @@ static void tcp_internal_pacing(struct sock *sk, const struct sk_buff *skb)
HRTIMER_MODE_ABS_PINNED);
}
+static void tcp_update_skb_after_send(struct tcp_sock *tp, struct sk_buff *skb)
+{
+ skb->skb_mstamp = tp->tcp_mstamp;
+ list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
+}
+
/* This routine actually transmits TCP packets queued in by
* tcp_do_sendmsg(). This is used by both the initial
* transmission and possible later retransmissions.
@@ -991,6 +1040,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
struct tcp_skb_cb *tcb;
struct tcp_out_options opts;
unsigned int tcp_options_size, tcp_header_size;
+ struct sk_buff *oskb = NULL;
struct tcp_md5sig_key *md5;
struct tcphdr *th;
int err;
@@ -998,19 +1048,22 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
BUG_ON(!skb || !tcp_skb_pcount(skb));
tp = tcp_sk(sk);
- skb->skb_mstamp = tp->tcp_mstamp;
if (clone_it) {
TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq
- tp->snd_una;
- tcp_rate_skb_sent(sk, skb);
+ oskb = skb;
+
+ tcp_skb_tsorted_save(oskb) {
+ if (unlikely(skb_cloned(oskb)))
+ skb = pskb_copy(oskb, gfp_mask);
+ else
+ skb = skb_clone(oskb, gfp_mask);
+ } tcp_skb_tsorted_restore(oskb);
- if (unlikely(skb_cloned(skb)))
- skb = pskb_copy(skb, gfp_mask);
- else
- skb = skb_clone(skb, gfp_mask);
if (unlikely(!skb))
return -ENOBUFS;
}
+ skb->skb_mstamp = tp->tcp_mstamp;
inet = inet_sk(sk);
tcb = TCP_SKB_CB(skb);
@@ -1122,12 +1175,15 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl);
- if (likely(err <= 0))
- return err;
-
- tcp_enter_cwr(sk);
-
- return net_xmit_eval(err);
+ if (unlikely(err > 0)) {
+ tcp_enter_cwr(sk);
+ err = net_xmit_eval(err);
+ }
+ if (!err && oskb) {
+ tcp_update_skb_after_send(tp, oskb);
+ tcp_rate_skb_sent(sk, oskb);
+ }
+ return err;
}
/* This routine just queues the buffer for sending.
@@ -1162,21 +1218,6 @@ static void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now)
}
}
-/* When a modification to fackets out becomes necessary, we need to check
- * skb is counted to fackets_out or not.
- */
-static void tcp_adjust_fackets_out(struct sock *sk, const struct sk_buff *skb,
- int decr)
-{
- struct tcp_sock *tp = tcp_sk(sk);
-
- if (!tp->sacked_out || tcp_is_reno(tp))
- return;
-
- if (after(tcp_highest_sack_seq(tp), TCP_SKB_CB(skb)->seq))
- tp->fackets_out -= decr;
-}
-
/* Pcount in the middle of the write queue got changed, we need to do various
* tweaks to fix counters
*/
@@ -1197,11 +1238,9 @@ static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int de
if (tcp_is_reno(tp) && decr > 0)
tp->sacked_out -= min_t(u32, tp->sacked_out, decr);
- tcp_adjust_fackets_out(sk, skb, decr);
-
if (tp->lost_skb_hint &&
before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->lost_skb_hint)->seq) &&
- (tcp_is_fack(tp) || (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)))
+ (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
tp->lost_cnt_hint -= decr;
tcp_verify_left_out(tp);
@@ -1236,12 +1275,25 @@ static void tcp_skb_fragment_eor(struct sk_buff *skb, struct sk_buff *skb2)
TCP_SKB_CB(skb)->eor = 0;
}
+/* Insert buff after skb on the write or rtx queue of sk. */
+static void tcp_insert_write_queue_after(struct sk_buff *skb,
+ struct sk_buff *buff,
+ struct sock *sk,
+ enum tcp_queue tcp_queue)
+{
+ if (tcp_queue == TCP_FRAG_IN_WRITE_QUEUE)
+ __skb_queue_after(&sk->sk_write_queue, skb, buff);
+ else
+ tcp_rbtree_insert(&sk->tcp_rtx_queue, buff);
+}
+
/* Function to create two new TCP segments. Shrinks the given segment
* to the specified size and appends a new segment with the rest of the
* packet to the list. This won't be called frequently, I hope.
* Remember, these are still headerless SKBs at this point.
*/
-int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
+int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
+ struct sk_buff *skb, u32 len,
unsigned int mss_now, gfp_t gfp)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -1324,7 +1376,9 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
/* Link BUFF into the send queue. */
__skb_header_release(buff);
- tcp_insert_write_queue_after(skb, buff, sk);
+ tcp_insert_write_queue_after(skb, buff, sk, tcp_queue);
+ if (tcp_queue == TCP_FRAG_IN_RTX_QUEUE)
+ list_add(&buff->tcp_tsorted_anchor, &skb->tcp_tsorted_anchor);
return 0;
}
@@ -1602,7 +1656,7 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
if (tp->packets_out > tp->snd_cwnd_used)
tp->snd_cwnd_used = tp->packets_out;
- if (sysctl_tcp_slow_start_after_idle &&
+ if (sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle &&
(s32)(tcp_jiffies32 - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto &&
!ca_ops->cong_control)
tcp_cwnd_application_limited(sk);
@@ -1611,10 +1665,10 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
* is caused by insufficient sender buffer:
* 1) just sent some data (see tcp_write_xmit)
* 2) not cwnd limited (this else condition)
- * 3) no more data to send (null tcp_send_head )
+ * 3) no more data to send (tcp_write_queue_empty())
* 4) application is hitting buffer limit (SOCK_NOSPACE)
*/
- if (!tcp_send_head(sk) && sk->sk_socket &&
+ if (tcp_write_queue_empty(sk) && sk->sk_socket &&
test_bit(SOCK_NOSPACE, &sk->sk_socket->flags) &&
(1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
tcp_chrono_start(sk, TCP_CHRONO_SNDBUF_LIMITED);
@@ -1666,7 +1720,7 @@ u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
{
u32 bytes, segs;
- bytes = min(sk->sk_pacing_rate >> 10,
+ bytes = min(sk->sk_pacing_rate >> sk->sk_pacing_shift,
sk->sk_gso_max_size - 1 - MAX_TCP_HEADER);
/* Goal is to send at least one packet per ms,
@@ -1689,7 +1743,8 @@ static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
u32 tso_segs = ca_ops->tso_segs_goal ? ca_ops->tso_segs_goal(sk) : 0;
return tso_segs ? :
- tcp_tso_autosize(sk, mss_now, sysctl_tcp_min_tso_segs);
+ tcp_tso_autosize(sk, mss_now,
+ sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
}
/* Returns the portion of skb which can be sent right away */
@@ -1803,40 +1858,6 @@ static bool tcp_snd_wnd_test(const struct tcp_sock *tp,
return !after(end_seq, tcp_wnd_end(tp));
}
-/* This checks if the data bearing packet SKB (usually tcp_send_head(sk))
- * should be put on the wire right now. If so, it returns the number of
- * packets allowed by the congestion window.
- */
-static unsigned int tcp_snd_test(const struct sock *sk, struct sk_buff *skb,
- unsigned int cur_mss, int nonagle)
-{
- const struct tcp_sock *tp = tcp_sk(sk);
- unsigned int cwnd_quota;
-
- tcp_init_tso_segs(skb, cur_mss);
-
- if (!tcp_nagle_test(tp, skb, cur_mss, nonagle))
- return 0;
-
- cwnd_quota = tcp_cwnd_test(tp, skb);
- if (cwnd_quota && !tcp_snd_wnd_test(tp, skb, cur_mss))
- cwnd_quota = 0;
-
- return cwnd_quota;
-}
-
-/* Test if sending is allowed right now. */
-bool tcp_may_send_now(struct sock *sk)
-{
- const struct tcp_sock *tp = tcp_sk(sk);
- struct sk_buff *skb = tcp_send_head(sk);
-
- return skb &&
- tcp_snd_test(sk, skb, tcp_current_mss(sk),
- (tcp_skb_is_last(sk, skb) ?
- tp->nonagle : TCP_NAGLE_PUSH));
-}
-
/* Trim TSO SKB to LEN bytes, put the remaining data into a new packet
* which is put after SKB on the list. It is very much like
* tcp_fragment() except that it may make several kinds of assumptions
@@ -1844,7 +1865,8 @@ bool tcp_may_send_now(struct sock *sk)
* know that all the data is in scatter-gather pages, and that the
* packet has never been sent out before (and thus is not cloned).
*/
-static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
+static int tso_fragment(struct sock *sk, enum tcp_queue tcp_queue,
+ struct sk_buff *skb, unsigned int len,
unsigned int mss_now, gfp_t gfp)
{
struct sk_buff *buff;
@@ -1853,7 +1875,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
/* All of a TSO frame must be composed of paged data. */
if (skb->len != skb->data_len)
- return tcp_fragment(sk, skb, len, mss_now, gfp);
+ return tcp_fragment(sk, tcp_queue, skb, len, mss_now, gfp);
buff = sk_stream_alloc_skb(sk, 0, gfp, true);
if (unlikely(!buff))
@@ -1889,7 +1911,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
/* Link BUFF into the send queue. */
__skb_header_release(buff);
- tcp_insert_write_queue_after(skb, buff, sk);
+ tcp_insert_write_queue_after(skb, buff, sk, tcp_queue);
return 0;
}
@@ -1939,7 +1961,7 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len))
goto send_now;
- win_divisor = ACCESS_ONCE(sysctl_tcp_tso_win_divisor);
+ win_divisor = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tso_win_divisor);
if (win_divisor) {
u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);
@@ -1959,8 +1981,10 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
goto send_now;
}
- head = tcp_write_queue_head(sk);
-
+ /* TODO : use tsorted_sent_queue ? */
+ head = tcp_rtx_queue_head(sk);
+ if (!head)
+ goto send_now;
age = tcp_stamp_us_delta(tp->tcp_mstamp, head->skb_mstamp);
/* If next ACK is likely to come too late (half srtt), do not defer */
if (age < (tp->srtt_us >> 4))
@@ -2091,6 +2115,7 @@ static int tcp_mtu_probe(struct sock *sk)
nskb->ip_summed = skb->ip_summed;
tcp_insert_write_queue_before(nskb, skb, sk);
+ tcp_highest_sack_replace(sk, skb, nskb);
len = 0;
tcp_for_write_queue_from_safe(skb, next, sk) {
@@ -2173,18 +2198,18 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
{
unsigned int limit;
- limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10);
- limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes);
+ limit = max(2 * skb->truesize, sk->sk_pacing_rate >> sk->sk_pacing_shift);
+ limit = min_t(u32, limit,
+ sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes);
limit <<= factor;
if (refcount_read(&sk->sk_wmem_alloc) > limit) {
- /* Always send the 1st or 2nd skb in write queue.
+ /* Always send skb if rtx queue is empty.
* No need to wait for TX completion to call us back,
* after softirq/tasklet schedule.
* This helps when TX completions are delayed too much.
*/
- if (skb == sk->sk_write_queue.next ||
- skb->prev == sk->sk_write_queue.next)
+ if (tcp_rtx_queue_empty(sk))
return false;
set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags);
@@ -2235,7 +2260,7 @@ void tcp_chrono_stop(struct sock *sk, const enum tcp_chrono type)
* it's the "most interesting" or current chrono we are
* tracking and starts busy chrono if we have pending data.
*/
- if (tcp_write_queue_empty(sk))
+ if (tcp_rtx_and_write_queues_empty(sk))
tcp_chrono_set(tp, TCP_CHRONO_UNSPEC);
else if (type == tp->chrono_type)
tcp_chrono_set(tp, TCP_CHRONO_BUSY);
@@ -2268,6 +2293,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
sent_pkts = 0;
+ tcp_mstamp_refresh(tp);
if (!push_one) {
/* Do MTU probing. */
result = tcp_mtu_probe(sk);
@@ -2279,7 +2305,6 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
}
max_segs = tcp_tso_segs(sk, mss_now);
- tcp_mstamp_refresh(tp);
while ((skb = tcp_send_head(sk))) {
unsigned int limit;
@@ -2291,7 +2316,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) {
/* "skb_mstamp" is used as a start point for the retransmit timer */
- skb->skb_mstamp = tp->tcp_mstamp;
+ tcp_update_skb_after_send(tp, skb);
goto repair; /* Skip network transmission */
}
@@ -2330,7 +2355,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
nonagle);
if (skb->len > limit &&
- unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
+ unlikely(tso_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE,
+ skb, limit, mss_now, gfp)))
break;
if (test_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags))
@@ -2370,15 +2396,15 @@ repair:
tcp_cwnd_validate(sk, is_cwnd_limited);
return false;
}
- return !tp->packets_out && tcp_send_head(sk);
+ return !tp->packets_out && !tcp_write_queue_empty(sk);
}
bool tcp_schedule_loss_probe(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
- u32 rtt = usecs_to_jiffies(tp->srtt_us >> 3);
u32 timeout, rto_delta_us;
+ int early_retrans;
/* Don't do any loss probe on a Fast Open connection before 3WHS
* finishes.
@@ -2386,27 +2412,32 @@ bool tcp_schedule_loss_probe(struct sock *sk)
if (tp->fastopen_rsk)
return false;
+ early_retrans = sock_net(sk)->ipv4.sysctl_tcp_early_retrans;
/* Schedule a loss probe in 2*RTT for SACK capable connections
* in Open state, that are either limited by cwnd or application.
*/
- if ((sysctl_tcp_early_retrans != 3 && sysctl_tcp_early_retrans != 4) ||
+ if ((early_retrans != 3 && early_retrans != 4) ||
!tp->packets_out || !tcp_is_sack(tp) ||
icsk->icsk_ca_state != TCP_CA_Open)
return false;
if ((tp->snd_cwnd > tcp_packets_in_flight(tp)) &&
- tcp_send_head(sk))
+ !tcp_write_queue_empty(sk))
return false;
- /* Probe timeout is at least 1.5*rtt + TCP_DELACK_MAX to account
+ /* Probe timeout is 2*rtt. Add minimum RTO to account
* for delayed ack when there's one outstanding packet. If no RTT
* sample is available then probe after TCP_TIMEOUT_INIT.
*/
- timeout = rtt << 1 ? : TCP_TIMEOUT_INIT;
- if (tp->packets_out == 1)
- timeout = max_t(u32, timeout,
- (rtt + (rtt >> 1) + TCP_DELACK_MAX));
- timeout = max_t(u32, timeout, msecs_to_jiffies(10));
+ if (tp->srtt_us) {
+ timeout = usecs_to_jiffies(tp->srtt_us >> 2);
+ if (tp->packets_out == 1)
+ timeout += TCP_RTO_MIN;
+ else
+ timeout += TCP_TIMEOUT_MIN;
+ } else {
+ timeout = TCP_TIMEOUT_INIT;
+ }
/* If the RTO formula yields an earlier time, then use that time. */
rto_delta_us = tcp_rto_delta_us(sk); /* How far in future is RTO? */
@@ -2444,18 +2475,14 @@ void tcp_send_loss_probe(struct sock *sk)
int mss = tcp_current_mss(sk);
skb = tcp_send_head(sk);
- if (skb) {
- if (tcp_snd_wnd_test(tp, skb, mss)) {
- pcount = tp->packets_out;
- tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC);
- if (tp->packets_out > pcount)
- goto probe_sent;
- goto rearm_timer;
- }
- skb = tcp_write_queue_prev(sk, skb);
- } else {
- skb = tcp_write_queue_tail(sk);
+ if (skb && tcp_snd_wnd_test(tp, skb, mss)) {
+ pcount = tp->packets_out;
+ tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC);
+ if (tp->packets_out > pcount)
+ goto probe_sent;
+ goto rearm_timer;
}
+ skb = skb_rb_last(&sk->tcp_rtx_queue);
/* At most one outstanding TLP retransmission. */
if (tp->tlp_high_seq)
@@ -2473,10 +2500,11 @@ void tcp_send_loss_probe(struct sock *sk)
goto rearm_timer;
if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) {
- if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss,
+ if (unlikely(tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
+ (pcount - 1) * mss, mss,
GFP_ATOMIC)))
goto rearm_timer;
- skb = tcp_write_queue_next(sk, skb);
+ skb = skb_rb_next(skb);
}
if (WARN_ON(!skb || !tcp_skb_pcount(skb)))
@@ -2676,7 +2704,7 @@ void tcp_skb_collapse_tstamp(struct sk_buff *skb,
static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct sk_buff *next_skb = tcp_write_queue_next(sk, skb);
+ struct sk_buff *next_skb = skb_rb_next(skb);
int skb_size, next_skb_size;
skb_size = skb->len;
@@ -2691,9 +2719,7 @@ static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
else if (!skb_shift(skb, next_skb, next_skb_size))
return false;
}
- tcp_highest_sack_combine(sk, next_skb, skb);
-
- tcp_unlink_write_queue(next_skb, sk);
+ tcp_highest_sack_replace(sk, next_skb, skb);
if (next_skb->ip_summed == CHECKSUM_PARTIAL)
skb->ip_summed = CHECKSUM_PARTIAL;
@@ -2722,7 +2748,7 @@ static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
tcp_skb_collapse_tstamp(skb, next_skb);
- sk_wmem_free_skb(sk, next_skb);
+ tcp_rtx_queue_unlink_and_free(next_skb, sk);
return true;
}
@@ -2733,8 +2759,6 @@ static bool tcp_can_collapse(const struct sock *sk, const struct sk_buff *skb)
return false;
if (skb_cloned(skb))
return false;
- if (skb == tcp_send_head(sk))
- return false;
/* Some heuristics for collapsing over SACK'd could be invented */
if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
return false;
@@ -2752,12 +2776,12 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
struct sk_buff *skb = to, *tmp;
bool first = true;
- if (!sysctl_tcp_retrans_collapse)
+ if (!sock_net(sk)->ipv4.sysctl_tcp_retrans_collapse)
return;
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
return;
- tcp_for_write_queue_from_safe(skb, tmp, sk) {
+ skb_rbtree_walk_from_safe(skb, tmp) {
if (!tcp_can_collapse(sk, skb))
break;
@@ -2832,7 +2856,8 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
len = cur_mss * segs;
if (skb->len > len) {
- if (tcp_fragment(sk, skb, len, cur_mss, GFP_ATOMIC))
+ if (tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb, len,
+ cur_mss, GFP_ATOMIC))
return -ENOMEM; /* We'll try again later. */
} else {
if (skb_unclone(skb, GFP_ATOMIC))
@@ -2866,16 +2891,23 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
skb_headroom(skb) >= 0xFFFF)) {
struct sk_buff *nskb;
- skb->skb_mstamp = tp->tcp_mstamp;
- nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC);
- err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
- -ENOBUFS;
+ tcp_skb_tsorted_save(skb) {
+ nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC);
+ err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
+ -ENOBUFS;
+ } tcp_skb_tsorted_restore(skb);
+
+ if (!err) {
+ tcp_update_skb_after_send(tp, skb);
+ tcp_rate_skb_sent(sk, skb);
+ }
} else {
err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
}
if (likely(!err)) {
TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
+ trace_tcp_retransmit_skb(sk, skb);
} else if (err != -EBUSY) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL);
}
@@ -2912,36 +2944,25 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
* retransmitted data is acknowledged. It tries to continue
* resending the rest of the retransmit queue, until either
* we've sent it all or the congestion window limit is reached.
- * If doing SACK, the first ACK which comes back for a timeout
- * based retransmit packet might feed us FACK information again.
- * If so, we use it to avoid unnecessarily retransmissions.
*/
void tcp_xmit_retransmit_queue(struct sock *sk)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
+ struct sk_buff *skb, *rtx_head, *hole = NULL;
struct tcp_sock *tp = tcp_sk(sk);
- struct sk_buff *skb;
- struct sk_buff *hole = NULL;
u32 max_segs;
int mib_idx;
if (!tp->packets_out)
return;
- if (tp->retransmit_skb_hint) {
- skb = tp->retransmit_skb_hint;
- } else {
- skb = tcp_write_queue_head(sk);
- }
-
+ rtx_head = tcp_rtx_queue_head(sk);
+ skb = tp->retransmit_skb_hint ?: rtx_head;
max_segs = tcp_tso_segs(sk, tcp_current_mss(sk));
- tcp_for_write_queue_from(skb, sk) {
+ skb_rbtree_walk_from(skb) {
__u8 sacked;
int segs;
- if (skb == tcp_send_head(sk))
- break;
-
if (tcp_pacing_check(sk))
break;
@@ -2986,7 +3007,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
if (tcp_in_cwnd_reduction(sk))
tp->prr_out += tcp_skb_pcount(skb);
- if (skb == tcp_write_queue_head(sk) &&
+ if (skb == rtx_head &&
icsk->icsk_pending != ICSK_TIME_REO_TIMEOUT)
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
inet_csk(sk)->icsk_rto,
@@ -3028,12 +3049,15 @@ void tcp_send_fin(struct sock *sk)
* Note: in the latter case, FIN packet will be sent after a timeout,
* as TCP stack thinks it has already been transmitted.
*/
- if (tskb && (tcp_send_head(sk) || tcp_under_memory_pressure(sk))) {
+ if (!tskb && tcp_under_memory_pressure(sk))
+ tskb = skb_rb_last(&sk->tcp_rtx_queue);
+
+ if (tskb) {
coalesce:
TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN;
TCP_SKB_CB(tskb)->end_seq++;
tp->write_seq++;
- if (!tcp_send_head(sk)) {
+ if (tcp_write_queue_empty(sk)) {
/* This means tskb was already sent.
* Pretend we included the FIN on previous transmit.
* We need to set tp->snd_nxt to the value it would have
@@ -3050,6 +3074,7 @@ coalesce:
goto coalesce;
return;
}
+ INIT_LIST_HEAD(&skb->tcp_tsorted_anchor);
skb_reserve(skb, MAX_TCP_HEADER);
sk_forced_mem_schedule(sk, skb->truesize);
/* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
@@ -3086,6 +3111,11 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
/* Send it off. */
if (tcp_transmit_skb(sk, skb, 0, priority))
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
+
+ /* skb of trace_tcp_send_reset() keeps the skb that caused RST,
+ * skb here is different to the troublesome skb, so use NULL
+ */
+ trace_tcp_send_reset(sk, NULL);
}
/* Send a crossed SYN-ACK during socket establishment.
@@ -3098,20 +3128,24 @@ int tcp_send_synack(struct sock *sk)
{
struct sk_buff *skb;
- skb = tcp_write_queue_head(sk);
+ skb = tcp_rtx_queue_head(sk);
if (!skb || !(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
- pr_debug("%s: wrong queue state\n", __func__);
+ pr_err("%s: wrong queue state\n", __func__);
return -EFAULT;
}
if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)) {
if (skb_cloned(skb)) {
- struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
+ struct sk_buff *nskb;
+
+ tcp_skb_tsorted_save(skb) {
+ nskb = skb_copy(skb, GFP_ATOMIC);
+ } tcp_skb_tsorted_restore(skb);
if (!nskb)
return -ENOMEM;
- tcp_unlink_write_queue(skb, sk);
+ INIT_LIST_HEAD(&nskb->tcp_tsorted_anchor);
+ tcp_rtx_queue_unlink_and_free(skb, sk);
__skb_header_release(nskb);
- __tcp_add_write_queue_head(sk, nskb);
- sk_wmem_free_skb(sk, skb);
+ tcp_rbtree_insert(&sk->tcp_rtx_queue, nskb);
sk->sk_wmem_queued += nskb->truesize;
sk_mem_charge(sk, nskb->truesize);
skb = nskb;
@@ -3188,8 +3222,8 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req));
#endif
skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4);
- tcp_header_size = tcp_synack_options(req, mss, skb, &opts, md5, foc) +
- sizeof(*th);
+ tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5,
+ foc) + sizeof(*th);
skb_push(skb, tcp_header_size);
skb_reset_transport_header(skb);
@@ -3202,13 +3236,8 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
th->source = htons(ireq->ir_num);
th->dest = ireq->ir_rmt_port;
skb->mark = ireq->ir_mark;
- /* Setting of flags are superfluous here for callers (and ECE is
- * not even correctly set)
- */
- tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn,
- TCPHDR_SYN | TCPHDR_ACK);
-
- th->seq = htonl(TCP_SKB_CB(skb)->seq);
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ th->seq = htonl(tcp_rsk(req)->snt_isn);
/* XXX data is queued and acked as is. No buffer/window check */
th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt);
@@ -3295,7 +3324,7 @@ static void tcp_connect_init(struct sock *sk)
if (rcv_wnd == 0)
rcv_wnd = dst_metric(dst, RTAX_INITRWND);
- tcp_select_initial_window(tcp_full_space(sk),
+ tcp_select_initial_window(sk, tcp_full_space(sk),
tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
&tp->rcv_wnd,
&tp->window_clamp,
@@ -3334,7 +3363,6 @@ static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb)
tcb->end_seq += skb->len;
__skb_header_release(skb);
- __tcp_add_write_queue_tail(sk, skb);
sk->sk_wmem_queued += skb->truesize;
sk_mem_charge(sk, skb->truesize);
tp->write_seq = tcb->end_seq;
@@ -3382,6 +3410,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
int copied = copy_from_iter(skb_put(syn_data, space), space,
&fo->data->msg_iter);
if (unlikely(!copied)) {
+ tcp_skb_tsorted_anchor_cleanup(syn_data);
kfree_skb(syn_data);
goto fallback;
}
@@ -3412,10 +3441,15 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
TCP_SKB_CB(syn_data)->tcp_flags = TCPHDR_ACK | TCPHDR_PSH;
if (!err) {
tp->syn_data = (fo->copied > 0);
+ tcp_rbtree_insert(&sk->tcp_rtx_queue, syn_data);
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT);
goto done;
}
+ /* data was not sent, put it in write_queue */
+ __skb_queue_tail(&sk->sk_write_queue, syn_data);
+ tp->packets_out -= tcp_skb_pcount(syn_data);
+
fallback:
/* Send a regular SYN with Fast Open cookie request option */
if (fo->cookie.len > 0)
@@ -3456,6 +3490,7 @@ int tcp_connect(struct sock *sk)
tp->retrans_stamp = tcp_time_stamp(tp);
tcp_connect_queue_skb(sk, buff);
tcp_ecn_send_syn(sk, buff);
+ tcp_rbtree_insert(&sk->tcp_rtx_queue, buff);
/* Send off SYN; include data in Fast Open. */
err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) :
@@ -3468,6 +3503,11 @@ int tcp_connect(struct sock *sk)
*/
tp->snd_nxt = tp->write_seq;
tp->pushed_seq = tp->write_seq;
+ buff = tcp_send_head(sk);
+ if (unlikely(buff)) {
+ tp->snd_nxt = TCP_SKB_CB(buff)->seq;
+ tp->pushed_seq = TCP_SKB_CB(buff)->seq;
+ }
TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS);
/* Timer for repeating the SYN until an answer. */
@@ -3645,7 +3685,8 @@ int tcp_write_wakeup(struct sock *sk, int mib)
skb->len > mss) {
seg_size = min(seg_size, mss);
TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
- if (tcp_fragment(sk, skb, seg_size, mss, GFP_ATOMIC))
+ if (tcp_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE,
+ skb, seg_size, mss, GFP_ATOMIC))
return -1;
} else if (!tcp_skb_pcount(skb))
tcp_set_skb_tso_segs(skb, mss);
@@ -3675,7 +3716,7 @@ void tcp_send_probe0(struct sock *sk)
err = tcp_write_wakeup(sk, LINUX_MIB_TCPWINPROBE);
- if (tp->packets_out || !tcp_send_head(sk)) {
+ if (tp->packets_out || tcp_write_queue_empty(sk)) {
/* Cancel probe timer, if it is not required. */
icsk->icsk_probes_out = 0;
icsk->icsk_backoff = 0;
@@ -3716,6 +3757,7 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req)
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
if (unlikely(tcp_passive_fastopen(sk)))
tcp_sk(sk)->total_retrans++;
+ trace_tcp_retransmit_synack(sk, req);
}
return res;
}
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index f6c50af24a64..697f4c67b2e3 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -105,8 +105,9 @@ static inline int tcp_probe_avail(void)
* Note: arguments must match tcp_rcv_established()!
*/
static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb,
- const struct tcphdr *th, unsigned int len)
+ const struct tcphdr *th)
{
+ unsigned int len = skb->len;
const struct tcp_sock *tp = tcp_sk(sk);
const struct inet_sock *inet = inet_sk(sk);
@@ -145,7 +146,7 @@ static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb,
BUG();
}
- p->length = skb->len;
+ p->length = len;
p->snd_nxt = tp->snd_nxt;
p->snd_una = tp->snd_una;
p->snd_cwnd = tp->snd_cwnd;
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
index fe9a493d0208..d3ea89020c69 100644
--- a/net/ipv4/tcp_recovery.c
+++ b/net/ipv4/tcp_recovery.c
@@ -1,8 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/tcp.h>
#include <net/tcp.h>
-int sysctl_tcp_recovery __read_mostly = TCP_RACK_LOSS_DETECTION;
-
static void tcp_rack_mark_skb_lost(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -45,7 +44,8 @@ static bool tcp_rack_sent_after(u64 t1, u64 t2, u32 seq1, u32 seq2)
static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct sk_buff *skb;
+ u32 min_rtt = tcp_min_rtt(tp);
+ struct sk_buff *skb, *n;
u32 reo_wnd;
*reo_timeout = 0;
@@ -55,48 +55,36 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout)
* to queuing or delayed ACKs.
*/
reo_wnd = 1000;
- if ((tp->rack.reord || !tp->lost_out) && tcp_min_rtt(tp) != ~0U)
- reo_wnd = max(tcp_min_rtt(tp) >> 2, reo_wnd);
+ if ((tp->rack.reord || !tp->lost_out) && min_rtt != ~0U) {
+ reo_wnd = max((min_rtt >> 2) * tp->rack.reo_wnd_steps, reo_wnd);
+ reo_wnd = min(reo_wnd, tp->srtt_us >> 3);
+ }
- tcp_for_write_queue(skb, sk) {
+ list_for_each_entry_safe(skb, n, &tp->tsorted_sent_queue,
+ tcp_tsorted_anchor) {
struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
+ s32 remaining;
- if (skb == tcp_send_head(sk))
- break;
-
- /* Skip ones already (s)acked */
- if (!after(scb->end_seq, tp->snd_una) ||
- scb->sacked & TCPCB_SACKED_ACKED)
+ /* Skip ones marked lost but not yet retransmitted */
+ if ((scb->sacked & TCPCB_LOST) &&
+ !(scb->sacked & TCPCB_SACKED_RETRANS))
continue;
- if (tcp_rack_sent_after(tp->rack.mstamp, skb->skb_mstamp,
- tp->rack.end_seq, scb->end_seq)) {
- /* Step 3 in draft-cheng-tcpm-rack-00.txt:
- * A packet is lost if its elapsed time is beyond
- * the recent RTT plus the reordering window.
- */
- u32 elapsed = tcp_stamp_us_delta(tp->tcp_mstamp,
- skb->skb_mstamp);
- s32 remaining = tp->rack.rtt_us + reo_wnd - elapsed;
-
- if (remaining < 0) {
- tcp_rack_mark_skb_lost(sk, skb);
- continue;
- }
-
- /* Skip ones marked lost but not yet retransmitted */
- if ((scb->sacked & TCPCB_LOST) &&
- !(scb->sacked & TCPCB_SACKED_RETRANS))
- continue;
+ if (!tcp_rack_sent_after(tp->rack.mstamp, skb->skb_mstamp,
+ tp->rack.end_seq, scb->end_seq))
+ break;
+ /* A packet is lost if it has not been s/acked beyond
+ * the recent RTT plus the reordering window.
+ */
+ remaining = tp->rack.rtt_us + reo_wnd -
+ tcp_stamp_us_delta(tp->tcp_mstamp, skb->skb_mstamp);
+ if (remaining < 0) {
+ tcp_rack_mark_skb_lost(sk, skb);
+ list_del_init(&skb->tcp_tsorted_anchor);
+ } else {
/* Record maximum wait time (+1 to avoid 0) */
*reo_timeout = max_t(u32, *reo_timeout, 1 + remaining);
-
- } else if (!(scb->sacked & TCPCB_RETRANS)) {
- /* Original data are sent sequentially so stop early
- * b/c the rest are all sent after rack_sent
- */
- break;
}
}
}
@@ -113,7 +101,7 @@ void tcp_rack_mark_lost(struct sock *sk)
tp->rack.advanced = 0;
tcp_rack_detect_loss(sk, &timeout);
if (timeout) {
- timeout = usecs_to_jiffies(timeout + TCP_REO_TIMEOUT_MIN);
+ timeout = usecs_to_jiffies(timeout) + TCP_TIMEOUT_MIN;
inet_csk_reset_xmit_timer(sk, ICSK_TIME_REO_TIMEOUT,
timeout, inet_csk(sk)->icsk_rto);
}
@@ -175,3 +163,44 @@ void tcp_rack_reo_timeout(struct sock *sk)
if (inet_csk(sk)->icsk_pending != ICSK_TIME_RETRANS)
tcp_rearm_rto(sk);
}
+
+/* Updates the RACK's reo_wnd based on DSACK and no. of recoveries.
+ *
+ * If DSACK is received, increment reo_wnd by min_rtt/4 (upper bounded
+ * by srtt), since there is possibility that spurious retransmission was
+ * due to reordering delay longer than reo_wnd.
+ *
+ * Persist the current reo_wnd value for TCP_RACK_RECOVERY_THRESH (16)
+ * no. of successful recoveries (accounts for full DSACK-based loss
+ * recovery undo). After that, reset it to default (min_rtt/4).
+ *
+ * At max, reo_wnd is incremented only once per rtt. So that the new
+ * DSACK on which we are reacting, is due to the spurious retx (approx)
+ * after the reo_wnd has been updated last time.
+ *
+ * reo_wnd is tracked in terms of steps (of min_rtt/4), rather than
+ * absolute value to account for change in rtt.
+ */
+void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_STATIC_REO_WND ||
+ !rs->prior_delivered)
+ return;
+
+ /* Disregard DSACK if a rtt has not passed since we adjusted reo_wnd */
+ if (before(rs->prior_delivered, tp->rack.last_delivered))
+ tp->rack.dsack_seen = 0;
+
+ /* Adjust the reo_wnd if update is pending */
+ if (tp->rack.dsack_seen) {
+ tp->rack.reo_wnd_steps = min_t(u32, 0xFF,
+ tp->rack.reo_wnd_steps + 1);
+ tp->rack.dsack_seen = 0;
+ tp->rack.last_delivered = tp->delivered;
+ tp->rack.reo_wnd_persist = TCP_RACK_RECOVERY_THRESH;
+ } else if (!tp->rack.reo_wnd_persist) {
+ tp->rack.reo_wnd_steps = 1;
+ }
+}
diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c
index f2123075ce6e..addc122f8818 100644
--- a/net/ipv4/tcp_scalable.c
+++ b/net/ipv4/tcp_scalable.c
@@ -15,10 +15,6 @@
#define TCP_SCALABLE_AI_CNT 50U
#define TCP_SCALABLE_MD_SCALE 3
-struct scalable {
- u32 loss_cwnd;
-};
-
static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -36,23 +32,13 @@ static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked)
static u32 tcp_scalable_ssthresh(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
- struct scalable *ca = inet_csk_ca(sk);
-
- ca->loss_cwnd = tp->snd_cwnd;
return max(tp->snd_cwnd - (tp->snd_cwnd>>TCP_SCALABLE_MD_SCALE), 2U);
}
-static u32 tcp_scalable_cwnd_undo(struct sock *sk)
-{
- const struct scalable *ca = inet_csk_ca(sk);
-
- return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
-}
-
static struct tcp_congestion_ops tcp_scalable __read_mostly = {
.ssthresh = tcp_scalable_ssthresh,
- .undo_cwnd = tcp_scalable_cwnd_undo,
+ .undo_cwnd = tcp_reno_undo_cwnd,
.cong_avoid = tcp_scalable_cong_avoid,
.owner = THIS_MODULE,
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index e906014890b6..16df6dd44b98 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -22,8 +22,6 @@
#include <linux/gfp.h>
#include <net/tcp.h>
-int sysctl_tcp_thin_linear_timeouts __read_mostly;
-
/**
* tcp_write_err() - close socket and save error info
* @sk: The socket the error has appeared on.
@@ -109,26 +107,23 @@ static int tcp_orphan_retries(struct sock *sk, bool alive)
static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
{
- struct net *net = sock_net(sk);
+ const struct net *net = sock_net(sk);
+ int mss;
/* Black hole detection */
- if (net->ipv4.sysctl_tcp_mtu_probing) {
- if (!icsk->icsk_mtup.enabled) {
- icsk->icsk_mtup.enabled = 1;
- icsk->icsk_mtup.probe_timestamp = tcp_jiffies32;
- tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
- } else {
- struct net *net = sock_net(sk);
- struct tcp_sock *tp = tcp_sk(sk);
- int mss;
-
- mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1;
- mss = min(net->ipv4.sysctl_tcp_base_mss, mss);
- mss = max(mss, 68 - tp->tcp_header_len);
- icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
- tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
- }
+ if (!net->ipv4.sysctl_tcp_mtu_probing)
+ return;
+
+ if (!icsk->icsk_mtup.enabled) {
+ icsk->icsk_mtup.enabled = 1;
+ icsk->icsk_mtup.probe_timestamp = tcp_jiffies32;
+ } else {
+ mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1;
+ mss = min(net->ipv4.sysctl_tcp_base_mss, mss);
+ mss = max(mss, 68 - tcp_sk(sk)->tcp_header_len);
+ icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
}
+ tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
}
@@ -156,8 +151,13 @@ static bool retransmits_timed_out(struct sock *sk,
return false;
start_ts = tcp_sk(sk)->retrans_stamp;
- if (unlikely(!start_ts))
- start_ts = tcp_skb_timestamp(tcp_write_queue_head(sk));
+ if (unlikely(!start_ts)) {
+ struct sk_buff *head = tcp_rtx_queue_head(sk);
+
+ if (!head)
+ return false;
+ start_ts = tcp_skb_timestamp(head);
+ }
if (likely(timeout == 0)) {
linear_backoff_thresh = ilog2(TCP_RTO_MAX/rto_base);
@@ -239,7 +239,6 @@ static int tcp_write_timeout(struct sock *sk)
/* Called with BH disabled */
void tcp_delack_timer_handler(struct sock *sk)
{
- struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
sk_mem_reclaim_partial(sk);
@@ -254,17 +253,6 @@ void tcp_delack_timer_handler(struct sock *sk)
}
icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER;
- if (!skb_queue_empty(&tp->ucopy.prequeue)) {
- struct sk_buff *skb;
-
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSCHEDULERFAILED);
-
- while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
- sk_backlog_rcv(sk, skb);
-
- tp->ucopy.memory = 0;
- }
-
if (inet_csk_ack_scheduled(sk)) {
if (!icsk->icsk_ack.pingpong) {
/* Delayed ACK missed: inflate ATO. */
@@ -295,15 +283,17 @@ out:
*
* Returns: Nothing (void)
*/
-static void tcp_delack_timer(unsigned long data)
+static void tcp_delack_timer(struct timer_list *t)
{
- struct sock *sk = (struct sock *)data;
+ struct inet_connection_sock *icsk =
+ from_timer(icsk, t, icsk_delack_timer);
+ struct sock *sk = &icsk->icsk_inet.sk;
bh_lock_sock(sk);
if (!sock_owned_by_user(sk)) {
tcp_delack_timer_handler(sk);
} else {
- inet_csk(sk)->icsk_ack.blocked = 1;
+ icsk->icsk_ack.blocked = 1;
__NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
/* deleguate our work to tcp_release_cb() */
if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &sk->sk_tsq_flags))
@@ -316,11 +306,12 @@ static void tcp_delack_timer(unsigned long data)
static void tcp_probe_timer(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
+ struct sk_buff *skb = tcp_send_head(sk);
struct tcp_sock *tp = tcp_sk(sk);
int max_probes;
u32 start_ts;
- if (tp->packets_out || !tcp_send_head(sk)) {
+ if (tp->packets_out || !skb) {
icsk->icsk_probes_out = 0;
return;
}
@@ -333,9 +324,9 @@ static void tcp_probe_timer(struct sock *sk)
* corresponding system limit. We also implement similar policy when
* we use RTO to probe window in tcp_retransmit_timer().
*/
- start_ts = tcp_skb_timestamp(tcp_send_head(sk));
+ start_ts = tcp_skb_timestamp(skb);
if (!start_ts)
- tcp_send_head(sk)->skb_mstamp = tp->tcp_mstamp;
+ skb->skb_mstamp = tp->tcp_mstamp;
else if (icsk->icsk_user_timeout &&
(s32)(tcp_time_stamp(tp) - start_ts) >
jiffies_to_msecs(icsk->icsk_user_timeout))
@@ -420,7 +411,7 @@ void tcp_retransmit_timer(struct sock *sk)
if (!tp->packets_out)
goto out;
- WARN_ON(tcp_write_queue_empty(sk));
+ WARN_ON(tcp_rtx_queue_empty(sk));
tp->tlp_high_seq = 0;
@@ -453,7 +444,7 @@ void tcp_retransmit_timer(struct sock *sk)
goto out;
}
tcp_enter_loss(sk);
- tcp_retransmit_skb(sk, tcp_write_queue_head(sk), 1);
+ tcp_retransmit_skb(sk, tcp_rtx_queue_head(sk), 1);
__sk_dst_reset(sk);
goto out_reset_timer;
}
@@ -485,7 +476,7 @@ void tcp_retransmit_timer(struct sock *sk)
tcp_enter_loss(sk);
- if (tcp_retransmit_skb(sk, tcp_write_queue_head(sk), 1) > 0) {
+ if (tcp_retransmit_skb(sk, tcp_rtx_queue_head(sk), 1) > 0) {
/* Retransmission failed because of local congestion,
* do not backoff.
*/
@@ -526,7 +517,7 @@ out_reset_timer:
* linear-timeout retransmissions into a black hole
*/
if (sk->sk_state == TCP_ESTABLISHED &&
- (tp->thin_lto || sysctl_tcp_thin_linear_timeouts) &&
+ (tp->thin_lto || net->ipv4.sysctl_tcp_thin_linear_timeouts) &&
tcp_stream_is_thin(tp) &&
icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) {
icsk->icsk_backoff = 0;
@@ -582,9 +573,11 @@ out:
sk_mem_reclaim(sk);
}
-static void tcp_write_timer(unsigned long data)
+static void tcp_write_timer(struct timer_list *t)
{
- struct sock *sk = (struct sock *)data;
+ struct inet_connection_sock *icsk =
+ from_timer(icsk, t, icsk_retransmit_timer);
+ struct sock *sk = &icsk->icsk_inet.sk;
bh_lock_sock(sk);
if (!sock_owned_by_user(sk)) {
@@ -619,9 +612,9 @@ void tcp_set_keepalive(struct sock *sk, int val)
EXPORT_SYMBOL_GPL(tcp_set_keepalive);
-static void tcp_keepalive_timer (unsigned long data)
+static void tcp_keepalive_timer (struct timer_list *t)
{
- struct sock *sk = (struct sock *) data;
+ struct sock *sk = from_timer(sk, t, sk_timer);
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
u32 elapsed;
@@ -659,7 +652,7 @@ static void tcp_keepalive_timer (unsigned long data)
elapsed = keepalive_time_when(tp);
/* It is alive without keepalive 8) */
- if (tp->packets_out || tcp_send_head(sk))
+ if (tp->packets_out || !tcp_write_queue_empty(sk))
goto resched;
elapsed = keepalive_time_elapsed(tp);
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 218cfcc77650..ee113ff15fd0 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -158,7 +158,7 @@ EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event);
static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp)
{
- return min(tp->snd_ssthresh, tp->snd_cwnd-1);
+ return min(tp->snd_ssthresh, tp->snd_cwnd);
}
static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
diff --git a/net/ipv4/tcp_vegas.h b/net/ipv4/tcp_vegas.h
index 248cfc0ff9ae..4f24d0e37d9c 100644
--- a/net/ipv4/tcp_vegas.h
+++ b/net/ipv4/tcp_vegas.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* TCP Vegas congestion control interface
*/
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index 76005d4b8dfc..6fcf482d611b 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -30,7 +30,6 @@ struct veno {
u32 basertt; /* the min of all Veno rtt measurements seen (in usec) */
u32 inc; /* decide whether to increase cwnd */
u32 diff; /* calculate the diff rate */
- u32 loss_cwnd; /* cwnd when loss occured */
};
/* There are several situations when we must "re-start" Veno:
@@ -194,7 +193,6 @@ static u32 tcp_veno_ssthresh(struct sock *sk)
const struct tcp_sock *tp = tcp_sk(sk);
struct veno *veno = inet_csk_ca(sk);
- veno->loss_cwnd = tp->snd_cwnd;
if (veno->diff < beta)
/* in "non-congestive state", cut cwnd by 1/5 */
return max(tp->snd_cwnd * 4 / 5, 2U);
@@ -203,17 +201,10 @@ static u32 tcp_veno_ssthresh(struct sock *sk)
return max(tp->snd_cwnd >> 1U, 2U);
}
-static u32 tcp_veno_cwnd_undo(struct sock *sk)
-{
- const struct veno *veno = inet_csk_ca(sk);
-
- return max(tcp_sk(sk)->snd_cwnd, veno->loss_cwnd);
-}
-
static struct tcp_congestion_ops tcp_veno __read_mostly = {
.init = tcp_veno_init,
.ssthresh = tcp_veno_ssthresh,
- .undo_cwnd = tcp_veno_cwnd_undo,
+ .undo_cwnd = tcp_reno_undo_cwnd,
.cong_avoid = tcp_veno_cong_avoid,
.pkts_acked = tcp_veno_pkts_acked,
.set_state = tcp_veno_state,
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index e6ff99c4bd3b..96e829b2e2fc 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -37,7 +37,6 @@ struct yeah {
u32 fast_count;
u32 pkts_acked;
- u32 loss_cwnd;
};
static void tcp_yeah_init(struct sock *sk)
@@ -220,22 +219,14 @@ static u32 tcp_yeah_ssthresh(struct sock *sk)
yeah->fast_count = 0;
yeah->reno_count = max(yeah->reno_count>>1, 2U);
- yeah->loss_cwnd = tp->snd_cwnd;
return max_t(int, tp->snd_cwnd - reduction, 2);
}
-static u32 tcp_yeah_cwnd_undo(struct sock *sk)
-{
- const struct yeah *yeah = inet_csk_ca(sk);
-
- return max(tcp_sk(sk)->snd_cwnd, yeah->loss_cwnd);
-}
-
static struct tcp_congestion_ops tcp_yeah __read_mostly = {
.init = tcp_yeah_init,
.ssthresh = tcp_yeah_ssthresh,
- .undo_cwnd = tcp_yeah_cwnd_undo,
+ .undo_cwnd = tcp_reno_undo_cwnd,
.cong_avoid = tcp_yeah_cong_avoid,
.set_state = tcp_vegas_state,
.cwnd_event = tcp_vegas_cwnd_event,
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index a7c804f73990..e4ff25c947c5 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -231,10 +231,7 @@ static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot)
}
}
- /* Initial allocation may have already happened via setsockopt */
- if (!rcu_access_pointer(sk->sk_reuseport_cb))
- return reuseport_alloc(sk);
- return 0;
+ return reuseport_alloc(sk);
}
/**
@@ -380,8 +377,8 @@ int udp_v4_get_port(struct sock *sk, unsigned short snum)
static int compute_score(struct sock *sk, struct net *net,
__be32 saddr, __be16 sport,
- __be32 daddr, unsigned short hnum, int dif,
- bool exact_dif)
+ __be32 daddr, unsigned short hnum,
+ int dif, int sdif, bool exact_dif)
{
int score;
struct inet_sock *inet;
@@ -413,10 +410,15 @@ static int compute_score(struct sock *sk, struct net *net,
}
if (sk->sk_bound_dev_if || exact_dif) {
- if (sk->sk_bound_dev_if != dif)
+ bool dev_match = (sk->sk_bound_dev_if == dif ||
+ sk->sk_bound_dev_if == sdif);
+
+ if (exact_dif && !dev_match)
return -1;
- score += 4;
+ if (sk->sk_bound_dev_if && dev_match)
+ score += 4;
}
+
if (sk->sk_incoming_cpu == raw_smp_processor_id())
score++;
return score;
@@ -436,10 +438,11 @@ static u32 udp_ehashfn(const struct net *net, const __be32 laddr,
/* called with rcu_read_lock() */
static struct sock *udp4_lib_lookup2(struct net *net,
- __be32 saddr, __be16 sport,
- __be32 daddr, unsigned int hnum, int dif, bool exact_dif,
- struct udp_hslot *hslot2,
- struct sk_buff *skb)
+ __be32 saddr, __be16 sport,
+ __be32 daddr, unsigned int hnum,
+ int dif, int sdif, bool exact_dif,
+ struct udp_hslot *hslot2,
+ struct sk_buff *skb)
{
struct sock *sk, *result;
int score, badness, matches = 0, reuseport = 0;
@@ -449,7 +452,7 @@ static struct sock *udp4_lib_lookup2(struct net *net,
badness = 0;
udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
score = compute_score(sk, net, saddr, sport,
- daddr, hnum, dif, exact_dif);
+ daddr, hnum, dif, sdif, exact_dif);
if (score > badness) {
reuseport = sk->sk_reuseport;
if (reuseport) {
@@ -477,8 +480,8 @@ static struct sock *udp4_lib_lookup2(struct net *net,
* harder than this. -DaveM
*/
struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
- __be16 sport, __be32 daddr, __be16 dport,
- int dif, struct udp_table *udptable, struct sk_buff *skb)
+ __be16 sport, __be32 daddr, __be16 dport, int dif,
+ int sdif, struct udp_table *udptable, struct sk_buff *skb)
{
struct sock *sk, *result;
unsigned short hnum = ntohs(dport);
@@ -496,7 +499,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
goto begin;
result = udp4_lib_lookup2(net, saddr, sport,
- daddr, hnum, dif,
+ daddr, hnum, dif, sdif,
exact_dif, hslot2, skb);
if (!result) {
unsigned int old_slot2 = slot2;
@@ -511,7 +514,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
goto begin;
result = udp4_lib_lookup2(net, saddr, sport,
- daddr, hnum, dif,
+ daddr, hnum, dif, sdif,
exact_dif, hslot2, skb);
}
return result;
@@ -521,7 +524,7 @@ begin:
badness = 0;
sk_for_each_rcu(sk, &hslot->head) {
score = compute_score(sk, net, saddr, sport,
- daddr, hnum, dif, exact_dif);
+ daddr, hnum, dif, sdif, exact_dif);
if (score > badness) {
reuseport = sk->sk_reuseport;
if (reuseport) {
@@ -554,7 +557,7 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb,
return __udp4_lib_lookup(dev_net(skb->dev), iph->saddr, sport,
iph->daddr, dport, inet_iif(skb),
- udptable, skb);
+ inet_sdif(skb), udptable, skb);
}
struct sock *udp4_lib_lookup_skb(struct sk_buff *skb,
@@ -576,7 +579,7 @@ struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
struct sock *sk;
sk = __udp4_lib_lookup(net, saddr, sport, daddr, dport,
- dif, &udp_table, NULL);
+ dif, 0, &udp_table, NULL);
if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
sk = NULL;
return sk;
@@ -587,7 +590,7 @@ EXPORT_SYMBOL_GPL(udp4_lib_lookup);
static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk,
__be16 loc_port, __be32 loc_addr,
__be16 rmt_port, __be32 rmt_addr,
- int dif, unsigned short hnum)
+ int dif, int sdif, unsigned short hnum)
{
struct inet_sock *inet = inet_sk(sk);
@@ -597,9 +600,10 @@ static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk,
(inet->inet_dport != rmt_port && inet->inet_dport) ||
(inet->inet_rcv_saddr && inet->inet_rcv_saddr != loc_addr) ||
ipv6_only_sock(sk) ||
- (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
+ (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif &&
+ sk->sk_bound_dev_if != sdif))
return false;
- if (!ip_mc_sf_allow(sk, loc_addr, rmt_addr, dif))
+ if (!ip_mc_sf_allow(sk, loc_addr, rmt_addr, dif, sdif))
return false;
return true;
}
@@ -628,8 +632,8 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
struct net *net = dev_net(skb->dev);
sk = __udp4_lib_lookup(net, iph->daddr, uh->dest,
- iph->saddr, uh->source, skb->dev->ifindex, udptable,
- NULL);
+ iph->saddr, uh->source, skb->dev->ifindex, 0,
+ udptable, NULL);
if (!sk) {
__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
return; /* No socket for error */
@@ -802,7 +806,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4)
if (is_udplite) /* UDP-Lite */
csum = udplite_csum(skb);
- else if (sk->sk_no_check_tx && !skb_is_gso(skb)) { /* UDP csum off */
+ else if (sk->sk_no_check_tx) { /* UDP csum off */
skb->ip_summed = CHECKSUM_NONE;
goto send;
@@ -1054,7 +1058,7 @@ back_from_confirm:
/* ... which is an evident application bug. --ANK */
release_sock(sk);
- net_dbg_ratelimited("cork app bug 2\n");
+ net_dbg_ratelimited("socket already corked\n");
err = -EINVAL;
goto out;
}
@@ -1137,7 +1141,7 @@ int udp_sendpage(struct sock *sk, struct page *page, int offset,
if (unlikely(!up->pending)) {
release_sock(sk);
- net_dbg_ratelimited("udp cork app bug 3\n");
+ net_dbg_ratelimited("cork failed\n");
return -EINVAL;
}
@@ -1176,7 +1180,11 @@ static void udp_set_dev_scratch(struct sk_buff *skb)
scratch->csum_unnecessary = !!skb_csum_unnecessary(skb);
scratch->is_linear = !skb_is_nonlinear(skb);
#endif
- if (likely(!skb->_skb_refdst))
+ /* all head states execept sp (dst, sk, nf) are always cleared by
+ * udp_rcv() and we need to preserve secpath, if present, to eventually
+ * process IP_CMSG_PASSSEC at recvmsg() time
+ */
+ if (likely(!skb_sec_path(skb)))
scratch->_tsize_state |= UDP_SKB_IS_STATELESS;
}
@@ -1201,8 +1209,7 @@ static void udp_rmem_release(struct sock *sk, int size, int partial,
if (likely(partial)) {
up->forward_deficit += size;
size = up->forward_deficit;
- if (size < (sk->sk_rcvbuf >> 2) &&
- !skb_queue_empty(&up->reader_queue))
+ if (size < (sk->sk_rcvbuf >> 2))
return;
} else {
size += up->forward_deficit;
@@ -1386,12 +1393,15 @@ void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len)
unlock_sock_fast(sk, slow);
}
+ if (!skb_unref(skb))
+ return;
+
/* In the more common cases we cleared the head states previously,
* see __udp_queue_rcv_skb().
*/
if (unlikely(udp_skb_has_head_state(skb)))
skb_release_head_state(skb);
- consume_stateless_skb(skb);
+ __consume_stateless_skb(skb);
}
EXPORT_SYMBOL_GPL(skb_consume_udp);
@@ -1574,7 +1584,8 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
return ip_recv_error(sk, msg, len, addr_len);
try_again:
- peeking = off = sk_peek_offset(sk, flags);
+ peeking = flags & MSG_PEEK;
+ off = sk_peek_offset(sk, flags);
skb = __skb_recv_udp(sk, flags, noblock, &peeked, &off, &err);
if (!skb)
return err;
@@ -1782,13 +1793,6 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
sk_mark_napi_id_once(sk, skb);
}
- /* At recvmsg() time we may access skb->dst or skb->sp depending on
- * the IP options and the cmsg flags, elsewhere can we clear all
- * pending head states while they are hot in the cache
- */
- if (likely(IPCB(skb)->opt.optlen == 0 && !skb_sec_path(skb)))
- skb_release_head_state(skb);
-
rc = __udp_enqueue_schedule_skb(sk, skb);
if (rc < 0) {
int is_udplite = IS_UDPLITE(sk);
@@ -1809,8 +1813,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
static struct static_key udp_encap_needed __read_mostly;
void udp_encap_enable(void)
{
- if (!static_key_enabled(&udp_encap_needed))
- static_key_slow_inc(&udp_encap_needed);
+ static_key_enable(&udp_encap_needed);
}
EXPORT_SYMBOL(udp_encap_enable);
@@ -1849,7 +1852,7 @@ static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
*/
/* if we're overly short, let UDP handle it */
- encap_rcv = ACCESS_ONCE(up->encap_rcv);
+ encap_rcv = READ_ONCE(up->encap_rcv);
if (encap_rcv) {
int ret;
@@ -1928,14 +1931,16 @@ drop:
/* For TCP sockets, sk_rx_dst is protected by socket lock
* For UDP, we use xchg() to guard against concurrent changes.
*/
-void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst)
+bool udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst)
{
struct dst_entry *old;
if (dst_hold_safe(dst)) {
old = xchg(&sk->sk_rx_dst, dst);
dst_release(old);
+ return old != dst;
}
+ return false;
}
EXPORT_SYMBOL(udp_sk_rx_dst_set);
@@ -1956,6 +1961,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10);
unsigned int offset = offsetof(typeof(*sk), sk_node);
int dif = skb->dev->ifindex;
+ int sdif = inet_sdif(skb);
struct hlist_node *node;
struct sk_buff *nskb;
@@ -1970,7 +1976,7 @@ start_lookup:
sk_for_each_entry_offset_rcu(sk, node, &hslot->head, offset) {
if (!__udp_is_mcast_sock(net, sk, uh->dest, daddr,
- uh->source, saddr, dif, hnum))
+ uh->source, saddr, dif, sdif, hnum))
continue;
if (!first) {
@@ -2160,7 +2166,7 @@ drop:
static struct sock *__udp4_lib_mcast_demux_lookup(struct net *net,
__be16 loc_port, __be32 loc_addr,
__be16 rmt_port, __be32 rmt_addr,
- int dif)
+ int dif, int sdif)
{
struct sock *sk, *result;
unsigned short hnum = ntohs(loc_port);
@@ -2174,7 +2180,7 @@ static struct sock *__udp4_lib_mcast_demux_lookup(struct net *net,
result = NULL;
sk_for_each_rcu(sk, &hslot->head) {
if (__udp_is_mcast_sock(net, sk, loc_port, loc_addr,
- rmt_port, rmt_addr, dif, hnum)) {
+ rmt_port, rmt_addr, dif, sdif, hnum)) {
if (result)
return NULL;
result = sk;
@@ -2191,7 +2197,7 @@ static struct sock *__udp4_lib_mcast_demux_lookup(struct net *net,
static struct sock *__udp4_lib_demux_lookup(struct net *net,
__be16 loc_port, __be32 loc_addr,
__be16 rmt_port, __be32 rmt_addr,
- int dif)
+ int dif, int sdif)
{
unsigned short hnum = ntohs(loc_port);
unsigned int hash2 = udp4_portaddr_hash(net, loc_addr, hnum);
@@ -2203,7 +2209,7 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net,
udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
if (INET_MATCH(sk, net, acookie, rmt_addr,
- loc_addr, ports, dif))
+ loc_addr, ports, dif, sdif))
return sk;
/* Only check first socket in chain */
break;
@@ -2211,47 +2217,46 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net,
return NULL;
}
-void udp_v4_early_demux(struct sk_buff *skb)
+int udp_v4_early_demux(struct sk_buff *skb)
{
struct net *net = dev_net(skb->dev);
+ struct in_device *in_dev = NULL;
const struct iphdr *iph;
const struct udphdr *uh;
struct sock *sk = NULL;
struct dst_entry *dst;
int dif = skb->dev->ifindex;
+ int sdif = inet_sdif(skb);
int ours;
/* validate the packet */
if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct udphdr)))
- return;
+ return 0;
iph = ip_hdr(skb);
uh = udp_hdr(skb);
- if (skb->pkt_type == PACKET_BROADCAST ||
- skb->pkt_type == PACKET_MULTICAST) {
- struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
+ if (skb->pkt_type == PACKET_MULTICAST) {
+ in_dev = __in_dev_get_rcu(skb->dev);
if (!in_dev)
- return;
+ return 0;
- /* we are supposed to accept bcast packets */
- if (skb->pkt_type == PACKET_MULTICAST) {
- ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr,
- iph->protocol);
- if (!ours)
- return;
- }
+ ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr,
+ iph->protocol);
+ if (!ours)
+ return 0;
sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr,
- uh->source, iph->saddr, dif);
+ uh->source, iph->saddr,
+ dif, sdif);
} else if (skb->pkt_type == PACKET_HOST) {
sk = __udp4_lib_demux_lookup(net, uh->dest, iph->daddr,
- uh->source, iph->saddr, dif);
+ uh->source, iph->saddr, dif, sdif);
}
if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt))
- return;
+ return 0;
skb->sk = sk;
skb->destructor = sock_efree;
@@ -2260,12 +2265,23 @@ void udp_v4_early_demux(struct sk_buff *skb)
if (dst)
dst = dst_check(dst, 0);
if (dst) {
+ u32 itag = 0;
+
/* set noref for now.
* any place which wants to hold dst has to call
* dst_hold_safe()
*/
skb_dst_set_noref(skb, dst);
+
+ /* for unconnected multicast sockets we need to validate
+ * the source on each packet
+ */
+ if (!inet_sk(sk)->inet_daddr && in_dev)
+ return ip_mc_validate_source(skb, iph->daddr,
+ iph->saddr, iph->tos,
+ skb->dev, in_dev, &itag);
}
+ return 0;
}
int udp_rcv(struct sk_buff *skb)
@@ -2281,7 +2297,7 @@ void udp_destroy_sock(struct sock *sk)
unlock_sock_fast(sk, slow);
if (static_key_false(&udp_encap_needed) && up->encap_type) {
void (*encap_destroy)(struct sock *sk);
- encap_destroy = ACCESS_ONCE(up->encap_destroy);
+ encap_destroy = READ_ONCE(up->encap_destroy);
if (encap_destroy)
encap_destroy(sk);
}
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index 4515836d2a3a..d0390d844ac8 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -45,7 +45,7 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
sk = __udp4_lib_lookup(net,
req->id.idiag_src[0], req->id.idiag_sport,
req->id.idiag_dst[0], req->id.idiag_dport,
- req->id.idiag_if, tbl, NULL);
+ req->id.idiag_if, 0, tbl, NULL);
#if IS_ENABLED(CONFIG_IPV6)
else if (req->sdiag_family == AF_INET6)
sk = __udp6_lib_lookup(net,
@@ -53,7 +53,7 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
req->id.idiag_sport,
(struct in6_addr *)req->id.idiag_dst,
req->id.idiag_dport,
- req->id.idiag_if, tbl, NULL);
+ req->id.idiag_if, 0, tbl, NULL);
#endif
if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
sk = NULL;
@@ -182,7 +182,7 @@ static int __udp_diag_destroy(struct sk_buff *in_skb,
sk = __udp4_lib_lookup(net,
req->id.idiag_dst[0], req->id.idiag_dport,
req->id.idiag_src[0], req->id.idiag_sport,
- req->id.idiag_if, tbl, NULL);
+ req->id.idiag_if, 0, tbl, NULL);
#if IS_ENABLED(CONFIG_IPV6)
else if (req->sdiag_family == AF_INET6) {
if (ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_dst) &&
@@ -190,7 +190,7 @@ static int __udp_diag_destroy(struct sk_buff *in_skb,
sk = __udp4_lib_lookup(net,
req->id.idiag_dst[3], req->id.idiag_dport,
req->id.idiag_src[3], req->id.idiag_sport,
- req->id.idiag_if, tbl, NULL);
+ req->id.idiag_if, 0, tbl, NULL);
else
sk = __udp6_lib_lookup(net,
@@ -198,7 +198,7 @@ static int __udp_diag_destroy(struct sk_buff *in_skb,
req->id.idiag_dport,
(struct in6_addr *)req->id.idiag_src,
req->id.idiag_sport,
- req->id.idiag_if, tbl, NULL);
+ req->id.idiag_if, 0, tbl, NULL);
}
#endif
else {
diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h
index a8cf8c6fb60c..e7d18b140287 100644
--- a/net/ipv4/udp_impl.h
+++ b/net/ipv4/udp_impl.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _UDP4_IMPL_H
#define _UDP4_IMPL_H
#include <net/udp.h>
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 0932c85b42af..e360d55be555 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -21,7 +21,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
__be16 new_protocol, bool is_ipv6)
{
int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb);
- bool remcsum, need_csum, offload_csum, ufo, gso_partial;
+ bool remcsum, need_csum, offload_csum, gso_partial;
struct sk_buff *segs = ERR_PTR(-EINVAL);
struct udphdr *uh = udp_hdr(skb);
u16 mac_offset = skb->mac_header;
@@ -61,8 +61,6 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
remcsum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TUNNEL_REMCSUM);
skb->remcsum_offload = remcsum;
- ufo = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP);
-
need_ipsec = skb_dst(skb) && dst_xfrm(skb_dst(skb));
/* Try to offload checksum if possible */
offload_csum = !!(need_csum &&
@@ -77,7 +75,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
* outer one so strip the existing checksum feature flags and
* instead set the flag based on our outer checksum offload value.
*/
- if (remcsum || ufo) {
+ if (remcsum) {
features &= ~NETIF_F_CSUM_MASK;
if (!need_csum || offload_csum)
features |= NETIF_F_HW_CSUM;
@@ -122,7 +120,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
* will be using a length value equal to only one MSS sized
* segment instead of the entire frame.
*/
- if (gso_partial) {
+ if (gso_partial && skb_is_gso(skb)) {
uh->len = htons(skb_shinfo(skb)->gso_size +
SKB_GSO_CB(skb)->data_offset +
skb->head - (unsigned char *)uh);
@@ -189,66 +187,16 @@ out_unlock:
}
EXPORT_SYMBOL(skb_udp_tunnel_segment);
-static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
- netdev_features_t features)
+static struct sk_buff *udp4_tunnel_segment(struct sk_buff *skb,
+ netdev_features_t features)
{
struct sk_buff *segs = ERR_PTR(-EINVAL);
- unsigned int mss;
- __wsum csum;
- struct udphdr *uh;
- struct iphdr *iph;
if (skb->encapsulation &&
(skb_shinfo(skb)->gso_type &
- (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))) {
+ (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM)))
segs = skb_udp_tunnel_segment(skb, features, false);
- goto out;
- }
- if (!pskb_may_pull(skb, sizeof(struct udphdr)))
- goto out;
-
- mss = skb_shinfo(skb)->gso_size;
- if (unlikely(skb->len <= mss))
- goto out;
-
- if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
- /* Packet is from an untrusted source, reset gso_segs. */
-
- skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss);
-
- segs = NULL;
- goto out;
- }
-
- /* Do software UFO. Complete and fill in the UDP checksum as
- * HW cannot do checksum of UDP packets sent as multiple
- * IP fragments.
- */
-
- uh = udp_hdr(skb);
- iph = ip_hdr(skb);
-
- uh->check = 0;
- csum = skb_checksum(skb, 0, skb->len, 0);
- uh->check = udp_v4_check(skb->len, iph->saddr, iph->daddr, csum);
- if (uh->check == 0)
- uh->check = CSUM_MANGLED_0;
-
- skb->ip_summed = CHECKSUM_UNNECESSARY;
-
- /* If there is no outer header we can fake a checksum offload
- * due to the fact that we have already done the checksum in
- * software prior to segmenting the frame.
- */
- if (!skb->encap_hdr_csum)
- features |= NETIF_F_HW_CSUM;
-
- /* Fragment the skb. IP headers of the fragments are updated in
- * inet_gso_segment()
- */
- segs = skb_segment(skb, features);
-out:
return segs;
}
@@ -382,7 +330,7 @@ static int udp4_gro_complete(struct sk_buff *skb, int nhoff)
static const struct net_offload udpv4_offload = {
.callbacks = {
- .gso_segment = udp4_ufo_fragment,
+ .gso_segment = udp4_tunnel_segment,
.gro_receive = udp4_gro_receive,
.gro_complete = udp4_gro_complete,
},
diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c
index 58bd39fb14b4..6539ff15e9a3 100644
--- a/net/ipv4/udp_tunnel.c
+++ b/net/ipv4/udp_tunnel.c
@@ -82,7 +82,8 @@ void udp_tunnel_push_rx_port(struct net_device *dev, struct socket *sock,
struct sock *sk = sock->sk;
struct udp_tunnel_info ti;
- if (!dev->netdev_ops->ndo_udp_tunnel_add)
+ if (!dev->netdev_ops->ndo_udp_tunnel_add ||
+ !(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT))
return;
ti.type = type;
@@ -93,6 +94,24 @@ void udp_tunnel_push_rx_port(struct net_device *dev, struct socket *sock,
}
EXPORT_SYMBOL_GPL(udp_tunnel_push_rx_port);
+void udp_tunnel_drop_rx_port(struct net_device *dev, struct socket *sock,
+ unsigned short type)
+{
+ struct sock *sk = sock->sk;
+ struct udp_tunnel_info ti;
+
+ if (!dev->netdev_ops->ndo_udp_tunnel_del ||
+ !(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT))
+ return;
+
+ ti.type = type;
+ ti.sa_family = sk->sk_family;
+ ti.port = inet_sk(sk)->inet_sport;
+
+ dev->netdev_ops->ndo_udp_tunnel_del(dev, &ti);
+}
+EXPORT_SYMBOL_GPL(udp_tunnel_drop_rx_port);
+
/* Notify netdevs that UDP port started listening */
void udp_tunnel_notify_add_rx_port(struct socket *sock, unsigned short type)
{
@@ -109,6 +128,8 @@ void udp_tunnel_notify_add_rx_port(struct socket *sock, unsigned short type)
for_each_netdev_rcu(net, dev) {
if (!dev->netdev_ops->ndo_udp_tunnel_add)
continue;
+ if (!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT))
+ continue;
dev->netdev_ops->ndo_udp_tunnel_add(dev, &ti);
}
rcu_read_unlock();
@@ -131,6 +152,8 @@ void udp_tunnel_notify_del_rx_port(struct socket *sock, unsigned short type)
for_each_netdev_rcu(net, dev) {
if (!dev->netdev_ops->ndo_udp_tunnel_del)
continue;
+ if (!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT))
+ continue;
dev->netdev_ops->ndo_udp_tunnel_del(dev, &ti);
}
rcu_read_unlock();
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index 1fc684111ce6..e50b7fea57ee 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* xfrm4_input.c
*
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 71b4ecc195c7..05017e2c849c 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* xfrm4_policy.c
*
@@ -20,7 +21,8 @@
static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4,
int tos, int oif,
const xfrm_address_t *saddr,
- const xfrm_address_t *daddr)
+ const xfrm_address_t *daddr,
+ u32 mark)
{
struct rtable *rt;
@@ -28,6 +30,7 @@ static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4,
fl4->daddr = daddr->a4;
fl4->flowi4_tos = tos;
fl4->flowi4_oif = l3mdev_master_ifindex_by_index(net, oif);
+ fl4->flowi4_mark = mark;
if (saddr)
fl4->saddr = saddr->a4;
@@ -42,20 +45,22 @@ static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4,
static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, int oif,
const xfrm_address_t *saddr,
- const xfrm_address_t *daddr)
+ const xfrm_address_t *daddr,
+ u32 mark)
{
struct flowi4 fl4;
- return __xfrm4_dst_lookup(net, &fl4, tos, oif, saddr, daddr);
+ return __xfrm4_dst_lookup(net, &fl4, tos, oif, saddr, daddr, mark);
}
static int xfrm4_get_saddr(struct net *net, int oif,
- xfrm_address_t *saddr, xfrm_address_t *daddr)
+ xfrm_address_t *saddr, xfrm_address_t *daddr,
+ u32 mark)
{
struct dst_entry *dst;
struct flowi4 fl4;
- dst = __xfrm4_dst_lookup(net, &fl4, 0, oif, NULL, daddr);
+ dst = __xfrm4_dst_lookup(net, &fl4, 0, oif, NULL, daddr, mark);
if (IS_ERR(dst))
return -EHOSTUNREACH;
@@ -213,14 +218,6 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
fl4->flowi4_tos = iph->tos;
}
-static inline int xfrm4_garbage_collect(struct dst_ops *ops)
-{
- struct net *net = container_of(ops, struct net, xfrm.xfrm4_dst_ops);
-
- xfrm_garbage_collect_deferred(net);
- return (dst_entries_get_slow(ops) > ops->gc_thresh * 2);
-}
-
static void xfrm4_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb, u32 mtu)
{
@@ -259,14 +256,13 @@ static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
static struct dst_ops xfrm4_dst_ops_template = {
.family = AF_INET,
- .gc = xfrm4_garbage_collect,
.update_pmtu = xfrm4_update_pmtu,
.redirect = xfrm4_redirect,
.cow_metrics = dst_cow_metrics_generic,
.destroy = xfrm4_dst_destroy,
.ifdown = xfrm4_dst_ifdown,
.local_out = __ip_local_out,
- .gc_thresh = INT_MAX,
+ .gc_thresh = 32768,
};
static const struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index d6660a8c0ea5..80c40b4981bb 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* xfrm4_state.c
*
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 48c452959d2c..ea71e4b0ab7a 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -308,22 +308,12 @@ config IPV6_SEG6_LWTUNNEL
depends on IPV6
select LWTUNNEL
select DST_CACHE
+ select IPV6_MULTIPLE_TABLES
---help---
Support for encapsulation of packets within an outer IPv6
header and a Segment Routing Header using the lightweight
- tunnels mechanism.
-
- If unsure, say N.
-
-config IPV6_SEG6_INLINE
- bool "IPv6: direct Segment Routing Header insertion "
- depends on IPV6_SEG6_LWTUNNEL
- ---help---
- Support for direct insertion of the Segment Routing Header,
- also known as inline mode. Be aware that direct insertion of
- extension headers (as opposed to encapsulation) may break
- multiple mechanisms such as PMTUD or IPSec AH. Use this feature
- only if you know exactly what you are doing.
+ tunnels mechanism. Also enable support for advanced local
+ processing of SRv6 packets based on their active segment.
If unsure, say N.
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 217e9ff0e24b..e0026fa1261b 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the Linux TCP/IP (INET6) layer.
#
@@ -9,7 +10,7 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \
route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \
raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \
exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o \
- udp_offload.o seg6.o
+ udp_offload.o seg6.o fib6_notifier.o
ipv6-offload := ip6_offload.o tcpv6_offload.o exthdrs_offload.o
@@ -23,7 +24,7 @@ ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o
ipv6-$(CONFIG_PROC_FS) += proc.o
ipv6-$(CONFIG_SYN_COOKIES) += syncookies.o
ipv6-$(CONFIG_NETLABEL) += calipso.o
-ipv6-$(CONFIG_IPV6_SEG6_LWTUNNEL) += seg6_iptunnel.o
+ipv6-$(CONFIG_IPV6_SEG6_LWTUNNEL) += seg6_iptunnel.o seg6_local.o
ipv6-$(CONFIG_IPV6_SEG6_HMAC) += seg6_hmac.o
ipv6-objs += $(ipv6-y)
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 3c46e9513a31..a0ae1c9d37df 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -152,11 +152,13 @@ static void ipv6_regen_rndid(struct inet6_dev *idev);
static void ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr);
static int ipv6_generate_eui64(u8 *eui, struct net_device *dev);
-static int ipv6_count_addresses(struct inet6_dev *idev);
+static int ipv6_count_addresses(const struct inet6_dev *idev);
static int ipv6_generate_stable_address(struct in6_addr *addr,
u8 dad_count,
const struct inet6_dev *idev);
+#define IN6_ADDR_HSIZE_SHIFT 8
+#define IN6_ADDR_HSIZE (1 << IN6_ADDR_HSIZE_SHIFT)
/*
* Configured unicast address hash table
*/
@@ -192,8 +194,6 @@ static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
static void inet6_prefix_notify(int event, struct inet6_dev *idev,
struct prefix_info *pinfo);
-static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
- struct net_device *dev);
static struct ipv6_devconf ipv6_devconf __read_mostly = {
.forwarding = 0,
@@ -231,7 +231,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
.proxy_ndp = 0,
.accept_source_route = 0, /* we do not accept RH0 by default. */
.disable_ipv6 = 0,
- .accept_dad = 1,
+ .accept_dad = 0,
.suppress_frag_ndisc = 1,
.accept_ra_mtu = 1,
.stable_secret = {
@@ -303,10 +303,10 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
.disable_policy = 0,
};
-/* Check if a valid qdisc is available */
-static inline bool addrconf_qdisc_ok(const struct net_device *dev)
+/* Check if link is ready: is it up and is a valid qdisc available */
+static inline bool addrconf_link_ready(const struct net_device *dev)
{
- return !qdisc_tx_is_noop(dev);
+ return netif_oper_up(dev) && !qdisc_tx_is_noop(dev);
}
static void addrconf_del_rs_timer(struct inet6_dev *idev)
@@ -451,7 +451,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
ndev->token = in6addr_any;
- if (netif_running(dev) && addrconf_qdisc_ok(dev))
+ if (netif_running(dev) && addrconf_link_ready(dev))
ndev->if_flags |= IF_READY;
ipv6_mc_init_dev(ndev);
@@ -616,23 +616,23 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
{
struct net *net = sock_net(in_skb->sk);
struct nlattr *tb[NETCONFA_MAX+1];
+ struct inet6_dev *in6_dev = NULL;
+ struct net_device *dev = NULL;
struct netconfmsg *ncm;
struct sk_buff *skb;
struct ipv6_devconf *devconf;
- struct inet6_dev *in6_dev;
- struct net_device *dev;
int ifindex;
int err;
err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
devconf_ipv6_policy, extack);
if (err < 0)
- goto errout;
+ return err;
- err = -EINVAL;
if (!tb[NETCONFA_IFINDEX])
- goto errout;
+ return -EINVAL;
+ err = -EINVAL;
ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
switch (ifindex) {
case NETCONFA_IFINDEX_ALL:
@@ -642,10 +642,10 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
devconf = net->ipv6.devconf_dflt;
break;
default:
- dev = __dev_get_by_index(net, ifindex);
+ dev = dev_get_by_index(net, ifindex);
if (!dev)
- goto errout;
- in6_dev = __in6_dev_get(dev);
+ return -EINVAL;
+ in6_dev = in6_dev_get(dev);
if (!in6_dev)
goto errout;
devconf = &in6_dev->cnf;
@@ -653,7 +653,7 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
}
err = -ENOBUFS;
- skb = nlmsg_new(inet6_netconf_msgsize_devconf(NETCONFA_ALL), GFP_ATOMIC);
+ skb = nlmsg_new(inet6_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
if (!skb)
goto errout;
@@ -669,6 +669,10 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
}
err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
errout:
+ if (in6_dev)
+ in6_dev_put(in6_dev);
+ if (dev)
+ dev_put(dev);
return err;
}
@@ -945,12 +949,50 @@ ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp)
break;
}
- list_add_tail(&ifp->if_list, p);
+ list_add_tail_rcu(&ifp->if_list, p);
+}
+
+static u32 inet6_addr_hash(const struct net *net, const struct in6_addr *addr)
+{
+ u32 val = ipv6_addr_hash(addr) ^ net_hash_mix(net);
+
+ return hash_32(val, IN6_ADDR_HSIZE_SHIFT);
+}
+
+static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
+ struct net_device *dev, unsigned int hash)
+{
+ struct inet6_ifaddr *ifp;
+
+ hlist_for_each_entry(ifp, &inet6_addr_lst[hash], addr_lst) {
+ if (!net_eq(dev_net(ifp->idev->dev), net))
+ continue;
+ if (ipv6_addr_equal(&ifp->addr, addr)) {
+ if (!dev || ifp->idev->dev == dev)
+ return true;
+ }
+ }
+ return false;
}
-static u32 inet6_addr_hash(const struct in6_addr *addr)
+static int ipv6_add_addr_hash(struct net_device *dev, struct inet6_ifaddr *ifa)
{
- return hash_32(ipv6_addr_hash(addr), IN6_ADDR_HSIZE_SHIFT);
+ unsigned int hash = inet6_addr_hash(dev_net(dev), &ifa->addr);
+ int err = 0;
+
+ spin_lock(&addrconf_hash_lock);
+
+ /* Ignore adding duplicate addresses on an interface */
+ if (ipv6_chk_same_addr(dev_net(dev), &ifa->addr, dev, hash)) {
+ ADBG("ipv6_add_addr: already assigned\n");
+ err = -EEXIST;
+ } else {
+ hlist_add_head_rcu(&ifa->addr_lst, &inet6_addr_lst[hash]);
+ }
+
+ spin_unlock(&addrconf_hash_lock);
+
+ return err;
}
/* On success it returns ifp with increased reference count */
@@ -958,13 +1000,13 @@ static u32 inet6_addr_hash(const struct in6_addr *addr)
static struct inet6_ifaddr *
ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
const struct in6_addr *peer_addr, int pfxlen,
- int scope, u32 flags, u32 valid_lft, u32 prefered_lft)
+ int scope, u32 flags, u32 valid_lft, u32 prefered_lft,
+ bool can_block, struct netlink_ext_ack *extack)
{
+ gfp_t gfp_flags = can_block ? GFP_KERNEL : GFP_ATOMIC;
struct net *net = dev_net(idev->dev);
struct inet6_ifaddr *ifa = NULL;
- struct rt6_info *rt;
- struct in6_validator_info i6vi;
- unsigned int hash;
+ struct rt6_info *rt = NULL;
int err = 0;
int addr_type = ipv6_addr_type(addr);
@@ -974,42 +1016,33 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
addr_type & IPV6_ADDR_LOOPBACK))
return ERR_PTR(-EADDRNOTAVAIL);
- rcu_read_lock_bh();
-
- in6_dev_hold(idev);
-
if (idev->dead) {
err = -ENODEV; /*XXX*/
- goto out2;
+ goto out;
}
if (idev->cnf.disable_ipv6) {
err = -EACCES;
- goto out2;
- }
-
- i6vi.i6vi_addr = *addr;
- i6vi.i6vi_dev = idev;
- rcu_read_unlock_bh();
-
- err = inet6addr_validator_notifier_call_chain(NETDEV_UP, &i6vi);
-
- rcu_read_lock_bh();
- err = notifier_to_errno(err);
- if (err)
- goto out2;
-
- spin_lock(&addrconf_hash_lock);
-
- /* Ignore adding duplicate addresses on an interface */
- if (ipv6_chk_same_addr(dev_net(idev->dev), addr, idev->dev)) {
- ADBG("ipv6_add_addr: already assigned\n");
- err = -EEXIST;
goto out;
}
- ifa = kzalloc(sizeof(struct inet6_ifaddr), GFP_ATOMIC);
+ /* validator notifier needs to be blocking;
+ * do not call in atomic context
+ */
+ if (can_block) {
+ struct in6_validator_info i6vi = {
+ .i6vi_addr = *addr,
+ .i6vi_dev = idev,
+ .extack = extack,
+ };
+
+ err = inet6addr_validator_notifier_call_chain(NETDEV_UP, &i6vi);
+ err = notifier_to_errno(err);
+ if (err < 0)
+ goto out;
+ }
+ ifa = kzalloc(sizeof(*ifa), gfp_flags);
if (!ifa) {
ADBG("ipv6_add_addr: malloc failed\n");
err = -ENOBUFS;
@@ -1019,6 +1052,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
rt = addrconf_dst_alloc(idev, addr, false);
if (IS_ERR(rt)) {
err = PTR_ERR(rt);
+ rt = NULL;
goto out;
}
@@ -1049,16 +1083,21 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
ifa->rt = rt;
ifa->idev = idev;
+ in6_dev_hold(idev);
+
/* For caller */
refcount_set(&ifa->refcnt, 1);
- /* Add to big hash table */
- hash = inet6_addr_hash(addr);
+ rcu_read_lock_bh();
- hlist_add_head_rcu(&ifa->addr_lst, &inet6_addr_lst[hash]);
- spin_unlock(&addrconf_hash_lock);
+ err = ipv6_add_addr_hash(idev->dev, ifa);
+ if (err < 0) {
+ rcu_read_unlock_bh();
+ goto out;
+ }
write_lock(&idev->lock);
+
/* Add to inet6_dev unicast addr list. */
ipv6_link_dev_addr(idev, ifa);
@@ -1069,21 +1108,23 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
in6_ifa_hold(ifa);
write_unlock(&idev->lock);
-out2:
+
rcu_read_unlock_bh();
- if (likely(err == 0))
- inet6addr_notifier_call_chain(NETDEV_UP, ifa);
- else {
- kfree(ifa);
- in6_dev_put(idev);
+ inet6addr_notifier_call_chain(NETDEV_UP, ifa);
+out:
+ if (unlikely(err < 0)) {
+ if (rt)
+ ip6_rt_put(rt);
+ if (ifa) {
+ if (ifa->idev)
+ in6_dev_put(ifa->idev);
+ kfree(ifa);
+ }
ifa = ERR_PTR(err);
}
return ifa;
-out:
- spin_unlock(&addrconf_hash_lock);
- goto out2;
}
enum cleanup_prefix_rt_t {
@@ -1204,7 +1245,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
if (ifp->flags & IFA_F_PERMANENT && !(ifp->flags & IFA_F_NOPREFIXROUTE))
action = check_cleanup_prefix_route(ifp, &expires);
- list_del_init(&ifp->if_list);
+ list_del_rcu(&ifp->if_list);
__in6_ifa_put(ifp);
write_unlock_bh(&ifp->idev->lock);
@@ -1226,7 +1267,9 @@ out:
in6_ifa_put(ifp);
}
-static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *ift)
+static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp,
+ struct inet6_ifaddr *ift,
+ bool block)
{
struct inet6_dev *idev = ifp->idev;
struct in6_addr addr, *tmpaddr;
@@ -1330,7 +1373,7 @@ retry:
ift = ipv6_add_addr(idev, &addr, NULL, tmp_plen,
ipv6_addr_scope(&addr), addr_flags,
- tmp_valid_lft, tmp_prefered_lft);
+ tmp_valid_lft, tmp_prefered_lft, block, NULL);
if (IS_ERR(ift)) {
in6_ifa_put(ifp);
in6_dev_put(idev);
@@ -1399,10 +1442,18 @@ static inline int ipv6_saddr_preferred(int type)
return 0;
}
-static inline bool ipv6_use_optimistic_addr(struct inet6_dev *idev)
+static bool ipv6_use_optimistic_addr(struct net *net,
+ struct inet6_dev *idev)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
- return idev && idev->cnf.optimistic_dad && idev->cnf.use_optimistic;
+ if (!idev)
+ return false;
+ if (!net->ipv6.devconf_all->optimistic_dad && !idev->cnf.optimistic_dad)
+ return false;
+ if (!net->ipv6.devconf_all->use_optimistic && !idev->cnf.use_optimistic)
+ return false;
+
+ return true;
#else
return false;
#endif
@@ -1472,7 +1523,7 @@ static int ipv6_get_saddr_eval(struct net *net,
/* Rule 3: Avoid deprecated and optimistic addresses */
u8 avoid = IFA_F_DEPRECATED;
- if (!ipv6_use_optimistic_addr(score->ifa->idev))
+ if (!ipv6_use_optimistic_addr(net, score->ifa->idev))
avoid |= IFA_F_OPTIMISTIC;
ret = ipv6_saddr_preferred(score->addr_type) ||
!(score->ifa->flags & avoid);
@@ -1550,8 +1601,7 @@ static int __ipv6_dev_get_saddr(struct net *net,
{
struct ipv6_saddr_score *score = &scores[1 - hiscore_idx], *hiscore = &scores[hiscore_idx];
- read_lock_bh(&idev->lock);
- list_for_each_entry(score->ifa, &idev->addr_list, if_list) {
+ list_for_each_entry_rcu(score->ifa, &idev->addr_list, if_list) {
int i;
/*
@@ -1601,11 +1651,6 @@ static int __ipv6_dev_get_saddr(struct net *net,
}
break;
} else if (minihiscore < miniscore) {
- if (hiscore->ifa)
- in6_ifa_put(hiscore->ifa);
-
- in6_ifa_hold(score->ifa);
-
swap(hiscore, score);
hiscore_idx = 1 - hiscore_idx;
@@ -1617,7 +1662,6 @@ static int __ipv6_dev_get_saddr(struct net *net,
}
}
out:
- read_unlock_bh(&idev->lock);
return hiscore_idx;
}
@@ -1654,6 +1698,7 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev,
int dst_type;
bool use_oif_addr = false;
int hiscore_idx = 0;
+ int ret = 0;
dst_type = __ipv6_addr_type(daddr);
dst.addr = daddr;
@@ -1729,15 +1774,14 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev,
}
out:
- rcu_read_unlock();
-
hiscore = &scores[hiscore_idx];
if (!hiscore->ifa)
- return -EADDRNOTAVAIL;
+ ret = -EADDRNOTAVAIL;
+ else
+ *saddr = hiscore->ifa->addr;
- *saddr = hiscore->ifa->addr;
- in6_ifa_put(hiscore->ifa);
- return 0;
+ rcu_read_unlock();
+ return ret;
}
EXPORT_SYMBOL(ipv6_dev_get_saddr);
@@ -1777,15 +1821,15 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
return err;
}
-static int ipv6_count_addresses(struct inet6_dev *idev)
+static int ipv6_count_addresses(const struct inet6_dev *idev)
{
+ const struct inet6_ifaddr *ifp;
int cnt = 0;
- struct inet6_ifaddr *ifp;
- read_lock_bh(&idev->lock);
- list_for_each_entry(ifp, &idev->addr_list, if_list)
+ rcu_read_lock();
+ list_for_each_entry_rcu(ifp, &idev->addr_list, if_list)
cnt++;
- read_unlock_bh(&idev->lock);
+ rcu_read_unlock();
return cnt;
}
@@ -1800,11 +1844,11 @@ int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr,
const struct net_device *dev, int strict,
u32 banned_flags)
{
+ unsigned int hash = inet6_addr_hash(net, addr);
struct inet6_ifaddr *ifp;
- unsigned int hash = inet6_addr_hash(addr);
u32 ifp_flags;
- rcu_read_lock_bh();
+ rcu_read_lock();
hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) {
if (!net_eq(dev_net(ifp->idev->dev), net))
continue;
@@ -1818,32 +1862,16 @@ int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr,
!(ifp_flags&banned_flags) &&
(!dev || ifp->idev->dev == dev ||
!(ifp->scope&(IFA_LINK|IFA_HOST) || strict))) {
- rcu_read_unlock_bh();
+ rcu_read_unlock();
return 1;
}
}
- rcu_read_unlock_bh();
+ rcu_read_unlock();
return 0;
}
EXPORT_SYMBOL(ipv6_chk_addr_and_flags);
-static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
- struct net_device *dev)
-{
- unsigned int hash = inet6_addr_hash(addr);
- struct inet6_ifaddr *ifp;
-
- hlist_for_each_entry(ifp, &inet6_addr_lst[hash], addr_lst) {
- if (!net_eq(dev_net(ifp->idev->dev), net))
- continue;
- if (ipv6_addr_equal(&ifp->addr, addr)) {
- if (!dev || ifp->idev->dev == dev)
- return true;
- }
- }
- return false;
-}
/* Compares an address/prefix_len with addresses on device @dev.
* If one is found it returns true.
@@ -1851,20 +1879,18 @@ static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
bool ipv6_chk_custom_prefix(const struct in6_addr *addr,
const unsigned int prefix_len, struct net_device *dev)
{
- struct inet6_dev *idev;
- struct inet6_ifaddr *ifa;
+ const struct inet6_ifaddr *ifa;
+ const struct inet6_dev *idev;
bool ret = false;
rcu_read_lock();
idev = __in6_dev_get(dev);
if (idev) {
- read_lock_bh(&idev->lock);
- list_for_each_entry(ifa, &idev->addr_list, if_list) {
+ list_for_each_entry_rcu(ifa, &idev->addr_list, if_list) {
ret = ipv6_prefix_equal(addr, &ifa->addr, prefix_len);
if (ret)
break;
}
- read_unlock_bh(&idev->lock);
}
rcu_read_unlock();
@@ -1874,22 +1900,20 @@ EXPORT_SYMBOL(ipv6_chk_custom_prefix);
int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev)
{
- struct inet6_dev *idev;
- struct inet6_ifaddr *ifa;
+ const struct inet6_ifaddr *ifa;
+ const struct inet6_dev *idev;
int onlink;
onlink = 0;
rcu_read_lock();
idev = __in6_dev_get(dev);
if (idev) {
- read_lock_bh(&idev->lock);
- list_for_each_entry(ifa, &idev->addr_list, if_list) {
+ list_for_each_entry_rcu(ifa, &idev->addr_list, if_list) {
onlink = ipv6_prefix_equal(addr, &ifa->addr,
ifa->prefix_len);
if (onlink)
break;
}
- read_unlock_bh(&idev->lock);
}
rcu_read_unlock();
return onlink;
@@ -1899,11 +1923,11 @@ EXPORT_SYMBOL(ipv6_chk_prefix);
struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *addr,
struct net_device *dev, int strict)
{
+ unsigned int hash = inet6_addr_hash(net, addr);
struct inet6_ifaddr *ifp, *result = NULL;
- unsigned int hash = inet6_addr_hash(addr);
- rcu_read_lock_bh();
- hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[hash], addr_lst) {
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) {
if (!net_eq(dev_net(ifp->idev->dev), net))
continue;
if (ipv6_addr_equal(&ifp->addr, addr)) {
@@ -1915,7 +1939,7 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add
}
}
}
- rcu_read_unlock_bh();
+ rcu_read_unlock();
return result;
}
@@ -1934,7 +1958,7 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
if (ifpub) {
in6_ifa_hold(ifpub);
spin_unlock_bh(&ifp->lock);
- ipv6_create_tempaddr(ifpub, ifp);
+ ipv6_create_tempaddr(ifpub, ifp, true);
in6_ifa_put(ifpub);
} else {
spin_unlock_bh(&ifp->lock);
@@ -1967,7 +1991,7 @@ static int addrconf_dad_end(struct inet6_ifaddr *ifp)
return err;
}
-void addrconf_dad_failure(struct inet6_ifaddr *ifp)
+void addrconf_dad_failure(struct sk_buff *skb, struct inet6_ifaddr *ifp)
{
struct inet6_dev *idev = ifp->idev;
struct net *net = dev_net(ifp->idev->dev);
@@ -1977,8 +2001,8 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp)
return;
}
- net_info_ratelimited("%s: IPv6 duplicate address %pI6c detected!\n",
- ifp->idev->dev->name, &ifp->addr);
+ net_info_ratelimited("%s: IPv6 duplicate address %pI6c used by %pM detected!\n",
+ ifp->idev->dev->name, &ifp->addr, eth_hdr(skb)->h_source);
spin_lock_bh(&ifp->lock);
@@ -2017,7 +2041,7 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp)
ifp2 = ipv6_add_addr(idev, &new_addr, NULL, pfxlen,
scope, flags, valid_lft,
- preferred_lft);
+ preferred_lft, false, NULL);
if (IS_ERR(ifp2))
goto lock_errdad;
@@ -2313,24 +2337,24 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
if (!table)
return NULL;
- read_lock_bh(&table->tb6_lock);
- fn = fib6_locate(&table->tb6_root, pfx, plen, NULL, 0);
+ rcu_read_lock();
+ fn = fib6_locate(&table->tb6_root, pfx, plen, NULL, 0, true);
if (!fn)
goto out;
- noflags |= RTF_CACHE;
- for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
+ for_each_fib6_node_rt_rcu(fn) {
if (rt->dst.dev->ifindex != dev->ifindex)
continue;
if ((rt->rt6i_flags & flags) != flags)
continue;
if ((rt->rt6i_flags & noflags) != 0)
continue;
- dst_hold(&rt->dst);
+ if (!dst_hold_safe(&rt->dst))
+ rt = NULL;
break;
}
out:
- read_unlock_bh(&table->tb6_lock);
+ rcu_read_unlock();
return rt;
}
@@ -2434,7 +2458,7 @@ static void manage_tempaddrs(struct inet6_dev *idev,
* no temporary address currently exists.
*/
read_unlock_bh(&idev->lock);
- ipv6_create_tempaddr(ifp, NULL);
+ ipv6_create_tempaddr(ifp, NULL, false);
} else {
read_unlock_bh(&idev->lock);
}
@@ -2460,7 +2484,8 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
int max_addresses = in6_dev->cnf.max_addresses;
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
- if (in6_dev->cnf.optimistic_dad &&
+ if ((net->ipv6.devconf_all->optimistic_dad ||
+ in6_dev->cnf.optimistic_dad) &&
!net->ipv6.devconf_all->forwarding && sllao)
addr_flags |= IFA_F_OPTIMISTIC;
#endif
@@ -2474,7 +2499,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
pinfo->prefix_len,
addr_type&IPV6_ADDR_SCOPE_MASK,
addr_flags, valid_lft,
- prefered_lft);
+ prefered_lft, false, NULL);
if (IS_ERR_OR_NULL(ifp))
return -1;
@@ -2784,7 +2809,8 @@ static int inet6_addr_add(struct net *net, int ifindex,
const struct in6_addr *pfx,
const struct in6_addr *peer_pfx,
unsigned int plen, __u32 ifa_flags,
- __u32 prefered_lft, __u32 valid_lft)
+ __u32 prefered_lft, __u32 valid_lft,
+ struct netlink_ext_ack *extack)
{
struct inet6_ifaddr *ifp;
struct inet6_dev *idev;
@@ -2843,7 +2869,7 @@ static int inet6_addr_add(struct net *net, int ifindex,
}
ifp = ipv6_add_addr(idev, pfx, peer_pfx, plen, scope, ifa_flags,
- valid_lft, prefered_lft);
+ valid_lft, prefered_lft, true, extack);
if (!IS_ERR(ifp)) {
if (!(ifa_flags & IFA_F_NOPREFIXROUTE)) {
@@ -2928,7 +2954,7 @@ int addrconf_add_ifaddr(struct net *net, void __user *arg)
rtnl_lock();
err = inet6_addr_add(net, ireq.ifr6_ifindex, &ireq.ifr6_addr, NULL,
ireq.ifr6_prefixlen, IFA_F_PERMANENT,
- INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
+ INFINITY_LIFE_TIME, INFINITY_LIFE_TIME, NULL);
rtnl_unlock();
return err;
}
@@ -2958,7 +2984,8 @@ static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
ifp = ipv6_add_addr(idev, addr, NULL, plen,
scope, IFA_F_PERMANENT,
- INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
+ INFINITY_LIFE_TIME, INFINITY_LIFE_TIME,
+ true, NULL);
if (!IS_ERR(ifp)) {
spin_lock_bh(&ifp->lock);
ifp->flags &= ~IFA_F_TENTATIVE;
@@ -3030,9 +3057,6 @@ static void sit_add_v4_addrs(struct inet6_dev *idev)
static void init_loopback(struct net_device *dev)
{
struct inet6_dev *idev;
- struct net_device *sp_dev;
- struct inet6_ifaddr *sp_ifa;
- struct rt6_info *sp_rt;
/* ::1 */
@@ -3045,45 +3069,6 @@ static void init_loopback(struct net_device *dev)
}
add_addr(idev, &in6addr_loopback, 128, IFA_HOST);
-
- /* Add routes to other interface's IPv6 addresses */
- for_each_netdev(dev_net(dev), sp_dev) {
- if (!strcmp(sp_dev->name, dev->name))
- continue;
-
- idev = __in6_dev_get(sp_dev);
- if (!idev)
- continue;
-
- read_lock_bh(&idev->lock);
- list_for_each_entry(sp_ifa, &idev->addr_list, if_list) {
-
- if (sp_ifa->flags & (IFA_F_DADFAILED | IFA_F_TENTATIVE))
- continue;
-
- if (sp_ifa->rt) {
- /* This dst has been added to garbage list when
- * lo device down, release this obsolete dst and
- * reallocate a new router for ifa.
- */
- if (!atomic_read(&sp_ifa->rt->rt6i_ref)) {
- ip6_rt_put(sp_ifa->rt);
- sp_ifa->rt = NULL;
- } else {
- continue;
- }
- }
-
- sp_rt = addrconf_dst_alloc(idev, &sp_ifa->addr, false);
-
- /* Failure cases are ignored */
- if (!IS_ERR(sp_rt)) {
- sp_ifa->rt = sp_rt;
- ip6_ins_rt(sp_rt);
- }
- }
- read_unlock_bh(&idev->lock);
- }
}
void addrconf_add_linklocal(struct inet6_dev *idev,
@@ -3093,13 +3078,14 @@ void addrconf_add_linklocal(struct inet6_dev *idev,
u32 addr_flags = flags | IFA_F_PERMANENT;
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
- if (idev->cnf.optimistic_dad &&
+ if ((dev_net(idev->dev)->ipv6.devconf_all->optimistic_dad ||
+ idev->cnf.optimistic_dad) &&
!dev_net(idev->dev)->ipv6.devconf_all->forwarding)
addr_flags |= IFA_F_OPTIMISTIC;
#endif
ifp = ipv6_add_addr(idev, addr, NULL, 64, IFA_LINK, addr_flags,
- INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
+ INFINITY_LIFE_TIME, INFINITY_LIFE_TIME, true, NULL);
if (!IS_ERR(ifp)) {
addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev, 0, 0);
addrconf_dad_start(ifp);
@@ -3321,15 +3307,15 @@ static void addrconf_gre_config(struct net_device *dev)
static int fixup_permanent_addr(struct inet6_dev *idev,
struct inet6_ifaddr *ifp)
{
- /* rt6i_ref == 0 means the host route was removed from the
+ /* !rt6i_node means the host route was removed from the
* FIB, for example, if 'lo' device is taken down. In that
* case regenerate the host route.
*/
- if (!ifp->rt || !atomic_read(&ifp->rt->rt6i_ref)) {
+ if (!ifp->rt || !ifp->rt->rt6i_node) {
struct rt6_info *rt, *prev;
rt = addrconf_dst_alloc(idev, &ifp->addr, false);
- if (unlikely(IS_ERR(rt)))
+ if (IS_ERR(rt))
return PTR_ERR(rt);
/* ifp->rt can be accessed outside of rtnl */
@@ -3367,6 +3353,7 @@ static void addrconf_permanent_addr(struct net_device *dev)
if ((ifp->flags & IFA_F_PERMANENT) &&
fixup_permanent_addr(idev, ifp) < 0) {
write_unlock_bh(&idev->lock);
+ in6_ifa_hold(ifp);
ipv6_del_addr(ifp);
write_lock_bh(&idev->lock);
@@ -3435,7 +3422,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
/* restore routes for permanent addresses */
addrconf_permanent_addr(dev);
- if (!addrconf_qdisc_ok(dev)) {
+ if (!addrconf_link_ready(dev)) {
/* device is not ready yet. */
pr_info("ADDRCONF(NETDEV_UP): %s: link is not ready\n",
dev->name);
@@ -3450,7 +3437,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
run_pending = 1;
}
} else if (event == NETDEV_CHANGE) {
- if (!addrconf_qdisc_ok(dev)) {
+ if (!addrconf_link_ready(dev)) {
/* device is still not ready. */
break;
}
@@ -3594,7 +3581,6 @@ static int addrconf_ifdown(struct net_device *dev, int how)
struct net *net = dev_net(dev);
struct inet6_dev *idev;
struct inet6_ifaddr *ifa, *tmp;
- struct list_head del_list;
int _keep_addr;
bool keep_addr;
int state, i;
@@ -3686,7 +3672,6 @@ restart:
*/
keep_addr = (!how && _keep_addr > 0 && !idev->cnf.disable_ipv6);
- INIT_LIST_HEAD(&del_list);
list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) {
struct rt6_info *rt = NULL;
bool keep;
@@ -3695,8 +3680,6 @@ restart:
keep = keep_addr && (ifa->flags & IFA_F_PERMANENT) &&
!addr_is_local(&ifa->addr);
- if (!keep)
- list_move(&ifa->if_list, &del_list);
write_unlock_bh(&idev->lock);
spin_lock_bh(&ifa->lock);
@@ -3730,19 +3713,14 @@ restart:
}
write_lock_bh(&idev->lock);
+ if (!keep) {
+ list_del_rcu(&ifa->if_list);
+ in6_ifa_put(ifa);
+ }
}
write_unlock_bh(&idev->lock);
- /* now clean up addresses to be removed */
- while (!list_empty(&del_list)) {
- ifa = list_first_entry(&del_list,
- struct inet6_ifaddr, if_list);
- list_del(&ifa->if_list);
-
- in6_ifa_put(ifa);
- }
-
/* Step 5: Discard anycast and multicast list */
if (how) {
ipv6_ac_destroy_dev(idev);
@@ -3852,7 +3830,8 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
goto out;
if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) ||
- idev->cnf.accept_dad < 1 ||
+ (dev_net(dev)->ipv6.devconf_all->accept_dad < 1 &&
+ idev->cnf.accept_dad < 1) ||
!(ifp->flags&IFA_F_TENTATIVE) ||
ifp->flags & IFA_F_NODAD) {
bump_id = ifp->flags & IFA_F_TENTATIVE;
@@ -3883,7 +3862,7 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
*/
if (ifp->flags & IFA_F_OPTIMISTIC) {
ip6_ins_rt(ifp->rt);
- if (ipv6_use_optimistic_addr(idev)) {
+ if (ipv6_use_optimistic_addr(dev_net(dev), idev)) {
/* Because optimistic nodes can use this address,
* notify listeners. If DAD fails, RTM_DELADDR is sent.
*/
@@ -3939,7 +3918,9 @@ static void addrconf_dad_work(struct work_struct *w)
action = DAD_ABORT;
ifp->state = INET6_IFADDR_STATE_POSTDAD;
- if (idev->cnf.accept_dad > 1 && !idev->cnf.disable_ipv6 &&
+ if ((dev_net(idev->dev)->ipv6.devconf_all->accept_dad > 1 ||
+ idev->cnf.accept_dad > 1) &&
+ !idev->cnf.disable_ipv6 &&
!(ifp->flags & IFA_F_STABLE_PRIVACY)) {
struct in6_addr addr;
@@ -4121,9 +4102,9 @@ struct if6_iter_state {
static struct inet6_ifaddr *if6_get_first(struct seq_file *seq, loff_t pos)
{
- struct inet6_ifaddr *ifa = NULL;
struct if6_iter_state *state = seq->private;
struct net *net = seq_file_net(seq);
+ struct inet6_ifaddr *ifa = NULL;
int p = 0;
/* initial bucket if pos is 0 */
@@ -4133,7 +4114,7 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq, loff_t pos)
}
for (; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) {
- hlist_for_each_entry_rcu_bh(ifa, &inet6_addr_lst[state->bucket],
+ hlist_for_each_entry_rcu(ifa, &inet6_addr_lst[state->bucket],
addr_lst) {
if (!net_eq(dev_net(ifa->idev->dev), net))
continue;
@@ -4159,7 +4140,7 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq,
struct if6_iter_state *state = seq->private;
struct net *net = seq_file_net(seq);
- hlist_for_each_entry_continue_rcu_bh(ifa, addr_lst) {
+ hlist_for_each_entry_continue_rcu(ifa, addr_lst) {
if (!net_eq(dev_net(ifa->idev->dev), net))
continue;
state->offset++;
@@ -4168,7 +4149,7 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq,
while (++state->bucket < IN6_ADDR_HSIZE) {
state->offset = 0;
- hlist_for_each_entry_rcu_bh(ifa,
+ hlist_for_each_entry_rcu(ifa,
&inet6_addr_lst[state->bucket], addr_lst) {
if (!net_eq(dev_net(ifa->idev->dev), net))
continue;
@@ -4181,9 +4162,9 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq,
}
static void *if6_seq_start(struct seq_file *seq, loff_t *pos)
- __acquires(rcu_bh)
+ __acquires(rcu)
{
- rcu_read_lock_bh();
+ rcu_read_lock();
return if6_get_first(seq, *pos);
}
@@ -4197,9 +4178,9 @@ static void *if6_seq_next(struct seq_file *seq, void *v, loff_t *pos)
}
static void if6_seq_stop(struct seq_file *seq, void *v)
- __releases(rcu_bh)
+ __releases(rcu)
{
- rcu_read_unlock_bh();
+ rcu_read_unlock();
}
static int if6_seq_show(struct seq_file *seq, void *v)
@@ -4268,12 +4249,12 @@ void if6_proc_exit(void)
/* Check if address is a home address configured on any interface. */
int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr)
{
- int ret = 0;
+ unsigned int hash = inet6_addr_hash(net, addr);
struct inet6_ifaddr *ifp = NULL;
- unsigned int hash = inet6_addr_hash(addr);
+ int ret = 0;
- rcu_read_lock_bh();
- hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[hash], addr_lst) {
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) {
if (!net_eq(dev_net(ifp->idev->dev), net))
continue;
if (ipv6_addr_equal(&ifp->addr, addr) &&
@@ -4282,7 +4263,7 @@ int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr)
break;
}
}
- rcu_read_unlock_bh();
+ rcu_read_unlock();
return ret;
}
#endif
@@ -4372,7 +4353,7 @@ restart:
spin_lock(&ifpub->lock);
ifpub->regen_count = 0;
spin_unlock(&ifpub->lock);
- ipv6_create_tempaddr(ifpub, ifp);
+ ipv6_create_tempaddr(ifpub, ifp, true);
in6_ifa_put(ifpub);
in6_ifa_put(ifp);
goto restart;
@@ -4608,7 +4589,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
*/
return inet6_addr_add(net, ifm->ifa_index, pfx, peer_pfx,
ifm->ifa_prefixlen, ifa_flags,
- preferred_lft, valid_lft);
+ preferred_lft, valid_lft, extack);
}
if (nlh->nlmsg_flags & NLM_F_EXCL ||
@@ -4935,17 +4916,15 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy,
extack);
if (err < 0)
- goto errout;
+ return err;
addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer);
- if (!addr) {
- err = -EINVAL;
- goto errout;
- }
+ if (!addr)
+ return -EINVAL;
ifm = nlmsg_data(nlh);
if (ifm->ifa_index)
- dev = __dev_get_by_index(net, ifm->ifa_index);
+ dev = dev_get_by_index(net, ifm->ifa_index);
ifa = ipv6_get_ifaddr(net, addr, dev, 1);
if (!ifa) {
@@ -4971,6 +4950,8 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
errout_ifa:
in6_ifa_put(ifa);
errout:
+ if (dev)
+ dev_put(dev);
return err;
}
@@ -4982,9 +4963,10 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa)
/* Don't send DELADDR notification for TENTATIVE address,
* since NEWADDR notification is sent only after removing
- * TENTATIVE flag.
+ * TENTATIVE flag, if DAD has not failed.
*/
- if (ifa->flags & IFA_F_TENTATIVE && event == RTM_DELADDR)
+ if (ifa->flags & IFA_F_TENTATIVE && !(ifa->flags & IFA_F_DADFAILED) &&
+ event == RTM_DELADDR)
return;
skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_ATOMIC);
@@ -5077,6 +5059,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_ENHANCED_DAD] = cnf->enhanced_dad;
array[DEVCONF_ADDR_GEN_MODE] = cnf->addr_gen_mode;
array[DEVCONF_DISABLE_POLICY] = cnf->disable_policy;
+ array[DEVCONF_NDISC_TCLASS] = cnf->ndisc_tclass;
}
static inline size_t inet6_ifla6_size(void)
@@ -5556,7 +5539,7 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
* our DAD process, so we don't need
* to do it again
*/
- if (!(ifp->rt->rt6i_node))
+ if (!rcu_access_pointer(ifp->rt->rt6i_node))
ip6_ins_rt(ifp->rt);
if (ifp->idev->cnf.forwarding)
addrconf_join_anycast(ifp);
@@ -5926,10 +5909,9 @@ void addrconf_disable_policy_idev(struct inet6_dev *idev, int val)
spin_lock(&ifa->lock);
if (ifa->rt) {
struct rt6_info *rt = ifa->rt;
- struct fib6_table *table = rt->rt6i_table;
int cpu;
- read_lock(&table->tb6_lock);
+ rcu_read_lock();
addrconf_set_nopolicy(ifa->rt, val);
if (rt->rt6i_pcpu) {
for_each_possible_cpu(cpu) {
@@ -5939,7 +5921,7 @@ void addrconf_disable_policy_idev(struct inet6_dev *idev, int val)
addrconf_set_nopolicy(*rtp, val);
}
}
- read_unlock(&table->tb6_lock);
+ rcu_read_unlock();
}
spin_unlock(&ifa->lock);
}
@@ -6005,6 +5987,7 @@ int addrconf_sysctl_disable_policy(struct ctl_table *ctl, int write,
}
static int minus_one = -1;
+static const int zero = 0;
static const int one = 1;
static const int two_five_five = 255;
@@ -6376,6 +6359,15 @@ static const struct ctl_table addrconf_sysctl[] = {
.proc_handler = addrconf_sysctl_disable_policy,
},
{
+ .procname = "ndisc_tclass",
+ .data = &ipv6_devconf.ndisc_tclass,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = (void *)&zero,
+ .extra2 = (void *)&two_five_five,
+ },
+ {
/* sentinel */
}
};
@@ -6605,21 +6597,21 @@ int __init addrconf_init(void)
rtnl_af_register(&inet6_ops);
err = __rtnl_register(PF_INET6, RTM_GETLINK, NULL, inet6_dump_ifinfo,
- NULL);
+ 0);
if (err < 0)
goto errout;
/* Only the first call to __rtnl_register can fail */
- __rtnl_register(PF_INET6, RTM_NEWADDR, inet6_rtm_newaddr, NULL, NULL);
- __rtnl_register(PF_INET6, RTM_DELADDR, inet6_rtm_deladdr, NULL, NULL);
+ __rtnl_register(PF_INET6, RTM_NEWADDR, inet6_rtm_newaddr, NULL, 0);
+ __rtnl_register(PF_INET6, RTM_DELADDR, inet6_rtm_deladdr, NULL, 0);
__rtnl_register(PF_INET6, RTM_GETADDR, inet6_rtm_getaddr,
- inet6_dump_ifaddr, NULL);
+ inet6_dump_ifaddr, RTNL_FLAG_DOIT_UNLOCKED);
__rtnl_register(PF_INET6, RTM_GETMULTICAST, NULL,
- inet6_dump_ifmcaddr, NULL);
+ inet6_dump_ifmcaddr, 0);
__rtnl_register(PF_INET6, RTM_GETANYCAST, NULL,
- inet6_dump_ifacaddr, NULL);
+ inet6_dump_ifacaddr, 0);
__rtnl_register(PF_INET6, RTM_GETNETCONF, inet6_netconf_get_devconf,
- inet6_netconf_dump_devconf, NULL);
+ inet6_netconf_dump_devconf, RTNL_FLAG_DOIT_UNLOCKED);
ipv6_addr_label_rtnl_register();
@@ -6646,9 +6638,9 @@ void addrconf_cleanup(void)
unregister_pernet_subsys(&addrconf_ops);
ipv6_addr_label_cleanup();
- rtnl_lock();
+ rtnl_af_unregister(&inet6_ops);
- __rtnl_af_unregister(&inet6_ops);
+ rtnl_lock();
/* clean dev list */
for_each_netdev(&init_net, dev) {
diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index 9e3488d50b15..32b564dfd02a 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -88,7 +88,7 @@ int __ipv6_addr_type(const struct in6_addr *addr)
EXPORT_SYMBOL(__ipv6_addr_type);
static ATOMIC_NOTIFIER_HEAD(inet6addr_chain);
-static ATOMIC_NOTIFIER_HEAD(inet6addr_validator_chain);
+static BLOCKING_NOTIFIER_HEAD(inet6addr_validator_chain);
int register_inet6addr_notifier(struct notifier_block *nb)
{
@@ -110,19 +110,20 @@ EXPORT_SYMBOL(inet6addr_notifier_call_chain);
int register_inet6addr_validator_notifier(struct notifier_block *nb)
{
- return atomic_notifier_chain_register(&inet6addr_validator_chain, nb);
+ return blocking_notifier_chain_register(&inet6addr_validator_chain, nb);
}
EXPORT_SYMBOL(register_inet6addr_validator_notifier);
int unregister_inet6addr_validator_notifier(struct notifier_block *nb)
{
- return atomic_notifier_chain_unregister(&inet6addr_validator_chain, nb);
+ return blocking_notifier_chain_unregister(&inet6addr_validator_chain,
+ nb);
}
EXPORT_SYMBOL(unregister_inet6addr_validator_notifier);
int inet6addr_validator_notifier_call_chain(unsigned long val, void *v)
{
- return atomic_notifier_call_chain(&inet6addr_validator_chain, val, v);
+ return blocking_notifier_call_chain(&inet6addr_validator_chain, val, v);
}
EXPORT_SYMBOL(inet6addr_validator_notifier_call_chain);
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index 7a428f65c7ec..00e1f8ee08f8 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* IPv6 Address Label subsystem
* for the IPv6 "Default" Source Address Selection
@@ -18,7 +19,6 @@
#include <linux/if_addrlabel.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
-#include <linux/refcount.h>
#if 0
#define ADDRLABEL(x...) printk(x)
@@ -30,30 +30,15 @@
* Policy Table
*/
struct ip6addrlbl_entry {
- possible_net_t lbl_net;
struct in6_addr prefix;
int prefixlen;
int ifindex;
int addrtype;
u32 label;
struct hlist_node list;
- refcount_t refcnt;
struct rcu_head rcu;
};
-static struct ip6addrlbl_table
-{
- struct hlist_head head;
- spinlock_t lock;
- u32 seq;
-} ip6addrlbl_table;
-
-static inline
-struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl)
-{
- return read_pnet(&lbl->lbl_net);
-}
-
/*
* Default policy table (RFC6724 + extensions)
*
@@ -125,36 +110,11 @@ static const __net_initconst struct ip6addrlbl_init_table
}
};
-/* Object management */
-static inline void ip6addrlbl_free(struct ip6addrlbl_entry *p)
-{
- kfree(p);
-}
-
-static void ip6addrlbl_free_rcu(struct rcu_head *h)
-{
- ip6addrlbl_free(container_of(h, struct ip6addrlbl_entry, rcu));
-}
-
-static bool ip6addrlbl_hold(struct ip6addrlbl_entry *p)
-{
- return refcount_inc_not_zero(&p->refcnt);
-}
-
-static inline void ip6addrlbl_put(struct ip6addrlbl_entry *p)
-{
- if (refcount_dec_and_test(&p->refcnt))
- call_rcu(&p->rcu, ip6addrlbl_free_rcu);
-}
-
/* Find label */
-static bool __ip6addrlbl_match(struct net *net,
- const struct ip6addrlbl_entry *p,
+static bool __ip6addrlbl_match(const struct ip6addrlbl_entry *p,
const struct in6_addr *addr,
int addrtype, int ifindex)
{
- if (!net_eq(ip6addrlbl_net(p), net))
- return false;
if (p->ifindex && p->ifindex != ifindex)
return false;
if (p->addrtype && p->addrtype != addrtype)
@@ -169,8 +129,9 @@ static struct ip6addrlbl_entry *__ipv6_addr_label(struct net *net,
int type, int ifindex)
{
struct ip6addrlbl_entry *p;
- hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) {
- if (__ip6addrlbl_match(net, p, addr, type, ifindex))
+
+ hlist_for_each_entry_rcu(p, &net->ipv6.ip6addrlbl_table.head, list) {
+ if (__ip6addrlbl_match(p, addr, type, ifindex))
return p;
}
return NULL;
@@ -196,8 +157,7 @@ u32 ipv6_addr_label(struct net *net,
}
/* allocate one entry */
-static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net,
- const struct in6_addr *prefix,
+static struct ip6addrlbl_entry *ip6addrlbl_alloc(const struct in6_addr *prefix,
int prefixlen, int ifindex,
u32 label)
{
@@ -236,24 +196,22 @@ static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net,
newp->addrtype = addrtype;
newp->label = label;
INIT_HLIST_NODE(&newp->list);
- write_pnet(&newp->lbl_net, net);
- refcount_set(&newp->refcnt, 1);
return newp;
}
/* add a label */
-static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace)
+static int __ip6addrlbl_add(struct net *net, struct ip6addrlbl_entry *newp,
+ int replace)
{
- struct hlist_node *n;
struct ip6addrlbl_entry *last = NULL, *p = NULL;
+ struct hlist_node *n;
int ret = 0;
ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n", __func__, newp,
replace);
- hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) {
+ hlist_for_each_entry_safe(p, n, &net->ipv6.ip6addrlbl_table.head, list) {
if (p->prefixlen == newp->prefixlen &&
- net_eq(ip6addrlbl_net(p), ip6addrlbl_net(newp)) &&
p->ifindex == newp->ifindex &&
ipv6_addr_equal(&p->prefix, &newp->prefix)) {
if (!replace) {
@@ -261,7 +219,7 @@ static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace)
goto out;
}
hlist_replace_rcu(&p->list, &newp->list);
- ip6addrlbl_put(p);
+ kfree_rcu(p, rcu);
goto out;
} else if ((p->prefixlen == newp->prefixlen && !p->ifindex) ||
(p->prefixlen < newp->prefixlen)) {
@@ -273,10 +231,10 @@ static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace)
if (last)
hlist_add_behind_rcu(&newp->list, &last->list);
else
- hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head);
+ hlist_add_head_rcu(&newp->list, &net->ipv6.ip6addrlbl_table.head);
out:
if (!ret)
- ip6addrlbl_table.seq++;
+ net->ipv6.ip6addrlbl_table.seq++;
return ret;
}
@@ -292,14 +250,14 @@ static int ip6addrlbl_add(struct net *net,
__func__, prefix, prefixlen, ifindex, (unsigned int)label,
replace);
- newp = ip6addrlbl_alloc(net, prefix, prefixlen, ifindex, label);
+ newp = ip6addrlbl_alloc(prefix, prefixlen, ifindex, label);
if (IS_ERR(newp))
return PTR_ERR(newp);
- spin_lock(&ip6addrlbl_table.lock);
- ret = __ip6addrlbl_add(newp, replace);
- spin_unlock(&ip6addrlbl_table.lock);
+ spin_lock(&net->ipv6.ip6addrlbl_table.lock);
+ ret = __ip6addrlbl_add(net, newp, replace);
+ spin_unlock(&net->ipv6.ip6addrlbl_table.lock);
if (ret)
- ip6addrlbl_free(newp);
+ kfree(newp);
return ret;
}
@@ -315,13 +273,12 @@ static int __ip6addrlbl_del(struct net *net,
ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n",
__func__, prefix, prefixlen, ifindex);
- hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) {
+ hlist_for_each_entry_safe(p, n, &net->ipv6.ip6addrlbl_table.head, list) {
if (p->prefixlen == prefixlen &&
- net_eq(ip6addrlbl_net(p), net) &&
p->ifindex == ifindex &&
ipv6_addr_equal(&p->prefix, prefix)) {
hlist_del_rcu(&p->list);
- ip6addrlbl_put(p);
+ kfree_rcu(p, rcu);
ret = 0;
break;
}
@@ -340,9 +297,9 @@ static int ip6addrlbl_del(struct net *net,
__func__, prefix, prefixlen, ifindex);
ipv6_addr_prefix(&prefix_buf, prefix, prefixlen);
- spin_lock(&ip6addrlbl_table.lock);
+ spin_lock(&net->ipv6.ip6addrlbl_table.lock);
ret = __ip6addrlbl_del(net, &prefix_buf, prefixlen, ifindex);
- spin_unlock(&ip6addrlbl_table.lock);
+ spin_unlock(&net->ipv6.ip6addrlbl_table.lock);
return ret;
}
@@ -354,6 +311,9 @@ static int __net_init ip6addrlbl_net_init(struct net *net)
ADDRLABEL(KERN_DEBUG "%s\n", __func__);
+ spin_lock_init(&net->ipv6.ip6addrlbl_table.lock);
+ INIT_HLIST_HEAD(&net->ipv6.ip6addrlbl_table.head);
+
for (i = 0; i < ARRAY_SIZE(ip6addrlbl_init_table); i++) {
int ret = ip6addrlbl_add(net,
ip6addrlbl_init_table[i].prefix,
@@ -373,14 +333,12 @@ static void __net_exit ip6addrlbl_net_exit(struct net *net)
struct hlist_node *n;
/* Remove all labels belonging to the exiting net */
- spin_lock(&ip6addrlbl_table.lock);
- hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) {
- if (net_eq(ip6addrlbl_net(p), net)) {
- hlist_del_rcu(&p->list);
- ip6addrlbl_put(p);
- }
+ spin_lock(&net->ipv6.ip6addrlbl_table.lock);
+ hlist_for_each_entry_safe(p, n, &net->ipv6.ip6addrlbl_table.head, list) {
+ hlist_del_rcu(&p->list);
+ kfree_rcu(p, rcu);
}
- spin_unlock(&ip6addrlbl_table.lock);
+ spin_unlock(&net->ipv6.ip6addrlbl_table.lock);
}
static struct pernet_operations ipv6_addr_label_ops = {
@@ -390,8 +348,6 @@ static struct pernet_operations ipv6_addr_label_ops = {
int __init ipv6_addr_label_init(void)
{
- spin_lock_init(&ip6addrlbl_table.lock);
-
return register_pernet_subsys(&ipv6_addr_label_ops);
}
@@ -405,6 +361,18 @@ static const struct nla_policy ifal_policy[IFAL_MAX+1] = {
[IFAL_LABEL] = { .len = sizeof(u32), },
};
+static bool addrlbl_ifindex_exists(struct net *net, int ifindex)
+{
+
+ struct net_device *dev;
+
+ rcu_read_lock();
+ dev = dev_get_by_index_rcu(net, ifindex);
+ rcu_read_unlock();
+
+ return dev != NULL;
+}
+
static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
@@ -439,7 +407,7 @@ static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh,
switch (nlh->nlmsg_type) {
case RTM_NEWADDRLABEL:
if (ifal->ifal_index &&
- !__dev_get_by_index(net, ifal->ifal_index))
+ !addrlbl_ifindex_exists(net, ifal->ifal_index))
return -EINVAL;
err = ip6addrlbl_add(net, pfx, ifal->ifal_prefixlen,
@@ -498,11 +466,10 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
int err;
rcu_read_lock();
- hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) {
- if (idx >= s_idx &&
- net_eq(ip6addrlbl_net(p), net)) {
+ hlist_for_each_entry_rcu(p, &net->ipv6.ip6addrlbl_table.head, list) {
+ if (idx >= s_idx) {
err = ip6addrlbl_fill(skb, p,
- ip6addrlbl_table.seq,
+ net->ipv6.ip6addrlbl_table.seq,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
RTM_NEWADDRLABEL,
@@ -548,55 +515,45 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
return -EINVAL;
if (ifal->ifal_index &&
- !__dev_get_by_index(net, ifal->ifal_index))
+ !addrlbl_ifindex_exists(net, ifal->ifal_index))
return -EINVAL;
if (!tb[IFAL_ADDRESS])
return -EINVAL;
addr = nla_data(tb[IFAL_ADDRESS]);
- rcu_read_lock();
- p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index);
- if (p && !ip6addrlbl_hold(p))
- p = NULL;
- lseq = ip6addrlbl_table.seq;
- rcu_read_unlock();
-
- if (!p) {
- err = -ESRCH;
- goto out;
- }
-
skb = nlmsg_new(ip6addrlbl_msgsize(), GFP_KERNEL);
- if (!skb) {
- ip6addrlbl_put(p);
+ if (!skb)
return -ENOBUFS;
- }
- err = ip6addrlbl_fill(skb, p, lseq,
- NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
- RTM_NEWADDRLABEL, 0);
+ err = -ESRCH;
- ip6addrlbl_put(p);
+ rcu_read_lock();
+ p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index);
+ lseq = net->ipv6.ip6addrlbl_table.seq;
+ if (p)
+ err = ip6addrlbl_fill(skb, p, lseq,
+ NETLINK_CB(in_skb).portid,
+ nlh->nlmsg_seq,
+ RTM_NEWADDRLABEL, 0);
+ rcu_read_unlock();
if (err < 0) {
WARN_ON(err == -EMSGSIZE);
kfree_skb(skb);
- goto out;
+ } else {
+ err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
}
-
- err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
-out:
return err;
}
void __init ipv6_addr_label_rtnl_register(void)
{
__rtnl_register(PF_INET6, RTM_NEWADDRLABEL, ip6addrlbl_newdel,
- NULL, NULL);
+ NULL, RTNL_FLAG_DOIT_UNLOCKED);
__rtnl_register(PF_INET6, RTM_DELADDRLABEL, ip6addrlbl_newdel,
- NULL, NULL);
+ NULL, RTNL_FLAG_DOIT_UNLOCKED);
__rtnl_register(PF_INET6, RTM_GETADDRLABEL, ip6addrlbl_get,
- ip6addrlbl_dump, NULL);
+ ip6addrlbl_dump, RTNL_FLAG_DOIT_UNLOCKED);
}
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index a88b5b5b7955..c26f71234b9c 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -210,7 +210,8 @@ lookup_protocol:
np->mcast_hops = IPV6_DEFAULT_MCASTHOPS;
np->mc_loop = 1;
np->pmtudisc = IPV6_PMTUDISC_WANT;
- np->autoflowlabel = ip6_default_np_autolabel(sock_net(sk));
+ np->autoflowlabel = ip6_default_np_autolabel(net);
+ np->repflow = net->ipv6.sysctl.flowlabel_reflect;
sk->sk_ipv6only = net->ipv6.sysctl.bindv6only;
/* Init the ipv4 part of the socket since we can have sockets
@@ -554,6 +555,8 @@ const struct proto_ops inet6_stream_ops = {
.recvmsg = inet_recvmsg, /* ok */
.mmap = sock_no_mmap,
.sendpage = inet_sendpage,
+ .sendmsg_locked = tcp_sendmsg_locked,
+ .sendpage_locked = tcp_sendpage_locked,
.splice_read = tcp_splice_read,
.read_sock = tcp_read_sock,
.peek_len = tcp_peek_len,
@@ -807,6 +810,10 @@ static int __net_init inet6_net_init(struct net *net)
net->ipv6.sysctl.idgen_retries = 3;
net->ipv6.sysctl.idgen_delay = 1 * HZ;
net->ipv6.sysctl.flowlabel_state_ranges = 0;
+ net->ipv6.sysctl.max_dst_opts_cnt = IP6_DEFAULT_MAX_DST_OPTS_CNT;
+ net->ipv6.sysctl.max_hbh_opts_cnt = IP6_DEFAULT_MAX_HBH_OPTS_CNT;
+ net->ipv6.sysctl.max_dst_opts_len = IP6_DEFAULT_MAX_DST_OPTS_LEN;
+ net->ipv6.sysctl.max_hbh_opts_len = IP6_DEFAULT_MAX_HBH_OPTS_LEN;
atomic_set(&net->ipv6.fib6_sernum, 1);
err = ipv6_init_mibs(net);
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 7802b72196f3..78c974391567 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -271,6 +271,7 @@ static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len, int dir)
case NEXTHDR_DEST:
if (dir == XFRM_POLICY_OUT)
ipv6_rearrange_destopt(iph, exthdr.opth);
+ /* fall through */
case NEXTHDR_HOP:
if (!zero_out_mutable_opts(exthdr.opth)) {
net_dbg_ratelimited("overrun %sopts\n",
@@ -443,7 +444,7 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
if (err == -EINPROGRESS)
goto out;
- if (err == -EBUSY)
+ if (err == -ENOSPC)
err = NET_XMIT_DROP;
goto out_free;
}
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 9ed35473dcb5..a902ff8f59be 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -226,7 +226,7 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
int tailen = esp->tailen;
if (!skb_cloned(skb)) {
- if (tailen <= skb_availroom(skb)) {
+ if (tailen <= skb_tailroom(skb)) {
nfrags = 1;
trailer = skb;
tail = skb_tail_pointer(trailer);
@@ -260,8 +260,6 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
kunmap_atomic(vaddr);
- spin_unlock_bh(&x->lock);
-
nfrags = skb_shinfo(skb)->nr_frags;
__skb_fill_page_desc(skb, nfrags, page, pfrag->offset,
@@ -269,6 +267,9 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
skb_shinfo(skb)->nr_frags = ++nfrags;
pfrag->offset = pfrag->offset + allocsize;
+
+ spin_unlock_bh(&x->lock);
+
nfrags++;
skb->len += tailen;
@@ -345,7 +346,7 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
(unsigned char *)esph - skb->data,
assoclen + ivlen + esp->clen + alen);
if (unlikely(err < 0))
- goto error;
+ goto error_free;
if (!esp->inplace) {
int allocsize;
@@ -356,7 +357,7 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
spin_lock_bh(&x->lock);
if (unlikely(!skb_page_frag_refill(allocsize, pfrag, GFP_ATOMIC))) {
spin_unlock_bh(&x->lock);
- goto error;
+ goto error_free;
}
skb_shinfo(skb)->nr_frags = 1;
@@ -373,7 +374,7 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
(unsigned char *)esph - skb->data,
assoclen + ivlen + esp->clen + alen);
if (unlikely(err < 0))
- goto error;
+ goto error_free;
}
if ((x->props.flags & XFRM_STATE_ESN))
@@ -395,7 +396,7 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
case -EINPROGRESS:
goto error;
- case -EBUSY:
+ case -ENOSPC:
err = NET_XMIT_DROP;
break;
@@ -406,8 +407,9 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
if (sg != dsg)
esp_ssg_unref(x, tmp);
- kfree(tmp);
+error_free:
+ kfree(tmp);
error:
return err;
}
@@ -461,28 +463,30 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
return esp6_output_tail(x, skb, &esp);
}
-int esp6_input_done2(struct sk_buff *skb, int err)
+static inline int esp_remove_trailer(struct sk_buff *skb)
{
struct xfrm_state *x = xfrm_input_state(skb);
struct xfrm_offload *xo = xfrm_offload(skb);
struct crypto_aead *aead = x->data;
- int alen = crypto_aead_authsize(aead);
- int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead);
- int elen = skb->len - hlen;
- int hdr_len = skb_network_header_len(skb);
- int padlen;
+ int alen, hlen, elen;
+ int padlen, trimlen;
+ __wsum csumdiff;
u8 nexthdr[2];
+ int ret;
- if (!xo || (xo && !(xo->flags & CRYPTO_DONE)))
- kfree(ESP_SKB_CB(skb)->tmp);
+ alen = crypto_aead_authsize(aead);
+ hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead);
+ elen = skb->len - hlen;
- if (unlikely(err))
+ if (xo && (xo->flags & XFRM_ESP_NO_TRAILER)) {
+ ret = xo->proto;
goto out;
+ }
- if (skb_copy_bits(skb, skb->len - alen - 2, nexthdr, 2))
- BUG();
+ ret = skb_copy_bits(skb, skb->len - alen - 2, nexthdr, 2);
+ BUG_ON(ret);
- err = -EINVAL;
+ ret = -EINVAL;
padlen = nexthdr[0];
if (padlen + 2 + alen >= elen) {
net_dbg_ratelimited("ipsec esp packet is garbage padlen=%d, elen=%d\n",
@@ -490,17 +494,46 @@ int esp6_input_done2(struct sk_buff *skb, int err)
goto out;
}
- /* ... check padding bits here. Silly. :-) */
+ trimlen = alen + padlen + 2;
+ if (skb->ip_summed == CHECKSUM_COMPLETE) {
+ csumdiff = skb_checksum(skb, skb->len - trimlen, trimlen, 0);
+ skb->csum = csum_block_sub(skb->csum, csumdiff,
+ skb->len - trimlen);
+ }
+ pskb_trim(skb, skb->len - trimlen);
+
+ ret = nexthdr[1];
+
+out:
+ return ret;
+}
+
+int esp6_input_done2(struct sk_buff *skb, int err)
+{
+ struct xfrm_state *x = xfrm_input_state(skb);
+ struct xfrm_offload *xo = xfrm_offload(skb);
+ struct crypto_aead *aead = x->data;
+ int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead);
+ int hdr_len = skb_network_header_len(skb);
- pskb_trim(skb, skb->len - alen - padlen - 2);
- __skb_pull(skb, hlen);
+ if (!xo || (xo && !(xo->flags & CRYPTO_DONE)))
+ kfree(ESP_SKB_CB(skb)->tmp);
+
+ if (unlikely(err))
+ goto out;
+
+ err = esp_remove_trailer(skb);
+ if (unlikely(err < 0))
+ goto out;
+
+ skb_postpull_rcsum(skb, skb_network_header(skb),
+ skb_network_header_len(skb));
+ skb_pull_rcsum(skb, hlen);
if (x->props.mode == XFRM_MODE_TUNNEL)
skb_reset_transport_header(skb);
else
skb_set_transport_header(skb, -hdr_len);
- err = nexthdr[1];
-
/* RFC4303: Drop dummy packets without any error */
if (err == IPPROTO_NONE)
err = -EINVAL;
@@ -526,14 +559,14 @@ static void esp_input_restore_header(struct sk_buff *skb)
static void esp_input_set_header(struct sk_buff *skb, __be32 *seqhi)
{
struct xfrm_state *x = xfrm_input_state(skb);
- struct ip_esp_hdr *esph = (struct ip_esp_hdr *)skb->data;
/* For ESN we move the header forward by 4 bytes to
* accomodate the high bits. We will move it back after
* decryption.
*/
if ((x->props.flags & XFRM_STATE_ESN)) {
- esph = skb_push(skb, 4);
+ struct ip_esp_hdr *esph = skb_push(skb, 4);
+
*seqhi = esph->spi;
esph->spi = esph->seq_no;
esph->seq_no = XFRM_SKB_CB(skb)->seq.input.hi;
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index f02f131f6435..333a478aa161 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -209,11 +209,13 @@ out:
static int esp6_input_tail(struct xfrm_state *x, struct sk_buff *skb)
{
struct crypto_aead *aead = x->data;
+ struct xfrm_offload *xo = xfrm_offload(skb);
if (!pskb_may_pull(skb, sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead)))
return -EINVAL;
- skb->ip_summed = CHECKSUM_NONE;
+ if (!(xo->flags & CRYPTO_DONE))
+ skb->ip_summed = CHECKSUM_NONE;
return esp6_input_done2(skb, 0);
}
@@ -286,7 +288,7 @@ static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features
esp.seqno = cpu_to_be64(xo->seq.low + ((u64)xo->seq.hi << 32));
err = esp6_output_tail(x, skb, &esp);
- if (err < 0)
+ if (err)
return err;
secpath_reset(skb);
@@ -332,3 +334,4 @@ module_init(esp6_offload_init);
module_exit(esp6_offload_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Steffen Klassert <steffen.klassert@secunet.com>");
+MODULE_ALIAS_XFRM_OFFLOAD_TYPE(AF_INET6, XFRM_PROTO_ESP);
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 3cec529c6113..83bd75713535 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -74,8 +74,20 @@ struct tlvtype_proc {
/* An unknown option is detected, decide what to do */
-static bool ip6_tlvopt_unknown(struct sk_buff *skb, int optoff)
+static bool ip6_tlvopt_unknown(struct sk_buff *skb, int optoff,
+ bool disallow_unknowns)
{
+ if (disallow_unknowns) {
+ /* If unknown TLVs are disallowed by configuration
+ * then always silently drop packet. Note this also
+ * means no ICMP parameter problem is sent which
+ * could be a good property to mitigate a reflection DOS
+ * attack.
+ */
+
+ goto drop;
+ }
+
switch ((skb_network_header(skb)[optoff] & 0xC0) >> 6) {
case 0: /* ignore */
return true;
@@ -89,25 +101,36 @@ static bool ip6_tlvopt_unknown(struct sk_buff *skb, int optoff)
*/
if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr))
break;
+ /* fall through */
case 2: /* send ICMP PARM PROB regardless and drop packet */
icmpv6_param_prob(skb, ICMPV6_UNK_OPTION, optoff);
return false;
}
+drop:
kfree_skb(skb);
return false;
}
/* Parse tlv encoded option header (hop-by-hop or destination) */
-static bool ip6_parse_tlv(const struct tlvtype_proc *procs, struct sk_buff *skb)
+static bool ip6_parse_tlv(const struct tlvtype_proc *procs,
+ struct sk_buff *skb,
+ int max_count)
{
- const struct tlvtype_proc *curr;
+ int len = (skb_transport_header(skb)[1] + 1) << 3;
const unsigned char *nh = skb_network_header(skb);
int off = skb_network_header_len(skb);
- int len = (skb_transport_header(skb)[1] + 1) << 3;
+ const struct tlvtype_proc *curr;
+ bool disallow_unknowns = false;
+ int tlv_count = 0;
int padlen = 0;
+ if (unlikely(max_count < 0)) {
+ disallow_unknowns = true;
+ max_count = -max_count;
+ }
+
if (skb_transport_offset(skb) + len > skb_headlen(skb))
goto bad;
@@ -148,6 +171,11 @@ static bool ip6_parse_tlv(const struct tlvtype_proc *procs, struct sk_buff *skb)
default: /* Other TLV code so scan list */
if (optlen > len)
goto bad;
+
+ tlv_count++;
+ if (tlv_count > max_count)
+ goto bad;
+
for (curr = procs; curr->type >= 0; curr++) {
if (curr->type == nh[off]) {
/* type specific length/alignment
@@ -158,10 +186,10 @@ static bool ip6_parse_tlv(const struct tlvtype_proc *procs, struct sk_buff *skb)
break;
}
}
- if (curr->type < 0) {
- if (ip6_tlvopt_unknown(skb, off) == 0)
- return false;
- }
+ if (curr->type < 0 &&
+ !ip6_tlvopt_unknown(skb, off, disallow_unknowns))
+ return false;
+
padlen = 0;
break;
}
@@ -186,7 +214,6 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff)
struct ipv6_destopt_hao *hao;
struct inet6_skb_parm *opt = IP6CB(skb);
struct ipv6hdr *ipv6h = ipv6_hdr(skb);
- struct in6_addr tmp_addr;
int ret;
if (opt->dsthao) {
@@ -228,9 +255,7 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff)
if (skb->ip_summed == CHECKSUM_COMPLETE)
skb->ip_summed = CHECKSUM_NONE;
- tmp_addr = ipv6h->saddr;
- ipv6h->saddr = hao->addr;
- hao->addr = tmp_addr;
+ swap(ipv6h->saddr, hao->addr);
if (skb->tstamp == 0)
__net_timestamp(skb);
@@ -260,23 +285,31 @@ static int ipv6_destopt_rcv(struct sk_buff *skb)
__u16 dstbuf;
#endif
struct dst_entry *dst = skb_dst(skb);
+ struct net *net = dev_net(skb->dev);
+ int extlen;
if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) ||
!pskb_may_pull(skb, (skb_transport_offset(skb) +
((skb_transport_header(skb)[1] + 1) << 3)))) {
__IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
IPSTATS_MIB_INHDRERRORS);
+fail_and_free:
kfree_skb(skb);
return -1;
}
+ extlen = (skb_transport_header(skb)[1] + 1) << 3;
+ if (extlen > net->ipv6.sysctl.max_dst_opts_len)
+ goto fail_and_free;
+
opt->lastopt = opt->dst1 = skb_network_header_len(skb);
#if IS_ENABLED(CONFIG_IPV6_MIP6)
dstbuf = opt->dst1;
#endif
- if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) {
- skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3;
+ if (ip6_parse_tlv(tlvprocdestopt_lst, skb,
+ init_net.ipv6.sysctl.max_dst_opts_cnt)) {
+ skb->transport_header += extlen;
opt = IP6CB(skb);
#if IS_ENABLED(CONFIG_IPV6_MIP6)
opt->nhoff = dstbuf;
@@ -805,6 +838,8 @@ static const struct tlvtype_proc tlvprochopopt_lst[] = {
int ipv6_parse_hopopts(struct sk_buff *skb)
{
struct inet6_skb_parm *opt = IP6CB(skb);
+ struct net *net = dev_net(skb->dev);
+ int extlen;
/*
* skb_network_header(skb) is equal to skb->data, and
@@ -815,13 +850,19 @@ int ipv6_parse_hopopts(struct sk_buff *skb)
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + 8) ||
!pskb_may_pull(skb, (sizeof(struct ipv6hdr) +
((skb_transport_header(skb)[1] + 1) << 3)))) {
+fail_and_free:
kfree_skb(skb);
return -1;
}
+ extlen = (skb_transport_header(skb)[1] + 1) << 3;
+ if (extlen > net->ipv6.sysctl.max_hbh_opts_len)
+ goto fail_and_free;
+
opt->flags |= IP6SKB_HOPBYHOP;
- if (ip6_parse_tlv(tlvprochopopt_lst, skb)) {
- skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3;
+ if (ip6_parse_tlv(tlvprochopopt_lst, skb,
+ init_net.ipv6.sysctl.max_hbh_opts_cnt)) {
+ skb->transport_header += extlen;
opt = IP6CB(skb);
opt->nhoff = sizeof(struct ipv6hdr);
return 1;
@@ -882,7 +923,7 @@ static void ipv6_push_rthdr4(struct sk_buff *skb, u8 *proto,
(hops - 1) * sizeof(struct in6_addr));
sr_phdr->segments[0] = **addr_p;
- *addr_p = &sr_ihdr->segments[hops - 1];
+ *addr_p = &sr_ihdr->segments[sr_ihdr->segments_left];
#ifdef CONFIG_IPV6_SEG6_HMAC
if (sr_has_hmac(sr_phdr)) {
@@ -1174,7 +1215,7 @@ struct in6_addr *fl6_update_dst(struct flowi6 *fl6,
{
struct ipv6_sr_hdr *srh = (struct ipv6_sr_hdr *)opt->srcrt;
- fl6->daddr = srh->segments[srh->first_segment];
+ fl6->daddr = srh->segments[srh->segments_left];
break;
}
default:
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index 305e2ed730bf..11025f8d124b 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -99,7 +99,7 @@ int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp,
break;
hdrlen = 8;
} else if (nexthdr == NEXTHDR_AUTH)
- hdrlen = (hp->hdrlen+2)<<2;
+ hdrlen = ipv6_authlen(hp);
else
hdrlen = ipv6_optlen(hp);
@@ -187,7 +187,6 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
{
unsigned int start = skb_network_offset(skb) + sizeof(struct ipv6hdr);
u8 nexthdr = ipv6_hdr(skb)->nexthdr;
- unsigned int len;
bool found;
if (fragoff)
@@ -204,7 +203,6 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
start = *offset + sizeof(struct ipv6hdr);
nexthdr = ip6->nexthdr;
}
- len = skb->len - start;
do {
struct ipv6_opt_hdr _hdr, *hp;
@@ -273,7 +271,6 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
if (!found) {
nexthdr = hp->nexthdr;
- len -= hdrlen;
start += hdrlen;
}
} while (!found);
diff --git a/net/ipv6/fib6_notifier.c b/net/ipv6/fib6_notifier.c
new file mode 100644
index 000000000000..05f82baaa99e
--- /dev/null
+++ b/net/ipv6/fib6_notifier.c
@@ -0,0 +1,63 @@
+#include <linux/notifier.h>
+#include <linux/socket.h>
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <net/net_namespace.h>
+#include <net/fib_notifier.h>
+#include <net/netns/ipv6.h>
+#include <net/ip6_fib.h>
+
+int call_fib6_notifier(struct notifier_block *nb, struct net *net,
+ enum fib_event_type event_type,
+ struct fib_notifier_info *info)
+{
+ info->family = AF_INET6;
+ return call_fib_notifier(nb, net, event_type, info);
+}
+
+int call_fib6_notifiers(struct net *net, enum fib_event_type event_type,
+ struct fib_notifier_info *info)
+{
+ info->family = AF_INET6;
+ return call_fib_notifiers(net, event_type, info);
+}
+
+static unsigned int fib6_seq_read(struct net *net)
+{
+ return fib6_tables_seq_read(net) + fib6_rules_seq_read(net);
+}
+
+static int fib6_dump(struct net *net, struct notifier_block *nb)
+{
+ int err;
+
+ err = fib6_rules_dump(net, nb);
+ if (err)
+ return err;
+
+ return fib6_tables_dump(net, nb);
+}
+
+static const struct fib_notifier_ops fib6_notifier_ops_template = {
+ .family = AF_INET6,
+ .fib_seq_read = fib6_seq_read,
+ .fib_dump = fib6_dump,
+ .owner = THIS_MODULE,
+};
+
+int __net_init fib6_notifier_init(struct net *net)
+{
+ struct fib_notifier_ops *ops;
+
+ ops = fib_notifier_ops_register(&fib6_notifier_ops_template, net);
+ if (IS_ERR(ops))
+ return PTR_ERR(ops);
+ net->ipv6.notifier_ops = ops;
+
+ return 0;
+}
+
+void __net_exit fib6_notifier_exit(struct net *net)
+{
+ fib_notifier_ops_unregister(net->ipv6.notifier_ops);
+}
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index ec849d88a662..b240f24a6e52 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -14,6 +14,7 @@
*/
#include <linux/netdevice.h>
+#include <linux/notifier.h>
#include <linux/export.h>
#include <net/fib_rules.h>
@@ -29,22 +30,65 @@ struct fib6_rule {
u8 tclass;
};
-struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
- int flags, pol_lookup_t lookup)
+static bool fib6_rule_matchall(const struct fib_rule *rule)
+{
+ struct fib6_rule *r = container_of(rule, struct fib6_rule, common);
+
+ if (r->dst.plen || r->src.plen || r->tclass)
+ return false;
+ return fib_rule_matchall(rule);
+}
+
+bool fib6_rule_default(const struct fib_rule *rule)
{
- struct fib_lookup_arg arg = {
- .lookup_ptr = lookup,
- .flags = FIB_LOOKUP_NOREF,
- };
+ if (!fib6_rule_matchall(rule) || rule->action != FR_ACT_TO_TBL ||
+ rule->l3mdev)
+ return false;
+ if (rule->table != RT6_TABLE_LOCAL && rule->table != RT6_TABLE_MAIN)
+ return false;
+ return true;
+}
+EXPORT_SYMBOL_GPL(fib6_rule_default);
- /* update flow if oif or iif point to device enslaved to l3mdev */
- l3mdev_update_flow(net, flowi6_to_flowi(fl6));
+int fib6_rules_dump(struct net *net, struct notifier_block *nb)
+{
+ return fib_rules_dump(net, nb, AF_INET6);
+}
- fib_rules_lookup(net->ipv6.fib6_rules_ops,
- flowi6_to_flowi(fl6), flags, &arg);
+unsigned int fib6_rules_seq_read(struct net *net)
+{
+ return fib_rules_seq_read(net, AF_INET6);
+}
- if (arg.result)
- return arg.result;
+struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
+ int flags, pol_lookup_t lookup)
+{
+ if (net->ipv6.fib6_has_custom_rules) {
+ struct fib_lookup_arg arg = {
+ .lookup_ptr = lookup,
+ .flags = FIB_LOOKUP_NOREF,
+ };
+
+ /* update flow if oif or iif point to device enslaved to l3mdev */
+ l3mdev_update_flow(net, flowi6_to_flowi(fl6));
+
+ fib_rules_lookup(net->ipv6.fib6_rules_ops,
+ flowi6_to_flowi(fl6), flags, &arg);
+
+ if (arg.result)
+ return arg.result;
+ } else {
+ struct rt6_info *rt;
+
+ rt = lookup(net, net->ipv6.fib6_local_tbl, fl6, flags);
+ if (rt != net->ipv6.ip6_null_entry && rt->dst.error != -EAGAIN)
+ return &rt->dst;
+ ip6_rt_put(rt);
+ rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, flags);
+ if (rt->dst.error != -EAGAIN)
+ return &rt->dst;
+ ip6_rt_put(rt);
+ }
dst_hold(&net->ipv6.ip6_null_entry->dst);
return &net->ipv6.ip6_null_entry->dst;
@@ -214,6 +258,7 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
rule6->dst.plen = frh->dst_len;
rule6->tclass = frh->tos;
+ net->ipv6.fib6_has_custom_rules = true;
err = 0;
errout:
return err;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 8d7b113958b1..6ae5dd3f4d0d 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -250,16 +250,15 @@ static bool opt_unrec(struct sk_buff *skb, __u32 offset)
return (*op & 0xC0) == 0x80;
}
-int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
- struct icmp6hdr *thdr, int len)
+void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
+ struct icmp6hdr *thdr, int len)
{
struct sk_buff *skb;
struct icmp6hdr *icmp6h;
- int err = 0;
skb = skb_peek(&sk->sk_write_queue);
if (!skb)
- goto out;
+ return;
icmp6h = icmp6_hdr(skb);
memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
@@ -287,8 +286,6 @@ int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
tmp_csum);
}
ip6_push_pending_frames(sk);
-out:
- return err;
}
struct icmpv6_msg {
@@ -399,6 +396,24 @@ relookup_failed:
return ERR_PTR(err);
}
+static int icmp6_iif(const struct sk_buff *skb)
+{
+ int iif = skb->dev->ifindex;
+
+ /* for local traffic to local address, skb dev is the loopback
+ * device. Check if there is a dst attached to the skb and if so
+ * get the real device index.
+ */
+ if (unlikely(iif == LOOPBACK_IFINDEX)) {
+ const struct rt6_info *rt6 = skb_rt6_info(skb);
+
+ if (rt6)
+ iif = rt6->rt6i_idev->dev->ifindex;
+ }
+
+ return iif;
+}
+
/*
* Send an ICMP message in response to a packet in error
*/
@@ -420,7 +435,6 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
int iif = 0;
int addr_type = 0;
int len;
- int err = 0;
u32 mark = IP6_REPLY_MARK(net, skb->mark);
if ((u8 *)hdr < skb->head ||
@@ -459,9 +473,9 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
* Source addr check
*/
- if (__ipv6_addr_needs_scope_id(addr_type))
- iif = skb->dev->ifindex;
- else {
+ if (__ipv6_addr_needs_scope_id(addr_type)) {
+ iif = icmp6_iif(skb);
+ } else {
dst = skb_dst(skb);
iif = l3mdev_master_ifindex(dst ? dst->dev : skb->dev);
}
@@ -508,6 +522,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
fl6.fl6_icmp_type = type;
fl6.fl6_icmp_code = code;
fl6.flowi6_uid = sock_net_uid(net, NULL);
+ fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
sk = icmpv6_xmit_lock(net);
@@ -556,17 +571,16 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
rcu_read_lock();
idev = __in6_dev_get(skb->dev);
- err = ip6_append_data(sk, icmpv6_getfrag, &msg,
- len + sizeof(struct icmp6hdr),
- sizeof(struct icmp6hdr),
- &ipc6, &fl6, (struct rt6_info *)dst,
- MSG_DONTWAIT, &sockc_unused);
- if (err) {
+ if (ip6_append_data(sk, icmpv6_getfrag, &msg,
+ len + sizeof(struct icmp6hdr),
+ sizeof(struct icmp6hdr),
+ &ipc6, &fl6, (struct rt6_info *)dst,
+ MSG_DONTWAIT, &sockc_unused)) {
ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
ip6_flush_pending_frames(sk);
} else {
- err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
- len + sizeof(struct icmp6hdr));
+ icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
+ len + sizeof(struct icmp6hdr));
}
rcu_read_unlock();
out_dst_release:
@@ -663,7 +677,6 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
struct icmpv6_msg msg;
struct dst_entry *dst;
struct ipcm6_cookie ipc6;
- int err = 0;
u32 mark = IP6_REPLY_MARK(net, skb->mark);
struct sockcm_cookie sockc_unused = {0};
@@ -682,7 +695,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
fl6.daddr = ipv6_hdr(skb)->saddr;
if (saddr)
fl6.saddr = *saddr;
- fl6.flowi6_oif = skb->dev->ifindex;
+ fl6.flowi6_oif = icmp6_iif(skb);
fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
fl6.flowi6_mark = mark;
fl6.flowi6_uid = sock_net_uid(net, NULL);
@@ -700,8 +713,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
else if (!fl6.flowi6_oif)
fl6.flowi6_oif = np->ucast_oif;
- err = ip6_dst_lookup(net, sk, &dst, &fl6);
- if (err)
+ if (ip6_dst_lookup(net, sk, &dst, &fl6))
goto out;
dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
if (IS_ERR(dst))
@@ -718,17 +730,16 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
ipc6.dontfrag = np->dontfrag;
ipc6.opt = NULL;
- err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
- sizeof(struct icmp6hdr), &ipc6, &fl6,
- (struct rt6_info *)dst, MSG_DONTWAIT,
- &sockc_unused);
-
- if (err) {
+ if (ip6_append_data(sk, icmpv6_getfrag, &msg,
+ skb->len + sizeof(struct icmp6hdr),
+ sizeof(struct icmp6hdr), &ipc6, &fl6,
+ (struct rt6_info *)dst, MSG_DONTWAIT,
+ &sockc_unused)) {
__ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
ip6_flush_pending_frames(sk);
} else {
- err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
- skb->len + sizeof(struct icmp6hdr));
+ icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
+ skb->len + sizeof(struct icmp6hdr));
}
dst_release(dst);
out:
@@ -853,10 +864,8 @@ static int icmpv6_rcv(struct sk_buff *skb)
goto discard_it;
hdr = icmp6_hdr(skb);
- /*
- * Drop through to notify
- */
-
+ /* to notify */
+ /* fall through */
case ICMPV6_DEST_UNREACH:
case ICMPV6_TIME_EXCEED:
case ICMPV6_PARAMPROB:
diff --git a/net/ipv6/ila/ila.h b/net/ipv6/ila/ila.h
index e0170f62bc39..3c7a11b62334 100644
--- a/net/ipv6/ila/ila.h
+++ b/net/ipv6/ila/ila.h
@@ -55,17 +55,6 @@ struct ila_identifier {
};
};
-enum {
- ILA_ATYPE_IID = 0,
- ILA_ATYPE_LUID,
- ILA_ATYPE_VIRT_V4,
- ILA_ATYPE_VIRT_UNI_V6,
- ILA_ATYPE_VIRT_MULTI_V6,
- ILA_ATYPE_RSVD_1,
- ILA_ATYPE_RSVD_2,
- ILA_ATYPE_RSVD_3,
-};
-
#define CSUM_NEUTRAL_FLAG htonl(0x10000000)
struct ila_addr {
@@ -93,6 +82,7 @@ struct ila_params {
struct ila_locator locator_match;
__wsum csum_diff;
u8 csum_mode;
+ u8 ident_type;
};
static inline __wsum compute_csum_diff8(const __be32 *from, const __be32 *to)
diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c
index aba0998ddbfb..8c88ecf29b93 100644
--- a/net/ipv6/ila/ila_common.c
+++ b/net/ipv6/ila/ila_common.c
@@ -13,30 +13,37 @@
#include <uapi/linux/ila.h>
#include "ila.h"
-static __wsum get_csum_diff(struct ipv6hdr *ip6h, struct ila_params *p)
+void ila_init_saved_csum(struct ila_params *p)
{
- struct ila_addr *iaddr = ila_a2i(&ip6h->daddr);
+ if (!p->locator_match.v64)
+ return;
+ p->csum_diff = compute_csum_diff8(
+ (__be32 *)&p->locator,
+ (__be32 *)&p->locator_match);
+}
+
+static __wsum get_csum_diff_iaddr(struct ila_addr *iaddr, struct ila_params *p)
+{
if (p->locator_match.v64)
return p->csum_diff;
else
- return compute_csum_diff8((__be32 *)&iaddr->loc,
- (__be32 *)&p->locator);
+ return compute_csum_diff8((__be32 *)&p->locator,
+ (__be32 *)&iaddr->loc);
}
-static void ila_csum_do_neutral(struct ila_addr *iaddr,
- struct ila_params *p)
+static __wsum get_csum_diff(struct ipv6hdr *ip6h, struct ila_params *p)
+{
+ return get_csum_diff_iaddr(ila_a2i(&ip6h->daddr), p);
+}
+
+static void ila_csum_do_neutral_fmt(struct ila_addr *iaddr,
+ struct ila_params *p)
{
__sum16 *adjust = (__force __sum16 *)&iaddr->ident.v16[3];
__wsum diff, fval;
- /* Check if checksum adjust value has been cached */
- if (p->locator_match.v64) {
- diff = p->csum_diff;
- } else {
- diff = compute_csum_diff8((__be32 *)&p->locator,
- (__be32 *)iaddr);
- }
+ diff = get_csum_diff_iaddr(iaddr, p);
fval = (__force __wsum)(ila_csum_neutral_set(iaddr->ident) ?
CSUM_NEUTRAL_FLAG : ~CSUM_NEUTRAL_FLAG);
@@ -53,13 +60,23 @@ static void ila_csum_do_neutral(struct ila_addr *iaddr,
iaddr->ident.csum_neutral ^= 1;
}
-static void ila_csum_adjust_transport(struct sk_buff *skb,
+static void ila_csum_do_neutral_nofmt(struct ila_addr *iaddr,
struct ila_params *p)
{
+ __sum16 *adjust = (__force __sum16 *)&iaddr->ident.v16[3];
__wsum diff;
- struct ipv6hdr *ip6h = ipv6_hdr(skb);
- struct ila_addr *iaddr = ila_a2i(&ip6h->daddr);
+
+ diff = get_csum_diff_iaddr(iaddr, p);
+
+ *adjust = ~csum_fold(csum_add(diff, csum_unfold(*adjust)));
+}
+
+static void ila_csum_adjust_transport(struct sk_buff *skb,
+ struct ila_params *p)
+{
size_t nhoff = sizeof(struct ipv6hdr);
+ struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ __wsum diff;
switch (ip6h->nexthdr) {
case NEXTHDR_TCP:
@@ -98,52 +115,45 @@ static void ila_csum_adjust_transport(struct sk_buff *skb,
}
break;
}
-
- /* Now change destination address */
- iaddr->loc = p->locator;
}
void ila_update_ipv6_locator(struct sk_buff *skb, struct ila_params *p,
- bool set_csum_neutral)
+ bool sir2ila)
{
struct ipv6hdr *ip6h = ipv6_hdr(skb);
struct ila_addr *iaddr = ila_a2i(&ip6h->daddr);
- /* First deal with the transport checksum */
- if (ila_csum_neutral_set(iaddr->ident)) {
- /* C-bit is set in the locator indicating that this
- * is a locator being translated to a SIR address.
- * Perform (receiver) checksum-neutral translation.
- */
- if (!set_csum_neutral)
- ila_csum_do_neutral(iaddr, p);
- } else {
- switch (p->csum_mode) {
- case ILA_CSUM_ADJUST_TRANSPORT:
- ila_csum_adjust_transport(skb, p);
- break;
- case ILA_CSUM_NEUTRAL_MAP:
- ila_csum_do_neutral(iaddr, p);
- break;
- case ILA_CSUM_NO_ACTION:
+ switch (p->csum_mode) {
+ case ILA_CSUM_ADJUST_TRANSPORT:
+ ila_csum_adjust_transport(skb, p);
+ break;
+ case ILA_CSUM_NEUTRAL_MAP:
+ if (sir2ila) {
+ if (WARN_ON(ila_csum_neutral_set(iaddr->ident))) {
+ /* Checksum flag should never be
+ * set in a formatted SIR address.
+ */
+ break;
+ }
+ } else if (!ila_csum_neutral_set(iaddr->ident)) {
+ /* ILA to SIR translation and C-bit isn't
+ * set so we're good.
+ */
break;
}
+ ila_csum_do_neutral_fmt(iaddr, p);
+ break;
+ case ILA_CSUM_NEUTRAL_MAP_AUTO:
+ ila_csum_do_neutral_nofmt(iaddr, p);
+ break;
+ case ILA_CSUM_NO_ACTION:
+ break;
}
/* Now change destination address */
iaddr->loc = p->locator;
}
-void ila_init_saved_csum(struct ila_params *p)
-{
- if (!p->locator_match.v64)
- return;
-
- p->csum_diff = compute_csum_diff8(
- (__be32 *)&p->locator,
- (__be32 *)&p->locator_match);
-}
-
static int __init ila_init(void)
{
int ret;
diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c
index 0c02a09bc351..3d56a2fb6f86 100644
--- a/net/ipv6/ila/ila_lwt.c
+++ b/net/ipv6/ila/ila_lwt.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/errno.h>
#include <linux/ip.h>
#include <linux/kernel.h>
@@ -19,6 +20,7 @@ struct ila_lwt {
struct ila_params p;
struct dst_cache dst_cache;
u32 connected : 1;
+ u32 lwt_output : 1;
};
static inline struct ila_lwt *ila_lwt_lwtunnel(
@@ -44,8 +46,10 @@ static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb)
if (skb->protocol != htons(ETH_P_IPV6))
goto drop;
- ila_update_ipv6_locator(skb, ila_params_lwtunnel(orig_dst->lwtstate),
- true);
+ if (ilwt->lwt_output)
+ ila_update_ipv6_locator(skb,
+ ila_params_lwtunnel(orig_dst->lwtstate),
+ true);
if (rt->rt6i_flags & (RTF_GATEWAY | RTF_CACHE)) {
/* Already have a next hop address in route, no need for
@@ -97,11 +101,15 @@ drop:
static int ila_input(struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
+ struct ila_lwt *ilwt = ila_lwt_lwtunnel(dst->lwtstate);
if (skb->protocol != htons(ETH_P_IPV6))
goto drop;
- ila_update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate), false);
+ if (!ilwt->lwt_output)
+ ila_update_ipv6_locator(skb,
+ ila_params_lwtunnel(dst->lwtstate),
+ false);
return dst->lwtstate->orig_input(skb);
@@ -113,6 +121,8 @@ drop:
static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = {
[ILA_ATTR_LOCATOR] = { .type = NLA_U64, },
[ILA_ATTR_CSUM_MODE] = { .type = NLA_U8, },
+ [ILA_ATTR_IDENT_TYPE] = { .type = NLA_U8, },
+ [ILA_ATTR_HOOK_TYPE] = { .type = NLA_U8, },
};
static int ila_build_state(struct nlattr *nla,
@@ -126,33 +136,84 @@ static int ila_build_state(struct nlattr *nla,
struct lwtunnel_state *newts;
const struct fib6_config *cfg6 = cfg;
struct ila_addr *iaddr;
+ u8 ident_type = ILA_ATYPE_USE_FORMAT;
+ u8 hook_type = ILA_HOOK_ROUTE_OUTPUT;
+ u8 csum_mode = ILA_CSUM_NO_ACTION;
+ bool lwt_output = true;
+ u8 eff_ident_type;
int ret;
if (family != AF_INET6)
return -EINVAL;
- if (cfg6->fc_dst_len < 8 * sizeof(struct ila_locator) + 3) {
- /* Need to have full locator and at least type field
- * included in destination
- */
+ ret = nla_parse_nested(tb, ILA_ATTR_MAX, nla, ila_nl_policy, extack);
+ if (ret < 0)
+ return ret;
+
+ if (!tb[ILA_ATTR_LOCATOR])
return -EINVAL;
- }
iaddr = (struct ila_addr *)&cfg6->fc_dst;
- if (!ila_addr_is_ila(iaddr) || ila_csum_neutral_set(iaddr->ident)) {
- /* Don't allow translation for a non-ILA address or checksum
- * neutral flag to be set.
+ if (tb[ILA_ATTR_IDENT_TYPE])
+ ident_type = nla_get_u8(tb[ILA_ATTR_IDENT_TYPE]);
+
+ if (ident_type == ILA_ATYPE_USE_FORMAT) {
+ /* Infer identifier type from type field in formatted
+ * identifier.
*/
+
+ if (cfg6->fc_dst_len < 8 * sizeof(struct ila_locator) + 3) {
+ /* Need to have full locator and at least type field
+ * included in destination
+ */
+ return -EINVAL;
+ }
+
+ eff_ident_type = iaddr->ident.type;
+ } else {
+ eff_ident_type = ident_type;
+ }
+
+ switch (eff_ident_type) {
+ case ILA_ATYPE_IID:
+ /* Don't allow ILA for IID type */
+ return -EINVAL;
+ case ILA_ATYPE_LUID:
+ break;
+ case ILA_ATYPE_VIRT_V4:
+ case ILA_ATYPE_VIRT_UNI_V6:
+ case ILA_ATYPE_VIRT_MULTI_V6:
+ case ILA_ATYPE_NONLOCAL_ADDR:
+ /* These ILA formats are not supported yet. */
+ default:
return -EINVAL;
}
- ret = nla_parse_nested(tb, ILA_ATTR_MAX, nla, ila_nl_policy, extack);
- if (ret < 0)
- return ret;
+ if (tb[ILA_ATTR_HOOK_TYPE])
+ hook_type = nla_get_u8(tb[ILA_ATTR_HOOK_TYPE]);
+
+ switch (hook_type) {
+ case ILA_HOOK_ROUTE_OUTPUT:
+ lwt_output = true;
+ break;
+ case ILA_HOOK_ROUTE_INPUT:
+ lwt_output = false;
+ break;
+ default:
+ return -EINVAL;
+ }
- if (!tb[ILA_ATTR_LOCATOR])
+ if (tb[ILA_ATTR_CSUM_MODE])
+ csum_mode = nla_get_u8(tb[ILA_ATTR_CSUM_MODE]);
+
+ if (csum_mode == ILA_CSUM_NEUTRAL_MAP &&
+ ila_csum_neutral_set(iaddr->ident)) {
+ /* Don't allow translation if checksum neutral bit is
+ * configured and it's set in the SIR address.
+ */
return -EINVAL;
+ }
newts = lwtunnel_state_alloc(sizeof(*ilwt));
if (!newts)
@@ -165,19 +226,18 @@ static int ila_build_state(struct nlattr *nla,
return ret;
}
+ ilwt->lwt_output = !!lwt_output;
+
p = ila_params_lwtunnel(newts);
+ p->csum_mode = csum_mode;
+ p->ident_type = ident_type;
p->locator.v64 = (__force __be64)nla_get_u64(tb[ILA_ATTR_LOCATOR]);
/* Precompute checksum difference for translation since we
* know both the old locator and the new one.
*/
p->locator_match = iaddr->loc;
- p->csum_diff = compute_csum_diff8(
- (__be32 *)&p->locator_match, (__be32 *)&p->locator);
-
- if (tb[ILA_ATTR_CSUM_MODE])
- p->csum_mode = nla_get_u8(tb[ILA_ATTR_CSUM_MODE]);
ila_init_saved_csum(p);
@@ -202,13 +262,23 @@ static int ila_fill_encap_info(struct sk_buff *skb,
struct lwtunnel_state *lwtstate)
{
struct ila_params *p = ila_params_lwtunnel(lwtstate);
+ struct ila_lwt *ilwt = ila_lwt_lwtunnel(lwtstate);
if (nla_put_u64_64bit(skb, ILA_ATTR_LOCATOR, (__force u64)p->locator.v64,
ILA_ATTR_PAD))
goto nla_put_failure;
+
if (nla_put_u8(skb, ILA_ATTR_CSUM_MODE, (__force u8)p->csum_mode))
goto nla_put_failure;
+ if (nla_put_u8(skb, ILA_ATTR_IDENT_TYPE, (__force u8)p->ident_type))
+ goto nla_put_failure;
+
+ if (nla_put_u8(skb, ILA_ATTR_HOOK_TYPE,
+ ilwt->lwt_output ? ILA_HOOK_ROUTE_OUTPUT :
+ ILA_HOOK_ROUTE_INPUT))
+ goto nla_put_failure;
+
return 0;
nla_put_failure:
@@ -219,6 +289,8 @@ static int ila_encap_nlsize(struct lwtunnel_state *lwtstate)
{
return nla_total_size_64bit(sizeof(u64)) + /* ILA_ATTR_LOCATOR */
nla_total_size(sizeof(u8)) + /* ILA_ATTR_CSUM_MODE */
+ nla_total_size(sizeof(u8)) + /* ILA_ATTR_IDENT_TYPE */
+ nla_total_size(sizeof(u8)) + /* ILA_ATTR_HOOK_TYPE */
0;
}
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index 77f7f8c7d93d..6eb5e68f112a 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/jhash.h>
#include <linux/netfilter.h>
#include <linux/rcupdate.h>
@@ -120,6 +121,7 @@ static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = {
[ILA_ATTR_LOCATOR_MATCH] = { .type = NLA_U64, },
[ILA_ATTR_IFINDEX] = { .type = NLA_U32, },
[ILA_ATTR_CSUM_MODE] = { .type = NLA_U8, },
+ [ILA_ATTR_IDENT_TYPE] = { .type = NLA_U8, },
};
static int parse_nl_config(struct genl_info *info,
@@ -137,6 +139,14 @@ static int parse_nl_config(struct genl_info *info,
if (info->attrs[ILA_ATTR_CSUM_MODE])
xp->ip.csum_mode = nla_get_u8(info->attrs[ILA_ATTR_CSUM_MODE]);
+ else
+ xp->ip.csum_mode = ILA_CSUM_NO_ACTION;
+
+ if (info->attrs[ILA_ATTR_IDENT_TYPE])
+ xp->ip.ident_type = nla_get_u8(
+ info->attrs[ILA_ATTR_IDENT_TYPE]);
+ else
+ xp->ip.ident_type = ILA_ATYPE_USE_FORMAT;
if (info->attrs[ILA_ATTR_IFINDEX])
xp->ifindex = nla_get_s32(info->attrs[ILA_ATTR_IFINDEX]);
@@ -197,7 +207,7 @@ static void ila_free_cb(void *ptr, void *arg)
}
}
-static int ila_xlat_addr(struct sk_buff *skb, bool set_csum_neutral);
+static int ila_xlat_addr(struct sk_buff *skb, bool sir2ila);
static unsigned int
ila_nf_input(void *priv,
@@ -208,7 +218,7 @@ ila_nf_input(void *priv,
return NF_ACCEPT;
}
-static struct nf_hook_ops ila_nf_hook_ops[] __read_mostly = {
+static const struct nf_hook_ops ila_nf_hook_ops[] = {
{
.hook = ila_nf_input,
.pf = NFPROTO_IPV6,
@@ -395,7 +405,8 @@ static int ila_fill_info(struct ila_map *ila, struct sk_buff *msg)
(__force u64)ila->xp.ip.locator_match.v64,
ILA_ATTR_PAD) ||
nla_put_s32(msg, ILA_ATTR_IFINDEX, ila->xp.ifindex) ||
- nla_put_u32(msg, ILA_ATTR_CSUM_MODE, ila->xp.ip.csum_mode))
+ nla_put_u8(msg, ILA_ATTR_CSUM_MODE, ila->xp.ip.csum_mode) ||
+ nla_put_u8(msg, ILA_ATTR_IDENT_TYPE, ila->xp.ip.ident_type))
return -1;
return 0;
@@ -606,7 +617,7 @@ static struct pernet_operations ila_net_ops = {
.size = sizeof(struct ila_net),
};
-static int ila_xlat_addr(struct sk_buff *skb, bool set_csum_neutral)
+static int ila_xlat_addr(struct sk_buff *skb, bool sir2ila)
{
struct ila_map *ila;
struct ipv6hdr *ip6h = ipv6_hdr(skb);
@@ -616,16 +627,16 @@ static int ila_xlat_addr(struct sk_buff *skb, bool set_csum_neutral)
/* Assumes skb contains a valid IPv6 header that is pulled */
- if (!ila_addr_is_ila(iaddr)) {
- /* Type indicates this is not an ILA address */
- return 0;
- }
+ /* No check here that ILA type in the mapping matches what is in the
+ * address. We assume that whatever sender gaves us can be translated.
+ * The checksum mode however is relevant.
+ */
rcu_read_lock();
ila = ila_lookup_wildcards(iaddr, skb->dev->ifindex, ilan);
if (ila)
- ila_update_ipv6_locator(skb, &ila->xp.ip, set_csum_neutral);
+ ila_update_ipv6_locator(skb, &ila->xp.ip, sir2ila);
rcu_read_unlock();
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index b13b8f93079d..b01858f5deb1 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -56,7 +56,7 @@ struct sock *__inet6_lookup_established(struct net *net,
const __be16 sport,
const struct in6_addr *daddr,
const u16 hnum,
- const int dif)
+ const int dif, const int sdif)
{
struct sock *sk;
const struct hlist_nulls_node *node;
@@ -73,12 +73,12 @@ begin:
sk_nulls_for_each_rcu(sk, node, &head->chain) {
if (sk->sk_hash != hash)
continue;
- if (!INET6_MATCH(sk, net, saddr, daddr, ports, dif))
+ if (!INET6_MATCH(sk, net, saddr, daddr, ports, dif, sdif))
continue;
if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
goto out;
- if (unlikely(!INET6_MATCH(sk, net, saddr, daddr, ports, dif))) {
+ if (unlikely(!INET6_MATCH(sk, net, saddr, daddr, ports, dif, sdif))) {
sock_gen_put(sk);
goto begin;
}
@@ -96,7 +96,7 @@ EXPORT_SYMBOL(__inet6_lookup_established);
static inline int compute_score(struct sock *sk, struct net *net,
const unsigned short hnum,
const struct in6_addr *daddr,
- const int dif, bool exact_dif)
+ const int dif, const int sdif, bool exact_dif)
{
int score = -1;
@@ -110,9 +110,13 @@ static inline int compute_score(struct sock *sk, struct net *net,
score++;
}
if (sk->sk_bound_dev_if || exact_dif) {
- if (sk->sk_bound_dev_if != dif)
+ bool dev_match = (sk->sk_bound_dev_if == dif ||
+ sk->sk_bound_dev_if == sdif);
+
+ if (exact_dif && !dev_match)
return -1;
- score++;
+ if (sk->sk_bound_dev_if && dev_match)
+ score++;
}
if (sk->sk_incoming_cpu == raw_smp_processor_id())
score++;
@@ -126,7 +130,7 @@ struct sock *inet6_lookup_listener(struct net *net,
struct sk_buff *skb, int doff,
const struct in6_addr *saddr,
const __be16 sport, const struct in6_addr *daddr,
- const unsigned short hnum, const int dif)
+ const unsigned short hnum, const int dif, const int sdif)
{
unsigned int hash = inet_lhashfn(net, hnum);
struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
@@ -136,7 +140,7 @@ struct sock *inet6_lookup_listener(struct net *net,
u32 phash = 0;
sk_for_each(sk, &ilb->head) {
- score = compute_score(sk, net, hnum, daddr, dif, exact_dif);
+ score = compute_score(sk, net, hnum, daddr, dif, sdif, exact_dif);
if (score > hiscore) {
reuseport = sk->sk_reuseport;
if (reuseport) {
@@ -171,7 +175,7 @@ struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
bool refcounted;
sk = __inet6_lookup(net, hashinfo, skb, doff, saddr, sport, daddr,
- ntohs(dport), dif, &refcounted);
+ ntohs(dport), dif, 0, &refcounted);
if (sk && !refcounted && !refcount_inc_not_zero(&sk->sk_refcnt))
sk = NULL;
return sk;
@@ -187,8 +191,9 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
const struct in6_addr *daddr = &sk->sk_v6_rcv_saddr;
const struct in6_addr *saddr = &sk->sk_v6_daddr;
const int dif = sk->sk_bound_dev_if;
- const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport);
struct net *net = sock_net(sk);
+ const int sdif = l3mdev_master_ifindex_by_index(net, dif);
+ const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport);
const unsigned int hash = inet6_ehashfn(net, daddr, lport, saddr,
inet->inet_dport);
struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
@@ -203,7 +208,8 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
if (sk2->sk_hash != hash)
continue;
- if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports, dif))) {
+ if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports,
+ dif, sdif))) {
if (sk2->sk_state == TCP_TIME_WAIT) {
tw = inet_twsk(sk2);
if (twsk_unique(sk, sk2, twp))
diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c
index c0cbcb259f5a..ec43d18b5ff9 100644
--- a/net/ipv6/ip6_checksum.c
+++ b/net/ipv6/ip6_checksum.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <net/ip.h>
#include <net/udp.h>
#include <net/udplite.h>
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index ebb299cf72b7..2e2804f5823e 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -33,18 +33,11 @@
#include <net/ndisc.h>
#include <net/addrconf.h>
#include <net/lwtunnel.h>
+#include <net/fib_notifier.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
-#define RT6_DEBUG 2
-
-#if RT6_DEBUG >= 3
-#define RT6_TRACE(x...) pr_debug(x)
-#else
-#define RT6_TRACE(x...) do { ; } while (0)
-#endif
-
static struct kmem_cache *fib6_node_kmem __read_mostly;
struct fib6_cleaner {
@@ -61,9 +54,12 @@ struct fib6_cleaner {
#define FWS_INIT FWS_L
#endif
-static void fib6_prune_clones(struct net *net, struct fib6_node *fn);
-static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn);
-static struct fib6_node *fib6_repair_tree(struct net *net, struct fib6_node *fn);
+static struct rt6_info *fib6_find_prefix(struct net *net,
+ struct fib6_table *table,
+ struct fib6_node *fn);
+static struct fib6_node *fib6_repair_tree(struct net *net,
+ struct fib6_table *table,
+ struct fib6_node *fn);
static int fib6_walk(struct net *net, struct fib6_walker *w);
static int fib6_walk_continue(struct fib6_walker *w);
@@ -109,6 +105,20 @@ enum {
FIB6_NO_SERNUM_CHANGE = 0,
};
+void fib6_update_sernum(struct rt6_info *rt)
+{
+ struct fib6_table *table = rt->rt6i_table;
+ struct net *net = dev_net(rt->dst.dev);
+ struct fib6_node *fn;
+
+ spin_lock_bh(&table->tb6_lock);
+ fn = rcu_dereference_protected(rt->rt6i_node,
+ lockdep_is_held(&table->tb6_lock));
+ if (fn)
+ fn->fn_sernum = fib6_new_sernum(net);
+ spin_unlock_bh(&table->tb6_lock);
+}
+
/*
* Auxiliary address test functions for the radix tree.
*
@@ -139,21 +149,37 @@ static __be32 addr_bit_set(const void *token, int fn_bit)
addr[fn_bit >> 5];
}
-static struct fib6_node *node_alloc(void)
+static struct fib6_node *node_alloc(struct net *net)
{
struct fib6_node *fn;
fn = kmem_cache_zalloc(fib6_node_kmem, GFP_ATOMIC);
+ if (fn)
+ net->ipv6.rt6_stats->fib_nodes++;
return fn;
}
-static void node_free(struct fib6_node *fn)
+static void node_free_immediate(struct net *net, struct fib6_node *fn)
+{
+ kmem_cache_free(fib6_node_kmem, fn);
+ net->ipv6.rt6_stats->fib_nodes--;
+}
+
+static void node_free_rcu(struct rcu_head *head)
{
+ struct fib6_node *fn = container_of(head, struct fib6_node, rcu);
+
kmem_cache_free(fib6_node_kmem, fn);
}
-static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
+static void node_free(struct net *net, struct fib6_node *fn)
+{
+ call_rcu(&fn->rcu, node_free_rcu);
+ net->ipv6.rt6_stats->fib_nodes--;
+}
+
+void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
{
int cpu;
@@ -172,18 +198,13 @@ static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
*ppcpu_rt = NULL;
}
}
-
- free_percpu(non_pcpu_rt->rt6i_pcpu);
- non_pcpu_rt->rt6i_pcpu = NULL;
}
+EXPORT_SYMBOL_GPL(rt6_free_pcpu);
-static void rt6_release(struct rt6_info *rt)
+static void fib6_free_table(struct fib6_table *table)
{
- if (atomic_dec_and_test(&rt->rt6i_ref)) {
- rt6_free_pcpu(rt);
- dst_dev_put(&rt->dst);
- dst_release(&rt->dst);
- }
+ inetpeer_invalidate_tree(&table->tb6_peers);
+ kfree(table);
}
static void fib6_link_table(struct net *net, struct fib6_table *tb)
@@ -194,8 +215,7 @@ static void fib6_link_table(struct net *net, struct fib6_table *tb)
* Initialize table lock at a single place to give lockdep a key,
* tables aren't visible prior to being linked to the list.
*/
- rwlock_init(&tb->tb6_lock);
-
+ spin_lock_init(&tb->tb6_lock);
h = tb->tb6_id & (FIB6_TABLE_HASHSZ - 1);
/*
@@ -214,7 +234,8 @@ static struct fib6_table *fib6_alloc_table(struct net *net, u32 id)
table = kzalloc(sizeof(*table), GFP_ATOMIC);
if (table) {
table->tb6_id = id;
- table->tb6_root.leaf = net->ipv6.ip6_null_entry;
+ rcu_assign_pointer(table->tb6_root.leaf,
+ net->ipv6.ip6_null_entry);
table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
inet_peer_base_init(&table->tb6_peers);
}
@@ -302,12 +323,114 @@ static void __net_init fib6_tables_init(struct net *net)
#endif
+unsigned int fib6_tables_seq_read(struct net *net)
+{
+ unsigned int h, fib_seq = 0;
+
+ rcu_read_lock();
+ for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
+ struct hlist_head *head = &net->ipv6.fib_table_hash[h];
+ struct fib6_table *tb;
+
+ hlist_for_each_entry_rcu(tb, head, tb6_hlist)
+ fib_seq += tb->fib_seq;
+ }
+ rcu_read_unlock();
+
+ return fib_seq;
+}
+
+static int call_fib6_entry_notifier(struct notifier_block *nb, struct net *net,
+ enum fib_event_type event_type,
+ struct rt6_info *rt)
+{
+ struct fib6_entry_notifier_info info = {
+ .rt = rt,
+ };
+
+ return call_fib6_notifier(nb, net, event_type, &info.info);
+}
+
+static int call_fib6_entry_notifiers(struct net *net,
+ enum fib_event_type event_type,
+ struct rt6_info *rt,
+ struct netlink_ext_ack *extack)
+{
+ struct fib6_entry_notifier_info info = {
+ .info.extack = extack,
+ .rt = rt,
+ };
+
+ rt->rt6i_table->fib_seq++;
+ return call_fib6_notifiers(net, event_type, &info.info);
+}
+
+struct fib6_dump_arg {
+ struct net *net;
+ struct notifier_block *nb;
+};
+
+static void fib6_rt_dump(struct rt6_info *rt, struct fib6_dump_arg *arg)
+{
+ if (rt == arg->net->ipv6.ip6_null_entry)
+ return;
+ call_fib6_entry_notifier(arg->nb, arg->net, FIB_EVENT_ENTRY_ADD, rt);
+}
+
+static int fib6_node_dump(struct fib6_walker *w)
+{
+ struct rt6_info *rt;
+
+ for_each_fib6_walker_rt(w)
+ fib6_rt_dump(rt, w->args);
+ w->leaf = NULL;
+ return 0;
+}
+
+static void fib6_table_dump(struct net *net, struct fib6_table *tb,
+ struct fib6_walker *w)
+{
+ w->root = &tb->tb6_root;
+ spin_lock_bh(&tb->tb6_lock);
+ fib6_walk(net, w);
+ spin_unlock_bh(&tb->tb6_lock);
+}
+
+/* Called with rcu_read_lock() */
+int fib6_tables_dump(struct net *net, struct notifier_block *nb)
+{
+ struct fib6_dump_arg arg;
+ struct fib6_walker *w;
+ unsigned int h;
+
+ w = kzalloc(sizeof(*w), GFP_ATOMIC);
+ if (!w)
+ return -ENOMEM;
+
+ w->func = fib6_node_dump;
+ arg.net = net;
+ arg.nb = nb;
+ w->args = &arg;
+
+ for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
+ struct hlist_head *head = &net->ipv6.fib_table_hash[h];
+ struct fib6_table *tb;
+
+ hlist_for_each_entry_rcu(tb, head, tb6_hlist)
+ fib6_table_dump(net, tb, w);
+ }
+
+ kfree(w);
+
+ return 0;
+}
+
static int fib6_dump_node(struct fib6_walker *w)
{
int res;
struct rt6_info *rt;
- for (rt = w->leaf; rt; rt = rt->dst.rt6_next) {
+ for_each_fib6_walker_rt(w) {
res = rt6_dump_route(rt, w->args);
if (res < 0) {
/* Frame is full, suspend walking */
@@ -366,9 +489,9 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
w->count = 0;
w->skip = 0;
- read_lock_bh(&table->tb6_lock);
+ spin_lock_bh(&table->tb6_lock);
res = fib6_walk(net, w);
- read_unlock_bh(&table->tb6_lock);
+ spin_unlock_bh(&table->tb6_lock);
if (res > 0) {
cb->args[4] = 1;
cb->args[5] = w->root->fn_sernum;
@@ -383,9 +506,9 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
} else
w->skip = 0;
- read_lock_bh(&table->tb6_lock);
+ spin_lock_bh(&table->tb6_lock);
res = fib6_walk_continue(w);
- read_unlock_bh(&table->tb6_lock);
+ spin_unlock_bh(&table->tb6_lock);
if (res <= 0) {
fib6_walker_unlink(net, w);
cb->args[4] = 0;
@@ -466,11 +589,13 @@ out:
* node.
*/
-static struct fib6_node *fib6_add_1(struct fib6_node *root,
- struct in6_addr *addr, int plen,
- int offset, int allow_create,
- int replace_required, int sernum,
- struct netlink_ext_ack *extack)
+static struct fib6_node *fib6_add_1(struct net *net,
+ struct fib6_table *table,
+ struct fib6_node *root,
+ struct in6_addr *addr, int plen,
+ int offset, int allow_create,
+ int replace_required,
+ struct netlink_ext_ack *extack)
{
struct fib6_node *fn, *in, *ln;
struct fib6_node *pn = NULL;
@@ -485,7 +610,9 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root,
fn = root;
do {
- key = (struct rt6key *)((u8 *)fn->leaf + offset);
+ struct rt6_info *leaf = rcu_dereference_protected(fn->leaf,
+ lockdep_is_held(&table->tb6_lock));
+ key = (struct rt6key *)((u8 *)leaf + offset);
/*
* Prefix match
@@ -511,12 +638,10 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root,
if (plen == fn->fn_bit) {
/* clean up an intermediate node */
if (!(fn->fn_flags & RTN_RTINFO)) {
- rt6_release(fn->leaf);
- fn->leaf = NULL;
+ RCU_INIT_POINTER(fn->leaf, NULL);
+ rt6_release(leaf);
}
- fn->fn_sernum = sernum;
-
return fn;
}
@@ -525,10 +650,13 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root,
*/
/* Try to walk down on tree. */
- fn->fn_sernum = sernum;
dir = addr_bit_set(addr, fn->fn_bit);
pn = fn;
- fn = dir ? fn->right : fn->left;
+ fn = dir ?
+ rcu_dereference_protected(fn->right,
+ lockdep_is_held(&table->tb6_lock)) :
+ rcu_dereference_protected(fn->left,
+ lockdep_is_held(&table->tb6_lock));
} while (fn);
if (!allow_create) {
@@ -554,19 +682,17 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root,
* Create new leaf node without children.
*/
- ln = node_alloc();
+ ln = node_alloc(net);
if (!ln)
return ERR_PTR(-ENOMEM);
ln->fn_bit = plen;
-
- ln->parent = pn;
- ln->fn_sernum = sernum;
+ RCU_INIT_POINTER(ln->parent, pn);
if (dir)
- pn->right = ln;
+ rcu_assign_pointer(pn->right, ln);
else
- pn->left = ln;
+ rcu_assign_pointer(pn->left, ln);
return ln;
@@ -580,7 +706,8 @@ insert_above:
* and the current
*/
- pn = fn->parent;
+ pn = rcu_dereference_protected(fn->parent,
+ lockdep_is_held(&table->tb6_lock));
/* find 1st bit in difference between the 2 addrs.
@@ -596,14 +723,14 @@ insert_above:
* (new leaf node)[ln] (old node)[fn]
*/
if (plen > bit) {
- in = node_alloc();
- ln = node_alloc();
+ in = node_alloc(net);
+ ln = node_alloc(net);
if (!in || !ln) {
if (in)
- node_free(in);
+ node_free_immediate(net, in);
if (ln)
- node_free(ln);
+ node_free_immediate(net, ln);
return ERR_PTR(-ENOMEM);
}
@@ -617,31 +744,28 @@ insert_above:
in->fn_bit = bit;
- in->parent = pn;
+ RCU_INIT_POINTER(in->parent, pn);
in->leaf = fn->leaf;
- atomic_inc(&in->leaf->rt6i_ref);
-
- in->fn_sernum = sernum;
+ atomic_inc(&rcu_dereference_protected(in->leaf,
+ lockdep_is_held(&table->tb6_lock))->rt6i_ref);
/* update parent pointer */
if (dir)
- pn->right = in;
+ rcu_assign_pointer(pn->right, in);
else
- pn->left = in;
+ rcu_assign_pointer(pn->left, in);
ln->fn_bit = plen;
- ln->parent = in;
- fn->parent = in;
-
- ln->fn_sernum = sernum;
+ RCU_INIT_POINTER(ln->parent, in);
+ rcu_assign_pointer(fn->parent, in);
if (addr_bit_set(addr, bit)) {
- in->right = ln;
- in->left = fn;
+ rcu_assign_pointer(in->right, ln);
+ rcu_assign_pointer(in->left, fn);
} else {
- in->left = ln;
- in->right = fn;
+ rcu_assign_pointer(in->left, ln);
+ rcu_assign_pointer(in->right, fn);
}
} else { /* plen <= bit */
@@ -651,28 +775,26 @@ insert_above:
* (old node)[fn] NULL
*/
- ln = node_alloc();
+ ln = node_alloc(net);
if (!ln)
return ERR_PTR(-ENOMEM);
ln->fn_bit = plen;
- ln->parent = pn;
-
- ln->fn_sernum = sernum;
-
- if (dir)
- pn->right = ln;
- else
- pn->left = ln;
+ RCU_INIT_POINTER(ln->parent, pn);
if (addr_bit_set(&key->addr, plen))
- ln->right = fn;
+ RCU_INIT_POINTER(ln->right, fn);
else
- ln->left = fn;
+ RCU_INIT_POINTER(ln->left, fn);
+
+ rcu_assign_pointer(fn->parent, ln);
- fn->parent = ln;
+ if (dir)
+ rcu_assign_pointer(pn->right, ln);
+ else
+ rcu_assign_pointer(pn->left, ln);
}
return ln;
}
@@ -718,6 +840,8 @@ static int fib6_commit_metrics(struct dst_entry *dst, struct mx6_config *mxc)
static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn,
struct net *net)
{
+ struct fib6_table *table = rt->rt6i_table;
+
if (atomic_read(&rt->rt6i_ref) != 1) {
/* This route is used as dummy address holder in some split
* nodes. It is not leaked, but it still holds other resources,
@@ -726,15 +850,18 @@ static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn,
* to still alive ones.
*/
while (fn) {
- if (!(fn->fn_flags & RTN_RTINFO) && fn->leaf == rt) {
- fn->leaf = fib6_find_prefix(net, fn);
- atomic_inc(&fn->leaf->rt6i_ref);
+ struct rt6_info *leaf = rcu_dereference_protected(fn->leaf,
+ lockdep_is_held(&table->tb6_lock));
+ struct rt6_info *new_leaf;
+ if (!(fn->fn_flags & RTN_RTINFO) && leaf == rt) {
+ new_leaf = fib6_find_prefix(net, table, fn);
+ atomic_inc(&new_leaf->rt6i_ref);
+ rcu_assign_pointer(fn->leaf, new_leaf);
rt6_release(rt);
}
- fn = fn->parent;
+ fn = rcu_dereference_protected(fn->parent,
+ lockdep_is_held(&table->tb6_lock));
}
- /* No more references are possible at this point. */
- BUG_ON(atomic_read(&rt->rt6i_ref) != 1);
}
}
@@ -743,11 +870,14 @@ static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn,
*/
static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
- struct nl_info *info, struct mx6_config *mxc)
+ struct nl_info *info, struct mx6_config *mxc,
+ struct netlink_ext_ack *extack)
{
+ struct rt6_info *leaf = rcu_dereference_protected(fn->leaf,
+ lockdep_is_held(&rt->rt6i_table->tb6_lock));
struct rt6_info *iter = NULL;
- struct rt6_info **ins;
- struct rt6_info **fallback_ins = NULL;
+ struct rt6_info __rcu **ins;
+ struct rt6_info __rcu **fallback_ins = NULL;
int replace = (info->nlh &&
(info->nlh->nlmsg_flags & NLM_F_REPLACE));
int add = (!info->nlh ||
@@ -762,7 +892,9 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
ins = &fn->leaf;
- for (iter = fn->leaf; iter; iter = iter->dst.rt6_next) {
+ for (iter = leaf; iter;
+ iter = rcu_dereference_protected(iter->dst.rt6_next,
+ lockdep_is_held(&rt->rt6i_table->tb6_lock))) {
/*
* Search for duplicates
*/
@@ -824,7 +956,8 @@ next_iter:
if (fallback_ins && !found) {
/* No ECMP-able route found, replace first non-ECMP one */
ins = fallback_ins;
- iter = *ins;
+ iter = rcu_dereference_protected(*ins,
+ lockdep_is_held(&rt->rt6i_table->tb6_lock));
found++;
}
@@ -838,7 +971,7 @@ next_iter:
struct rt6_info *sibling, *temp_sibling;
/* Find the first route that have the same metric */
- sibling = fn->leaf;
+ sibling = leaf;
while (sibling) {
if (sibling->rt6i_metric == rt->rt6i_metric &&
rt6_qualify_for_ecmp(sibling)) {
@@ -846,7 +979,8 @@ next_iter:
&sibling->rt6i_siblings);
break;
}
- sibling = sibling->dst.rt6_next;
+ sibling = rcu_dereference_protected(sibling->dst.rt6_next,
+ lockdep_is_held(&rt->rt6i_table->tb6_lock));
}
/* For each sibling in the list, increment the counter of
* siblings. BUG() if counters does not match, list of siblings
@@ -875,10 +1009,12 @@ add:
if (err)
return err;
- rt->dst.rt6_next = iter;
- *ins = rt;
- rt->rt6i_node = fn;
+ rcu_assign_pointer(rt->dst.rt6_next, iter);
atomic_inc(&rt->rt6i_ref);
+ rcu_assign_pointer(rt->rt6i_node, fn);
+ rcu_assign_pointer(*ins, rt);
+ call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_ADD,
+ rt, extack);
if (!info->skip_notify)
inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
info->nl_net->ipv6.rt6_stats->fib_rt_entries++;
@@ -902,10 +1038,12 @@ add:
if (err)
return err;
- *ins = rt;
- rt->rt6i_node = fn;
- rt->dst.rt6_next = iter->dst.rt6_next;
atomic_inc(&rt->rt6i_ref);
+ rcu_assign_pointer(rt->rt6i_node, fn);
+ rt->dst.rt6_next = iter->dst.rt6_next;
+ rcu_assign_pointer(*ins, rt);
+ call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_REPLACE,
+ rt, extack);
if (!info->skip_notify)
inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE);
if (!(fn->fn_flags & RTN_RTINFO)) {
@@ -913,25 +1051,34 @@ add:
fn->fn_flags |= RTN_RTINFO;
}
nsiblings = iter->rt6i_nsiblings;
+ iter->rt6i_node = NULL;
fib6_purge_rt(iter, fn, info->nl_net);
+ if (rcu_access_pointer(fn->rr_ptr) == iter)
+ fn->rr_ptr = NULL;
rt6_release(iter);
if (nsiblings) {
/* Replacing an ECMP route, remove all siblings */
ins = &rt->dst.rt6_next;
- iter = *ins;
+ iter = rcu_dereference_protected(*ins,
+ lockdep_is_held(&rt->rt6i_table->tb6_lock));
while (iter) {
if (iter->rt6i_metric > rt->rt6i_metric)
break;
if (rt6_qualify_for_ecmp(iter)) {
*ins = iter->dst.rt6_next;
+ iter->rt6i_node = NULL;
fib6_purge_rt(iter, fn, info->nl_net);
+ if (rcu_access_pointer(fn->rr_ptr) == iter)
+ fn->rr_ptr = NULL;
rt6_release(iter);
nsiblings--;
+ info->nl_net->ipv6.rt6_stats->fib_rt_entries--;
} else {
ins = &iter->dst.rt6_next;
}
- iter = *ins;
+ iter = rcu_dereference_protected(*ins,
+ lockdep_is_held(&rt->rt6i_table->tb6_lock));
}
WARN_ON(nsiblings != 0);
}
@@ -955,16 +1102,33 @@ void fib6_force_start_gc(struct net *net)
jiffies + net->ipv6.sysctl.ip6_rt_gc_interval);
}
+static void fib6_update_sernum_upto_root(struct rt6_info *rt,
+ int sernum)
+{
+ struct fib6_node *fn = rcu_dereference_protected(rt->rt6i_node,
+ lockdep_is_held(&rt->rt6i_table->tb6_lock));
+
+ /* paired with smp_rmb() in rt6_get_cookie_safe() */
+ smp_wmb();
+ while (fn) {
+ fn->fn_sernum = sernum;
+ fn = rcu_dereference_protected(fn->parent,
+ lockdep_is_held(&rt->rt6i_table->tb6_lock));
+ }
+}
+
/*
* Add routing information to the routing tree.
* <destination addr>/<source addr>
* with source addr info in sub-trees
+ * Need to own table->tb6_lock
*/
int fib6_add(struct fib6_node *root, struct rt6_info *rt,
struct nl_info *info, struct mx6_config *mxc,
struct netlink_ext_ack *extack)
{
+ struct fib6_table *table = rt->rt6i_table;
struct fib6_node *fn, *pn = NULL;
int err = -ENOMEM;
int allow_create = 1;
@@ -973,6 +1137,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
if (WARN_ON_ONCE(!atomic_read(&rt->dst.__refcnt)))
return -EINVAL;
+ if (WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE))
+ return -EINVAL;
if (info->nlh) {
if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
@@ -983,9 +1149,10 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
if (!allow_create && !replace_required)
pr_warn("RTM_NEWROUTE with no NLM_F_CREATE or NLM_F_REPLACE\n");
- fn = fib6_add_1(root, &rt->rt6i_dst.addr, rt->rt6i_dst.plen,
+ fn = fib6_add_1(info->nl_net, table, root,
+ &rt->rt6i_dst.addr, rt->rt6i_dst.plen,
offsetof(struct rt6_info, rt6i_dst), allow_create,
- replace_required, sernum, extack);
+ replace_required, extack);
if (IS_ERR(fn)) {
err = PTR_ERR(fn);
fn = NULL;
@@ -998,7 +1165,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
if (rt->rt6i_src.plen) {
struct fib6_node *sn;
- if (!fn->subtree) {
+ if (!rcu_access_pointer(fn->subtree)) {
struct fib6_node *sfn;
/*
@@ -1012,62 +1179,59 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
*/
/* Create subtree root node */
- sfn = node_alloc();
+ sfn = node_alloc(info->nl_net);
if (!sfn)
- goto st_failure;
+ goto failure;
- sfn->leaf = info->nl_net->ipv6.ip6_null_entry;
atomic_inc(&info->nl_net->ipv6.ip6_null_entry->rt6i_ref);
+ rcu_assign_pointer(sfn->leaf,
+ info->nl_net->ipv6.ip6_null_entry);
sfn->fn_flags = RTN_ROOT;
- sfn->fn_sernum = sernum;
/* Now add the first leaf node to new subtree */
- sn = fib6_add_1(sfn, &rt->rt6i_src.addr,
- rt->rt6i_src.plen,
+ sn = fib6_add_1(info->nl_net, table, sfn,
+ &rt->rt6i_src.addr, rt->rt6i_src.plen,
offsetof(struct rt6_info, rt6i_src),
- allow_create, replace_required, sernum,
- extack);
+ allow_create, replace_required, extack);
if (IS_ERR(sn)) {
/* If it is failed, discard just allocated
- root, and then (in st_failure) stale node
+ root, and then (in failure) stale node
in main tree.
*/
- node_free(sfn);
+ node_free_immediate(info->nl_net, sfn);
err = PTR_ERR(sn);
- goto st_failure;
+ goto failure;
}
/* Now link new subtree to main tree */
- sfn->parent = fn;
- fn->subtree = sfn;
+ rcu_assign_pointer(sfn->parent, fn);
+ rcu_assign_pointer(fn->subtree, sfn);
} else {
- sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr,
- rt->rt6i_src.plen,
+ sn = fib6_add_1(info->nl_net, table, FIB6_SUBTREE(fn),
+ &rt->rt6i_src.addr, rt->rt6i_src.plen,
offsetof(struct rt6_info, rt6i_src),
- allow_create, replace_required, sernum,
- extack);
+ allow_create, replace_required, extack);
if (IS_ERR(sn)) {
err = PTR_ERR(sn);
- goto st_failure;
+ goto failure;
}
}
- if (!fn->leaf) {
- fn->leaf = rt;
+ if (!rcu_access_pointer(fn->leaf)) {
atomic_inc(&rt->rt6i_ref);
+ rcu_assign_pointer(fn->leaf, rt);
}
fn = sn;
}
#endif
- err = fib6_add_rt2node(fn, rt, info, mxc);
+ err = fib6_add_rt2node(fn, rt, info, mxc, extack);
if (!err) {
+ fib6_update_sernum_upto_root(rt, sernum);
fib6_start_gc(info->nl_net, rt);
- if (!(rt->rt6i_flags & RTF_CACHE))
- fib6_prune_clones(info->nl_net, pn);
}
out:
@@ -1077,41 +1241,43 @@ out:
* If fib6_add_1 has cleared the old leaf pointer in the
* super-tree leaf node we have to find a new one for it.
*/
- if (pn != fn && pn->leaf == rt) {
- pn->leaf = NULL;
+ struct rt6_info *pn_leaf = rcu_dereference_protected(pn->leaf,
+ lockdep_is_held(&table->tb6_lock));
+ if (pn != fn && pn_leaf == rt) {
+ pn_leaf = NULL;
+ RCU_INIT_POINTER(pn->leaf, NULL);
atomic_dec(&rt->rt6i_ref);
}
- if (pn != fn && !pn->leaf && !(pn->fn_flags & RTN_RTINFO)) {
- pn->leaf = fib6_find_prefix(info->nl_net, pn);
+ if (pn != fn && !pn_leaf && !(pn->fn_flags & RTN_RTINFO)) {
+ pn_leaf = fib6_find_prefix(info->nl_net, table, pn);
#if RT6_DEBUG >= 2
- if (!pn->leaf) {
- WARN_ON(pn->leaf == NULL);
- pn->leaf = info->nl_net->ipv6.ip6_null_entry;
+ if (!pn_leaf) {
+ WARN_ON(!pn_leaf);
+ pn_leaf = info->nl_net->ipv6.ip6_null_entry;
}
#endif
- atomic_inc(&pn->leaf->rt6i_ref);
+ atomic_inc(&pn_leaf->rt6i_ref);
+ rcu_assign_pointer(pn->leaf, pn_leaf);
}
#endif
- /* Always release dst as dst->__refcnt is guaranteed
- * to be taken before entering this function
- */
- dst_release_immediate(&rt->dst);
+ goto failure;
}
return err;
-#ifdef CONFIG_IPV6_SUBTREES
- /* Subtree creation failed, probably main tree node
- is orphan. If it is, shoot it.
+failure:
+ /* fn->leaf could be NULL if fn is an intermediate node and we
+ * failed to add the new route to it in both subtree creation
+ * failure and fib6_add_rt2node() failure case.
+ * In both cases, fib6_repair_tree() should be called to fix
+ * fn->leaf.
*/
-st_failure:
if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)))
- fib6_repair_tree(info->nl_net, fn);
+ fib6_repair_tree(info->nl_net, table, fn);
/* Always release dst as dst->__refcnt is guaranteed
* to be taken before entering this function
*/
dst_release_immediate(&rt->dst);
return err;
-#endif
}
/*
@@ -1144,7 +1310,8 @@ static struct fib6_node *fib6_lookup_1(struct fib6_node *root,
dir = addr_bit_set(args->addr, fn->fn_bit);
- next = dir ? fn->right : fn->left;
+ next = dir ? rcu_dereference(fn->right) :
+ rcu_dereference(fn->left);
if (next) {
fn = next;
@@ -1154,18 +1321,22 @@ static struct fib6_node *fib6_lookup_1(struct fib6_node *root,
}
while (fn) {
- if (FIB6_SUBTREE(fn) || fn->fn_flags & RTN_RTINFO) {
+ struct fib6_node *subtree = FIB6_SUBTREE(fn);
+
+ if (subtree || fn->fn_flags & RTN_RTINFO) {
+ struct rt6_info *leaf = rcu_dereference(fn->leaf);
struct rt6key *key;
- key = (struct rt6key *) ((u8 *) fn->leaf +
- args->offset);
+ if (!leaf)
+ goto backtrack;
+
+ key = (struct rt6key *) ((u8 *)leaf + args->offset);
if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) {
#ifdef CONFIG_IPV6_SUBTREES
- if (fn->subtree) {
+ if (subtree) {
struct fib6_node *sfn;
- sfn = fib6_lookup_1(fn->subtree,
- args + 1);
+ sfn = fib6_lookup_1(subtree, args + 1);
if (!sfn)
goto backtrack;
fn = sfn;
@@ -1175,18 +1346,18 @@ static struct fib6_node *fib6_lookup_1(struct fib6_node *root,
return fn;
}
}
-#ifdef CONFIG_IPV6_SUBTREES
backtrack:
-#endif
if (fn->fn_flags & RTN_ROOT)
break;
- fn = fn->parent;
+ fn = rcu_dereference(fn->parent);
}
return NULL;
}
+/* called with rcu_read_lock() held
+ */
struct fib6_node *fib6_lookup(struct fib6_node *root, const struct in6_addr *daddr,
const struct in6_addr *saddr)
{
@@ -1217,54 +1388,87 @@ struct fib6_node *fib6_lookup(struct fib6_node *root, const struct in6_addr *dad
/*
* Get node with specified destination prefix (and source prefix,
* if subtrees are used)
+ * exact_match == true means we try to find fn with exact match of
+ * the passed in prefix addr
+ * exact_match == false means we try to find fn with longest prefix
+ * match of the passed in prefix addr. This is useful for finding fn
+ * for cached route as it will be stored in the exception table under
+ * the node with longest prefix length.
*/
static struct fib6_node *fib6_locate_1(struct fib6_node *root,
const struct in6_addr *addr,
- int plen, int offset)
+ int plen, int offset,
+ bool exact_match)
{
- struct fib6_node *fn;
+ struct fib6_node *fn, *prev = NULL;
for (fn = root; fn ; ) {
- struct rt6key *key = (struct rt6key *)((u8 *)fn->leaf + offset);
+ struct rt6_info *leaf = rcu_dereference(fn->leaf);
+ struct rt6key *key;
+
+ /* This node is being deleted */
+ if (!leaf) {
+ if (plen <= fn->fn_bit)
+ goto out;
+ else
+ goto next;
+ }
+
+ key = (struct rt6key *)((u8 *)leaf + offset);
/*
* Prefix match
*/
if (plen < fn->fn_bit ||
!ipv6_prefix_equal(&key->addr, addr, fn->fn_bit))
- return NULL;
+ goto out;
if (plen == fn->fn_bit)
return fn;
+ prev = fn;
+
+next:
/*
* We have more bits to go
*/
if (addr_bit_set(addr, fn->fn_bit))
- fn = fn->right;
+ fn = rcu_dereference(fn->right);
else
- fn = fn->left;
+ fn = rcu_dereference(fn->left);
}
- return NULL;
+out:
+ if (exact_match)
+ return NULL;
+ else
+ return prev;
}
struct fib6_node *fib6_locate(struct fib6_node *root,
const struct in6_addr *daddr, int dst_len,
- const struct in6_addr *saddr, int src_len)
+ const struct in6_addr *saddr, int src_len,
+ bool exact_match)
{
struct fib6_node *fn;
fn = fib6_locate_1(root, daddr, dst_len,
- offsetof(struct rt6_info, rt6i_dst));
+ offsetof(struct rt6_info, rt6i_dst),
+ exact_match);
#ifdef CONFIG_IPV6_SUBTREES
if (src_len) {
WARN_ON(saddr == NULL);
- if (fn && fn->subtree)
- fn = fib6_locate_1(fn->subtree, saddr, src_len,
- offsetof(struct rt6_info, rt6i_src));
+ if (fn) {
+ struct fib6_node *subtree = FIB6_SUBTREE(fn);
+
+ if (subtree) {
+ fn = fib6_locate_1(subtree, saddr, src_len,
+ offsetof(struct rt6_info, rt6i_src),
+ exact_match);
+ }
+ }
}
#endif
@@ -1280,16 +1484,26 @@ struct fib6_node *fib6_locate(struct fib6_node *root,
*
*/
-static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn)
+static struct rt6_info *fib6_find_prefix(struct net *net,
+ struct fib6_table *table,
+ struct fib6_node *fn)
{
+ struct fib6_node *child_left, *child_right;
+
if (fn->fn_flags & RTN_ROOT)
return net->ipv6.ip6_null_entry;
while (fn) {
- if (fn->left)
- return fn->left->leaf;
- if (fn->right)
- return fn->right->leaf;
+ child_left = rcu_dereference_protected(fn->left,
+ lockdep_is_held(&table->tb6_lock));
+ child_right = rcu_dereference_protected(fn->right,
+ lockdep_is_held(&table->tb6_lock));
+ if (child_left)
+ return rcu_dereference_protected(child_left->leaf,
+ lockdep_is_held(&table->tb6_lock));
+ if (child_right)
+ return rcu_dereference_protected(child_right->leaf,
+ lockdep_is_held(&table->tb6_lock));
fn = FIB6_SUBTREE(fn);
}
@@ -1299,31 +1513,49 @@ static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn)
/*
* Called to trim the tree of intermediate nodes when possible. "fn"
* is the node we want to try and remove.
+ * Need to own table->tb6_lock
*/
static struct fib6_node *fib6_repair_tree(struct net *net,
- struct fib6_node *fn)
+ struct fib6_table *table,
+ struct fib6_node *fn)
{
int children;
int nstate;
- struct fib6_node *child, *pn;
+ struct fib6_node *child;
struct fib6_walker *w;
int iter = 0;
for (;;) {
+ struct fib6_node *fn_r = rcu_dereference_protected(fn->right,
+ lockdep_is_held(&table->tb6_lock));
+ struct fib6_node *fn_l = rcu_dereference_protected(fn->left,
+ lockdep_is_held(&table->tb6_lock));
+ struct fib6_node *pn = rcu_dereference_protected(fn->parent,
+ lockdep_is_held(&table->tb6_lock));
+ struct fib6_node *pn_r = rcu_dereference_protected(pn->right,
+ lockdep_is_held(&table->tb6_lock));
+ struct fib6_node *pn_l = rcu_dereference_protected(pn->left,
+ lockdep_is_held(&table->tb6_lock));
+ struct rt6_info *fn_leaf = rcu_dereference_protected(fn->leaf,
+ lockdep_is_held(&table->tb6_lock));
+ struct rt6_info *pn_leaf = rcu_dereference_protected(pn->leaf,
+ lockdep_is_held(&table->tb6_lock));
+ struct rt6_info *new_fn_leaf;
+
RT6_TRACE("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter);
iter++;
WARN_ON(fn->fn_flags & RTN_RTINFO);
WARN_ON(fn->fn_flags & RTN_TL_ROOT);
- WARN_ON(fn->leaf);
+ WARN_ON(fn_leaf);
children = 0;
child = NULL;
- if (fn->right)
- child = fn->right, children |= 1;
- if (fn->left)
- child = fn->left, children |= 2;
+ if (fn_r)
+ child = fn_r, children |= 1;
+ if (fn_l)
+ child = fn_l, children |= 2;
if (children == 3 || FIB6_SUBTREE(fn)
#ifdef CONFIG_IPV6_SUBTREES
@@ -1331,36 +1563,36 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
|| (children && fn->fn_flags & RTN_ROOT)
#endif
) {
- fn->leaf = fib6_find_prefix(net, fn);
+ new_fn_leaf = fib6_find_prefix(net, table, fn);
#if RT6_DEBUG >= 2
- if (!fn->leaf) {
- WARN_ON(!fn->leaf);
- fn->leaf = net->ipv6.ip6_null_entry;
+ if (!new_fn_leaf) {
+ WARN_ON(!new_fn_leaf);
+ new_fn_leaf = net->ipv6.ip6_null_entry;
}
#endif
- atomic_inc(&fn->leaf->rt6i_ref);
- return fn->parent;
+ atomic_inc(&new_fn_leaf->rt6i_ref);
+ rcu_assign_pointer(fn->leaf, new_fn_leaf);
+ return pn;
}
- pn = fn->parent;
#ifdef CONFIG_IPV6_SUBTREES
if (FIB6_SUBTREE(pn) == fn) {
WARN_ON(!(fn->fn_flags & RTN_ROOT));
- FIB6_SUBTREE(pn) = NULL;
+ RCU_INIT_POINTER(pn->subtree, NULL);
nstate = FWS_L;
} else {
WARN_ON(fn->fn_flags & RTN_ROOT);
#endif
- if (pn->right == fn)
- pn->right = child;
- else if (pn->left == fn)
- pn->left = child;
+ if (pn_r == fn)
+ rcu_assign_pointer(pn->right, child);
+ else if (pn_l == fn)
+ rcu_assign_pointer(pn->left, child);
#if RT6_DEBUG >= 2
else
WARN_ON(1);
#endif
if (child)
- child->parent = pn;
+ rcu_assign_pointer(child->parent, pn);
nstate = FWS_R;
#ifdef CONFIG_IPV6_SUBTREES
}
@@ -1369,19 +1601,12 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
read_lock(&net->ipv6.fib6_walker_lock);
FOR_WALKERS(net, w) {
if (!child) {
- if (w->root == fn) {
- w->root = w->node = NULL;
- RT6_TRACE("W %p adjusted by delroot 1\n", w);
- } else if (w->node == fn) {
+ if (w->node == fn) {
RT6_TRACE("W %p adjusted by delnode 1, s=%d/%d\n", w, w->state, nstate);
w->node = pn;
w->state = nstate;
}
} else {
- if (w->root == fn) {
- w->root = child;
- RT6_TRACE("W %p adjusted by delroot 2\n", w);
- }
if (w->node == fn) {
w->node = child;
if (children&2) {
@@ -1396,33 +1621,39 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
}
read_unlock(&net->ipv6.fib6_walker_lock);
- node_free(fn);
+ node_free(net, fn);
if (pn->fn_flags & RTN_RTINFO || FIB6_SUBTREE(pn))
return pn;
- rt6_release(pn->leaf);
- pn->leaf = NULL;
+ RCU_INIT_POINTER(pn->leaf, NULL);
+ rt6_release(pn_leaf);
fn = pn;
}
}
-static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
- struct nl_info *info)
+static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
+ struct rt6_info __rcu **rtp, struct nl_info *info)
{
struct fib6_walker *w;
- struct rt6_info *rt = *rtp;
+ struct rt6_info *rt = rcu_dereference_protected(*rtp,
+ lockdep_is_held(&table->tb6_lock));
struct net *net = info->nl_net;
RT6_TRACE("fib6_del_route\n");
+ WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE);
+
/* Unlink it */
*rtp = rt->dst.rt6_next;
rt->rt6i_node = NULL;
net->ipv6.rt6_stats->fib_rt_entries--;
net->ipv6.rt6_stats->fib_discarded_routes++;
+ /* Flush all cached dst in exception table */
+ rt6_flush_exceptions(rt);
+
/* Reset round-robin state, if necessary */
- if (fn->rr_ptr == rt)
+ if (rcu_access_pointer(fn->rr_ptr) == rt)
fn->rr_ptr = NULL;
/* Remove this entry from other siblings */
@@ -1441,34 +1672,38 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
FOR_WALKERS(net, w) {
if (w->state == FWS_C && w->leaf == rt) {
RT6_TRACE("walker %p adjusted by delroute\n", w);
- w->leaf = rt->dst.rt6_next;
+ w->leaf = rcu_dereference_protected(rt->dst.rt6_next,
+ lockdep_is_held(&table->tb6_lock));
if (!w->leaf)
w->state = FWS_U;
}
}
read_unlock(&net->ipv6.fib6_walker_lock);
- rt->dst.rt6_next = NULL;
-
/* If it was last route, expunge its radix tree node */
- if (!fn->leaf) {
+ if (!rcu_access_pointer(fn->leaf)) {
fn->fn_flags &= ~RTN_RTINFO;
net->ipv6.rt6_stats->fib_route_nodes--;
- fn = fib6_repair_tree(net, fn);
+ fn = fib6_repair_tree(net, table, fn);
}
fib6_purge_rt(rt, fn, net);
+ call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt, NULL);
if (!info->skip_notify)
inet6_rt_notify(RTM_DELROUTE, rt, info, 0);
rt6_release(rt);
}
+/* Need to own table->tb6_lock */
int fib6_del(struct rt6_info *rt, struct nl_info *info)
{
+ struct fib6_node *fn = rcu_dereference_protected(rt->rt6i_node,
+ lockdep_is_held(&rt->rt6i_table->tb6_lock));
+ struct fib6_table *table = rt->rt6i_table;
struct net *net = info->nl_net;
- struct fib6_node *fn = rt->rt6i_node;
- struct rt6_info **rtp;
+ struct rt6_info __rcu **rtp;
+ struct rt6_info __rcu **rtp_next;
#if RT6_DEBUG >= 2
if (rt->dst.obsolete > 0) {
@@ -1481,28 +1716,22 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info)
WARN_ON(!(fn->fn_flags & RTN_RTINFO));
- if (!(rt->rt6i_flags & RTF_CACHE)) {
- struct fib6_node *pn = fn;
-#ifdef CONFIG_IPV6_SUBTREES
- /* clones of this route might be in another subtree */
- if (rt->rt6i_src.plen) {
- while (!(pn->fn_flags & RTN_ROOT))
- pn = pn->parent;
- pn = pn->parent;
- }
-#endif
- fib6_prune_clones(info->nl_net, pn);
- }
+ /* remove cached dst from exception table */
+ if (rt->rt6i_flags & RTF_CACHE)
+ return rt6_remove_exception_rt(rt);
/*
* Walk the leaf entries looking for ourself
*/
- for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->dst.rt6_next) {
- if (*rtp == rt) {
- fib6_del_route(fn, rtp, info);
+ for (rtp = &fn->leaf; *rtp; rtp = rtp_next) {
+ struct rt6_info *cur = rcu_dereference_protected(*rtp,
+ lockdep_is_held(&table->tb6_lock));
+ if (rt == cur) {
+ fib6_del_route(table, fn, rtp, info);
return 0;
}
+ rtp_next = &cur->dst.rt6_next;
}
return -ENOENT;
}
@@ -1529,22 +1758,22 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info)
* 0 -> walk is complete.
* >0 -> walk is incomplete (i.e. suspended)
* <0 -> walk is terminated by an error.
+ *
+ * This function is called with tb6_lock held.
*/
static int fib6_walk_continue(struct fib6_walker *w)
{
- struct fib6_node *fn, *pn;
+ struct fib6_node *fn, *pn, *left, *right;
+
+ /* w->root should always be table->tb6_root */
+ WARN_ON_ONCE(!(w->root->fn_flags & RTN_TL_ROOT));
for (;;) {
fn = w->node;
if (!fn)
return 0;
- if (w->prune && fn != w->root &&
- fn->fn_flags & RTN_RTINFO && w->state < FWS_C) {
- w->state = FWS_C;
- w->leaf = fn->leaf;
- }
switch (w->state) {
#ifdef CONFIG_IPV6_SUBTREES
case FWS_S:
@@ -1554,21 +1783,26 @@ static int fib6_walk_continue(struct fib6_walker *w)
}
w->state = FWS_L;
#endif
+ /* fall through */
case FWS_L:
- if (fn->left) {
- w->node = fn->left;
+ left = rcu_dereference_protected(fn->left, 1);
+ if (left) {
+ w->node = left;
w->state = FWS_INIT;
continue;
}
w->state = FWS_R;
+ /* fall through */
case FWS_R:
- if (fn->right) {
- w->node = fn->right;
+ right = rcu_dereference_protected(fn->right, 1);
+ if (right) {
+ w->node = right;
w->state = FWS_INIT;
continue;
}
w->state = FWS_C;
- w->leaf = fn->leaf;
+ w->leaf = rcu_dereference_protected(fn->leaf, 1);
+ /* fall through */
case FWS_C:
if (w->leaf && fn->fn_flags & RTN_RTINFO) {
int err;
@@ -1587,10 +1821,13 @@ static int fib6_walk_continue(struct fib6_walker *w)
}
skip:
w->state = FWS_U;
+ /* fall through */
case FWS_U:
if (fn == w->root)
return 0;
- pn = fn->parent;
+ pn = rcu_dereference_protected(fn->parent, 1);
+ left = rcu_dereference_protected(pn->left, 1);
+ right = rcu_dereference_protected(pn->right, 1);
w->node = pn;
#ifdef CONFIG_IPV6_SUBTREES
if (FIB6_SUBTREE(pn) == fn) {
@@ -1599,13 +1836,13 @@ skip:
continue;
}
#endif
- if (pn->left == fn) {
+ if (left == fn) {
w->state = FWS_R;
continue;
}
- if (pn->right == fn) {
+ if (right == fn) {
w->state = FWS_C;
- w->leaf = w->node->leaf;
+ w->leaf = rcu_dereference_protected(w->node->leaf, 1);
continue;
}
#if RT6_DEBUG >= 2
@@ -1648,7 +1885,7 @@ static int fib6_clean_node(struct fib6_walker *w)
return 0;
}
- for (rt = w->leaf; rt; rt = rt->dst.rt6_next) {
+ for_each_fib6_walker_rt(w) {
res = c->func(rt, c->arg);
if (res < 0) {
w->leaf = rt;
@@ -1656,7 +1893,9 @@ static int fib6_clean_node(struct fib6_walker *w)
if (res) {
#if RT6_DEBUG >= 2
pr_debug("%s: del failed: rt=%p@%p err=%d\n",
- __func__, rt, rt->rt6i_node, res);
+ __func__, rt,
+ rcu_access_pointer(rt->rt6i_node),
+ res);
#endif
continue;
}
@@ -1674,20 +1913,16 @@ static int fib6_clean_node(struct fib6_walker *w)
* func is called on each route.
* It may return -1 -> delete this route.
* 0 -> continue walking
- *
- * prune==1 -> only immediate children of node (certainly,
- * ignoring pure split nodes) will be scanned.
*/
static void fib6_clean_tree(struct net *net, struct fib6_node *root,
int (*func)(struct rt6_info *, void *arg),
- bool prune, int sernum, void *arg)
+ int sernum, void *arg)
{
struct fib6_cleaner c;
c.w.root = root;
c.w.func = fib6_clean_node;
- c.w.prune = prune;
c.w.count = 0;
c.w.skip = 0;
c.func = func;
@@ -1710,10 +1945,10 @@ static void __fib6_clean_all(struct net *net,
for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
head = &net->ipv6.fib_table_hash[h];
hlist_for_each_entry_rcu(table, head, tb6_hlist) {
- write_lock_bh(&table->tb6_lock);
+ spin_lock_bh(&table->tb6_lock);
fib6_clean_tree(net, &table->tb6_root,
- func, false, sernum, arg);
- write_unlock_bh(&table->tb6_lock);
+ func, sernum, arg);
+ spin_unlock_bh(&table->tb6_lock);
}
}
rcu_read_unlock();
@@ -1725,22 +1960,6 @@ void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *),
__fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg);
}
-static int fib6_prune_clone(struct rt6_info *rt, void *arg)
-{
- if (rt->rt6i_flags & RTF_CACHE) {
- RT6_TRACE("pruning clone %p\n", rt);
- return -1;
- }
-
- return 0;
-}
-
-static void fib6_prune_clones(struct net *net, struct fib6_node *fn)
-{
- fib6_clean_tree(net, fn, fib6_prune_clone, true,
- FIB6_NO_SERNUM_CHANGE, NULL);
-}
-
static void fib6_flush_trees(struct net *net)
{
int new_sernum = fib6_new_sernum(net);
@@ -1752,12 +1971,6 @@ static void fib6_flush_trees(struct net *net)
* Garbage collection
*/
-struct fib6_gc_args
-{
- int timeout;
- int more;
-};
-
static int fib6_age(struct rt6_info *rt, void *arg)
{
struct fib6_gc_args *gc_args = arg;
@@ -1766,9 +1979,6 @@ static int fib6_age(struct rt6_info *rt, void *arg)
/*
* check addrconf expiration here.
* Routes are expired even if they are in use.
- *
- * Also age clones. Note, that clones are aged out
- * only if they are not in use now.
*/
if (rt->rt6i_flags & RTF_EXPIRES && rt->dst.expires) {
@@ -1777,29 +1987,14 @@ static int fib6_age(struct rt6_info *rt, void *arg)
return -1;
}
gc_args->more++;
- } else if (rt->rt6i_flags & RTF_CACHE) {
- if (atomic_read(&rt->dst.__refcnt) == 1 &&
- time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
- RT6_TRACE("aging clone %p\n", rt);
- return -1;
- } else if (rt->rt6i_flags & RTF_GATEWAY) {
- struct neighbour *neigh;
- __u8 neigh_flags = 0;
-
- neigh = dst_neigh_lookup(&rt->dst, &rt->rt6i_gateway);
- if (neigh) {
- neigh_flags = neigh->flags;
- neigh_release(neigh);
- }
- if (!(neigh_flags & NTF_ROUTER)) {
- RT6_TRACE("purging route %p via non-router but gateway\n",
- rt);
- return -1;
- }
- }
- gc_args->more++;
}
+ /* Also age clones in the exception table.
+ * Note, that clones are aged out
+ * only if they are not in use now.
+ */
+ rt6_age_exceptions(rt, gc_args, now);
+
return 0;
}
@@ -1839,6 +2034,11 @@ static void fib6_gc_timer_cb(unsigned long arg)
static int __net_init fib6_net_init(struct net *net)
{
size_t size = sizeof(struct hlist_head) * FIB6_TABLE_HASHSZ;
+ int err;
+
+ err = fib6_notifier_init(net);
+ if (err)
+ return err;
spin_lock_init(&net->ipv6.fib6_gc_lock);
rwlock_init(&net->ipv6.fib6_walker_lock);
@@ -1862,7 +2062,8 @@ static int __net_init fib6_net_init(struct net *net)
goto out_fib_table_hash;
net->ipv6.fib6_main_tbl->tb6_id = RT6_TABLE_MAIN;
- net->ipv6.fib6_main_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry;
+ rcu_assign_pointer(net->ipv6.fib6_main_tbl->tb6_root.leaf,
+ net->ipv6.ip6_null_entry);
net->ipv6.fib6_main_tbl->tb6_root.fn_flags =
RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
inet_peer_base_init(&net->ipv6.fib6_main_tbl->tb6_peers);
@@ -1873,7 +2074,8 @@ static int __net_init fib6_net_init(struct net *net)
if (!net->ipv6.fib6_local_tbl)
goto out_fib6_main_tbl;
net->ipv6.fib6_local_tbl->tb6_id = RT6_TABLE_LOCAL;
- net->ipv6.fib6_local_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry;
+ rcu_assign_pointer(net->ipv6.fib6_local_tbl->tb6_root.leaf,
+ net->ipv6.ip6_null_entry);
net->ipv6.fib6_local_tbl->tb6_root.fn_flags =
RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
inet_peer_base_init(&net->ipv6.fib6_local_tbl->tb6_peers);
@@ -1891,22 +2093,31 @@ out_fib_table_hash:
out_rt6_stats:
kfree(net->ipv6.rt6_stats);
out_timer:
+ fib6_notifier_exit(net);
return -ENOMEM;
}
static void fib6_net_exit(struct net *net)
{
+ unsigned int i;
+
rt6_ifdown(net, NULL);
del_timer_sync(&net->ipv6.ip6_fib_timer);
-#ifdef CONFIG_IPV6_MULTIPLE_TABLES
- inetpeer_invalidate_tree(&net->ipv6.fib6_local_tbl->tb6_peers);
- kfree(net->ipv6.fib6_local_tbl);
-#endif
- inetpeer_invalidate_tree(&net->ipv6.fib6_main_tbl->tb6_peers);
- kfree(net->ipv6.fib6_main_tbl);
+ for (i = 0; i < FIB6_TABLE_HASHSZ; i++) {
+ struct hlist_head *head = &net->ipv6.fib_table_hash[i];
+ struct hlist_node *tmp;
+ struct fib6_table *tb;
+
+ hlist_for_each_entry_safe(tb, tmp, head, tb6_hlist) {
+ hlist_del(&tb->tb6_hlist);
+ fib6_free_table(tb);
+ }
+ }
+
kfree(net->ipv6.fib_table_hash);
kfree(net->ipv6.rt6_stats);
+ fib6_notifier_exit(net);
}
static struct pernet_operations fib6_net_ops = {
@@ -1930,7 +2141,7 @@ int __init fib6_init(void)
goto out_kmem_cache_create;
ret = __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib,
- NULL);
+ 0);
if (ret)
goto out_unregister_subsys;
@@ -1994,7 +2205,9 @@ static int ipv6_route_yield(struct fib6_walker *w)
return 1;
do {
- iter->w.leaf = iter->w.leaf->dst.rt6_next;
+ iter->w.leaf = rcu_dereference_protected(
+ iter->w.leaf->dst.rt6_next,
+ lockdep_is_held(&iter->tbl->tb6_lock));
iter->skip--;
if (!iter->skip && iter->w.leaf)
return 1;
@@ -2059,7 +2272,7 @@ static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
if (!v)
goto iter_table;
- n = ((struct rt6_info *)v)->dst.rt6_next;
+ n = rcu_dereference_bh(((struct rt6_info *)v)->dst.rt6_next);
if (n) {
++*pos;
return n;
@@ -2067,9 +2280,9 @@ static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
iter_table:
ipv6_route_check_sernum(iter);
- read_lock(&iter->tbl->tb6_lock);
+ spin_lock_bh(&iter->tbl->tb6_lock);
r = fib6_walk_continue(&iter->w);
- read_unlock(&iter->tbl->tb6_lock);
+ spin_unlock_bh(&iter->tbl->tb6_lock);
if (r > 0) {
if (v)
++*pos;
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 8081bafe441b..9f2e73c71768 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -47,7 +47,7 @@ static atomic_t fl_size = ATOMIC_INIT(0);
static struct ip6_flowlabel __rcu *fl_ht[FL_HASH_MASK+1];
static void ip6_fl_gc(unsigned long dummy);
-static DEFINE_TIMER(ip6_fl_gc_timer, ip6_fl_gc, 0, 0);
+static DEFINE_TIMER(ip6_fl_gc_timer, ip6_fl_gc);
/* FL hash table lock: it protects only of GC */
@@ -315,6 +315,7 @@ struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space,
}
opt_space->dst1opt = fopt->dst1opt;
opt_space->opt_flen = fopt->opt_flen;
+ opt_space->tot_len = fopt->tot_len;
return opt_space;
}
EXPORT_SYMBOL_GPL(fl6_merge_options);
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 67ff2aaf5dcb..b90bad7a4e56 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -369,6 +369,7 @@ static void ip6gre_tunnel_uninit(struct net_device *dev)
static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
+ struct net *net = dev_net(skb->dev);
const struct gre_base_hdr *greh;
const struct ipv6hdr *ipv6h;
int grehlen = sizeof(*greh);
@@ -402,19 +403,21 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
return;
switch (type) {
- __u32 teli;
struct ipv6_tlv_tnl_enc_lim *tel;
- __u32 mtu;
+ __u32 teli;
case ICMPV6_DEST_UNREACH:
net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n",
t->parms.name);
- break;
+ if (code != ICMPV6_PORT_UNREACH)
+ break;
+ return;
case ICMPV6_TIME_EXCEED:
if (code == ICMPV6_EXC_HOPLIMIT) {
net_dbg_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n",
t->parms.name);
+ break;
}
- break;
+ return;
case ICMPV6_PARAMPROB:
teli = 0;
if (code == ICMPV6_HDR_FIELD)
@@ -430,13 +433,14 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
net_dbg_ratelimited("%s: Recipient unable to parse tunneled packet!\n",
t->parms.name);
}
- break;
+ return;
case ICMPV6_PKT_TOOBIG:
- mtu = be32_to_cpu(info) - offset;
- if (mtu < IPV6_MIN_MTU)
- mtu = IPV6_MIN_MTU;
- t->dev->mtu = mtu;
- break;
+ ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
+ return;
+ case NDISC_REDIRECT:
+ ip6_redirect(skb, net, skb->dev->ifindex, 0,
+ sock_net_uid(net, NULL));
+ return;
}
if (time_before(jiffies, t->err_time + IP6TUNNEL_ERR_TIMEO))
@@ -498,8 +502,7 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
__u32 *pmtu, __be16 proto)
{
struct ip6_tnl *tunnel = netdev_priv(dev);
- __be16 protocol = (dev->type == ARPHRD_ETHER) ?
- htons(ETH_P_TEB) : proto;
+ __be16 protocol;
if (dev->type == ARPHRD_ETHER)
IPCB(skb)->flags = 0;
@@ -513,6 +516,7 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
tunnel->o_seqno++;
/* Push GRE header. */
+ protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto;
gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
protocol, tunnel->parms.o_key, htonl(tunnel->o_seqno));
@@ -938,24 +942,25 @@ done:
}
static int ip6gre_header(struct sk_buff *skb, struct net_device *dev,
- unsigned short type,
- const void *daddr, const void *saddr, unsigned int len)
+ unsigned short type, const void *daddr,
+ const void *saddr, unsigned int len)
{
struct ip6_tnl *t = netdev_priv(dev);
- struct ipv6hdr *ipv6h = skb_push(skb, t->hlen);
- __be16 *p = (__be16 *)(ipv6h+1);
+ struct ipv6hdr *ipv6h;
+ __be16 *p;
- ip6_flow_hdr(ipv6h, 0,
- ip6_make_flowlabel(dev_net(dev), skb,
- t->fl.u.ip6.flowlabel, true,
- &t->fl.u.ip6));
+ ipv6h = skb_push(skb, t->hlen + sizeof(*ipv6h));
+ ip6_flow_hdr(ipv6h, 0, ip6_make_flowlabel(dev_net(dev), skb,
+ t->fl.u.ip6.flowlabel,
+ true, &t->fl.u.ip6));
ipv6h->hop_limit = t->parms.hop_limit;
ipv6h->nexthdr = NEXTHDR_GRE;
ipv6h->saddr = t->parms.laddr;
ipv6h->daddr = t->parms.raddr;
- p[0] = t->parms.o_flags;
- p[1] = htons(type);
+ p = (__be16 *)(ipv6h + 1);
+ p[0] = t->parms.o_flags;
+ p[1] = htons(type);
/*
* Set the source hardware address.
@@ -1153,19 +1158,21 @@ err_alloc_dev:
return err;
}
-static void __net_exit ip6gre_exit_net(struct net *net)
+static void __net_exit ip6gre_exit_batch_net(struct list_head *net_list)
{
+ struct net *net;
LIST_HEAD(list);
rtnl_lock();
- ip6gre_destroy_tunnels(net, &list);
+ list_for_each_entry(net, net_list, exit_list)
+ ip6gre_destroy_tunnels(net, &list);
unregister_netdevice_many(&list);
rtnl_unlock();
}
static struct pernet_operations ip6gre_net_ops = {
.init = ip6gre_init_net,
- .exit = ip6gre_exit_net,
+ .exit_batch = ip6gre_exit_batch_net,
.id = &ip6gre_net_id,
.size = sizeof(struct ip6gre_net),
};
@@ -1308,6 +1315,7 @@ static void ip6gre_tap_setup(struct net_device *dev)
dev->features |= NETIF_F_NETNS_LOCAL;
dev->priv_flags &= ~IFF_TX_SKB_SHARING;
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+ netif_keep_dst(dev);
}
static bool ip6gre_netlink_encap_parms(struct nlattr *data[],
diff --git a/net/ipv6/ip6_icmp.c b/net/ipv6/ip6_icmp.c
index 713676f14a0e..02045494c24c 100644
--- a/net/ipv6/ip6_icmp.c
+++ b/net/ipv6/ip6_icmp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/export.h>
#include <linux/icmpv6.h>
#include <linux/mutex.h>
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index cdb3728faca7..4a87f9428ca5 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -105,7 +105,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
for (skb = segs; skb; skb = skb->next) {
ipv6h = (struct ipv6hdr *)(skb_mac_header(skb) + nhoff);
- if (gso_partial)
+ if (gso_partial && skb_is_gso(skb))
payload_len = skb_shinfo(skb)->gso_size +
SKB_GSO_CB(skb)->data_offset +
skb->head - (unsigned char *)(ipv6h + 1);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 2dfe50d8d609..5110a418cc4d 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1110,69 +1110,6 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
-static inline int ip6_ufo_append_data(struct sock *sk,
- struct sk_buff_head *queue,
- int getfrag(void *from, char *to, int offset, int len,
- int odd, struct sk_buff *skb),
- void *from, int length, int hh_len, int fragheaderlen,
- int exthdrlen, int transhdrlen, int mtu,
- unsigned int flags, const struct flowi6 *fl6)
-
-{
- struct sk_buff *skb;
- int err;
-
- /* There is support for UDP large send offload by network
- * device, so create one single skb packet containing complete
- * udp datagram
- */
- skb = skb_peek_tail(queue);
- if (!skb) {
- skb = sock_alloc_send_skb(sk,
- hh_len + fragheaderlen + transhdrlen + 20,
- (flags & MSG_DONTWAIT), &err);
- if (!skb)
- return err;
-
- /* reserve space for Hardware header */
- skb_reserve(skb, hh_len);
-
- /* create space for UDP/IP header */
- skb_put(skb, fragheaderlen + transhdrlen);
-
- /* initialize network header pointer */
- skb_set_network_header(skb, exthdrlen);
-
- /* initialize protocol header pointer */
- skb->transport_header = skb->network_header + fragheaderlen;
-
- skb->protocol = htons(ETH_P_IPV6);
- skb->csum = 0;
-
- if (flags & MSG_CONFIRM)
- skb_set_dst_pending_confirm(skb, 1);
-
- __skb_queue_tail(queue, skb);
- } else if (skb_is_gso(skb)) {
- goto append;
- }
-
- skb->ip_summed = CHECKSUM_PARTIAL;
- /* Specify the length of each IPv6 datagram fragment.
- * It has to be a multiple of 8.
- */
- skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
- sizeof(struct frag_hdr)) & ~7;
- skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
- skb_shinfo(skb)->ip6_frag_id = ipv6_select_ident(sock_net(sk),
- &fl6->daddr,
- &fl6->saddr);
-
-append:
- return skb_append_datato_frags(sk, skb, getfrag, from,
- (length - transhdrlen));
-}
-
static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
gfp_t gfp)
{
@@ -1224,11 +1161,11 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
if (WARN_ON(v6_cork->opt))
return -EINVAL;
- v6_cork->opt = kzalloc(opt->tot_len, sk->sk_allocation);
+ v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
if (unlikely(!v6_cork->opt))
return -ENOBUFS;
- v6_cork->opt->tot_len = opt->tot_len;
+ v6_cork->opt->tot_len = sizeof(*opt);
v6_cork->opt->opt_flen = opt->opt_flen;
v6_cork->opt->opt_nflen = opt->opt_nflen;
@@ -1381,20 +1318,6 @@ emsgsize:
*/
cork->length += length;
- if ((skb && skb_is_gso(skb)) ||
- (((length + (skb ? skb->len : headersize)) > mtu) &&
- (skb_queue_len(queue) <= 1) &&
- (sk->sk_protocol == IPPROTO_UDP) &&
- (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) &&
- (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk))) {
- err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
- hh_len, fragheaderlen, exthdrlen,
- transhdrlen, mtu, flags, fl6);
- if (err)
- goto error;
- return 0;
- }
-
if (!skb)
goto alloc_new_skb;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 3a0ba2ae4b0f..3d3092adf1d2 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -171,7 +171,7 @@ ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_
}
t = rcu_dereference(ip6n->collect_md_tun);
- if (t)
+ if (t && t->dev->flags & IFF_UP)
return t;
t = rcu_dereference(ip6n->tnls_wc[0]);
@@ -471,15 +471,16 @@ static int
ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
u8 *type, u8 *code, int *msg, __u32 *info, int offset)
{
- const struct ipv6hdr *ipv6h = (const struct ipv6hdr *) skb->data;
- struct ip6_tnl *t;
- int rel_msg = 0;
+ const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)skb->data;
+ struct net *net = dev_net(skb->dev);
u8 rel_type = ICMPV6_DEST_UNREACH;
u8 rel_code = ICMPV6_ADDR_UNREACH;
- u8 tproto;
__u32 rel_info = 0;
- __u16 len;
+ struct ip6_tnl *t;
int err = -ENOENT;
+ int rel_msg = 0;
+ u8 tproto;
+ __u16 len;
/* If the packet doesn't contain the original IPv6 header we are
in trouble since we might need the source address for further
@@ -490,16 +491,15 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
if (!t)
goto out;
- tproto = ACCESS_ONCE(t->parms.proto);
+ tproto = READ_ONCE(t->parms.proto);
if (tproto != ipproto && tproto != 0)
goto out;
err = 0;
switch (*type) {
- __u32 teli;
struct ipv6_tlv_tnl_enc_lim *tel;
- __u32 mtu;
+ __u32 mtu, teli;
case ICMPV6_DEST_UNREACH:
net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n",
t->parms.name);
@@ -530,11 +530,11 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
}
break;
case ICMPV6_PKT_TOOBIG:
+ ip6_update_pmtu(skb, net, htonl(*info), 0, 0,
+ sock_net_uid(net, NULL));
mtu = *info - offset;
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
- t->dev->mtu = mtu;
-
len = sizeof(*ipv6h) + ntohs(ipv6h->payload_len);
if (len > mtu) {
rel_type = ICMPV6_PKT_TOOBIG;
@@ -543,6 +543,10 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
rel_msg = 1;
}
break;
+ case NDISC_REDIRECT:
+ ip6_redirect(skb, net, skb->dev->ifindex, 0,
+ sock_net_uid(net, NULL));
+ break;
}
*type = rel_type;
@@ -559,13 +563,12 @@ static int
ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
- int rel_msg = 0;
- u8 rel_type = type;
- u8 rel_code = code;
__u32 rel_info = ntohl(info);
- int err;
- struct sk_buff *skb2;
const struct iphdr *eiph;
+ struct sk_buff *skb2;
+ int err, rel_msg = 0;
+ u8 rel_type = type;
+ u8 rel_code = code;
struct rtable *rt;
struct flowi4 fl4;
@@ -590,9 +593,6 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
rel_type = ICMP_DEST_UNREACH;
rel_code = ICMP_FRAG_NEEDED;
break;
- case NDISC_REDIRECT:
- rel_type = ICMP_REDIRECT;
- rel_code = ICMP_REDIR_HOST;
default:
return 0;
}
@@ -611,33 +611,26 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
eiph = ip_hdr(skb2);
/* Try to guess incoming interface */
- rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL,
- eiph->saddr, 0,
- 0, 0,
- IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
+ rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL, eiph->saddr,
+ 0, 0, 0, IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
if (IS_ERR(rt))
goto out;
skb2->dev = rt->dst.dev;
+ ip_rt_put(rt);
/* route "incoming" packet */
if (rt->rt_flags & RTCF_LOCAL) {
- ip_rt_put(rt);
- rt = NULL;
rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL,
- eiph->daddr, eiph->saddr,
- 0, 0,
- IPPROTO_IPIP,
- RT_TOS(eiph->tos), 0);
- if (IS_ERR(rt) ||
- rt->dst.dev->type != ARPHRD_TUNNEL) {
+ eiph->daddr, eiph->saddr, 0, 0,
+ IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
+ if (IS_ERR(rt) || rt->dst.dev->type != ARPHRD_TUNNEL) {
if (!IS_ERR(rt))
ip_rt_put(rt);
goto out;
}
skb_dst_set(skb2, &rt->dst);
} else {
- ip_rt_put(rt);
if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos,
skb2->dev) ||
skb_dst(skb2)->dev->type != ARPHRD_TUNNEL)
@@ -649,10 +642,9 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (rel_info > dst_mtu(skb_dst(skb2)))
goto out;
- skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), NULL, skb2, rel_info);
+ skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), NULL, skb2,
+ rel_info);
}
- if (rel_type == ICMP_REDIRECT)
- skb_dst(skb2)->ops->redirect(skb_dst(skb2), NULL, skb2);
icmp_send(skb2, rel_type, rel_code, htonl(rel_info));
@@ -665,11 +657,10 @@ static int
ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
- int rel_msg = 0;
+ __u32 rel_info = ntohl(info);
+ int err, rel_msg = 0;
u8 rel_type = type;
u8 rel_code = code;
- __u32 rel_info = ntohl(info);
- int err;
err = ip6_tnl_err(skb, IPPROTO_IPV6, opt, &rel_type, &rel_code,
&rel_msg, &rel_info, offset);
@@ -769,7 +760,8 @@ int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
if ((ipv6_addr_is_multicast(laddr) ||
likely(ipv6_chk_addr(net, laddr, ldev, 0))) &&
- likely(!ipv6_chk_addr(net, raddr, NULL, 0)))
+ ((p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) ||
+ likely(!ipv6_chk_addr(net, raddr, NULL, 0))))
ret = 1;
}
return ret;
@@ -899,7 +891,7 @@ static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto,
t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr);
if (t) {
- u8 tproto = ACCESS_ONCE(t->parms.proto);
+ u8 tproto = READ_ONCE(t->parms.proto);
if (tproto != ipproto && tproto != 0)
goto drop;
@@ -999,7 +991,8 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t,
if (unlikely(!ipv6_chk_addr(net, laddr, ldev, 0)))
pr_warn("%s xmit: Local address not yet configured!\n",
p->name);
- else if (!ipv6_addr_is_multicast(raddr) &&
+ else if (!(p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) &&
+ !ipv6_addr_is_multicast(raddr) &&
unlikely(ipv6_chk_addr(net, raddr, NULL, 0)))
pr_warn("%s xmit: Routing loop! Remote address found on this node!\n",
p->name);
@@ -1043,6 +1036,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
struct dst_entry *dst = NULL, *ndst = NULL;
struct net_device *tdev;
int mtu;
+ unsigned int eth_hlen = t->dev->type == ARPHRD_ETHER ? ETH_HLEN : 0;
unsigned int psh_hlen = sizeof(struct ipv6hdr) + t->encap_hlen;
unsigned int max_headroom = psh_hlen;
bool use_cache = false;
@@ -1124,7 +1118,7 @@ route_lookup:
t->parms.name);
goto tx_err_dst_release;
}
- mtu = dst_mtu(dst) - psh_hlen - t->tun_hlen;
+ mtu = dst_mtu(dst) - eth_hlen - psh_hlen - t->tun_hlen;
if (encap_limit >= 0) {
max_headroom += 8;
mtu -= 8;
@@ -1133,7 +1127,7 @@ route_lookup:
mtu = IPV6_MIN_MTU;
if (skb_dst(skb) && !t->parms.collect_md)
skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
- if (skb->len - t->tun_hlen > mtu && !skb_is_gso(skb)) {
+ if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) {
*pmtu = mtu;
err = -EMSGSIZE;
goto tx_err_dst_release;
@@ -1184,6 +1178,7 @@ route_lookup:
init_tel_txopt(&opt, encap_limit);
ipv6_push_frag_opts(skb, &opt.ops, &proto);
}
+ hop_limit = hop_limit ? : ip6_dst_hoplimit(dst);
/* Calculate max headroom for all the headers and adjust
* needed_headroom if necessary.
@@ -1231,7 +1226,7 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
- tproto = ACCESS_ONCE(t->parms.proto);
+ tproto = READ_ONCE(t->parms.proto);
if (tproto != IPPROTO_IPIP && tproto != 0)
return -1;
@@ -1301,7 +1296,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
u8 tproto;
int err;
- tproto = ACCESS_ONCE(t->parms.proto);
+ tproto = READ_ONCE(t->parms.proto);
if ((tproto != IPPROTO_IPV6 && tproto != 0) ||
ip6_tnl_addr_conflict(t, ipv6h))
return -1;
@@ -2166,17 +2161,16 @@ static struct xfrm6_tunnel ip6ip6_handler __read_mostly = {
.priority = 1,
};
-static void __net_exit ip6_tnl_destroy_tunnels(struct net *net)
+static void __net_exit ip6_tnl_destroy_tunnels(struct net *net, struct list_head *list)
{
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
struct net_device *dev, *aux;
int h;
struct ip6_tnl *t;
- LIST_HEAD(list);
for_each_netdev_safe(net, dev, aux)
if (dev->rtnl_link_ops == &ip6_link_ops)
- unregister_netdevice_queue(dev, &list);
+ unregister_netdevice_queue(dev, list);
for (h = 0; h < IP6_TUNNEL_HASH_SIZE; h++) {
t = rtnl_dereference(ip6n->tnls_r_l[h]);
@@ -2185,12 +2179,10 @@ static void __net_exit ip6_tnl_destroy_tunnels(struct net *net)
* been added to the list by the previous loop.
*/
if (!net_eq(dev_net(t->dev), net))
- unregister_netdevice_queue(t->dev, &list);
+ unregister_netdevice_queue(t->dev, list);
t = rtnl_dereference(t->next);
}
}
-
- unregister_netdevice_many(&list);
}
static int __net_init ip6_tnl_init_net(struct net *net)
@@ -2234,16 +2226,21 @@ err_alloc_dev:
return err;
}
-static void __net_exit ip6_tnl_exit_net(struct net *net)
+static void __net_exit ip6_tnl_exit_batch_net(struct list_head *net_list)
{
+ struct net *net;
+ LIST_HEAD(list);
+
rtnl_lock();
- ip6_tnl_destroy_tunnels(net);
+ list_for_each_entry(net, net_list, exit_list)
+ ip6_tnl_destroy_tunnels(net, &list);
+ unregister_netdevice_many(&list);
rtnl_unlock();
}
static struct pernet_operations ip6_tnl_net_ops = {
.init = ip6_tnl_init_net,
- .exit = ip6_tnl_exit_net,
+ .exit_batch = ip6_tnl_exit_batch_net,
.id = &ip6_tnl_net_id,
.size = sizeof(struct ip6_tnl_net),
};
@@ -2258,6 +2255,9 @@ static int __init ip6_tunnel_init(void)
{
int err;
+ if (!ipv6_mod_enabled())
+ return -EOPNOTSUPP;
+
err = register_pernet_device(&ip6_tnl_net_ops);
if (err < 0)
goto out_pernet;
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 486c2305f53c..dbb74f3c57a7 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -445,6 +445,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
struct dst_entry *dst = skb_dst(skb);
struct net_device *tdev;
struct xfrm_state *x;
+ int pkt_len = skb->len;
int err = -1;
int mtu;
@@ -502,7 +503,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats);
u64_stats_update_begin(&tstats->syncp);
- tstats->tx_bytes += skb->len;
+ tstats->tx_bytes += pkt_len;
tstats->tx_packets++;
u64_stats_update_end(&tstats->syncp);
} else {
@@ -1052,23 +1053,22 @@ static struct rtnl_link_ops vti6_link_ops __read_mostly = {
.get_link_net = ip6_tnl_get_link_net,
};
-static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n)
+static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n,
+ struct list_head *list)
{
int h;
struct ip6_tnl *t;
- LIST_HEAD(list);
for (h = 0; h < IP6_VTI_HASH_SIZE; h++) {
t = rtnl_dereference(ip6n->tnls_r_l[h]);
while (t) {
- unregister_netdevice_queue(t->dev, &list);
+ unregister_netdevice_queue(t->dev, list);
t = rtnl_dereference(t->next);
}
}
t = rtnl_dereference(ip6n->tnls_wc[0]);
- unregister_netdevice_queue(t->dev, &list);
- unregister_netdevice_many(&list);
+ unregister_netdevice_queue(t->dev, list);
}
static int __net_init vti6_init_net(struct net *net)
@@ -1108,18 +1108,24 @@ err_alloc_dev:
return err;
}
-static void __net_exit vti6_exit_net(struct net *net)
+static void __net_exit vti6_exit_batch_net(struct list_head *net_list)
{
- struct vti6_net *ip6n = net_generic(net, vti6_net_id);
+ struct vti6_net *ip6n;
+ struct net *net;
+ LIST_HEAD(list);
rtnl_lock();
- vti6_destroy_tunnels(ip6n);
+ list_for_each_entry(net, net_list, exit_list) {
+ ip6n = net_generic(net, vti6_net_id);
+ vti6_destroy_tunnels(ip6n, &list);
+ }
+ unregister_netdevice_many(&list);
rtnl_unlock();
}
static struct pernet_operations vti6_net_ops = {
.init = vti6_init_net,
- .exit = vti6_exit_net,
+ .exit_batch = vti6_exit_batch_net,
.id = &vti6_net_id,
.size = sizeof(struct vti6_net),
};
@@ -1145,33 +1151,6 @@ static struct xfrm6_protocol vti_ipcomp6_protocol __read_mostly = {
.priority = 100,
};
-static bool is_vti6_tunnel(const struct net_device *dev)
-{
- return dev->netdev_ops == &vti6_netdev_ops;
-}
-
-static int vti6_device_event(struct notifier_block *unused,
- unsigned long event, void *ptr)
-{
- struct net_device *dev = netdev_notifier_info_to_dev(ptr);
- struct ip6_tnl *t = netdev_priv(dev);
-
- if (!is_vti6_tunnel(dev))
- return NOTIFY_DONE;
-
- switch (event) {
- case NETDEV_DOWN:
- if (!net_eq(t->net, dev_net(dev)))
- xfrm_garbage_collect(t->net);
- break;
- }
- return NOTIFY_DONE;
-}
-
-static struct notifier_block vti6_notifier_block __read_mostly = {
- .notifier_call = vti6_device_event,
-};
-
/**
* vti6_tunnel_init - register protocol and reserve needed resources
*
@@ -1182,8 +1161,6 @@ static int __init vti6_tunnel_init(void)
const char *msg;
int err;
- register_netdevice_notifier(&vti6_notifier_block);
-
msg = "tunnel device";
err = register_pernet_device(&vti6_net_ops);
if (err < 0)
@@ -1216,7 +1193,6 @@ xfrm_proto_ah_failed:
xfrm_proto_esp_failed:
unregister_pernet_device(&vti6_net_ops);
pernet_dev_failed:
- unregister_netdevice_notifier(&vti6_notifier_block);
pr_err("vti6 init: failed to register %s\n", msg);
return err;
}
@@ -1231,7 +1207,6 @@ static void __exit vti6_tunnel_cleanup(void)
xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH);
xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP);
unregister_pernet_device(&vti6_net_ops);
- unregister_netdevice_notifier(&vti6_notifier_block);
}
module_init(vti6_tunnel_init);
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 7454850f2098..9c24b85949c1 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1427,7 +1427,7 @@ int __init ip6_mr_init(void)
}
#endif
rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL,
- ip6mr_rtm_dumproute, NULL);
+ ip6mr_rtm_dumproute, 0);
return 0;
#ifdef CONFIG_IPV6_PIMSM_V2
add_proto_fail:
@@ -1617,6 +1617,10 @@ int ip6mr_sk_done(struct sock *sk)
struct net *net = sock_net(sk);
struct mr6_table *mrt;
+ if (sk->sk_type != SOCK_RAW ||
+ inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
+ return err;
+
rtnl_lock();
ip6mr_for_each_table(mrt, net) {
if (sk == mrt->mroute6_sk) {
@@ -1722,6 +1726,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
case MRT6_ADD_MFC:
case MRT6_DEL_MFC:
parent = -1;
+ /* fall through */
case MRT6_ADD_MFC_PROXY:
case MRT6_DEL_MFC_PROXY:
if (optlen < sizeof(mfc))
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 02d795fe3d7f..b9404feabd78 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -242,7 +242,6 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
pktopt = xchg(&np->pktoptions, NULL);
kfree_skb(pktopt);
- sk->sk_destruct = inet_sock_destruct;
/*
* ... and add it to the refcnt debug socks count
* in the new family. -acme
@@ -378,6 +377,14 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
retv = 0;
break;
+ case IPV6_FREEBIND:
+ if (optlen < sizeof(int))
+ goto e_inval;
+ /* we also don't have a separate freebind bit for IPV6 */
+ inet_sk(sk)->freebind = valbool;
+ retv = 0;
+ break;
+
case IPV6_RECVORIGDSTADDR:
if (optlen < sizeof(int))
goto e_inval;
@@ -1215,6 +1222,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
val = inet_sk(sk)->transparent;
break;
+ case IPV6_FREEBIND:
+ val = inet_sk(sk)->freebind;
+ break;
+
case IPV6_RECVORIGDSTADDR:
val = np->rxopt.bits.rxorigdstaddr;
break;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 0327c1f2e6fc..b3cea200c85e 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -46,6 +46,7 @@
#endif
#include <linux/if_addr.h>
+#include <linux/if_ether.h>
#include <linux/if_arp.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
@@ -127,7 +128,7 @@ struct neigh_table nd_tbl = {
[NEIGH_VAR_BASE_REACHABLE_TIME] = ND_REACHABLE_TIME,
[NEIGH_VAR_DELAY_PROBE_TIME] = 5 * HZ,
[NEIGH_VAR_GC_STALETIME] = 60 * HZ,
- [NEIGH_VAR_QUEUE_LEN_BYTES] = 64 * 1024,
+ [NEIGH_VAR_QUEUE_LEN_BYTES] = SK_WMEM_MAX,
[NEIGH_VAR_PROXY_QLEN] = 64,
[NEIGH_VAR_ANYCAST_DELAY] = 1 * HZ,
[NEIGH_VAR_PROXY_DELAY] = (8 * HZ) / 10,
@@ -426,12 +427,19 @@ static void ip6_nd_hdr(struct sk_buff *skb,
int hop_limit, int len)
{
struct ipv6hdr *hdr;
+ struct inet6_dev *idev;
+ unsigned tclass;
+
+ rcu_read_lock();
+ idev = __in6_dev_get(skb->dev);
+ tclass = idev ? idev->cnf.ndisc_tclass : 0;
+ rcu_read_unlock();
skb_push(skb, sizeof(*hdr));
skb_reset_network_header(skb);
hdr = ipv6_hdr(skb);
- ip6_flow_hdr(hdr, 0, 0);
+ ip6_flow_hdr(hdr, tclass, 0);
hdr->payload_len = htons(len);
hdr->nexthdr = IPPROTO_ICMPV6;
@@ -822,7 +830,7 @@ have_ifp:
* who is doing DAD
* so fail our DAD process
*/
- addrconf_dad_failure(ifp);
+ addrconf_dad_failure(skb, ifp);
return;
} else {
/*
@@ -975,7 +983,7 @@ static void ndisc_recv_na(struct sk_buff *skb)
if (ifp) {
if (skb->pkt_type != PACKET_LOOPBACK
&& (ifp->flags & IFA_F_TENTATIVE)) {
- addrconf_dad_failure(ifp);
+ addrconf_dad_failure(skb, ifp);
return;
}
/* What should we make now? The advertisement
@@ -989,8 +997,8 @@ static void ndisc_recv_na(struct sk_buff *skb)
*/
if (skb->pkt_type != PACKET_LOOPBACK)
ND_PRINTK(1, warn,
- "NA: someone advertises our address %pI6 on %s!\n",
- &ifp->addr, ifp->idev->dev->name);
+ "NA: %pM advertised our address %pI6c on %s!\n",
+ eth_hdr(skb)->h_source, &ifp->addr, ifp->idev->dev->name);
in6_ifa_put(ifp);
return;
}
@@ -1779,6 +1787,7 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event,
static struct notifier_block ndisc_netdev_notifier = {
.notifier_call = ndisc_netdev_event,
+ .priority = ADDRCONF_NOTIFY_PRIORITY - 5,
};
#ifdef CONFIG_SYSCTL
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index fe180c96040e..c6ee0cdd0ba9 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the netfilter modules on top of IPv6.
#
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 1f90644056ac..f06e25065a34 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -39,12 +39,6 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
MODULE_DESCRIPTION("IPv6 packet filter");
-#ifdef CONFIG_NETFILTER_DEBUG
-#define IP_NF_ASSERT(x) WARN_ON(!(x))
-#else
-#define IP_NF_ASSERT(x)
-#endif
-
void *ip6t_alloc_initial_table(const struct xt_table *info)
{
return xt_alloc_initial_table(ip6t, IP6T);
@@ -176,7 +170,7 @@ static const char *const comments[] = {
[NF_IP6_TRACE_COMMENT_POLICY] = "policy",
};
-static struct nf_loginfo trace_loginfo = {
+static const struct nf_loginfo trace_loginfo = {
.type = NF_LOG_TYPE_LOG,
.u = {
.log = {
@@ -284,7 +278,7 @@ ip6t_do_table(struct sk_buff *skb,
acpar.hotdrop = false;
acpar.state = state;
- IP_NF_ASSERT(table->valid_hooks & (1 << hook));
+ WARN_ON(!(table->valid_hooks & (1 << hook)));
local_bh_disable();
addend = xt_write_recseq_begin();
@@ -315,7 +309,7 @@ ip6t_do_table(struct sk_buff *skb,
const struct xt_entry_match *ematch;
struct xt_counters *counter;
- IP_NF_ASSERT(e);
+ WARN_ON(!e);
acpar.thoff = 0;
if (!ip6_packet_match(skb, indev, outdev, &e->ipv6,
&acpar.thoff, &acpar.fragoff, &acpar.hotdrop)) {
@@ -335,7 +329,7 @@ ip6t_do_table(struct sk_buff *skb,
ADD_COUNTER(*counter, skb->len, 1);
t = ip6t_get_target_c(e);
- IP_NF_ASSERT(t->u.kernel.target);
+ WARN_ON(!t->u.kernel.target);
#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
/* The packet is traced: log it */
@@ -801,7 +795,27 @@ get_counters(const struct xt_table_info *t,
ADD_COUNTER(counters[i], bcnt, pcnt);
++i;
+ cond_resched();
+ }
+ }
+}
+
+static void get_old_counters(const struct xt_table_info *t,
+ struct xt_counters counters[])
+{
+ struct ip6t_entry *iter;
+ unsigned int cpu, i;
+
+ for_each_possible_cpu(cpu) {
+ i = 0;
+ xt_entry_foreach(iter, t->entries, t->size) {
+ const struct xt_counters *tmp;
+
+ tmp = xt_get_per_cpu_counter(&iter->counters, cpu);
+ ADD_COUNTER(counters[i], tmp->bcnt, tmp->pcnt);
+ ++i;
}
+ cond_resched();
}
}
@@ -1095,8 +1109,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
(newinfo->number <= oldinfo->initial_entries))
module_put(t->me);
- /* Get the old counters, and synchronize with replace */
- get_counters(oldinfo, counters);
+ get_old_counters(oldinfo, counters);
/* Decrease module usage counts and free resource */
xt_entry_foreach(iter, oldinfo->entries, oldinfo->size)
diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c
index ce203dd729e0..437af8c95277 100644
--- a/net/ipv6/netfilter/ip6t_SYNPROXY.c
+++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c
@@ -353,7 +353,7 @@ static unsigned int ipv6_synproxy_hook(void *priv,
nexthdr = ipv6_hdr(skb)->nexthdr;
thoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
&frag_off);
- if (thoff < 0)
+ if (thoff < 0 || nexthdr != IPPROTO_TCP)
return NF_ACCEPT;
th = skb_header_pointer(skb, thoff, sizeof(_th), &_th);
@@ -438,7 +438,7 @@ static unsigned int ipv6_synproxy_hook(void *priv,
return NF_ACCEPT;
}
-static struct nf_hook_ops ipv6_synproxy_ops[] __read_mostly = {
+static const struct nf_hook_ops ipv6_synproxy_ops[] = {
{
.hook = ipv6_synproxy_hook,
.pf = NFPROTO_IPV6,
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index 7d2bd940291f..991512576c8c 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -69,7 +69,7 @@ static unsigned int ip6table_nat_local_fn(void *priv,
return nf_nat_ipv6_local_fn(priv, skb, state, ip6table_nat_do_chain);
}
-static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = {
+static const struct nf_hook_ops nf_nat_ipv6_ops[] = {
/* Before packet filtering, change destination */
{
.hook = ip6table_nat_in,
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 4e3402486833..3b80a38f62b8 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -67,13 +67,6 @@ static bool ipv6_invert_tuple(struct nf_conntrack_tuple *tuple,
return true;
}
-static void ipv6_print_tuple(struct seq_file *s,
- const struct nf_conntrack_tuple *tuple)
-{
- seq_printf(s, "src=%pI6 dst=%pI6 ",
- tuple->src.u3.ip6, tuple->dst.u3.ip6);
-}
-
static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
unsigned int *dataoff, u_int8_t *protonum)
{
@@ -191,7 +184,7 @@ static unsigned int ipv6_conntrack_local(void *priv,
return nf_conntrack_in(state->net, PF_INET6, state->hook, skb);
}
-static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
+static const struct nf_hook_ops ipv6_conntrack_ops[] = {
{
.hook = ipv6_conntrack_in,
.pf = NFPROTO_IPV6,
@@ -308,11 +301,6 @@ static int ipv6_nlattr_to_tuple(struct nlattr *tb[],
return 0;
}
-
-static int ipv6_nlattr_tuple_size(void)
-{
- return nla_policy_len(ipv6_nla_policy, CTA_IP_MAX + 1);
-}
#endif
static int ipv6_hooks_register(struct net *net)
@@ -351,18 +339,17 @@ static void ipv6_hooks_unregister(struct net *net)
mutex_unlock(&register_ipv6_hooks);
}
-struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = {
+const struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 = {
.l3proto = PF_INET6,
- .name = "ipv6",
.pkt_to_tuple = ipv6_pkt_to_tuple,
.invert_tuple = ipv6_invert_tuple,
- .print_tuple = ipv6_print_tuple,
.get_l4proto = ipv6_get_l4proto,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.tuple_to_nlattr = ipv6_tuple_to_nlattr,
- .nlattr_tuple_size = ipv6_nlattr_tuple_size,
.nlattr_to_tuple = ipv6_nlattr_to_tuple,
.nla_policy = ipv6_nla_policy,
+ .nla_size = NLA_ALIGN(NLA_HDRLEN + sizeof(u32[4])) +
+ NLA_ALIGN(NLA_HDRLEN + sizeof(u32[4])),
#endif
.net_ns_get = ipv6_hooks_register,
.net_ns_put = ipv6_hooks_unregister,
@@ -398,25 +385,12 @@ static struct nf_conntrack_l4proto *builtin_l4proto6[] = {
static int ipv6_net_init(struct net *net)
{
- int ret = 0;
-
- ret = nf_ct_l4proto_pernet_register(net, builtin_l4proto6,
- ARRAY_SIZE(builtin_l4proto6));
- if (ret < 0)
- return ret;
-
- ret = nf_ct_l3proto_pernet_register(net, &nf_conntrack_l3proto_ipv6);
- if (ret < 0) {
- pr_err("nf_conntrack_ipv6: pernet registration failed.\n");
- nf_ct_l4proto_pernet_unregister(net, builtin_l4proto6,
- ARRAY_SIZE(builtin_l4proto6));
- }
- return ret;
+ return nf_ct_l4proto_pernet_register(net, builtin_l4proto6,
+ ARRAY_SIZE(builtin_l4proto6));
}
static void ipv6_net_exit(struct net *net)
{
- nf_ct_l3proto_pernet_unregister(net, &nf_conntrack_l3proto_ipv6);
nf_ct_l4proto_pernet_unregister(net, builtin_l4proto6,
ARRAY_SIZE(builtin_l4proto6));
}
@@ -434,6 +408,12 @@ static int __init nf_conntrack_l3proto_ipv6_init(void)
need_conntrack();
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+ if (WARN_ON(nla_policy_len(ipv6_nla_policy, CTA_IP_MAX + 1) !=
+ nf_conntrack_l3proto_ipv6.nla_size))
+ return -EINVAL;
+#endif
+
ret = nf_register_sockopt(&so_getorigdst6);
if (ret < 0) {
pr_err("Unable to register netfilter socket option\n");
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index d5f028e33f65..3ac0d826afc4 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -84,16 +84,6 @@ static bool icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple,
return true;
}
-/* Print out the per-protocol part of the tuple. */
-static void icmpv6_print_tuple(struct seq_file *s,
- const struct nf_conntrack_tuple *tuple)
-{
- seq_printf(s, "type=%u code=%u id=%u ",
- tuple->dst.u.icmp.type,
- tuple->dst.u.icmp.code,
- ntohs(tuple->src.u.icmp.id));
-}
-
static unsigned int *icmpv6_get_timeouts(struct net *net)
{
return &icmpv6_pernet(net)->timeout;
@@ -104,8 +94,6 @@ static int icmpv6_packet(struct nf_conn *ct,
const struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
- u_int8_t pf,
- unsigned int hooknum,
unsigned int *timeout)
{
/* Do not immediately delete the connection after the first
@@ -131,11 +119,6 @@ static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb,
pr_debug("icmpv6: can't create new conn with type %u\n",
type + 128);
nf_ct_dump_tuple_ipv6(&ct->tuplehash[0].tuple);
- if (LOG_INVALID(nf_ct_net(ct), IPPROTO_ICMPV6))
- nf_log_packet(nf_ct_net(ct), PF_INET6, 0, skb, NULL,
- NULL, NULL,
- "nf_ct_icmpv6: invalid new with type %d ",
- type + 128);
return false;
}
return true;
@@ -144,8 +127,7 @@ static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb,
static int
icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
struct sk_buff *skb,
- unsigned int icmp6off,
- unsigned int hooknum)
+ unsigned int icmp6off)
{
struct nf_conntrack_tuple intuple, origtuple;
const struct nf_conntrack_tuple_hash *h;
@@ -153,7 +135,7 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
enum ip_conntrack_info ctinfo;
struct nf_conntrack_zone tmp;
- NF_CT_ASSERT(!skb_nfct(skb));
+ WARN_ON(skb_nfct(skb));
/* Are they talking about one of our connections? */
if (!nf_ct_get_tuplepr(skb,
@@ -193,6 +175,12 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
return NF_ACCEPT;
}
+static void icmpv6_error_log(const struct sk_buff *skb, struct net *net,
+ u8 pf, const char *msg)
+{
+ nf_l4proto_log_invalid(skb, net, pf, IPPROTO_ICMPV6, "%s", msg);
+}
+
static int
icmpv6_error(struct net *net, struct nf_conn *tmpl,
struct sk_buff *skb, unsigned int dataoff,
@@ -204,17 +192,13 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl,
icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih);
if (icmp6h == NULL) {
- if (LOG_INVALID(net, IPPROTO_ICMPV6))
- nf_log_packet(net, PF_INET6, 0, skb, NULL, NULL, NULL,
- "nf_ct_icmpv6: short packet ");
+ icmpv6_error_log(skb, net, pf, "short packet");
return -NF_ACCEPT;
}
if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) {
- if (LOG_INVALID(net, IPPROTO_ICMPV6))
- nf_log_packet(net, PF_INET6, 0, skb, NULL, NULL, NULL,
- "nf_ct_icmpv6: ICMPv6 checksum failed ");
+ icmpv6_error_log(skb, net, pf, "ICMPv6 checksum failed");
return -NF_ACCEPT;
}
@@ -229,7 +213,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl,
if (icmp6h->icmp6_type >= 128)
return NF_ACCEPT;
- return icmpv6_error_message(net, tmpl, skb, dataoff, hooknum);
+ return icmpv6_error_message(net, tmpl, skb, dataoff);
}
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
@@ -275,9 +259,14 @@ static int icmpv6_nlattr_to_tuple(struct nlattr *tb[],
return 0;
}
-static int icmpv6_nlattr_tuple_size(void)
+static unsigned int icmpv6_nlattr_tuple_size(void)
{
- return nla_policy_len(icmpv6_nla_policy, CTA_PROTO_MAX + 1);
+ static unsigned int size __read_mostly;
+
+ if (!size)
+ size = nla_policy_len(icmpv6_nla_policy, CTA_PROTO_MAX + 1);
+
+ return size;
}
#endif
@@ -367,10 +356,8 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly =
{
.l3proto = PF_INET6,
.l4proto = IPPROTO_ICMPV6,
- .name = "icmpv6",
.pkt_to_tuple = icmpv6_pkt_to_tuple,
.invert_tuple = icmpv6_invert_tuple,
- .print_tuple = icmpv6_print_tuple,
.packet = icmpv6_packet,
.get_timeouts = icmpv6_get_timeouts,
.new = icmpv6_new,
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 986d4ca38832..977d8900cfd1 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -169,12 +169,13 @@ static unsigned int nf_hashfn(const struct inet_frag_queue *q)
return nf_hash_frag(nq->id, &nq->saddr, &nq->daddr);
}
-static void nf_ct_frag6_expire(unsigned long data)
+static void nf_ct_frag6_expire(struct timer_list *t)
{
+ struct inet_frag_queue *frag = from_timer(frag, t, timer);
struct frag_queue *fq;
struct net *net;
- fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q);
+ fq = container_of(frag, struct frag_queue, q);
net = container_of(fq->q.net, struct net, nf_frag.frags);
ip6_expire_frag_queue(net, fq, &nf_frags);
@@ -622,18 +623,12 @@ EXPORT_SYMBOL_GPL(nf_ct_frag6_gather);
static int nf_ct_net_init(struct net *net)
{
- int res;
-
net->nf_frag.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
net->nf_frag.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
net->nf_frag.frags.timeout = IPV6_FRAG_TIMEOUT;
- res = inet_frags_init_net(&net->nf_frag.frags);
- if (res)
- return res;
- res = nf_ct_frag6_sysctl_register(net);
- if (res)
- inet_frags_uninit_net(&net->nf_frag.frags);
- return res;
+ inet_frags_init_net(&net->nf_frag.frags);
+
+ return nf_ct_frag6_sysctl_register(net);
}
static void nf_ct_net_exit(struct net *net)
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index ada60d1a991b..b326da59257f 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -74,7 +74,7 @@ static unsigned int ipv6_defrag(void *priv,
return err == 0 ? NF_ACCEPT : NF_DROP;
}
-static struct nf_hook_ops ipv6_defrag_ops[] = {
+static const struct nf_hook_ops ipv6_defrag_ops[] = {
{
.hook = ipv6_defrag,
.pf = NFPROTO_IPV6,
diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c
index 97c724224da7..b397a8fe88b9 100644
--- a/net/ipv6/netfilter/nf_log_ipv6.c
+++ b/net/ipv6/netfilter/nf_log_ipv6.c
@@ -25,7 +25,7 @@
#include <linux/netfilter/xt_LOG.h>
#include <net/netfilter/nf_log.h>
-static struct nf_loginfo default_loginfo = {
+static const struct nf_loginfo default_loginfo = {
.type = NF_LOG_TYPE_LOG,
.u = {
.log = {
diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
index b2b4f031b3a1..1d2fb9267d6f 100644
--- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
@@ -196,7 +196,7 @@ int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
struct nf_conntrack_tuple target;
unsigned long statusbit;
- NF_CT_ASSERT(ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY);
+ WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY);
if (!skb_make_writable(skb, hdrlen + sizeof(*inside)))
return 0;
@@ -290,7 +290,8 @@ nf_nat_ipv6_fn(void *priv, struct sk_buff *skb,
else
return NF_ACCEPT;
}
- /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
+ /* Only ICMPs can be IP_CT_IS_REPLY: */
+ /* fall through */
case IP_CT_NEW:
/* Seen it before? This can happen for loopback, retrans,
* or local packets.
@@ -319,8 +320,8 @@ nf_nat_ipv6_fn(void *priv, struct sk_buff *skb,
default:
/* ESTABLISHED */
- NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
- ctinfo == IP_CT_ESTABLISHED_REPLY);
+ WARN_ON(ctinfo != IP_CT_ESTABLISHED &&
+ ctinfo != IP_CT_ESTABLISHED_REPLY);
if (nf_nat_oif_changed(state->hook, ctinfo, nat, state->out))
goto oif_changed;
}
diff --git a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
index d7b679037bae..98f61fcb9108 100644
--- a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
@@ -36,8 +36,8 @@ nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range *range,
struct nf_nat_range newrange;
ct = nf_ct_get(skb, &ctinfo);
- NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
- ctinfo == IP_CT_RELATED_REPLY));
+ WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
+ ctinfo == IP_CT_RELATED_REPLY)));
if (ipv6_dev_get_saddr(nf_ct_net(ct), out,
&ipv6_hdr(skb)->daddr, 0, &src) < 0)
diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
index 43f91d9b086c..54b5899543ef 100644
--- a/net/ipv6/netfilter/nft_fib_ipv6.c
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
@@ -25,9 +25,9 @@ static int get_ifindex(const struct net_device *dev)
static int nft_fib6_flowi_init(struct flowi6 *fl6, const struct nft_fib *priv,
const struct nft_pktinfo *pkt,
- const struct net_device *dev)
+ const struct net_device *dev,
+ struct ipv6hdr *iph)
{
- const struct ipv6hdr *iph = ipv6_hdr(pkt->skb);
int lookup_flags = 0;
if (priv->flags & NFTA_FIB_F_DADDR) {
@@ -55,7 +55,8 @@ static int nft_fib6_flowi_init(struct flowi6 *fl6, const struct nft_fib *priv,
}
static u32 __nft_fib6_eval_type(const struct nft_fib *priv,
- const struct nft_pktinfo *pkt)
+ const struct nft_pktinfo *pkt,
+ struct ipv6hdr *iph)
{
const struct net_device *dev = NULL;
const struct nf_ipv6_ops *v6ops;
@@ -77,7 +78,7 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv,
else if (priv->flags & NFTA_FIB_F_OIF)
dev = nft_out(pkt);
- nft_fib6_flowi_init(&fl6, priv, pkt, dev);
+ nft_fib6_flowi_init(&fl6, priv, pkt, dev, iph);
v6ops = nf_get_ipv6_ops();
if (dev && v6ops && v6ops->chk_addr(nft_net(pkt), &fl6.daddr, dev, true))
@@ -131,9 +132,17 @@ void nft_fib6_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_fib *priv = nft_expr_priv(expr);
+ int noff = skb_network_offset(pkt->skb);
u32 *dest = &regs->data[priv->dreg];
+ struct ipv6hdr *iph, _iph;
- *dest = __nft_fib6_eval_type(priv, pkt);
+ iph = skb_header_pointer(pkt->skb, noff, sizeof(_iph), &_iph);
+ if (!iph) {
+ regs->verdict.code = NFT_BREAK;
+ return;
+ }
+
+ *dest = __nft_fib6_eval_type(priv, pkt, iph);
}
EXPORT_SYMBOL_GPL(nft_fib6_eval_type);
@@ -141,8 +150,10 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_fib *priv = nft_expr_priv(expr);
+ int noff = skb_network_offset(pkt->skb);
const struct net_device *oif = NULL;
u32 *dest = &regs->data[priv->dreg];
+ struct ipv6hdr *iph, _iph;
struct flowi6 fl6 = {
.flowi6_iif = LOOPBACK_IFINDEX,
.flowi6_proto = pkt->tprot,
@@ -155,7 +166,13 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
else if (priv->flags & NFTA_FIB_F_OIF)
oif = nft_out(pkt);
- lookup_flags = nft_fib6_flowi_init(&fl6, priv, pkt, oif);
+ iph = skb_header_pointer(pkt->skb, noff, sizeof(_iph), &_iph);
+ if (!iph) {
+ regs->verdict.code = NFT_BREAK;
+ return;
+ }
+
+ lookup_flags = nft_fib6_flowi_init(&fl6, priv, pkt, oif, iph);
if (nft_hook(pkt) == NF_INET_PRE_ROUTING &&
nft_fib_is_loopback(pkt->skb, nft_in(pkt))) {
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index abb2c307fbe8..4a7e5ffa5108 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -31,37 +31,6 @@ static u32 __ipv6_select_ident(struct net *net, u32 hashrnd,
return id;
}
-/* This function exists only for tap drivers that must support broken
- * clients requesting UFO without specifying an IPv6 fragment ID.
- *
- * This is similar to ipv6_select_ident() but we use an independent hash
- * seed to limit information leakage.
- *
- * The network header must be set before calling this.
- */
-void ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb)
-{
- static u32 ip6_proxy_idents_hashrnd __read_mostly;
- struct in6_addr buf[2];
- struct in6_addr *addrs;
- u32 id;
-
- addrs = skb_header_pointer(skb,
- skb_network_offset(skb) +
- offsetof(struct ipv6hdr, saddr),
- sizeof(buf), buf);
- if (!addrs)
- return;
-
- net_get_random_once(&ip6_proxy_idents_hashrnd,
- sizeof(ip6_proxy_idents_hashrnd));
-
- id = __ipv6_select_ident(net, ip6_proxy_idents_hashrnd,
- &addrs[1], &addrs[0]);
- skb_shinfo(skb)->ip6_frag_id = htonl(id);
-}
-EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident);
-
__be32 ipv6_select_ident(struct net *net,
const struct in6_addr *daddr,
const struct in6_addr *saddr)
@@ -86,7 +55,6 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
while (offset <= packet_len) {
struct ipv6_opt_hdr *exthdr;
- unsigned int len;
switch (**nexthdr) {
@@ -112,10 +80,9 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
offset);
- len = ipv6_optlen(exthdr);
- if (len + offset >= IPV6_MAXPLEN)
+ offset += ipv6_optlen(exthdr);
+ if (offset > IPV6_MAXPLEN)
return -EINVAL;
- offset += len;
*nexthdr = &exthdr->nexthdr;
}
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index ac826dd338ff..d12c55dad7d1 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -154,9 +154,8 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
ICMP6_MIB_OUTERRORS);
ip6_flush_pending_frames(sk);
} else {
- err = icmpv6_push_pending_frames(sk, &fl6,
- (struct icmp6hdr *) &pfh.icmph,
- len);
+ icmpv6_push_pending_frames(sk, &fl6,
+ (struct icmp6hdr *)&pfh.icmph, len);
}
release_sock(sk);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 60be012fe708..761a473a07c5 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -72,7 +72,7 @@ EXPORT_SYMBOL_GPL(raw_v6_hashinfo);
struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,
unsigned short num, const struct in6_addr *loc_addr,
- const struct in6_addr *rmt_addr, int dif)
+ const struct in6_addr *rmt_addr, int dif, int sdif)
{
bool is_multicast = ipv6_addr_is_multicast(loc_addr);
@@ -86,7 +86,9 @@ struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,
!ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr))
continue;
- if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)
+ if (sk->sk_bound_dev_if &&
+ sk->sk_bound_dev_if != dif &&
+ sk->sk_bound_dev_if != sdif)
continue;
if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
@@ -178,7 +180,8 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
goto out;
net = dev_net(skb->dev);
- sk = __raw_v6_lookup(net, sk, nexthdr, daddr, saddr, inet6_iif(skb));
+ sk = __raw_v6_lookup(net, sk, nexthdr, daddr, saddr,
+ inet6_iif(skb), inet6_sdif(skb));
while (sk) {
int filtered;
@@ -222,7 +225,7 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
}
}
sk = __raw_v6_lookup(net, sk_next(sk), nexthdr, daddr, saddr,
- inet6_iif(skb));
+ inet6_iif(skb), inet6_sdif(skb));
}
out:
read_unlock(&raw_v6_hashinfo.lock);
@@ -378,7 +381,7 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr,
net = dev_net(skb->dev);
while ((sk = __raw_v6_lookup(net, sk, nexthdr, saddr, daddr,
- inet6_iif(skb)))) {
+ inet6_iif(skb), inet6_iif(skb)))) {
rawv6_err(sk, skb, NULL, type, code,
inner_offset, info);
sk = sk_next(sk);
@@ -1052,6 +1055,7 @@ static int rawv6_setsockopt(struct sock *sk, int level, int optname,
if (optname == IPV6_CHECKSUM ||
optname == IPV6_HDRINCL)
break;
+ /* fall through */
default:
return ipv6_setsockopt(sk, level, optname, optval, optlen);
}
@@ -1074,6 +1078,7 @@ static int compat_rawv6_setsockopt(struct sock *sk, int level, int optname,
if (optname == IPV6_CHECKSUM ||
optname == IPV6_HDRINCL)
break;
+ /* fall through */
default:
return compat_ipv6_setsockopt(sk, level, optname,
optval, optlen);
@@ -1135,6 +1140,7 @@ static int rawv6_getsockopt(struct sock *sk, int level, int optname,
if (optname == IPV6_CHECKSUM ||
optname == IPV6_HDRINCL)
break;
+ /* fall through */
default:
return ipv6_getsockopt(sk, level, optname, optval, optlen);
}
@@ -1157,6 +1163,7 @@ static int compat_rawv6_getsockopt(struct sock *sk, int level, int optname,
if (optname == IPV6_CHECKSUM ||
optname == IPV6_HDRINCL)
break;
+ /* fall through */
default:
return compat_ipv6_getsockopt(sk, level, optname,
optval, optlen);
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index e1da5b888cc4..afbc000ad4f2 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -170,12 +170,13 @@ out:
}
EXPORT_SYMBOL(ip6_expire_frag_queue);
-static void ip6_frag_expire(unsigned long data)
+static void ip6_frag_expire(struct timer_list *t)
{
+ struct inet_frag_queue *frag = from_timer(frag, t, timer);
struct frag_queue *fq;
struct net *net;
- fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q);
+ fq = container_of(frag, struct frag_queue, q);
net = container_of(fq->q.net, struct net, ipv6.frags);
ip6_expire_frag_queue(net, fq, &ip6_frags);
@@ -714,19 +715,13 @@ static void ip6_frags_sysctl_unregister(void)
static int __net_init ipv6_frags_init_net(struct net *net)
{
- int res;
-
net->ipv6.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
net->ipv6.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT;
- res = inet_frags_init_net(&net->ipv6.frags);
- if (res)
- return res;
- res = ip6_frags_ns_sysctl_register(net);
- if (res)
- inet_frags_uninit_net(&net->ipv6.frags);
- return res;
+ inet_frags_init_net(&net->ipv6.frags);
+
+ return ip6_frags_ns_sysctl_register(net);
}
static void __net_exit ipv6_frags_exit_net(struct net *net)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 94d6a13d47f0..05eb7bc36156 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -44,6 +44,7 @@
#include <linux/seq_file.h>
#include <linux/nsproxy.h>
#include <linux/slab.h>
+#include <linux/jhash.h>
#include <net/net_namespace.h>
#include <net/snmp.h>
#include <net/ipv6.h>
@@ -104,6 +105,9 @@ static int rt6_fill_node(struct net *net,
struct in6_addr *dst, struct in6_addr *src,
int iif, int type, u32 portid, u32 seq,
unsigned int flags);
+static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
+ struct in6_addr *daddr,
+ struct in6_addr *saddr);
#ifdef CONFIG_IPV6_ROUTE_INFO
static struct rt6_info *rt6_add_route_info(struct net *net,
@@ -139,9 +143,11 @@ static void rt6_uncached_list_del(struct rt6_info *rt)
{
if (!list_empty(&rt->rt6i_uncached)) {
struct uncached_list *ul = rt->rt6i_uncached_list;
+ struct net *net = dev_net(rt->dst.dev);
spin_lock_bh(&ul->lock);
list_del(&rt->rt6i_uncached);
+ atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
spin_unlock_bh(&ul->lock);
}
}
@@ -355,8 +361,10 @@ static struct rt6_info *__ip6_dst_alloc(struct net *net,
struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
1, DST_OBSOLETE_FORCE_CHK, flags);
- if (rt)
+ if (rt) {
rt6_info_init(rt);
+ atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
+ }
return rt;
}
@@ -369,17 +377,7 @@ struct rt6_info *ip6_dst_alloc(struct net *net,
if (rt) {
rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
- if (rt->rt6i_pcpu) {
- int cpu;
-
- for_each_possible_cpu(cpu) {
- struct rt6_info **p;
-
- p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
- /* no one shares rt */
- *p = NULL;
- }
- } else {
+ if (!rt->rt6i_pcpu) {
dst_release_immediate(&rt->dst);
return NULL;
}
@@ -392,6 +390,7 @@ EXPORT_SYMBOL(ip6_dst_alloc);
static void ip6_dst_destroy(struct dst_entry *dst)
{
struct rt6_info *rt = (struct rt6_info *)dst;
+ struct rt6_exception_bucket *bucket;
struct dst_entry *from = dst->from;
struct inet6_dev *idev;
@@ -404,6 +403,11 @@ static void ip6_dst_destroy(struct dst_entry *dst)
rt->rt6i_idev = NULL;
in6_dev_put(idev);
}
+ bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 1);
+ if (bucket) {
+ rt->rt6i_exception_bucket = NULL;
+ kfree(bucket);
+ }
dst->from = NULL;
dst_release(from);
@@ -440,21 +444,12 @@ static bool rt6_check_expired(const struct rt6_info *rt)
if (time_after(jiffies, rt->dst.expires))
return true;
} else if (rt->dst.from) {
- return rt6_check_expired((struct rt6_info *) rt->dst.from);
+ return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
+ rt6_check_expired((struct rt6_info *)rt->dst.from);
}
return false;
}
-/* Multipath route selection:
- * Hash based function using packet header and flowlabel.
- * Adapted from fib_info_hashfn()
- */
-static int rt6_info_hash_nhsfn(unsigned int candidate_count,
- const struct flowi6 *fl6)
-{
- return get_hash_from_flowi6(fl6) % candidate_count;
-}
-
static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
struct flowi6 *fl6, int oif,
int strict)
@@ -462,7 +457,13 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
struct rt6_info *sibling, *next_sibling;
int route_choosen;
- route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
+ /* We might have already computed the hash for ICMPv6 errors. In such
+ * case it will always be non-zero. Otherwise now is the time to do it.
+ */
+ if (!fl6->mp_hash)
+ fl6->mp_hash = rt6_multipath_hash(fl6, NULL);
+
+ route_choosen = fl6->mp_hash % (match->rt6i_nsiblings + 1);
/* Don't change the route, if route_choosen == 0
* (siblings does not include ourself)
*/
@@ -481,7 +482,7 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
}
/*
- * Route lookup. Any table->tb6_lock is implied.
+ * Route lookup. rcu_read_lock() should be held.
*/
static inline struct rt6_info *rt6_device_match(struct net *net,
@@ -496,7 +497,7 @@ static inline struct rt6_info *rt6_device_match(struct net *net,
if (!oif && ipv6_addr_any(saddr))
goto out;
- for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
+ for (sprt = rt; sprt; sprt = rcu_dereference(sprt->dst.rt6_next)) {
struct net_device *dev = sprt->dst.dev;
if (oif) {
@@ -705,6 +706,7 @@ out:
}
static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
+ struct rt6_info *leaf,
struct rt6_info *rr_head,
u32 metric, int oif, int strict,
bool *do_rr)
@@ -714,7 +716,7 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
match = NULL;
cont = NULL;
- for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
+ for (rt = rr_head; rt; rt = rcu_dereference(rt->dst.rt6_next)) {
if (rt->rt6i_metric != metric) {
cont = rt;
break;
@@ -723,7 +725,8 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
match = find_match(rt, oif, strict, &mpri, match, do_rr);
}
- for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
+ for (rt = leaf; rt && rt != rr_head;
+ rt = rcu_dereference(rt->dst.rt6_next)) {
if (rt->rt6i_metric != metric) {
cont = rt;
break;
@@ -735,37 +738,59 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
if (match || !cont)
return match;
- for (rt = cont; rt; rt = rt->dst.rt6_next)
+ for (rt = cont; rt; rt = rcu_dereference(rt->dst.rt6_next))
match = find_match(rt, oif, strict, &mpri, match, do_rr);
return match;
}
-static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
+static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn,
+ int oif, int strict)
{
+ struct rt6_info *leaf = rcu_dereference(fn->leaf);
struct rt6_info *match, *rt0;
- struct net *net;
bool do_rr = false;
+ int key_plen;
- rt0 = fn->rr_ptr;
+ if (!leaf || leaf == net->ipv6.ip6_null_entry)
+ return net->ipv6.ip6_null_entry;
+
+ rt0 = rcu_dereference(fn->rr_ptr);
if (!rt0)
- fn->rr_ptr = rt0 = fn->leaf;
+ rt0 = leaf;
+
+ /* Double check to make sure fn is not an intermediate node
+ * and fn->leaf does not points to its child's leaf
+ * (This might happen if all routes under fn are deleted from
+ * the tree and fib6_repair_tree() is called on the node.)
+ */
+ key_plen = rt0->rt6i_dst.plen;
+#ifdef CONFIG_IPV6_SUBTREES
+ if (rt0->rt6i_src.plen)
+ key_plen = rt0->rt6i_src.plen;
+#endif
+ if (fn->fn_bit != key_plen)
+ return net->ipv6.ip6_null_entry;
- match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
+ match = find_rr_leaf(fn, leaf, rt0, rt0->rt6i_metric, oif, strict,
&do_rr);
if (do_rr) {
- struct rt6_info *next = rt0->dst.rt6_next;
+ struct rt6_info *next = rcu_dereference(rt0->dst.rt6_next);
/* no entries matched; do round-robin */
if (!next || next->rt6i_metric != rt0->rt6i_metric)
- next = fn->leaf;
-
- if (next != rt0)
- fn->rr_ptr = next;
+ next = leaf;
+
+ if (next != rt0) {
+ spin_lock_bh(&leaf->rt6i_table->tb6_lock);
+ /* make sure next is not being deleted from the tree */
+ if (next->rt6i_node)
+ rcu_assign_pointer(fn->rr_ptr, next);
+ spin_unlock_bh(&leaf->rt6i_table->tb6_lock);
+ }
}
- net = dev_net(rt0->dst.dev);
return match ? match : net->ipv6.ip6_null_entry;
}
@@ -853,13 +878,14 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
struct in6_addr *saddr)
{
- struct fib6_node *pn;
+ struct fib6_node *pn, *sn;
while (1) {
if (fn->fn_flags & RTN_TL_ROOT)
return NULL;
- pn = fn->parent;
- if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
- fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
+ pn = rcu_dereference(fn->parent);
+ sn = FIB6_SUBTREE(pn);
+ if (sn && sn != fn)
+ fn = fib6_lookup(sn, NULL, saddr);
else
fn = pn;
if (fn->fn_flags & RTN_RTINFO)
@@ -867,29 +893,59 @@ static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
}
}
+static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
+ bool null_fallback)
+{
+ struct rt6_info *rt = *prt;
+
+ if (dst_hold_safe(&rt->dst))
+ return true;
+ if (null_fallback) {
+ rt = net->ipv6.ip6_null_entry;
+ dst_hold(&rt->dst);
+ } else {
+ rt = NULL;
+ }
+ *prt = rt;
+ return false;
+}
+
static struct rt6_info *ip6_pol_route_lookup(struct net *net,
struct fib6_table *table,
struct flowi6 *fl6, int flags)
{
+ struct rt6_info *rt, *rt_cache;
struct fib6_node *fn;
- struct rt6_info *rt;
- read_lock_bh(&table->tb6_lock);
+ rcu_read_lock();
fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
- rt = fn->leaf;
- rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
- if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
- rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
+ rt = rcu_dereference(fn->leaf);
+ if (!rt) {
+ rt = net->ipv6.ip6_null_entry;
+ } else {
+ rt = rt6_device_match(net, rt, &fl6->saddr,
+ fl6->flowi6_oif, flags);
+ if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
+ rt = rt6_multipath_select(rt, fl6,
+ fl6->flowi6_oif, flags);
+ }
if (rt == net->ipv6.ip6_null_entry) {
fn = fib6_backtrack(fn, &fl6->saddr);
if (fn)
goto restart;
}
- dst_use(&rt->dst, jiffies);
- read_unlock_bh(&table->tb6_lock);
+ /* Search through exception table */
+ rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
+ if (rt_cache)
+ rt = rt_cache;
+
+ if (ip6_hold_safe(net, &rt, true))
+ dst_use_noref(&rt->dst, jiffies);
+
+ rcu_read_unlock();
- trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
+ trace_fib6_table_lookup(net, rt, table, fl6);
return rt;
@@ -941,9 +997,9 @@ static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
struct fib6_table *table;
table = rt->rt6i_table;
- write_lock_bh(&table->tb6_lock);
+ spin_lock_bh(&table->tb6_lock);
err = fib6_add(&table->tb6_root, rt, info, mxc, extack);
- write_unlock_bh(&table->tb6_lock);
+ spin_unlock_bh(&table->tb6_lock);
return err;
}
@@ -958,10 +1014,34 @@ int ip6_ins_rt(struct rt6_info *rt)
return __ip6_ins_rt(rt, &info, &mxc, NULL);
}
+/* called with rcu_lock held */
+static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt)
+{
+ struct net_device *dev = rt->dst.dev;
+
+ if (rt->rt6i_flags & RTF_LOCAL) {
+ /* for copies of local routes, dst->dev needs to be the
+ * device if it is a master device, the master device if
+ * device is enslaved, and the loopback as the default
+ */
+ if (netif_is_l3_slave(dev) &&
+ !rt6_need_strict(&rt->rt6i_dst.addr))
+ dev = l3mdev_master_dev_rcu(dev);
+ else if (!netif_is_l3_master(dev))
+ dev = dev_net(dev)->loopback_dev;
+ /* last case is netif_is_l3_master(dev) is true in which
+ * case we want dev returned to be dev
+ */
+ }
+
+ return dev;
+}
+
static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
const struct in6_addr *daddr,
const struct in6_addr *saddr)
{
+ struct net_device *dev;
struct rt6_info *rt;
/*
@@ -971,8 +1051,10 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
ort = (struct rt6_info *)ort->dst.from;
- rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);
-
+ rcu_read_lock();
+ dev = ip6_rt_get_dev_rcu(ort);
+ rt = __ip6_dst_alloc(dev_net(dev), dev, 0);
+ rcu_read_unlock();
if (!rt)
return NULL;
@@ -1000,11 +1082,13 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
{
+ struct net_device *dev;
struct rt6_info *pcpu_rt;
- pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
- rt->dst.dev, rt->dst.flags);
-
+ rcu_read_lock();
+ dev = ip6_rt_get_dev_rcu(rt);
+ pcpu_rt = __ip6_dst_alloc(dev_net(dev), dev, rt->dst.flags);
+ rcu_read_unlock();
if (!pcpu_rt)
return NULL;
ip6_rt_copy_init(pcpu_rt, rt);
@@ -1013,7 +1097,7 @@ static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
return pcpu_rt;
}
-/* It should be called with read_lock_bh(&tb6_lock) acquired */
+/* It should be called with rcu_read_lock() acquired */
static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
{
struct rt6_info *pcpu_rt, **p;
@@ -1021,16 +1105,14 @@ static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
p = this_cpu_ptr(rt->rt6i_pcpu);
pcpu_rt = *p;
- if (pcpu_rt) {
- dst_hold(&pcpu_rt->dst);
+ if (pcpu_rt && ip6_hold_safe(NULL, &pcpu_rt, false))
rt6_dst_from_metrics_check(pcpu_rt);
- }
+
return pcpu_rt;
}
static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
{
- struct fib6_table *table = rt->rt6i_table;
struct rt6_info *pcpu_rt, *prev, **p;
pcpu_rt = ip6_rt_pcpu_alloc(rt);
@@ -1041,36 +1123,526 @@ static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
return net->ipv6.ip6_null_entry;
}
- read_lock_bh(&table->tb6_lock);
- if (rt->rt6i_pcpu) {
- p = this_cpu_ptr(rt->rt6i_pcpu);
- prev = cmpxchg(p, NULL, pcpu_rt);
- if (prev) {
- /* If someone did it before us, return prev instead */
- dst_release_immediate(&pcpu_rt->dst);
- pcpu_rt = prev;
- }
- } else {
- /* rt has been removed from the fib6 tree
- * before we have a chance to acquire the read_lock.
- * In this case, don't brother to create a pcpu rt
- * since rt is going away anyway. The next
- * dst_check() will trigger a re-lookup.
- */
- dst_release_immediate(&pcpu_rt->dst);
- pcpu_rt = rt;
- }
dst_hold(&pcpu_rt->dst);
+ p = this_cpu_ptr(rt->rt6i_pcpu);
+ prev = cmpxchg(p, NULL, pcpu_rt);
+ BUG_ON(prev);
+
rt6_dst_from_metrics_check(pcpu_rt);
- read_unlock_bh(&table->tb6_lock);
return pcpu_rt;
}
+/* exception hash table implementation
+ */
+static DEFINE_SPINLOCK(rt6_exception_lock);
+
+/* Remove rt6_ex from hash table and free the memory
+ * Caller must hold rt6_exception_lock
+ */
+static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
+ struct rt6_exception *rt6_ex)
+{
+ struct net *net;
+
+ if (!bucket || !rt6_ex)
+ return;
+
+ net = dev_net(rt6_ex->rt6i->dst.dev);
+ rt6_ex->rt6i->rt6i_node = NULL;
+ hlist_del_rcu(&rt6_ex->hlist);
+ rt6_release(rt6_ex->rt6i);
+ kfree_rcu(rt6_ex, rcu);
+ WARN_ON_ONCE(!bucket->depth);
+ bucket->depth--;
+ net->ipv6.rt6_stats->fib_rt_cache--;
+}
+
+/* Remove oldest rt6_ex in bucket and free the memory
+ * Caller must hold rt6_exception_lock
+ */
+static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
+{
+ struct rt6_exception *rt6_ex, *oldest = NULL;
+
+ if (!bucket)
+ return;
+
+ hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
+ if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
+ oldest = rt6_ex;
+ }
+ rt6_remove_exception(bucket, oldest);
+}
+
+static u32 rt6_exception_hash(const struct in6_addr *dst,
+ const struct in6_addr *src)
+{
+ static u32 seed __read_mostly;
+ u32 val;
+
+ net_get_random_once(&seed, sizeof(seed));
+ val = jhash(dst, sizeof(*dst), seed);
+
+#ifdef CONFIG_IPV6_SUBTREES
+ if (src)
+ val = jhash(src, sizeof(*src), val);
+#endif
+ return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
+}
+
+/* Helper function to find the cached rt in the hash table
+ * and update bucket pointer to point to the bucket for this
+ * (daddr, saddr) pair
+ * Caller must hold rt6_exception_lock
+ */
+static struct rt6_exception *
+__rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
+ const struct in6_addr *daddr,
+ const struct in6_addr *saddr)
+{
+ struct rt6_exception *rt6_ex;
+ u32 hval;
+
+ if (!(*bucket) || !daddr)
+ return NULL;
+
+ hval = rt6_exception_hash(daddr, saddr);
+ *bucket += hval;
+
+ hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
+ struct rt6_info *rt6 = rt6_ex->rt6i;
+ bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
+
+#ifdef CONFIG_IPV6_SUBTREES
+ if (matched && saddr)
+ matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
+#endif
+ if (matched)
+ return rt6_ex;
+ }
+ return NULL;
+}
+
+/* Helper function to find the cached rt in the hash table
+ * and update bucket pointer to point to the bucket for this
+ * (daddr, saddr) pair
+ * Caller must hold rcu_read_lock()
+ */
+static struct rt6_exception *
+__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
+ const struct in6_addr *daddr,
+ const struct in6_addr *saddr)
+{
+ struct rt6_exception *rt6_ex;
+ u32 hval;
+
+ WARN_ON_ONCE(!rcu_read_lock_held());
+
+ if (!(*bucket) || !daddr)
+ return NULL;
+
+ hval = rt6_exception_hash(daddr, saddr);
+ *bucket += hval;
+
+ hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
+ struct rt6_info *rt6 = rt6_ex->rt6i;
+ bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
+
+#ifdef CONFIG_IPV6_SUBTREES
+ if (matched && saddr)
+ matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
+#endif
+ if (matched)
+ return rt6_ex;
+ }
+ return NULL;
+}
+
+static int rt6_insert_exception(struct rt6_info *nrt,
+ struct rt6_info *ort)
+{
+ struct net *net = dev_net(ort->dst.dev);
+ struct rt6_exception_bucket *bucket;
+ struct in6_addr *src_key = NULL;
+ struct rt6_exception *rt6_ex;
+ int err = 0;
+
+ /* ort can't be a cache or pcpu route */
+ if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
+ ort = (struct rt6_info *)ort->dst.from;
+ WARN_ON_ONCE(ort->rt6i_flags & (RTF_CACHE | RTF_PCPU));
+
+ spin_lock_bh(&rt6_exception_lock);
+
+ if (ort->exception_bucket_flushed) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
+ lockdep_is_held(&rt6_exception_lock));
+ if (!bucket) {
+ bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
+ GFP_ATOMIC);
+ if (!bucket) {
+ err = -ENOMEM;
+ goto out;
+ }
+ rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
+ }
+
+#ifdef CONFIG_IPV6_SUBTREES
+ /* rt6i_src.plen != 0 indicates ort is in subtree
+ * and exception table is indexed by a hash of
+ * both rt6i_dst and rt6i_src.
+ * Otherwise, the exception table is indexed by
+ * a hash of only rt6i_dst.
+ */
+ if (ort->rt6i_src.plen)
+ src_key = &nrt->rt6i_src.addr;
+#endif
+
+ /* Update rt6i_prefsrc as it could be changed
+ * in rt6_remove_prefsrc()
+ */
+ nrt->rt6i_prefsrc = ort->rt6i_prefsrc;
+ /* rt6_mtu_change() might lower mtu on ort.
+ * Only insert this exception route if its mtu
+ * is less than ort's mtu value.
+ */
+ if (nrt->rt6i_pmtu >= dst_mtu(&ort->dst)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
+ src_key);
+ if (rt6_ex)
+ rt6_remove_exception(bucket, rt6_ex);
+
+ rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
+ if (!rt6_ex) {
+ err = -ENOMEM;
+ goto out;
+ }
+ rt6_ex->rt6i = nrt;
+ rt6_ex->stamp = jiffies;
+ atomic_inc(&nrt->rt6i_ref);
+ nrt->rt6i_node = ort->rt6i_node;
+ hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
+ bucket->depth++;
+ net->ipv6.rt6_stats->fib_rt_cache++;
+
+ if (bucket->depth > FIB6_MAX_DEPTH)
+ rt6_exception_remove_oldest(bucket);
+
+out:
+ spin_unlock_bh(&rt6_exception_lock);
+
+ /* Update fn->fn_sernum to invalidate all cached dst */
+ if (!err) {
+ fib6_update_sernum(ort);
+ fib6_force_start_gc(net);
+ }
+
+ return err;
+}
+
+void rt6_flush_exceptions(struct rt6_info *rt)
+{
+ struct rt6_exception_bucket *bucket;
+ struct rt6_exception *rt6_ex;
+ struct hlist_node *tmp;
+ int i;
+
+ spin_lock_bh(&rt6_exception_lock);
+ /* Prevent rt6_insert_exception() to recreate the bucket list */
+ rt->exception_bucket_flushed = 1;
+
+ bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
+ lockdep_is_held(&rt6_exception_lock));
+ if (!bucket)
+ goto out;
+
+ for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
+ hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
+ rt6_remove_exception(bucket, rt6_ex);
+ WARN_ON_ONCE(bucket->depth);
+ bucket++;
+ }
+
+out:
+ spin_unlock_bh(&rt6_exception_lock);
+}
+
+/* Find cached rt in the hash table inside passed in rt
+ * Caller has to hold rcu_read_lock()
+ */
+static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
+ struct in6_addr *daddr,
+ struct in6_addr *saddr)
+{
+ struct rt6_exception_bucket *bucket;
+ struct in6_addr *src_key = NULL;
+ struct rt6_exception *rt6_ex;
+ struct rt6_info *res = NULL;
+
+ bucket = rcu_dereference(rt->rt6i_exception_bucket);
+
+#ifdef CONFIG_IPV6_SUBTREES
+ /* rt6i_src.plen != 0 indicates rt is in subtree
+ * and exception table is indexed by a hash of
+ * both rt6i_dst and rt6i_src.
+ * Otherwise, the exception table is indexed by
+ * a hash of only rt6i_dst.
+ */
+ if (rt->rt6i_src.plen)
+ src_key = saddr;
+#endif
+ rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
+
+ if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
+ res = rt6_ex->rt6i;
+
+ return res;
+}
+
+/* Remove the passed in cached rt from the hash table that contains it */
+int rt6_remove_exception_rt(struct rt6_info *rt)
+{
+ struct rt6_info *from = (struct rt6_info *)rt->dst.from;
+ struct rt6_exception_bucket *bucket;
+ struct in6_addr *src_key = NULL;
+ struct rt6_exception *rt6_ex;
+ int err;
+
+ if (!from ||
+ !(rt->rt6i_flags & RTF_CACHE))
+ return -EINVAL;
+
+ if (!rcu_access_pointer(from->rt6i_exception_bucket))
+ return -ENOENT;
+
+ spin_lock_bh(&rt6_exception_lock);
+ bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
+ lockdep_is_held(&rt6_exception_lock));
+#ifdef CONFIG_IPV6_SUBTREES
+ /* rt6i_src.plen != 0 indicates 'from' is in subtree
+ * and exception table is indexed by a hash of
+ * both rt6i_dst and rt6i_src.
+ * Otherwise, the exception table is indexed by
+ * a hash of only rt6i_dst.
+ */
+ if (from->rt6i_src.plen)
+ src_key = &rt->rt6i_src.addr;
+#endif
+ rt6_ex = __rt6_find_exception_spinlock(&bucket,
+ &rt->rt6i_dst.addr,
+ src_key);
+ if (rt6_ex) {
+ rt6_remove_exception(bucket, rt6_ex);
+ err = 0;
+ } else {
+ err = -ENOENT;
+ }
+
+ spin_unlock_bh(&rt6_exception_lock);
+ return err;
+}
+
+/* Find rt6_ex which contains the passed in rt cache and
+ * refresh its stamp
+ */
+static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
+{
+ struct rt6_info *from = (struct rt6_info *)rt->dst.from;
+ struct rt6_exception_bucket *bucket;
+ struct in6_addr *src_key = NULL;
+ struct rt6_exception *rt6_ex;
+
+ if (!from ||
+ !(rt->rt6i_flags & RTF_CACHE))
+ return;
+
+ rcu_read_lock();
+ bucket = rcu_dereference(from->rt6i_exception_bucket);
+
+#ifdef CONFIG_IPV6_SUBTREES
+ /* rt6i_src.plen != 0 indicates 'from' is in subtree
+ * and exception table is indexed by a hash of
+ * both rt6i_dst and rt6i_src.
+ * Otherwise, the exception table is indexed by
+ * a hash of only rt6i_dst.
+ */
+ if (from->rt6i_src.plen)
+ src_key = &rt->rt6i_src.addr;
+#endif
+ rt6_ex = __rt6_find_exception_rcu(&bucket,
+ &rt->rt6i_dst.addr,
+ src_key);
+ if (rt6_ex)
+ rt6_ex->stamp = jiffies;
+
+ rcu_read_unlock();
+}
+
+static void rt6_exceptions_remove_prefsrc(struct rt6_info *rt)
+{
+ struct rt6_exception_bucket *bucket;
+ struct rt6_exception *rt6_ex;
+ int i;
+
+ bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
+ lockdep_is_held(&rt6_exception_lock));
+
+ if (bucket) {
+ for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
+ hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
+ rt6_ex->rt6i->rt6i_prefsrc.plen = 0;
+ }
+ bucket++;
+ }
+ }
+}
+
+static void rt6_exceptions_update_pmtu(struct rt6_info *rt, int mtu)
+{
+ struct rt6_exception_bucket *bucket;
+ struct rt6_exception *rt6_ex;
+ int i;
+
+ bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
+ lockdep_is_held(&rt6_exception_lock));
+
+ if (bucket) {
+ for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
+ hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
+ struct rt6_info *entry = rt6_ex->rt6i;
+ /* For RTF_CACHE with rt6i_pmtu == 0
+ * (i.e. a redirected route),
+ * the metrics of its rt->dst.from has already
+ * been updated.
+ */
+ if (entry->rt6i_pmtu && entry->rt6i_pmtu > mtu)
+ entry->rt6i_pmtu = mtu;
+ }
+ bucket++;
+ }
+ }
+}
+
+#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
+
+static void rt6_exceptions_clean_tohost(struct rt6_info *rt,
+ struct in6_addr *gateway)
+{
+ struct rt6_exception_bucket *bucket;
+ struct rt6_exception *rt6_ex;
+ struct hlist_node *tmp;
+ int i;
+
+ if (!rcu_access_pointer(rt->rt6i_exception_bucket))
+ return;
+
+ spin_lock_bh(&rt6_exception_lock);
+ bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
+ lockdep_is_held(&rt6_exception_lock));
+
+ if (bucket) {
+ for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
+ hlist_for_each_entry_safe(rt6_ex, tmp,
+ &bucket->chain, hlist) {
+ struct rt6_info *entry = rt6_ex->rt6i;
+
+ if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
+ RTF_CACHE_GATEWAY &&
+ ipv6_addr_equal(gateway,
+ &entry->rt6i_gateway)) {
+ rt6_remove_exception(bucket, rt6_ex);
+ }
+ }
+ bucket++;
+ }
+ }
+
+ spin_unlock_bh(&rt6_exception_lock);
+}
+
+static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
+ struct rt6_exception *rt6_ex,
+ struct fib6_gc_args *gc_args,
+ unsigned long now)
+{
+ struct rt6_info *rt = rt6_ex->rt6i;
+
+ /* we are pruning and obsoleting aged-out and non gateway exceptions
+ * even if others have still references to them, so that on next
+ * dst_check() such references can be dropped.
+ * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
+ * expired, independently from their aging, as per RFC 8201 section 4
+ */
+ if (!(rt->rt6i_flags & RTF_EXPIRES) &&
+ time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
+ RT6_TRACE("aging clone %p\n", rt);
+ rt6_remove_exception(bucket, rt6_ex);
+ return;
+ } else if (rt->rt6i_flags & RTF_GATEWAY) {
+ struct neighbour *neigh;
+ __u8 neigh_flags = 0;
+
+ neigh = dst_neigh_lookup(&rt->dst, &rt->rt6i_gateway);
+ if (neigh) {
+ neigh_flags = neigh->flags;
+ neigh_release(neigh);
+ }
+ if (!(neigh_flags & NTF_ROUTER)) {
+ RT6_TRACE("purging route %p via non-router but gateway\n",
+ rt);
+ rt6_remove_exception(bucket, rt6_ex);
+ return;
+ }
+ } else if (__rt6_check_expired(rt)) {
+ RT6_TRACE("purging expired route %p\n", rt);
+ rt6_remove_exception(bucket, rt6_ex);
+ return;
+ }
+ gc_args->more++;
+}
+
+void rt6_age_exceptions(struct rt6_info *rt,
+ struct fib6_gc_args *gc_args,
+ unsigned long now)
+{
+ struct rt6_exception_bucket *bucket;
+ struct rt6_exception *rt6_ex;
+ struct hlist_node *tmp;
+ int i;
+
+ if (!rcu_access_pointer(rt->rt6i_exception_bucket))
+ return;
+
+ spin_lock_bh(&rt6_exception_lock);
+ bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
+ lockdep_is_held(&rt6_exception_lock));
+
+ if (bucket) {
+ for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
+ hlist_for_each_entry_safe(rt6_ex, tmp,
+ &bucket->chain, hlist) {
+ rt6_age_examine_exception(bucket, rt6_ex,
+ gc_args, now);
+ }
+ bucket++;
+ }
+ }
+ spin_unlock_bh(&rt6_exception_lock);
+}
+
struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
int oif, struct flowi6 *fl6, int flags)
{
struct fib6_node *fn, *saved_fn;
- struct rt6_info *rt;
+ struct rt6_info *rt, *rt_cache;
int strict = 0;
strict |= flags & RT6_LOOKUP_F_IFACE;
@@ -1078,7 +1650,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
if (net->ipv6.devconf_all->forwarding == 0)
strict |= RT6_LOOKUP_F_REACHABLE;
- read_lock_bh(&table->tb6_lock);
+ rcu_read_lock();
fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
saved_fn = fn;
@@ -1087,7 +1659,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
oif = 0;
redo_rt6_select:
- rt = rt6_select(fn, oif, strict);
+ rt = rt6_select(net, fn, oif, strict);
if (rt->rt6i_nsiblings)
rt = rt6_multipath_select(rt, fl6, oif, strict);
if (rt == net->ipv6.ip6_null_entry) {
@@ -1102,14 +1674,23 @@ redo_rt6_select:
}
}
+ /*Search through exception table */
+ rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
+ if (rt_cache)
+ rt = rt_cache;
- if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
- dst_use(&rt->dst, jiffies);
- read_unlock_bh(&table->tb6_lock);
-
- rt6_dst_from_metrics_check(rt);
-
- trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
+ if (rt == net->ipv6.ip6_null_entry) {
+ rcu_read_unlock();
+ dst_hold(&rt->dst);
+ trace_fib6_table_lookup(net, rt, table, fl6);
+ return rt;
+ } else if (rt->rt6i_flags & RTF_CACHE) {
+ if (ip6_hold_safe(net, &rt, true)) {
+ dst_use_noref(&rt->dst, jiffies);
+ rt6_dst_from_metrics_check(rt);
+ }
+ rcu_read_unlock();
+ trace_fib6_table_lookup(net, rt, table, fl6);
return rt;
} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
!(rt->rt6i_flags & RTF_GATEWAY))) {
@@ -1121,8 +1702,14 @@ redo_rt6_select:
struct rt6_info *uncached_rt;
- dst_use(&rt->dst, jiffies);
- read_unlock_bh(&table->tb6_lock);
+ if (ip6_hold_safe(net, &rt, true)) {
+ dst_use_noref(&rt->dst, jiffies);
+ } else {
+ rcu_read_unlock();
+ uncached_rt = rt;
+ goto uncached_rt_out;
+ }
+ rcu_read_unlock();
uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
dst_release(&rt->dst);
@@ -1132,12 +1719,14 @@ redo_rt6_select:
* No need for another dst_hold()
*/
rt6_uncached_list_add(uncached_rt);
+ atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
} else {
uncached_rt = net->ipv6.ip6_null_entry;
dst_hold(&uncached_rt->dst);
}
- trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
+uncached_rt_out:
+ trace_fib6_table_lookup(net, uncached_rt, table, fl6);
return uncached_rt;
} else {
@@ -1145,26 +1734,28 @@ redo_rt6_select:
struct rt6_info *pcpu_rt;
- rt->dst.lastuse = jiffies;
- rt->dst.__use++;
+ dst_use_noref(&rt->dst, jiffies);
+ local_bh_disable();
pcpu_rt = rt6_get_pcpu_route(rt);
- if (pcpu_rt) {
- read_unlock_bh(&table->tb6_lock);
- } else {
- /* We have to do the read_unlock first
- * because rt6_make_pcpu_route() may trigger
- * ip6_dst_gc() which will take the write_lock.
- */
- dst_hold(&rt->dst);
- read_unlock_bh(&table->tb6_lock);
- pcpu_rt = rt6_make_pcpu_route(rt);
- dst_release(&rt->dst);
+ if (!pcpu_rt) {
+ /* atomic_inc_not_zero() is needed when using rcu */
+ if (atomic_inc_not_zero(&rt->rt6i_ref)) {
+ /* No dst_hold() on rt is needed because grabbing
+ * rt->rt6i_ref makes sure rt can't be released.
+ */
+ pcpu_rt = rt6_make_pcpu_route(rt);
+ rt6_release(rt);
+ } else {
+ /* rt is already removed from tree */
+ pcpu_rt = net->ipv6.ip6_null_entry;
+ dst_hold(&pcpu_rt->dst);
+ }
}
-
- trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
+ local_bh_enable();
+ rcu_read_unlock();
+ trace_fib6_table_lookup(net, pcpu_rt, table, fl6);
return pcpu_rt;
-
}
}
EXPORT_SYMBOL_GPL(ip6_pol_route);
@@ -1186,6 +1777,54 @@ struct dst_entry *ip6_route_input_lookup(struct net *net,
}
EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
+static void ip6_multipath_l3_keys(const struct sk_buff *skb,
+ struct flow_keys *keys)
+{
+ const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
+ const struct ipv6hdr *key_iph = outer_iph;
+ const struct ipv6hdr *inner_iph;
+ const struct icmp6hdr *icmph;
+ struct ipv6hdr _inner_iph;
+
+ if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
+ goto out;
+
+ icmph = icmp6_hdr(skb);
+ if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
+ icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
+ icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
+ icmph->icmp6_type != ICMPV6_PARAMPROB)
+ goto out;
+
+ inner_iph = skb_header_pointer(skb,
+ skb_transport_offset(skb) + sizeof(*icmph),
+ sizeof(_inner_iph), &_inner_iph);
+ if (!inner_iph)
+ goto out;
+
+ key_iph = inner_iph;
+out:
+ memset(keys, 0, sizeof(*keys));
+ keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+ keys->addrs.v6addrs.src = key_iph->saddr;
+ keys->addrs.v6addrs.dst = key_iph->daddr;
+ keys->tags.flow_label = ip6_flowinfo(key_iph);
+ keys->basic.ip_proto = key_iph->nexthdr;
+}
+
+/* if skb is set it will be used and fl6 can be NULL */
+u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb)
+{
+ struct flow_keys hash_keys;
+
+ if (skb) {
+ ip6_multipath_l3_keys(skb, &hash_keys);
+ return flow_hash_from_keys(&hash_keys);
+ }
+
+ return get_hash_from_flowi6(fl6);
+}
+
void ip6_route_input(struct sk_buff *skb)
{
const struct ipv6hdr *iph = ipv6_hdr(skb);
@@ -1204,6 +1843,8 @@ void ip6_route_input(struct sk_buff *skb)
tun_info = skb_tunnel_info(skb);
if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
+ if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
+ fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
skb_dst_drop(skb);
skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
}
@@ -1250,9 +1891,10 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
struct dst_entry *new = NULL;
rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
- DST_OBSOLETE_NONE, 0);
+ DST_OBSOLETE_DEAD, 0);
if (rt) {
rt6_info_init(rt);
+ atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
new = &rt->dst;
new->__use = 1;
@@ -1289,7 +1931,9 @@ static void rt6_dst_from_metrics_check(struct rt6_info *rt)
static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
{
- if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
+ u32 rt_cookie = 0;
+
+ if (!rt6_get_cookie_safe(rt, &rt_cookie) || rt_cookie != cookie)
return NULL;
if (rt6_check_expired(rt))
@@ -1357,8 +2001,14 @@ static void ip6_link_failure(struct sk_buff *skb)
if (rt->rt6i_flags & RTF_CACHE) {
if (dst_hold_safe(&rt->dst))
ip6_del_rt(rt);
- } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
- rt->rt6i_node->fn_sernum = -1;
+ } else {
+ struct fib6_node *fn;
+
+ rcu_read_lock();
+ fn = rcu_dereference(rt->rt6i_node);
+ if (fn && (rt->rt6i_flags & RTF_DEFAULT))
+ fn->fn_sernum = -1;
+ rcu_read_unlock();
}
}
}
@@ -1375,7 +2025,8 @@ static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
{
return !(rt->rt6i_flags & RTF_CACHE) &&
- (rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
+ (rt->rt6i_flags & RTF_PCPU ||
+ rcu_access_pointer(rt->rt6i_node));
}
static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
@@ -1407,23 +2058,17 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
if (!rt6_cache_allowed_for_pmtu(rt6)) {
rt6_do_update_pmtu(rt6, mtu);
+ /* update rt6_ex->stamp for cache */
+ if (rt6->rt6i_flags & RTF_CACHE)
+ rt6_update_exception_stamp_rt(rt6);
} else if (daddr) {
struct rt6_info *nrt6;
nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
if (nrt6) {
rt6_do_update_pmtu(nrt6, mtu);
-
- /* ip6_ins_rt(nrt6) will bump the
- * rt6->rt6i_node->fn_sernum
- * which will fail the next rt6_check() and
- * invalidate the sk->sk_dst_cache.
- */
- ip6_ins_rt(nrt6);
- /* Release the reference taken in
- * ip6_rt_cache_alloc()
- */
- dst_release(&nrt6->dst);
+ if (rt6_insert_exception(nrt6, rt6))
+ dst_release_immediate(&nrt6->dst);
}
}
}
@@ -1487,7 +2132,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
int flags)
{
struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
- struct rt6_info *rt;
+ struct rt6_info *rt, *rt_cache;
struct fib6_node *fn;
/* Get the "current" route for this destination and
@@ -1500,10 +2145,10 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
* routes.
*/
- read_lock_bh(&table->tb6_lock);
+ rcu_read_lock();
fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
- for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
+ for_each_fib6_node_rt_rcu(fn) {
if (rt6_check_expired(rt))
continue;
if (rt->dst.error)
@@ -1512,8 +2157,23 @@ restart:
continue;
if (fl6->flowi6_oif != rt->dst.dev->ifindex)
continue;
- if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
+ /* rt_cache's gateway might be different from its 'parent'
+ * in the case of an ip redirect.
+ * So we keep searching in the exception table if the gateway
+ * is different.
+ */
+ if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) {
+ rt_cache = rt6_find_cached_rt(rt,
+ &fl6->daddr,
+ &fl6->saddr);
+ if (rt_cache &&
+ ipv6_addr_equal(&rdfl->gateway,
+ &rt_cache->rt6i_gateway)) {
+ rt = rt_cache;
+ break;
+ }
continue;
+ }
break;
}
@@ -1531,11 +2191,11 @@ restart:
}
out:
- dst_hold(&rt->dst);
+ ip6_hold_safe(net, &rt, true);
- read_unlock_bh(&table->tb6_lock);
+ rcu_read_unlock();
- trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
+ trace_fib6_table_lookup(net, rt, table, fl6);
return rt;
};
@@ -1682,6 +2342,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
* do proper release of the net_device
*/
rt6_uncached_list_add(rt);
+ atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
@@ -1717,6 +2378,7 @@ out:
static int ip6_convert_metrics(struct mx6_config *mxc,
const struct fib6_config *cfg)
{
+ struct net *net = cfg->fc_nlinfo.nl_net;
bool ecn_ca = false;
struct nlattr *nla;
int remaining;
@@ -1742,7 +2404,7 @@ static int ip6_convert_metrics(struct mx6_config *mxc,
char tmp[TCP_CA_NAME_MAX];
nla_strlcpy(tmp, nla, sizeof(tmp));
- val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
+ val = tcp_ca_get_key_by_name(net, tmp, &ecn_ca);
if (val == TCP_CA_UNSPEC)
goto err;
} else {
@@ -1817,6 +2479,12 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
goto out;
}
+ /* RTF_CACHE is an internal flag; can not be set by userspace */
+ if (cfg->fc_flags & RTF_CACHE) {
+ NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
+ goto out;
+ }
+
if (cfg->fc_dst_len > 128) {
NL_SET_ERR_MSG(extack, "Invalid prefix length");
goto out;
@@ -2132,9 +2800,9 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
}
table = rt->rt6i_table;
- write_lock_bh(&table->tb6_lock);
+ spin_lock_bh(&table->tb6_lock);
err = fib6_del(rt, info);
- write_unlock_bh(&table->tb6_lock);
+ spin_unlock_bh(&table->tb6_lock);
out:
ip6_rt_put(rt);
@@ -2160,7 +2828,7 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
if (rt == net->ipv6.ip6_null_entry)
goto out_put;
table = rt->rt6i_table;
- write_lock_bh(&table->tb6_lock);
+ spin_lock_bh(&table->tb6_lock);
if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) {
struct rt6_info *sibling, *next_sibling;
@@ -2190,7 +2858,7 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
err = fib6_del(rt, info);
out_unlock:
- write_unlock_bh(&table->tb6_lock);
+ spin_unlock_bh(&table->tb6_lock);
out_put:
ip6_rt_put(rt);
@@ -2204,9 +2872,9 @@ out_put:
static int ip6_route_del(struct fib6_config *cfg,
struct netlink_ext_ack *extack)
{
+ struct rt6_info *rt, *rt_cache;
struct fib6_table *table;
struct fib6_node *fn;
- struct rt6_info *rt;
int err = -ESRCH;
table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
@@ -2215,17 +2883,22 @@ static int ip6_route_del(struct fib6_config *cfg,
return err;
}
- read_lock_bh(&table->tb6_lock);
+ rcu_read_lock();
fn = fib6_locate(&table->tb6_root,
&cfg->fc_dst, cfg->fc_dst_len,
- &cfg->fc_src, cfg->fc_src_len);
+ &cfg->fc_src, cfg->fc_src_len,
+ !(cfg->fc_flags & RTF_CACHE));
if (fn) {
- for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
- if ((rt->rt6i_flags & RTF_CACHE) &&
- !(cfg->fc_flags & RTF_CACHE))
- continue;
+ for_each_fib6_node_rt_rcu(fn) {
+ if (cfg->fc_flags & RTF_CACHE) {
+ rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
+ &cfg->fc_src);
+ if (!rt_cache)
+ continue;
+ rt = rt_cache;
+ }
if (cfg->fc_ifindex &&
(!rt->dst.dev ||
rt->dst.dev->ifindex != cfg->fc_ifindex))
@@ -2237,8 +2910,9 @@ static int ip6_route_del(struct fib6_config *cfg,
continue;
if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
continue;
- dst_hold(&rt->dst);
- read_unlock_bh(&table->tb6_lock);
+ if (!dst_hold_safe(&rt->dst))
+ break;
+ rcu_read_unlock();
/* if gateway was specified only delete the one hop */
if (cfg->fc_flags & RTF_GATEWAY)
@@ -2247,7 +2921,7 @@ static int ip6_route_del(struct fib6_config *cfg,
return __ip6_del_rt_siblings(rt, cfg);
}
}
- read_unlock_bh(&table->tb6_lock);
+ rcu_read_unlock();
return err;
}
@@ -2351,8 +3025,14 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
nrt->rt6i_protocol = RTPROT_REDIRECT;
nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
- if (ip6_ins_rt(nrt))
- goto out_release;
+ /* No need to remove rt from the exception table if rt is
+ * a cached route because rt6_insert_exception() will
+ * takes care of it
+ */
+ if (rt6_insert_exception(nrt, rt)) {
+ dst_release_immediate(&nrt->dst);
+ goto out;
+ }
netevent.old = &rt->dst;
netevent.new = &nrt->dst;
@@ -2360,17 +3040,6 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
netevent.neigh = neigh;
call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
- if (rt->rt6i_flags & RTF_CACHE) {
- rt = (struct rt6_info *) dst_clone(&rt->dst);
- ip6_del_rt(rt);
- }
-
-out_release:
- /* Release the reference taken in
- * ip6_rt_cache_alloc()
- */
- dst_release(&nrt->dst);
-
out:
neigh_release(neigh);
}
@@ -2427,23 +3096,23 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
if (!table)
return NULL;
- read_lock_bh(&table->tb6_lock);
- fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
+ rcu_read_lock();
+ fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
if (!fn)
goto out;
- for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
+ for_each_fib6_node_rt_rcu(fn) {
if (rt->dst.dev->ifindex != ifindex)
continue;
if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
continue;
if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
continue;
- dst_hold(&rt->dst);
+ ip6_hold_safe(NULL, &rt, false);
break;
}
out:
- read_unlock_bh(&table->tb6_lock);
+ rcu_read_unlock();
return rt;
}
@@ -2489,16 +3158,16 @@ struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_dev
if (!table)
return NULL;
- read_lock_bh(&table->tb6_lock);
- for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
+ rcu_read_lock();
+ for_each_fib6_node_rt_rcu(&table->tb6_root) {
if (dev == rt->dst.dev &&
((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
ipv6_addr_equal(&rt->rt6i_gateway, addr))
break;
}
if (rt)
- dst_hold(&rt->dst);
- read_unlock_bh(&table->tb6_lock);
+ ip6_hold_safe(NULL, &rt, false);
+ rcu_read_unlock();
return rt;
}
@@ -2536,17 +3205,20 @@ static void __rt6_purge_dflt_routers(struct fib6_table *table)
struct rt6_info *rt;
restart:
- read_lock_bh(&table->tb6_lock);
- for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
+ rcu_read_lock();
+ for_each_fib6_node_rt_rcu(&table->tb6_root) {
if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
(!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
- dst_hold(&rt->dst);
- read_unlock_bh(&table->tb6_lock);
- ip6_del_rt(rt);
+ if (dst_hold_safe(&rt->dst)) {
+ rcu_read_unlock();
+ ip6_del_rt(rt);
+ } else {
+ rcu_read_unlock();
+ }
goto restart;
}
}
- read_unlock_bh(&table->tb6_lock);
+ rcu_read_unlock();
table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
}
@@ -2688,15 +3360,9 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
{
u32 tb_id;
struct net *net = dev_net(idev->dev);
- struct net_device *dev = net->loopback_dev;
+ struct net_device *dev = idev->dev;
struct rt6_info *rt;
- /* use L3 Master device as loopback for host routes if device
- * is enslaved and address is not link local or multicast
- */
- if (!rt6_need_strict(addr))
- dev = l3mdev_master_dev_rcu(idev->dev) ? : dev;
-
rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
if (!rt)
return ERR_PTR(-ENOMEM);
@@ -2740,8 +3406,12 @@ static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
if (((void *)rt->dst.dev == dev || !dev) &&
rt != net->ipv6.ip6_null_entry &&
ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
+ spin_lock_bh(&rt6_exception_lock);
/* remove prefsrc entry */
rt->rt6i_prefsrc.plen = 0;
+ /* need to update cache as well */
+ rt6_exceptions_remove_prefsrc(rt);
+ spin_unlock_bh(&rt6_exception_lock);
}
return 0;
}
@@ -2758,18 +3428,23 @@ void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
}
#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
-#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
/* Remove routers and update dst entries when gateway turn into host. */
static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
{
struct in6_addr *gateway = (struct in6_addr *)arg;
- if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
- ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
- ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
+ if (((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
+ ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
return -1;
}
+
+ /* Further clean up cached routes in exception table.
+ * This is needed because cached route may have a different
+ * gateway than its 'parent' in the case of an ip redirect.
+ */
+ rt6_exceptions_clean_tohost(rt, gateway);
+
return 0;
}
@@ -2848,19 +3523,14 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
if (rt->dst.dev == arg->dev &&
dst_metric_raw(&rt->dst, RTAX_MTU) &&
!dst_metric_locked(&rt->dst, RTAX_MTU)) {
- if (rt->rt6i_flags & RTF_CACHE) {
- /* For RTF_CACHE with rt6i_pmtu == 0
- * (i.e. a redirected route),
- * the metrics of its rt->dst.from has already
- * been updated.
- */
- if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
- rt->rt6i_pmtu = arg->mtu;
- } else if (dst_mtu(&rt->dst) >= arg->mtu ||
- (dst_mtu(&rt->dst) < arg->mtu &&
- dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
+ spin_lock_bh(&rt6_exception_lock);
+ if (dst_mtu(&rt->dst) >= arg->mtu ||
+ (dst_mtu(&rt->dst) < arg->mtu &&
+ dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
}
+ rt6_exceptions_update_pmtu(rt, arg->mtu);
+ spin_unlock_bh(&rt6_exception_lock);
}
return 0;
}
@@ -3327,6 +3997,9 @@ static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
goto nla_put_failure;
}
+ if (rt->rt6i_nh_flags & RTNH_F_OFFLOAD)
+ *flags |= RTNH_F_OFFLOAD;
+
/* not needed for multipath encoding b/c it has a rtnexthop struct */
if (!skip_oif && rt->dst.dev &&
nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
@@ -3605,8 +4278,11 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
struct net_device *dev;
int flags = 0;
- dev = __dev_get_by_index(net, iif);
+ rcu_read_lock();
+
+ dev = dev_get_by_index_rcu(net, iif);
if (!dev) {
+ rcu_read_unlock();
err = -ENODEV;
goto errout;
}
@@ -3618,15 +4294,19 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
if (!fibmatch)
dst = ip6_route_input_lookup(net, dev, &fl6, flags);
+ else
+ dst = ip6_route_lookup(net, &fl6, 0);
+
+ rcu_read_unlock();
} else {
fl6.flowi6_oif = oif;
if (!fibmatch)
dst = ip6_route_output(net, NULL, &fl6);
+ else
+ dst = ip6_route_lookup(net, &fl6, 0);
}
- if (fibmatch)
- dst = ip6_route_lookup(net, &fl6, 0);
rt = container_of(dst, struct rt6_info, dst);
if (rt->dst.error) {
@@ -3751,7 +4431,7 @@ static int rt6_stats_seq_show(struct seq_file *seq, void *v)
seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
net->ipv6.rt6_stats->fib_nodes,
net->ipv6.rt6_stats->fib_route_nodes,
- net->ipv6.rt6_stats->fib_rt_alloc,
+ atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
net->ipv6.rt6_stats->fib_rt_entries,
net->ipv6.rt6_stats->fib_rt_cache,
dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
@@ -3918,6 +4598,7 @@ static int __net_init ip6_route_net_init(struct net *net)
ip6_template_metrics, true);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+ net->ipv6.fib6_has_custom_rules = false;
net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
sizeof(*net->ipv6.ip6_prohibit_entry),
GFP_KERNEL);
@@ -4093,9 +4774,10 @@ int __init ip6_route_init(void)
goto fib6_rules_init;
ret = -ENOBUFS;
- if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
- __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
- __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
+ if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, 0) ||
+ __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, 0) ||
+ __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL,
+ RTNL_FLAG_DOIT_UNLOCKED))
goto out_register_late_subsys;
ret = register_netdevice_notifier(&ip6_route_dev_notifier);
diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c
index 15fba55e3da8..c81407770956 100644
--- a/net/ipv6/seg6.c
+++ b/net/ipv6/seg6.c
@@ -40,7 +40,7 @@ bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len)
if (((srh->hdrlen + 1) << 3) != len)
return false;
- if (srh->segments_left != srh->first_segment)
+ if (srh->segments_left > srh->first_segment)
return false;
tlv_offset = sizeof(*srh) + ((srh->first_segment + 1) << 4);
@@ -456,6 +456,10 @@ int __init seg6_init(void)
err = seg6_iptunnel_init();
if (err)
goto out_unregister_pernet;
+
+ err = seg6_local_init();
+ if (err)
+ goto out_unregister_pernet;
#endif
#ifdef CONFIG_IPV6_SEG6_HMAC
@@ -471,6 +475,7 @@ out:
#ifdef CONFIG_IPV6_SEG6_HMAC
out_unregister_iptun:
#ifdef CONFIG_IPV6_SEG6_LWTUNNEL
+ seg6_local_exit();
seg6_iptunnel_exit();
#endif
#endif
diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c
index f950cb53d5e3..33fb35cbfac1 100644
--- a/net/ipv6/seg6_hmac.c
+++ b/net/ipv6/seg6_hmac.c
@@ -12,6 +12,7 @@
*/
#include <linux/errno.h>
+#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
@@ -110,7 +111,7 @@ static struct seg6_hmac_algo *__hmac_get_algo(u8 alg_id)
struct seg6_hmac_algo *algo;
int i, alg_count;
- alg_count = sizeof(hmac_algos) / sizeof(struct seg6_hmac_algo);
+ alg_count = ARRAY_SIZE(hmac_algos);
for (i = 0; i < alg_count; i++) {
algo = &hmac_algos[i];
if (algo->alg_id == alg_id)
@@ -360,7 +361,7 @@ static int seg6_hmac_init_algo(void)
struct shash_desc *shash;
int i, alg_count, cpu;
- alg_count = sizeof(hmac_algos) / sizeof(struct seg6_hmac_algo);
+ alg_count = ARRAY_SIZE(hmac_algos);
for (i = 0; i < alg_count; i++) {
struct crypto_shash **p_tfm;
@@ -421,7 +422,7 @@ void seg6_hmac_exit(void)
struct seg6_hmac_algo *algo = NULL;
int i, alg_count, cpu;
- alg_count = sizeof(hmac_algos) / sizeof(struct seg6_hmac_algo);
+ alg_count = ARRAY_SIZE(hmac_algos);
for (i = 0; i < alg_count; i++) {
algo = &hmac_algos[i];
for_each_possible_cpu(cpu) {
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index 264d772d3c7d..bd6cc688bd19 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -91,7 +91,7 @@ static void set_tun_src(struct net *net, struct net_device *dev,
}
/* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */
-static int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
+int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
{
struct net *net = dev_net(skb_dst(skb)->dev);
struct ipv6hdr *hdr, *inner_hdr;
@@ -116,15 +116,22 @@ static int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
* hlim will be decremented in ip6_forward() afterwards and
* decapsulation will overwrite inner hlim with outer hlim
*/
- ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)),
- ip6_flowlabel(inner_hdr));
- hdr->hop_limit = inner_hdr->hop_limit;
+
+ if (skb->protocol == htons(ETH_P_IPV6)) {
+ ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)),
+ ip6_flowlabel(inner_hdr));
+ hdr->hop_limit = inner_hdr->hop_limit;
+ } else {
+ ip6_flow_hdr(hdr, 0, 0);
+ hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb));
+ }
+
hdr->nexthdr = NEXTHDR_ROUTING;
isrh = (void *)hdr + sizeof(*hdr);
memcpy(isrh, osrh, hdrlen);
- isrh->nexthdr = NEXTHDR_IPV6;
+ isrh->nexthdr = proto;
hdr->daddr = isrh->segments[isrh->first_segment];
set_tun_src(net, skb->dev, &hdr->daddr, &hdr->saddr);
@@ -141,10 +148,10 @@ static int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
return 0;
}
+EXPORT_SYMBOL_GPL(seg6_do_srh_encap);
/* insert an SRH within an IPv6 packet, just after the IPv6 header */
-#ifdef CONFIG_IPV6_SEG6_INLINE
-static int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
+int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
{
struct ipv6hdr *hdr, *oldhdr;
struct ipv6_sr_hdr *isrh;
@@ -193,13 +200,13 @@ static int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
return 0;
}
-#endif
+EXPORT_SYMBOL_GPL(seg6_do_srh_inline);
static int seg6_do_srh(struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
struct seg6_iptunnel_encap *tinfo;
- int err = 0;
+ int proto, err = 0;
tinfo = seg6_encap_lwtunnel(dst->lwtstate);
@@ -209,19 +216,47 @@ static int seg6_do_srh(struct sk_buff *skb)
}
switch (tinfo->mode) {
-#ifdef CONFIG_IPV6_SEG6_INLINE
case SEG6_IPTUN_MODE_INLINE:
+ if (skb->protocol != htons(ETH_P_IPV6))
+ return -EINVAL;
+
err = seg6_do_srh_inline(skb, tinfo->srh);
+ if (err)
+ return err;
+
skb_reset_inner_headers(skb);
break;
-#endif
case SEG6_IPTUN_MODE_ENCAP:
- err = seg6_do_srh_encap(skb, tinfo->srh);
+ if (skb->protocol == htons(ETH_P_IPV6))
+ proto = IPPROTO_IPV6;
+ else if (skb->protocol == htons(ETH_P_IP))
+ proto = IPPROTO_IPIP;
+ else
+ return -EINVAL;
+
+ err = seg6_do_srh_encap(skb, tinfo->srh, proto);
+ if (err)
+ return err;
+
+ skb->protocol = htons(ETH_P_IPV6);
break;
- }
+ case SEG6_IPTUN_MODE_L2ENCAP:
+ if (!skb_mac_header_was_set(skb))
+ return -EINVAL;
- if (err)
- return err;
+ if (pskb_expand_head(skb, skb->mac_len, 0, GFP_ATOMIC) < 0)
+ return -ENOMEM;
+
+ skb_mac_header_rebuild(skb);
+ skb_push(skb, skb->mac_len);
+
+ err = seg6_do_srh_encap(skb, tinfo->srh, NEXTHDR_NONE);
+ if (err)
+ return err;
+
+ skb->protocol = htons(ETH_P_IPV6);
+ break;
+ }
ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
@@ -336,6 +371,9 @@ static int seg6_build_state(struct nlattr *nla,
struct seg6_lwt *slwt;
int err;
+ if (family != AF_INET && family != AF_INET6)
+ return -EINVAL;
+
err = nla_parse_nested(tb, SEG6_IPTUNNEL_MAX, nla,
seg6_iptunnel_policy, extack);
@@ -357,12 +395,15 @@ static int seg6_build_state(struct nlattr *nla,
return -EINVAL;
switch (tuninfo->mode) {
-#ifdef CONFIG_IPV6_SEG6_INLINE
case SEG6_IPTUN_MODE_INLINE:
+ if (family != AF_INET6)
+ return -EINVAL;
+
break;
-#endif
case SEG6_IPTUN_MODE_ENCAP:
break;
+ case SEG6_IPTUN_MODE_L2ENCAP:
+ break;
default:
return -EINVAL;
}
@@ -386,8 +427,11 @@ static int seg6_build_state(struct nlattr *nla,
memcpy(&slwt->tuninfo, tuninfo, tuninfo_len);
newts->type = LWTUNNEL_ENCAP_SEG6;
- newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT |
- LWTUNNEL_STATE_INPUT_REDIRECT;
+ newts->flags |= LWTUNNEL_STATE_INPUT_REDIRECT;
+
+ if (tuninfo->mode != SEG6_IPTUN_MODE_L2ENCAP)
+ newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT;
+
newts->headroom = seg6_lwt_headroom(tuninfo);
*ts = newts;
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
new file mode 100644
index 000000000000..825b8e01f947
--- /dev/null
+++ b/net/ipv6/seg6_local.c
@@ -0,0 +1,934 @@
+/*
+ * SR-IPv6 implementation
+ *
+ * Author:
+ * David Lebrun <david.lebrun@uclouvain.be>
+ *
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/types.h>
+#include <linux/skbuff.h>
+#include <linux/net.h>
+#include <linux/module.h>
+#include <net/ip.h>
+#include <net/lwtunnel.h>
+#include <net/netevent.h>
+#include <net/netns/generic.h>
+#include <net/ip6_fib.h>
+#include <net/route.h>
+#include <net/seg6.h>
+#include <linux/seg6.h>
+#include <linux/seg6_local.h>
+#include <net/addrconf.h>
+#include <net/ip6_route.h>
+#include <net/dst_cache.h>
+#ifdef CONFIG_IPV6_SEG6_HMAC
+#include <net/seg6_hmac.h>
+#endif
+#include <linux/etherdevice.h>
+
+struct seg6_local_lwt;
+
+struct seg6_action_desc {
+ int action;
+ unsigned long attrs;
+ int (*input)(struct sk_buff *skb, struct seg6_local_lwt *slwt);
+ int static_headroom;
+};
+
+struct seg6_local_lwt {
+ int action;
+ struct ipv6_sr_hdr *srh;
+ int table;
+ struct in_addr nh4;
+ struct in6_addr nh6;
+ int iif;
+ int oif;
+
+ int headroom;
+ struct seg6_action_desc *desc;
+};
+
+static struct seg6_local_lwt *seg6_local_lwtunnel(struct lwtunnel_state *lwt)
+{
+ return (struct seg6_local_lwt *)lwt->data;
+}
+
+static struct ipv6_sr_hdr *get_srh(struct sk_buff *skb)
+{
+ struct ipv6_sr_hdr *srh;
+ int len, srhoff = 0;
+
+ if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
+ return NULL;
+
+ if (!pskb_may_pull(skb, srhoff + sizeof(*srh)))
+ return NULL;
+
+ srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
+
+ len = (srh->hdrlen + 1) << 3;
+
+ if (!pskb_may_pull(skb, srhoff + len))
+ return NULL;
+
+ if (!seg6_validate_srh(srh, len))
+ return NULL;
+
+ return srh;
+}
+
+static struct ipv6_sr_hdr *get_and_validate_srh(struct sk_buff *skb)
+{
+ struct ipv6_sr_hdr *srh;
+
+ srh = get_srh(skb);
+ if (!srh)
+ return NULL;
+
+ if (srh->segments_left == 0)
+ return NULL;
+
+#ifdef CONFIG_IPV6_SEG6_HMAC
+ if (!seg6_hmac_validate_skb(skb))
+ return NULL;
+#endif
+
+ return srh;
+}
+
+static bool decap_and_validate(struct sk_buff *skb, int proto)
+{
+ struct ipv6_sr_hdr *srh;
+ unsigned int off = 0;
+
+ srh = get_srh(skb);
+ if (srh && srh->segments_left > 0)
+ return false;
+
+#ifdef CONFIG_IPV6_SEG6_HMAC
+ if (srh && !seg6_hmac_validate_skb(skb))
+ return false;
+#endif
+
+ if (ipv6_find_hdr(skb, &off, proto, NULL, NULL) < 0)
+ return false;
+
+ if (!pskb_pull(skb, off))
+ return false;
+
+ skb_postpull_rcsum(skb, skb_network_header(skb), off);
+
+ skb_reset_network_header(skb);
+ skb_reset_transport_header(skb);
+ skb->encapsulation = 0;
+
+ return true;
+}
+
+static void advance_nextseg(struct ipv6_sr_hdr *srh, struct in6_addr *daddr)
+{
+ struct in6_addr *addr;
+
+ srh->segments_left--;
+ addr = srh->segments + srh->segments_left;
+ *daddr = *addr;
+}
+
+static void lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
+ u32 tbl_id)
+{
+ struct net *net = dev_net(skb->dev);
+ struct ipv6hdr *hdr = ipv6_hdr(skb);
+ int flags = RT6_LOOKUP_F_HAS_SADDR;
+ struct dst_entry *dst = NULL;
+ struct rt6_info *rt;
+ struct flowi6 fl6;
+
+ fl6.flowi6_iif = skb->dev->ifindex;
+ fl6.daddr = nhaddr ? *nhaddr : hdr->daddr;
+ fl6.saddr = hdr->saddr;
+ fl6.flowlabel = ip6_flowinfo(hdr);
+ fl6.flowi6_mark = skb->mark;
+ fl6.flowi6_proto = hdr->nexthdr;
+
+ if (nhaddr)
+ fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH;
+
+ if (!tbl_id) {
+ dst = ip6_route_input_lookup(net, skb->dev, &fl6, flags);
+ } else {
+ struct fib6_table *table;
+
+ table = fib6_get_table(net, tbl_id);
+ if (!table)
+ goto out;
+
+ rt = ip6_pol_route(net, table, 0, &fl6, flags);
+ dst = &rt->dst;
+ }
+
+ if (dst && dst->dev->flags & IFF_LOOPBACK && !dst->error) {
+ dst_release(dst);
+ dst = NULL;
+ }
+
+out:
+ if (!dst) {
+ rt = net->ipv6.ip6_blk_hole_entry;
+ dst = &rt->dst;
+ dst_hold(dst);
+ }
+
+ skb_dst_drop(skb);
+ skb_dst_set(skb, dst);
+}
+
+/* regular endpoint function */
+static int input_action_end(struct sk_buff *skb, struct seg6_local_lwt *slwt)
+{
+ struct ipv6_sr_hdr *srh;
+
+ srh = get_and_validate_srh(skb);
+ if (!srh)
+ goto drop;
+
+ advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
+
+ lookup_nexthop(skb, NULL, 0);
+
+ return dst_input(skb);
+
+drop:
+ kfree_skb(skb);
+ return -EINVAL;
+}
+
+/* regular endpoint, and forward to specified nexthop */
+static int input_action_end_x(struct sk_buff *skb, struct seg6_local_lwt *slwt)
+{
+ struct ipv6_sr_hdr *srh;
+
+ srh = get_and_validate_srh(skb);
+ if (!srh)
+ goto drop;
+
+ advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
+
+ lookup_nexthop(skb, &slwt->nh6, 0);
+
+ return dst_input(skb);
+
+drop:
+ kfree_skb(skb);
+ return -EINVAL;
+}
+
+static int input_action_end_t(struct sk_buff *skb, struct seg6_local_lwt *slwt)
+{
+ struct ipv6_sr_hdr *srh;
+
+ srh = get_and_validate_srh(skb);
+ if (!srh)
+ goto drop;
+
+ advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
+
+ lookup_nexthop(skb, NULL, slwt->table);
+
+ return dst_input(skb);
+
+drop:
+ kfree_skb(skb);
+ return -EINVAL;
+}
+
+/* decapsulate and forward inner L2 frame on specified interface */
+static int input_action_end_dx2(struct sk_buff *skb,
+ struct seg6_local_lwt *slwt)
+{
+ struct net *net = dev_net(skb->dev);
+ struct net_device *odev;
+ struct ethhdr *eth;
+
+ if (!decap_and_validate(skb, NEXTHDR_NONE))
+ goto drop;
+
+ if (!pskb_may_pull(skb, ETH_HLEN))
+ goto drop;
+
+ skb_reset_mac_header(skb);
+ eth = (struct ethhdr *)skb->data;
+
+ /* To determine the frame's protocol, we assume it is 802.3. This avoids
+ * a call to eth_type_trans(), which is not really relevant for our
+ * use case.
+ */
+ if (!eth_proto_is_802_3(eth->h_proto))
+ goto drop;
+
+ odev = dev_get_by_index_rcu(net, slwt->oif);
+ if (!odev)
+ goto drop;
+
+ /* As we accept Ethernet frames, make sure the egress device is of
+ * the correct type.
+ */
+ if (odev->type != ARPHRD_ETHER)
+ goto drop;
+
+ if (!(odev->flags & IFF_UP) || !netif_carrier_ok(odev))
+ goto drop;
+
+ skb_orphan(skb);
+
+ if (skb_warn_if_lro(skb))
+ goto drop;
+
+ skb_forward_csum(skb);
+
+ if (skb->len - ETH_HLEN > odev->mtu)
+ goto drop;
+
+ skb->dev = odev;
+ skb->protocol = eth->h_proto;
+
+ return dev_queue_xmit(skb);
+
+drop:
+ kfree_skb(skb);
+ return -EINVAL;
+}
+
+/* decapsulate and forward to specified nexthop */
+static int input_action_end_dx6(struct sk_buff *skb,
+ struct seg6_local_lwt *slwt)
+{
+ struct in6_addr *nhaddr = NULL;
+
+ /* this function accepts IPv6 encapsulated packets, with either
+ * an SRH with SL=0, or no SRH.
+ */
+
+ if (!decap_and_validate(skb, IPPROTO_IPV6))
+ goto drop;
+
+ if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
+ goto drop;
+
+ /* The inner packet is not associated to any local interface,
+ * so we do not call netif_rx().
+ *
+ * If slwt->nh6 is set to ::, then lookup the nexthop for the
+ * inner packet's DA. Otherwise, use the specified nexthop.
+ */
+
+ if (!ipv6_addr_any(&slwt->nh6))
+ nhaddr = &slwt->nh6;
+
+ lookup_nexthop(skb, nhaddr, 0);
+
+ return dst_input(skb);
+drop:
+ kfree_skb(skb);
+ return -EINVAL;
+}
+
+static int input_action_end_dx4(struct sk_buff *skb,
+ struct seg6_local_lwt *slwt)
+{
+ struct iphdr *iph;
+ __be32 nhaddr;
+ int err;
+
+ if (!decap_and_validate(skb, IPPROTO_IPIP))
+ goto drop;
+
+ if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+ goto drop;
+
+ skb->protocol = htons(ETH_P_IP);
+
+ iph = ip_hdr(skb);
+
+ nhaddr = slwt->nh4.s_addr ?: iph->daddr;
+
+ skb_dst_drop(skb);
+
+ err = ip_route_input(skb, nhaddr, iph->saddr, 0, skb->dev);
+ if (err)
+ goto drop;
+
+ return dst_input(skb);
+
+drop:
+ kfree_skb(skb);
+ return -EINVAL;
+}
+
+static int input_action_end_dt6(struct sk_buff *skb,
+ struct seg6_local_lwt *slwt)
+{
+ if (!decap_and_validate(skb, IPPROTO_IPV6))
+ goto drop;
+
+ if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
+ goto drop;
+
+ lookup_nexthop(skb, NULL, slwt->table);
+
+ return dst_input(skb);
+
+drop:
+ kfree_skb(skb);
+ return -EINVAL;
+}
+
+/* push an SRH on top of the current one */
+static int input_action_end_b6(struct sk_buff *skb, struct seg6_local_lwt *slwt)
+{
+ struct ipv6_sr_hdr *srh;
+ int err = -EINVAL;
+
+ srh = get_and_validate_srh(skb);
+ if (!srh)
+ goto drop;
+
+ err = seg6_do_srh_inline(skb, slwt->srh);
+ if (err)
+ goto drop;
+
+ ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+ skb_set_transport_header(skb, sizeof(struct ipv6hdr));
+
+ lookup_nexthop(skb, NULL, 0);
+
+ return dst_input(skb);
+
+drop:
+ kfree_skb(skb);
+ return err;
+}
+
+/* encapsulate within an outer IPv6 header and a specified SRH */
+static int input_action_end_b6_encap(struct sk_buff *skb,
+ struct seg6_local_lwt *slwt)
+{
+ struct ipv6_sr_hdr *srh;
+ int err = -EINVAL;
+
+ srh = get_and_validate_srh(skb);
+ if (!srh)
+ goto drop;
+
+ advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
+
+ skb_reset_inner_headers(skb);
+ skb->encapsulation = 1;
+
+ err = seg6_do_srh_encap(skb, slwt->srh, IPPROTO_IPV6);
+ if (err)
+ goto drop;
+
+ ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+ skb_set_transport_header(skb, sizeof(struct ipv6hdr));
+
+ lookup_nexthop(skb, NULL, 0);
+
+ return dst_input(skb);
+
+drop:
+ kfree_skb(skb);
+ return err;
+}
+
+static struct seg6_action_desc seg6_action_table[] = {
+ {
+ .action = SEG6_LOCAL_ACTION_END,
+ .attrs = 0,
+ .input = input_action_end,
+ },
+ {
+ .action = SEG6_LOCAL_ACTION_END_X,
+ .attrs = (1 << SEG6_LOCAL_NH6),
+ .input = input_action_end_x,
+ },
+ {
+ .action = SEG6_LOCAL_ACTION_END_T,
+ .attrs = (1 << SEG6_LOCAL_TABLE),
+ .input = input_action_end_t,
+ },
+ {
+ .action = SEG6_LOCAL_ACTION_END_DX2,
+ .attrs = (1 << SEG6_LOCAL_OIF),
+ .input = input_action_end_dx2,
+ },
+ {
+ .action = SEG6_LOCAL_ACTION_END_DX6,
+ .attrs = (1 << SEG6_LOCAL_NH6),
+ .input = input_action_end_dx6,
+ },
+ {
+ .action = SEG6_LOCAL_ACTION_END_DX4,
+ .attrs = (1 << SEG6_LOCAL_NH4),
+ .input = input_action_end_dx4,
+ },
+ {
+ .action = SEG6_LOCAL_ACTION_END_DT6,
+ .attrs = (1 << SEG6_LOCAL_TABLE),
+ .input = input_action_end_dt6,
+ },
+ {
+ .action = SEG6_LOCAL_ACTION_END_B6,
+ .attrs = (1 << SEG6_LOCAL_SRH),
+ .input = input_action_end_b6,
+ },
+ {
+ .action = SEG6_LOCAL_ACTION_END_B6_ENCAP,
+ .attrs = (1 << SEG6_LOCAL_SRH),
+ .input = input_action_end_b6_encap,
+ .static_headroom = sizeof(struct ipv6hdr),
+ }
+};
+
+static struct seg6_action_desc *__get_action_desc(int action)
+{
+ struct seg6_action_desc *desc;
+ int i, count;
+
+ count = sizeof(seg6_action_table) / sizeof(struct seg6_action_desc);
+ for (i = 0; i < count; i++) {
+ desc = &seg6_action_table[i];
+ if (desc->action == action)
+ return desc;
+ }
+
+ return NULL;
+}
+
+static int seg6_local_input(struct sk_buff *skb)
+{
+ struct dst_entry *orig_dst = skb_dst(skb);
+ struct seg6_action_desc *desc;
+ struct seg6_local_lwt *slwt;
+
+ if (skb->protocol != htons(ETH_P_IPV6)) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+
+ slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
+ desc = slwt->desc;
+
+ return desc->input(skb, slwt);
+}
+
+static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = {
+ [SEG6_LOCAL_ACTION] = { .type = NLA_U32 },
+ [SEG6_LOCAL_SRH] = { .type = NLA_BINARY },
+ [SEG6_LOCAL_TABLE] = { .type = NLA_U32 },
+ [SEG6_LOCAL_NH4] = { .type = NLA_BINARY,
+ .len = sizeof(struct in_addr) },
+ [SEG6_LOCAL_NH6] = { .type = NLA_BINARY,
+ .len = sizeof(struct in6_addr) },
+ [SEG6_LOCAL_IIF] = { .type = NLA_U32 },
+ [SEG6_LOCAL_OIF] = { .type = NLA_U32 },
+};
+
+static int parse_nla_srh(struct nlattr **attrs, struct seg6_local_lwt *slwt)
+{
+ struct ipv6_sr_hdr *srh;
+ int len;
+
+ srh = nla_data(attrs[SEG6_LOCAL_SRH]);
+ len = nla_len(attrs[SEG6_LOCAL_SRH]);
+
+ /* SRH must contain at least one segment */
+ if (len < sizeof(*srh) + sizeof(struct in6_addr))
+ return -EINVAL;
+
+ if (!seg6_validate_srh(srh, len))
+ return -EINVAL;
+
+ slwt->srh = kmalloc(len, GFP_KERNEL);
+ if (!slwt->srh)
+ return -ENOMEM;
+
+ memcpy(slwt->srh, srh, len);
+
+ slwt->headroom += len;
+
+ return 0;
+}
+
+static int put_nla_srh(struct sk_buff *skb, struct seg6_local_lwt *slwt)
+{
+ struct ipv6_sr_hdr *srh;
+ struct nlattr *nla;
+ int len;
+
+ srh = slwt->srh;
+ len = (srh->hdrlen + 1) << 3;
+
+ nla = nla_reserve(skb, SEG6_LOCAL_SRH, len);
+ if (!nla)
+ return -EMSGSIZE;
+
+ memcpy(nla_data(nla), srh, len);
+
+ return 0;
+}
+
+static int cmp_nla_srh(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
+{
+ int len = (a->srh->hdrlen + 1) << 3;
+
+ if (len != ((b->srh->hdrlen + 1) << 3))
+ return 1;
+
+ return memcmp(a->srh, b->srh, len);
+}
+
+static int parse_nla_table(struct nlattr **attrs, struct seg6_local_lwt *slwt)
+{
+ slwt->table = nla_get_u32(attrs[SEG6_LOCAL_TABLE]);
+
+ return 0;
+}
+
+static int put_nla_table(struct sk_buff *skb, struct seg6_local_lwt *slwt)
+{
+ if (nla_put_u32(skb, SEG6_LOCAL_TABLE, slwt->table))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static int cmp_nla_table(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
+{
+ if (a->table != b->table)
+ return 1;
+
+ return 0;
+}
+
+static int parse_nla_nh4(struct nlattr **attrs, struct seg6_local_lwt *slwt)
+{
+ memcpy(&slwt->nh4, nla_data(attrs[SEG6_LOCAL_NH4]),
+ sizeof(struct in_addr));
+
+ return 0;
+}
+
+static int put_nla_nh4(struct sk_buff *skb, struct seg6_local_lwt *slwt)
+{
+ struct nlattr *nla;
+
+ nla = nla_reserve(skb, SEG6_LOCAL_NH4, sizeof(struct in_addr));
+ if (!nla)
+ return -EMSGSIZE;
+
+ memcpy(nla_data(nla), &slwt->nh4, sizeof(struct in_addr));
+
+ return 0;
+}
+
+static int cmp_nla_nh4(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
+{
+ return memcmp(&a->nh4, &b->nh4, sizeof(struct in_addr));
+}
+
+static int parse_nla_nh6(struct nlattr **attrs, struct seg6_local_lwt *slwt)
+{
+ memcpy(&slwt->nh6, nla_data(attrs[SEG6_LOCAL_NH6]),
+ sizeof(struct in6_addr));
+
+ return 0;
+}
+
+static int put_nla_nh6(struct sk_buff *skb, struct seg6_local_lwt *slwt)
+{
+ struct nlattr *nla;
+
+ nla = nla_reserve(skb, SEG6_LOCAL_NH6, sizeof(struct in6_addr));
+ if (!nla)
+ return -EMSGSIZE;
+
+ memcpy(nla_data(nla), &slwt->nh6, sizeof(struct in6_addr));
+
+ return 0;
+}
+
+static int cmp_nla_nh6(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
+{
+ return memcmp(&a->nh6, &b->nh6, sizeof(struct in6_addr));
+}
+
+static int parse_nla_iif(struct nlattr **attrs, struct seg6_local_lwt *slwt)
+{
+ slwt->iif = nla_get_u32(attrs[SEG6_LOCAL_IIF]);
+
+ return 0;
+}
+
+static int put_nla_iif(struct sk_buff *skb, struct seg6_local_lwt *slwt)
+{
+ if (nla_put_u32(skb, SEG6_LOCAL_IIF, slwt->iif))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static int cmp_nla_iif(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
+{
+ if (a->iif != b->iif)
+ return 1;
+
+ return 0;
+}
+
+static int parse_nla_oif(struct nlattr **attrs, struct seg6_local_lwt *slwt)
+{
+ slwt->oif = nla_get_u32(attrs[SEG6_LOCAL_OIF]);
+
+ return 0;
+}
+
+static int put_nla_oif(struct sk_buff *skb, struct seg6_local_lwt *slwt)
+{
+ if (nla_put_u32(skb, SEG6_LOCAL_OIF, slwt->oif))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static int cmp_nla_oif(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
+{
+ if (a->oif != b->oif)
+ return 1;
+
+ return 0;
+}
+
+struct seg6_action_param {
+ int (*parse)(struct nlattr **attrs, struct seg6_local_lwt *slwt);
+ int (*put)(struct sk_buff *skb, struct seg6_local_lwt *slwt);
+ int (*cmp)(struct seg6_local_lwt *a, struct seg6_local_lwt *b);
+};
+
+static struct seg6_action_param seg6_action_params[SEG6_LOCAL_MAX + 1] = {
+ [SEG6_LOCAL_SRH] = { .parse = parse_nla_srh,
+ .put = put_nla_srh,
+ .cmp = cmp_nla_srh },
+
+ [SEG6_LOCAL_TABLE] = { .parse = parse_nla_table,
+ .put = put_nla_table,
+ .cmp = cmp_nla_table },
+
+ [SEG6_LOCAL_NH4] = { .parse = parse_nla_nh4,
+ .put = put_nla_nh4,
+ .cmp = cmp_nla_nh4 },
+
+ [SEG6_LOCAL_NH6] = { .parse = parse_nla_nh6,
+ .put = put_nla_nh6,
+ .cmp = cmp_nla_nh6 },
+
+ [SEG6_LOCAL_IIF] = { .parse = parse_nla_iif,
+ .put = put_nla_iif,
+ .cmp = cmp_nla_iif },
+
+ [SEG6_LOCAL_OIF] = { .parse = parse_nla_oif,
+ .put = put_nla_oif,
+ .cmp = cmp_nla_oif },
+};
+
+static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt)
+{
+ struct seg6_action_param *param;
+ struct seg6_action_desc *desc;
+ int i, err;
+
+ desc = __get_action_desc(slwt->action);
+ if (!desc)
+ return -EINVAL;
+
+ if (!desc->input)
+ return -EOPNOTSUPP;
+
+ slwt->desc = desc;
+ slwt->headroom += desc->static_headroom;
+
+ for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) {
+ if (desc->attrs & (1 << i)) {
+ if (!attrs[i])
+ return -EINVAL;
+
+ param = &seg6_action_params[i];
+
+ err = param->parse(attrs, slwt);
+ if (err < 0)
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+static int seg6_local_build_state(struct nlattr *nla, unsigned int family,
+ const void *cfg, struct lwtunnel_state **ts,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[SEG6_LOCAL_MAX + 1];
+ struct lwtunnel_state *newts;
+ struct seg6_local_lwt *slwt;
+ int err;
+
+ if (family != AF_INET6)
+ return -EINVAL;
+
+ err = nla_parse_nested(tb, SEG6_LOCAL_MAX, nla, seg6_local_policy,
+ extack);
+
+ if (err < 0)
+ return err;
+
+ if (!tb[SEG6_LOCAL_ACTION])
+ return -EINVAL;
+
+ newts = lwtunnel_state_alloc(sizeof(*slwt));
+ if (!newts)
+ return -ENOMEM;
+
+ slwt = seg6_local_lwtunnel(newts);
+ slwt->action = nla_get_u32(tb[SEG6_LOCAL_ACTION]);
+
+ err = parse_nla_action(tb, slwt);
+ if (err < 0)
+ goto out_free;
+
+ newts->type = LWTUNNEL_ENCAP_SEG6_LOCAL;
+ newts->flags = LWTUNNEL_STATE_INPUT_REDIRECT;
+ newts->headroom = slwt->headroom;
+
+ *ts = newts;
+
+ return 0;
+
+out_free:
+ kfree(slwt->srh);
+ kfree(newts);
+ return err;
+}
+
+static void seg6_local_destroy_state(struct lwtunnel_state *lwt)
+{
+ struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt);
+
+ kfree(slwt->srh);
+}
+
+static int seg6_local_fill_encap(struct sk_buff *skb,
+ struct lwtunnel_state *lwt)
+{
+ struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt);
+ struct seg6_action_param *param;
+ int i, err;
+
+ if (nla_put_u32(skb, SEG6_LOCAL_ACTION, slwt->action))
+ return -EMSGSIZE;
+
+ for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) {
+ if (slwt->desc->attrs & (1 << i)) {
+ param = &seg6_action_params[i];
+ err = param->put(skb, slwt);
+ if (err < 0)
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+static int seg6_local_get_encap_size(struct lwtunnel_state *lwt)
+{
+ struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt);
+ unsigned long attrs;
+ int nlsize;
+
+ nlsize = nla_total_size(4); /* action */
+
+ attrs = slwt->desc->attrs;
+
+ if (attrs & (1 << SEG6_LOCAL_SRH))
+ nlsize += nla_total_size((slwt->srh->hdrlen + 1) << 3);
+
+ if (attrs & (1 << SEG6_LOCAL_TABLE))
+ nlsize += nla_total_size(4);
+
+ if (attrs & (1 << SEG6_LOCAL_NH4))
+ nlsize += nla_total_size(4);
+
+ if (attrs & (1 << SEG6_LOCAL_NH6))
+ nlsize += nla_total_size(16);
+
+ if (attrs & (1 << SEG6_LOCAL_IIF))
+ nlsize += nla_total_size(4);
+
+ if (attrs & (1 << SEG6_LOCAL_OIF))
+ nlsize += nla_total_size(4);
+
+ return nlsize;
+}
+
+static int seg6_local_cmp_encap(struct lwtunnel_state *a,
+ struct lwtunnel_state *b)
+{
+ struct seg6_local_lwt *slwt_a, *slwt_b;
+ struct seg6_action_param *param;
+ int i;
+
+ slwt_a = seg6_local_lwtunnel(a);
+ slwt_b = seg6_local_lwtunnel(b);
+
+ if (slwt_a->action != slwt_b->action)
+ return 1;
+
+ if (slwt_a->desc->attrs != slwt_b->desc->attrs)
+ return 1;
+
+ for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) {
+ if (slwt_a->desc->attrs & (1 << i)) {
+ param = &seg6_action_params[i];
+ if (param->cmp(slwt_a, slwt_b))
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+static const struct lwtunnel_encap_ops seg6_local_ops = {
+ .build_state = seg6_local_build_state,
+ .destroy_state = seg6_local_destroy_state,
+ .input = seg6_local_input,
+ .fill_encap = seg6_local_fill_encap,
+ .get_encap_size = seg6_local_get_encap_size,
+ .cmp_encap = seg6_local_cmp_encap,
+ .owner = THIS_MODULE,
+};
+
+int __init seg6_local_init(void)
+{
+ return lwtunnel_encap_add_ops(&seg6_local_ops,
+ LWTUNNEL_ENCAP_SEG6_LOCAL);
+}
+
+void seg6_local_exit(void)
+{
+ lwtunnel_encap_del_ops(&seg6_local_ops, LWTUNNEL_ENCAP_SEG6_LOCAL);
+}
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index ac912bb21747..d60ddcb0bfe2 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -91,29 +91,35 @@ struct sit_net {
* Must be invoked with rcu_read_lock
*/
static struct ip_tunnel *ipip6_tunnel_lookup(struct net *net,
- struct net_device *dev, __be32 remote, __be32 local)
+ struct net_device *dev,
+ __be32 remote, __be32 local,
+ int sifindex)
{
unsigned int h0 = HASH(remote);
unsigned int h1 = HASH(local);
struct ip_tunnel *t;
struct sit_net *sitn = net_generic(net, sit_net_id);
+ int ifindex = dev ? dev->ifindex : 0;
for_each_ip_tunnel_rcu(t, sitn->tunnels_r_l[h0 ^ h1]) {
if (local == t->parms.iph.saddr &&
remote == t->parms.iph.daddr &&
- (!dev || !t->parms.link || dev->ifindex == t->parms.link) &&
+ (!dev || !t->parms.link || ifindex == t->parms.link ||
+ sifindex == t->parms.link) &&
(t->dev->flags & IFF_UP))
return t;
}
for_each_ip_tunnel_rcu(t, sitn->tunnels_r[h0]) {
if (remote == t->parms.iph.daddr &&
- (!dev || !t->parms.link || dev->ifindex == t->parms.link) &&
+ (!dev || !t->parms.link || ifindex == t->parms.link ||
+ sifindex == t->parms.link) &&
(t->dev->flags & IFF_UP))
return t;
}
for_each_ip_tunnel_rcu(t, sitn->tunnels_l[h1]) {
if (local == t->parms.iph.saddr &&
- (!dev || !t->parms.link || dev->ifindex == t->parms.link) &&
+ (!dev || !t->parms.link || ifindex == t->parms.link ||
+ sifindex == t->parms.link) &&
(t->dev->flags & IFF_UP))
return t;
}
@@ -486,6 +492,7 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
const int code = icmp_hdr(skb)->code;
unsigned int data_len = 0;
struct ip_tunnel *t;
+ int sifindex;
int err;
switch (type) {
@@ -517,10 +524,9 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
err = -ENOENT;
- t = ipip6_tunnel_lookup(dev_net(skb->dev),
- skb->dev,
- iph->daddr,
- iph->saddr);
+ sifindex = netif_is_l3_master(skb->dev) ? IPCB(skb)->iif : 0;
+ t = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev,
+ iph->daddr, iph->saddr, sifindex);
if (!t)
goto out;
@@ -633,10 +639,12 @@ static int ipip6_rcv(struct sk_buff *skb)
{
const struct iphdr *iph = ip_hdr(skb);
struct ip_tunnel *tunnel;
+ int sifindex;
int err;
+ sifindex = netif_is_l3_master(skb->dev) ? IPCB(skb)->iif : 0;
tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev,
- iph->saddr, iph->daddr);
+ iph->saddr, iph->daddr, sifindex);
if (tunnel) {
struct pcpu_sw_netstats *tstats;
@@ -704,10 +712,13 @@ static int sit_tunnel_rcv(struct sk_buff *skb, u8 ipproto)
{
const struct iphdr *iph;
struct ip_tunnel *tunnel;
+ int sifindex;
+
+ sifindex = netif_is_l3_master(skb->dev) ? IPCB(skb)->iif : 0;
iph = ip_hdr(skb);
tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev,
- iph->saddr, iph->daddr);
+ iph->saddr, iph->daddr, sifindex);
if (tunnel) {
const struct tnl_ptk_info *tpi;
@@ -1848,19 +1859,22 @@ err_alloc_dev:
return err;
}
-static void __net_exit sit_exit_net(struct net *net)
+static void __net_exit sit_exit_batch_net(struct list_head *net_list)
{
LIST_HEAD(list);
+ struct net *net;
rtnl_lock();
- sit_destroy_tunnels(net, &list);
+ list_for_each_entry(net, net_list, exit_list)
+ sit_destroy_tunnels(net, &list);
+
unregister_netdevice_many(&list);
rtnl_unlock();
}
static struct pernet_operations sit_net_ops = {
.init = sit_init_net,
- .exit = sit_exit_net,
+ .exit_batch = sit_exit_batch_net,
.id = &sit_net_id,
.size = sizeof(struct sit_net),
};
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 4e7817abc0b9..e7a3a6b6cf56 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -244,7 +244,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
}
req->rsk_window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW);
- tcp_select_initial_window(tcp_full_space(sk), req->mss,
+ tcp_select_initial_window(sk, tcp_full_space(sk), req->mss,
&req->rsk_rcv_wnd, &req->rsk_window_clamp,
ireq->wscale_ok, &rcv_wscale,
dst_metric(dst, RTAX_INITRWND));
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 69c50e737c54..a789a8ac6a64 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* sysctl_net_ipv6.c: sysctl interface to net IPV6 subsystem.
*
@@ -90,6 +91,41 @@ static struct ctl_table ipv6_table_template[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
+ {
+ .procname = "flowlabel_reflect",
+ .data = &init_net.ipv6.sysctl.flowlabel_reflect,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "max_dst_opts_number",
+ .data = &init_net.ipv6.sysctl.max_dst_opts_cnt,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "max_hbh_opts_number",
+ .data = &init_net.ipv6.sysctl.max_hbh_opts_cnt,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "max_dst_opts_length",
+ .data = &init_net.ipv6.sysctl.max_dst_opts_len,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "max_hbh_length",
+ .data = &init_net.ipv6.sysctl.max_hbh_opts_len,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
{ }
};
@@ -149,6 +185,11 @@ static int __net_init ipv6_sysctl_net_init(struct net *net)
ipv6_table[6].data = &net->ipv6.sysctl.idgen_delay;
ipv6_table[7].data = &net->ipv6.sysctl.flowlabel_state_ranges;
ipv6_table[8].data = &net->ipv6.sysctl.ip_nonlocal_bind;
+ ipv6_table[9].data = &net->ipv6.sysctl.flowlabel_reflect;
+ ipv6_table[10].data = &net->ipv6.sysctl.max_dst_opts_cnt;
+ ipv6_table[11].data = &net->ipv6.sysctl.max_hbh_opts_cnt;
+ ipv6_table[12].data = &net->ipv6.sysctl.max_dst_opts_len;
+ ipv6_table[13].data = &net->ipv6.sysctl.max_hbh_opts_len;
ipv6_route_table = ipv6_route_sysctl_init(net);
if (!ipv6_route_table)
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 206210125fd7..6bb98c93edfe 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -69,6 +69,8 @@
#include <crypto/hash.h>
#include <linux/scatterlist.h>
+#include <trace/events/tcp.h>
+
static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
struct request_sock *req);
@@ -350,7 +352,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
sk = __inet6_lookup_established(net, &tcp_hashinfo,
&hdr->daddr, th->dest,
&hdr->saddr, ntohs(th->source),
- skb->dev->ifindex);
+ skb->dev->ifindex, inet6_sdif(skb));
if (!sk) {
__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
@@ -890,7 +892,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
int genhash;
struct sock *sk1 = NULL;
#endif
- int oif;
+ int oif = 0;
if (th->rst)
return;
@@ -918,7 +920,8 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
&tcp_hashinfo, NULL, 0,
&ipv6h->saddr,
th->source, &ipv6h->daddr,
- ntohs(th->source), tcp_v6_iif(skb));
+ ntohs(th->source), tcp_v6_iif(skb),
+ tcp_v6_sdif(skb));
if (!sk1)
goto out;
@@ -938,7 +941,11 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
(th->doff << 2);
- oif = sk ? sk->sk_bound_dev_if : 0;
+ if (sk) {
+ oif = sk->sk_bound_dev_if;
+ trace_tcp_send_reset(sk, skb);
+ }
+
tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0);
#ifdef CONFIG_TCP_MD5SIG
@@ -1296,7 +1303,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
}
}
- tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
+ tcp_rcv_established(sk, skb, tcp_hdr(skb));
if (opt_skb)
goto ipv6_pktoptions;
return 0;
@@ -1393,10 +1400,13 @@ static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
TCP_SKB_CB(skb)->tcp_tw_isn = 0;
TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
TCP_SKB_CB(skb)->sacked = 0;
+ TCP_SKB_CB(skb)->has_rxtstamp =
+ skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
}
static int tcp_v6_rcv(struct sk_buff *skb)
{
+ int sdif = inet6_sdif(skb);
const struct tcphdr *th;
const struct ipv6hdr *hdr;
bool refcounted;
@@ -1430,7 +1440,7 @@ static int tcp_v6_rcv(struct sk_buff *skb)
lookup:
sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
- th->source, th->dest, inet6_iif(skb),
+ th->source, th->dest, inet6_iif(skb), sdif,
&refcounted);
if (!sk)
goto no_tcp_socket;
@@ -1456,9 +1466,9 @@ process:
}
sock_hold(sk);
refcounted = true;
- if (tcp_filter(sk, skb))
- goto discard_and_relse;
- nsk = tcp_check_req(sk, skb, req, false);
+ nsk = NULL;
+ if (!tcp_filter(sk, skb))
+ nsk = tcp_check_req(sk, skb, req, false);
if (!nsk) {
reqsk_put(req);
goto discard_and_relse;
@@ -1505,8 +1515,7 @@ process:
tcp_segs_in(tcp_sk(sk), skb);
ret = 0;
if (!sock_owned_by_user(sk)) {
- if (!tcp_prequeue(sk, skb))
- ret = tcp_v6_do_rcv(sk, skb);
+ ret = tcp_v6_do_rcv(sk, skb);
} else if (tcp_add_backlog(sk, skb)) {
goto discard_and_relse;
}
@@ -1564,7 +1573,8 @@ do_time_wait:
skb, __tcp_hdrlen(th),
&ipv6_hdr(skb)->saddr, th->source,
&ipv6_hdr(skb)->daddr,
- ntohs(th->dest), tcp_v6_iif(skb));
+ ntohs(th->dest), tcp_v6_iif(skb),
+ sdif);
if (sk2) {
struct inet_timewait_sock *tw = inet_twsk(sk);
inet_twsk_deschedule_put(tw);
@@ -1573,8 +1583,9 @@ do_time_wait:
refcounted = false;
goto process;
}
- /* Fall through to ACK */
}
+ /* to ACK */
+ /* fall through */
case TCP_TW_ACK:
tcp_v6_timewait_ack(sk, skb);
break;
@@ -1611,7 +1622,7 @@ static void tcp_v6_early_demux(struct sk_buff *skb)
sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
&hdr->saddr, th->source,
&hdr->daddr, ntohs(th->dest),
- inet6_iif(skb));
+ inet6_iif(skb), inet6_sdif(skb));
if (sk) {
skb->sk = sk;
skb->destructor = sock_edemux;
@@ -1929,8 +1940,8 @@ struct proto tcpv6_prot = {
.memory_pressure = &tcp_memory_pressure,
.orphan_count = &tcp_orphan_count,
.sysctl_mem = sysctl_tcp_mem,
- .sysctl_wmem = sysctl_tcp_wmem,
- .sysctl_rmem = sysctl_tcp_rmem,
+ .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem),
+ .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),
.max_header = MAX_TCP_HEADER,
.obj_size = sizeof(struct tcp6_sock),
.slab_flags = SLAB_TYPESAFE_BY_RCU,
@@ -1945,6 +1956,9 @@ struct proto tcpv6_prot = {
.diag_destroy = tcp_abort,
};
+/* thinking of making this const? Don't.
+ * early_demux can change based on sysctl.
+ */
static struct inet6_protocol tcpv6_protocol = {
.early_demux = tcp_v6_early_demux,
.early_demux_handler = tcp_v6_early_demux,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 578142b7ca3e..3f30fa313bf2 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -129,7 +129,7 @@ static void udp_v6_rehash(struct sock *sk)
static int compute_score(struct sock *sk, struct net *net,
const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr, unsigned short hnum,
- int dif, bool exact_dif)
+ int dif, int sdif, bool exact_dif)
{
int score;
struct inet_sock *inet;
@@ -161,9 +161,13 @@ static int compute_score(struct sock *sk, struct net *net,
}
if (sk->sk_bound_dev_if || exact_dif) {
- if (sk->sk_bound_dev_if != dif)
+ bool dev_match = (sk->sk_bound_dev_if == dif ||
+ sk->sk_bound_dev_if == sdif);
+
+ if (exact_dif && !dev_match)
return -1;
- score++;
+ if (sk->sk_bound_dev_if && dev_match)
+ score++;
}
if (sk->sk_incoming_cpu == raw_smp_processor_id())
@@ -175,9 +179,9 @@ static int compute_score(struct sock *sk, struct net *net,
/* called with rcu_read_lock() */
static struct sock *udp6_lib_lookup2(struct net *net,
const struct in6_addr *saddr, __be16 sport,
- const struct in6_addr *daddr, unsigned int hnum, int dif,
- bool exact_dif, struct udp_hslot *hslot2,
- struct sk_buff *skb)
+ const struct in6_addr *daddr, unsigned int hnum,
+ int dif, int sdif, bool exact_dif,
+ struct udp_hslot *hslot2, struct sk_buff *skb)
{
struct sock *sk, *result;
int score, badness, matches = 0, reuseport = 0;
@@ -187,7 +191,7 @@ static struct sock *udp6_lib_lookup2(struct net *net,
badness = -1;
udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
score = compute_score(sk, net, saddr, sport,
- daddr, hnum, dif, exact_dif);
+ daddr, hnum, dif, sdif, exact_dif);
if (score > badness) {
reuseport = sk->sk_reuseport;
if (reuseport) {
@@ -214,10 +218,10 @@ static struct sock *udp6_lib_lookup2(struct net *net,
/* rcu_read_lock() must be held */
struct sock *__udp6_lib_lookup(struct net *net,
- const struct in6_addr *saddr, __be16 sport,
- const struct in6_addr *daddr, __be16 dport,
- int dif, struct udp_table *udptable,
- struct sk_buff *skb)
+ const struct in6_addr *saddr, __be16 sport,
+ const struct in6_addr *daddr, __be16 dport,
+ int dif, int sdif, struct udp_table *udptable,
+ struct sk_buff *skb)
{
struct sock *sk, *result;
unsigned short hnum = ntohs(dport);
@@ -235,7 +239,7 @@ struct sock *__udp6_lib_lookup(struct net *net,
goto begin;
result = udp6_lib_lookup2(net, saddr, sport,
- daddr, hnum, dif, exact_dif,
+ daddr, hnum, dif, sdif, exact_dif,
hslot2, skb);
if (!result) {
unsigned int old_slot2 = slot2;
@@ -250,7 +254,7 @@ struct sock *__udp6_lib_lookup(struct net *net,
goto begin;
result = udp6_lib_lookup2(net, saddr, sport,
- daddr, hnum, dif,
+ daddr, hnum, dif, sdif,
exact_dif, hslot2,
skb);
}
@@ -261,7 +265,7 @@ begin:
badness = -1;
sk_for_each_rcu(sk, &hslot->head) {
score = compute_score(sk, net, saddr, sport, daddr, hnum, dif,
- exact_dif);
+ sdif, exact_dif);
if (score > badness) {
reuseport = sk->sk_reuseport;
if (reuseport) {
@@ -294,7 +298,7 @@ static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb,
return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport,
&iph->daddr, dport, inet6_iif(skb),
- udptable, skb);
+ inet6_sdif(skb), udptable, skb);
}
struct sock *udp6_lib_lookup_skb(struct sk_buff *skb,
@@ -304,7 +308,7 @@ struct sock *udp6_lib_lookup_skb(struct sk_buff *skb,
return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport,
&iph->daddr, dport, inet6_iif(skb),
- &udp_table, skb);
+ inet6_sdif(skb), &udp_table, skb);
}
EXPORT_SYMBOL_GPL(udp6_lib_lookup_skb);
@@ -320,7 +324,7 @@ struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be
struct sock *sk;
sk = __udp6_lib_lookup(net, saddr, sport, daddr, dport,
- dif, &udp_table, NULL);
+ dif, 0, &udp_table, NULL);
if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
sk = NULL;
return sk;
@@ -362,7 +366,8 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
return ipv6_recv_rxpmtu(sk, msg, len, addr_len);
try_again:
- peeking = off = sk_peek_offset(sk, flags);
+ peeking = flags & MSG_PEEK;
+ off = sk_peek_offset(sk, flags);
skb = __skb_recv_udp(sk, flags, noblock, &peeked, &off, &err);
if (!skb)
return err;
@@ -501,7 +506,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
struct net *net = dev_net(skb->dev);
sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source,
- inet6_iif(skb), udptable, skb);
+ inet6_iif(skb), 0, udptable, skb);
if (!sk) {
__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
ICMP6_MIB_INERRORS);
@@ -574,8 +579,7 @@ static __inline__ void udpv6_err(struct sk_buff *skb,
static struct static_key udpv6_encap_needed __read_mostly;
void udpv6_encap_enable(void)
{
- if (!static_key_enabled(&udpv6_encap_needed))
- static_key_slow_inc(&udpv6_encap_needed);
+ static_key_enable(&udpv6_encap_needed);
}
EXPORT_SYMBOL(udpv6_encap_enable);
@@ -602,7 +606,7 @@ static int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
*/
/* if we're overly short, let UDP handle it */
- encap_rcv = ACCESS_ONCE(up->encap_rcv);
+ encap_rcv = READ_ONCE(up->encap_rcv);
if (encap_rcv) {
int ret;
@@ -767,6 +771,15 @@ start_lookup:
return 0;
}
+static void udp6_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst)
+{
+ if (udp_sk_rx_dst_set(sk, dst)) {
+ const struct rt6_info *rt = (const struct rt6_info *)dst;
+
+ inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
+ }
+}
+
int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
int proto)
{
@@ -816,7 +829,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
int ret;
if (unlikely(sk->sk_rx_dst != dst))
- udp_sk_rx_dst_set(sk, dst);
+ udp6_sk_rx_dst_set(sk, dst);
ret = udpv6_queue_rcv_skb(sk, skb);
sock_put(sk);
@@ -893,7 +906,7 @@ discard:
static struct sock *__udp6_lib_demux_lookup(struct net *net,
__be16 loc_port, const struct in6_addr *loc_addr,
__be16 rmt_port, const struct in6_addr *rmt_addr,
- int dif)
+ int dif, int sdif)
{
unsigned short hnum = ntohs(loc_port);
unsigned int hash2 = udp6_portaddr_hash(net, loc_addr, hnum);
@@ -904,7 +917,7 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net,
udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
if (sk->sk_state == TCP_ESTABLISHED &&
- INET6_MATCH(sk, net, rmt_addr, loc_addr, ports, dif))
+ INET6_MATCH(sk, net, rmt_addr, loc_addr, ports, dif, sdif))
return sk;
/* Only check first socket in chain */
break;
@@ -919,6 +932,7 @@ static void udp_v6_early_demux(struct sk_buff *skb)
struct sock *sk;
struct dst_entry *dst;
int dif = skb->dev->ifindex;
+ int sdif = inet6_sdif(skb);
if (!pskb_may_pull(skb, skb_transport_offset(skb) +
sizeof(struct udphdr)))
@@ -930,7 +944,7 @@ static void udp_v6_early_demux(struct sk_buff *skb)
sk = __udp6_lib_demux_lookup(net, uh->dest,
&ipv6_hdr(skb)->daddr,
uh->source, &ipv6_hdr(skb)->saddr,
- dif);
+ dif, sdif);
else
return;
@@ -1001,6 +1015,7 @@ static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
*/
offset = skb_transport_offset(skb);
skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
+ csum = skb->csum;
skb->ip_summed = CHECKSUM_NONE;
@@ -1417,7 +1432,7 @@ void udpv6_destroy_sock(struct sock *sk)
if (static_key_false(&udpv6_encap_needed) && up->encap_type) {
void (*encap_destroy)(struct sock *sk);
- encap_destroy = ACCESS_ONCE(up->encap_destroy);
+ encap_destroy = READ_ONCE(up->encap_destroy);
if (encap_destroy)
encap_destroy(sk);
}
@@ -1466,6 +1481,9 @@ int compat_udpv6_getsockopt(struct sock *sk, int level, int optname,
}
#endif
+/* thinking of making this const? Don't.
+ * early_demux can change based on sysctl.
+ */
static struct inet6_protocol udpv6_protocol = {
.early_demux = udp_v6_early_demux,
.early_demux_handler = udp_v6_early_demux,
diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h
index f180b3d85e31..7903e21c178b 100644
--- a/net/ipv6/udp_impl.h
+++ b/net/ipv6/udp_impl.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _UDP6_IMPL_H
#define _UDP6_IMPL_H
#include <net/udp.h>
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index e7d378c032cb..455fd4e39333 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -17,109 +17,15 @@
#include <net/ip6_checksum.h>
#include "ip6_offload.h"
-static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
- netdev_features_t features)
+static struct sk_buff *udp6_tunnel_segment(struct sk_buff *skb,
+ netdev_features_t features)
{
struct sk_buff *segs = ERR_PTR(-EINVAL);
- unsigned int mss;
- unsigned int unfrag_ip6hlen, unfrag_len;
- struct frag_hdr *fptr;
- u8 *packet_start, *prevhdr;
- u8 nexthdr;
- u8 frag_hdr_sz = sizeof(struct frag_hdr);
- __wsum csum;
- int tnl_hlen;
- int err;
-
- mss = skb_shinfo(skb)->gso_size;
- if (unlikely(skb->len <= mss))
- goto out;
-
- if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
- /* Packet is from an untrusted source, reset gso_segs. */
-
- skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss);
-
- /* Set the IPv6 fragment id if not set yet */
- if (!skb_shinfo(skb)->ip6_frag_id)
- ipv6_proxy_select_ident(dev_net(skb->dev), skb);
-
- segs = NULL;
- goto out;
- }
if (skb->encapsulation && skb_shinfo(skb)->gso_type &
(SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))
segs = skb_udp_tunnel_segment(skb, features, true);
- else {
- const struct ipv6hdr *ipv6h;
- struct udphdr *uh;
-
- if (!pskb_may_pull(skb, sizeof(struct udphdr)))
- goto out;
-
- /* Do software UFO. Complete and fill in the UDP checksum as HW cannot
- * do checksum of UDP packets sent as multiple IP fragments.
- */
-
- uh = udp_hdr(skb);
- ipv6h = ipv6_hdr(skb);
-
- uh->check = 0;
- csum = skb_checksum(skb, 0, skb->len, 0);
- uh->check = udp_v6_check(skb->len, &ipv6h->saddr,
- &ipv6h->daddr, csum);
- if (uh->check == 0)
- uh->check = CSUM_MANGLED_0;
-
- skb->ip_summed = CHECKSUM_UNNECESSARY;
-
- /* If there is no outer header we can fake a checksum offload
- * due to the fact that we have already done the checksum in
- * software prior to segmenting the frame.
- */
- if (!skb->encap_hdr_csum)
- features |= NETIF_F_HW_CSUM;
-
- /* Check if there is enough headroom to insert fragment header. */
- tnl_hlen = skb_tnl_header_len(skb);
- if (skb->mac_header < (tnl_hlen + frag_hdr_sz)) {
- if (gso_pskb_expand_head(skb, tnl_hlen + frag_hdr_sz))
- goto out;
- }
-
- /* Find the unfragmentable header and shift it left by frag_hdr_sz
- * bytes to insert fragment header.
- */
- err = ip6_find_1stfragopt(skb, &prevhdr);
- if (err < 0)
- return ERR_PTR(err);
- unfrag_ip6hlen = err;
- nexthdr = *prevhdr;
- *prevhdr = NEXTHDR_FRAGMENT;
- unfrag_len = (skb_network_header(skb) - skb_mac_header(skb)) +
- unfrag_ip6hlen + tnl_hlen;
- packet_start = (u8 *) skb->head + SKB_GSO_CB(skb)->mac_offset;
- memmove(packet_start-frag_hdr_sz, packet_start, unfrag_len);
-
- SKB_GSO_CB(skb)->mac_offset -= frag_hdr_sz;
- skb->mac_header -= frag_hdr_sz;
- skb->network_header -= frag_hdr_sz;
-
- fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen);
- fptr->nexthdr = nexthdr;
- fptr->reserved = 0;
- if (!skb_shinfo(skb)->ip6_frag_id)
- ipv6_proxy_select_ident(dev_net(skb->dev), skb);
- fptr->identification = skb_shinfo(skb)->ip6_frag_id;
-
- /* Fragment the skb. ipv6 header and the remaining fields of the
- * fragment header are updated in ipv6_gso_segment()
- */
- segs = skb_segment(skb, features);
- }
-out:
return segs;
}
@@ -169,7 +75,7 @@ static int udp6_gro_complete(struct sk_buff *skb, int nhoff)
static const struct net_offload udpv6_offload = {
.callbacks = {
- .gso_segment = udp6_ufo_fragment,
+ .gso_segment = udp6_tunnel_segment,
.gro_receive = udp6_gro_receive,
.gro_complete = udp6_gro_complete,
},
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 3ef5d913e7a3..fe04e23af986 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* xfrm6_input.c: based on net/ipv4/xfrm4_input.c
*
@@ -34,6 +35,7 @@ EXPORT_SYMBOL(xfrm6_rcv_spi);
int xfrm6_transport_finish(struct sk_buff *skb, int async)
{
struct xfrm_offload *xo = xfrm_offload(skb);
+ int nhlen = skb->data - skb_network_header(skb);
skb_network_header(skb)[IP6CB(skb)->nhoff] =
XFRM_MODE_SKB_CB(skb)->protocol;
@@ -43,8 +45,9 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async)
return 1;
#endif
- __skb_push(skb, skb->data - skb_network_header(skb));
+ __skb_push(skb, nhlen);
ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+ skb_postpush_rcsum(skb, skb_network_header(skb), nhlen);
if (xo && (xo->flags & XFRM_GRO)) {
skb_mac_header_rebuild(skb);
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 79651bc71bf0..885ade234a49 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* xfrm6_policy.c: based on xfrm4_policy.c
*
@@ -27,7 +28,8 @@
static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif,
const xfrm_address_t *saddr,
- const xfrm_address_t *daddr)
+ const xfrm_address_t *daddr,
+ u32 mark)
{
struct flowi6 fl6;
struct dst_entry *dst;
@@ -36,6 +38,7 @@ static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif,
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_oif = l3mdev_master_ifindex_by_index(net, oif);
fl6.flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF;
+ fl6.flowi6_mark = mark;
memcpy(&fl6.daddr, daddr, sizeof(fl6.daddr));
if (saddr)
memcpy(&fl6.saddr, saddr, sizeof(fl6.saddr));
@@ -52,12 +55,13 @@ static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif,
}
static int xfrm6_get_saddr(struct net *net, int oif,
- xfrm_address_t *saddr, xfrm_address_t *daddr)
+ xfrm_address_t *saddr, xfrm_address_t *daddr,
+ u32 mark)
{
struct dst_entry *dst;
struct net_device *dev;
- dst = xfrm6_dst_lookup(net, 0, oif, NULL, daddr);
+ dst = xfrm6_dst_lookup(net, 0, oif, NULL, daddr, mark);
if (IS_ERR(dst))
return -EHOSTUNREACH;
@@ -149,6 +153,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
switch (nexthdr) {
case NEXTHDR_FRAGMENT:
onlyproto = 1;
+ /* fall through */
case NEXTHDR_ROUTING:
case NEXTHDR_HOP:
case NEXTHDR_DEST:
@@ -214,14 +219,6 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
}
}
-static inline int xfrm6_garbage_collect(struct dst_ops *ops)
-{
- struct net *net = container_of(ops, struct net, xfrm.xfrm6_dst_ops);
-
- xfrm_garbage_collect_deferred(net);
- return dst_entries_get_fast(ops) > ops->gc_thresh * 2;
-}
-
static void xfrm6_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb, u32 mtu)
{
@@ -279,14 +276,13 @@ static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
static struct dst_ops xfrm6_dst_ops_template = {
.family = AF_INET6,
- .gc = xfrm6_garbage_collect,
.update_pmtu = xfrm6_update_pmtu,
.redirect = xfrm6_redirect,
.cow_metrics = dst_cow_metrics_generic,
.destroy = xfrm6_dst_destroy,
.ifdown = xfrm6_dst_ifdown,
.local_out = __ip6_local_out,
- .gc_thresh = INT_MAX,
+ .gc_thresh = 32768,
};
static const struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index 8a1f9c0d2a13..b15075a5c227 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* xfrm6_state.c: based on xfrm4_state.c
*
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index 4e438bc7ee87..f85f0d7480ac 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -338,6 +338,14 @@ static int __net_init xfrm6_tunnel_net_init(struct net *net)
static void __net_exit xfrm6_tunnel_net_exit(struct net *net)
{
+ struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net);
+ unsigned int i;
+
+ for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++)
+ WARN_ON_ONCE(!hlist_empty(&xfrm6_tn->spi_byaddr[i]));
+
+ for (i = 0; i < XFRM6_TUNNEL_SPI_BYSPI_HSIZE; i++)
+ WARN_ON_ONCE(!hlist_empty(&xfrm6_tn->spi_byspi[i]));
}
static struct pernet_operations xfrm6_tunnel_net_ops = {
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index ac598ec90589..d21a9d128d3e 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -1867,6 +1867,7 @@ static int ipx_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
rc = -EPERM;
if (!capable(CAP_NET_ADMIN))
break;
+ /* fall through */
case SIOCGIFADDR:
rc = ipxitf_ioctl(cmd, argp);
break;
diff --git a/net/ipx/ipx_proc.c b/net/ipx/ipx_proc.c
index 7d75e4c5c75d..38a3d51d9ead 100644
--- a/net/ipx/ipx_proc.c
+++ b/net/ipx/ipx_proc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* IPX proc routines
*
diff --git a/net/ipx/ipx_route.c b/net/ipx/ipx_route.c
index b5d91447f3dc..3cf93aa9f284 100644
--- a/net/ipx/ipx_route.c
+++ b/net/ipx/ipx_route.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Implements the IPX routing routines.
* Code moved from af_ipx.c.
diff --git a/net/ipx/pe2.c b/net/ipx/pe2.c
index 32dcd601ab32..ba7d4214bbff 100644
--- a/net/ipx/pe2.c
+++ b/net/ipx/pe2.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/in.h>
#include <linux/mm.h>
#include <linux/module.h>
diff --git a/net/ipx/sysctl_net_ipx.c b/net/ipx/sysctl_net_ipx.c
index 0dafcc561ed6..c3eef457db88 100644
--- a/net/ipx/sysctl_net_ipx.c
+++ b/net/ipx/sysctl_net_ipx.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* -*- linux-c -*-
* sysctl_net_ipx.c: sysctl interface to net IPX subsystem.
*
diff --git a/net/irda/Kconfig b/net/irda/Kconfig
deleted file mode 100644
index c8671a7ffb3c..000000000000
--- a/net/irda/Kconfig
+++ /dev/null
@@ -1,96 +0,0 @@
-#
-# IrDA protocol configuration
-#
-
-menuconfig IRDA
- depends on NET && !S390
- tristate "IrDA (infrared) subsystem support"
- select CRC_CCITT
- ---help---
- Say Y here if you want to build support for the IrDA (TM) protocols.
- The Infrared Data Associations (tm) specifies standards for wireless
- infrared communication and is supported by most laptops and PDA's.
-
- To use Linux support for the IrDA (tm) protocols, you will also need
- some user-space utilities like irattach. For more information, see
- the file <file:Documentation/networking/irda.txt>. You also want to
- read the IR-HOWTO, available at
- <http://www.tldp.org/docs.html#howto>.
-
- If you want to exchange bits of data (vCal, vCard) with a PDA, you
- will need to install some OBEX application, such as OpenObex :
- <http://sourceforge.net/projects/openobex/>
-
- To compile this support as a module, choose M here: the module will
- be called irda.
-
-comment "IrDA protocols"
- depends on IRDA
-
-source "net/irda/irlan/Kconfig"
-
-source "net/irda/irnet/Kconfig"
-
-source "net/irda/ircomm/Kconfig"
-
-config IRDA_ULTRA
- bool "Ultra (connectionless) protocol"
- depends on IRDA
- help
- Say Y here to support the connectionless Ultra IRDA protocol.
- Ultra allows to exchange data over IrDA with really simple devices
- (watch, beacon) without the overhead of the IrDA protocol (no handshaking,
- no management frames, simple fixed header).
- Ultra is available as a special socket : socket(AF_IRDA, SOCK_DGRAM, 1);
-
-comment "IrDA options"
- depends on IRDA
-
-config IRDA_CACHE_LAST_LSAP
- bool "Cache last LSAP"
- depends on IRDA
- help
- Say Y here if you want IrLMP to cache the last LSAP used. This
- makes sense since most frames will be sent/received on the same
- connection. Enabling this option will save a hash-lookup per frame.
-
- If unsure, say Y.
-
-config IRDA_FAST_RR
- bool "Fast RRs (low latency)"
- depends on IRDA
- ---help---
- Say Y here is you want IrLAP to send fast RR (Receive Ready) frames
- when acting as a primary station.
- Disabling this option will make latency over IrDA very bad. Enabling
- this option will make the IrDA stack send more packet than strictly
- necessary, thus reduce your battery life (but not that much).
-
- Fast RR will make IrLAP send out a RR frame immediately when
- receiving a frame if its own transmit queue is currently empty. This
- will give a lot of speed improvement when receiving much data since
- the secondary station will not have to wait the max. turn around
- time (usually 500ms) before it is allowed to transmit the next time.
- If the transmit queue of the secondary is also empty, the primary will
- start backing-off before sending another RR frame, waiting longer
- each time until the back-off reaches the max. turn around time.
- This back-off increase in controlled via
- /proc/sys/net/irda/fast_poll_increase
-
- If unsure, say Y.
-
-config IRDA_DEBUG
- bool "Debug information"
- depends on IRDA
- help
- Say Y here if you want the IrDA subsystem to write debug information
- to your syslog. You can change the debug level in
- /proc/sys/net/irda/debug .
- When this option is enabled, the IrDA also perform many extra internal
- verifications which will usually prevent the kernel to crash in case of
- bugs.
-
- If unsure, say Y (since it makes it easier to find the bugs).
-
-source "drivers/net/irda/Kconfig"
-
diff --git a/net/irda/Makefile b/net/irda/Makefile
deleted file mode 100644
index 187f6c563a4b..000000000000
--- a/net/irda/Makefile
+++ /dev/null
@@ -1,15 +0,0 @@
-#
-# Makefile for the Linux IrDA protocol layer.
-#
-
-obj-$(CONFIG_IRDA) += irda.o
-obj-$(CONFIG_IRLAN) += irlan/
-obj-$(CONFIG_IRNET) += irnet/
-obj-$(CONFIG_IRCOMM) += ircomm/
-
-irda-y := iriap.o iriap_event.o irlmp.o irlmp_event.o irlmp_frame.o \
- irlap.o irlap_event.o irlap_frame.o timer.o qos.o irqueue.o \
- irttp.o irda_device.o irias_object.o wrapper.o af_irda.o \
- discovery.o parameters.o irnetlink.o irmod.o
-irda-$(CONFIG_PROC_FS) += irproc.o
-irda-$(CONFIG_SYSCTL) += irsysctl.o
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
deleted file mode 100644
index 2e6990f8b80b..000000000000
--- a/net/irda/af_irda.c
+++ /dev/null
@@ -1,2695 +0,0 @@
-/*********************************************************************
- *
- * Filename: af_irda.c
- * Version: 0.9
- * Description: IrDA sockets implementation
- * Status: Stable
- * Author: Dag Brattli <dagb@cs.uit.no>
- * Created at: Sun May 31 10:12:43 1998
- * Modified at: Sat Dec 25 21:10:23 1999
- * Modified by: Dag Brattli <dag@brattli.net>
- * Sources: af_netroom.c, af_ax25.c, af_rose.c, af_x25.c etc.
- *
- * Copyright (c) 1999 Dag Brattli <dagb@cs.uit.no>
- * Copyright (c) 1999-2003 Jean Tourrilhes <jt@hpl.hp.com>
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
- * Linux-IrDA now supports four different types of IrDA sockets:
- *
- * o SOCK_STREAM: TinyTP connections with SAR disabled. The
- * max SDU size is 0 for conn. of this type
- * o SOCK_SEQPACKET: TinyTP connections with SAR enabled. TTP may
- * fragment the messages, but will preserve
- * the message boundaries
- * o SOCK_DGRAM: IRDAPROTO_UNITDATA: TinyTP connections with Unitdata
- * (unreliable) transfers
- * IRDAPROTO_ULTRA: Connectionless and unreliable data
- *
- ********************************************************************/
-
-#include <linux/capability.h>
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/socket.h>
-#include <linux/sockios.h>
-#include <linux/slab.h>
-#include <linux/sched/signal.h>
-#include <linux/init.h>
-#include <linux/net.h>
-#include <linux/irda.h>
-#include <linux/poll.h>
-
-#include <asm/ioctls.h> /* TIOCOUTQ, TIOCINQ */
-#include <linux/uaccess.h>
-
-#include <net/sock.h>
-#include <net/tcp_states.h>
-
-#include <net/irda/af_irda.h>
-
-static int irda_create(struct net *net, struct socket *sock, int protocol, int kern);
-
-static const struct proto_ops irda_stream_ops;
-static const struct proto_ops irda_seqpacket_ops;
-static const struct proto_ops irda_dgram_ops;
-
-#ifdef CONFIG_IRDA_ULTRA
-static const struct proto_ops irda_ultra_ops;
-#define ULTRA_MAX_DATA 382
-#endif /* CONFIG_IRDA_ULTRA */
-
-#define IRDA_MAX_HEADER (TTP_MAX_HEADER)
-
-/*
- * Function irda_data_indication (instance, sap, skb)
- *
- * Received some data from TinyTP. Just queue it on the receive queue
- *
- */
-static int irda_data_indication(void *instance, void *sap, struct sk_buff *skb)
-{
- struct irda_sock *self;
- struct sock *sk;
- int err;
-
- self = instance;
- sk = instance;
-
- err = sock_queue_rcv_skb(sk, skb);
- if (err) {
- pr_debug("%s(), error: no more mem!\n", __func__);
- self->rx_flow = FLOW_STOP;
-
- /* When we return error, TTP will need to requeue the skb */
- return err;
- }
-
- return 0;
-}
-
-/*
- * Function irda_disconnect_indication (instance, sap, reason, skb)
- *
- * Connection has been closed. Check reason to find out why
- *
- */
-static void irda_disconnect_indication(void *instance, void *sap,
- LM_REASON reason, struct sk_buff *skb)
-{
- struct irda_sock *self;
- struct sock *sk;
-
- self = instance;
-
- pr_debug("%s(%p)\n", __func__, self);
-
- /* Don't care about it, but let's not leak it */
- if(skb)
- dev_kfree_skb(skb);
-
- sk = instance;
- if (sk == NULL) {
- pr_debug("%s(%p) : BUG : sk is NULL\n",
- __func__, self);
- return;
- }
-
- /* Prevent race conditions with irda_release() and irda_shutdown() */
- bh_lock_sock(sk);
- if (!sock_flag(sk, SOCK_DEAD) && sk->sk_state != TCP_CLOSE) {
- sk->sk_state = TCP_CLOSE;
- sk->sk_shutdown |= SEND_SHUTDOWN;
-
- sk->sk_state_change(sk);
-
- /* Close our TSAP.
- * If we leave it open, IrLMP put it back into the list of
- * unconnected LSAPs. The problem is that any incoming request
- * can then be matched to this socket (and it will be, because
- * it is at the head of the list). This would prevent any
- * listening socket waiting on the same TSAP to get those
- * requests. Some apps forget to close sockets, or hang to it
- * a bit too long, so we may stay in this dead state long
- * enough to be noticed...
- * Note : all socket function do check sk->sk_state, so we are
- * safe...
- * Jean II
- */
- if (self->tsap) {
- irttp_close_tsap(self->tsap);
- self->tsap = NULL;
- }
- }
- bh_unlock_sock(sk);
-
- /* Note : once we are there, there is not much you want to do
- * with the socket anymore, apart from closing it.
- * For example, bind() and connect() won't reset sk->sk_err,
- * sk->sk_shutdown and sk->sk_flags to valid values...
- * Jean II
- */
-}
-
-/*
- * Function irda_connect_confirm (instance, sap, qos, max_sdu_size, skb)
- *
- * Connections has been confirmed by the remote device
- *
- */
-static void irda_connect_confirm(void *instance, void *sap,
- struct qos_info *qos,
- __u32 max_sdu_size, __u8 max_header_size,
- struct sk_buff *skb)
-{
- struct irda_sock *self;
- struct sock *sk;
-
- self = instance;
-
- pr_debug("%s(%p)\n", __func__, self);
-
- sk = instance;
- if (sk == NULL) {
- dev_kfree_skb(skb);
- return;
- }
-
- dev_kfree_skb(skb);
- // Should be ??? skb_queue_tail(&sk->sk_receive_queue, skb);
-
- /* How much header space do we need to reserve */
- self->max_header_size = max_header_size;
-
- /* IrTTP max SDU size in transmit direction */
- self->max_sdu_size_tx = max_sdu_size;
-
- /* Find out what the largest chunk of data that we can transmit is */
- switch (sk->sk_type) {
- case SOCK_STREAM:
- if (max_sdu_size != 0) {
- net_err_ratelimited("%s: max_sdu_size must be 0\n",
- __func__);
- return;
- }
- self->max_data_size = irttp_get_max_seg_size(self->tsap);
- break;
- case SOCK_SEQPACKET:
- if (max_sdu_size == 0) {
- net_err_ratelimited("%s: max_sdu_size cannot be 0\n",
- __func__);
- return;
- }
- self->max_data_size = max_sdu_size;
- break;
- default:
- self->max_data_size = irttp_get_max_seg_size(self->tsap);
- }
-
- pr_debug("%s(), max_data_size=%d\n", __func__,
- self->max_data_size);
-
- memcpy(&self->qos_tx, qos, sizeof(struct qos_info));
-
- /* We are now connected! */
- sk->sk_state = TCP_ESTABLISHED;
- sk->sk_state_change(sk);
-}
-
-/*
- * Function irda_connect_indication(instance, sap, qos, max_sdu_size, userdata)
- *
- * Incoming connection
- *
- */
-static void irda_connect_indication(void *instance, void *sap,
- struct qos_info *qos, __u32 max_sdu_size,
- __u8 max_header_size, struct sk_buff *skb)
-{
- struct irda_sock *self;
- struct sock *sk;
-
- self = instance;
-
- pr_debug("%s(%p)\n", __func__, self);
-
- sk = instance;
- if (sk == NULL) {
- dev_kfree_skb(skb);
- return;
- }
-
- /* How much header space do we need to reserve */
- self->max_header_size = max_header_size;
-
- /* IrTTP max SDU size in transmit direction */
- self->max_sdu_size_tx = max_sdu_size;
-
- /* Find out what the largest chunk of data that we can transmit is */
- switch (sk->sk_type) {
- case SOCK_STREAM:
- if (max_sdu_size != 0) {
- net_err_ratelimited("%s: max_sdu_size must be 0\n",
- __func__);
- kfree_skb(skb);
- return;
- }
- self->max_data_size = irttp_get_max_seg_size(self->tsap);
- break;
- case SOCK_SEQPACKET:
- if (max_sdu_size == 0) {
- net_err_ratelimited("%s: max_sdu_size cannot be 0\n",
- __func__);
- kfree_skb(skb);
- return;
- }
- self->max_data_size = max_sdu_size;
- break;
- default:
- self->max_data_size = irttp_get_max_seg_size(self->tsap);
- }
-
- pr_debug("%s(), max_data_size=%d\n", __func__,
- self->max_data_size);
-
- memcpy(&self->qos_tx, qos, sizeof(struct qos_info));
-
- skb_queue_tail(&sk->sk_receive_queue, skb);
- sk->sk_state_change(sk);
-}
-
-/*
- * Function irda_connect_response (handle)
- *
- * Accept incoming connection
- *
- */
-static void irda_connect_response(struct irda_sock *self)
-{
- struct sk_buff *skb;
-
- skb = alloc_skb(TTP_MAX_HEADER + TTP_SAR_HEADER, GFP_KERNEL);
- if (skb == NULL) {
- pr_debug("%s() Unable to allocate sk_buff!\n",
- __func__);
- return;
- }
-
- /* Reserve space for MUX_CONTROL and LAP header */
- skb_reserve(skb, IRDA_MAX_HEADER);
-
- irttp_connect_response(self->tsap, self->max_sdu_size_rx, skb);
-}
-
-/*
- * Function irda_flow_indication (instance, sap, flow)
- *
- * Used by TinyTP to tell us if it can accept more data or not
- *
- */
-static void irda_flow_indication(void *instance, void *sap, LOCAL_FLOW flow)
-{
- struct irda_sock *self;
- struct sock *sk;
-
- self = instance;
- sk = instance;
- BUG_ON(sk == NULL);
-
- switch (flow) {
- case FLOW_STOP:
- pr_debug("%s(), IrTTP wants us to slow down\n",
- __func__);
- self->tx_flow = flow;
- break;
- case FLOW_START:
- self->tx_flow = flow;
- pr_debug("%s(), IrTTP wants us to start again\n",
- __func__);
- wake_up_interruptible(sk_sleep(sk));
- break;
- default:
- pr_debug("%s(), Unknown flow command!\n", __func__);
- /* Unknown flow command, better stop */
- self->tx_flow = flow;
- break;
- }
-}
-
-/*
- * Function irda_getvalue_confirm (obj_id, value, priv)
- *
- * Got answer from remote LM-IAS, just pass object to requester...
- *
- * Note : duplicate from above, but we need our own version that
- * doesn't touch the dtsap_sel and save the full value structure...
- */
-static void irda_getvalue_confirm(int result, __u16 obj_id,
- struct ias_value *value, void *priv)
-{
- struct irda_sock *self;
-
- self = priv;
- if (!self) {
- net_warn_ratelimited("%s: lost myself!\n", __func__);
- return;
- }
-
- pr_debug("%s(%p)\n", __func__, self);
-
- /* We probably don't need to make any more queries */
- iriap_close(self->iriap);
- self->iriap = NULL;
-
- /* Check if request succeeded */
- if (result != IAS_SUCCESS) {
- pr_debug("%s(), IAS query failed! (%d)\n", __func__,
- result);
-
- self->errno = result; /* We really need it later */
-
- /* Wake up any processes waiting for result */
- wake_up_interruptible(&self->query_wait);
-
- return;
- }
-
- /* Pass the object to the caller (so the caller must delete it) */
- self->ias_result = value;
- self->errno = 0;
-
- /* Wake up any processes waiting for result */
- wake_up_interruptible(&self->query_wait);
-}
-
-/*
- * Function irda_selective_discovery_indication (discovery)
- *
- * Got a selective discovery indication from IrLMP.
- *
- * IrLMP is telling us that this node is new and matching our hint bit
- * filter. Wake up any process waiting for answer...
- */
-static void irda_selective_discovery_indication(discinfo_t *discovery,
- DISCOVERY_MODE mode,
- void *priv)
-{
- struct irda_sock *self;
-
- self = priv;
- if (!self) {
- net_warn_ratelimited("%s: lost myself!\n", __func__);
- return;
- }
-
- /* Pass parameter to the caller */
- self->cachedaddr = discovery->daddr;
-
- /* Wake up process if its waiting for device to be discovered */
- wake_up_interruptible(&self->query_wait);
-}
-
-/*
- * Function irda_discovery_timeout (priv)
- *
- * Timeout in the selective discovery process
- *
- * We were waiting for a node to be discovered, but nothing has come up
- * so far. Wake up the user and tell him that we failed...
- */
-static void irda_discovery_timeout(u_long priv)
-{
- struct irda_sock *self;
-
- self = (struct irda_sock *) priv;
- BUG_ON(self == NULL);
-
- /* Nothing for the caller */
- self->cachelog = NULL;
- self->cachedaddr = 0;
- self->errno = -ETIME;
-
- /* Wake up process if its still waiting... */
- wake_up_interruptible(&self->query_wait);
-}
-
-/*
- * Function irda_open_tsap (self)
- *
- * Open local Transport Service Access Point (TSAP)
- *
- */
-static int irda_open_tsap(struct irda_sock *self, __u8 tsap_sel, char *name)
-{
- notify_t notify;
-
- if (self->tsap) {
- pr_debug("%s: busy!\n", __func__);
- return -EBUSY;
- }
-
- /* Initialize callbacks to be used by the IrDA stack */
- irda_notify_init(&notify);
- notify.connect_confirm = irda_connect_confirm;
- notify.connect_indication = irda_connect_indication;
- notify.disconnect_indication = irda_disconnect_indication;
- notify.data_indication = irda_data_indication;
- notify.udata_indication = irda_data_indication;
- notify.flow_indication = irda_flow_indication;
- notify.instance = self;
- strncpy(notify.name, name, NOTIFY_MAX_NAME);
-
- self->tsap = irttp_open_tsap(tsap_sel, DEFAULT_INITIAL_CREDIT,
- &notify);
- if (self->tsap == NULL) {
- pr_debug("%s(), Unable to allocate TSAP!\n",
- __func__);
- return -ENOMEM;
- }
- /* Remember which TSAP selector we actually got */
- self->stsap_sel = self->tsap->stsap_sel;
-
- return 0;
-}
-
-/*
- * Function irda_open_lsap (self)
- *
- * Open local Link Service Access Point (LSAP). Used for opening Ultra
- * sockets
- */
-#ifdef CONFIG_IRDA_ULTRA
-static int irda_open_lsap(struct irda_sock *self, int pid)
-{
- notify_t notify;
-
- if (self->lsap) {
- net_warn_ratelimited("%s(), busy!\n", __func__);
- return -EBUSY;
- }
-
- /* Initialize callbacks to be used by the IrDA stack */
- irda_notify_init(&notify);
- notify.udata_indication = irda_data_indication;
- notify.instance = self;
- strncpy(notify.name, "Ultra", NOTIFY_MAX_NAME);
-
- self->lsap = irlmp_open_lsap(LSAP_CONNLESS, &notify, pid);
- if (self->lsap == NULL) {
- pr_debug("%s(), Unable to allocate LSAP!\n", __func__);
- return -ENOMEM;
- }
-
- return 0;
-}
-#endif /* CONFIG_IRDA_ULTRA */
-
-/*
- * Function irda_find_lsap_sel (self, name)
- *
- * Try to lookup LSAP selector in remote LM-IAS
- *
- * Basically, we start a IAP query, and then go to sleep. When the query
- * return, irda_getvalue_confirm will wake us up, and we can examine the
- * result of the query...
- * Note that in some case, the query fail even before we go to sleep,
- * creating some races...
- */
-static int irda_find_lsap_sel(struct irda_sock *self, char *name)
-{
- pr_debug("%s(%p, %s)\n", __func__, self, name);
-
- if (self->iriap) {
- net_warn_ratelimited("%s(): busy with a previous query\n",
- __func__);
- return -EBUSY;
- }
-
- self->iriap = iriap_open(LSAP_ANY, IAS_CLIENT, self,
- irda_getvalue_confirm);
- if(self->iriap == NULL)
- return -ENOMEM;
-
- /* Treat unexpected wakeup as disconnect */
- self->errno = -EHOSTUNREACH;
-
- /* Query remote LM-IAS */
- iriap_getvaluebyclass_request(self->iriap, self->saddr, self->daddr,
- name, "IrDA:TinyTP:LsapSel");
-
- /* Wait for answer, if not yet finished (or failed) */
- if (wait_event_interruptible(self->query_wait, (self->iriap==NULL)))
- /* Treat signals as disconnect */
- return -EHOSTUNREACH;
-
- /* Check what happened */
- if (self->errno)
- {
- /* Requested object/attribute doesn't exist */
- if((self->errno == IAS_CLASS_UNKNOWN) ||
- (self->errno == IAS_ATTRIB_UNKNOWN))
- return -EADDRNOTAVAIL;
- else
- return -EHOSTUNREACH;
- }
-
- /* Get the remote TSAP selector */
- switch (self->ias_result->type) {
- case IAS_INTEGER:
- pr_debug("%s() int=%d\n",
- __func__, self->ias_result->t.integer);
-
- if (self->ias_result->t.integer != -1)
- self->dtsap_sel = self->ias_result->t.integer;
- else
- self->dtsap_sel = 0;
- break;
- default:
- self->dtsap_sel = 0;
- pr_debug("%s(), bad type!\n", __func__);
- break;
- }
- if (self->ias_result)
- irias_delete_value(self->ias_result);
-
- if (self->dtsap_sel)
- return 0;
-
- return -EADDRNOTAVAIL;
-}
-
-/*
- * Function irda_discover_daddr_and_lsap_sel (self, name)
- *
- * This try to find a device with the requested service.
- *
- * It basically look into the discovery log. For each address in the list,
- * it queries the LM-IAS of the device to find if this device offer
- * the requested service.
- * If there is more than one node supporting the service, we complain
- * to the user (it should move devices around).
- * The, we set both the destination address and the lsap selector to point
- * on the service on the unique device we have found.
- *
- * Note : this function fails if there is more than one device in range,
- * because IrLMP doesn't disconnect the LAP when the last LSAP is closed.
- * Moreover, we would need to wait the LAP disconnection...
- */
-static int irda_discover_daddr_and_lsap_sel(struct irda_sock *self, char *name)
-{
- discinfo_t *discoveries; /* Copy of the discovery log */
- int number; /* Number of nodes in the log */
- int i;
- int err = -ENETUNREACH;
- __u32 daddr = DEV_ADDR_ANY; /* Address we found the service on */
- __u8 dtsap_sel = 0x0; /* TSAP associated with it */
-
- pr_debug("%s(), name=%s\n", __func__, name);
-
- /* Ask lmp for the current discovery log
- * Note : we have to use irlmp_get_discoveries(), as opposed
- * to play with the cachelog directly, because while we are
- * making our ias query, le log might change... */
- discoveries = irlmp_get_discoveries(&number, self->mask.word,
- self->nslots);
- /* Check if the we got some results */
- if (discoveries == NULL)
- return -ENETUNREACH; /* No nodes discovered */
-
- /*
- * Now, check all discovered devices (if any), and connect
- * client only about the services that the client is
- * interested in...
- */
- for(i = 0; i < number; i++) {
- /* Try the address in the log */
- self->daddr = discoveries[i].daddr;
- self->saddr = 0x0;
- pr_debug("%s(), trying daddr = %08x\n",
- __func__, self->daddr);
-
- /* Query remote LM-IAS for this service */
- err = irda_find_lsap_sel(self, name);
- switch (err) {
- case 0:
- /* We found the requested service */
- if(daddr != DEV_ADDR_ANY) {
- pr_debug("%s(), discovered service ''%s'' in two different devices !!!\n",
- __func__, name);
- self->daddr = DEV_ADDR_ANY;
- kfree(discoveries);
- return -ENOTUNIQ;
- }
- /* First time we found that one, save it ! */
- daddr = self->daddr;
- dtsap_sel = self->dtsap_sel;
- break;
- case -EADDRNOTAVAIL:
- /* Requested service simply doesn't exist on this node */
- break;
- default:
- /* Something bad did happen :-( */
- pr_debug("%s(), unexpected IAS query failure\n",
- __func__);
- self->daddr = DEV_ADDR_ANY;
- kfree(discoveries);
- return -EHOSTUNREACH;
- }
- }
- /* Cleanup our copy of the discovery log */
- kfree(discoveries);
-
- /* Check out what we found */
- if(daddr == DEV_ADDR_ANY) {
- pr_debug("%s(), cannot discover service ''%s'' in any device !!!\n",
- __func__, name);
- self->daddr = DEV_ADDR_ANY;
- return -EADDRNOTAVAIL;
- }
-
- /* Revert back to discovered device & service */
- self->daddr = daddr;
- self->saddr = 0x0;
- self->dtsap_sel = dtsap_sel;
-
- pr_debug("%s(), discovered requested service ''%s'' at address %08x\n",
- __func__, name, self->daddr);
-
- return 0;
-}
-
-/*
- * Function irda_getname (sock, uaddr, uaddr_len, peer)
- *
- * Return the our own, or peers socket address (sockaddr_irda)
- *
- */
-static int irda_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
-{
- struct sockaddr_irda saddr;
- struct sock *sk = sock->sk;
- struct irda_sock *self = irda_sk(sk);
-
- memset(&saddr, 0, sizeof(saddr));
- if (peer) {
- if (sk->sk_state != TCP_ESTABLISHED)
- return -ENOTCONN;
-
- saddr.sir_family = AF_IRDA;
- saddr.sir_lsap_sel = self->dtsap_sel;
- saddr.sir_addr = self->daddr;
- } else {
- saddr.sir_family = AF_IRDA;
- saddr.sir_lsap_sel = self->stsap_sel;
- saddr.sir_addr = self->saddr;
- }
-
- pr_debug("%s(), tsap_sel = %#x\n", __func__, saddr.sir_lsap_sel);
- pr_debug("%s(), addr = %08x\n", __func__, saddr.sir_addr);
-
- /* uaddr_len come to us uninitialised */
- *uaddr_len = sizeof (struct sockaddr_irda);
- memcpy(uaddr, &saddr, *uaddr_len);
-
- return 0;
-}
-
-/*
- * Function irda_listen (sock, backlog)
- *
- * Just move to the listen state
- *
- */
-static int irda_listen(struct socket *sock, int backlog)
-{
- struct sock *sk = sock->sk;
- int err = -EOPNOTSUPP;
-
- lock_sock(sk);
-
- if ((sk->sk_type != SOCK_STREAM) && (sk->sk_type != SOCK_SEQPACKET) &&
- (sk->sk_type != SOCK_DGRAM))
- goto out;
-
- if (sk->sk_state != TCP_LISTEN) {
- sk->sk_max_ack_backlog = backlog;
- sk->sk_state = TCP_LISTEN;
-
- err = 0;
- }
-out:
- release_sock(sk);
-
- return err;
-}
-
-/*
- * Function irda_bind (sock, uaddr, addr_len)
- *
- * Used by servers to register their well known TSAP
- *
- */
-static int irda_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
-{
- struct sock *sk = sock->sk;
- struct sockaddr_irda *addr = (struct sockaddr_irda *) uaddr;
- struct irda_sock *self = irda_sk(sk);
- int err;
-
- pr_debug("%s(%p)\n", __func__, self);
-
- if (addr_len != sizeof(struct sockaddr_irda))
- return -EINVAL;
-
- lock_sock(sk);
-#ifdef CONFIG_IRDA_ULTRA
- /* Special care for Ultra sockets */
- if ((sk->sk_type == SOCK_DGRAM) &&
- (sk->sk_protocol == IRDAPROTO_ULTRA)) {
- self->pid = addr->sir_lsap_sel;
- err = -EOPNOTSUPP;
- if (self->pid & 0x80) {
- pr_debug("%s(), extension in PID not supp!\n",
- __func__);
- goto out;
- }
- err = irda_open_lsap(self, self->pid);
- if (err < 0)
- goto out;
-
- /* Pretend we are connected */
- sock->state = SS_CONNECTED;
- sk->sk_state = TCP_ESTABLISHED;
- err = 0;
-
- goto out;
- }
-#endif /* CONFIG_IRDA_ULTRA */
-
- self->ias_obj = irias_new_object(addr->sir_name, jiffies);
- err = -ENOMEM;
- if (self->ias_obj == NULL)
- goto out;
-
- err = irda_open_tsap(self, addr->sir_lsap_sel, addr->sir_name);
- if (err < 0) {
- irias_delete_object(self->ias_obj);
- self->ias_obj = NULL;
- goto out;
- }
-
- /* Register with LM-IAS */
- irias_add_integer_attrib(self->ias_obj, "IrDA:TinyTP:LsapSel",
- self->stsap_sel, IAS_KERNEL_ATTR);
- irias_insert_object(self->ias_obj);
-
- err = 0;
-out:
- release_sock(sk);
- return err;
-}
-
-/*
- * Function irda_accept (sock, newsock, flags)
- *
- * Wait for incoming connection
- *
- */
-static int irda_accept(struct socket *sock, struct socket *newsock, int flags,
- bool kern)
-{
- struct sock *sk = sock->sk;
- struct irda_sock *new, *self = irda_sk(sk);
- struct sock *newsk;
- struct sk_buff *skb = NULL;
- int err;
-
- err = irda_create(sock_net(sk), newsock, sk->sk_protocol, kern);
- if (err)
- return err;
-
- err = -EINVAL;
-
- lock_sock(sk);
- if (sock->state != SS_UNCONNECTED)
- goto out;
-
- err = -EOPNOTSUPP;
- if ((sk->sk_type != SOCK_STREAM) && (sk->sk_type != SOCK_SEQPACKET) &&
- (sk->sk_type != SOCK_DGRAM))
- goto out;
-
- err = -EINVAL;
- if (sk->sk_state != TCP_LISTEN)
- goto out;
-
- /*
- * The read queue this time is holding sockets ready to use
- * hooked into the SABM we saved
- */
-
- /*
- * We can perform the accept only if there is incoming data
- * on the listening socket.
- * So, we will block the caller until we receive any data.
- * If the caller was waiting on select() or poll() before
- * calling us, the data is waiting for us ;-)
- * Jean II
- */
- while (1) {
- skb = skb_dequeue(&sk->sk_receive_queue);
- if (skb)
- break;
-
- /* Non blocking operation */
- err = -EWOULDBLOCK;
- if (flags & O_NONBLOCK)
- goto out;
-
- err = wait_event_interruptible(*(sk_sleep(sk)),
- skb_peek(&sk->sk_receive_queue));
- if (err)
- goto out;
- }
-
- newsk = newsock->sk;
- err = -EIO;
- if (newsk == NULL)
- goto out;
-
- newsk->sk_state = TCP_ESTABLISHED;
-
- new = irda_sk(newsk);
-
- /* Now attach up the new socket */
- new->tsap = irttp_dup(self->tsap, new);
- err = -EPERM; /* value does not seem to make sense. -arnd */
- if (!new->tsap) {
- pr_debug("%s(), dup failed!\n", __func__);
- goto out;
- }
-
- new->stsap_sel = new->tsap->stsap_sel;
- new->dtsap_sel = new->tsap->dtsap_sel;
- new->saddr = irttp_get_saddr(new->tsap);
- new->daddr = irttp_get_daddr(new->tsap);
-
- new->max_sdu_size_tx = self->max_sdu_size_tx;
- new->max_sdu_size_rx = self->max_sdu_size_rx;
- new->max_data_size = self->max_data_size;
- new->max_header_size = self->max_header_size;
-
- memcpy(&new->qos_tx, &self->qos_tx, sizeof(struct qos_info));
-
- /* Clean up the original one to keep it in listen state */
- irttp_listen(self->tsap);
-
- sk->sk_ack_backlog--;
-
- newsock->state = SS_CONNECTED;
-
- irda_connect_response(new);
- err = 0;
-out:
- kfree_skb(skb);
- release_sock(sk);
- return err;
-}
-
-/*
- * Function irda_connect (sock, uaddr, addr_len, flags)
- *
- * Connect to a IrDA device
- *
- * The main difference with a "standard" connect is that with IrDA we need
- * to resolve the service name into a TSAP selector (in TCP, port number
- * doesn't have to be resolved).
- * Because of this service name resolution, we can offer "auto-connect",
- * where we connect to a service without specifying a destination address.
- *
- * Note : by consulting "errno", the user space caller may learn the cause
- * of the failure. Most of them are visible in the function, others may come
- * from subroutines called and are listed here :
- * o EBUSY : already processing a connect
- * o EHOSTUNREACH : bad addr->sir_addr argument
- * o EADDRNOTAVAIL : bad addr->sir_name argument
- * o ENOTUNIQ : more than one node has addr->sir_name (auto-connect)
- * o ENETUNREACH : no node found on the network (auto-connect)
- */
-static int irda_connect(struct socket *sock, struct sockaddr *uaddr,
- int addr_len, int flags)
-{
- struct sock *sk = sock->sk;
- struct sockaddr_irda *addr = (struct sockaddr_irda *) uaddr;
- struct irda_sock *self = irda_sk(sk);
- int err;
-
- pr_debug("%s(%p)\n", __func__, self);
-
- lock_sock(sk);
- /* Don't allow connect for Ultra sockets */
- err = -ESOCKTNOSUPPORT;
- if ((sk->sk_type == SOCK_DGRAM) && (sk->sk_protocol == IRDAPROTO_ULTRA))
- goto out;
-
- if (sk->sk_state == TCP_ESTABLISHED && sock->state == SS_CONNECTING) {
- sock->state = SS_CONNECTED;
- err = 0;
- goto out; /* Connect completed during a ERESTARTSYS event */
- }
-
- if (sk->sk_state == TCP_CLOSE && sock->state == SS_CONNECTING) {
- sock->state = SS_UNCONNECTED;
- err = -ECONNREFUSED;
- goto out;
- }
-
- err = -EISCONN; /* No reconnect on a seqpacket socket */
- if (sk->sk_state == TCP_ESTABLISHED)
- goto out;
-
- sk->sk_state = TCP_CLOSE;
- sock->state = SS_UNCONNECTED;
-
- err = -EINVAL;
- if (addr_len != sizeof(struct sockaddr_irda))
- goto out;
-
- /* Check if user supplied any destination device address */
- if ((!addr->sir_addr) || (addr->sir_addr == DEV_ADDR_ANY)) {
- /* Try to find one suitable */
- err = irda_discover_daddr_and_lsap_sel(self, addr->sir_name);
- if (err) {
- pr_debug("%s(), auto-connect failed!\n", __func__);
- goto out;
- }
- } else {
- /* Use the one provided by the user */
- self->daddr = addr->sir_addr;
- pr_debug("%s(), daddr = %08x\n", __func__, self->daddr);
-
- /* If we don't have a valid service name, we assume the
- * user want to connect on a specific LSAP. Prevent
- * the use of invalid LSAPs (IrLMP 1.1 p10). Jean II */
- if((addr->sir_name[0] != '\0') ||
- (addr->sir_lsap_sel >= 0x70)) {
- /* Query remote LM-IAS using service name */
- err = irda_find_lsap_sel(self, addr->sir_name);
- if (err) {
- pr_debug("%s(), connect failed!\n", __func__);
- goto out;
- }
- } else {
- /* Directly connect to the remote LSAP
- * specified by the sir_lsap field.
- * Please use with caution, in IrDA LSAPs are
- * dynamic and there is no "well-known" LSAP. */
- self->dtsap_sel = addr->sir_lsap_sel;
- }
- }
-
- /* Check if we have opened a local TSAP */
- if (!self->tsap) {
- err = irda_open_tsap(self, LSAP_ANY, addr->sir_name);
- if (err)
- goto out;
- }
-
- /* Move to connecting socket, start sending Connect Requests */
- sock->state = SS_CONNECTING;
- sk->sk_state = TCP_SYN_SENT;
-
- /* Connect to remote device */
- err = irttp_connect_request(self->tsap, self->dtsap_sel,
- self->saddr, self->daddr, NULL,
- self->max_sdu_size_rx, NULL);
- if (err) {
- pr_debug("%s(), connect failed!\n", __func__);
- goto out;
- }
-
- /* Now the loop */
- err = -EINPROGRESS;
- if (sk->sk_state != TCP_ESTABLISHED && (flags & O_NONBLOCK))
- goto out;
-
- err = -ERESTARTSYS;
- if (wait_event_interruptible(*(sk_sleep(sk)),
- (sk->sk_state != TCP_SYN_SENT)))
- goto out;
-
- if (sk->sk_state != TCP_ESTABLISHED) {
- sock->state = SS_UNCONNECTED;
- err = sock_error(sk);
- if (!err)
- err = -ECONNRESET;
- goto out;
- }
-
- sock->state = SS_CONNECTED;
-
- /* At this point, IrLMP has assigned our source address */
- self->saddr = irttp_get_saddr(self->tsap);
- err = 0;
-out:
- release_sock(sk);
- return err;
-}
-
-static struct proto irda_proto = {
- .name = "IRDA",
- .owner = THIS_MODULE,
- .obj_size = sizeof(struct irda_sock),
-};
-
-/*
- * Function irda_create (sock, protocol)
- *
- * Create IrDA socket
- *
- */
-static int irda_create(struct net *net, struct socket *sock, int protocol,
- int kern)
-{
- struct sock *sk;
- struct irda_sock *self;
-
- if (protocol < 0 || protocol > SK_PROTOCOL_MAX)
- return -EINVAL;
-
- if (net != &init_net)
- return -EAFNOSUPPORT;
-
- /* Check for valid socket type */
- switch (sock->type) {
- case SOCK_STREAM: /* For TTP connections with SAR disabled */
- case SOCK_SEQPACKET: /* For TTP connections with SAR enabled */
- case SOCK_DGRAM: /* For TTP Unitdata or LMP Ultra transfers */
- break;
- default:
- return -ESOCKTNOSUPPORT;
- }
-
- /* Allocate networking socket */
- sk = sk_alloc(net, PF_IRDA, GFP_KERNEL, &irda_proto, kern);
- if (sk == NULL)
- return -ENOMEM;
-
- self = irda_sk(sk);
- pr_debug("%s() : self is %p\n", __func__, self);
-
- init_waitqueue_head(&self->query_wait);
-
- switch (sock->type) {
- case SOCK_STREAM:
- sock->ops = &irda_stream_ops;
- self->max_sdu_size_rx = TTP_SAR_DISABLE;
- break;
- case SOCK_SEQPACKET:
- sock->ops = &irda_seqpacket_ops;
- self->max_sdu_size_rx = TTP_SAR_UNBOUND;
- break;
- case SOCK_DGRAM:
- switch (protocol) {
-#ifdef CONFIG_IRDA_ULTRA
- case IRDAPROTO_ULTRA:
- sock->ops = &irda_ultra_ops;
- /* Initialise now, because we may send on unbound
- * sockets. Jean II */
- self->max_data_size = ULTRA_MAX_DATA - LMP_PID_HEADER;
- self->max_header_size = IRDA_MAX_HEADER + LMP_PID_HEADER;
- break;
-#endif /* CONFIG_IRDA_ULTRA */
- case IRDAPROTO_UNITDATA:
- sock->ops = &irda_dgram_ops;
- /* We let Unitdata conn. be like seqpack conn. */
- self->max_sdu_size_rx = TTP_SAR_UNBOUND;
- break;
- default:
- sk_free(sk);
- return -ESOCKTNOSUPPORT;
- }
- break;
- default:
- sk_free(sk);
- return -ESOCKTNOSUPPORT;
- }
-
- /* Initialise networking socket struct */
- sock_init_data(sock, sk); /* Note : set sk->sk_refcnt to 1 */
- sk->sk_family = PF_IRDA;
- sk->sk_protocol = protocol;
-
- /* Register as a client with IrLMP */
- self->ckey = irlmp_register_client(0, NULL, NULL, NULL);
- self->mask.word = 0xffff;
- self->rx_flow = self->tx_flow = FLOW_START;
- self->nslots = DISCOVERY_DEFAULT_SLOTS;
- self->daddr = DEV_ADDR_ANY; /* Until we get connected */
- self->saddr = 0x0; /* so IrLMP assign us any link */
- return 0;
-}
-
-/*
- * Function irda_destroy_socket (self)
- *
- * Destroy socket
- *
- */
-static void irda_destroy_socket(struct irda_sock *self)
-{
- pr_debug("%s(%p)\n", __func__, self);
-
- /* Unregister with IrLMP */
- irlmp_unregister_client(self->ckey);
- irlmp_unregister_service(self->skey);
-
- /* Unregister with LM-IAS */
- if (self->ias_obj) {
- irias_delete_object(self->ias_obj);
- self->ias_obj = NULL;
- }
-
- if (self->iriap) {
- iriap_close(self->iriap);
- self->iriap = NULL;
- }
-
- if (self->tsap) {
- irttp_disconnect_request(self->tsap, NULL, P_NORMAL);
- irttp_close_tsap(self->tsap);
- self->tsap = NULL;
- }
-#ifdef CONFIG_IRDA_ULTRA
- if (self->lsap) {
- irlmp_close_lsap(self->lsap);
- self->lsap = NULL;
- }
-#endif /* CONFIG_IRDA_ULTRA */
-}
-
-/*
- * Function irda_release (sock)
- */
-static int irda_release(struct socket *sock)
-{
- struct sock *sk = sock->sk;
-
- if (sk == NULL)
- return 0;
-
- lock_sock(sk);
- sk->sk_state = TCP_CLOSE;
- sk->sk_shutdown |= SEND_SHUTDOWN;
- sk->sk_state_change(sk);
-
- /* Destroy IrDA socket */
- irda_destroy_socket(irda_sk(sk));
-
- sock_orphan(sk);
- sock->sk = NULL;
- release_sock(sk);
-
- /* Purge queues (see sock_init_data()) */
- skb_queue_purge(&sk->sk_receive_queue);
-
- /* Destroy networking socket if we are the last reference on it,
- * i.e. if(sk->sk_refcnt == 0) -> sk_free(sk) */
- sock_put(sk);
-
- /* Notes on socket locking and deallocation... - Jean II
- * In theory we should put pairs of sock_hold() / sock_put() to
- * prevent the socket to be destroyed whenever there is an
- * outstanding request or outstanding incoming packet or event.
- *
- * 1) This may include IAS request, both in connect and getsockopt.
- * Unfortunately, the situation is a bit more messy than it looks,
- * because we close iriap and kfree(self) above.
- *
- * 2) This may include selective discovery in getsockopt.
- * Same stuff as above, irlmp registration and self are gone.
- *
- * Probably 1 and 2 may not matter, because it's all triggered
- * by a process and the socket layer already prevent the
- * socket to go away while a process is holding it, through
- * sockfd_put() and fput()...
- *
- * 3) This may include deferred TSAP closure. In particular,
- * we may receive a late irda_disconnect_indication()
- * Fortunately, (tsap_cb *)->close_pend should protect us
- * from that.
- *
- * I did some testing on SMP, and it looks solid. And the socket
- * memory leak is now gone... - Jean II
- */
-
- return 0;
-}
-
-/*
- * Function irda_sendmsg (sock, msg, len)
- *
- * Send message down to TinyTP. This function is used for both STREAM and
- * SEQPACK services. This is possible since it forces the client to
- * fragment the message if necessary
- */
-static int irda_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
-{
- struct sock *sk = sock->sk;
- struct irda_sock *self;
- struct sk_buff *skb;
- int err = -EPIPE;
-
- pr_debug("%s(), len=%zd\n", __func__, len);
-
- /* Note : socket.c set MSG_EOR on SEQPACKET sockets */
- if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_EOR | MSG_CMSG_COMPAT |
- MSG_NOSIGNAL)) {
- return -EINVAL;
- }
-
- lock_sock(sk);
-
- if (sk->sk_shutdown & SEND_SHUTDOWN)
- goto out_err;
-
- if (sk->sk_state != TCP_ESTABLISHED) {
- err = -ENOTCONN;
- goto out;
- }
-
- self = irda_sk(sk);
-
- /* Check if IrTTP is wants us to slow down */
-
- if (wait_event_interruptible(*(sk_sleep(sk)),
- (self->tx_flow != FLOW_STOP || sk->sk_state != TCP_ESTABLISHED))) {
- err = -ERESTARTSYS;
- goto out;
- }
-
- /* Check if we are still connected */
- if (sk->sk_state != TCP_ESTABLISHED) {
- err = -ENOTCONN;
- goto out;
- }
-
- /* Check that we don't send out too big frames */
- if (len > self->max_data_size) {
- pr_debug("%s(), Chopping frame from %zd to %d bytes!\n",
- __func__, len, self->max_data_size);
- len = self->max_data_size;
- }
-
- skb = sock_alloc_send_skb(sk, len + self->max_header_size + 16,
- msg->msg_flags & MSG_DONTWAIT, &err);
- if (!skb)
- goto out_err;
-
- skb_reserve(skb, self->max_header_size + 16);
- skb_reset_transport_header(skb);
- skb_put(skb, len);
- err = memcpy_from_msg(skb_transport_header(skb), msg, len);
- if (err) {
- kfree_skb(skb);
- goto out_err;
- }
-
- /*
- * Just send the message to TinyTP, and let it deal with possible
- * errors. No need to duplicate all that here
- */
- err = irttp_data_request(self->tsap, skb);
- if (err) {
- pr_debug("%s(), err=%d\n", __func__, err);
- goto out_err;
- }
-
- release_sock(sk);
- /* Tell client how much data we actually sent */
- return len;
-
-out_err:
- err = sk_stream_error(sk, msg->msg_flags, err);
-out:
- release_sock(sk);
- return err;
-
-}
-
-/*
- * Function irda_recvmsg_dgram (sock, msg, size, flags)
- *
- * Try to receive message and copy it to user. The frame is discarded
- * after being read, regardless of how much the user actually read
- */
-static int irda_recvmsg_dgram(struct socket *sock, struct msghdr *msg,
- size_t size, int flags)
-{
- struct sock *sk = sock->sk;
- struct irda_sock *self = irda_sk(sk);
- struct sk_buff *skb;
- size_t copied;
- int err;
-
- skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
- flags & MSG_DONTWAIT, &err);
- if (!skb)
- return err;
-
- skb_reset_transport_header(skb);
- copied = skb->len;
-
- if (copied > size) {
- pr_debug("%s(), Received truncated frame (%zd < %zd)!\n",
- __func__, copied, size);
- copied = size;
- msg->msg_flags |= MSG_TRUNC;
- }
- skb_copy_datagram_msg(skb, 0, msg, copied);
-
- skb_free_datagram(sk, skb);
-
- /*
- * Check if we have previously stopped IrTTP and we know
- * have more free space in our rx_queue. If so tell IrTTP
- * to start delivering frames again before our rx_queue gets
- * empty
- */
- if (self->rx_flow == FLOW_STOP) {
- if ((atomic_read(&sk->sk_rmem_alloc) << 2) <= sk->sk_rcvbuf) {
- pr_debug("%s(), Starting IrTTP\n", __func__);
- self->rx_flow = FLOW_START;
- irttp_flow_request(self->tsap, FLOW_START);
- }
- }
-
- return copied;
-}
-
-/*
- * Function irda_recvmsg_stream (sock, msg, size, flags)
- */
-static int irda_recvmsg_stream(struct socket *sock, struct msghdr *msg,
- size_t size, int flags)
-{
- struct sock *sk = sock->sk;
- struct irda_sock *self = irda_sk(sk);
- int noblock = flags & MSG_DONTWAIT;
- size_t copied = 0;
- int target, err;
- long timeo;
-
- if ((err = sock_error(sk)) < 0)
- return err;
-
- if (sock->flags & __SO_ACCEPTCON)
- return -EINVAL;
-
- err =-EOPNOTSUPP;
- if (flags & MSG_OOB)
- return -EOPNOTSUPP;
-
- err = 0;
- target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
- timeo = sock_rcvtimeo(sk, noblock);
-
- do {
- int chunk;
- struct sk_buff *skb = skb_dequeue(&sk->sk_receive_queue);
-
- if (skb == NULL) {
- DEFINE_WAIT(wait);
- err = 0;
-
- if (copied >= target)
- break;
-
- prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
-
- /*
- * POSIX 1003.1g mandates this order.
- */
- err = sock_error(sk);
- if (err)
- ;
- else if (sk->sk_shutdown & RCV_SHUTDOWN)
- ;
- else if (noblock)
- err = -EAGAIN;
- else if (signal_pending(current))
- err = sock_intr_errno(timeo);
- else if (sk->sk_state != TCP_ESTABLISHED)
- err = -ENOTCONN;
- else if (skb_peek(&sk->sk_receive_queue) == NULL)
- /* Wait process until data arrives */
- schedule();
-
- finish_wait(sk_sleep(sk), &wait);
-
- if (err)
- return err;
- if (sk->sk_shutdown & RCV_SHUTDOWN)
- break;
-
- continue;
- }
-
- chunk = min_t(unsigned int, skb->len, size);
- if (memcpy_to_msg(msg, skb->data, chunk)) {
- skb_queue_head(&sk->sk_receive_queue, skb);
- if (copied == 0)
- copied = -EFAULT;
- break;
- }
- copied += chunk;
- size -= chunk;
-
- /* Mark read part of skb as used */
- if (!(flags & MSG_PEEK)) {
- skb_pull(skb, chunk);
-
- /* put the skb back if we didn't use it up.. */
- if (skb->len) {
- pr_debug("%s(), back on q!\n",
- __func__);
- skb_queue_head(&sk->sk_receive_queue, skb);
- break;
- }
-
- kfree_skb(skb);
- } else {
- pr_debug("%s() questionable!?\n", __func__);
-
- /* put message back and return */
- skb_queue_head(&sk->sk_receive_queue, skb);
- break;
- }
- } while (size);
-
- /*
- * Check if we have previously stopped IrTTP and we know
- * have more free space in our rx_queue. If so tell IrTTP
- * to start delivering frames again before our rx_queue gets
- * empty
- */
- if (self->rx_flow == FLOW_STOP) {
- if ((atomic_read(&sk->sk_rmem_alloc) << 2) <= sk->sk_rcvbuf) {
- pr_debug("%s(), Starting IrTTP\n", __func__);
- self->rx_flow = FLOW_START;
- irttp_flow_request(self->tsap, FLOW_START);
- }
- }
-
- return copied;
-}
-
-/*
- * Function irda_sendmsg_dgram (sock, msg, len)
- *
- * Send message down to TinyTP for the unreliable sequenced
- * packet service...
- *
- */
-static int irda_sendmsg_dgram(struct socket *sock, struct msghdr *msg,
- size_t len)
-{
- struct sock *sk = sock->sk;
- struct irda_sock *self;
- struct sk_buff *skb;
- int err;
-
- pr_debug("%s(), len=%zd\n", __func__, len);
-
- if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_CMSG_COMPAT))
- return -EINVAL;
-
- lock_sock(sk);
-
- if (sk->sk_shutdown & SEND_SHUTDOWN) {
- send_sig(SIGPIPE, current, 0);
- err = -EPIPE;
- goto out;
- }
-
- err = -ENOTCONN;
- if (sk->sk_state != TCP_ESTABLISHED)
- goto out;
-
- self = irda_sk(sk);
-
- /*
- * Check that we don't send out too big frames. This is an unreliable
- * service, so we have no fragmentation and no coalescence
- */
- if (len > self->max_data_size) {
- pr_debug("%s(), Warning too much data! Chopping frame from %zd to %d bytes!\n",
- __func__, len, self->max_data_size);
- len = self->max_data_size;
- }
-
- skb = sock_alloc_send_skb(sk, len + self->max_header_size,
- msg->msg_flags & MSG_DONTWAIT, &err);
- err = -ENOBUFS;
- if (!skb)
- goto out;
-
- skb_reserve(skb, self->max_header_size);
- skb_reset_transport_header(skb);
-
- pr_debug("%s(), appending user data\n", __func__);
- skb_put(skb, len);
- err = memcpy_from_msg(skb_transport_header(skb), msg, len);
- if (err) {
- kfree_skb(skb);
- goto out;
- }
-
- /*
- * Just send the message to TinyTP, and let it deal with possible
- * errors. No need to duplicate all that here
- */
- err = irttp_udata_request(self->tsap, skb);
- if (err) {
- pr_debug("%s(), err=%d\n", __func__, err);
- goto out;
- }
-
- release_sock(sk);
- return len;
-
-out:
- release_sock(sk);
- return err;
-}
-
-/*
- * Function irda_sendmsg_ultra (sock, msg, len)
- *
- * Send message down to IrLMP for the unreliable Ultra
- * packet service...
- */
-#ifdef CONFIG_IRDA_ULTRA
-static int irda_sendmsg_ultra(struct socket *sock, struct msghdr *msg,
- size_t len)
-{
- struct sock *sk = sock->sk;
- struct irda_sock *self;
- __u8 pid = 0;
- int bound = 0;
- struct sk_buff *skb;
- int err;
-
- pr_debug("%s(), len=%zd\n", __func__, len);
-
- err = -EINVAL;
- if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_CMSG_COMPAT))
- return -EINVAL;
-
- lock_sock(sk);
-
- err = -EPIPE;
- if (sk->sk_shutdown & SEND_SHUTDOWN) {
- send_sig(SIGPIPE, current, 0);
- goto out;
- }
-
- self = irda_sk(sk);
-
- /* Check if an address was specified with sendto. Jean II */
- if (msg->msg_name) {
- DECLARE_SOCKADDR(struct sockaddr_irda *, addr, msg->msg_name);
- err = -EINVAL;
- /* Check address, extract pid. Jean II */
- if (msg->msg_namelen < sizeof(*addr))
- goto out;
- if (addr->sir_family != AF_IRDA)
- goto out;
-
- pid = addr->sir_lsap_sel;
- if (pid & 0x80) {
- pr_debug("%s(), extension in PID not supp!\n",
- __func__);
- err = -EOPNOTSUPP;
- goto out;
- }
- } else {
- /* Check that the socket is properly bound to an Ultra
- * port. Jean II */
- if ((self->lsap == NULL) ||
- (sk->sk_state != TCP_ESTABLISHED)) {
- pr_debug("%s(), socket not bound to Ultra PID.\n",
- __func__);
- err = -ENOTCONN;
- goto out;
- }
- /* Use PID from socket */
- bound = 1;
- }
-
- /*
- * Check that we don't send out too big frames. This is an unreliable
- * service, so we have no fragmentation and no coalescence
- */
- if (len > self->max_data_size) {
- pr_debug("%s(), Warning too much data! Chopping frame from %zd to %d bytes!\n",
- __func__, len, self->max_data_size);
- len = self->max_data_size;
- }
-
- skb = sock_alloc_send_skb(sk, len + self->max_header_size,
- msg->msg_flags & MSG_DONTWAIT, &err);
- err = -ENOBUFS;
- if (!skb)
- goto out;
-
- skb_reserve(skb, self->max_header_size);
- skb_reset_transport_header(skb);
-
- pr_debug("%s(), appending user data\n", __func__);
- skb_put(skb, len);
- err = memcpy_from_msg(skb_transport_header(skb), msg, len);
- if (err) {
- kfree_skb(skb);
- goto out;
- }
-
- err = irlmp_connless_data_request((bound ? self->lsap : NULL),
- skb, pid);
- if (err)
- pr_debug("%s(), err=%d\n", __func__, err);
-out:
- release_sock(sk);
- return err ? : len;
-}
-#endif /* CONFIG_IRDA_ULTRA */
-
-/*
- * Function irda_shutdown (sk, how)
- */
-static int irda_shutdown(struct socket *sock, int how)
-{
- struct sock *sk = sock->sk;
- struct irda_sock *self = irda_sk(sk);
-
- pr_debug("%s(%p)\n", __func__, self);
-
- lock_sock(sk);
-
- sk->sk_state = TCP_CLOSE;
- sk->sk_shutdown |= SEND_SHUTDOWN;
- sk->sk_state_change(sk);
-
- if (self->iriap) {
- iriap_close(self->iriap);
- self->iriap = NULL;
- }
-
- if (self->tsap) {
- irttp_disconnect_request(self->tsap, NULL, P_NORMAL);
- irttp_close_tsap(self->tsap);
- self->tsap = NULL;
- }
-
- /* A few cleanup so the socket look as good as new... */
- self->rx_flow = self->tx_flow = FLOW_START; /* needed ??? */
- self->daddr = DEV_ADDR_ANY; /* Until we get re-connected */
- self->saddr = 0x0; /* so IrLMP assign us any link */
-
- release_sock(sk);
-
- return 0;
-}
-
-/*
- * Function irda_poll (file, sock, wait)
- */
-static unsigned int irda_poll(struct file * file, struct socket *sock,
- poll_table *wait)
-{
- struct sock *sk = sock->sk;
- struct irda_sock *self = irda_sk(sk);
- unsigned int mask;
-
- poll_wait(file, sk_sleep(sk), wait);
- mask = 0;
-
- /* Exceptional events? */
- if (sk->sk_err)
- mask |= POLLERR;
- if (sk->sk_shutdown & RCV_SHUTDOWN) {
- pr_debug("%s(), POLLHUP\n", __func__);
- mask |= POLLHUP;
- }
-
- /* Readable? */
- if (!skb_queue_empty(&sk->sk_receive_queue)) {
- pr_debug("Socket is readable\n");
- mask |= POLLIN | POLLRDNORM;
- }
-
- /* Connection-based need to check for termination and startup */
- switch (sk->sk_type) {
- case SOCK_STREAM:
- if (sk->sk_state == TCP_CLOSE) {
- pr_debug("%s(), POLLHUP\n", __func__);
- mask |= POLLHUP;
- }
-
- if (sk->sk_state == TCP_ESTABLISHED) {
- if ((self->tx_flow == FLOW_START) &&
- sock_writeable(sk))
- {
- mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
- }
- }
- break;
- case SOCK_SEQPACKET:
- if ((self->tx_flow == FLOW_START) &&
- sock_writeable(sk))
- {
- mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
- }
- break;
- case SOCK_DGRAM:
- if (sock_writeable(sk))
- mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
- break;
- default:
- break;
- }
-
- return mask;
-}
-
-/*
- * Function irda_ioctl (sock, cmd, arg)
- */
-static int irda_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
-{
- struct sock *sk = sock->sk;
- int err;
-
- pr_debug("%s(), cmd=%#x\n", __func__, cmd);
-
- err = -EINVAL;
- switch (cmd) {
- case TIOCOUTQ: {
- long amount;
-
- amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk);
- if (amount < 0)
- amount = 0;
- err = put_user(amount, (unsigned int __user *)arg);
- break;
- }
-
- case TIOCINQ: {
- struct sk_buff *skb;
- long amount = 0L;
- /* These two are safe on a single CPU system as only user tasks fiddle here */
- if ((skb = skb_peek(&sk->sk_receive_queue)) != NULL)
- amount = skb->len;
- err = put_user(amount, (unsigned int __user *)arg);
- break;
- }
-
- case SIOCGSTAMP:
- if (sk != NULL)
- err = sock_get_timestamp(sk, (struct timeval __user *)arg);
- break;
-
- case SIOCGIFADDR:
- case SIOCSIFADDR:
- case SIOCGIFDSTADDR:
- case SIOCSIFDSTADDR:
- case SIOCGIFBRDADDR:
- case SIOCSIFBRDADDR:
- case SIOCGIFNETMASK:
- case SIOCSIFNETMASK:
- case SIOCGIFMETRIC:
- case SIOCSIFMETRIC:
- break;
- default:
- pr_debug("%s(), doing device ioctl!\n", __func__);
- err = -ENOIOCTLCMD;
- }
-
- return err;
-}
-
-#ifdef CONFIG_COMPAT
-/*
- * Function irda_ioctl (sock, cmd, arg)
- */
-static int irda_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
-{
- /*
- * All IRDA's ioctl are standard ones.
- */
- return -ENOIOCTLCMD;
-}
-#endif
-
-/*
- * Function irda_setsockopt (sock, level, optname, optval, optlen)
- *
- * Set some options for the socket
- *
- */
-static int irda_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
-{
- struct sock *sk = sock->sk;
- struct irda_sock *self = irda_sk(sk);
- struct irda_ias_set *ias_opt;
- struct ias_object *ias_obj;
- struct ias_attrib * ias_attr; /* Attribute in IAS object */
- int opt, free_ias = 0, err = 0;
-
- pr_debug("%s(%p)\n", __func__, self);
-
- if (level != SOL_IRLMP)
- return -ENOPROTOOPT;
-
- lock_sock(sk);
-
- switch (optname) {
- case IRLMP_IAS_SET:
- /* The user want to add an attribute to an existing IAS object
- * (in the IAS database) or to create a new object with this
- * attribute.
- * We first query IAS to know if the object exist, and then
- * create the right attribute...
- */
-
- if (optlen != sizeof(struct irda_ias_set)) {
- err = -EINVAL;
- goto out;
- }
-
- /* Copy query to the driver. */
- ias_opt = memdup_user(optval, optlen);
- if (IS_ERR(ias_opt)) {
- err = PTR_ERR(ias_opt);
- goto out;
- }
-
- /* Find the object we target.
- * If the user gives us an empty string, we use the object
- * associated with this socket. This will workaround
- * duplicated class name - Jean II */
- if(ias_opt->irda_class_name[0] == '\0') {
- if(self->ias_obj == NULL) {
- kfree(ias_opt);
- err = -EINVAL;
- goto out;
- }
- ias_obj = self->ias_obj;
- } else
- ias_obj = irias_find_object(ias_opt->irda_class_name);
-
- /* Only ROOT can mess with the global IAS database.
- * Users can only add attributes to the object associated
- * with the socket they own - Jean II */
- if((!capable(CAP_NET_ADMIN)) &&
- ((ias_obj == NULL) || (ias_obj != self->ias_obj))) {
- kfree(ias_opt);
- err = -EPERM;
- goto out;
- }
-
- /* If the object doesn't exist, create it */
- if(ias_obj == (struct ias_object *) NULL) {
- /* Create a new object */
- ias_obj = irias_new_object(ias_opt->irda_class_name,
- jiffies);
- if (ias_obj == NULL) {
- kfree(ias_opt);
- err = -ENOMEM;
- goto out;
- }
- free_ias = 1;
- }
-
- /* Do we have the attribute already ? */
- if(irias_find_attrib(ias_obj, ias_opt->irda_attrib_name)) {
- kfree(ias_opt);
- if (free_ias) {
- kfree(ias_obj->name);
- kfree(ias_obj);
- }
- err = -EINVAL;
- goto out;
- }
-
- /* Look at the type */
- switch(ias_opt->irda_attrib_type) {
- case IAS_INTEGER:
- /* Add an integer attribute */
- irias_add_integer_attrib(
- ias_obj,
- ias_opt->irda_attrib_name,
- ias_opt->attribute.irda_attrib_int,
- IAS_USER_ATTR);
- break;
- case IAS_OCT_SEQ:
- /* Check length */
- if(ias_opt->attribute.irda_attrib_octet_seq.len >
- IAS_MAX_OCTET_STRING) {
- kfree(ias_opt);
- if (free_ias) {
- kfree(ias_obj->name);
- kfree(ias_obj);
- }
-
- err = -EINVAL;
- goto out;
- }
- /* Add an octet sequence attribute */
- irias_add_octseq_attrib(
- ias_obj,
- ias_opt->irda_attrib_name,
- ias_opt->attribute.irda_attrib_octet_seq.octet_seq,
- ias_opt->attribute.irda_attrib_octet_seq.len,
- IAS_USER_ATTR);
- break;
- case IAS_STRING:
- /* Should check charset & co */
- /* Check length */
- /* The length is encoded in a __u8, and
- * IAS_MAX_STRING == 256, so there is no way
- * userspace can pass us a string too large.
- * Jean II */
- /* NULL terminate the string (avoid troubles) */
- ias_opt->attribute.irda_attrib_string.string[ias_opt->attribute.irda_attrib_string.len] = '\0';
- /* Add a string attribute */
- irias_add_string_attrib(
- ias_obj,
- ias_opt->irda_attrib_name,
- ias_opt->attribute.irda_attrib_string.string,
- IAS_USER_ATTR);
- break;
- default :
- kfree(ias_opt);
- if (free_ias) {
- kfree(ias_obj->name);
- kfree(ias_obj);
- }
- err = -EINVAL;
- goto out;
- }
- irias_insert_object(ias_obj);
- kfree(ias_opt);
- break;
- case IRLMP_IAS_DEL:
- /* The user want to delete an object from our local IAS
- * database. We just need to query the IAS, check is the
- * object is not owned by the kernel and delete it.
- */
-
- if (optlen != sizeof(struct irda_ias_set)) {
- err = -EINVAL;
- goto out;
- }
-
- /* Copy query to the driver. */
- ias_opt = memdup_user(optval, optlen);
- if (IS_ERR(ias_opt)) {
- err = PTR_ERR(ias_opt);
- goto out;
- }
-
- /* Find the object we target.
- * If the user gives us an empty string, we use the object
- * associated with this socket. This will workaround
- * duplicated class name - Jean II */
- if(ias_opt->irda_class_name[0] == '\0')
- ias_obj = self->ias_obj;
- else
- ias_obj = irias_find_object(ias_opt->irda_class_name);
- if(ias_obj == (struct ias_object *) NULL) {
- kfree(ias_opt);
- err = -EINVAL;
- goto out;
- }
-
- /* Only ROOT can mess with the global IAS database.
- * Users can only del attributes from the object associated
- * with the socket they own - Jean II */
- if((!capable(CAP_NET_ADMIN)) &&
- ((ias_obj == NULL) || (ias_obj != self->ias_obj))) {
- kfree(ias_opt);
- err = -EPERM;
- goto out;
- }
-
- /* Find the attribute (in the object) we target */
- ias_attr = irias_find_attrib(ias_obj,
- ias_opt->irda_attrib_name);
- if(ias_attr == (struct ias_attrib *) NULL) {
- kfree(ias_opt);
- err = -EINVAL;
- goto out;
- }
-
- /* Check is the user space own the object */
- if(ias_attr->value->owner != IAS_USER_ATTR) {
- pr_debug("%s(), attempting to delete a kernel attribute\n",
- __func__);
- kfree(ias_opt);
- err = -EPERM;
- goto out;
- }
-
- /* Remove the attribute (and maybe the object) */
- irias_delete_attrib(ias_obj, ias_attr, 1);
- kfree(ias_opt);
- break;
- case IRLMP_MAX_SDU_SIZE:
- if (optlen < sizeof(int)) {
- err = -EINVAL;
- goto out;
- }
-
- if (get_user(opt, (int __user *)optval)) {
- err = -EFAULT;
- goto out;
- }
-
- /* Only possible for a seqpacket service (TTP with SAR) */
- if (sk->sk_type != SOCK_SEQPACKET) {
- pr_debug("%s(), setting max_sdu_size = %d\n",
- __func__, opt);
- self->max_sdu_size_rx = opt;
- } else {
- net_warn_ratelimited("%s: not allowed to set MAXSDUSIZE for this socket type!\n",
- __func__);
- err = -ENOPROTOOPT;
- goto out;
- }
- break;
- case IRLMP_HINTS_SET:
- if (optlen < sizeof(int)) {
- err = -EINVAL;
- goto out;
- }
-
- /* The input is really a (__u8 hints[2]), easier as an int */
- if (get_user(opt, (int __user *)optval)) {
- err = -EFAULT;
- goto out;
- }
-
- /* Unregister any old registration */
- irlmp_unregister_service(self->skey);
-
- self->skey = irlmp_register_service((__u16) opt);
- break;
- case IRLMP_HINT_MASK_SET:
- /* As opposed to the previous case which set the hint bits
- * that we advertise, this one set the filter we use when
- * making a discovery (nodes which don't match any hint
- * bit in the mask are not reported).
- */
- if (optlen < sizeof(int)) {
- err = -EINVAL;
- goto out;
- }
-
- /* The input is really a (__u8 hints[2]), easier as an int */
- if (get_user(opt, (int __user *)optval)) {
- err = -EFAULT;
- goto out;
- }
-
- /* Set the new hint mask */
- self->mask.word = (__u16) opt;
- /* Mask out extension bits */
- self->mask.word &= 0x7f7f;
- /* Check if no bits */
- if(!self->mask.word)
- self->mask.word = 0xFFFF;
-
- break;
- default:
- err = -ENOPROTOOPT;
- break;
- }
-
-out:
- release_sock(sk);
-
- return err;
-}
-
-/*
- * Function irda_extract_ias_value(ias_opt, ias_value)
- *
- * Translate internal IAS value structure to the user space representation
- *
- * The external representation of IAS values, as we exchange them with
- * user space program is quite different from the internal representation,
- * as stored in the IAS database (because we need a flat structure for
- * crossing kernel boundary).
- * This function transform the former in the latter. We also check
- * that the value type is valid.
- */
-static int irda_extract_ias_value(struct irda_ias_set *ias_opt,
- struct ias_value *ias_value)
-{
- /* Look at the type */
- switch (ias_value->type) {
- case IAS_INTEGER:
- /* Copy the integer */
- ias_opt->attribute.irda_attrib_int = ias_value->t.integer;
- break;
- case IAS_OCT_SEQ:
- /* Set length */
- ias_opt->attribute.irda_attrib_octet_seq.len = ias_value->len;
- /* Copy over */
- memcpy(ias_opt->attribute.irda_attrib_octet_seq.octet_seq,
- ias_value->t.oct_seq, ias_value->len);
- break;
- case IAS_STRING:
- /* Set length */
- ias_opt->attribute.irda_attrib_string.len = ias_value->len;
- ias_opt->attribute.irda_attrib_string.charset = ias_value->charset;
- /* Copy over */
- memcpy(ias_opt->attribute.irda_attrib_string.string,
- ias_value->t.string, ias_value->len);
- /* NULL terminate the string (avoid troubles) */
- ias_opt->attribute.irda_attrib_string.string[ias_value->len] = '\0';
- break;
- case IAS_MISSING:
- default :
- return -EINVAL;
- }
-
- /* Copy type over */
- ias_opt->irda_attrib_type = ias_value->type;
-
- return 0;
-}
-
-/*
- * Function irda_getsockopt (sock, level, optname, optval, optlen)
- */
-static int irda_getsockopt(struct socket *sock, int level, int optname,
- char __user *optval, int __user *optlen)
-{
- struct sock *sk = sock->sk;
- struct irda_sock *self = irda_sk(sk);
- struct irda_device_list list;
- struct irda_device_info *discoveries;
- struct irda_ias_set * ias_opt; /* IAS get/query params */
- struct ias_object * ias_obj; /* Object in IAS */
- struct ias_attrib * ias_attr; /* Attribute in IAS object */
- int daddr = DEV_ADDR_ANY; /* Dest address for IAS queries */
- int val = 0;
- int len = 0;
- int err = 0;
- int offset, total;
-
- pr_debug("%s(%p)\n", __func__, self);
-
- if (level != SOL_IRLMP)
- return -ENOPROTOOPT;
-
- if (get_user(len, optlen))
- return -EFAULT;
-
- if(len < 0)
- return -EINVAL;
-
- lock_sock(sk);
-
- switch (optname) {
- case IRLMP_ENUMDEVICES:
-
- /* Offset to first device entry */
- offset = sizeof(struct irda_device_list) -
- sizeof(struct irda_device_info);
-
- if (len < offset) {
- err = -EINVAL;
- goto out;
- }
-
- /* Ask lmp for the current discovery log */
- discoveries = irlmp_get_discoveries(&list.len, self->mask.word,
- self->nslots);
- /* Check if the we got some results */
- if (discoveries == NULL) {
- err = -EAGAIN;
- goto out; /* Didn't find any devices */
- }
-
- /* Write total list length back to client */
- if (copy_to_user(optval, &list, offset))
- err = -EFAULT;
-
- /* Copy the list itself - watch for overflow */
- if (list.len > 2048) {
- err = -EINVAL;
- goto bed;
- }
- total = offset + (list.len * sizeof(struct irda_device_info));
- if (total > len)
- total = len;
- if (copy_to_user(optval+offset, discoveries, total - offset))
- err = -EFAULT;
-
- /* Write total number of bytes used back to client */
- if (put_user(total, optlen))
- err = -EFAULT;
-bed:
- /* Free up our buffer */
- kfree(discoveries);
- break;
- case IRLMP_MAX_SDU_SIZE:
- val = self->max_data_size;
- len = sizeof(int);
- if (put_user(len, optlen)) {
- err = -EFAULT;
- goto out;
- }
-
- if (copy_to_user(optval, &val, len)) {
- err = -EFAULT;
- goto out;
- }
-
- break;
- case IRLMP_IAS_GET:
- /* The user want an object from our local IAS database.
- * We just need to query the IAS and return the value
- * that we found */
-
- /* Check that the user has allocated the right space for us */
- if (len != sizeof(struct irda_ias_set)) {
- err = -EINVAL;
- goto out;
- }
-
- /* Copy query to the driver. */
- ias_opt = memdup_user(optval, len);
- if (IS_ERR(ias_opt)) {
- err = PTR_ERR(ias_opt);
- goto out;
- }
-
- /* Find the object we target.
- * If the user gives us an empty string, we use the object
- * associated with this socket. This will workaround
- * duplicated class name - Jean II */
- if(ias_opt->irda_class_name[0] == '\0')
- ias_obj = self->ias_obj;
- else
- ias_obj = irias_find_object(ias_opt->irda_class_name);
- if(ias_obj == (struct ias_object *) NULL) {
- kfree(ias_opt);
- err = -EINVAL;
- goto out;
- }
-
- /* Find the attribute (in the object) we target */
- ias_attr = irias_find_attrib(ias_obj,
- ias_opt->irda_attrib_name);
- if(ias_attr == (struct ias_attrib *) NULL) {
- kfree(ias_opt);
- err = -EINVAL;
- goto out;
- }
-
- /* Translate from internal to user structure */
- err = irda_extract_ias_value(ias_opt, ias_attr->value);
- if(err) {
- kfree(ias_opt);
- goto out;
- }
-
- /* Copy reply to the user */
- if (copy_to_user(optval, ias_opt,
- sizeof(struct irda_ias_set))) {
- kfree(ias_opt);
- err = -EFAULT;
- goto out;
- }
- /* Note : don't need to put optlen, we checked it */
- kfree(ias_opt);
- break;
- case IRLMP_IAS_QUERY:
- /* The user want an object from a remote IAS database.
- * We need to use IAP to query the remote database and
- * then wait for the answer to come back. */
-
- /* Check that the user has allocated the right space for us */
- if (len != sizeof(struct irda_ias_set)) {
- err = -EINVAL;
- goto out;
- }
-
- /* Copy query to the driver. */
- ias_opt = memdup_user(optval, len);
- if (IS_ERR(ias_opt)) {
- err = PTR_ERR(ias_opt);
- goto out;
- }
-
- /* At this point, there are two cases...
- * 1) the socket is connected - that's the easy case, we
- * just query the device we are connected to...
- * 2) the socket is not connected - the user doesn't want
- * to connect and/or may not have a valid service name
- * (so can't create a fake connection). In this case,
- * we assume that the user pass us a valid destination
- * address in the requesting structure...
- */
- if(self->daddr != DEV_ADDR_ANY) {
- /* We are connected - reuse known daddr */
- daddr = self->daddr;
- } else {
- /* We are not connected, we must specify a valid
- * destination address */
- daddr = ias_opt->daddr;
- if((!daddr) || (daddr == DEV_ADDR_ANY)) {
- kfree(ias_opt);
- err = -EINVAL;
- goto out;
- }
- }
-
- /* Check that we can proceed with IAP */
- if (self->iriap) {
- net_warn_ratelimited("%s: busy with a previous query\n",
- __func__);
- kfree(ias_opt);
- err = -EBUSY;
- goto out;
- }
-
- self->iriap = iriap_open(LSAP_ANY, IAS_CLIENT, self,
- irda_getvalue_confirm);
-
- if (self->iriap == NULL) {
- kfree(ias_opt);
- err = -ENOMEM;
- goto out;
- }
-
- /* Treat unexpected wakeup as disconnect */
- self->errno = -EHOSTUNREACH;
-
- /* Query remote LM-IAS */
- iriap_getvaluebyclass_request(self->iriap,
- self->saddr, daddr,
- ias_opt->irda_class_name,
- ias_opt->irda_attrib_name);
-
- /* Wait for answer, if not yet finished (or failed) */
- if (wait_event_interruptible(self->query_wait,
- (self->iriap == NULL))) {
- /* pending request uses copy of ias_opt-content
- * we can free it regardless! */
- kfree(ias_opt);
- /* Treat signals as disconnect */
- err = -EHOSTUNREACH;
- goto out;
- }
-
- /* Check what happened */
- if (self->errno)
- {
- kfree(ias_opt);
- /* Requested object/attribute doesn't exist */
- if((self->errno == IAS_CLASS_UNKNOWN) ||
- (self->errno == IAS_ATTRIB_UNKNOWN))
- err = -EADDRNOTAVAIL;
- else
- err = -EHOSTUNREACH;
-
- goto out;
- }
-
- /* Translate from internal to user structure */
- err = irda_extract_ias_value(ias_opt, self->ias_result);
- if (self->ias_result)
- irias_delete_value(self->ias_result);
- if (err) {
- kfree(ias_opt);
- goto out;
- }
-
- /* Copy reply to the user */
- if (copy_to_user(optval, ias_opt,
- sizeof(struct irda_ias_set))) {
- kfree(ias_opt);
- err = -EFAULT;
- goto out;
- }
- /* Note : don't need to put optlen, we checked it */
- kfree(ias_opt);
- break;
- case IRLMP_WAITDEVICE:
- /* This function is just another way of seeing life ;-)
- * IRLMP_ENUMDEVICES assumes that you have a static network,
- * and that you just want to pick one of the devices present.
- * On the other hand, in here we assume that no device is
- * present and that at some point in the future a device will
- * come into range. When this device arrive, we just wake
- * up the caller, so that he has time to connect to it before
- * the device goes away...
- * Note : once the node has been discovered for more than a
- * few second, it won't trigger this function, unless it
- * goes away and come back changes its hint bits (so we
- * might call it IRLMP_WAITNEWDEVICE).
- */
-
- /* Check that the user is passing us an int */
- if (len != sizeof(int)) {
- err = -EINVAL;
- goto out;
- }
- /* Get timeout in ms (max time we block the caller) */
- if (get_user(val, (int __user *)optval)) {
- err = -EFAULT;
- goto out;
- }
-
- /* Tell IrLMP we want to be notified */
- irlmp_update_client(self->ckey, self->mask.word,
- irda_selective_discovery_indication,
- NULL, (void *) self);
-
- /* Do some discovery (and also return cached results) */
- irlmp_discovery_request(self->nslots);
-
- /* Wait until a node is discovered */
- if (!self->cachedaddr) {
- pr_debug("%s(), nothing discovered yet, going to sleep...\n",
- __func__);
-
- /* Set watchdog timer to expire in <val> ms. */
- self->errno = 0;
- setup_timer(&self->watchdog, irda_discovery_timeout,
- (unsigned long)self);
- mod_timer(&self->watchdog,
- jiffies + msecs_to_jiffies(val));
-
- /* Wait for IR-LMP to call us back */
- err = __wait_event_interruptible(self->query_wait,
- (self->cachedaddr != 0 || self->errno == -ETIME));
-
- /* If watchdog is still activated, kill it! */
- del_timer(&(self->watchdog));
-
- pr_debug("%s(), ...waking up !\n", __func__);
-
- if (err != 0)
- goto out;
- }
- else
- pr_debug("%s(), found immediately !\n",
- __func__);
-
- /* Tell IrLMP that we have been notified */
- irlmp_update_client(self->ckey, self->mask.word,
- NULL, NULL, NULL);
-
- /* Check if the we got some results */
- if (!self->cachedaddr) {
- err = -EAGAIN; /* Didn't find any devices */
- goto out;
- }
- daddr = self->cachedaddr;
- /* Cleanup */
- self->cachedaddr = 0;
-
- /* We return the daddr of the device that trigger the
- * wakeup. As irlmp pass us only the new devices, we
- * are sure that it's not an old device.
- * If the user want more details, he should query
- * the whole discovery log and pick one device...
- */
- if (put_user(daddr, (int __user *)optval)) {
- err = -EFAULT;
- goto out;
- }
-
- break;
- default:
- err = -ENOPROTOOPT;
- }
-
-out:
-
- release_sock(sk);
-
- return err;
-}
-
-static const struct net_proto_family irda_family_ops = {
- .family = PF_IRDA,
- .create = irda_create,
- .owner = THIS_MODULE,
-};
-
-static const struct proto_ops irda_stream_ops = {
- .family = PF_IRDA,
- .owner = THIS_MODULE,
- .release = irda_release,
- .bind = irda_bind,
- .connect = irda_connect,
- .socketpair = sock_no_socketpair,
- .accept = irda_accept,
- .getname = irda_getname,
- .poll = irda_poll,
- .ioctl = irda_ioctl,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = irda_compat_ioctl,
-#endif
- .listen = irda_listen,
- .shutdown = irda_shutdown,
- .setsockopt = irda_setsockopt,
- .getsockopt = irda_getsockopt,
- .sendmsg = irda_sendmsg,
- .recvmsg = irda_recvmsg_stream,
- .mmap = sock_no_mmap,
- .sendpage = sock_no_sendpage,
-};
-
-static const struct proto_ops irda_seqpacket_ops = {
- .family = PF_IRDA,
- .owner = THIS_MODULE,
- .release = irda_release,
- .bind = irda_bind,
- .connect = irda_connect,
- .socketpair = sock_no_socketpair,
- .accept = irda_accept,
- .getname = irda_getname,
- .poll = datagram_poll,
- .ioctl = irda_ioctl,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = irda_compat_ioctl,
-#endif
- .listen = irda_listen,
- .shutdown = irda_shutdown,
- .setsockopt = irda_setsockopt,
- .getsockopt = irda_getsockopt,
- .sendmsg = irda_sendmsg,
- .recvmsg = irda_recvmsg_dgram,
- .mmap = sock_no_mmap,
- .sendpage = sock_no_sendpage,
-};
-
-static const struct proto_ops irda_dgram_ops = {
- .family = PF_IRDA,
- .owner = THIS_MODULE,
- .release = irda_release,
- .bind = irda_bind,
- .connect = irda_connect,
- .socketpair = sock_no_socketpair,
- .accept = irda_accept,
- .getname = irda_getname,
- .poll = datagram_poll,
- .ioctl = irda_ioctl,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = irda_compat_ioctl,
-#endif
- .listen = irda_listen,
- .shutdown = irda_shutdown,
- .setsockopt = irda_setsockopt,
- .getsockopt = irda_getsockopt,
- .sendmsg = irda_sendmsg_dgram,
- .recvmsg = irda_recvmsg_dgram,
- .mmap = sock_no_mmap,
- .sendpage = sock_no_sendpage,
-};
-
-#ifdef CONFIG_IRDA_ULTRA
-static const struct proto_ops irda_ultra_ops = {
- .family = PF_IRDA,
- .owner = THIS_MODULE,
- .release = irda_release,
- .bind = irda_bind,
- .connect = sock_no_connect,
- .socketpair = sock_no_socketpair,
- .accept = sock_no_accept,
- .getname = irda_getname,
- .poll = datagram_poll,
- .ioctl = irda_ioctl,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = irda_compat_ioctl,
-#endif
- .listen = sock_no_listen,
- .shutdown = irda_shutdown,
- .setsockopt = irda_setsockopt,
- .getsockopt = irda_getsockopt,
- .sendmsg = irda_sendmsg_ultra,
- .recvmsg = irda_recvmsg_dgram,
- .mmap = sock_no_mmap,
- .sendpage = sock_no_sendpage,
-};
-#endif /* CONFIG_IRDA_ULTRA */
-
-/*
- * Function irsock_init (pro)
- *
- * Initialize IrDA protocol
- *
- */
-int __init irsock_init(void)
-{
- int rc = proto_register(&irda_proto, 0);
-
- if (rc == 0)
- rc = sock_register(&irda_family_ops);
-
- return rc;
-}
-
-/*
- * Function irsock_cleanup (void)
- *
- * Remove IrDA protocol
- *
- */
-void irsock_cleanup(void)
-{
- sock_unregister(PF_IRDA);
- proto_unregister(&irda_proto);
-}
diff --git a/net/irda/discovery.c b/net/irda/discovery.c
deleted file mode 100644
index 364d70aed068..000000000000
--- a/net/irda/discovery.c
+++ /dev/null
@@ -1,417 +0,0 @@
-/*********************************************************************
- *
- * Filename: discovery.c
- * Version: 0.1
- * Description: Routines for handling discoveries at the IrLMP layer
- * Status: Experimental.
- * Author: Dag Brattli <dagb@cs.uit.no>
- * Created at: Tue Apr 6 15:33:50 1999
- * Modified at: Sat Oct 9 17:11:31 1999
- * Modified by: Dag Brattli <dagb@cs.uit.no>
- * Modified at: Fri May 28 3:11 CST 1999
- * Modified by: Horst von Brand <vonbrand@sleipnir.valparaiso.cl>
- *
- * Copyright (c) 1999 Dag Brattli, All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
- ********************************************************************/
-
-#include <linux/string.h>
-#include <linux/socket.h>
-#include <linux/fs.h>
-#include <linux/seq_file.h>
-#include <linux/slab.h>
-#include <linux/export.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/irlmp.h>
-
-#include <net/irda/discovery.h>
-
-#include <asm/unaligned.h>
-
-/*
- * Function irlmp_add_discovery (cachelog, discovery)
- *
- * Add a new discovery to the cachelog, and remove any old discoveries
- * from the same device
- *
- * Note : we try to preserve the time this device was *first* discovered
- * (as opposed to the time of last discovery used for cleanup). This is
- * used by clients waiting for discovery events to tell if the device
- * discovered is "new" or just the same old one. They can't rely there
- * on a binary flag (new/old), because not all discovery events are
- * propagated to them, and they might not always listen, so they would
- * miss some new devices popping up...
- * Jean II
- */
-void irlmp_add_discovery(hashbin_t *cachelog, discovery_t *new)
-{
- discovery_t *discovery, *node;
- unsigned long flags;
-
- /* Set time of first discovery if node is new (see below) */
- new->firststamp = new->timestamp;
-
- spin_lock_irqsave(&cachelog->hb_spinlock, flags);
-
- /*
- * Remove all discoveries of devices that has previously been
- * discovered on the same link with the same name (info), or the
- * same daddr. We do this since some devices (mostly PDAs) change
- * their device address between every discovery.
- */
- discovery = (discovery_t *) hashbin_get_first(cachelog);
- while (discovery != NULL ) {
- node = discovery;
-
- /* Be sure to stay one item ahead */
- discovery = (discovery_t *) hashbin_get_next(cachelog);
-
- if ((node->data.saddr == new->data.saddr) &&
- ((node->data.daddr == new->data.daddr) ||
- (strcmp(node->data.info, new->data.info) == 0)))
- {
- /* This discovery is a previous discovery
- * from the same device, so just remove it
- */
- hashbin_remove_this(cachelog, (irda_queue_t *) node);
- /* Check if hints bits are unchanged */
- if (get_unaligned((__u16 *)node->data.hints) == get_unaligned((__u16 *)new->data.hints))
- /* Set time of first discovery for this node */
- new->firststamp = node->firststamp;
- kfree(node);
- }
- }
-
- /* Insert the new and updated version */
- hashbin_insert(cachelog, (irda_queue_t *) new, new->data.daddr, NULL);
-
- spin_unlock_irqrestore(&cachelog->hb_spinlock, flags);
-}
-
-/*
- * Function irlmp_add_discovery_log (cachelog, log)
- *
- * Merge a disovery log into the cachelog.
- *
- */
-void irlmp_add_discovery_log(hashbin_t *cachelog, hashbin_t *log)
-{
- discovery_t *discovery;
-
- /*
- * If log is missing this means that IrLAP was unable to perform the
- * discovery, so restart discovery again with just the half timeout
- * of the normal one.
- */
- /* Well... It means that there was nobody out there - Jean II */
- if (log == NULL) {
- /* irlmp_start_discovery_timer(irlmp, 150); */
- return;
- }
-
- /*
- * Locking : we are the only owner of this discovery log, so
- * no need to lock it.
- * We just need to lock the global log in irlmp_add_discovery().
- */
- discovery = (discovery_t *) hashbin_remove_first(log);
- while (discovery != NULL) {
- irlmp_add_discovery(cachelog, discovery);
-
- discovery = (discovery_t *) hashbin_remove_first(log);
- }
-
- /* Delete the now empty log */
- hashbin_delete(log, (FREE_FUNC) kfree);
-}
-
-/*
- * Function irlmp_expire_discoveries (log, saddr, force)
- *
- * Go through all discoveries and expire all that has stayed too long
- *
- * Note : this assume that IrLAP won't change its saddr, which
- * currently is a valid assumption...
- */
-void irlmp_expire_discoveries(hashbin_t *log, __u32 saddr, int force)
-{
- discovery_t * discovery;
- discovery_t * curr;
- unsigned long flags;
- discinfo_t * buffer = NULL;
- int n; /* Size of the full log */
- int i = 0; /* How many we expired */
-
- IRDA_ASSERT(log != NULL, return;);
- spin_lock_irqsave(&log->hb_spinlock, flags);
-
- discovery = (discovery_t *) hashbin_get_first(log);
- while (discovery != NULL) {
- /* Be sure to be one item ahead */
- curr = discovery;
- discovery = (discovery_t *) hashbin_get_next(log);
-
- /* Test if it's time to expire this discovery */
- if ((curr->data.saddr == saddr) &&
- (force ||
- ((jiffies - curr->timestamp) > DISCOVERY_EXPIRE_TIMEOUT)))
- {
- /* Create buffer as needed.
- * As this function get called a lot and most time
- * we don't have anything to put in the log (we are
- * quite picky), we can save a lot of overhead
- * by not calling kmalloc. Jean II */
- if(buffer == NULL) {
- /* Create the client specific buffer */
- n = HASHBIN_GET_SIZE(log);
- buffer = kmalloc(n * sizeof(struct irda_device_info), GFP_ATOMIC);
- if (buffer == NULL) {
- spin_unlock_irqrestore(&log->hb_spinlock, flags);
- return;
- }
-
- }
-
- /* Copy discovery information */
- memcpy(&(buffer[i]), &(curr->data),
- sizeof(discinfo_t));
- i++;
-
- /* Remove it from the log */
- curr = hashbin_remove_this(log, (irda_queue_t *) curr);
- kfree(curr);
- }
- }
-
- /* Drop the spinlock before calling the higher layers, as
- * we can't guarantee they won't call us back and create a
- * deadlock. We will work on our own private data, so we
- * don't care to be interrupted. - Jean II */
- spin_unlock_irqrestore(&log->hb_spinlock, flags);
-
- if(buffer == NULL)
- return;
-
- /* Tell IrLMP and registered clients about it */
- irlmp_discovery_expiry(buffer, i);
-
- /* Free up our buffer */
- kfree(buffer);
-}
-
-#if 0
-/*
- * Function irlmp_dump_discoveries (log)
- *
- * Print out all discoveries in log
- *
- */
-void irlmp_dump_discoveries(hashbin_t *log)
-{
- discovery_t *discovery;
-
- IRDA_ASSERT(log != NULL, return;);
-
- discovery = (discovery_t *) hashbin_get_first(log);
- while (discovery != NULL) {
- pr_debug("Discovery:\n");
- pr_debug(" daddr=%08x\n", discovery->data.daddr);
- pr_debug(" saddr=%08x\n", discovery->data.saddr);
- pr_debug(" nickname=%s\n", discovery->data.info);
-
- discovery = (discovery_t *) hashbin_get_next(log);
- }
-}
-#endif
-
-/*
- * Function irlmp_copy_discoveries (log, pn, mask)
- *
- * Copy all discoveries in a buffer
- *
- * This function implement a safe way for lmp clients to access the
- * discovery log. The basic problem is that we don't want the log
- * to change (add/remove) while the client is reading it. If the
- * lmp client manipulate directly the hashbin, he is sure to get
- * into troubles...
- * The idea is that we copy all the current discovery log in a buffer
- * which is specific to the client and pass this copy to him. As we
- * do this operation with the spinlock grabbed, we are safe...
- * Note : we don't want those clients to grab the spinlock, because
- * we have no control on how long they will hold it...
- * Note : we choose to copy the log in "struct irda_device_info" to
- * save space...
- * Note : the client must kfree himself() the log...
- * Jean II
- */
-struct irda_device_info *irlmp_copy_discoveries(hashbin_t *log, int *pn,
- __u16 mask, int old_entries)
-{
- discovery_t * discovery;
- unsigned long flags;
- discinfo_t * buffer = NULL;
- int j_timeout = (sysctl_discovery_timeout * HZ);
- int n; /* Size of the full log */
- int i = 0; /* How many we picked */
-
- IRDA_ASSERT(pn != NULL, return NULL;);
- IRDA_ASSERT(log != NULL, return NULL;);
-
- /* Save spin lock */
- spin_lock_irqsave(&log->hb_spinlock, flags);
-
- discovery = (discovery_t *) hashbin_get_first(log);
- while (discovery != NULL) {
- /* Mask out the ones we don't want :
- * We want to match the discovery mask, and to get only
- * the most recent one (unless we want old ones) */
- if ((get_unaligned((__u16 *)discovery->data.hints) & mask) &&
- ((old_entries) ||
- ((jiffies - discovery->firststamp) < j_timeout))) {
- /* Create buffer as needed.
- * As this function get called a lot and most time
- * we don't have anything to put in the log (we are
- * quite picky), we can save a lot of overhead
- * by not calling kmalloc. Jean II */
- if(buffer == NULL) {
- /* Create the client specific buffer */
- n = HASHBIN_GET_SIZE(log);
- buffer = kmalloc(n * sizeof(struct irda_device_info), GFP_ATOMIC);
- if (buffer == NULL) {
- spin_unlock_irqrestore(&log->hb_spinlock, flags);
- return NULL;
- }
-
- }
-
- /* Copy discovery information */
- memcpy(&(buffer[i]), &(discovery->data),
- sizeof(discinfo_t));
- i++;
- }
- discovery = (discovery_t *) hashbin_get_next(log);
- }
-
- spin_unlock_irqrestore(&log->hb_spinlock, flags);
-
- /* Get the actual number of device in the buffer and return */
- *pn = i;
- return buffer;
-}
-
-#ifdef CONFIG_PROC_FS
-static inline discovery_t *discovery_seq_idx(loff_t pos)
-
-{
- discovery_t *discovery;
-
- for (discovery = (discovery_t *) hashbin_get_first(irlmp->cachelog);
- discovery != NULL;
- discovery = (discovery_t *) hashbin_get_next(irlmp->cachelog)) {
- if (pos-- == 0)
- break;
- }
-
- return discovery;
-}
-
-static void *discovery_seq_start(struct seq_file *seq, loff_t *pos)
-{
- spin_lock_irq(&irlmp->cachelog->hb_spinlock);
- return *pos ? discovery_seq_idx(*pos - 1) : SEQ_START_TOKEN;
-}
-
-static void *discovery_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- ++*pos;
- return (v == SEQ_START_TOKEN)
- ? (void *) hashbin_get_first(irlmp->cachelog)
- : (void *) hashbin_get_next(irlmp->cachelog);
-}
-
-static void discovery_seq_stop(struct seq_file *seq, void *v)
-{
- spin_unlock_irq(&irlmp->cachelog->hb_spinlock);
-}
-
-static int discovery_seq_show(struct seq_file *seq, void *v)
-{
- if (v == SEQ_START_TOKEN)
- seq_puts(seq, "IrLMP: Discovery log:\n\n");
- else {
- const discovery_t *discovery = v;
-
- seq_printf(seq, "nickname: %s, hint: 0x%02x%02x",
- discovery->data.info,
- discovery->data.hints[0],
- discovery->data.hints[1]);
-#if 0
- if ( discovery->data.hints[0] & HINT_PNP)
- seq_puts(seq, "PnP Compatible ");
- if ( discovery->data.hints[0] & HINT_PDA)
- seq_puts(seq, "PDA/Palmtop ");
- if ( discovery->data.hints[0] & HINT_COMPUTER)
- seq_puts(seq, "Computer ");
- if ( discovery->data.hints[0] & HINT_PRINTER)
- seq_puts(seq, "Printer ");
- if ( discovery->data.hints[0] & HINT_MODEM)
- seq_puts(seq, "Modem ");
- if ( discovery->data.hints[0] & HINT_FAX)
- seq_puts(seq, "Fax ");
- if ( discovery->data.hints[0] & HINT_LAN)
- seq_puts(seq, "LAN Access ");
-
- if ( discovery->data.hints[1] & HINT_TELEPHONY)
- seq_puts(seq, "Telephony ");
- if ( discovery->data.hints[1] & HINT_FILE_SERVER)
- seq_puts(seq, "File Server ");
- if ( discovery->data.hints[1] & HINT_COMM)
- seq_puts(seq, "IrCOMM ");
- if ( discovery->data.hints[1] & HINT_OBEX)
- seq_puts(seq, "IrOBEX ");
-#endif
- seq_printf(seq,", saddr: 0x%08x, daddr: 0x%08x\n\n",
- discovery->data.saddr,
- discovery->data.daddr);
-
- seq_putc(seq, '\n');
- }
- return 0;
-}
-
-static const struct seq_operations discovery_seq_ops = {
- .start = discovery_seq_start,
- .next = discovery_seq_next,
- .stop = discovery_seq_stop,
- .show = discovery_seq_show,
-};
-
-static int discovery_seq_open(struct inode *inode, struct file *file)
-{
- IRDA_ASSERT(irlmp != NULL, return -EINVAL;);
-
- return seq_open(file, &discovery_seq_ops);
-}
-
-const struct file_operations discovery_seq_fops = {
- .owner = THIS_MODULE,
- .open = discovery_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-#endif
diff --git a/net/irda/ircomm/Kconfig b/net/irda/ircomm/Kconfig
deleted file mode 100644
index 19492c1707b7..000000000000
--- a/net/irda/ircomm/Kconfig
+++ /dev/null
@@ -1,12 +0,0 @@
-config IRCOMM
- tristate "IrCOMM protocol"
- depends on IRDA && TTY
- help
- Say Y here if you want to build support for the IrCOMM protocol.
- To compile it as modules, choose M here: the modules will be
- called ircomm and ircomm_tty.
- IrCOMM implements serial port emulation, and makes it possible to
- use all existing applications that understands TTY's with an
- infrared link. Thus you should be able to use application like PPP,
- minicom and others.
-
diff --git a/net/irda/ircomm/Makefile b/net/irda/ircomm/Makefile
deleted file mode 100644
index ab23b5ba7e33..000000000000
--- a/net/irda/ircomm/Makefile
+++ /dev/null
@@ -1,8 +0,0 @@
-#
-# Makefile for the Linux IrDA IrCOMM protocol layer.
-#
-
-obj-$(CONFIG_IRCOMM) += ircomm.o ircomm-tty.o
-
-ircomm-y := ircomm_core.o ircomm_event.o ircomm_lmp.o ircomm_ttp.o
-ircomm-tty-y := ircomm_tty.o ircomm_tty_attach.o ircomm_tty_ioctl.o ircomm_param.o
diff --git a/net/irda/ircomm/ircomm_core.c b/net/irda/ircomm/ircomm_core.c
deleted file mode 100644
index 3af219545f6d..000000000000
--- a/net/irda/ircomm/ircomm_core.c
+++ /dev/null
@@ -1,563 +0,0 @@
-/*********************************************************************
- *
- * Filename: ircomm_core.c
- * Version: 1.0
- * Description: IrCOMM service interface
- * Status: Experimental.
- * Author: Dag Brattli <dagb@cs.uit.no>
- * Created at: Sun Jun 6 20:37:34 1999
- * Modified at: Tue Dec 21 13:26:41 1999
- * Modified by: Dag Brattli <dagb@cs.uit.no>
- *
- * Copyright (c) 1999 Dag Brattli, All Rights Reserved.
- * Copyright (c) 2000-2003 Jean Tourrilhes <jt@hpl.hp.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
- ********************************************************************/
-
-#include <linux/module.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/irmod.h>
-#include <net/irda/irlmp.h>
-#include <net/irda/iriap.h>
-#include <net/irda/irttp.h>
-#include <net/irda/irias_object.h>
-
-#include <net/irda/ircomm_event.h>
-#include <net/irda/ircomm_lmp.h>
-#include <net/irda/ircomm_ttp.h>
-#include <net/irda/ircomm_param.h>
-#include <net/irda/ircomm_core.h>
-
-static int __ircomm_close(struct ircomm_cb *self);
-static void ircomm_control_indication(struct ircomm_cb *self,
- struct sk_buff *skb, int clen);
-
-#ifdef CONFIG_PROC_FS
-extern struct proc_dir_entry *proc_irda;
-static int ircomm_seq_open(struct inode *, struct file *);
-
-static const struct file_operations ircomm_proc_fops = {
- .owner = THIS_MODULE,
- .open = ircomm_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-#endif /* CONFIG_PROC_FS */
-
-hashbin_t *ircomm = NULL;
-
-static int __init ircomm_init(void)
-{
- ircomm = hashbin_new(HB_LOCK);
- if (ircomm == NULL) {
- net_err_ratelimited("%s(), can't allocate hashbin!\n",
- __func__);
- return -ENOMEM;
- }
-
-#ifdef CONFIG_PROC_FS
- { struct proc_dir_entry *ent;
- ent = proc_create("ircomm", 0, proc_irda, &ircomm_proc_fops);
- if (!ent) {
- printk(KERN_ERR "ircomm_init: can't create /proc entry!\n");
- return -ENODEV;
- }
- }
-#endif /* CONFIG_PROC_FS */
-
- net_info_ratelimited("IrCOMM protocol (Dag Brattli)\n");
-
- return 0;
-}
-
-static void __exit ircomm_cleanup(void)
-{
- hashbin_delete(ircomm, (FREE_FUNC) __ircomm_close);
-
-#ifdef CONFIG_PROC_FS
- remove_proc_entry("ircomm", proc_irda);
-#endif /* CONFIG_PROC_FS */
-}
-
-/*
- * Function ircomm_open (client_notify)
- *
- * Start a new IrCOMM instance
- *
- */
-struct ircomm_cb *ircomm_open(notify_t *notify, __u8 service_type, int line)
-{
- struct ircomm_cb *self = NULL;
- int ret;
-
- pr_debug("%s(), service_type=0x%02x\n", __func__ ,
- service_type);
-
- IRDA_ASSERT(ircomm != NULL, return NULL;);
-
- self = kzalloc(sizeof(struct ircomm_cb), GFP_KERNEL);
- if (self == NULL)
- return NULL;
-
- self->notify = *notify;
- self->magic = IRCOMM_MAGIC;
-
- /* Check if we should use IrLMP or IrTTP */
- if (service_type & IRCOMM_3_WIRE_RAW) {
- self->flow_status = FLOW_START;
- ret = ircomm_open_lsap(self);
- } else
- ret = ircomm_open_tsap(self);
-
- if (ret < 0) {
- kfree(self);
- return NULL;
- }
-
- self->service_type = service_type;
- self->line = line;
-
- hashbin_insert(ircomm, (irda_queue_t *) self, line, NULL);
-
- ircomm_next_state(self, IRCOMM_IDLE);
-
- return self;
-}
-
-EXPORT_SYMBOL(ircomm_open);
-
-/*
- * Function ircomm_close_instance (self)
- *
- * Remove IrCOMM instance
- *
- */
-static int __ircomm_close(struct ircomm_cb *self)
-{
- /* Disconnect link if any */
- ircomm_do_event(self, IRCOMM_DISCONNECT_REQUEST, NULL, NULL);
-
- /* Remove TSAP */
- if (self->tsap) {
- irttp_close_tsap(self->tsap);
- self->tsap = NULL;
- }
-
- /* Remove LSAP */
- if (self->lsap) {
- irlmp_close_lsap(self->lsap);
- self->lsap = NULL;
- }
- self->magic = 0;
-
- kfree(self);
-
- return 0;
-}
-
-/*
- * Function ircomm_close (self)
- *
- * Closes and removes the specified IrCOMM instance
- *
- */
-int ircomm_close(struct ircomm_cb *self)
-{
- struct ircomm_cb *entry;
-
- IRDA_ASSERT(self != NULL, return -EIO;);
- IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return -EIO;);
-
- entry = hashbin_remove(ircomm, self->line, NULL);
-
- IRDA_ASSERT(entry == self, return -1;);
-
- return __ircomm_close(self);
-}
-
-EXPORT_SYMBOL(ircomm_close);
-
-/*
- * Function ircomm_connect_request (self, service_type)
- *
- * Impl. of this function is differ from one of the reference. This
- * function does discovery as well as sending connect request
- *
- */
-int ircomm_connect_request(struct ircomm_cb *self, __u8 dlsap_sel,
- __u32 saddr, __u32 daddr, struct sk_buff *skb,
- __u8 service_type)
-{
- struct ircomm_info info;
- int ret;
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return -1;);
-
- self->service_type= service_type;
-
- info.dlsap_sel = dlsap_sel;
- info.saddr = saddr;
- info.daddr = daddr;
-
- ret = ircomm_do_event(self, IRCOMM_CONNECT_REQUEST, skb, &info);
-
- return ret;
-}
-
-EXPORT_SYMBOL(ircomm_connect_request);
-
-/*
- * Function ircomm_connect_indication (self, qos, skb)
- *
- * Notify user layer about the incoming connection
- *
- */
-void ircomm_connect_indication(struct ircomm_cb *self, struct sk_buff *skb,
- struct ircomm_info *info)
-{
- /*
- * If there are any data hiding in the control channel, we must
- * deliver it first. The side effect is that the control channel
- * will be removed from the skb
- */
- if (self->notify.connect_indication)
- self->notify.connect_indication(self->notify.instance, self,
- info->qos, info->max_data_size,
- info->max_header_size, skb);
- else {
- pr_debug("%s(), missing handler\n", __func__);
- }
-}
-
-/*
- * Function ircomm_connect_response (self, userdata, max_sdu_size)
- *
- * User accepts connection
- *
- */
-int ircomm_connect_response(struct ircomm_cb *self, struct sk_buff *userdata)
-{
- int ret;
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return -1;);
-
- ret = ircomm_do_event(self, IRCOMM_CONNECT_RESPONSE, userdata, NULL);
-
- return ret;
-}
-
-EXPORT_SYMBOL(ircomm_connect_response);
-
-/*
- * Function connect_confirm (self, skb)
- *
- * Notify user layer that the link is now connected
- *
- */
-void ircomm_connect_confirm(struct ircomm_cb *self, struct sk_buff *skb,
- struct ircomm_info *info)
-{
- if (self->notify.connect_confirm )
- self->notify.connect_confirm(self->notify.instance,
- self, info->qos,
- info->max_data_size,
- info->max_header_size, skb);
- else {
- pr_debug("%s(), missing handler\n", __func__);
- }
-}
-
-/*
- * Function ircomm_data_request (self, userdata)
- *
- * Send IrCOMM data to peer device
- *
- */
-int ircomm_data_request(struct ircomm_cb *self, struct sk_buff *skb)
-{
- int ret;
-
- IRDA_ASSERT(self != NULL, return -EFAULT;);
- IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return -EFAULT;);
- IRDA_ASSERT(skb != NULL, return -EFAULT;);
-
- ret = ircomm_do_event(self, IRCOMM_DATA_REQUEST, skb, NULL);
-
- return ret;
-}
-
-EXPORT_SYMBOL(ircomm_data_request);
-
-/*
- * Function ircomm_data_indication (self, skb)
- *
- * Data arrived, so deliver it to user
- *
- */
-void ircomm_data_indication(struct ircomm_cb *self, struct sk_buff *skb)
-{
- IRDA_ASSERT(skb->len > 0, return;);
-
- if (self->notify.data_indication)
- self->notify.data_indication(self->notify.instance, self, skb);
- else {
- pr_debug("%s(), missing handler\n", __func__);
- }
-}
-
-/*
- * Function ircomm_process_data (self, skb)
- *
- * Data arrived which may contain control channel data
- *
- */
-void ircomm_process_data(struct ircomm_cb *self, struct sk_buff *skb)
-{
- int clen;
-
- IRDA_ASSERT(skb->len > 0, return;);
-
- clen = skb->data[0];
-
- /*
- * Input validation check: a stir4200/mcp2150 combinations sometimes
- * results in frames with clen > remaining packet size. These are
- * illegal; if we throw away just this frame then it seems to carry on
- * fine
- */
- if (unlikely(skb->len < (clen + 1))) {
- pr_debug("%s() throwing away illegal frame\n",
- __func__);
- return;
- }
-
- /*
- * If there are any data hiding in the control channel, we must
- * deliver it first. The side effect is that the control channel
- * will be removed from the skb
- */
- if (clen > 0)
- ircomm_control_indication(self, skb, clen);
-
- /* Remove control channel from data channel */
- skb_pull(skb, clen+1);
-
- if (skb->len)
- ircomm_data_indication(self, skb);
- else {
- pr_debug("%s(), data was control info only!\n",
- __func__);
- }
-}
-
-/*
- * Function ircomm_control_request (self, params)
- *
- * Send control data to peer device
- *
- */
-int ircomm_control_request(struct ircomm_cb *self, struct sk_buff *skb)
-{
- int ret;
-
- IRDA_ASSERT(self != NULL, return -EFAULT;);
- IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return -EFAULT;);
- IRDA_ASSERT(skb != NULL, return -EFAULT;);
-
- ret = ircomm_do_event(self, IRCOMM_CONTROL_REQUEST, skb, NULL);
-
- return ret;
-}
-
-EXPORT_SYMBOL(ircomm_control_request);
-
-/*
- * Function ircomm_control_indication (self, skb)
- *
- * Data has arrived on the control channel
- *
- */
-static void ircomm_control_indication(struct ircomm_cb *self,
- struct sk_buff *skb, int clen)
-{
- /* Use udata for delivering data on the control channel */
- if (self->notify.udata_indication) {
- struct sk_buff *ctrl_skb;
-
- /* We don't own the skb, so clone it */
- ctrl_skb = skb_clone(skb, GFP_ATOMIC);
- if (!ctrl_skb)
- return;
-
- /* Remove data channel from control channel */
- skb_trim(ctrl_skb, clen+1);
-
- self->notify.udata_indication(self->notify.instance, self,
- ctrl_skb);
-
- /* Drop reference count -
- * see ircomm_tty_control_indication(). */
- dev_kfree_skb(ctrl_skb);
- } else {
- pr_debug("%s(), missing handler\n", __func__);
- }
-}
-
-/*
- * Function ircomm_disconnect_request (self, userdata, priority)
- *
- * User layer wants to disconnect the IrCOMM connection
- *
- */
-int ircomm_disconnect_request(struct ircomm_cb *self, struct sk_buff *userdata)
-{
- struct ircomm_info info;
- int ret;
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return -1;);
-
- ret = ircomm_do_event(self, IRCOMM_DISCONNECT_REQUEST, userdata,
- &info);
- return ret;
-}
-
-EXPORT_SYMBOL(ircomm_disconnect_request);
-
-/*
- * Function disconnect_indication (self, skb)
- *
- * Tell user that the link has been disconnected
- *
- */
-void ircomm_disconnect_indication(struct ircomm_cb *self, struct sk_buff *skb,
- struct ircomm_info *info)
-{
- IRDA_ASSERT(info != NULL, return;);
-
- if (self->notify.disconnect_indication) {
- self->notify.disconnect_indication(self->notify.instance, self,
- info->reason, skb);
- } else {
- pr_debug("%s(), missing handler\n", __func__);
- }
-}
-
-/*
- * Function ircomm_flow_request (self, flow)
- *
- *
- *
- */
-void ircomm_flow_request(struct ircomm_cb *self, LOCAL_FLOW flow)
-{
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return;);
-
- if (self->service_type == IRCOMM_3_WIRE_RAW)
- return;
-
- irttp_flow_request(self->tsap, flow);
-}
-
-EXPORT_SYMBOL(ircomm_flow_request);
-
-#ifdef CONFIG_PROC_FS
-static void *ircomm_seq_start(struct seq_file *seq, loff_t *pos)
-{
- struct ircomm_cb *self;
- loff_t off = 0;
-
- spin_lock_irq(&ircomm->hb_spinlock);
-
- for (self = (struct ircomm_cb *) hashbin_get_first(ircomm);
- self != NULL;
- self = (struct ircomm_cb *) hashbin_get_next(ircomm)) {
- if (off++ == *pos)
- break;
-
- }
- return self;
-}
-
-static void *ircomm_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- ++*pos;
-
- return (void *) hashbin_get_next(ircomm);
-}
-
-static void ircomm_seq_stop(struct seq_file *seq, void *v)
-{
- spin_unlock_irq(&ircomm->hb_spinlock);
-}
-
-static int ircomm_seq_show(struct seq_file *seq, void *v)
-{
- const struct ircomm_cb *self = v;
-
- IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return -EINVAL; );
-
- if(self->line < 0x10)
- seq_printf(seq, "ircomm%d", self->line);
- else
- seq_printf(seq, "irlpt%d", self->line - 0x10);
-
- seq_printf(seq,
- " state: %s, slsap_sel: %#02x, dlsap_sel: %#02x, mode:",
- ircomm_state[ self->state],
- self->slsap_sel, self->dlsap_sel);
-
- if(self->service_type & IRCOMM_3_WIRE_RAW)
- seq_printf(seq, " 3-wire-raw");
- if(self->service_type & IRCOMM_3_WIRE)
- seq_printf(seq, " 3-wire");
- if(self->service_type & IRCOMM_9_WIRE)
- seq_printf(seq, " 9-wire");
- if(self->service_type & IRCOMM_CENTRONICS)
- seq_printf(seq, " Centronics");
- seq_putc(seq, '\n');
-
- return 0;
-}
-
-static const struct seq_operations ircomm_seq_ops = {
- .start = ircomm_seq_start,
- .next = ircomm_seq_next,
- .stop = ircomm_seq_stop,
- .show = ircomm_seq_show,
-};
-
-static int ircomm_seq_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &ircomm_seq_ops);
-}
-#endif /* CONFIG_PROC_FS */
-
-MODULE_AUTHOR("Dag Brattli <dag@brattli.net>");
-MODULE_DESCRIPTION("IrCOMM protocol");
-MODULE_LICENSE("GPL");
-
-module_init(ircomm_init);
-module_exit(ircomm_cleanup);
diff --git a/net/irda/ircomm/ircomm_event.c b/net/irda/ircomm/ircomm_event.c
deleted file mode 100644
index b0730ac9f388..000000000000
--- a/net/irda/ircomm/ircomm_event.c
+++ /dev/null
@@ -1,246 +0,0 @@
-/*********************************************************************
- *
- * Filename: ircomm_event.c
- * Version: 1.0
- * Description: IrCOMM layer state machine
- * Status: Stable
- * Author: Dag Brattli <dagb@cs.uit.no>
- * Created at: Sun Jun 6 20:33:11 1999
- * Modified at: Sun Dec 12 13:44:32 1999
- * Modified by: Dag Brattli <dagb@cs.uit.no>
- *
- * Copyright (c) 1999 Dag Brattli, All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
- ********************************************************************/
-
-#include <linux/proc_fs.h>
-#include <linux/init.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/irlmp.h>
-#include <net/irda/iriap.h>
-#include <net/irda/irttp.h>
-#include <net/irda/irias_object.h>
-
-#include <net/irda/ircomm_core.h>
-#include <net/irda/ircomm_event.h>
-
-static int ircomm_state_idle(struct ircomm_cb *self, IRCOMM_EVENT event,
- struct sk_buff *skb, struct ircomm_info *info);
-static int ircomm_state_waiti(struct ircomm_cb *self, IRCOMM_EVENT event,
- struct sk_buff *skb, struct ircomm_info *info);
-static int ircomm_state_waitr(struct ircomm_cb *self, IRCOMM_EVENT event,
- struct sk_buff *skb, struct ircomm_info *info);
-static int ircomm_state_conn(struct ircomm_cb *self, IRCOMM_EVENT event,
- struct sk_buff *skb, struct ircomm_info *info);
-
-const char *const ircomm_state[] = {
- "IRCOMM_IDLE",
- "IRCOMM_WAITI",
- "IRCOMM_WAITR",
- "IRCOMM_CONN",
-};
-
-static const char *const ircomm_event[] __maybe_unused = {
- "IRCOMM_CONNECT_REQUEST",
- "IRCOMM_CONNECT_RESPONSE",
- "IRCOMM_TTP_CONNECT_INDICATION",
- "IRCOMM_LMP_CONNECT_INDICATION",
- "IRCOMM_TTP_CONNECT_CONFIRM",
- "IRCOMM_LMP_CONNECT_CONFIRM",
-
- "IRCOMM_LMP_DISCONNECT_INDICATION",
- "IRCOMM_TTP_DISCONNECT_INDICATION",
- "IRCOMM_DISCONNECT_REQUEST",
-
- "IRCOMM_TTP_DATA_INDICATION",
- "IRCOMM_LMP_DATA_INDICATION",
- "IRCOMM_DATA_REQUEST",
- "IRCOMM_CONTROL_REQUEST",
- "IRCOMM_CONTROL_INDICATION",
-};
-
-static int (*state[])(struct ircomm_cb *self, IRCOMM_EVENT event,
- struct sk_buff *skb, struct ircomm_info *info) =
-{
- ircomm_state_idle,
- ircomm_state_waiti,
- ircomm_state_waitr,
- ircomm_state_conn,
-};
-
-/*
- * Function ircomm_state_idle (self, event, skb)
- *
- * IrCOMM is currently idle
- *
- */
-static int ircomm_state_idle(struct ircomm_cb *self, IRCOMM_EVENT event,
- struct sk_buff *skb, struct ircomm_info *info)
-{
- int ret = 0;
-
- switch (event) {
- case IRCOMM_CONNECT_REQUEST:
- ircomm_next_state(self, IRCOMM_WAITI);
- ret = self->issue.connect_request(self, skb, info);
- break;
- case IRCOMM_TTP_CONNECT_INDICATION:
- case IRCOMM_LMP_CONNECT_INDICATION:
- ircomm_next_state(self, IRCOMM_WAITR);
- ircomm_connect_indication(self, skb, info);
- break;
- default:
- pr_debug("%s(), unknown event: %s\n", __func__ ,
- ircomm_event[event]);
- ret = -EINVAL;
- }
- return ret;
-}
-
-/*
- * Function ircomm_state_waiti (self, event, skb)
- *
- * The IrCOMM user has requested an IrCOMM connection to the remote
- * device and is awaiting confirmation
- */
-static int ircomm_state_waiti(struct ircomm_cb *self, IRCOMM_EVENT event,
- struct sk_buff *skb, struct ircomm_info *info)
-{
- int ret = 0;
-
- switch (event) {
- case IRCOMM_TTP_CONNECT_CONFIRM:
- case IRCOMM_LMP_CONNECT_CONFIRM:
- ircomm_next_state(self, IRCOMM_CONN);
- ircomm_connect_confirm(self, skb, info);
- break;
- case IRCOMM_TTP_DISCONNECT_INDICATION:
- case IRCOMM_LMP_DISCONNECT_INDICATION:
- ircomm_next_state(self, IRCOMM_IDLE);
- ircomm_disconnect_indication(self, skb, info);
- break;
- default:
- pr_debug("%s(), unknown event: %s\n", __func__ ,
- ircomm_event[event]);
- ret = -EINVAL;
- }
- return ret;
-}
-
-/*
- * Function ircomm_state_waitr (self, event, skb)
- *
- * IrCOMM has received an incoming connection request and is awaiting
- * response from the user
- */
-static int ircomm_state_waitr(struct ircomm_cb *self, IRCOMM_EVENT event,
- struct sk_buff *skb, struct ircomm_info *info)
-{
- int ret = 0;
-
- switch (event) {
- case IRCOMM_CONNECT_RESPONSE:
- ircomm_next_state(self, IRCOMM_CONN);
- ret = self->issue.connect_response(self, skb);
- break;
- case IRCOMM_DISCONNECT_REQUEST:
- ircomm_next_state(self, IRCOMM_IDLE);
- ret = self->issue.disconnect_request(self, skb, info);
- break;
- case IRCOMM_TTP_DISCONNECT_INDICATION:
- case IRCOMM_LMP_DISCONNECT_INDICATION:
- ircomm_next_state(self, IRCOMM_IDLE);
- ircomm_disconnect_indication(self, skb, info);
- break;
- default:
- pr_debug("%s(), unknown event = %s\n", __func__ ,
- ircomm_event[event]);
- ret = -EINVAL;
- }
- return ret;
-}
-
-/*
- * Function ircomm_state_conn (self, event, skb)
- *
- * IrCOMM is connected to the peer IrCOMM device
- *
- */
-static int ircomm_state_conn(struct ircomm_cb *self, IRCOMM_EVENT event,
- struct sk_buff *skb, struct ircomm_info *info)
-{
- int ret = 0;
-
- switch (event) {
- case IRCOMM_DATA_REQUEST:
- ret = self->issue.data_request(self, skb, 0);
- break;
- case IRCOMM_TTP_DATA_INDICATION:
- ircomm_process_data(self, skb);
- break;
- case IRCOMM_LMP_DATA_INDICATION:
- ircomm_data_indication(self, skb);
- break;
- case IRCOMM_CONTROL_REQUEST:
- /* Just send a separate frame for now */
- ret = self->issue.data_request(self, skb, skb->len);
- break;
- case IRCOMM_TTP_DISCONNECT_INDICATION:
- case IRCOMM_LMP_DISCONNECT_INDICATION:
- ircomm_next_state(self, IRCOMM_IDLE);
- ircomm_disconnect_indication(self, skb, info);
- break;
- case IRCOMM_DISCONNECT_REQUEST:
- ircomm_next_state(self, IRCOMM_IDLE);
- ret = self->issue.disconnect_request(self, skb, info);
- break;
- default:
- pr_debug("%s(), unknown event = %s\n", __func__ ,
- ircomm_event[event]);
- ret = -EINVAL;
- }
- return ret;
-}
-
-/*
- * Function ircomm_do_event (self, event, skb)
- *
- * Process event
- *
- */
-int ircomm_do_event(struct ircomm_cb *self, IRCOMM_EVENT event,
- struct sk_buff *skb, struct ircomm_info *info)
-{
- pr_debug("%s: state=%s, event=%s\n", __func__ ,
- ircomm_state[self->state], ircomm_event[event]);
-
- return (*state[self->state])(self, event, skb, info);
-}
-
-/*
- * Function ircomm_next_state (self, state)
- *
- * Switch state
- *
- */
-void ircomm_next_state(struct ircomm_cb *self, IRCOMM_STATE state)
-{
- self->state = state;
-
- pr_debug("%s: next state=%s, service type=%d\n", __func__ ,
- ircomm_state[self->state], self->service_type);
-}
diff --git a/net/irda/ircomm/ircomm_lmp.c b/net/irda/ircomm/ircomm_lmp.c
deleted file mode 100644
index e4cc847bb933..000000000000
--- a/net/irda/ircomm/ircomm_lmp.c
+++ /dev/null
@@ -1,350 +0,0 @@
-/*********************************************************************
- *
- * Filename: ircomm_lmp.c
- * Version: 1.0
- * Description: Interface between IrCOMM and IrLMP
- * Status: Stable
- * Author: Dag Brattli <dagb@cs.uit.no>
- * Created at: Sun Jun 6 20:48:27 1999
- * Modified at: Sun Dec 12 13:44:17 1999
- * Modified by: Dag Brattli <dagb@cs.uit.no>
- * Sources: Previous IrLPT work by Thomas Davis
- *
- * Copyright (c) 1999 Dag Brattli, All Rights Reserved.
- * Copyright (c) 2000-2003 Jean Tourrilhes <jt@hpl.hp.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
- ********************************************************************/
-
-#include <linux/init.h>
-#include <linux/gfp.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/irlmp.h>
-#include <net/irda/iriap.h>
-#include <net/irda/irda_device.h> /* struct irda_skb_cb */
-
-#include <net/irda/ircomm_event.h>
-#include <net/irda/ircomm_lmp.h>
-
-
-/*
- * Function ircomm_lmp_connect_request (self, userdata)
- *
- *
- *
- */
-static int ircomm_lmp_connect_request(struct ircomm_cb *self,
- struct sk_buff *userdata,
- struct ircomm_info *info)
-{
- int ret = 0;
-
- /* Don't forget to refcount it - should be NULL anyway */
- if(userdata)
- skb_get(userdata);
-
- ret = irlmp_connect_request(self->lsap, info->dlsap_sel,
- info->saddr, info->daddr, NULL, userdata);
- return ret;
-}
-
-/*
- * Function ircomm_lmp_connect_response (self, skb)
- *
- *
- *
- */
-static int ircomm_lmp_connect_response(struct ircomm_cb *self,
- struct sk_buff *userdata)
-{
- struct sk_buff *tx_skb;
-
- /* Any userdata supplied? */
- if (userdata == NULL) {
- tx_skb = alloc_skb(LMP_MAX_HEADER, GFP_ATOMIC);
- if (!tx_skb)
- return -ENOMEM;
-
- /* Reserve space for MUX and LAP header */
- skb_reserve(tx_skb, LMP_MAX_HEADER);
- } else {
- /*
- * Check that the client has reserved enough space for
- * headers
- */
- IRDA_ASSERT(skb_headroom(userdata) >= LMP_MAX_HEADER,
- return -1;);
-
- /* Don't forget to refcount it - should be NULL anyway */
- skb_get(userdata);
- tx_skb = userdata;
- }
-
- return irlmp_connect_response(self->lsap, tx_skb);
-}
-
-static int ircomm_lmp_disconnect_request(struct ircomm_cb *self,
- struct sk_buff *userdata,
- struct ircomm_info *info)
-{
- struct sk_buff *tx_skb;
- int ret;
-
- if (!userdata) {
- tx_skb = alloc_skb(LMP_MAX_HEADER, GFP_ATOMIC);
- if (!tx_skb)
- return -ENOMEM;
-
- /* Reserve space for MUX and LAP header */
- skb_reserve(tx_skb, LMP_MAX_HEADER);
- userdata = tx_skb;
- } else {
- /* Don't forget to refcount it - should be NULL anyway */
- skb_get(userdata);
- }
-
- ret = irlmp_disconnect_request(self->lsap, userdata);
-
- return ret;
-}
-
-/*
- * Function ircomm_lmp_flow_control (skb)
- *
- * This function is called when a data frame we have sent to IrLAP has
- * been deallocated. We do this to make sure we don't flood IrLAP with
- * frames, since we are not using the IrTTP flow control mechanism
- */
-static void ircomm_lmp_flow_control(struct sk_buff *skb)
-{
- struct irda_skb_cb *cb;
- struct ircomm_cb *self;
- int line;
-
- IRDA_ASSERT(skb != NULL, return;);
-
- cb = (struct irda_skb_cb *) skb->cb;
-
- line = cb->line;
-
- self = (struct ircomm_cb *) hashbin_lock_find(ircomm, line, NULL);
- if (!self) {
- pr_debug("%s(), didn't find myself\n", __func__);
- return;
- }
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return;);
-
- self->pkt_count--;
-
- if ((self->pkt_count < 2) && (self->flow_status == FLOW_STOP)) {
- pr_debug("%s(), asking TTY to start again!\n", __func__);
- self->flow_status = FLOW_START;
- if (self->notify.flow_indication)
- self->notify.flow_indication(self->notify.instance,
- self, FLOW_START);
- }
-}
-
-/*
- * Function ircomm_lmp_data_request (self, userdata)
- *
- * Send data frame to peer device
- *
- */
-static int ircomm_lmp_data_request(struct ircomm_cb *self,
- struct sk_buff *skb,
- int not_used)
-{
- struct irda_skb_cb *cb;
- int ret;
-
- IRDA_ASSERT(skb != NULL, return -1;);
-
- cb = (struct irda_skb_cb *) skb->cb;
-
- cb->line = self->line;
-
- pr_debug("%s(), sending frame\n", __func__);
-
- /* Don't forget to refcount it - see ircomm_tty_do_softint() */
- skb_get(skb);
-
- skb_orphan(skb);
- skb->destructor = ircomm_lmp_flow_control;
-
- if ((self->pkt_count++ > 7) && (self->flow_status == FLOW_START)) {
- pr_debug("%s(), asking TTY to slow down!\n", __func__);
- self->flow_status = FLOW_STOP;
- if (self->notify.flow_indication)
- self->notify.flow_indication(self->notify.instance,
- self, FLOW_STOP);
- }
- ret = irlmp_data_request(self->lsap, skb);
- if (ret) {
- net_err_ratelimited("%s(), failed\n", __func__);
- /* irlmp_data_request already free the packet */
- }
-
- return ret;
-}
-
-/*
- * Function ircomm_lmp_data_indication (instance, sap, skb)
- *
- * Incoming data which we must deliver to the state machine, to check
- * we are still connected.
- */
-static int ircomm_lmp_data_indication(void *instance, void *sap,
- struct sk_buff *skb)
-{
- struct ircomm_cb *self = (struct ircomm_cb *) instance;
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return -1;);
- IRDA_ASSERT(skb != NULL, return -1;);
-
- ircomm_do_event(self, IRCOMM_LMP_DATA_INDICATION, skb, NULL);
-
- /* Drop reference count - see ircomm_tty_data_indication(). */
- dev_kfree_skb(skb);
-
- return 0;
-}
-
-/*
- * Function ircomm_lmp_connect_confirm (instance, sap, qos, max_sdu_size,
- * max_header_size, skb)
- *
- * Connection has been confirmed by peer device
- *
- */
-static void ircomm_lmp_connect_confirm(void *instance, void *sap,
- struct qos_info *qos,
- __u32 max_seg_size,
- __u8 max_header_size,
- struct sk_buff *skb)
-{
- struct ircomm_cb *self = (struct ircomm_cb *) instance;
- struct ircomm_info info;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return;);
- IRDA_ASSERT(skb != NULL, return;);
- IRDA_ASSERT(qos != NULL, return;);
-
- info.max_data_size = max_seg_size;
- info.max_header_size = max_header_size;
- info.qos = qos;
-
- ircomm_do_event(self, IRCOMM_LMP_CONNECT_CONFIRM, skb, &info);
-
- /* Drop reference count - see ircomm_tty_connect_confirm(). */
- dev_kfree_skb(skb);
-}
-
-/*
- * Function ircomm_lmp_connect_indication (instance, sap, qos, max_sdu_size,
- * max_header_size, skb)
- *
- * Peer device wants to make a connection with us
- *
- */
-static void ircomm_lmp_connect_indication(void *instance, void *sap,
- struct qos_info *qos,
- __u32 max_seg_size,
- __u8 max_header_size,
- struct sk_buff *skb)
-{
- struct ircomm_cb *self = (struct ircomm_cb *)instance;
- struct ircomm_info info;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return;);
- IRDA_ASSERT(skb != NULL, return;);
- IRDA_ASSERT(qos != NULL, return;);
-
- info.max_data_size = max_seg_size;
- info.max_header_size = max_header_size;
- info.qos = qos;
-
- ircomm_do_event(self, IRCOMM_LMP_CONNECT_INDICATION, skb, &info);
-
- /* Drop reference count - see ircomm_tty_connect_indication(). */
- dev_kfree_skb(skb);
-}
-
-/*
- * Function ircomm_lmp_disconnect_indication (instance, sap, reason, skb)
- *
- * Peer device has closed the connection, or the link went down for some
- * other reason
- */
-static void ircomm_lmp_disconnect_indication(void *instance, void *sap,
- LM_REASON reason,
- struct sk_buff *skb)
-{
- struct ircomm_cb *self = (struct ircomm_cb *) instance;
- struct ircomm_info info;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return;);
-
- info.reason = reason;
-
- ircomm_do_event(self, IRCOMM_LMP_DISCONNECT_INDICATION, skb, &info);
-
- /* Drop reference count - see ircomm_tty_disconnect_indication(). */
- if(skb)
- dev_kfree_skb(skb);
-}
-/*
- * Function ircomm_open_lsap (self)
- *
- * Open LSAP. This function will only be used when using "raw" services
- *
- */
-int ircomm_open_lsap(struct ircomm_cb *self)
-{
- notify_t notify;
-
- /* Register callbacks */
- irda_notify_init(&notify);
- notify.data_indication = ircomm_lmp_data_indication;
- notify.connect_confirm = ircomm_lmp_connect_confirm;
- notify.connect_indication = ircomm_lmp_connect_indication;
- notify.disconnect_indication = ircomm_lmp_disconnect_indication;
- notify.instance = self;
- strlcpy(notify.name, "IrCOMM", sizeof(notify.name));
-
- self->lsap = irlmp_open_lsap(LSAP_ANY, &notify, 0);
- if (!self->lsap) {
- pr_debug("%sfailed to allocate tsap\n", __func__);
- return -1;
- }
- self->slsap_sel = self->lsap->slsap_sel;
-
- /*
- * Initialize the call-table for issuing commands
- */
- self->issue.data_request = ircomm_lmp_data_request;
- self->issue.connect_request = ircomm_lmp_connect_request;
- self->issue.connect_response = ircomm_lmp_connect_response;
- self->issue.disconnect_request = ircomm_lmp_disconnect_request;
-
- return 0;
-}
diff --git a/net/irda/ircomm/ircomm_param.c b/net/irda/ircomm/ircomm_param.c
deleted file mode 100644
index 5728e76ca6d5..000000000000
--- a/net/irda/ircomm/ircomm_param.c
+++ /dev/null
@@ -1,501 +0,0 @@
-/*********************************************************************
- *
- * Filename: ircomm_param.c
- * Version: 1.0
- * Description: Parameter handling for the IrCOMM protocol
- * Status: Experimental.
- * Author: Dag Brattli <dagb@cs.uit.no>
- * Created at: Mon Jun 7 10:25:11 1999
- * Modified at: Sun Jan 30 14:32:03 2000
- * Modified by: Dag Brattli <dagb@cs.uit.no>
- *
- * Copyright (c) 1999-2000 Dag Brattli, All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
- ********************************************************************/
-
-#include <linux/gfp.h>
-#include <linux/workqueue.h>
-#include <linux/interrupt.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/parameters.h>
-
-#include <net/irda/ircomm_core.h>
-#include <net/irda/ircomm_tty_attach.h>
-#include <net/irda/ircomm_tty.h>
-
-#include <net/irda/ircomm_param.h>
-
-static int ircomm_param_service_type(void *instance, irda_param_t *param,
- int get);
-static int ircomm_param_port_type(void *instance, irda_param_t *param,
- int get);
-static int ircomm_param_port_name(void *instance, irda_param_t *param,
- int get);
-static int ircomm_param_service_type(void *instance, irda_param_t *param,
- int get);
-static int ircomm_param_data_rate(void *instance, irda_param_t *param,
- int get);
-static int ircomm_param_data_format(void *instance, irda_param_t *param,
- int get);
-static int ircomm_param_flow_control(void *instance, irda_param_t *param,
- int get);
-static int ircomm_param_xon_xoff(void *instance, irda_param_t *param, int get);
-static int ircomm_param_enq_ack(void *instance, irda_param_t *param, int get);
-static int ircomm_param_line_status(void *instance, irda_param_t *param,
- int get);
-static int ircomm_param_dte(void *instance, irda_param_t *param, int get);
-static int ircomm_param_dce(void *instance, irda_param_t *param, int get);
-static int ircomm_param_poll(void *instance, irda_param_t *param, int get);
-
-static const pi_minor_info_t pi_minor_call_table_common[] = {
- { ircomm_param_service_type, PV_INT_8_BITS },
- { ircomm_param_port_type, PV_INT_8_BITS },
- { ircomm_param_port_name, PV_STRING }
-};
-static const pi_minor_info_t pi_minor_call_table_non_raw[] = {
- { ircomm_param_data_rate, PV_INT_32_BITS | PV_BIG_ENDIAN },
- { ircomm_param_data_format, PV_INT_8_BITS },
- { ircomm_param_flow_control, PV_INT_8_BITS },
- { ircomm_param_xon_xoff, PV_INT_16_BITS },
- { ircomm_param_enq_ack, PV_INT_16_BITS },
- { ircomm_param_line_status, PV_INT_8_BITS }
-};
-static const pi_minor_info_t pi_minor_call_table_9_wire[] = {
- { ircomm_param_dte, PV_INT_8_BITS },
- { ircomm_param_dce, PV_INT_8_BITS },
- { ircomm_param_poll, PV_NO_VALUE },
-};
-
-static const pi_major_info_t pi_major_call_table[] = {
- { pi_minor_call_table_common, 3 },
- { pi_minor_call_table_non_raw, 6 },
- { pi_minor_call_table_9_wire, 3 }
-/* { pi_minor_call_table_centronics } */
-};
-
-pi_param_info_t ircomm_param_info = { pi_major_call_table, 3, 0x0f, 4 };
-
-/*
- * Function ircomm_param_request (self, pi, flush)
- *
- * Queue a parameter for the control channel
- *
- */
-int ircomm_param_request(struct ircomm_tty_cb *self, __u8 pi, int flush)
-{
- unsigned long flags;
- struct sk_buff *skb;
- int count;
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
-
- /* Make sure we don't send parameters for raw mode */
- if (self->service_type == IRCOMM_3_WIRE_RAW)
- return 0;
-
- spin_lock_irqsave(&self->spinlock, flags);
-
- skb = self->ctrl_skb;
- if (!skb) {
- skb = alloc_skb(256, GFP_ATOMIC);
- if (!skb) {
- spin_unlock_irqrestore(&self->spinlock, flags);
- return -ENOMEM;
- }
-
- skb_reserve(skb, self->max_header_size);
- self->ctrl_skb = skb;
- }
- /*
- * Inserting is a little bit tricky since we don't know how much
- * room we will need. But this should hopefully work OK
- */
- count = irda_param_insert(self, pi, skb_tail_pointer(skb),
- skb_tailroom(skb), &ircomm_param_info);
- if (count < 0) {
- net_warn_ratelimited("%s(), no room for parameter!\n",
- __func__);
- spin_unlock_irqrestore(&self->spinlock, flags);
- return -1;
- }
- skb_put(skb, count);
- pr_debug("%s(), skb->len=%d\n", __func__, skb->len);
-
- spin_unlock_irqrestore(&self->spinlock, flags);
-
- if (flush) {
- /* ircomm_tty_do_softint will take care of the rest */
- schedule_work(&self->tqueue);
- }
-
- return count;
-}
-
-/*
- * Function ircomm_param_service_type (self, buf, len)
- *
- * Handle service type, this function will both be called after the LM-IAS
- * query and then the remote device sends its initial parameters
- *
- */
-static int ircomm_param_service_type(void *instance, irda_param_t *param,
- int get)
-{
- struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance;
- __u8 service_type = (__u8) param->pv.i;
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
-
- if (get) {
- param->pv.i = self->settings.service_type;
- return 0;
- }
-
- /* Find all common service types */
- service_type &= self->service_type;
- if (!service_type) {
- pr_debug("%s(), No common service type to use!\n", __func__);
- return -1;
- }
- pr_debug("%s(), services in common=%02x\n", __func__ ,
- service_type);
-
- /*
- * Now choose a preferred service type of those available
- */
- if (service_type & IRCOMM_CENTRONICS)
- self->settings.service_type = IRCOMM_CENTRONICS;
- else if (service_type & IRCOMM_9_WIRE)
- self->settings.service_type = IRCOMM_9_WIRE;
- else if (service_type & IRCOMM_3_WIRE)
- self->settings.service_type = IRCOMM_3_WIRE;
- else if (service_type & IRCOMM_3_WIRE_RAW)
- self->settings.service_type = IRCOMM_3_WIRE_RAW;
-
- pr_debug("%s(), resulting service type=0x%02x\n", __func__ ,
- self->settings.service_type);
-
- /*
- * Now the line is ready for some communication. Check if we are a
- * server, and send over some initial parameters.
- * Client do it in ircomm_tty_state_setup().
- * Note : we may get called from ircomm_tty_getvalue_confirm(),
- * therefore before we even have open any socket. And self->client
- * is initialised to TRUE only later. So, we check if the link is
- * really initialised. - Jean II
- */
- if ((self->max_header_size != IRCOMM_TTY_HDR_UNINITIALISED) &&
- (!self->client) &&
- (self->settings.service_type != IRCOMM_3_WIRE_RAW))
- {
- /* Init connection */
- ircomm_tty_send_initial_parameters(self);
- ircomm_tty_link_established(self);
- }
-
- return 0;
-}
-
-/*
- * Function ircomm_param_port_type (self, param)
- *
- * The port type parameter tells if the devices are serial or parallel.
- * Since we only advertise serial service, this parameter should only
- * be equal to IRCOMM_SERIAL.
- */
-static int ircomm_param_port_type(void *instance, irda_param_t *param, int get)
-{
- struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance;
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
-
- if (get)
- param->pv.i = IRCOMM_SERIAL;
- else {
- self->settings.port_type = (__u8) param->pv.i;
-
- pr_debug("%s(), port type=%d\n", __func__ ,
- self->settings.port_type);
- }
- return 0;
-}
-
-/*
- * Function ircomm_param_port_name (self, param)
- *
- * Exchange port name
- *
- */
-static int ircomm_param_port_name(void *instance, irda_param_t *param, int get)
-{
- struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance;
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
-
- if (get) {
- pr_debug("%s(), not imp!\n", __func__);
- } else {
- pr_debug("%s(), port-name=%s\n", __func__ , param->pv.c);
- strncpy(self->settings.port_name, param->pv.c, 32);
- }
-
- return 0;
-}
-
-/*
- * Function ircomm_param_data_rate (self, param)
- *
- * Exchange data rate to be used in this settings
- *
- */
-static int ircomm_param_data_rate(void *instance, irda_param_t *param, int get)
-{
- struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance;
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
-
- if (get)
- param->pv.i = self->settings.data_rate;
- else
- self->settings.data_rate = param->pv.i;
-
- pr_debug("%s(), data rate = %d\n", __func__ , param->pv.i);
-
- return 0;
-}
-
-/*
- * Function ircomm_param_data_format (self, param)
- *
- * Exchange data format to be used in this settings
- *
- */
-static int ircomm_param_data_format(void *instance, irda_param_t *param,
- int get)
-{
- struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance;
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
-
- if (get)
- param->pv.i = self->settings.data_format;
- else
- self->settings.data_format = (__u8) param->pv.i;
-
- return 0;
-}
-
-/*
- * Function ircomm_param_flow_control (self, param)
- *
- * Exchange flow control settings to be used in this settings
- *
- */
-static int ircomm_param_flow_control(void *instance, irda_param_t *param,
- int get)
-{
- struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance;
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
-
- if (get)
- param->pv.i = self->settings.flow_control;
- else
- self->settings.flow_control = (__u8) param->pv.i;
-
- pr_debug("%s(), flow control = 0x%02x\n", __func__ , (__u8)param->pv.i);
-
- return 0;
-}
-
-/*
- * Function ircomm_param_xon_xoff (self, param)
- *
- * Exchange XON/XOFF characters
- *
- */
-static int ircomm_param_xon_xoff(void *instance, irda_param_t *param, int get)
-{
- struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance;
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
-
- if (get) {
- param->pv.i = self->settings.xonxoff[0];
- param->pv.i |= self->settings.xonxoff[1] << 8;
- } else {
- self->settings.xonxoff[0] = (__u16) param->pv.i & 0xff;
- self->settings.xonxoff[1] = (__u16) param->pv.i >> 8;
- }
-
- pr_debug("%s(), XON/XOFF = 0x%02x,0x%02x\n", __func__ ,
- param->pv.i & 0xff, param->pv.i >> 8);
-
- return 0;
-}
-
-/*
- * Function ircomm_param_enq_ack (self, param)
- *
- * Exchange ENQ/ACK characters
- *
- */
-static int ircomm_param_enq_ack(void *instance, irda_param_t *param, int get)
-{
- struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance;
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
-
- if (get) {
- param->pv.i = self->settings.enqack[0];
- param->pv.i |= self->settings.enqack[1] << 8;
- } else {
- self->settings.enqack[0] = (__u16) param->pv.i & 0xff;
- self->settings.enqack[1] = (__u16) param->pv.i >> 8;
- }
-
- pr_debug("%s(), ENQ/ACK = 0x%02x,0x%02x\n", __func__ ,
- param->pv.i & 0xff, param->pv.i >> 8);
-
- return 0;
-}
-
-/*
- * Function ircomm_param_line_status (self, param)
- *
- *
- *
- */
-static int ircomm_param_line_status(void *instance, irda_param_t *param,
- int get)
-{
- pr_debug("%s(), not impl.\n", __func__);
-
- return 0;
-}
-
-/*
- * Function ircomm_param_dte (instance, param)
- *
- * If we get here, there must be some sort of null-modem connection, and
- * we are probably working in server mode as well.
- */
-static int ircomm_param_dte(void *instance, irda_param_t *param, int get)
-{
- struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance;
- __u8 dte;
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
-
- if (get)
- param->pv.i = self->settings.dte;
- else {
- dte = (__u8) param->pv.i;
-
- self->settings.dce = 0;
-
- if (dte & IRCOMM_DELTA_DTR)
- self->settings.dce |= (IRCOMM_DELTA_DSR|
- IRCOMM_DELTA_RI |
- IRCOMM_DELTA_CD);
- if (dte & IRCOMM_DTR)
- self->settings.dce |= (IRCOMM_DSR|
- IRCOMM_RI |
- IRCOMM_CD);
-
- if (dte & IRCOMM_DELTA_RTS)
- self->settings.dce |= IRCOMM_DELTA_CTS;
- if (dte & IRCOMM_RTS)
- self->settings.dce |= IRCOMM_CTS;
-
- /* Take appropriate actions */
- ircomm_tty_check_modem_status(self);
-
- /* Null modem cable emulator */
- self->settings.null_modem = TRUE;
- }
-
- return 0;
-}
-
-/*
- * Function ircomm_param_dce (instance, param)
- *
- *
- *
- */
-static int ircomm_param_dce(void *instance, irda_param_t *param, int get)
-{
- struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance;
- __u8 dce;
-
- pr_debug("%s(), dce = 0x%02x\n", __func__ , (__u8)param->pv.i);
-
- dce = (__u8) param->pv.i;
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
-
- self->settings.dce = dce;
-
- /* Check if any of the settings have changed */
- if (dce & 0x0f) {
- if (dce & IRCOMM_DELTA_CTS) {
- pr_debug("%s(), CTS\n", __func__);
- }
- }
-
- ircomm_tty_check_modem_status(self);
-
- return 0;
-}
-
-/*
- * Function ircomm_param_poll (instance, param)
- *
- * Called when the peer device is polling for the line settings
- *
- */
-static int ircomm_param_poll(void *instance, irda_param_t *param, int get)
-{
- struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance;
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
-
- /* Poll parameters are always of length 0 (just a signal) */
- if (!get) {
- /* Respond with DTE line settings */
- ircomm_param_request(self, IRCOMM_DTE, TRUE);
- }
- return 0;
-}
-
-
-
-
-
diff --git a/net/irda/ircomm/ircomm_ttp.c b/net/irda/ircomm/ircomm_ttp.c
deleted file mode 100644
index 4b81e0934770..000000000000
--- a/net/irda/ircomm/ircomm_ttp.c
+++ /dev/null
@@ -1,350 +0,0 @@
-/*********************************************************************
- *
- * Filename: ircomm_ttp.c
- * Version: 1.0
- * Description: Interface between IrCOMM and IrTTP
- * Status: Stable
- * Author: Dag Brattli <dagb@cs.uit.no>
- * Created at: Sun Jun 6 20:48:27 1999
- * Modified at: Mon Dec 13 11:35:13 1999
- * Modified by: Dag Brattli <dagb@cs.uit.no>
- *
- * Copyright (c) 1999 Dag Brattli, All Rights Reserved.
- * Copyright (c) 2000-2003 Jean Tourrilhes <jt@hpl.hp.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
- ********************************************************************/
-
-#include <linux/init.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/irlmp.h>
-#include <net/irda/iriap.h>
-#include <net/irda/irttp.h>
-
-#include <net/irda/ircomm_event.h>
-#include <net/irda/ircomm_ttp.h>
-
-static int ircomm_ttp_data_indication(void *instance, void *sap,
- struct sk_buff *skb);
-static void ircomm_ttp_connect_confirm(void *instance, void *sap,
- struct qos_info *qos,
- __u32 max_sdu_size,
- __u8 max_header_size,
- struct sk_buff *skb);
-static void ircomm_ttp_connect_indication(void *instance, void *sap,
- struct qos_info *qos,
- __u32 max_sdu_size,
- __u8 max_header_size,
- struct sk_buff *skb);
-static void ircomm_ttp_flow_indication(void *instance, void *sap,
- LOCAL_FLOW cmd);
-static void ircomm_ttp_disconnect_indication(void *instance, void *sap,
- LM_REASON reason,
- struct sk_buff *skb);
-static int ircomm_ttp_data_request(struct ircomm_cb *self,
- struct sk_buff *skb,
- int clen);
-static int ircomm_ttp_connect_request(struct ircomm_cb *self,
- struct sk_buff *userdata,
- struct ircomm_info *info);
-static int ircomm_ttp_connect_response(struct ircomm_cb *self,
- struct sk_buff *userdata);
-static int ircomm_ttp_disconnect_request(struct ircomm_cb *self,
- struct sk_buff *userdata,
- struct ircomm_info *info);
-
-/*
- * Function ircomm_open_tsap (self)
- *
- *
- *
- */
-int ircomm_open_tsap(struct ircomm_cb *self)
-{
- notify_t notify;
-
- /* Register callbacks */
- irda_notify_init(&notify);
- notify.data_indication = ircomm_ttp_data_indication;
- notify.connect_confirm = ircomm_ttp_connect_confirm;
- notify.connect_indication = ircomm_ttp_connect_indication;
- notify.flow_indication = ircomm_ttp_flow_indication;
- notify.disconnect_indication = ircomm_ttp_disconnect_indication;
- notify.instance = self;
- strlcpy(notify.name, "IrCOMM", sizeof(notify.name));
-
- self->tsap = irttp_open_tsap(LSAP_ANY, DEFAULT_INITIAL_CREDIT,
- &notify);
- if (!self->tsap) {
- pr_debug("%sfailed to allocate tsap\n", __func__);
- return -1;
- }
- self->slsap_sel = self->tsap->stsap_sel;
-
- /*
- * Initialize the call-table for issuing commands
- */
- self->issue.data_request = ircomm_ttp_data_request;
- self->issue.connect_request = ircomm_ttp_connect_request;
- self->issue.connect_response = ircomm_ttp_connect_response;
- self->issue.disconnect_request = ircomm_ttp_disconnect_request;
-
- return 0;
-}
-
-/*
- * Function ircomm_ttp_connect_request (self, userdata)
- *
- *
- *
- */
-static int ircomm_ttp_connect_request(struct ircomm_cb *self,
- struct sk_buff *userdata,
- struct ircomm_info *info)
-{
- int ret = 0;
-
- /* Don't forget to refcount it - should be NULL anyway */
- if(userdata)
- skb_get(userdata);
-
- ret = irttp_connect_request(self->tsap, info->dlsap_sel,
- info->saddr, info->daddr, NULL,
- TTP_SAR_DISABLE, userdata);
-
- return ret;
-}
-
-/*
- * Function ircomm_ttp_connect_response (self, skb)
- *
- *
- *
- */
-static int ircomm_ttp_connect_response(struct ircomm_cb *self,
- struct sk_buff *userdata)
-{
- int ret;
-
- /* Don't forget to refcount it - should be NULL anyway */
- if(userdata)
- skb_get(userdata);
-
- ret = irttp_connect_response(self->tsap, TTP_SAR_DISABLE, userdata);
-
- return ret;
-}
-
-/*
- * Function ircomm_ttp_data_request (self, userdata)
- *
- * Send IrCOMM data to IrTTP layer. Currently we do not try to combine
- * control data with pure data, so they will be sent as separate frames.
- * Should not be a big problem though, since control frames are rare. But
- * some of them are sent after connection establishment, so this can
- * increase the latency a bit.
- */
-static int ircomm_ttp_data_request(struct ircomm_cb *self,
- struct sk_buff *skb,
- int clen)
-{
- int ret;
-
- IRDA_ASSERT(skb != NULL, return -1;);
-
- pr_debug("%s(), clen=%d\n", __func__ , clen);
-
- /*
- * Insert clen field, currently we either send data only, or control
- * only frames, to make things easier and avoid queueing
- */
- IRDA_ASSERT(skb_headroom(skb) >= IRCOMM_HEADER_SIZE, return -1;);
-
- /* Don't forget to refcount it - see ircomm_tty_do_softint() */
- skb_get(skb);
-
- skb_push(skb, IRCOMM_HEADER_SIZE);
-
- skb->data[0] = clen;
-
- ret = irttp_data_request(self->tsap, skb);
- if (ret) {
- net_err_ratelimited("%s(), failed\n", __func__);
- /* irttp_data_request already free the packet */
- }
-
- return ret;
-}
-
-/*
- * Function ircomm_ttp_data_indication (instance, sap, skb)
- *
- * Incoming data
- *
- */
-static int ircomm_ttp_data_indication(void *instance, void *sap,
- struct sk_buff *skb)
-{
- struct ircomm_cb *self = (struct ircomm_cb *) instance;
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return -1;);
- IRDA_ASSERT(skb != NULL, return -1;);
-
- ircomm_do_event(self, IRCOMM_TTP_DATA_INDICATION, skb, NULL);
-
- /* Drop reference count - see ircomm_tty_data_indication(). */
- dev_kfree_skb(skb);
-
- return 0;
-}
-
-static void ircomm_ttp_connect_confirm(void *instance, void *sap,
- struct qos_info *qos,
- __u32 max_sdu_size,
- __u8 max_header_size,
- struct sk_buff *skb)
-{
- struct ircomm_cb *self = (struct ircomm_cb *) instance;
- struct ircomm_info info;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return;);
- IRDA_ASSERT(skb != NULL, return;);
- IRDA_ASSERT(qos != NULL, goto out;);
-
- if (max_sdu_size != TTP_SAR_DISABLE) {
- net_err_ratelimited("%s(), SAR not allowed for IrCOMM!\n",
- __func__);
- goto out;
- }
-
- info.max_data_size = irttp_get_max_seg_size(self->tsap)
- - IRCOMM_HEADER_SIZE;
- info.max_header_size = max_header_size + IRCOMM_HEADER_SIZE;
- info.qos = qos;
-
- ircomm_do_event(self, IRCOMM_TTP_CONNECT_CONFIRM, skb, &info);
-
-out:
- /* Drop reference count - see ircomm_tty_connect_confirm(). */
- dev_kfree_skb(skb);
-}
-
-/*
- * Function ircomm_ttp_connect_indication (instance, sap, qos, max_sdu_size,
- * max_header_size, skb)
- *
- *
- *
- */
-static void ircomm_ttp_connect_indication(void *instance, void *sap,
- struct qos_info *qos,
- __u32 max_sdu_size,
- __u8 max_header_size,
- struct sk_buff *skb)
-{
- struct ircomm_cb *self = (struct ircomm_cb *)instance;
- struct ircomm_info info;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return;);
- IRDA_ASSERT(skb != NULL, return;);
- IRDA_ASSERT(qos != NULL, goto out;);
-
- if (max_sdu_size != TTP_SAR_DISABLE) {
- net_err_ratelimited("%s(), SAR not allowed for IrCOMM!\n",
- __func__);
- goto out;
- }
-
- info.max_data_size = irttp_get_max_seg_size(self->tsap)
- - IRCOMM_HEADER_SIZE;
- info.max_header_size = max_header_size + IRCOMM_HEADER_SIZE;
- info.qos = qos;
-
- ircomm_do_event(self, IRCOMM_TTP_CONNECT_INDICATION, skb, &info);
-
-out:
- /* Drop reference count - see ircomm_tty_connect_indication(). */
- dev_kfree_skb(skb);
-}
-
-/*
- * Function ircomm_ttp_disconnect_request (self, userdata, info)
- *
- *
- *
- */
-static int ircomm_ttp_disconnect_request(struct ircomm_cb *self,
- struct sk_buff *userdata,
- struct ircomm_info *info)
-{
- int ret;
-
- /* Don't forget to refcount it - should be NULL anyway */
- if(userdata)
- skb_get(userdata);
-
- ret = irttp_disconnect_request(self->tsap, userdata, P_NORMAL);
-
- return ret;
-}
-
-/*
- * Function ircomm_ttp_disconnect_indication (instance, sap, reason, skb)
- *
- *
- *
- */
-static void ircomm_ttp_disconnect_indication(void *instance, void *sap,
- LM_REASON reason,
- struct sk_buff *skb)
-{
- struct ircomm_cb *self = (struct ircomm_cb *) instance;
- struct ircomm_info info;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return;);
-
- info.reason = reason;
-
- ircomm_do_event(self, IRCOMM_TTP_DISCONNECT_INDICATION, skb, &info);
-
- /* Drop reference count - see ircomm_tty_disconnect_indication(). */
- if(skb)
- dev_kfree_skb(skb);
-}
-
-/*
- * Function ircomm_ttp_flow_indication (instance, sap, cmd)
- *
- * Layer below is telling us to start or stop the flow of data
- *
- */
-static void ircomm_ttp_flow_indication(void *instance, void *sap,
- LOCAL_FLOW cmd)
-{
- struct ircomm_cb *self = (struct ircomm_cb *) instance;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return;);
-
- if (self->notify.flow_indication)
- self->notify.flow_indication(self->notify.instance, self, cmd);
-}
-
-
diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c
deleted file mode 100644
index ec157c3419b5..000000000000
--- a/net/irda/ircomm/ircomm_tty.c
+++ /dev/null
@@ -1,1329 +0,0 @@
-/*********************************************************************
- *
- * Filename: ircomm_tty.c
- * Version: 1.0
- * Description: IrCOMM serial TTY driver
- * Status: Experimental.
- * Author: Dag Brattli <dagb@cs.uit.no>
- * Created at: Sun Jun 6 21:00:56 1999
- * Modified at: Wed Feb 23 00:09:02 2000
- * Modified by: Dag Brattli <dagb@cs.uit.no>
- * Sources: serial.c and previous IrCOMM work by Takahide Higuchi
- *
- * Copyright (c) 1999-2000 Dag Brattli, All Rights Reserved.
- * Copyright (c) 2000-2003 Jean Tourrilhes <jt@hpl.hp.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
- ********************************************************************/
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/fs.h>
-#include <linux/slab.h>
-#include <linux/sched/signal.h>
-#include <linux/seq_file.h>
-#include <linux/termios.h>
-#include <linux/tty.h>
-#include <linux/tty_flip.h>
-#include <linux/interrupt.h>
-#include <linux/device.h> /* for MODULE_ALIAS_CHARDEV_MAJOR */
-
-#include <linux/uaccess.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/irmod.h>
-
-#include <net/irda/ircomm_core.h>
-#include <net/irda/ircomm_param.h>
-#include <net/irda/ircomm_tty_attach.h>
-#include <net/irda/ircomm_tty.h>
-
-static int ircomm_tty_install(struct tty_driver *driver,
- struct tty_struct *tty);
-static int ircomm_tty_open(struct tty_struct *tty, struct file *filp);
-static void ircomm_tty_close(struct tty_struct * tty, struct file *filp);
-static int ircomm_tty_write(struct tty_struct * tty,
- const unsigned char *buf, int count);
-static int ircomm_tty_write_room(struct tty_struct *tty);
-static void ircomm_tty_throttle(struct tty_struct *tty);
-static void ircomm_tty_unthrottle(struct tty_struct *tty);
-static int ircomm_tty_chars_in_buffer(struct tty_struct *tty);
-static void ircomm_tty_flush_buffer(struct tty_struct *tty);
-static void ircomm_tty_send_xchar(struct tty_struct *tty, char ch);
-static void ircomm_tty_wait_until_sent(struct tty_struct *tty, int timeout);
-static void ircomm_tty_hangup(struct tty_struct *tty);
-static void ircomm_tty_do_softint(struct work_struct *work);
-static void ircomm_tty_shutdown(struct ircomm_tty_cb *self);
-static void ircomm_tty_stop(struct tty_struct *tty);
-
-static int ircomm_tty_data_indication(void *instance, void *sap,
- struct sk_buff *skb);
-static int ircomm_tty_control_indication(void *instance, void *sap,
- struct sk_buff *skb);
-static void ircomm_tty_flow_indication(void *instance, void *sap,
- LOCAL_FLOW cmd);
-#ifdef CONFIG_PROC_FS
-static const struct file_operations ircomm_tty_proc_fops;
-#endif /* CONFIG_PROC_FS */
-static struct tty_driver *driver;
-
-static hashbin_t *ircomm_tty = NULL;
-
-static const struct tty_operations ops = {
- .install = ircomm_tty_install,
- .open = ircomm_tty_open,
- .close = ircomm_tty_close,
- .write = ircomm_tty_write,
- .write_room = ircomm_tty_write_room,
- .chars_in_buffer = ircomm_tty_chars_in_buffer,
- .flush_buffer = ircomm_tty_flush_buffer,
- .ioctl = ircomm_tty_ioctl, /* ircomm_tty_ioctl.c */
- .tiocmget = ircomm_tty_tiocmget, /* ircomm_tty_ioctl.c */
- .tiocmset = ircomm_tty_tiocmset, /* ircomm_tty_ioctl.c */
- .throttle = ircomm_tty_throttle,
- .unthrottle = ircomm_tty_unthrottle,
- .send_xchar = ircomm_tty_send_xchar,
- .set_termios = ircomm_tty_set_termios,
- .stop = ircomm_tty_stop,
- .start = ircomm_tty_start,
- .hangup = ircomm_tty_hangup,
- .wait_until_sent = ircomm_tty_wait_until_sent,
-#ifdef CONFIG_PROC_FS
- .proc_fops = &ircomm_tty_proc_fops,
-#endif /* CONFIG_PROC_FS */
-};
-
-static void ircomm_port_raise_dtr_rts(struct tty_port *port, int raise)
-{
- struct ircomm_tty_cb *self = container_of(port, struct ircomm_tty_cb,
- port);
- /*
- * Here, we use to lock those two guys, but as ircomm_param_request()
- * does it itself, I don't see the point (and I see the deadlock).
- * Jean II
- */
- if (raise)
- self->settings.dte |= IRCOMM_RTS | IRCOMM_DTR;
- else
- self->settings.dte &= ~(IRCOMM_RTS | IRCOMM_DTR);
-
- ircomm_param_request(self, IRCOMM_DTE, TRUE);
-}
-
-static int ircomm_port_carrier_raised(struct tty_port *port)
-{
- struct ircomm_tty_cb *self = container_of(port, struct ircomm_tty_cb,
- port);
- return self->settings.dce & IRCOMM_CD;
-}
-
-static const struct tty_port_operations ircomm_port_ops = {
- .dtr_rts = ircomm_port_raise_dtr_rts,
- .carrier_raised = ircomm_port_carrier_raised,
-};
-
-/*
- * Function ircomm_tty_init()
- *
- * Init IrCOMM TTY layer/driver
- *
- */
-static int __init ircomm_tty_init(void)
-{
- driver = alloc_tty_driver(IRCOMM_TTY_PORTS);
- if (!driver)
- return -ENOMEM;
- ircomm_tty = hashbin_new(HB_LOCK);
- if (ircomm_tty == NULL) {
- net_err_ratelimited("%s(), can't allocate hashbin!\n",
- __func__);
- put_tty_driver(driver);
- return -ENOMEM;
- }
-
- driver->driver_name = "ircomm";
- driver->name = "ircomm";
- driver->major = IRCOMM_TTY_MAJOR;
- driver->minor_start = IRCOMM_TTY_MINOR;
- driver->type = TTY_DRIVER_TYPE_SERIAL;
- driver->subtype = SERIAL_TYPE_NORMAL;
- driver->init_termios = tty_std_termios;
- driver->init_termios.c_cflag = B9600 | CS8 | CREAD | HUPCL | CLOCAL;
- driver->flags = TTY_DRIVER_REAL_RAW;
- tty_set_operations(driver, &ops);
- if (tty_register_driver(driver)) {
- net_err_ratelimited("%s(): Couldn't register serial driver\n",
- __func__);
- put_tty_driver(driver);
- return -1;
- }
- return 0;
-}
-
-static void __exit __ircomm_tty_cleanup(struct ircomm_tty_cb *self)
-{
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
-
- ircomm_tty_shutdown(self);
-
- self->magic = 0;
- tty_port_destroy(&self->port);
- kfree(self);
-}
-
-/*
- * Function ircomm_tty_cleanup ()
- *
- * Remove IrCOMM TTY layer/driver
- *
- */
-static void __exit ircomm_tty_cleanup(void)
-{
- int ret;
-
- ret = tty_unregister_driver(driver);
- if (ret) {
- net_err_ratelimited("%s(), failed to unregister driver\n",
- __func__);
- return;
- }
-
- hashbin_delete(ircomm_tty, (FREE_FUNC) __ircomm_tty_cleanup);
- put_tty_driver(driver);
-}
-
-/*
- * Function ircomm_startup (self)
- *
- *
- *
- */
-static int ircomm_tty_startup(struct ircomm_tty_cb *self)
-{
- notify_t notify;
- int ret = -ENODEV;
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
-
- /* Check if already open */
- if (tty_port_initialized(&self->port)) {
- pr_debug("%s(), already open so break out!\n", __func__);
- return 0;
- }
- tty_port_set_initialized(&self->port, 1);
-
- /* Register with IrCOMM */
- irda_notify_init(&notify);
- /* These callbacks we must handle ourselves */
- notify.data_indication = ircomm_tty_data_indication;
- notify.udata_indication = ircomm_tty_control_indication;
- notify.flow_indication = ircomm_tty_flow_indication;
-
- /* Use the ircomm_tty interface for these ones */
- notify.disconnect_indication = ircomm_tty_disconnect_indication;
- notify.connect_confirm = ircomm_tty_connect_confirm;
- notify.connect_indication = ircomm_tty_connect_indication;
- strlcpy(notify.name, "ircomm_tty", sizeof(notify.name));
- notify.instance = self;
-
- if (!self->ircomm) {
- self->ircomm = ircomm_open(&notify, self->service_type,
- self->line);
- }
- if (!self->ircomm)
- goto err;
-
- self->slsap_sel = self->ircomm->slsap_sel;
-
- /* Connect IrCOMM link with remote device */
- ret = ircomm_tty_attach_cable(self);
- if (ret < 0) {
- net_err_ratelimited("%s(), error attaching cable!\n", __func__);
- goto err;
- }
-
- return 0;
-err:
- tty_port_set_initialized(&self->port, 0);
- return ret;
-}
-
-/*
- * Function ircomm_block_til_ready (self, filp)
- *
- *
- *
- */
-static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self,
- struct tty_struct *tty, struct file *filp)
-{
- struct tty_port *port = &self->port;
- DECLARE_WAITQUEUE(wait, current);
- int retval;
- int do_clocal = 0;
- unsigned long flags;
-
- /*
- * If non-blocking mode is set, or the port is not enabled,
- * then make the check up front and then exit.
- */
- if (tty_io_error(tty)) {
- tty_port_set_active(port, 1);
- return 0;
- }
-
- if (filp->f_flags & O_NONBLOCK) {
- /* nonblock mode is set */
- if (C_BAUD(tty))
- tty_port_raise_dtr_rts(port);
- tty_port_set_active(port, 1);
- pr_debug("%s(), O_NONBLOCK requested!\n", __func__);
- return 0;
- }
-
- if (C_CLOCAL(tty)) {
- pr_debug("%s(), doing CLOCAL!\n", __func__);
- do_clocal = 1;
- }
-
- /* Wait for carrier detect and the line to become
- * free (i.e., not in use by the callout). While we are in
- * this loop, port->count is dropped by one, so that
- * mgsl_close() knows when to free things. We restore it upon
- * exit, either normal or abnormal.
- */
-
- retval = 0;
- add_wait_queue(&port->open_wait, &wait);
-
- pr_debug("%s(%d):block_til_ready before block on %s open_count=%d\n",
- __FILE__, __LINE__, tty->driver->name, port->count);
-
- spin_lock_irqsave(&port->lock, flags);
- port->count--;
- port->blocked_open++;
- spin_unlock_irqrestore(&port->lock, flags);
-
- while (1) {
- if (C_BAUD(tty) && tty_port_initialized(port))
- tty_port_raise_dtr_rts(port);
-
- set_current_state(TASK_INTERRUPTIBLE);
-
- if (tty_hung_up_p(filp) || !tty_port_initialized(port)) {
- retval = (port->flags & ASYNC_HUP_NOTIFY) ?
- -EAGAIN : -ERESTARTSYS;
- break;
- }
-
- /*
- * Check if link is ready now. Even if CLOCAL is
- * specified, we cannot return before the IrCOMM link is
- * ready
- */
- if ((do_clocal || tty_port_carrier_raised(port)) &&
- self->state == IRCOMM_TTY_READY)
- {
- break;
- }
-
- if (signal_pending(current)) {
- retval = -ERESTARTSYS;
- break;
- }
-
- pr_debug("%s(%d):block_til_ready blocking on %s open_count=%d\n",
- __FILE__, __LINE__, tty->driver->name, port->count);
-
- schedule();
- }
-
- __set_current_state(TASK_RUNNING);
- remove_wait_queue(&port->open_wait, &wait);
-
- spin_lock_irqsave(&port->lock, flags);
- if (!tty_hung_up_p(filp))
- port->count++;
- port->blocked_open--;
- spin_unlock_irqrestore(&port->lock, flags);
-
- pr_debug("%s(%d):block_til_ready after blocking on %s open_count=%d\n",
- __FILE__, __LINE__, tty->driver->name, port->count);
-
- if (!retval)
- tty_port_set_active(port, 1);
-
- return retval;
-}
-
-
-static int ircomm_tty_install(struct tty_driver *driver, struct tty_struct *tty)
-{
- struct ircomm_tty_cb *self;
- unsigned int line = tty->index;
-
- /* Check if instance already exists */
- self = hashbin_lock_find(ircomm_tty, line, NULL);
- if (!self) {
- /* No, so make new instance */
- self = kzalloc(sizeof(struct ircomm_tty_cb), GFP_KERNEL);
- if (self == NULL)
- return -ENOMEM;
-
- tty_port_init(&self->port);
- self->port.ops = &ircomm_port_ops;
- self->magic = IRCOMM_TTY_MAGIC;
- self->flow = FLOW_STOP;
-
- self->line = line;
- INIT_WORK(&self->tqueue, ircomm_tty_do_softint);
- self->max_header_size = IRCOMM_TTY_HDR_UNINITIALISED;
- self->max_data_size = IRCOMM_TTY_DATA_UNINITIALISED;
-
- /* Init some important stuff */
- init_timer(&self->watchdog_timer);
- spin_lock_init(&self->spinlock);
-
- /*
- * Force TTY into raw mode by default which is usually what
- * we want for IrCOMM and IrLPT. This way applications will
- * not have to twiddle with printcap etc.
- *
- * Note this is completely usafe and doesn't work properly
- */
- tty->termios.c_iflag = 0;
- tty->termios.c_oflag = 0;
-
- /* Insert into hash */
- hashbin_insert(ircomm_tty, (irda_queue_t *) self, line, NULL);
- }
-
- tty->driver_data = self;
-
- return tty_port_install(&self->port, driver, tty);
-}
-
-/*
- * Function ircomm_tty_open (tty, filp)
- *
- * This routine is called when a particular tty device is opened. This
- * routine is mandatory; if this routine is not filled in, the attempted
- * open will fail with ENODEV.
- */
-static int ircomm_tty_open(struct tty_struct *tty, struct file *filp)
-{
- struct ircomm_tty_cb *self = tty->driver_data;
- unsigned long flags;
- int ret;
-
- /* ++ is not atomic, so this should be protected - Jean II */
- spin_lock_irqsave(&self->port.lock, flags);
- self->port.count++;
- spin_unlock_irqrestore(&self->port.lock, flags);
- tty_port_tty_set(&self->port, tty);
-
- pr_debug("%s(), %s%d, count = %d\n", __func__ , tty->driver->name,
- self->line, self->port.count);
-
- /* Not really used by us, but lets do it anyway */
- self->port.low_latency = (self->port.flags & ASYNC_LOW_LATENCY) ? 1 : 0;
-
- /* Check if this is a "normal" ircomm device, or an irlpt device */
- if (self->line < 0x10) {
- self->service_type = IRCOMM_3_WIRE | IRCOMM_9_WIRE;
- self->settings.service_type = IRCOMM_9_WIRE; /* 9 wire as default */
- /* Jan Kiszka -> add DSR/RI -> Conform to IrCOMM spec */
- self->settings.dce = IRCOMM_CTS | IRCOMM_CD | IRCOMM_DSR | IRCOMM_RI; /* Default line settings */
- pr_debug("%s(), IrCOMM device\n", __func__);
- } else {
- pr_debug("%s(), IrLPT device\n", __func__);
- self->service_type = IRCOMM_3_WIRE_RAW;
- self->settings.service_type = IRCOMM_3_WIRE_RAW; /* Default */
- }
-
- ret = ircomm_tty_startup(self);
- if (ret)
- return ret;
-
- ret = ircomm_tty_block_til_ready(self, tty, filp);
- if (ret) {
- pr_debug("%s(), returning after block_til_ready with %d\n",
- __func__, ret);
-
- return ret;
- }
- return 0;
-}
-
-/*
- * Function ircomm_tty_close (tty, filp)
- *
- * This routine is called when a particular tty device is closed.
- *
- */
-static void ircomm_tty_close(struct tty_struct *tty, struct file *filp)
-{
- struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data;
- struct tty_port *port = &self->port;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
-
- if (tty_port_close_start(port, tty, filp) == 0)
- return;
-
- ircomm_tty_shutdown(self);
-
- tty_driver_flush_buffer(tty);
-
- tty_port_close_end(port, tty);
- tty_port_tty_set(port, NULL);
-}
-
-/*
- * Function ircomm_tty_flush_buffer (tty)
- *
- *
- *
- */
-static void ircomm_tty_flush_buffer(struct tty_struct *tty)
-{
- struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
-
- /*
- * Let do_softint() do this to avoid race condition with
- * do_softint() ;-)
- */
- schedule_work(&self->tqueue);
-}
-
-/*
- * Function ircomm_tty_do_softint (work)
- *
- * We use this routine to give the write wakeup to the user at at a
- * safe time (as fast as possible after write have completed). This
- * can be compared to the Tx interrupt.
- */
-static void ircomm_tty_do_softint(struct work_struct *work)
-{
- struct ircomm_tty_cb *self =
- container_of(work, struct ircomm_tty_cb, tqueue);
- struct tty_struct *tty;
- unsigned long flags;
- struct sk_buff *skb, *ctrl_skb;
-
- if (!self || self->magic != IRCOMM_TTY_MAGIC)
- return;
-
- tty = tty_port_tty_get(&self->port);
- if (!tty)
- return;
-
- /* Unlink control buffer */
- spin_lock_irqsave(&self->spinlock, flags);
-
- ctrl_skb = self->ctrl_skb;
- self->ctrl_skb = NULL;
-
- spin_unlock_irqrestore(&self->spinlock, flags);
-
- /* Flush control buffer if any */
- if(ctrl_skb) {
- if(self->flow == FLOW_START)
- ircomm_control_request(self->ircomm, ctrl_skb);
- /* Drop reference count - see ircomm_ttp_data_request(). */
- dev_kfree_skb(ctrl_skb);
- }
-
- if (tty->hw_stopped)
- goto put;
-
- /* Unlink transmit buffer */
- spin_lock_irqsave(&self->spinlock, flags);
-
- skb = self->tx_skb;
- self->tx_skb = NULL;
-
- spin_unlock_irqrestore(&self->spinlock, flags);
-
- /* Flush transmit buffer if any */
- if (skb) {
- ircomm_tty_do_event(self, IRCOMM_TTY_DATA_REQUEST, skb, NULL);
- /* Drop reference count - see ircomm_ttp_data_request(). */
- dev_kfree_skb(skb);
- }
-
- /* Check if user (still) wants to be waken up */
- tty_wakeup(tty);
-put:
- tty_kref_put(tty);
-}
-
-/*
- * Function ircomm_tty_write (tty, buf, count)
- *
- * This routine is called by the kernel to write a series of characters
- * to the tty device. The characters may come from user space or kernel
- * space. This routine will return the number of characters actually
- * accepted for writing. This routine is mandatory.
- */
-static int ircomm_tty_write(struct tty_struct *tty,
- const unsigned char *buf, int count)
-{
- struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data;
- unsigned long flags;
- struct sk_buff *skb;
- int tailroom = 0;
- int len = 0;
- int size;
-
- pr_debug("%s(), count=%d, hw_stopped=%d\n", __func__ , count,
- tty->hw_stopped);
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
-
- /* We may receive packets from the TTY even before we have finished
- * our setup. Not cool.
- * The problem is that we don't know the final header and data size
- * to create the proper skb, so any skb we would create would have
- * bogus header and data size, so need care.
- * We use a bogus header size to safely detect this condition.
- * Another problem is that hw_stopped was set to 0 way before it
- * should be, so we would drop this skb. It should now be fixed.
- * One option is to not accept data until we are properly setup.
- * But, I suspect that when it happens, the ppp line discipline
- * just "drops" the data, which might screw up connect scripts.
- * The second option is to create a "safe skb", with large header
- * and small size (see ircomm_tty_open() for values).
- * We just need to make sure that when the real values get filled,
- * we don't mess up the original "safe skb" (see tx_data_size).
- * Jean II */
- if (self->max_header_size == IRCOMM_TTY_HDR_UNINITIALISED) {
- pr_debug("%s() : not initialised\n", __func__);
-#ifdef IRCOMM_NO_TX_BEFORE_INIT
- /* We didn't consume anything, TTY will retry */
- return 0;
-#endif
- }
-
- if (count < 1)
- return 0;
-
- /* Protect our manipulation of self->tx_skb and related */
- spin_lock_irqsave(&self->spinlock, flags);
-
- /* Fetch current transmit buffer */
- skb = self->tx_skb;
-
- /*
- * Send out all the data we get, possibly as multiple fragmented
- * frames, but this will only happen if the data is larger than the
- * max data size. The normal case however is just the opposite, and
- * this function may be called multiple times, and will then actually
- * defragment the data and send it out as one packet as soon as
- * possible, but at a safer point in time
- */
- while (count) {
- size = count;
-
- /* Adjust data size to the max data size */
- if (size > self->max_data_size)
- size = self->max_data_size;
-
- /*
- * Do we already have a buffer ready for transmit, or do
- * we need to allocate a new frame
- */
- if (skb) {
- /*
- * Any room for more data at the end of the current
- * transmit buffer? Cannot use skb_tailroom, since
- * dev_alloc_skb gives us a larger skb than we
- * requested
- * Note : use tx_data_size, because max_data_size
- * may have changed and we don't want to overwrite
- * the skb. - Jean II
- */
- if ((tailroom = (self->tx_data_size - skb->len)) > 0) {
- /* Adjust data to tailroom */
- if (size > tailroom)
- size = tailroom;
- } else {
- /*
- * Current transmit frame is full, so break
- * out, so we can send it as soon as possible
- */
- break;
- }
- } else {
- /* Prepare a full sized frame */
- skb = alloc_skb(self->max_data_size+
- self->max_header_size,
- GFP_ATOMIC);
- if (!skb) {
- spin_unlock_irqrestore(&self->spinlock, flags);
- return -ENOBUFS;
- }
- skb_reserve(skb, self->max_header_size);
- self->tx_skb = skb;
- /* Remember skb size because max_data_size may
- * change later on - Jean II */
- self->tx_data_size = self->max_data_size;
- }
-
- /* Copy data */
- skb_put_data(skb, buf + len, size);
-
- count -= size;
- len += size;
- }
-
- spin_unlock_irqrestore(&self->spinlock, flags);
-
- /*
- * Schedule a new thread which will transmit the frame as soon
- * as possible, but at a safe point in time. We do this so the
- * "user" can give us data multiple times, as PPP does (because of
- * its 256 byte tx buffer). We will then defragment and send out
- * all this data as one single packet.
- */
- schedule_work(&self->tqueue);
-
- return len;
-}
-
-/*
- * Function ircomm_tty_write_room (tty)
- *
- * This routine returns the numbers of characters the tty driver will
- * accept for queuing to be written. This number is subject to change as
- * output buffers get emptied, or if the output flow control is acted.
- */
-static int ircomm_tty_write_room(struct tty_struct *tty)
-{
- struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data;
- unsigned long flags;
- int ret;
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
-
-#ifdef IRCOMM_NO_TX_BEFORE_INIT
- /* max_header_size tells us if the channel is initialised or not. */
- if (self->max_header_size == IRCOMM_TTY_HDR_UNINITIALISED)
- /* Don't bother us yet */
- return 0;
-#endif
-
- /* Check if we are allowed to transmit any data.
- * hw_stopped is the regular flow control.
- * Jean II */
- if (tty->hw_stopped)
- ret = 0;
- else {
- spin_lock_irqsave(&self->spinlock, flags);
- if (self->tx_skb)
- ret = self->tx_data_size - self->tx_skb->len;
- else
- ret = self->max_data_size;
- spin_unlock_irqrestore(&self->spinlock, flags);
- }
- pr_debug("%s(), ret=%d\n", __func__ , ret);
-
- return ret;
-}
-
-/*
- * Function ircomm_tty_wait_until_sent (tty, timeout)
- *
- * This routine waits until the device has written out all of the
- * characters in its transmitter FIFO.
- */
-static void ircomm_tty_wait_until_sent(struct tty_struct *tty, int timeout)
-{
- struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data;
- unsigned long orig_jiffies, poll_time;
- unsigned long flags;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
-
- orig_jiffies = jiffies;
-
- /* Set poll time to 200 ms */
- poll_time = msecs_to_jiffies(200);
- if (timeout)
- poll_time = min_t(unsigned long, timeout, poll_time);
-
- spin_lock_irqsave(&self->spinlock, flags);
- while (self->tx_skb && self->tx_skb->len) {
- spin_unlock_irqrestore(&self->spinlock, flags);
- schedule_timeout_interruptible(poll_time);
- spin_lock_irqsave(&self->spinlock, flags);
- if (signal_pending(current))
- break;
- if (timeout && time_after(jiffies, orig_jiffies + timeout))
- break;
- }
- spin_unlock_irqrestore(&self->spinlock, flags);
- __set_current_state(TASK_RUNNING);
-}
-
-/*
- * Function ircomm_tty_throttle (tty)
- *
- * This routine notifies the tty driver that input buffers for the line
- * discipline are close to full, and it should somehow signal that no
- * more characters should be sent to the tty.
- */
-static void ircomm_tty_throttle(struct tty_struct *tty)
-{
- struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
-
- /* Software flow control? */
- if (I_IXOFF(tty))
- ircomm_tty_send_xchar(tty, STOP_CHAR(tty));
-
- /* Hardware flow control? */
- if (C_CRTSCTS(tty)) {
- self->settings.dte &= ~IRCOMM_RTS;
- self->settings.dte |= IRCOMM_DELTA_RTS;
-
- ircomm_param_request(self, IRCOMM_DTE, TRUE);
- }
-
- ircomm_flow_request(self->ircomm, FLOW_STOP);
-}
-
-/*
- * Function ircomm_tty_unthrottle (tty)
- *
- * This routine notifies the tty drivers that it should signals that
- * characters can now be sent to the tty without fear of overrunning the
- * input buffers of the line disciplines.
- */
-static void ircomm_tty_unthrottle(struct tty_struct *tty)
-{
- struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
-
- /* Using software flow control? */
- if (I_IXOFF(tty))
- ircomm_tty_send_xchar(tty, START_CHAR(tty));
-
- /* Using hardware flow control? */
- if (C_CRTSCTS(tty)) {
- self->settings.dte |= (IRCOMM_RTS|IRCOMM_DELTA_RTS);
-
- ircomm_param_request(self, IRCOMM_DTE, TRUE);
- pr_debug("%s(), FLOW_START\n", __func__);
- }
- ircomm_flow_request(self->ircomm, FLOW_START);
-}
-
-/*
- * Function ircomm_tty_chars_in_buffer (tty)
- *
- * Indicates if there are any data in the buffer
- *
- */
-static int ircomm_tty_chars_in_buffer(struct tty_struct *tty)
-{
- struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data;
- unsigned long flags;
- int len = 0;
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
-
- spin_lock_irqsave(&self->spinlock, flags);
-
- if (self->tx_skb)
- len = self->tx_skb->len;
-
- spin_unlock_irqrestore(&self->spinlock, flags);
-
- return len;
-}
-
-static void ircomm_tty_shutdown(struct ircomm_tty_cb *self)
-{
- unsigned long flags;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
-
- if (!tty_port_initialized(&self->port))
- return;
- tty_port_set_initialized(&self->port, 0);
-
- ircomm_tty_detach_cable(self);
-
- spin_lock_irqsave(&self->spinlock, flags);
-
- del_timer(&self->watchdog_timer);
-
- /* Free parameter buffer */
- if (self->ctrl_skb) {
- dev_kfree_skb(self->ctrl_skb);
- self->ctrl_skb = NULL;
- }
-
- /* Free transmit buffer */
- if (self->tx_skb) {
- dev_kfree_skb(self->tx_skb);
- self->tx_skb = NULL;
- }
-
- if (self->ircomm) {
- ircomm_close(self->ircomm);
- self->ircomm = NULL;
- }
-
- spin_unlock_irqrestore(&self->spinlock, flags);
-}
-
-/*
- * Function ircomm_tty_hangup (tty)
- *
- * This routine notifies the tty driver that it should hangup the tty
- * device.
- *
- */
-static void ircomm_tty_hangup(struct tty_struct *tty)
-{
- struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data;
- struct tty_port *port = &self->port;
- unsigned long flags;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
-
- /* ircomm_tty_flush_buffer(tty); */
- ircomm_tty_shutdown(self);
-
- spin_lock_irqsave(&port->lock, flags);
- if (port->tty) {
- set_bit(TTY_IO_ERROR, &port->tty->flags);
- tty_kref_put(port->tty);
- }
- port->tty = NULL;
- port->count = 0;
- spin_unlock_irqrestore(&port->lock, flags);
- tty_port_set_active(port, 0);
-
- wake_up_interruptible(&port->open_wait);
-}
-
-/*
- * Function ircomm_tty_send_xchar (tty, ch)
- *
- * This routine is used to send a high-priority XON/XOFF character to
- * the device.
- */
-static void ircomm_tty_send_xchar(struct tty_struct *tty, char ch)
-{
- pr_debug("%s(), not impl\n", __func__);
-}
-
-/*
- * Function ircomm_tty_start (tty)
- *
- * This routine notifies the tty driver that it resume sending
- * characters to the tty device.
- */
-void ircomm_tty_start(struct tty_struct *tty)
-{
- struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data;
-
- ircomm_flow_request(self->ircomm, FLOW_START);
-}
-
-/*
- * Function ircomm_tty_stop (tty)
- *
- * This routine notifies the tty driver that it should stop outputting
- * characters to the tty device.
- */
-static void ircomm_tty_stop(struct tty_struct *tty)
-{
- struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
-
- ircomm_flow_request(self->ircomm, FLOW_STOP);
-}
-
-/*
- * Function ircomm_check_modem_status (self)
- *
- * Check for any changes in the DCE's line settings. This function should
- * be called whenever the dce parameter settings changes, to update the
- * flow control settings and other things
- */
-void ircomm_tty_check_modem_status(struct ircomm_tty_cb *self)
-{
- struct tty_struct *tty;
- int status;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
-
- tty = tty_port_tty_get(&self->port);
-
- status = self->settings.dce;
-
- if (status & IRCOMM_DCE_DELTA_ANY) {
- /*wake_up_interruptible(&self->delta_msr_wait);*/
- }
- if (tty_port_check_carrier(&self->port) && (status & IRCOMM_DELTA_CD)) {
- pr_debug("%s(), ircomm%d CD now %s...\n", __func__ , self->line,
- (status & IRCOMM_CD) ? "on" : "off");
-
- if (status & IRCOMM_CD) {
- wake_up_interruptible(&self->port.open_wait);
- } else {
- pr_debug("%s(), Doing serial hangup..\n", __func__);
- if (tty)
- tty_hangup(tty);
-
- /* Hangup will remote the tty, so better break out */
- goto put;
- }
- }
- if (tty && tty_port_cts_enabled(&self->port)) {
- if (tty->hw_stopped) {
- if (status & IRCOMM_CTS) {
- pr_debug("%s(), CTS tx start...\n", __func__);
- tty->hw_stopped = 0;
-
- /* Wake up processes blocked on open */
- wake_up_interruptible(&self->port.open_wait);
-
- schedule_work(&self->tqueue);
- goto put;
- }
- } else {
- if (!(status & IRCOMM_CTS)) {
- pr_debug("%s(), CTS tx stop...\n", __func__);
- tty->hw_stopped = 1;
- }
- }
- }
-put:
- tty_kref_put(tty);
-}
-
-/*
- * Function ircomm_tty_data_indication (instance, sap, skb)
- *
- * Handle incoming data, and deliver it to the line discipline
- *
- */
-static int ircomm_tty_data_indication(void *instance, void *sap,
- struct sk_buff *skb)
-{
- struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance;
- struct tty_struct *tty;
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
- IRDA_ASSERT(skb != NULL, return -1;);
-
- tty = tty_port_tty_get(&self->port);
- if (!tty) {
- pr_debug("%s(), no tty!\n", __func__);
- return 0;
- }
-
- /*
- * If we receive data when hardware is stopped then something is wrong.
- * We try to poll the peers line settings to check if we are up todate.
- * Devices like WinCE can do this, and since they don't send any
- * params, we can just as well declare the hardware for running.
- */
- if (tty->hw_stopped && (self->flow == FLOW_START)) {
- pr_debug("%s(), polling for line settings!\n", __func__);
- ircomm_param_request(self, IRCOMM_POLL, TRUE);
-
- /* We can just as well declare the hardware for running */
- ircomm_tty_send_initial_parameters(self);
- ircomm_tty_link_established(self);
- }
- tty_kref_put(tty);
-
- /*
- * Use flip buffer functions since the code may be called from interrupt
- * context
- */
- tty_insert_flip_string(&self->port, skb->data, skb->len);
- tty_flip_buffer_push(&self->port);
-
- /* No need to kfree_skb - see ircomm_ttp_data_indication() */
-
- return 0;
-}
-
-/*
- * Function ircomm_tty_control_indication (instance, sap, skb)
- *
- * Parse all incoming parameters (easy!)
- *
- */
-static int ircomm_tty_control_indication(void *instance, void *sap,
- struct sk_buff *skb)
-{
- struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance;
- int clen;
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
- IRDA_ASSERT(skb != NULL, return -1;);
-
- clen = skb->data[0];
-
- irda_param_extract_all(self, skb->data+1, IRDA_MIN(skb->len-1, clen),
- &ircomm_param_info);
-
- /* No need to kfree_skb - see ircomm_control_indication() */
-
- return 0;
-}
-
-/*
- * Function ircomm_tty_flow_indication (instance, sap, cmd)
- *
- * This function is called by IrTTP when it wants us to slow down the
- * transmission of data. We just mark the hardware as stopped, and wait
- * for IrTTP to notify us that things are OK again.
- */
-static void ircomm_tty_flow_indication(void *instance, void *sap,
- LOCAL_FLOW cmd)
-{
- struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance;
- struct tty_struct *tty;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
-
- tty = tty_port_tty_get(&self->port);
-
- switch (cmd) {
- case FLOW_START:
- pr_debug("%s(), hw start!\n", __func__);
- if (tty)
- tty->hw_stopped = 0;
-
- /* ircomm_tty_do_softint will take care of the rest */
- schedule_work(&self->tqueue);
- break;
- default: /* If we get here, something is very wrong, better stop */
- case FLOW_STOP:
- pr_debug("%s(), hw stopped!\n", __func__);
- if (tty)
- tty->hw_stopped = 1;
- break;
- }
-
- tty_kref_put(tty);
- self->flow = cmd;
-}
-
-#ifdef CONFIG_PROC_FS
-static void ircomm_tty_line_info(struct ircomm_tty_cb *self, struct seq_file *m)
-{
- struct tty_struct *tty;
- char sep;
-
- seq_printf(m, "State: %s\n", ircomm_tty_state[self->state]);
-
- seq_puts(m, "Service type: ");
- if (self->service_type & IRCOMM_9_WIRE)
- seq_puts(m, "9_WIRE");
- else if (self->service_type & IRCOMM_3_WIRE)
- seq_puts(m, "3_WIRE");
- else if (self->service_type & IRCOMM_3_WIRE_RAW)
- seq_puts(m, "3_WIRE_RAW");
- else
- seq_puts(m, "No common service type!\n");
- seq_putc(m, '\n');
-
- seq_printf(m, "Port name: %s\n", self->settings.port_name);
-
- seq_printf(m, "DTE status:");
- sep = ' ';
- if (self->settings.dte & IRCOMM_RTS) {
- seq_printf(m, "%cRTS", sep);
- sep = '|';
- }
- if (self->settings.dte & IRCOMM_DTR) {
- seq_printf(m, "%cDTR", sep);
- sep = '|';
- }
- seq_putc(m, '\n');
-
- seq_puts(m, "DCE status:");
- sep = ' ';
- if (self->settings.dce & IRCOMM_CTS) {
- seq_printf(m, "%cCTS", sep);
- sep = '|';
- }
- if (self->settings.dce & IRCOMM_DSR) {
- seq_printf(m, "%cDSR", sep);
- sep = '|';
- }
- if (self->settings.dce & IRCOMM_CD) {
- seq_printf(m, "%cCD", sep);
- sep = '|';
- }
- if (self->settings.dce & IRCOMM_RI) {
- seq_printf(m, "%cRI", sep);
- sep = '|';
- }
- seq_putc(m, '\n');
-
- seq_puts(m, "Configuration: ");
- if (!self->settings.null_modem)
- seq_puts(m, "DTE <-> DCE\n");
- else
- seq_puts(m, "DTE <-> DTE (null modem emulation)\n");
-
- seq_printf(m, "Data rate: %d\n", self->settings.data_rate);
-
- seq_puts(m, "Flow control:");
- sep = ' ';
- if (self->settings.flow_control & IRCOMM_XON_XOFF_IN) {
- seq_printf(m, "%cXON_XOFF_IN", sep);
- sep = '|';
- }
- if (self->settings.flow_control & IRCOMM_XON_XOFF_OUT) {
- seq_printf(m, "%cXON_XOFF_OUT", sep);
- sep = '|';
- }
- if (self->settings.flow_control & IRCOMM_RTS_CTS_IN) {
- seq_printf(m, "%cRTS_CTS_IN", sep);
- sep = '|';
- }
- if (self->settings.flow_control & IRCOMM_RTS_CTS_OUT) {
- seq_printf(m, "%cRTS_CTS_OUT", sep);
- sep = '|';
- }
- if (self->settings.flow_control & IRCOMM_DSR_DTR_IN) {
- seq_printf(m, "%cDSR_DTR_IN", sep);
- sep = '|';
- }
- if (self->settings.flow_control & IRCOMM_DSR_DTR_OUT) {
- seq_printf(m, "%cDSR_DTR_OUT", sep);
- sep = '|';
- }
- if (self->settings.flow_control & IRCOMM_ENQ_ACK_IN) {
- seq_printf(m, "%cENQ_ACK_IN", sep);
- sep = '|';
- }
- if (self->settings.flow_control & IRCOMM_ENQ_ACK_OUT) {
- seq_printf(m, "%cENQ_ACK_OUT", sep);
- sep = '|';
- }
- seq_putc(m, '\n');
-
- seq_puts(m, "Flags:");
- sep = ' ';
- if (tty_port_cts_enabled(&self->port)) {
- seq_printf(m, "%cASYNC_CTS_FLOW", sep);
- sep = '|';
- }
- if (tty_port_check_carrier(&self->port)) {
- seq_printf(m, "%cASYNC_CHECK_CD", sep);
- sep = '|';
- }
- if (tty_port_initialized(&self->port)) {
- seq_printf(m, "%cASYNC_INITIALIZED", sep);
- sep = '|';
- }
- if (self->port.flags & ASYNC_LOW_LATENCY) {
- seq_printf(m, "%cASYNC_LOW_LATENCY", sep);
- sep = '|';
- }
- if (tty_port_active(&self->port)) {
- seq_printf(m, "%cASYNC_NORMAL_ACTIVE", sep);
- sep = '|';
- }
- seq_putc(m, '\n');
-
- seq_printf(m, "Role: %s\n", self->client ? "client" : "server");
- seq_printf(m, "Open count: %d\n", self->port.count);
- seq_printf(m, "Max data size: %d\n", self->max_data_size);
- seq_printf(m, "Max header size: %d\n", self->max_header_size);
-
- tty = tty_port_tty_get(&self->port);
- if (tty) {
- seq_printf(m, "Hardware: %s\n",
- tty->hw_stopped ? "Stopped" : "Running");
- tty_kref_put(tty);
- }
-}
-
-static int ircomm_tty_proc_show(struct seq_file *m, void *v)
-{
- struct ircomm_tty_cb *self;
- unsigned long flags;
-
- spin_lock_irqsave(&ircomm_tty->hb_spinlock, flags);
-
- self = (struct ircomm_tty_cb *) hashbin_get_first(ircomm_tty);
- while (self != NULL) {
- if (self->magic != IRCOMM_TTY_MAGIC)
- break;
-
- ircomm_tty_line_info(self, m);
- self = (struct ircomm_tty_cb *) hashbin_get_next(ircomm_tty);
- }
- spin_unlock_irqrestore(&ircomm_tty->hb_spinlock, flags);
- return 0;
-}
-
-static int ircomm_tty_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, ircomm_tty_proc_show, NULL);
-}
-
-static const struct file_operations ircomm_tty_proc_fops = {
- .owner = THIS_MODULE,
- .open = ircomm_tty_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-#endif /* CONFIG_PROC_FS */
-
-MODULE_AUTHOR("Dag Brattli <dagb@cs.uit.no>");
-MODULE_DESCRIPTION("IrCOMM serial TTY driver");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_CHARDEV_MAJOR(IRCOMM_TTY_MAJOR);
-
-module_init(ircomm_tty_init);
-module_exit(ircomm_tty_cleanup);
diff --git a/net/irda/ircomm/ircomm_tty_attach.c b/net/irda/ircomm/ircomm_tty_attach.c
deleted file mode 100644
index 0a411019c098..000000000000
--- a/net/irda/ircomm/ircomm_tty_attach.c
+++ /dev/null
@@ -1,987 +0,0 @@
-/*********************************************************************
- *
- * Filename: ircomm_tty_attach.c
- * Version:
- * Description: Code for attaching the serial driver to IrCOMM
- * Status: Experimental.
- * Author: Dag Brattli <dagb@cs.uit.no>
- * Created at: Sat Jun 5 17:42:00 1999
- * Modified at: Tue Jan 4 14:20:49 2000
- * Modified by: Dag Brattli <dagb@cs.uit.no>
- *
- * Copyright (c) 1999-2000 Dag Brattli, All Rights Reserved.
- * Copyright (c) 2000-2003 Jean Tourrilhes <jt@hpl.hp.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
- ********************************************************************/
-
-#include <linux/init.h>
-#include <linux/sched.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/irlmp.h>
-#include <net/irda/iriap.h>
-#include <net/irda/irttp.h>
-#include <net/irda/irias_object.h>
-#include <net/irda/parameters.h>
-
-#include <net/irda/ircomm_core.h>
-#include <net/irda/ircomm_param.h>
-#include <net/irda/ircomm_event.h>
-
-#include <net/irda/ircomm_tty.h>
-#include <net/irda/ircomm_tty_attach.h>
-
-static void ircomm_tty_ias_register(struct ircomm_tty_cb *self);
-static void ircomm_tty_discovery_indication(discinfo_t *discovery,
- DISCOVERY_MODE mode,
- void *priv);
-static void ircomm_tty_getvalue_confirm(int result, __u16 obj_id,
- struct ias_value *value, void *priv);
-static void ircomm_tty_start_watchdog_timer(struct ircomm_tty_cb *self,
- int timeout);
-static void ircomm_tty_watchdog_timer_expired(void *data);
-
-static int ircomm_tty_state_idle(struct ircomm_tty_cb *self,
- IRCOMM_TTY_EVENT event,
- struct sk_buff *skb,
- struct ircomm_tty_info *info);
-static int ircomm_tty_state_search(struct ircomm_tty_cb *self,
- IRCOMM_TTY_EVENT event,
- struct sk_buff *skb,
- struct ircomm_tty_info *info);
-static int ircomm_tty_state_query_parameters(struct ircomm_tty_cb *self,
- IRCOMM_TTY_EVENT event,
- struct sk_buff *skb,
- struct ircomm_tty_info *info);
-static int ircomm_tty_state_query_lsap_sel(struct ircomm_tty_cb *self,
- IRCOMM_TTY_EVENT event,
- struct sk_buff *skb,
- struct ircomm_tty_info *info);
-static int ircomm_tty_state_setup(struct ircomm_tty_cb *self,
- IRCOMM_TTY_EVENT event,
- struct sk_buff *skb,
- struct ircomm_tty_info *info);
-static int ircomm_tty_state_ready(struct ircomm_tty_cb *self,
- IRCOMM_TTY_EVENT event,
- struct sk_buff *skb,
- struct ircomm_tty_info *info);
-
-const char *const ircomm_tty_state[] = {
- "IRCOMM_TTY_IDLE",
- "IRCOMM_TTY_SEARCH",
- "IRCOMM_TTY_QUERY_PARAMETERS",
- "IRCOMM_TTY_QUERY_LSAP_SEL",
- "IRCOMM_TTY_SETUP",
- "IRCOMM_TTY_READY",
- "*** ERROR *** ",
-};
-
-static const char *const ircomm_tty_event[] __maybe_unused = {
- "IRCOMM_TTY_ATTACH_CABLE",
- "IRCOMM_TTY_DETACH_CABLE",
- "IRCOMM_TTY_DATA_REQUEST",
- "IRCOMM_TTY_DATA_INDICATION",
- "IRCOMM_TTY_DISCOVERY_REQUEST",
- "IRCOMM_TTY_DISCOVERY_INDICATION",
- "IRCOMM_TTY_CONNECT_CONFIRM",
- "IRCOMM_TTY_CONNECT_INDICATION",
- "IRCOMM_TTY_DISCONNECT_REQUEST",
- "IRCOMM_TTY_DISCONNECT_INDICATION",
- "IRCOMM_TTY_WD_TIMER_EXPIRED",
- "IRCOMM_TTY_GOT_PARAMETERS",
- "IRCOMM_TTY_GOT_LSAPSEL",
- "*** ERROR ****",
-};
-
-static int (*state[])(struct ircomm_tty_cb *self, IRCOMM_TTY_EVENT event,
- struct sk_buff *skb, struct ircomm_tty_info *info) =
-{
- ircomm_tty_state_idle,
- ircomm_tty_state_search,
- ircomm_tty_state_query_parameters,
- ircomm_tty_state_query_lsap_sel,
- ircomm_tty_state_setup,
- ircomm_tty_state_ready,
-};
-
-/*
- * Function ircomm_tty_attach_cable (driver)
- *
- * Try to attach cable (IrCOMM link). This function will only return
- * when the link has been connected, or if an error condition occurs.
- * If success, the return value is the resulting service type.
- */
-int ircomm_tty_attach_cable(struct ircomm_tty_cb *self)
-{
- struct tty_struct *tty;
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
-
- /* Check if somebody has already connected to us */
- if (ircomm_is_connected(self->ircomm)) {
- pr_debug("%s(), already connected!\n", __func__);
- return 0;
- }
-
- /* Make sure nobody tries to write before the link is up */
- tty = tty_port_tty_get(&self->port);
- if (tty) {
- tty->hw_stopped = 1;
- tty_kref_put(tty);
- }
-
- ircomm_tty_ias_register(self);
-
- ircomm_tty_do_event(self, IRCOMM_TTY_ATTACH_CABLE, NULL, NULL);
-
- return 0;
-}
-
-/*
- * Function ircomm_detach_cable (driver)
- *
- * Detach cable, or cable has been detached by peer
- *
- */
-void ircomm_tty_detach_cable(struct ircomm_tty_cb *self)
-{
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
-
- del_timer(&self->watchdog_timer);
-
- /* Remove discovery handler */
- if (self->ckey) {
- irlmp_unregister_client(self->ckey);
- self->ckey = NULL;
- }
- /* Remove IrCOMM hint bits */
- if (self->skey) {
- irlmp_unregister_service(self->skey);
- self->skey = NULL;
- }
-
- if (self->iriap) {
- iriap_close(self->iriap);
- self->iriap = NULL;
- }
-
- /* Remove LM-IAS object */
- if (self->obj) {
- irias_delete_object(self->obj);
- self->obj = NULL;
- }
-
- ircomm_tty_do_event(self, IRCOMM_TTY_DETACH_CABLE, NULL, NULL);
-
- /* Reset some values */
- self->daddr = self->saddr = 0;
- self->dlsap_sel = self->slsap_sel = 0;
-
- memset(&self->settings, 0, sizeof(struct ircomm_params));
-}
-
-/*
- * Function ircomm_tty_ias_register (self)
- *
- * Register with LM-IAS depending on which service type we are
- *
- */
-static void ircomm_tty_ias_register(struct ircomm_tty_cb *self)
-{
- __u8 oct_seq[6];
- __u16 hints;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
-
- /* Compute hint bits based on service */
- hints = irlmp_service_to_hint(S_COMM);
- if (self->service_type & IRCOMM_3_WIRE_RAW)
- hints |= irlmp_service_to_hint(S_PRINTER);
-
- /* Advertise IrCOMM hint bit in discovery */
- if (!self->skey)
- self->skey = irlmp_register_service(hints);
- /* Set up a discovery handler */
- if (!self->ckey)
- self->ckey = irlmp_register_client(hints,
- ircomm_tty_discovery_indication,
- NULL, (void *) self);
-
- /* If already done, no need to do it again */
- if (self->obj)
- return;
-
- if (self->service_type & IRCOMM_3_WIRE_RAW) {
- /* Register IrLPT with LM-IAS */
- self->obj = irias_new_object("IrLPT", IAS_IRLPT_ID);
- irias_add_integer_attrib(self->obj, "IrDA:IrLMP:LsapSel",
- self->slsap_sel, IAS_KERNEL_ATTR);
- } else {
- /* Register IrCOMM with LM-IAS */
- self->obj = irias_new_object("IrDA:IrCOMM", IAS_IRCOMM_ID);
- irias_add_integer_attrib(self->obj, "IrDA:TinyTP:LsapSel",
- self->slsap_sel, IAS_KERNEL_ATTR);
-
- /* Code the parameters into the buffer */
- irda_param_pack(oct_seq, "bbbbbb",
- IRCOMM_SERVICE_TYPE, 1, self->service_type,
- IRCOMM_PORT_TYPE, 1, IRCOMM_SERIAL);
-
- /* Register parameters with LM-IAS */
- irias_add_octseq_attrib(self->obj, "Parameters", oct_seq, 6,
- IAS_KERNEL_ATTR);
- }
- irias_insert_object(self->obj);
-}
-
-/*
- * Function ircomm_tty_ias_unregister (self)
- *
- * Remove our IAS object and client hook while connected.
- *
- */
-static void ircomm_tty_ias_unregister(struct ircomm_tty_cb *self)
-{
- /* Remove LM-IAS object now so it is not reused.
- * IrCOMM deals very poorly with multiple incoming connections.
- * It should looks a lot more like IrNET, and "dup" a server TSAP
- * to the application TSAP (based on various rules).
- * This is a cheap workaround allowing multiple clients to
- * connect to us. It will not always work.
- * Each IrCOMM socket has an IAS entry. Incoming connection will
- * pick the first one found. So, when we are fully connected,
- * we remove our IAS entries so that the next IAS entry is used.
- * We do that for *both* client and server, because a server
- * can also create client instances.
- * Jean II */
- if (self->obj) {
- irias_delete_object(self->obj);
- self->obj = NULL;
- }
-
-#if 0
- /* Remove discovery handler.
- * While we are connected, we no longer need to receive
- * discovery events. This would be the case if there is
- * multiple IrLAP interfaces. Jean II */
- if (self->ckey) {
- irlmp_unregister_client(self->ckey);
- self->ckey = NULL;
- }
-#endif
-}
-
-/*
- * Function ircomm_send_initial_parameters (self)
- *
- * Send initial parameters to the remote IrCOMM device. These parameters
- * must be sent before any data.
- */
-int ircomm_tty_send_initial_parameters(struct ircomm_tty_cb *self)
-{
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
-
- if (self->service_type & IRCOMM_3_WIRE_RAW)
- return 0;
-
- /*
- * Set default values, but only if the application for some reason
- * haven't set them already
- */
- pr_debug("%s(), data-rate = %d\n", __func__ ,
- self->settings.data_rate);
- if (!self->settings.data_rate)
- self->settings.data_rate = 9600;
- pr_debug("%s(), data-format = %d\n", __func__ ,
- self->settings.data_format);
- if (!self->settings.data_format)
- self->settings.data_format = IRCOMM_WSIZE_8; /* 8N1 */
-
- pr_debug("%s(), flow-control = %d\n", __func__ ,
- self->settings.flow_control);
- /*self->settings.flow_control = IRCOMM_RTS_CTS_IN|IRCOMM_RTS_CTS_OUT;*/
-
- /* Do not set delta values for the initial parameters */
- self->settings.dte = IRCOMM_DTR | IRCOMM_RTS;
-
- /* Only send service type parameter when we are the client */
- if (self->client)
- ircomm_param_request(self, IRCOMM_SERVICE_TYPE, FALSE);
- ircomm_param_request(self, IRCOMM_DATA_RATE, FALSE);
- ircomm_param_request(self, IRCOMM_DATA_FORMAT, FALSE);
-
- /* For a 3 wire service, we just flush the last parameter and return */
- if (self->settings.service_type == IRCOMM_3_WIRE) {
- ircomm_param_request(self, IRCOMM_FLOW_CONTROL, TRUE);
- return 0;
- }
-
- /* Only 9-wire service types continue here */
- ircomm_param_request(self, IRCOMM_FLOW_CONTROL, FALSE);
-#if 0
- ircomm_param_request(self, IRCOMM_XON_XOFF, FALSE);
- ircomm_param_request(self, IRCOMM_ENQ_ACK, FALSE);
-#endif
- /* Notify peer that we are ready to receive data */
- ircomm_param_request(self, IRCOMM_DTE, TRUE);
-
- return 0;
-}
-
-/*
- * Function ircomm_tty_discovery_indication (discovery)
- *
- * Remote device is discovered, try query the remote IAS to see which
- * device it is, and which services it has.
- *
- */
-static void ircomm_tty_discovery_indication(discinfo_t *discovery,
- DISCOVERY_MODE mode,
- void *priv)
-{
- struct ircomm_tty_cb *self;
- struct ircomm_tty_info info;
-
- /* Important note :
- * We need to drop all passive discoveries.
- * The LSAP management of IrComm is deficient and doesn't deal
- * with the case of two instance connecting to each other
- * simultaneously (it will deadlock in LMP).
- * The proper fix would be to use the same technique as in IrNET,
- * to have one server socket and separate instances for the
- * connecting/connected socket.
- * The workaround is to drop passive discovery, which drastically
- * reduce the probability of this happening.
- * Jean II */
- if(mode == DISCOVERY_PASSIVE)
- return;
-
- info.daddr = discovery->daddr;
- info.saddr = discovery->saddr;
-
- self = priv;
- ircomm_tty_do_event(self, IRCOMM_TTY_DISCOVERY_INDICATION,
- NULL, &info);
-}
-
-/*
- * Function ircomm_tty_disconnect_indication (instance, sap, reason, skb)
- *
- * Link disconnected
- *
- */
-void ircomm_tty_disconnect_indication(void *instance, void *sap,
- LM_REASON reason,
- struct sk_buff *skb)
-{
- struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance;
- struct tty_struct *tty;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
-
- tty = tty_port_tty_get(&self->port);
- if (!tty)
- return;
-
- /* This will stop control data transfers */
- self->flow = FLOW_STOP;
-
- /* Stop data transfers */
- tty->hw_stopped = 1;
-
- ircomm_tty_do_event(self, IRCOMM_TTY_DISCONNECT_INDICATION, NULL,
- NULL);
- tty_kref_put(tty);
-}
-
-/*
- * Function ircomm_tty_getvalue_confirm (result, obj_id, value, priv)
- *
- * Got result from the IAS query we make
- *
- */
-static void ircomm_tty_getvalue_confirm(int result, __u16 obj_id,
- struct ias_value *value,
- void *priv)
-{
- struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) priv;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
-
- /* We probably don't need to make any more queries */
- iriap_close(self->iriap);
- self->iriap = NULL;
-
- /* Check if request succeeded */
- if (result != IAS_SUCCESS) {
- pr_debug("%s(), got NULL value!\n", __func__);
- return;
- }
-
- switch (value->type) {
- case IAS_OCT_SEQ:
- pr_debug("%s(), got octet sequence\n", __func__);
-
- irda_param_extract_all(self, value->t.oct_seq, value->len,
- &ircomm_param_info);
-
- ircomm_tty_do_event(self, IRCOMM_TTY_GOT_PARAMETERS, NULL,
- NULL);
- break;
- case IAS_INTEGER:
- /* Got LSAP selector */
- pr_debug("%s(), got lsapsel = %d\n", __func__ ,
- value->t.integer);
-
- if (value->t.integer == -1) {
- pr_debug("%s(), invalid value!\n", __func__);
- } else
- self->dlsap_sel = value->t.integer;
-
- ircomm_tty_do_event(self, IRCOMM_TTY_GOT_LSAPSEL, NULL, NULL);
- break;
- case IAS_MISSING:
- pr_debug("%s(), got IAS_MISSING\n", __func__);
- break;
- default:
- pr_debug("%s(), got unknown type!\n", __func__);
- break;
- }
- irias_delete_value(value);
-}
-
-/*
- * Function ircomm_tty_connect_confirm (instance, sap, qos, max_sdu_size, skb)
- *
- * Connection confirmed
- *
- */
-void ircomm_tty_connect_confirm(void *instance, void *sap,
- struct qos_info *qos,
- __u32 max_data_size,
- __u8 max_header_size,
- struct sk_buff *skb)
-{
- struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
-
- self->client = TRUE;
- self->max_data_size = max_data_size;
- self->max_header_size = max_header_size;
- self->flow = FLOW_START;
-
- ircomm_tty_do_event(self, IRCOMM_TTY_CONNECT_CONFIRM, NULL, NULL);
-
- /* No need to kfree_skb - see ircomm_ttp_connect_confirm() */
-}
-
-/*
- * Function ircomm_tty_connect_indication (instance, sap, qos, max_sdu_size,
- * skb)
- *
- * we are discovered and being requested to connect by remote device !
- *
- */
-void ircomm_tty_connect_indication(void *instance, void *sap,
- struct qos_info *qos,
- __u32 max_data_size,
- __u8 max_header_size,
- struct sk_buff *skb)
-{
- struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance;
- int clen;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
-
- self->client = FALSE;
- self->max_data_size = max_data_size;
- self->max_header_size = max_header_size;
- self->flow = FLOW_START;
-
- clen = skb->data[0];
- if (clen)
- irda_param_extract_all(self, skb->data+1,
- IRDA_MIN(skb->len, clen),
- &ircomm_param_info);
-
- ircomm_tty_do_event(self, IRCOMM_TTY_CONNECT_INDICATION, NULL, NULL);
-
- /* No need to kfree_skb - see ircomm_ttp_connect_indication() */
-}
-
-/*
- * Function ircomm_tty_link_established (self)
- *
- * Called when the IrCOMM link is established
- *
- */
-void ircomm_tty_link_established(struct ircomm_tty_cb *self)
-{
- struct tty_struct *tty;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
-
- tty = tty_port_tty_get(&self->port);
- if (!tty)
- return;
-
- del_timer(&self->watchdog_timer);
-
- /*
- * IrCOMM link is now up, and if we are not using hardware
- * flow-control, then declare the hardware as running. Otherwise we
- * will have to wait for the peer device (DCE) to raise the CTS
- * line.
- */
- if (tty_port_cts_enabled(&self->port) &&
- ((self->settings.dce & IRCOMM_CTS) == 0)) {
- pr_debug("%s(), waiting for CTS ...\n", __func__);
- goto put;
- } else {
- pr_debug("%s(), starting hardware!\n", __func__);
-
- tty->hw_stopped = 0;
-
- /* Wake up processes blocked on open */
- wake_up_interruptible(&self->port.open_wait);
- }
-
- schedule_work(&self->tqueue);
-put:
- tty_kref_put(tty);
-}
-
-/*
- * Function ircomm_tty_start_watchdog_timer (self, timeout)
- *
- * Start the watchdog timer. This timer is used to make sure that any
- * connection attempt is successful, and if not, we will retry after
- * the timeout
- */
-static void ircomm_tty_start_watchdog_timer(struct ircomm_tty_cb *self,
- int timeout)
-{
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
-
- irda_start_timer(&self->watchdog_timer, timeout, (void *) self,
- ircomm_tty_watchdog_timer_expired);
-}
-
-/*
- * Function ircomm_tty_watchdog_timer_expired (data)
- *
- * Called when the connect procedure have taken to much time.
- *
- */
-static void ircomm_tty_watchdog_timer_expired(void *data)
-{
- struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) data;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
-
- ircomm_tty_do_event(self, IRCOMM_TTY_WD_TIMER_EXPIRED, NULL, NULL);
-}
-
-
-/*
- * Function ircomm_tty_do_event (self, event, skb)
- *
- * Process event
- *
- */
-int ircomm_tty_do_event(struct ircomm_tty_cb *self, IRCOMM_TTY_EVENT event,
- struct sk_buff *skb, struct ircomm_tty_info *info)
-{
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
-
- pr_debug("%s: state=%s, event=%s\n", __func__ ,
- ircomm_tty_state[self->state], ircomm_tty_event[event]);
-
- return (*state[self->state])(self, event, skb, info);
-}
-
-/*
- * Function ircomm_tty_next_state (self, state)
- *
- * Switch state
- *
- */
-static inline void ircomm_tty_next_state(struct ircomm_tty_cb *self, IRCOMM_TTY_STATE state)
-{
- /*
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
-
- pr_debug("%s: next state=%s, service type=%d\n", __func__ ,
- ircomm_tty_state[self->state], self->service_type);
- */
- self->state = state;
-}
-
-/*
- * Function ircomm_tty_state_idle (self, event, skb, info)
- *
- * Just hanging around
- *
- */
-static int ircomm_tty_state_idle(struct ircomm_tty_cb *self,
- IRCOMM_TTY_EVENT event,
- struct sk_buff *skb,
- struct ircomm_tty_info *info)
-{
- int ret = 0;
-
- pr_debug("%s: state=%s, event=%s\n", __func__ ,
- ircomm_tty_state[self->state], ircomm_tty_event[event]);
- switch (event) {
- case IRCOMM_TTY_ATTACH_CABLE:
- /* Try to discover any remote devices */
- ircomm_tty_start_watchdog_timer(self, 3*HZ);
- ircomm_tty_next_state(self, IRCOMM_TTY_SEARCH);
-
- irlmp_discovery_request(DISCOVERY_DEFAULT_SLOTS);
- break;
- case IRCOMM_TTY_DISCOVERY_INDICATION:
- self->daddr = info->daddr;
- self->saddr = info->saddr;
-
- if (self->iriap) {
- net_warn_ratelimited("%s(), busy with a previous query\n",
- __func__);
- return -EBUSY;
- }
-
- self->iriap = iriap_open(LSAP_ANY, IAS_CLIENT, self,
- ircomm_tty_getvalue_confirm);
-
- iriap_getvaluebyclass_request(self->iriap,
- self->saddr, self->daddr,
- "IrDA:IrCOMM", "Parameters");
-
- ircomm_tty_start_watchdog_timer(self, 3*HZ);
- ircomm_tty_next_state(self, IRCOMM_TTY_QUERY_PARAMETERS);
- break;
- case IRCOMM_TTY_CONNECT_INDICATION:
- del_timer(&self->watchdog_timer);
-
- /* Accept connection */
- ircomm_connect_response(self->ircomm, NULL);
- ircomm_tty_next_state(self, IRCOMM_TTY_READY);
- break;
- case IRCOMM_TTY_WD_TIMER_EXPIRED:
- /* Just stay idle */
- break;
- case IRCOMM_TTY_DETACH_CABLE:
- ircomm_tty_next_state(self, IRCOMM_TTY_IDLE);
- break;
- default:
- pr_debug("%s(), unknown event: %s\n", __func__ ,
- ircomm_tty_event[event]);
- ret = -EINVAL;
- }
- return ret;
-}
-
-/*
- * Function ircomm_tty_state_search (self, event, skb, info)
- *
- * Trying to discover an IrCOMM device
- *
- */
-static int ircomm_tty_state_search(struct ircomm_tty_cb *self,
- IRCOMM_TTY_EVENT event,
- struct sk_buff *skb,
- struct ircomm_tty_info *info)
-{
- int ret = 0;
-
- pr_debug("%s: state=%s, event=%s\n", __func__ ,
- ircomm_tty_state[self->state], ircomm_tty_event[event]);
-
- switch (event) {
- case IRCOMM_TTY_DISCOVERY_INDICATION:
- self->daddr = info->daddr;
- self->saddr = info->saddr;
-
- if (self->iriap) {
- net_warn_ratelimited("%s(), busy with a previous query\n",
- __func__);
- return -EBUSY;
- }
-
- self->iriap = iriap_open(LSAP_ANY, IAS_CLIENT, self,
- ircomm_tty_getvalue_confirm);
-
- if (self->service_type == IRCOMM_3_WIRE_RAW) {
- iriap_getvaluebyclass_request(self->iriap, self->saddr,
- self->daddr, "IrLPT",
- "IrDA:IrLMP:LsapSel");
- ircomm_tty_next_state(self, IRCOMM_TTY_QUERY_LSAP_SEL);
- } else {
- iriap_getvaluebyclass_request(self->iriap, self->saddr,
- self->daddr,
- "IrDA:IrCOMM",
- "Parameters");
-
- ircomm_tty_next_state(self, IRCOMM_TTY_QUERY_PARAMETERS);
- }
- ircomm_tty_start_watchdog_timer(self, 3*HZ);
- break;
- case IRCOMM_TTY_CONNECT_INDICATION:
- del_timer(&self->watchdog_timer);
- ircomm_tty_ias_unregister(self);
-
- /* Accept connection */
- ircomm_connect_response(self->ircomm, NULL);
- ircomm_tty_next_state(self, IRCOMM_TTY_READY);
- break;
- case IRCOMM_TTY_WD_TIMER_EXPIRED:
-#if 1
- /* Give up */
-#else
- /* Try to discover any remote devices */
- ircomm_tty_start_watchdog_timer(self, 3*HZ);
- irlmp_discovery_request(DISCOVERY_DEFAULT_SLOTS);
-#endif
- break;
- case IRCOMM_TTY_DETACH_CABLE:
- ircomm_tty_next_state(self, IRCOMM_TTY_IDLE);
- break;
- default:
- pr_debug("%s(), unknown event: %s\n", __func__ ,
- ircomm_tty_event[event]);
- ret = -EINVAL;
- }
- return ret;
-}
-
-/*
- * Function ircomm_tty_state_query (self, event, skb, info)
- *
- * Querying the remote LM-IAS for IrCOMM parameters
- *
- */
-static int ircomm_tty_state_query_parameters(struct ircomm_tty_cb *self,
- IRCOMM_TTY_EVENT event,
- struct sk_buff *skb,
- struct ircomm_tty_info *info)
-{
- int ret = 0;
-
- pr_debug("%s: state=%s, event=%s\n", __func__ ,
- ircomm_tty_state[self->state], ircomm_tty_event[event]);
-
- switch (event) {
- case IRCOMM_TTY_GOT_PARAMETERS:
- if (self->iriap) {
- net_warn_ratelimited("%s(), busy with a previous query\n",
- __func__);
- return -EBUSY;
- }
-
- self->iriap = iriap_open(LSAP_ANY, IAS_CLIENT, self,
- ircomm_tty_getvalue_confirm);
-
- iriap_getvaluebyclass_request(self->iriap, self->saddr,
- self->daddr, "IrDA:IrCOMM",
- "IrDA:TinyTP:LsapSel");
-
- ircomm_tty_start_watchdog_timer(self, 3*HZ);
- ircomm_tty_next_state(self, IRCOMM_TTY_QUERY_LSAP_SEL);
- break;
- case IRCOMM_TTY_WD_TIMER_EXPIRED:
- /* Go back to search mode */
- ircomm_tty_next_state(self, IRCOMM_TTY_SEARCH);
- ircomm_tty_start_watchdog_timer(self, 3*HZ);
- break;
- case IRCOMM_TTY_CONNECT_INDICATION:
- del_timer(&self->watchdog_timer);
- ircomm_tty_ias_unregister(self);
-
- /* Accept connection */
- ircomm_connect_response(self->ircomm, NULL);
- ircomm_tty_next_state(self, IRCOMM_TTY_READY);
- break;
- case IRCOMM_TTY_DETACH_CABLE:
- ircomm_tty_next_state(self, IRCOMM_TTY_IDLE);
- break;
- default:
- pr_debug("%s(), unknown event: %s\n", __func__ ,
- ircomm_tty_event[event]);
- ret = -EINVAL;
- }
- return ret;
-}
-
-/*
- * Function ircomm_tty_state_query_lsap_sel (self, event, skb, info)
- *
- * Query remote LM-IAS for the LSAP selector which we can connect to
- *
- */
-static int ircomm_tty_state_query_lsap_sel(struct ircomm_tty_cb *self,
- IRCOMM_TTY_EVENT event,
- struct sk_buff *skb,
- struct ircomm_tty_info *info)
-{
- int ret = 0;
-
- pr_debug("%s: state=%s, event=%s\n", __func__ ,
- ircomm_tty_state[self->state], ircomm_tty_event[event]);
-
- switch (event) {
- case IRCOMM_TTY_GOT_LSAPSEL:
- /* Connect to remote device */
- ret = ircomm_connect_request(self->ircomm, self->dlsap_sel,
- self->saddr, self->daddr,
- NULL, self->service_type);
- ircomm_tty_start_watchdog_timer(self, 3*HZ);
- ircomm_tty_next_state(self, IRCOMM_TTY_SETUP);
- break;
- case IRCOMM_TTY_WD_TIMER_EXPIRED:
- /* Go back to search mode */
- ircomm_tty_next_state(self, IRCOMM_TTY_SEARCH);
- ircomm_tty_start_watchdog_timer(self, 3*HZ);
- break;
- case IRCOMM_TTY_CONNECT_INDICATION:
- del_timer(&self->watchdog_timer);
- ircomm_tty_ias_unregister(self);
-
- /* Accept connection */
- ircomm_connect_response(self->ircomm, NULL);
- ircomm_tty_next_state(self, IRCOMM_TTY_READY);
- break;
- case IRCOMM_TTY_DETACH_CABLE:
- ircomm_tty_next_state(self, IRCOMM_TTY_IDLE);
- break;
- default:
- pr_debug("%s(), unknown event: %s\n", __func__ ,
- ircomm_tty_event[event]);
- ret = -EINVAL;
- }
- return ret;
-}
-
-/*
- * Function ircomm_tty_state_setup (self, event, skb, info)
- *
- * Trying to connect
- *
- */
-static int ircomm_tty_state_setup(struct ircomm_tty_cb *self,
- IRCOMM_TTY_EVENT event,
- struct sk_buff *skb,
- struct ircomm_tty_info *info)
-{
- int ret = 0;
-
- pr_debug("%s: state=%s, event=%s\n", __func__ ,
- ircomm_tty_state[self->state], ircomm_tty_event[event]);
-
- switch (event) {
- case IRCOMM_TTY_CONNECT_CONFIRM:
- del_timer(&self->watchdog_timer);
- ircomm_tty_ias_unregister(self);
-
- /*
- * Send initial parameters. This will also send out queued
- * parameters waiting for the connection to come up
- */
- ircomm_tty_send_initial_parameters(self);
- ircomm_tty_link_established(self);
- ircomm_tty_next_state(self, IRCOMM_TTY_READY);
- break;
- case IRCOMM_TTY_CONNECT_INDICATION:
- del_timer(&self->watchdog_timer);
- ircomm_tty_ias_unregister(self);
-
- /* Accept connection */
- ircomm_connect_response(self->ircomm, NULL);
- ircomm_tty_next_state(self, IRCOMM_TTY_READY);
- break;
- case IRCOMM_TTY_WD_TIMER_EXPIRED:
- /* Go back to search mode */
- ircomm_tty_next_state(self, IRCOMM_TTY_SEARCH);
- ircomm_tty_start_watchdog_timer(self, 3*HZ);
- break;
- case IRCOMM_TTY_DETACH_CABLE:
- /* ircomm_disconnect_request(self->ircomm, NULL); */
- ircomm_tty_next_state(self, IRCOMM_TTY_IDLE);
- break;
- default:
- pr_debug("%s(), unknown event: %s\n", __func__ ,
- ircomm_tty_event[event]);
- ret = -EINVAL;
- }
- return ret;
-}
-
-/*
- * Function ircomm_tty_state_ready (self, event, skb, info)
- *
- * IrCOMM is now connected
- *
- */
-static int ircomm_tty_state_ready(struct ircomm_tty_cb *self,
- IRCOMM_TTY_EVENT event,
- struct sk_buff *skb,
- struct ircomm_tty_info *info)
-{
- int ret = 0;
-
- switch (event) {
- case IRCOMM_TTY_DATA_REQUEST:
- ret = ircomm_data_request(self->ircomm, skb);
- break;
- case IRCOMM_TTY_DETACH_CABLE:
- ircomm_disconnect_request(self->ircomm, NULL);
- ircomm_tty_next_state(self, IRCOMM_TTY_IDLE);
- break;
- case IRCOMM_TTY_DISCONNECT_INDICATION:
- ircomm_tty_ias_register(self);
- ircomm_tty_next_state(self, IRCOMM_TTY_SEARCH);
- ircomm_tty_start_watchdog_timer(self, 3*HZ);
-
- if (tty_port_check_carrier(&self->port)) {
- /* Drop carrier */
- self->settings.dce = IRCOMM_DELTA_CD;
- ircomm_tty_check_modem_status(self);
- } else {
- pr_debug("%s(), hanging up!\n", __func__);
- tty_port_tty_hangup(&self->port, false);
- }
- break;
- default:
- pr_debug("%s(), unknown event: %s\n", __func__ ,
- ircomm_tty_event[event]);
- ret = -EINVAL;
- }
- return ret;
-}
-
diff --git a/net/irda/ircomm/ircomm_tty_ioctl.c b/net/irda/ircomm/ircomm_tty_ioctl.c
deleted file mode 100644
index 171c3dee760e..000000000000
--- a/net/irda/ircomm/ircomm_tty_ioctl.c
+++ /dev/null
@@ -1,291 +0,0 @@
-/*********************************************************************
- *
- * Filename: ircomm_tty_ioctl.c
- * Version:
- * Description:
- * Status: Experimental.
- * Author: Dag Brattli <dagb@cs.uit.no>
- * Created at: Thu Jun 10 14:39:09 1999
- * Modified at: Wed Jan 5 14:45:43 2000
- * Modified by: Dag Brattli <dagb@cs.uit.no>
- *
- * Copyright (c) 1999-2000 Dag Brattli, All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
- ********************************************************************/
-
-#include <linux/init.h>
-#include <linux/fs.h>
-#include <linux/termios.h>
-#include <linux/tty.h>
-#include <linux/serial.h>
-
-#include <linux/uaccess.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/irmod.h>
-
-#include <net/irda/ircomm_core.h>
-#include <net/irda/ircomm_param.h>
-#include <net/irda/ircomm_tty_attach.h>
-#include <net/irda/ircomm_tty.h>
-
-#define RELEVANT_IFLAG(iflag) (iflag & (IGNBRK|BRKINT|IGNPAR|PARMRK|INPCK))
-
-/*
- * Function ircomm_tty_change_speed (driver)
- *
- * Change speed of the driver. If the remote device is a DCE, then this
- * should make it change the speed of its serial port
- */
-static void ircomm_tty_change_speed(struct ircomm_tty_cb *self,
- struct tty_struct *tty)
-{
- unsigned int cflag, cval;
- int baud;
-
- if (!self->ircomm)
- return;
-
- cflag = tty->termios.c_cflag;
-
- /* byte size and parity */
- switch (cflag & CSIZE) {
- case CS5: cval = IRCOMM_WSIZE_5; break;
- case CS6: cval = IRCOMM_WSIZE_6; break;
- case CS7: cval = IRCOMM_WSIZE_7; break;
- case CS8: cval = IRCOMM_WSIZE_8; break;
- default: cval = IRCOMM_WSIZE_5; break;
- }
- if (cflag & CSTOPB)
- cval |= IRCOMM_2_STOP_BIT;
-
- if (cflag & PARENB)
- cval |= IRCOMM_PARITY_ENABLE;
- if (!(cflag & PARODD))
- cval |= IRCOMM_PARITY_EVEN;
-
- /* Determine divisor based on baud rate */
- baud = tty_get_baud_rate(tty);
- if (!baud)
- baud = 9600; /* B0 transition handled in rs_set_termios */
-
- self->settings.data_rate = baud;
- ircomm_param_request(self, IRCOMM_DATA_RATE, FALSE);
-
- /* CTS flow control flag and modem status interrupts */
- tty_port_set_cts_flow(&self->port, cflag & CRTSCTS);
- if (cflag & CRTSCTS) {
- self->settings.flow_control |= IRCOMM_RTS_CTS_IN;
- /* This got me. Bummer. Jean II */
- if (self->service_type == IRCOMM_3_WIRE_RAW)
- net_warn_ratelimited("%s(), enabling RTS/CTS on link that doesn't support it (3-wire-raw)\n",
- __func__);
- } else {
- self->settings.flow_control &= ~IRCOMM_RTS_CTS_IN;
- }
- tty_port_set_check_carrier(&self->port, ~cflag & CLOCAL);
-
- self->settings.data_format = cval;
-
- ircomm_param_request(self, IRCOMM_DATA_FORMAT, FALSE);
- ircomm_param_request(self, IRCOMM_FLOW_CONTROL, TRUE);
-}
-
-/*
- * Function ircomm_tty_set_termios (tty, old_termios)
- *
- * This routine allows the tty driver to be notified when device's
- * termios settings have changed. Note that a well-designed tty driver
- * should be prepared to accept the case where old == NULL, and try to
- * do something rational.
- */
-void ircomm_tty_set_termios(struct tty_struct *tty,
- struct ktermios *old_termios)
-{
- struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data;
- unsigned int cflag = tty->termios.c_cflag;
-
- if ((cflag == old_termios->c_cflag) &&
- (RELEVANT_IFLAG(tty->termios.c_iflag) ==
- RELEVANT_IFLAG(old_termios->c_iflag)))
- {
- return;
- }
-
- ircomm_tty_change_speed(self, tty);
-
- /* Handle transition to B0 status */
- if ((old_termios->c_cflag & CBAUD) && !(cflag & CBAUD)) {
- self->settings.dte &= ~(IRCOMM_DTR|IRCOMM_RTS);
- ircomm_param_request(self, IRCOMM_DTE, TRUE);
- }
-
- /* Handle transition away from B0 status */
- if (!(old_termios->c_cflag & CBAUD) && (cflag & CBAUD)) {
- self->settings.dte |= IRCOMM_DTR;
- if (!C_CRTSCTS(tty) || !tty_throttled(tty))
- self->settings.dte |= IRCOMM_RTS;
- ircomm_param_request(self, IRCOMM_DTE, TRUE);
- }
-
- /* Handle turning off CRTSCTS */
- if ((old_termios->c_cflag & CRTSCTS) && !C_CRTSCTS(tty))
- {
- tty->hw_stopped = 0;
- ircomm_tty_start(tty);
- }
-}
-
-/*
- * Function ircomm_tty_tiocmget (tty)
- *
- *
- *
- */
-int ircomm_tty_tiocmget(struct tty_struct *tty)
-{
- struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data;
- unsigned int result;
-
- if (tty_io_error(tty))
- return -EIO;
-
- result = ((self->settings.dte & IRCOMM_RTS) ? TIOCM_RTS : 0)
- | ((self->settings.dte & IRCOMM_DTR) ? TIOCM_DTR : 0)
- | ((self->settings.dce & IRCOMM_CD) ? TIOCM_CAR : 0)
- | ((self->settings.dce & IRCOMM_RI) ? TIOCM_RNG : 0)
- | ((self->settings.dce & IRCOMM_DSR) ? TIOCM_DSR : 0)
- | ((self->settings.dce & IRCOMM_CTS) ? TIOCM_CTS : 0);
- return result;
-}
-
-/*
- * Function ircomm_tty_tiocmset (tty, set, clear)
- *
- *
- *
- */
-int ircomm_tty_tiocmset(struct tty_struct *tty,
- unsigned int set, unsigned int clear)
-{
- struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data;
-
- if (tty_io_error(tty))
- return -EIO;
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
-
- if (set & TIOCM_RTS)
- self->settings.dte |= IRCOMM_RTS;
- if (set & TIOCM_DTR)
- self->settings.dte |= IRCOMM_DTR;
-
- if (clear & TIOCM_RTS)
- self->settings.dte &= ~IRCOMM_RTS;
- if (clear & TIOCM_DTR)
- self->settings.dte &= ~IRCOMM_DTR;
-
- if ((set|clear) & TIOCM_RTS)
- self->settings.dte |= IRCOMM_DELTA_RTS;
- if ((set|clear) & TIOCM_DTR)
- self->settings.dte |= IRCOMM_DELTA_DTR;
-
- ircomm_param_request(self, IRCOMM_DTE, TRUE);
-
- return 0;
-}
-
-/*
- * Function get_serial_info (driver, retinfo)
- *
- *
- *
- */
-static int ircomm_tty_get_serial_info(struct ircomm_tty_cb *self,
- struct serial_struct __user *retinfo)
-{
- struct serial_struct info;
-
- memset(&info, 0, sizeof(info));
- info.line = self->line;
- info.flags = self->port.flags;
- info.baud_base = self->settings.data_rate;
- info.close_delay = self->port.close_delay;
- info.closing_wait = self->port.closing_wait;
-
- /* For compatibility */
- info.type = PORT_16550A;
-
- if (copy_to_user(retinfo, &info, sizeof(*retinfo)))
- return -EFAULT;
-
- return 0;
-}
-
-/*
- * Function set_serial_info (driver, new_info)
- *
- *
- *
- */
-static int ircomm_tty_set_serial_info(struct ircomm_tty_cb *self,
- struct serial_struct __user *new_info)
-{
- return 0;
-}
-
-/*
- * Function ircomm_tty_ioctl (tty, cmd, arg)
- *
- *
- *
- */
-int ircomm_tty_ioctl(struct tty_struct *tty,
- unsigned int cmd, unsigned long arg)
-{
- struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data;
- int ret = 0;
-
- if ((cmd != TIOCGSERIAL) && (cmd != TIOCSSERIAL) &&
- (cmd != TIOCSERCONFIG) && (cmd != TIOCSERGSTRUCT) &&
- (cmd != TIOCMIWAIT) && (cmd != TIOCGICOUNT)) {
- if (tty_io_error(tty))
- return -EIO;
- }
-
- switch (cmd) {
- case TIOCGSERIAL:
- ret = ircomm_tty_get_serial_info(self, (struct serial_struct __user *) arg);
- break;
- case TIOCSSERIAL:
- ret = ircomm_tty_set_serial_info(self, (struct serial_struct __user *) arg);
- break;
- case TIOCMIWAIT:
- pr_debug("(), TIOCMIWAIT, not impl!\n");
- break;
-
- case TIOCGICOUNT:
- pr_debug("%s(), TIOCGICOUNT not impl!\n", __func__);
- return 0;
- default:
- ret = -ENOIOCTLCMD; /* ioctls which we must ignore */
- }
- return ret;
-}
-
-
-
diff --git a/net/irda/irda_device.c b/net/irda/irda_device.c
deleted file mode 100644
index 890b90d055d5..000000000000
--- a/net/irda/irda_device.c
+++ /dev/null
@@ -1,316 +0,0 @@
-/*********************************************************************
- *
- * Filename: irda_device.c
- * Version: 0.9
- * Description: Utility functions used by the device drivers
- * Status: Experimental.
- * Author: Dag Brattli <dagb@cs.uit.no>
- * Created at: Sat Oct 9 09:22:27 1999
- * Modified at: Sun Jan 23 17:41:24 2000
- * Modified by: Dag Brattli <dagb@cs.uit.no>
- *
- * Copyright (c) 1999-2000 Dag Brattli, All Rights Reserved.
- * Copyright (c) 2000-2001 Jean Tourrilhes <jt@hpl.hp.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
- ********************************************************************/
-
-#include <linux/string.h>
-#include <linux/proc_fs.h>
-#include <linux/skbuff.h>
-#include <linux/capability.h>
-#include <linux/if.h>
-#include <linux/if_ether.h>
-#include <linux/if_arp.h>
-#include <linux/netdevice.h>
-#include <linux/init.h>
-#include <linux/tty.h>
-#include <linux/kmod.h>
-#include <linux/spinlock.h>
-#include <linux/slab.h>
-#include <linux/export.h>
-
-#include <asm/ioctls.h>
-#include <linux/uaccess.h>
-#include <asm/dma.h>
-#include <asm/io.h>
-
-#include <net/irda/irda_device.h>
-#include <net/irda/irlap.h>
-#include <net/irda/timer.h>
-#include <net/irda/wrapper.h>
-
-static void __irda_task_delete(struct irda_task *task);
-
-static hashbin_t *dongles = NULL;
-static hashbin_t *tasks = NULL;
-
-static void irda_task_timer_expired(void *data);
-
-int __init irda_device_init( void)
-{
- dongles = hashbin_new(HB_NOLOCK);
- if (dongles == NULL) {
- net_warn_ratelimited("IrDA: Can't allocate dongles hashbin!\n");
- return -ENOMEM;
- }
- spin_lock_init(&dongles->hb_spinlock);
-
- tasks = hashbin_new(HB_LOCK);
- if (tasks == NULL) {
- net_warn_ratelimited("IrDA: Can't allocate tasks hashbin!\n");
- hashbin_delete(dongles, NULL);
- return -ENOMEM;
- }
-
- /* We no longer initialise the driver ourselves here, we let
- * the system do it for us... - Jean II */
-
- return 0;
-}
-
-static void leftover_dongle(void *arg)
-{
- struct dongle_reg *reg = arg;
- net_warn_ratelimited("IrDA: Dongle type %x not unregistered\n",
- reg->type);
-}
-
-void irda_device_cleanup(void)
-{
- hashbin_delete(tasks, (FREE_FUNC) __irda_task_delete);
-
- hashbin_delete(dongles, leftover_dongle);
-}
-
-/*
- * Function irda_device_set_media_busy (self, status)
- *
- * Called when we have detected that another station is transmitting
- * in contention mode.
- */
-void irda_device_set_media_busy(struct net_device *dev, int status)
-{
- struct irlap_cb *self;
-
- pr_debug("%s(%s)\n", __func__, status ? "TRUE" : "FALSE");
-
- self = (struct irlap_cb *) dev->atalk_ptr;
-
- /* Some drivers may enable the receive interrupt before calling
- * irlap_open(), or they may disable the receive interrupt
- * after calling irlap_close().
- * The IrDA stack is protected from this in irlap_driver_rcv().
- * However, the driver calls directly the wrapper, that calls
- * us directly. Make sure we protect ourselves.
- * Jean II */
- if (!self || self->magic != LAP_MAGIC)
- return;
-
- if (status) {
- self->media_busy = TRUE;
- if (status == SMALL)
- irlap_start_mbusy_timer(self, SMALLBUSY_TIMEOUT);
- else
- irlap_start_mbusy_timer(self, MEDIABUSY_TIMEOUT);
- pr_debug("Media busy!\n");
- } else {
- self->media_busy = FALSE;
- irlap_stop_mbusy_timer(self);
- }
-}
-EXPORT_SYMBOL(irda_device_set_media_busy);
-
-
-/*
- * Function irda_device_is_receiving (dev)
- *
- * Check if the device driver is currently receiving data
- *
- */
-int irda_device_is_receiving(struct net_device *dev)
-{
- struct if_irda_req req;
- int ret;
-
- if (!dev->netdev_ops->ndo_do_ioctl) {
- net_err_ratelimited("%s: do_ioctl not impl. by device driver\n",
- __func__);
- return -1;
- }
-
- ret = (dev->netdev_ops->ndo_do_ioctl)(dev, (struct ifreq *) &req,
- SIOCGRECEIVING);
- if (ret < 0)
- return ret;
-
- return req.ifr_receiving;
-}
-
-static void __irda_task_delete(struct irda_task *task)
-{
- del_timer(&task->timer);
-
- kfree(task);
-}
-
-static void irda_task_delete(struct irda_task *task)
-{
- /* Unregister task */
- hashbin_remove(tasks, (long) task, NULL);
-
- __irda_task_delete(task);
-}
-
-/*
- * Function irda_task_kick (task)
- *
- * Tries to execute a task possible multiple times until the task is either
- * finished, or askes for a timeout. When a task is finished, we do post
- * processing, and notify the parent task, that is waiting for this task
- * to complete.
- */
-static int irda_task_kick(struct irda_task *task)
-{
- int finished = TRUE;
- int count = 0;
- int timeout;
-
- IRDA_ASSERT(task != NULL, return -1;);
- IRDA_ASSERT(task->magic == IRDA_TASK_MAGIC, return -1;);
-
- /* Execute task until it's finished, or askes for a timeout */
- do {
- timeout = task->function(task);
- if (count++ > 100) {
- net_err_ratelimited("%s: error in task handler!\n",
- __func__);
- irda_task_delete(task);
- return TRUE;
- }
- } while ((timeout == 0) && (task->state != IRDA_TASK_DONE));
-
- if (timeout < 0) {
- net_err_ratelimited("%s: Error executing task!\n", __func__);
- irda_task_delete(task);
- return TRUE;
- }
-
- /* Check if we are finished */
- if (task->state == IRDA_TASK_DONE) {
- del_timer(&task->timer);
-
- /* Do post processing */
- if (task->finished)
- task->finished(task);
-
- /* Notify parent */
- if (task->parent) {
- /* Check if parent is waiting for us to complete */
- if (task->parent->state == IRDA_TASK_CHILD_WAIT) {
- task->parent->state = IRDA_TASK_CHILD_DONE;
-
- /* Stop timer now that we are here */
- del_timer(&task->parent->timer);
-
- /* Kick parent task */
- irda_task_kick(task->parent);
- }
- }
- irda_task_delete(task);
- } else if (timeout > 0) {
- irda_start_timer(&task->timer, timeout, (void *) task,
- irda_task_timer_expired);
- finished = FALSE;
- } else {
- pr_debug("%s(), not finished, and no timeout!\n",
- __func__);
- finished = FALSE;
- }
-
- return finished;
-}
-
-/*
- * Function irda_task_timer_expired (data)
- *
- * Task time has expired. We now try to execute task (again), and restart
- * the timer if the task has not finished yet
- */
-static void irda_task_timer_expired(void *data)
-{
- struct irda_task *task;
-
- task = data;
-
- irda_task_kick(task);
-}
-
-/*
- * Function irda_device_setup (dev)
- *
- * This function should be used by low level device drivers in a similar way
- * as ether_setup() is used by normal network device drivers
- */
-static void irda_device_setup(struct net_device *dev)
-{
- dev->hard_header_len = 0;
- dev->addr_len = LAP_ALEN;
-
- dev->type = ARPHRD_IRDA;
- dev->tx_queue_len = 8; /* Window size + 1 s-frame */
-
- memset(dev->broadcast, 0xff, LAP_ALEN);
-
- dev->mtu = 2048;
- dev->flags = IFF_NOARP;
-}
-
-/*
- * Funciton alloc_irdadev
- * Allocates and sets up an IRDA device in a manner similar to
- * alloc_etherdev.
- */
-struct net_device *alloc_irdadev(int sizeof_priv)
-{
- return alloc_netdev(sizeof_priv, "irda%d", NET_NAME_UNKNOWN,
- irda_device_setup);
-}
-EXPORT_SYMBOL(alloc_irdadev);
-
-#ifdef CONFIG_ISA_DMA_API
-/*
- * Function setup_dma (idev, buffer, count, mode)
- *
- * Setup the DMA channel. Commonly used by LPC FIR drivers
- *
- */
-void irda_setup_dma(int channel, dma_addr_t buffer, int count, int mode)
-{
- unsigned long flags;
-
- flags = claim_dma_lock();
-
- disable_dma(channel);
- clear_dma_ff(channel);
- set_dma_mode(channel, mode);
- set_dma_addr(channel, buffer);
- set_dma_count(channel, count);
- enable_dma(channel);
-
- release_dma_lock(flags);
-}
-EXPORT_SYMBOL(irda_setup_dma);
-#endif
diff --git a/net/irda/iriap.c b/net/irda/iriap.c
deleted file mode 100644
index 1138eaf5c682..000000000000
--- a/net/irda/iriap.c
+++ /dev/null
@@ -1,1085 +0,0 @@
-/*********************************************************************
- *
- * Filename: iriap.c
- * Version: 0.8
- * Description: Information Access Protocol (IAP)
- * Status: Experimental.
- * Author: Dag Brattli <dagb@cs.uit.no>
- * Created at: Thu Aug 21 00:02:07 1997
- * Modified at: Sat Dec 25 16:42:42 1999
- * Modified by: Dag Brattli <dagb@cs.uit.no>
- *
- * Copyright (c) 1998-1999 Dag Brattli <dagb@cs.uit.no>,
- * All Rights Reserved.
- * Copyright (c) 2000-2003 Jean Tourrilhes <jt@hpl.hp.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * Neither Dag Brattli nor University of Tromsø admit liability nor
- * provide warranty for any of this software. This material is
- * provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/skbuff.h>
-#include <linux/fs.h>
-#include <linux/string.h>
-#include <linux/init.h>
-#include <linux/seq_file.h>
-#include <linux/slab.h>
-
-#include <asm/byteorder.h>
-#include <asm/unaligned.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/irttp.h>
-#include <net/irda/irlmp.h>
-#include <net/irda/irias_object.h>
-#include <net/irda/iriap_event.h>
-#include <net/irda/iriap.h>
-
-/* FIXME: This one should go in irlmp.c */
-static const char *const ias_charset_types[] __maybe_unused = {
- "CS_ASCII",
- "CS_ISO_8859_1",
- "CS_ISO_8859_2",
- "CS_ISO_8859_3",
- "CS_ISO_8859_4",
- "CS_ISO_8859_5",
- "CS_ISO_8859_6",
- "CS_ISO_8859_7",
- "CS_ISO_8859_8",
- "CS_ISO_8859_9",
- "CS_UNICODE"
-};
-
-static hashbin_t *iriap = NULL;
-static void *service_handle;
-
-static void __iriap_close(struct iriap_cb *self);
-static int iriap_register_lsap(struct iriap_cb *self, __u8 slsap_sel, int mode);
-static void iriap_disconnect_indication(void *instance, void *sap,
- LM_REASON reason, struct sk_buff *skb);
-static void iriap_connect_indication(void *instance, void *sap,
- struct qos_info *qos, __u32 max_sdu_size,
- __u8 max_header_size,
- struct sk_buff *skb);
-static void iriap_connect_confirm(void *instance, void *sap,
- struct qos_info *qos,
- __u32 max_sdu_size, __u8 max_header_size,
- struct sk_buff *skb);
-static int iriap_data_indication(void *instance, void *sap,
- struct sk_buff *skb);
-
-static void iriap_watchdog_timer_expired(void *data);
-
-static inline void iriap_start_watchdog_timer(struct iriap_cb *self,
- int timeout)
-{
- irda_start_timer(&self->watchdog_timer, timeout, self,
- iriap_watchdog_timer_expired);
-}
-
-static struct lock_class_key irias_objects_key;
-
-/*
- * Function iriap_init (void)
- *
- * Initializes the IrIAP layer, called by the module initialization code
- * in irmod.c
- */
-int __init iriap_init(void)
-{
- struct ias_object *obj;
- struct iriap_cb *server;
- __u8 oct_seq[6];
- __u16 hints;
-
- /* Allocate master array */
- iriap = hashbin_new(HB_LOCK);
- if (!iriap)
- return -ENOMEM;
-
- /* Object repository - defined in irias_object.c */
- irias_objects = hashbin_new(HB_LOCK);
- if (!irias_objects) {
- net_warn_ratelimited("%s: Can't allocate irias_objects hashbin!\n",
- __func__);
- hashbin_delete(iriap, NULL);
- return -ENOMEM;
- }
-
- lockdep_set_class_and_name(&irias_objects->hb_spinlock, &irias_objects_key,
- "irias_objects");
-
- /*
- * Register some default services for IrLMP
- */
- hints = irlmp_service_to_hint(S_COMPUTER);
- service_handle = irlmp_register_service(hints);
-
- /* Register the Device object with LM-IAS */
- obj = irias_new_object("Device", IAS_DEVICE_ID);
- irias_add_string_attrib(obj, "DeviceName", "Linux", IAS_KERNEL_ATTR);
-
- oct_seq[0] = 0x01; /* Version 1 */
- oct_seq[1] = 0x00; /* IAS support bits */
- oct_seq[2] = 0x00; /* LM-MUX support bits */
-#ifdef CONFIG_IRDA_ULTRA
- oct_seq[2] |= 0x04; /* Connectionless Data support */
-#endif
- irias_add_octseq_attrib(obj, "IrLMPSupport", oct_seq, 3,
- IAS_KERNEL_ATTR);
- irias_insert_object(obj);
-
- /*
- * Register server support with IrLMP so we can accept incoming
- * connections
- */
- server = iriap_open(LSAP_IAS, IAS_SERVER, NULL, NULL);
- if (!server) {
- pr_debug("%s(), unable to open server\n", __func__);
- return -1;
- }
- iriap_register_lsap(server, LSAP_IAS, IAS_SERVER);
-
- return 0;
-}
-
-/*
- * Function iriap_cleanup (void)
- *
- * Initializes the IrIAP layer, called by the module cleanup code in
- * irmod.c
- */
-void iriap_cleanup(void)
-{
- irlmp_unregister_service(service_handle);
-
- hashbin_delete(iriap, (FREE_FUNC) __iriap_close);
- hashbin_delete(irias_objects, (FREE_FUNC) __irias_delete_object);
-}
-
-/*
- * Function iriap_open (void)
- *
- * Opens an instance of the IrIAP layer, and registers with IrLMP
- */
-struct iriap_cb *iriap_open(__u8 slsap_sel, int mode, void *priv,
- CONFIRM_CALLBACK callback)
-{
- struct iriap_cb *self;
-
- self = kzalloc(sizeof(*self), GFP_ATOMIC);
- if (!self)
- return NULL;
-
- /*
- * Initialize instance
- */
-
- self->magic = IAS_MAGIC;
- self->mode = mode;
- if (mode == IAS_CLIENT) {
- if (iriap_register_lsap(self, slsap_sel, mode)) {
- kfree(self);
- return NULL;
- }
- }
-
- self->confirm = callback;
- self->priv = priv;
-
- /* iriap_getvaluebyclass_request() will construct packets before
- * we connect, so this must have a sane value... Jean II */
- self->max_header_size = LMP_MAX_HEADER;
-
- init_timer(&self->watchdog_timer);
-
- hashbin_insert(iriap, (irda_queue_t *) self, (long) self, NULL);
-
- /* Initialize state machines */
- iriap_next_client_state(self, S_DISCONNECT);
- iriap_next_call_state(self, S_MAKE_CALL);
- iriap_next_server_state(self, R_DISCONNECT);
- iriap_next_r_connect_state(self, R_WAITING);
-
- return self;
-}
-EXPORT_SYMBOL(iriap_open);
-
-/*
- * Function __iriap_close (self)
- *
- * Removes (deallocates) the IrIAP instance
- *
- */
-static void __iriap_close(struct iriap_cb *self)
-{
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
-
- del_timer(&self->watchdog_timer);
-
- if (self->request_skb)
- dev_kfree_skb(self->request_skb);
-
- self->magic = 0;
-
- kfree(self);
-}
-
-/*
- * Function iriap_close (void)
- *
- * Closes IrIAP and deregisters with IrLMP
- */
-void iriap_close(struct iriap_cb *self)
-{
- struct iriap_cb *entry;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
-
- if (self->lsap) {
- irlmp_close_lsap(self->lsap);
- self->lsap = NULL;
- }
-
- entry = (struct iriap_cb *) hashbin_remove(iriap, (long) self, NULL);
- IRDA_ASSERT(entry == self, return;);
-
- __iriap_close(self);
-}
-EXPORT_SYMBOL(iriap_close);
-
-static int iriap_register_lsap(struct iriap_cb *self, __u8 slsap_sel, int mode)
-{
- notify_t notify;
-
- irda_notify_init(&notify);
- notify.connect_confirm = iriap_connect_confirm;
- notify.connect_indication = iriap_connect_indication;
- notify.disconnect_indication = iriap_disconnect_indication;
- notify.data_indication = iriap_data_indication;
- notify.instance = self;
- if (mode == IAS_CLIENT)
- strcpy(notify.name, "IrIAS cli");
- else
- strcpy(notify.name, "IrIAS srv");
-
- self->lsap = irlmp_open_lsap(slsap_sel, &notify, 0);
- if (self->lsap == NULL) {
- net_err_ratelimited("%s: Unable to allocated LSAP!\n",
- __func__);
- return -1;
- }
- self->slsap_sel = self->lsap->slsap_sel;
-
- return 0;
-}
-
-/*
- * Function iriap_disconnect_indication (handle, reason)
- *
- * Got disconnect, so clean up everything associated with this connection
- *
- */
-static void iriap_disconnect_indication(void *instance, void *sap,
- LM_REASON reason,
- struct sk_buff *skb)
-{
- struct iriap_cb *self;
-
- pr_debug("%s(), reason=%s [%d]\n", __func__,
- irlmp_reason_str(reason), reason);
-
- self = instance;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
-
- IRDA_ASSERT(iriap != NULL, return;);
-
- del_timer(&self->watchdog_timer);
-
- /* Not needed */
- if (skb)
- dev_kfree_skb(skb);
-
- if (self->mode == IAS_CLIENT) {
- pr_debug("%s(), disconnect as client\n", __func__);
-
-
- iriap_do_client_event(self, IAP_LM_DISCONNECT_INDICATION,
- NULL);
- /*
- * Inform service user that the request failed by sending
- * it a NULL value. Warning, the client might close us, so
- * remember no to use self anymore after calling confirm
- */
- if (self->confirm)
- self->confirm(IAS_DISCONNECT, 0, NULL, self->priv);
- } else {
- pr_debug("%s(), disconnect as server\n", __func__);
- iriap_do_server_event(self, IAP_LM_DISCONNECT_INDICATION,
- NULL);
- iriap_close(self);
- }
-}
-
-/*
- * Function iriap_disconnect_request (handle)
- */
-static void iriap_disconnect_request(struct iriap_cb *self)
-{
- struct sk_buff *tx_skb;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
-
- tx_skb = alloc_skb(LMP_MAX_HEADER, GFP_ATOMIC);
- if (tx_skb == NULL) {
- pr_debug("%s(), Could not allocate an sk_buff of length %d\n",
- __func__, LMP_MAX_HEADER);
- return;
- }
-
- /*
- * Reserve space for MUX control and LAP header
- */
- skb_reserve(tx_skb, LMP_MAX_HEADER);
-
- irlmp_disconnect_request(self->lsap, tx_skb);
-}
-
-/*
- * Function iriap_getvaluebyclass (addr, name, attr)
- *
- * Retrieve all values from attribute in all objects with given class
- * name
- */
-int iriap_getvaluebyclass_request(struct iriap_cb *self,
- __u32 saddr, __u32 daddr,
- char *name, char *attr)
-{
- struct sk_buff *tx_skb;
- int name_len, attr_len, skb_len;
- __u8 *frame;
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == IAS_MAGIC, return -1;);
-
- /* Client must supply the destination device address */
- if (!daddr)
- return -1;
-
- self->daddr = daddr;
- self->saddr = saddr;
-
- /*
- * Save operation, so we know what the later indication is about
- */
- self->operation = GET_VALUE_BY_CLASS;
-
- /* Give ourselves 10 secs to finish this operation */
- iriap_start_watchdog_timer(self, 10*HZ);
-
- name_len = strlen(name); /* Up to IAS_MAX_CLASSNAME = 60 */
- attr_len = strlen(attr); /* Up to IAS_MAX_ATTRIBNAME = 60 */
-
- skb_len = self->max_header_size+2+name_len+1+attr_len+4;
- tx_skb = alloc_skb(skb_len, GFP_ATOMIC);
- if (!tx_skb)
- return -ENOMEM;
-
- /* Reserve space for MUX and LAP header */
- skb_reserve(tx_skb, self->max_header_size);
- skb_put(tx_skb, 3+name_len+attr_len);
- frame = tx_skb->data;
-
- /* Build frame */
- frame[0] = IAP_LST | GET_VALUE_BY_CLASS;
- frame[1] = name_len; /* Insert length of name */
- memcpy(frame+2, name, name_len); /* Insert name */
- frame[2+name_len] = attr_len; /* Insert length of attr */
- memcpy(frame+3+name_len, attr, attr_len); /* Insert attr */
-
- iriap_do_client_event(self, IAP_CALL_REQUEST_GVBC, tx_skb);
-
- /* Drop reference count - see state_s_disconnect(). */
- dev_kfree_skb(tx_skb);
-
- return 0;
-}
-EXPORT_SYMBOL(iriap_getvaluebyclass_request);
-
-/*
- * Function iriap_getvaluebyclass_confirm (self, skb)
- *
- * Got result from GetValueByClass command. Parse it and return result
- * to service user.
- *
- */
-static void iriap_getvaluebyclass_confirm(struct iriap_cb *self,
- struct sk_buff *skb)
-{
- struct ias_value *value;
- int charset;
- __u32 value_len;
- __u32 tmp_cpu32;
- __u16 obj_id;
- __u16 len;
- __u8 type;
- __u8 *fp;
- int n;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
- IRDA_ASSERT(skb != NULL, return;);
-
- /* Initialize variables */
- fp = skb->data;
- n = 2;
-
- /* Get length, MSB first */
- len = get_unaligned_be16(fp + n);
- n += 2;
-
- pr_debug("%s(), len=%d\n", __func__, len);
-
- /* Get object ID, MSB first */
- obj_id = get_unaligned_be16(fp + n);
- n += 2;
-
- type = fp[n++];
- pr_debug("%s(), Value type = %d\n", __func__, type);
-
- switch (type) {
- case IAS_INTEGER:
- memcpy(&tmp_cpu32, fp+n, 4); n += 4;
- be32_to_cpus(&tmp_cpu32);
- value = irias_new_integer_value(tmp_cpu32);
-
- /* Legal values restricted to 0x01-0x6f, page 15 irttp */
- pr_debug("%s(), lsap=%d\n", __func__, value->t.integer);
- break;
- case IAS_STRING:
- charset = fp[n++];
-
- switch (charset) {
- case CS_ASCII:
- break;
-/* case CS_ISO_8859_1: */
-/* case CS_ISO_8859_2: */
-/* case CS_ISO_8859_3: */
-/* case CS_ISO_8859_4: */
-/* case CS_ISO_8859_5: */
-/* case CS_ISO_8859_6: */
-/* case CS_ISO_8859_7: */
-/* case CS_ISO_8859_8: */
-/* case CS_ISO_8859_9: */
-/* case CS_UNICODE: */
- default:
- pr_debug("%s(), charset [%d] %s, not supported\n",
- __func__, charset,
- charset < ARRAY_SIZE(ias_charset_types) ?
- ias_charset_types[charset] :
- "(unknown)");
-
- /* Aborting, close connection! */
- iriap_disconnect_request(self);
- return;
- /* break; */
- }
- value_len = fp[n++];
- pr_debug("%s(), strlen=%d\n", __func__, value_len);
-
- /* Make sure the string is null-terminated */
- if (n + value_len < skb->len)
- fp[n + value_len] = 0x00;
- pr_debug("Got string %s\n", fp+n);
-
- /* Will truncate to IAS_MAX_STRING bytes */
- value = irias_new_string_value(fp+n);
- break;
- case IAS_OCT_SEQ:
- value_len = get_unaligned_be16(fp + n);
- n += 2;
-
- /* Will truncate to IAS_MAX_OCTET_STRING bytes */
- value = irias_new_octseq_value(fp+n, value_len);
- break;
- default:
- value = irias_new_missing_value();
- break;
- }
-
- /* Finished, close connection! */
- iriap_disconnect_request(self);
-
- /* Warning, the client might close us, so remember no to use self
- * anymore after calling confirm
- */
- if (self->confirm)
- self->confirm(IAS_SUCCESS, obj_id, value, self->priv);
- else {
- pr_debug("%s(), missing handler!\n", __func__);
- irias_delete_value(value);
- }
-}
-
-/*
- * Function iriap_getvaluebyclass_response ()
- *
- * Send answer back to remote LM-IAS
- *
- */
-static void iriap_getvaluebyclass_response(struct iriap_cb *self,
- __u16 obj_id,
- __u8 ret_code,
- struct ias_value *value)
-{
- struct sk_buff *tx_skb;
- int n;
- __be32 tmp_be32;
- __be16 tmp_be16;
- __u8 *fp;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
- IRDA_ASSERT(value != NULL, return;);
- IRDA_ASSERT(value->len <= 1024, return;);
-
- /* Initialize variables */
- n = 0;
-
- /*
- * We must adjust the size of the response after the length of the
- * value. We add 32 bytes because of the 6 bytes for the frame and
- * max 5 bytes for the value coding.
- */
- tx_skb = alloc_skb(value->len + self->max_header_size + 32,
- GFP_ATOMIC);
- if (!tx_skb)
- return;
-
- /* Reserve space for MUX and LAP header */
- skb_reserve(tx_skb, self->max_header_size);
- skb_put(tx_skb, 6);
-
- fp = tx_skb->data;
-
- /* Build frame */
- fp[n++] = GET_VALUE_BY_CLASS | IAP_LST;
- fp[n++] = ret_code;
-
- /* Insert list length (MSB first) */
- tmp_be16 = htons(0x0001);
- memcpy(fp+n, &tmp_be16, 2); n += 2;
-
- /* Insert object identifier ( MSB first) */
- tmp_be16 = cpu_to_be16(obj_id);
- memcpy(fp+n, &tmp_be16, 2); n += 2;
-
- switch (value->type) {
- case IAS_STRING:
- skb_put(tx_skb, 3 + value->len);
- fp[n++] = value->type;
- fp[n++] = 0; /* ASCII */
- fp[n++] = (__u8) value->len;
- memcpy(fp+n, value->t.string, value->len); n+=value->len;
- break;
- case IAS_INTEGER:
- skb_put(tx_skb, 5);
- fp[n++] = value->type;
-
- tmp_be32 = cpu_to_be32(value->t.integer);
- memcpy(fp+n, &tmp_be32, 4); n += 4;
- break;
- case IAS_OCT_SEQ:
- skb_put(tx_skb, 3 + value->len);
- fp[n++] = value->type;
-
- tmp_be16 = cpu_to_be16(value->len);
- memcpy(fp+n, &tmp_be16, 2); n += 2;
- memcpy(fp+n, value->t.oct_seq, value->len); n+=value->len;
- break;
- case IAS_MISSING:
- pr_debug("%s: sending IAS_MISSING\n", __func__);
- skb_put(tx_skb, 1);
- fp[n++] = value->type;
- break;
- default:
- pr_debug("%s(), type not implemented!\n", __func__);
- break;
- }
- iriap_do_r_connect_event(self, IAP_CALL_RESPONSE, tx_skb);
-
- /* Drop reference count - see state_r_execute(). */
- dev_kfree_skb(tx_skb);
-}
-
-/*
- * Function iriap_getvaluebyclass_indication (self, skb)
- *
- * getvaluebyclass is requested from peer LM-IAS
- *
- */
-static void iriap_getvaluebyclass_indication(struct iriap_cb *self,
- struct sk_buff *skb)
-{
- struct ias_object *obj;
- struct ias_attrib *attrib;
- int name_len;
- int attr_len;
- char name[IAS_MAX_CLASSNAME + 1]; /* 60 bytes */
- char attr[IAS_MAX_ATTRIBNAME + 1]; /* 60 bytes */
- __u8 *fp;
- int n;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
- IRDA_ASSERT(skb != NULL, return;);
-
- fp = skb->data;
- n = 1;
-
- name_len = fp[n++];
-
- IRDA_ASSERT(name_len < IAS_MAX_CLASSNAME + 1, return;);
-
- memcpy(name, fp+n, name_len); n+=name_len;
- name[name_len] = '\0';
-
- attr_len = fp[n++];
-
- IRDA_ASSERT(attr_len < IAS_MAX_ATTRIBNAME + 1, return;);
-
- memcpy(attr, fp+n, attr_len); n+=attr_len;
- attr[attr_len] = '\0';
-
- pr_debug("LM-IAS: Looking up %s: %s\n", name, attr);
- obj = irias_find_object(name);
-
- if (obj == NULL) {
- pr_debug("LM-IAS: Object %s not found\n", name);
- iriap_getvaluebyclass_response(self, 0x1235, IAS_CLASS_UNKNOWN,
- &irias_missing);
- return;
- }
- pr_debug("LM-IAS: found %s, id=%d\n", obj->name, obj->id);
-
- attrib = irias_find_attrib(obj, attr);
- if (attrib == NULL) {
- pr_debug("LM-IAS: Attribute %s not found\n", attr);
- iriap_getvaluebyclass_response(self, obj->id,
- IAS_ATTRIB_UNKNOWN,
- &irias_missing);
- return;
- }
-
- /* We have a match; send the value. */
- iriap_getvaluebyclass_response(self, obj->id, IAS_SUCCESS,
- attrib->value);
-}
-
-/*
- * Function iriap_send_ack (void)
- *
- * Currently not used
- *
- */
-void iriap_send_ack(struct iriap_cb *self)
-{
- struct sk_buff *tx_skb;
- __u8 *frame;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
-
- tx_skb = alloc_skb(LMP_MAX_HEADER + 1, GFP_ATOMIC);
- if (!tx_skb)
- return;
-
- /* Reserve space for MUX and LAP header */
- skb_reserve(tx_skb, self->max_header_size);
- skb_put(tx_skb, 1);
- frame = tx_skb->data;
-
- /* Build frame */
- frame[0] = IAP_LST | IAP_ACK | self->operation;
-
- irlmp_data_request(self->lsap, tx_skb);
-}
-
-void iriap_connect_request(struct iriap_cb *self)
-{
- int ret;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
-
- ret = irlmp_connect_request(self->lsap, LSAP_IAS,
- self->saddr, self->daddr,
- NULL, NULL);
- if (ret < 0) {
- pr_debug("%s(), connect failed!\n", __func__);
- self->confirm(IAS_DISCONNECT, 0, NULL, self->priv);
- }
-}
-
-/*
- * Function iriap_connect_confirm (handle, skb)
- *
- * LSAP connection confirmed!
- *
- */
-static void iriap_connect_confirm(void *instance, void *sap,
- struct qos_info *qos, __u32 max_seg_size,
- __u8 max_header_size,
- struct sk_buff *skb)
-{
- struct iriap_cb *self;
-
- self = instance;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
- IRDA_ASSERT(skb != NULL, return;);
-
- self->max_data_size = max_seg_size;
- self->max_header_size = max_header_size;
-
- del_timer(&self->watchdog_timer);
-
- iriap_do_client_event(self, IAP_LM_CONNECT_CONFIRM, skb);
-
- /* Drop reference count - see state_s_make_call(). */
- dev_kfree_skb(skb);
-}
-
-/*
- * Function iriap_connect_indication ( handle, skb)
- *
- * Remote LM-IAS is requesting connection
- *
- */
-static void iriap_connect_indication(void *instance, void *sap,
- struct qos_info *qos, __u32 max_seg_size,
- __u8 max_header_size,
- struct sk_buff *skb)
-{
- struct iriap_cb *self, *new;
-
- self = instance;
-
- IRDA_ASSERT(skb != NULL, return;);
- IRDA_ASSERT(self != NULL, goto out;);
- IRDA_ASSERT(self->magic == IAS_MAGIC, goto out;);
-
- /* Start new server */
- new = iriap_open(LSAP_IAS, IAS_SERVER, NULL, NULL);
- if (!new) {
- pr_debug("%s(), open failed\n", __func__);
- goto out;
- }
-
- /* Now attach up the new "socket" */
- new->lsap = irlmp_dup(self->lsap, new);
- if (!new->lsap) {
- pr_debug("%s(), dup failed!\n", __func__);
- goto out;
- }
-
- new->max_data_size = max_seg_size;
- new->max_header_size = max_header_size;
-
- /* Clean up the original one to keep it in listen state */
- irlmp_listen(self->lsap);
-
- iriap_do_server_event(new, IAP_LM_CONNECT_INDICATION, skb);
-
-out:
- /* Drop reference count - see state_r_disconnect(). */
- dev_kfree_skb(skb);
-}
-
-/*
- * Function iriap_data_indication (handle, skb)
- *
- * Receives data from connection identified by handle from IrLMP
- *
- */
-static int iriap_data_indication(void *instance, void *sap,
- struct sk_buff *skb)
-{
- struct iriap_cb *self;
- __u8 *frame;
- __u8 opcode;
-
- self = instance;
-
- IRDA_ASSERT(skb != NULL, return 0;);
- IRDA_ASSERT(self != NULL, goto out;);
- IRDA_ASSERT(self->magic == IAS_MAGIC, goto out;);
-
- frame = skb->data;
-
- if (self->mode == IAS_SERVER) {
- /* Call server */
- pr_debug("%s(), Calling server!\n", __func__);
- iriap_do_r_connect_event(self, IAP_RECV_F_LST, skb);
- goto out;
- }
- opcode = frame[0];
- if (~opcode & IAP_LST) {
- net_warn_ratelimited("%s:, IrIAS multiframe commands or results is not implemented yet!\n",
- __func__);
- goto out;
- }
-
- /* Check for ack frames since they don't contain any data */
- if (opcode & IAP_ACK) {
- pr_debug("%s() Got ack frame!\n", __func__);
- goto out;
- }
-
- opcode &= ~IAP_LST; /* Mask away LST bit */
-
- switch (opcode) {
- case GET_INFO_BASE:
- pr_debug("IrLMP GetInfoBaseDetails not implemented!\n");
- break;
- case GET_VALUE_BY_CLASS:
- iriap_do_call_event(self, IAP_RECV_F_LST, NULL);
-
- switch (frame[1]) {
- case IAS_SUCCESS:
- iriap_getvaluebyclass_confirm(self, skb);
- break;
- case IAS_CLASS_UNKNOWN:
- pr_debug("%s(), No such class!\n", __func__);
- /* Finished, close connection! */
- iriap_disconnect_request(self);
-
- /*
- * Warning, the client might close us, so remember
- * no to use self anymore after calling confirm
- */
- if (self->confirm)
- self->confirm(IAS_CLASS_UNKNOWN, 0, NULL,
- self->priv);
- break;
- case IAS_ATTRIB_UNKNOWN:
- pr_debug("%s(), No such attribute!\n", __func__);
- /* Finished, close connection! */
- iriap_disconnect_request(self);
-
- /*
- * Warning, the client might close us, so remember
- * no to use self anymore after calling confirm
- */
- if (self->confirm)
- self->confirm(IAS_ATTRIB_UNKNOWN, 0, NULL,
- self->priv);
- break;
- }
- break;
- default:
- pr_debug("%s(), Unknown op-code: %02x\n", __func__,
- opcode);
- break;
- }
-
-out:
- /* Cleanup - sub-calls will have done skb_get() as needed. */
- dev_kfree_skb(skb);
- return 0;
-}
-
-/*
- * Function iriap_call_indication (self, skb)
- *
- * Received call to server from peer LM-IAS
- *
- */
-void iriap_call_indication(struct iriap_cb *self, struct sk_buff *skb)
-{
- __u8 *fp;
- __u8 opcode;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
- IRDA_ASSERT(skb != NULL, return;);
-
- fp = skb->data;
-
- opcode = fp[0];
- if (~opcode & 0x80) {
- net_warn_ratelimited("%s: IrIAS multiframe commands or results is not implemented yet!\n",
- __func__);
- return;
- }
- opcode &= 0x7f; /* Mask away LST bit */
-
- switch (opcode) {
- case GET_INFO_BASE:
- net_warn_ratelimited("%s: GetInfoBaseDetails not implemented yet!\n",
- __func__);
- break;
- case GET_VALUE_BY_CLASS:
- iriap_getvaluebyclass_indication(self, skb);
- break;
- }
- /* skb will be cleaned up in iriap_data_indication */
-}
-
-/*
- * Function iriap_watchdog_timer_expired (data)
- *
- * Query has taken too long time, so abort
- *
- */
-static void iriap_watchdog_timer_expired(void *data)
-{
- struct iriap_cb *self = (struct iriap_cb *) data;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
-
- /* iriap_close(self); */
-}
-
-#ifdef CONFIG_PROC_FS
-
-static const char *const ias_value_types[] = {
- "IAS_MISSING",
- "IAS_INTEGER",
- "IAS_OCT_SEQ",
- "IAS_STRING"
-};
-
-static inline struct ias_object *irias_seq_idx(loff_t pos)
-{
- struct ias_object *obj;
-
- for (obj = (struct ias_object *) hashbin_get_first(irias_objects);
- obj; obj = (struct ias_object *) hashbin_get_next(irias_objects)) {
- if (pos-- == 0)
- break;
- }
-
- return obj;
-}
-
-static void *irias_seq_start(struct seq_file *seq, loff_t *pos)
-{
- spin_lock_irq(&irias_objects->hb_spinlock);
-
- return *pos ? irias_seq_idx(*pos - 1) : SEQ_START_TOKEN;
-}
-
-static void *irias_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- ++*pos;
-
- return (v == SEQ_START_TOKEN)
- ? (void *) hashbin_get_first(irias_objects)
- : (void *) hashbin_get_next(irias_objects);
-}
-
-static void irias_seq_stop(struct seq_file *seq, void *v)
-{
- spin_unlock_irq(&irias_objects->hb_spinlock);
-}
-
-static int irias_seq_show(struct seq_file *seq, void *v)
-{
- if (v == SEQ_START_TOKEN)
- seq_puts(seq, "LM-IAS Objects:\n");
- else {
- struct ias_object *obj = v;
- struct ias_attrib *attrib;
-
- IRDA_ASSERT(obj->magic == IAS_OBJECT_MAGIC, return -EINVAL;);
-
- seq_printf(seq, "name: %s, id=%d\n",
- obj->name, obj->id);
-
- /* Careful for priority inversions here !
- * All other uses of attrib spinlock are independent of
- * the object spinlock, so we are safe. Jean II */
- spin_lock(&obj->attribs->hb_spinlock);
-
- /* List all attributes for this object */
- for (attrib = (struct ias_attrib *) hashbin_get_first(obj->attribs);
- attrib != NULL;
- attrib = (struct ias_attrib *) hashbin_get_next(obj->attribs)) {
-
- IRDA_ASSERT(attrib->magic == IAS_ATTRIB_MAGIC,
- goto outloop; );
-
- seq_printf(seq, " - Attribute name: \"%s\", ",
- attrib->name);
- seq_printf(seq, "value[%s]: ",
- ias_value_types[attrib->value->type]);
-
- switch (attrib->value->type) {
- case IAS_INTEGER:
- seq_printf(seq, "%d\n",
- attrib->value->t.integer);
- break;
- case IAS_STRING:
- seq_printf(seq, "\"%s\"\n",
- attrib->value->t.string);
- break;
- case IAS_OCT_SEQ:
- seq_printf(seq, "octet sequence (%d bytes)\n",
- attrib->value->len);
- break;
- case IAS_MISSING:
- seq_puts(seq, "missing\n");
- break;
- default:
- seq_printf(seq, "type %d?\n",
- attrib->value->type);
- }
- seq_putc(seq, '\n');
-
- }
- IRDA_ASSERT_LABEL(outloop:)
- spin_unlock(&obj->attribs->hb_spinlock);
- }
-
- return 0;
-}
-
-static const struct seq_operations irias_seq_ops = {
- .start = irias_seq_start,
- .next = irias_seq_next,
- .stop = irias_seq_stop,
- .show = irias_seq_show,
-};
-
-static int irias_seq_open(struct inode *inode, struct file *file)
-{
- IRDA_ASSERT( irias_objects != NULL, return -EINVAL;);
-
- return seq_open(file, &irias_seq_ops);
-}
-
-const struct file_operations irias_seq_fops = {
- .owner = THIS_MODULE,
- .open = irias_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-#endif /* PROC_FS */
diff --git a/net/irda/iriap_event.c b/net/irda/iriap_event.c
deleted file mode 100644
index e6098b2e048a..000000000000
--- a/net/irda/iriap_event.c
+++ /dev/null
@@ -1,496 +0,0 @@
-/*********************************************************************
- *
- * Filename: iriap_event.c
- * Version: 0.1
- * Description: IAP Finite State Machine
- * Status: Experimental.
- * Author: Dag Brattli <dagb@cs.uit.no>
- * Created at: Thu Aug 21 00:02:07 1997
- * Modified at: Wed Mar 1 11:28:34 2000
- * Modified by: Dag Brattli <dagb@cs.uit.no>
- *
- * Copyright (c) 1997, 1999-2000 Dag Brattli <dagb@cs.uit.no>,
- * All Rights Reserved.
- * Copyright (c) 2000-2003 Jean Tourrilhes <jt@hpl.hp.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * Neither Dag Brattli nor University of Tromsø admit liability nor
- * provide warranty for any of this software. This material is
- * provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-#include <linux/slab.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/irlmp.h>
-#include <net/irda/iriap.h>
-#include <net/irda/iriap_event.h>
-
-static void state_s_disconnect (struct iriap_cb *self, IRIAP_EVENT event,
- struct sk_buff *skb);
-static void state_s_connecting (struct iriap_cb *self, IRIAP_EVENT event,
- struct sk_buff *skb);
-static void state_s_call (struct iriap_cb *self, IRIAP_EVENT event,
- struct sk_buff *skb);
-
-static void state_s_make_call (struct iriap_cb *self, IRIAP_EVENT event,
- struct sk_buff *skb);
-static void state_s_calling (struct iriap_cb *self, IRIAP_EVENT event,
- struct sk_buff *skb);
-static void state_s_outstanding (struct iriap_cb *self, IRIAP_EVENT event,
- struct sk_buff *skb);
-static void state_s_replying (struct iriap_cb *self, IRIAP_EVENT event,
- struct sk_buff *skb);
-static void state_s_wait_for_call(struct iriap_cb *self, IRIAP_EVENT event,
- struct sk_buff *skb);
-static void state_s_wait_active (struct iriap_cb *self, IRIAP_EVENT event,
- struct sk_buff *skb);
-
-static void state_r_disconnect (struct iriap_cb *self, IRIAP_EVENT event,
- struct sk_buff *skb);
-static void state_r_call (struct iriap_cb *self, IRIAP_EVENT event,
- struct sk_buff *skb);
-static void state_r_waiting (struct iriap_cb *self, IRIAP_EVENT event,
- struct sk_buff *skb);
-static void state_r_wait_active (struct iriap_cb *self, IRIAP_EVENT event,
- struct sk_buff *skb);
-static void state_r_receiving (struct iriap_cb *self, IRIAP_EVENT event,
- struct sk_buff *skb);
-static void state_r_execute (struct iriap_cb *self, IRIAP_EVENT event,
- struct sk_buff *skb);
-static void state_r_returning (struct iriap_cb *self, IRIAP_EVENT event,
- struct sk_buff *skb);
-
-static void (*iriap_state[])(struct iriap_cb *self, IRIAP_EVENT event,
- struct sk_buff *skb) = {
- /* Client FSM */
- state_s_disconnect,
- state_s_connecting,
- state_s_call,
-
- /* S-Call FSM */
- state_s_make_call,
- state_s_calling,
- state_s_outstanding,
- state_s_replying,
- state_s_wait_for_call,
- state_s_wait_active,
-
- /* Server FSM */
- state_r_disconnect,
- state_r_call,
-
- /* R-Connect FSM */
- state_r_waiting,
- state_r_wait_active,
- state_r_receiving,
- state_r_execute,
- state_r_returning,
-};
-
-void iriap_next_client_state(struct iriap_cb *self, IRIAP_STATE state)
-{
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
-
- self->client_state = state;
-}
-
-void iriap_next_call_state(struct iriap_cb *self, IRIAP_STATE state)
-{
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
-
- self->call_state = state;
-}
-
-void iriap_next_server_state(struct iriap_cb *self, IRIAP_STATE state)
-{
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
-
- self->server_state = state;
-}
-
-void iriap_next_r_connect_state(struct iriap_cb *self, IRIAP_STATE state)
-{
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
-
- self->r_connect_state = state;
-}
-
-void iriap_do_client_event(struct iriap_cb *self, IRIAP_EVENT event,
- struct sk_buff *skb)
-{
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
-
- (*iriap_state[ self->client_state]) (self, event, skb);
-}
-
-void iriap_do_call_event(struct iriap_cb *self, IRIAP_EVENT event,
- struct sk_buff *skb)
-{
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
-
- (*iriap_state[ self->call_state]) (self, event, skb);
-}
-
-void iriap_do_server_event(struct iriap_cb *self, IRIAP_EVENT event,
- struct sk_buff *skb)
-{
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
-
- (*iriap_state[ self->server_state]) (self, event, skb);
-}
-
-void iriap_do_r_connect_event(struct iriap_cb *self, IRIAP_EVENT event,
- struct sk_buff *skb)
-{
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
-
- (*iriap_state[ self->r_connect_state]) (self, event, skb);
-}
-
-
-/*
- * Function state_s_disconnect (event, skb)
- *
- * S-Disconnect, The device has no LSAP connection to a particular
- * remote device.
- */
-static void state_s_disconnect(struct iriap_cb *self, IRIAP_EVENT event,
- struct sk_buff *skb)
-{
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
-
- switch (event) {
- case IAP_CALL_REQUEST_GVBC:
- iriap_next_client_state(self, S_CONNECTING);
- IRDA_ASSERT(self->request_skb == NULL, return;);
- /* Don't forget to refcount it -
- * see iriap_getvaluebyclass_request(). */
- skb_get(skb);
- self->request_skb = skb;
- iriap_connect_request(self);
- break;
- case IAP_LM_DISCONNECT_INDICATION:
- break;
- default:
- pr_debug("%s(), Unknown event %d\n", __func__, event);
- break;
- }
-}
-
-/*
- * Function state_s_connecting (self, event, skb)
- *
- * S-Connecting
- *
- */
-static void state_s_connecting(struct iriap_cb *self, IRIAP_EVENT event,
- struct sk_buff *skb)
-{
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
-
- switch (event) {
- case IAP_LM_CONNECT_CONFIRM:
- /*
- * Jump to S-Call FSM
- */
- iriap_do_call_event(self, IAP_CALL_REQUEST, skb);
- /* iriap_call_request(self, 0,0,0); */
- iriap_next_client_state(self, S_CALL);
- break;
- case IAP_LM_DISCONNECT_INDICATION:
- /* Abort calls */
- iriap_next_call_state(self, S_MAKE_CALL);
- iriap_next_client_state(self, S_DISCONNECT);
- break;
- default:
- pr_debug("%s(), Unknown event %d\n", __func__, event);
- break;
- }
-}
-
-/*
- * Function state_s_call (self, event, skb)
- *
- * S-Call, The device can process calls to a specific remote
- * device. Whenever the LSAP connection is disconnected, this state
- * catches that event and clears up
- */
-static void state_s_call(struct iriap_cb *self, IRIAP_EVENT event,
- struct sk_buff *skb)
-{
- IRDA_ASSERT(self != NULL, return;);
-
- switch (event) {
- case IAP_LM_DISCONNECT_INDICATION:
- /* Abort calls */
- iriap_next_call_state(self, S_MAKE_CALL);
- iriap_next_client_state(self, S_DISCONNECT);
- break;
- default:
- pr_debug("state_s_call: Unknown event %d\n", event);
- break;
- }
-}
-
-/*
- * Function state_s_make_call (event, skb)
- *
- * S-Make-Call
- *
- */
-static void state_s_make_call(struct iriap_cb *self, IRIAP_EVENT event,
- struct sk_buff *skb)
-{
- struct sk_buff *tx_skb;
-
- IRDA_ASSERT(self != NULL, return;);
-
- switch (event) {
- case IAP_CALL_REQUEST:
- /* Already refcounted - see state_s_disconnect() */
- tx_skb = self->request_skb;
- self->request_skb = NULL;
-
- irlmp_data_request(self->lsap, tx_skb);
- iriap_next_call_state(self, S_OUTSTANDING);
- break;
- default:
- pr_debug("%s(), Unknown event %d\n", __func__, event);
- break;
- }
-}
-
-/*
- * Function state_s_calling (event, skb)
- *
- * S-Calling
- *
- */
-static void state_s_calling(struct iriap_cb *self, IRIAP_EVENT event,
- struct sk_buff *skb)
-{
- pr_debug("%s(), Not implemented\n", __func__);
-}
-
-/*
- * Function state_s_outstanding (event, skb)
- *
- * S-Outstanding, The device is waiting for a response to a command
- *
- */
-static void state_s_outstanding(struct iriap_cb *self, IRIAP_EVENT event,
- struct sk_buff *skb)
-{
- IRDA_ASSERT(self != NULL, return;);
-
- switch (event) {
- case IAP_RECV_F_LST:
- /*iriap_send_ack(self);*/
- /*LM_Idle_request(idle); */
-
- iriap_next_call_state(self, S_WAIT_FOR_CALL);
- break;
- default:
- pr_debug("%s(), Unknown event %d\n", __func__, event);
- break;
- }
-}
-
-/*
- * Function state_s_replying (event, skb)
- *
- * S-Replying, The device is collecting a multiple part response
- */
-static void state_s_replying(struct iriap_cb *self, IRIAP_EVENT event,
- struct sk_buff *skb)
-{
- pr_debug("%s(), Not implemented\n", __func__);
-}
-
-/*
- * Function state_s_wait_for_call (event, skb)
- *
- * S-Wait-for-Call
- *
- */
-static void state_s_wait_for_call(struct iriap_cb *self, IRIAP_EVENT event,
- struct sk_buff *skb)
-{
- pr_debug("%s(), Not implemented\n", __func__);
-}
-
-
-/*
- * Function state_s_wait_active (event, skb)
- *
- * S-Wait-Active
- *
- */
-static void state_s_wait_active(struct iriap_cb *self, IRIAP_EVENT event,
- struct sk_buff *skb)
-{
- pr_debug("%s(), Not implemented\n", __func__);
-}
-
-/**************************************************************************
- *
- * Server FSM
- *
- **************************************************************************/
-
-/*
- * Function state_r_disconnect (self, event, skb)
- *
- * LM-IAS server is disconnected (not processing any requests!)
- *
- */
-static void state_r_disconnect(struct iriap_cb *self, IRIAP_EVENT event,
- struct sk_buff *skb)
-{
- struct sk_buff *tx_skb;
-
- switch (event) {
- case IAP_LM_CONNECT_INDICATION:
- tx_skb = alloc_skb(LMP_MAX_HEADER, GFP_ATOMIC);
- if (tx_skb == NULL)
- return;
-
- /* Reserve space for MUX_CONTROL and LAP header */
- skb_reserve(tx_skb, LMP_MAX_HEADER);
-
- irlmp_connect_response(self->lsap, tx_skb);
- /*LM_Idle_request(idle); */
-
- iriap_next_server_state(self, R_CALL);
-
- /*
- * Jump to R-Connect FSM, we skip R-Waiting since we do not
- * care about LM_Idle_request()!
- */
- iriap_next_r_connect_state(self, R_RECEIVING);
- break;
- default:
- pr_debug("%s(), unknown event %d\n", __func__, event);
- break;
- }
-}
-
-/*
- * Function state_r_call (self, event, skb)
- */
-static void state_r_call(struct iriap_cb *self, IRIAP_EVENT event,
- struct sk_buff *skb)
-{
- switch (event) {
- case IAP_LM_DISCONNECT_INDICATION:
- /* Abort call */
- iriap_next_server_state(self, R_DISCONNECT);
- iriap_next_r_connect_state(self, R_WAITING);
- break;
- default:
- pr_debug("%s(), unknown event!\n", __func__);
- break;
- }
-}
-
-/*
- * R-Connect FSM
- */
-
-/*
- * Function state_r_waiting (self, event, skb)
- */
-static void state_r_waiting(struct iriap_cb *self, IRIAP_EVENT event,
- struct sk_buff *skb)
-{
- pr_debug("%s(), Not implemented\n", __func__);
-}
-
-static void state_r_wait_active(struct iriap_cb *self, IRIAP_EVENT event,
- struct sk_buff *skb)
-{
- pr_debug("%s(), Not implemented\n", __func__);
-}
-
-/*
- * Function state_r_receiving (self, event, skb)
- *
- * We are receiving a command
- *
- */
-static void state_r_receiving(struct iriap_cb *self, IRIAP_EVENT event,
- struct sk_buff *skb)
-{
- switch (event) {
- case IAP_RECV_F_LST:
- iriap_next_r_connect_state(self, R_EXECUTE);
-
- iriap_call_indication(self, skb);
- break;
- default:
- pr_debug("%s(), unknown event!\n", __func__);
- break;
- }
-}
-
-/*
- * Function state_r_execute (self, event, skb)
- *
- * The server is processing the request
- *
- */
-static void state_r_execute(struct iriap_cb *self, IRIAP_EVENT event,
- struct sk_buff *skb)
-{
- IRDA_ASSERT(skb != NULL, return;);
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
-
- switch (event) {
- case IAP_CALL_RESPONSE:
- /*
- * Since we don't implement the Waiting state, we return
- * to state Receiving instead, DB.
- */
- iriap_next_r_connect_state(self, R_RECEIVING);
-
- /* Don't forget to refcount it - see
- * iriap_getvaluebyclass_response(). */
- skb_get(skb);
-
- irlmp_data_request(self->lsap, skb);
- break;
- default:
- pr_debug("%s(), unknown event!\n", __func__);
- break;
- }
-}
-
-static void state_r_returning(struct iriap_cb *self, IRIAP_EVENT event,
- struct sk_buff *skb)
-{
- pr_debug("%s(), event=%d\n", __func__, event);
-
- switch (event) {
- case IAP_RECV_F_LST:
- break;
- default:
- break;
- }
-}
diff --git a/net/irda/irias_object.c b/net/irda/irias_object.c
deleted file mode 100644
index 53b86d0e1630..000000000000
--- a/net/irda/irias_object.c
+++ /dev/null
@@ -1,555 +0,0 @@
-/*********************************************************************
- *
- * Filename: irias_object.c
- * Version: 0.3
- * Description: IAS object database and functions
- * Status: Experimental.
- * Author: Dag Brattli <dagb@cs.uit.no>
- * Created at: Thu Oct 1 22:50:04 1998
- * Modified at: Wed Dec 15 11:23:16 1999
- * Modified by: Dag Brattli <dagb@cs.uit.no>
- *
- * Copyright (c) 1998-1999 Dag Brattli, All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * Neither Dag Brattli nor University of Tromsø admit liability nor
- * provide warranty for any of this software. This material is
- * provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-#include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/socket.h>
-#include <linux/module.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/irias_object.h>
-
-hashbin_t *irias_objects;
-
-/*
- * Used when a missing value needs to be returned
- */
-struct ias_value irias_missing = { IAS_MISSING, 0, 0, 0, {0}};
-
-
-/*
- * Function ias_new_object (name, id)
- *
- * Create a new IAS object
- *
- */
-struct ias_object *irias_new_object( char *name, int id)
-{
- struct ias_object *obj;
-
- obj = kzalloc(sizeof(struct ias_object), GFP_ATOMIC);
- if (obj == NULL) {
- net_warn_ratelimited("%s(), Unable to allocate object!\n",
- __func__);
- return NULL;
- }
-
- obj->magic = IAS_OBJECT_MAGIC;
- obj->name = kstrndup(name, IAS_MAX_CLASSNAME, GFP_ATOMIC);
- if (!obj->name) {
- net_warn_ratelimited("%s(), Unable to allocate name!\n",
- __func__);
- kfree(obj);
- return NULL;
- }
- obj->id = id;
-
- /* Locking notes : the attrib spinlock has lower precendence
- * than the objects spinlock. Never grap the objects spinlock
- * while holding any attrib spinlock (risk of deadlock). Jean II */
- obj->attribs = hashbin_new(HB_LOCK);
-
- if (obj->attribs == NULL) {
- net_warn_ratelimited("%s(), Unable to allocate attribs!\n",
- __func__);
- kfree(obj->name);
- kfree(obj);
- return NULL;
- }
-
- return obj;
-}
-EXPORT_SYMBOL(irias_new_object);
-
-/*
- * Function irias_delete_attrib (attrib)
- *
- * Delete given attribute and deallocate all its memory
- *
- */
-static void __irias_delete_attrib(struct ias_attrib *attrib)
-{
- IRDA_ASSERT(attrib != NULL, return;);
- IRDA_ASSERT(attrib->magic == IAS_ATTRIB_MAGIC, return;);
-
- kfree(attrib->name);
-
- irias_delete_value(attrib->value);
- attrib->magic = ~IAS_ATTRIB_MAGIC;
-
- kfree(attrib);
-}
-
-void __irias_delete_object(struct ias_object *obj)
-{
- IRDA_ASSERT(obj != NULL, return;);
- IRDA_ASSERT(obj->magic == IAS_OBJECT_MAGIC, return;);
-
- kfree(obj->name);
-
- hashbin_delete(obj->attribs, (FREE_FUNC) __irias_delete_attrib);
-
- obj->magic = ~IAS_OBJECT_MAGIC;
-
- kfree(obj);
-}
-
-/*
- * Function irias_delete_object (obj)
- *
- * Remove object from hashbin and deallocate all attributes associated with
- * with this object and the object itself
- *
- */
-int irias_delete_object(struct ias_object *obj)
-{
- struct ias_object *node;
-
- IRDA_ASSERT(obj != NULL, return -1;);
- IRDA_ASSERT(obj->magic == IAS_OBJECT_MAGIC, return -1;);
-
- /* Remove from list */
- node = hashbin_remove_this(irias_objects, (irda_queue_t *) obj);
- if (!node)
- pr_debug("%s(), object already removed!\n",
- __func__);
-
- /* Destroy */
- __irias_delete_object(obj);
-
- return 0;
-}
-EXPORT_SYMBOL(irias_delete_object);
-
-/*
- * Function irias_delete_attrib (obj)
- *
- * Remove attribute from hashbin and, if it was the last attribute of
- * the object, remove the object as well.
- *
- */
-int irias_delete_attrib(struct ias_object *obj, struct ias_attrib *attrib,
- int cleanobject)
-{
- struct ias_attrib *node;
-
- IRDA_ASSERT(obj != NULL, return -1;);
- IRDA_ASSERT(obj->magic == IAS_OBJECT_MAGIC, return -1;);
- IRDA_ASSERT(attrib != NULL, return -1;);
-
- /* Remove attribute from object */
- node = hashbin_remove_this(obj->attribs, (irda_queue_t *) attrib);
- if (!node)
- return 0; /* Already removed or non-existent */
-
- /* Deallocate attribute */
- __irias_delete_attrib(node);
-
- /* Check if object has still some attributes, destroy it if none.
- * At first glance, this look dangerous, as the kernel reference
- * various IAS objects. However, we only use this function on
- * user attributes, not kernel attributes, so there is no risk
- * of deleting a kernel object this way. Jean II */
- node = (struct ias_attrib *) hashbin_get_first(obj->attribs);
- if (cleanobject && !node)
- irias_delete_object(obj);
-
- return 0;
-}
-
-/*
- * Function irias_insert_object (obj)
- *
- * Insert an object into the LM-IAS database
- *
- */
-void irias_insert_object(struct ias_object *obj)
-{
- IRDA_ASSERT(obj != NULL, return;);
- IRDA_ASSERT(obj->magic == IAS_OBJECT_MAGIC, return;);
-
- hashbin_insert(irias_objects, (irda_queue_t *) obj, 0, obj->name);
-}
-EXPORT_SYMBOL(irias_insert_object);
-
-/*
- * Function irias_find_object (name)
- *
- * Find object with given name
- *
- */
-struct ias_object *irias_find_object(char *name)
-{
- IRDA_ASSERT(name != NULL, return NULL;);
-
- /* Unsafe (locking), object might change */
- return hashbin_lock_find(irias_objects, 0, name);
-}
-EXPORT_SYMBOL(irias_find_object);
-
-/*
- * Function irias_find_attrib (obj, name)
- *
- * Find named attribute in object
- *
- */
-struct ias_attrib *irias_find_attrib(struct ias_object *obj, char *name)
-{
- struct ias_attrib *attrib;
-
- IRDA_ASSERT(obj != NULL, return NULL;);
- IRDA_ASSERT(obj->magic == IAS_OBJECT_MAGIC, return NULL;);
- IRDA_ASSERT(name != NULL, return NULL;);
-
- attrib = hashbin_lock_find(obj->attribs, 0, name);
- if (attrib == NULL)
- return NULL;
-
- /* Unsafe (locking), attrib might change */
- return attrib;
-}
-
-/*
- * Function irias_add_attribute (obj, attrib)
- *
- * Add attribute to object
- *
- */
-static void irias_add_attrib(struct ias_object *obj, struct ias_attrib *attrib,
- int owner)
-{
- IRDA_ASSERT(obj != NULL, return;);
- IRDA_ASSERT(obj->magic == IAS_OBJECT_MAGIC, return;);
-
- IRDA_ASSERT(attrib != NULL, return;);
- IRDA_ASSERT(attrib->magic == IAS_ATTRIB_MAGIC, return;);
-
- /* Set if attrib is owned by kernel or user space */
- attrib->value->owner = owner;
-
- hashbin_insert(obj->attribs, (irda_queue_t *) attrib, 0, attrib->name);
-}
-
-/*
- * Function irias_object_change_attribute (obj_name, attrib_name, new_value)
- *
- * Change the value of an objects attribute.
- *
- */
-int irias_object_change_attribute(char *obj_name, char *attrib_name,
- struct ias_value *new_value)
-{
- struct ias_object *obj;
- struct ias_attrib *attrib;
- unsigned long flags;
-
- /* Find object */
- obj = hashbin_lock_find(irias_objects, 0, obj_name);
- if (obj == NULL) {
- net_warn_ratelimited("%s: Unable to find object: %s\n",
- __func__, obj_name);
- return -1;
- }
-
- /* Slightly unsafe (obj might get removed under us) */
- spin_lock_irqsave(&obj->attribs->hb_spinlock, flags);
-
- /* Find attribute */
- attrib = hashbin_find(obj->attribs, 0, attrib_name);
- if (attrib == NULL) {
- net_warn_ratelimited("%s: Unable to find attribute: %s\n",
- __func__, attrib_name);
- spin_unlock_irqrestore(&obj->attribs->hb_spinlock, flags);
- return -1;
- }
-
- if ( attrib->value->type != new_value->type) {
- pr_debug("%s(), changing value type not allowed!\n",
- __func__);
- spin_unlock_irqrestore(&obj->attribs->hb_spinlock, flags);
- return -1;
- }
-
- /* Delete old value */
- irias_delete_value(attrib->value);
-
- /* Insert new value */
- attrib->value = new_value;
-
- /* Success */
- spin_unlock_irqrestore(&obj->attribs->hb_spinlock, flags);
- return 0;
-}
-EXPORT_SYMBOL(irias_object_change_attribute);
-
-/*
- * Function irias_object_add_integer_attrib (obj, name, value)
- *
- * Add an integer attribute to an LM-IAS object
- *
- */
-void irias_add_integer_attrib(struct ias_object *obj, char *name, int value,
- int owner)
-{
- struct ias_attrib *attrib;
-
- IRDA_ASSERT(obj != NULL, return;);
- IRDA_ASSERT(obj->magic == IAS_OBJECT_MAGIC, return;);
- IRDA_ASSERT(name != NULL, return;);
-
- attrib = kzalloc(sizeof(struct ias_attrib), GFP_ATOMIC);
- if (attrib == NULL) {
- net_warn_ratelimited("%s: Unable to allocate attribute!\n",
- __func__);
- return;
- }
-
- attrib->magic = IAS_ATTRIB_MAGIC;
- attrib->name = kstrndup(name, IAS_MAX_ATTRIBNAME, GFP_ATOMIC);
-
- /* Insert value */
- attrib->value = irias_new_integer_value(value);
- if (!attrib->name || !attrib->value) {
- net_warn_ratelimited("%s: Unable to allocate attribute!\n",
- __func__);
- if (attrib->value)
- irias_delete_value(attrib->value);
- kfree(attrib->name);
- kfree(attrib);
- return;
- }
-
- irias_add_attrib(obj, attrib, owner);
-}
-EXPORT_SYMBOL(irias_add_integer_attrib);
-
- /*
- * Function irias_add_octseq_attrib (obj, name, octet_seq, len)
- *
- * Add a octet sequence attribute to an LM-IAS object
- *
- */
-
-void irias_add_octseq_attrib(struct ias_object *obj, char *name, __u8 *octets,
- int len, int owner)
-{
- struct ias_attrib *attrib;
-
- IRDA_ASSERT(obj != NULL, return;);
- IRDA_ASSERT(obj->magic == IAS_OBJECT_MAGIC, return;);
-
- IRDA_ASSERT(name != NULL, return;);
- IRDA_ASSERT(octets != NULL, return;);
-
- attrib = kzalloc(sizeof(struct ias_attrib), GFP_ATOMIC);
- if (attrib == NULL) {
- net_warn_ratelimited("%s: Unable to allocate attribute!\n",
- __func__);
- return;
- }
-
- attrib->magic = IAS_ATTRIB_MAGIC;
- attrib->name = kstrndup(name, IAS_MAX_ATTRIBNAME, GFP_ATOMIC);
-
- attrib->value = irias_new_octseq_value( octets, len);
- if (!attrib->name || !attrib->value) {
- net_warn_ratelimited("%s: Unable to allocate attribute!\n",
- __func__);
- if (attrib->value)
- irias_delete_value(attrib->value);
- kfree(attrib->name);
- kfree(attrib);
- return;
- }
-
- irias_add_attrib(obj, attrib, owner);
-}
-EXPORT_SYMBOL(irias_add_octseq_attrib);
-
-/*
- * Function irias_object_add_string_attrib (obj, string)
- *
- * Add a string attribute to an LM-IAS object
- *
- */
-void irias_add_string_attrib(struct ias_object *obj, char *name, char *value,
- int owner)
-{
- struct ias_attrib *attrib;
-
- IRDA_ASSERT(obj != NULL, return;);
- IRDA_ASSERT(obj->magic == IAS_OBJECT_MAGIC, return;);
-
- IRDA_ASSERT(name != NULL, return;);
- IRDA_ASSERT(value != NULL, return;);
-
- attrib = kzalloc(sizeof( struct ias_attrib), GFP_ATOMIC);
- if (attrib == NULL) {
- net_warn_ratelimited("%s: Unable to allocate attribute!\n",
- __func__);
- return;
- }
-
- attrib->magic = IAS_ATTRIB_MAGIC;
- attrib->name = kstrndup(name, IAS_MAX_ATTRIBNAME, GFP_ATOMIC);
-
- attrib->value = irias_new_string_value(value);
- if (!attrib->name || !attrib->value) {
- net_warn_ratelimited("%s: Unable to allocate attribute!\n",
- __func__);
- if (attrib->value)
- irias_delete_value(attrib->value);
- kfree(attrib->name);
- kfree(attrib);
- return;
- }
-
- irias_add_attrib(obj, attrib, owner);
-}
-EXPORT_SYMBOL(irias_add_string_attrib);
-
-/*
- * Function irias_new_integer_value (integer)
- *
- * Create new IAS integer value
- *
- */
-struct ias_value *irias_new_integer_value(int integer)
-{
- struct ias_value *value;
-
- value = kzalloc(sizeof(struct ias_value), GFP_ATOMIC);
- if (value == NULL)
- return NULL;
-
- value->type = IAS_INTEGER;
- value->len = 4;
- value->t.integer = integer;
-
- return value;
-}
-EXPORT_SYMBOL(irias_new_integer_value);
-
-/*
- * Function irias_new_string_value (string)
- *
- * Create new IAS string value
- *
- * Per IrLMP 1.1, 4.3.3.2, strings are up to 256 chars - Jean II
- */
-struct ias_value *irias_new_string_value(char *string)
-{
- struct ias_value *value;
-
- value = kzalloc(sizeof(struct ias_value), GFP_ATOMIC);
- if (value == NULL)
- return NULL;
-
- value->type = IAS_STRING;
- value->charset = CS_ASCII;
- value->t.string = kstrndup(string, IAS_MAX_STRING, GFP_ATOMIC);
- if (!value->t.string) {
- net_warn_ratelimited("%s: Unable to kmalloc!\n", __func__);
- kfree(value);
- return NULL;
- }
-
- value->len = strlen(value->t.string);
-
- return value;
-}
-
-/*
- * Function irias_new_octseq_value (octets, len)
- *
- * Create new IAS octet-sequence value
- *
- * Per IrLMP 1.1, 4.3.3.2, octet-sequence are up to 1024 bytes - Jean II
- */
-struct ias_value *irias_new_octseq_value(__u8 *octseq , int len)
-{
- struct ias_value *value;
-
- value = kzalloc(sizeof(struct ias_value), GFP_ATOMIC);
- if (value == NULL)
- return NULL;
-
- value->type = IAS_OCT_SEQ;
- /* Check length */
- if(len > IAS_MAX_OCTET_STRING)
- len = IAS_MAX_OCTET_STRING;
- value->len = len;
-
- value->t.oct_seq = kmemdup(octseq, len, GFP_ATOMIC);
- if (value->t.oct_seq == NULL){
- net_warn_ratelimited("%s: Unable to kmalloc!\n", __func__);
- kfree(value);
- return NULL;
- }
- return value;
-}
-
-struct ias_value *irias_new_missing_value(void)
-{
- struct ias_value *value;
-
- value = kzalloc(sizeof(struct ias_value), GFP_ATOMIC);
- if (value == NULL)
- return NULL;
-
- value->type = IAS_MISSING;
-
- return value;
-}
-
-/*
- * Function irias_delete_value (value)
- *
- * Delete IAS value
- *
- */
-void irias_delete_value(struct ias_value *value)
-{
- IRDA_ASSERT(value != NULL, return;);
-
- switch (value->type) {
- case IAS_INTEGER: /* Fallthrough */
- case IAS_MISSING:
- /* No need to deallocate */
- break;
- case IAS_STRING:
- /* Deallocate string */
- kfree(value->t.string);
- break;
- case IAS_OCT_SEQ:
- /* Deallocate byte stream */
- kfree(value->t.oct_seq);
- break;
- default:
- pr_debug("%s(), Unknown value type!\n", __func__);
- break;
- }
- kfree(value);
-}
-EXPORT_SYMBOL(irias_delete_value);
diff --git a/net/irda/irlan/Kconfig b/net/irda/irlan/Kconfig
deleted file mode 100644
index 951abc2e3a7f..000000000000
--- a/net/irda/irlan/Kconfig
+++ /dev/null
@@ -1,14 +0,0 @@
-config IRLAN
- tristate "IrLAN protocol"
- depends on IRDA
- help
- Say Y here if you want to build support for the IrLAN protocol.
- To compile it as a module, choose M here: the module will be called
- irlan. IrLAN emulates an Ethernet and makes it possible to put up
- a wireless LAN using infrared beams.
-
- The IrLAN protocol can be used to talk with infrared access points
- like the HP NetbeamIR, or the ESI JetEye NET. You can also connect
- to another Linux machine running the IrLAN protocol for ad-hoc
- networking!
-
diff --git a/net/irda/irlan/Makefile b/net/irda/irlan/Makefile
deleted file mode 100644
index 94eefbc8e6b9..000000000000
--- a/net/irda/irlan/Makefile
+++ /dev/null
@@ -1,7 +0,0 @@
-#
-# Makefile for the Linux IrDA IrLAN protocol layer.
-#
-
-obj-$(CONFIG_IRLAN) += irlan.o
-
-irlan-y := irlan_common.o irlan_eth.o irlan_event.o irlan_client.o irlan_provider.o irlan_filter.o irlan_provider_event.o irlan_client_event.o
diff --git a/net/irda/irlan/irlan_client.c b/net/irda/irlan/irlan_client.c
deleted file mode 100644
index c5837a40c78e..000000000000
--- a/net/irda/irlan/irlan_client.c
+++ /dev/null
@@ -1,559 +0,0 @@
-/*********************************************************************
- *
- * Filename: irlan_client.c
- * Version: 0.9
- * Description: IrDA LAN Access Protocol (IrLAN) Client
- * Status: Experimental.
- * Author: Dag Brattli <dagb@cs.uit.no>
- * Created at: Sun Aug 31 20:14:37 1997
- * Modified at: Tue Dec 14 15:47:02 1999
- * Modified by: Dag Brattli <dagb@cs.uit.no>
- * Sources: skeleton.c by Donald Becker <becker@CESDIS.gsfc.nasa.gov>
- * slip.c by Laurence Culhane, <loz@holmes.demon.co.uk>
- * Fred N. van Kempen, <waltje@uwalt.nl.mugnet.org>
- *
- * Copyright (c) 1998-1999 Dag Brattli <dagb@cs.uit.no>,
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * Neither Dag Brattli nor University of Tromsø admit liability nor
- * provide warranty for any of this software. This material is
- * provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/slab.h>
-#include <linux/errno.h>
-#include <linux/init.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/if_arp.h>
-#include <linux/bitops.h>
-#include <net/arp.h>
-
-#include <asm/byteorder.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/irttp.h>
-#include <net/irda/irlmp.h>
-#include <net/irda/irias_object.h>
-#include <net/irda/iriap.h>
-#include <net/irda/timer.h>
-
-#include <net/irda/irlan_common.h>
-#include <net/irda/irlan_event.h>
-#include <net/irda/irlan_eth.h>
-#include <net/irda/irlan_provider.h>
-#include <net/irda/irlan_client.h>
-
-#undef CONFIG_IRLAN_GRATUITOUS_ARP
-
-static void irlan_client_ctrl_disconnect_indication(void *instance, void *sap,
- LM_REASON reason,
- struct sk_buff *);
-static int irlan_client_ctrl_data_indication(void *instance, void *sap,
- struct sk_buff *skb);
-static void irlan_client_ctrl_connect_confirm(void *instance, void *sap,
- struct qos_info *qos,
- __u32 max_sdu_size,
- __u8 max_header_size,
- struct sk_buff *);
-static void irlan_check_response_param(struct irlan_cb *self, char *param,
- char *value, int val_len);
-static void irlan_client_open_ctrl_tsap(struct irlan_cb *self);
-
-static void irlan_client_kick_timer_expired(void *data)
-{
- struct irlan_cb *self = (struct irlan_cb *) data;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-
- /*
- * If we are in peer mode, the client may not have got the discovery
- * indication it needs to make progress. If the client is still in
- * IDLE state, we must kick it to, but only if the provider is not IDLE
- */
- if ((self->provider.access_type == ACCESS_PEER) &&
- (self->client.state == IRLAN_IDLE) &&
- (self->provider.state != IRLAN_IDLE)) {
- irlan_client_wakeup(self, self->saddr, self->daddr);
- }
-}
-
-static void irlan_client_start_kick_timer(struct irlan_cb *self, int timeout)
-{
- irda_start_timer(&self->client.kick_timer, timeout, (void *) self,
- irlan_client_kick_timer_expired);
-}
-
-/*
- * Function irlan_client_wakeup (self, saddr, daddr)
- *
- * Wake up client
- *
- */
-void irlan_client_wakeup(struct irlan_cb *self, __u32 saddr, __u32 daddr)
-{
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-
- /*
- * Check if we are already awake, or if we are a provider in direct
- * mode (in that case we must leave the client idle
- */
- if ((self->client.state != IRLAN_IDLE) ||
- (self->provider.access_type == ACCESS_DIRECT))
- {
- pr_debug("%s(), already awake!\n", __func__);
- return;
- }
-
- /* Addresses may have changed! */
- self->saddr = saddr;
- self->daddr = daddr;
-
- if (self->disconnect_reason == LM_USER_REQUEST) {
- pr_debug("%s(), still stopped by user\n", __func__);
- return;
- }
-
- /* Open TSAPs */
- irlan_client_open_ctrl_tsap(self);
- irlan_open_data_tsap(self);
-
- irlan_do_client_event(self, IRLAN_DISCOVERY_INDICATION, NULL);
-
- /* Start kick timer */
- irlan_client_start_kick_timer(self, 2*HZ);
-}
-
-/*
- * Function irlan_discovery_indication (daddr)
- *
- * Remote device with IrLAN server support discovered
- *
- */
-void irlan_client_discovery_indication(discinfo_t *discovery,
- DISCOVERY_MODE mode,
- void *priv)
-{
- struct irlan_cb *self;
- __u32 saddr, daddr;
-
- IRDA_ASSERT(discovery != NULL, return;);
-
- /*
- * I didn't check it, but I bet that IrLAN suffer from the same
- * deficiency as IrComm and doesn't handle two instances
- * simultaneously connecting to each other.
- * Same workaround, drop passive discoveries.
- * Jean II */
- if(mode == DISCOVERY_PASSIVE)
- return;
-
- saddr = discovery->saddr;
- daddr = discovery->daddr;
-
- /* Find instance */
- rcu_read_lock();
- self = irlan_get_any();
- if (self) {
- IRDA_ASSERT(self->magic == IRLAN_MAGIC, goto out;);
-
- pr_debug("%s(), Found instance (%08x)!\n", __func__ ,
- daddr);
-
- irlan_client_wakeup(self, saddr, daddr);
- }
-IRDA_ASSERT_LABEL(out:)
- rcu_read_unlock();
-}
-
-/*
- * Function irlan_client_data_indication (handle, skb)
- *
- * This function gets the data that is received on the control channel
- *
- */
-static int irlan_client_ctrl_data_indication(void *instance, void *sap,
- struct sk_buff *skb)
-{
- struct irlan_cb *self;
-
- self = instance;
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == IRLAN_MAGIC, return -1;);
- IRDA_ASSERT(skb != NULL, return -1;);
-
- irlan_do_client_event(self, IRLAN_DATA_INDICATION, skb);
-
- /* Ready for a new command */
- pr_debug("%s(), clearing tx_busy\n", __func__);
- self->client.tx_busy = FALSE;
-
- /* Check if we have some queued commands waiting to be sent */
- irlan_run_ctrl_tx_queue(self);
-
- return 0;
-}
-
-static void irlan_client_ctrl_disconnect_indication(void *instance, void *sap,
- LM_REASON reason,
- struct sk_buff *userdata)
-{
- struct irlan_cb *self;
- struct tsap_cb *tsap;
- struct sk_buff *skb;
-
- pr_debug("%s(), reason=%d\n", __func__ , reason);
-
- self = instance;
- tsap = sap;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
- IRDA_ASSERT(tsap != NULL, return;);
- IRDA_ASSERT(tsap->magic == TTP_TSAP_MAGIC, return;);
-
- IRDA_ASSERT(tsap == self->client.tsap_ctrl, return;);
-
- /* Remove frames queued on the control channel */
- while ((skb = skb_dequeue(&self->client.txq)) != NULL) {
- dev_kfree_skb(skb);
- }
- self->client.tx_busy = FALSE;
-
- irlan_do_client_event(self, IRLAN_LMP_DISCONNECT, NULL);
-}
-
-/*
- * Function irlan_client_open_tsaps (self)
- *
- * Initialize callbacks and open IrTTP TSAPs
- *
- */
-static void irlan_client_open_ctrl_tsap(struct irlan_cb *self)
-{
- struct tsap_cb *tsap;
- notify_t notify;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-
- /* Check if already open */
- if (self->client.tsap_ctrl)
- return;
-
- irda_notify_init(&notify);
-
- /* Set up callbacks */
- notify.data_indication = irlan_client_ctrl_data_indication;
- notify.connect_confirm = irlan_client_ctrl_connect_confirm;
- notify.disconnect_indication = irlan_client_ctrl_disconnect_indication;
- notify.instance = self;
- strlcpy(notify.name, "IrLAN ctrl (c)", sizeof(notify.name));
-
- tsap = irttp_open_tsap(LSAP_ANY, DEFAULT_INITIAL_CREDIT, &notify);
- if (!tsap) {
- pr_debug("%s(), Got no tsap!\n", __func__);
- return;
- }
- self->client.tsap_ctrl = tsap;
-}
-
-/*
- * Function irlan_client_connect_confirm (handle, skb)
- *
- * Connection to peer IrLAN laye confirmed
- *
- */
-static void irlan_client_ctrl_connect_confirm(void *instance, void *sap,
- struct qos_info *qos,
- __u32 max_sdu_size,
- __u8 max_header_size,
- struct sk_buff *skb)
-{
- struct irlan_cb *self;
-
- self = instance;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-
- self->client.max_sdu_size = max_sdu_size;
- self->client.max_header_size = max_header_size;
-
- /* TODO: we could set the MTU depending on the max_sdu_size */
-
- irlan_do_client_event(self, IRLAN_CONNECT_COMPLETE, NULL);
-}
-
-/*
- * Function print_ret_code (code)
- *
- * Print return code of request to peer IrLAN layer.
- *
- */
-static void print_ret_code(__u8 code)
-{
- switch(code) {
- case 0:
- printk(KERN_INFO "Success\n");
- break;
- case 1:
- net_warn_ratelimited("IrLAN: Insufficient resources\n");
- break;
- case 2:
- net_warn_ratelimited("IrLAN: Invalid command format\n");
- break;
- case 3:
- net_warn_ratelimited("IrLAN: Command not supported\n");
- break;
- case 4:
- net_warn_ratelimited("IrLAN: Parameter not supported\n");
- break;
- case 5:
- net_warn_ratelimited("IrLAN: Value not supported\n");
- break;
- case 6:
- net_warn_ratelimited("IrLAN: Not open\n");
- break;
- case 7:
- net_warn_ratelimited("IrLAN: Authentication required\n");
- break;
- case 8:
- net_warn_ratelimited("IrLAN: Invalid password\n");
- break;
- case 9:
- net_warn_ratelimited("IrLAN: Protocol error\n");
- break;
- case 255:
- net_warn_ratelimited("IrLAN: Asynchronous status\n");
- break;
- }
-}
-
-/*
- * Function irlan_client_parse_response (self, skb)
- *
- * Extract all parameters from received buffer, then feed them to
- * check_params for parsing
- */
-void irlan_client_parse_response(struct irlan_cb *self, struct sk_buff *skb)
-{
- __u8 *frame;
- __u8 *ptr;
- int count;
- int ret;
- __u16 val_len;
- int i;
- char *name;
- char *value;
-
- IRDA_ASSERT(skb != NULL, return;);
-
- pr_debug("%s() skb->len=%d\n", __func__ , (int)skb->len);
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-
- if (!skb) {
- net_err_ratelimited("%s(), Got NULL skb!\n", __func__);
- return;
- }
- frame = skb->data;
-
- /*
- * Check return code and print it if not success
- */
- if (frame[0]) {
- print_ret_code(frame[0]);
- return;
- }
-
- name = kmalloc(255, GFP_ATOMIC);
- if (!name)
- return;
- value = kmalloc(1016, GFP_ATOMIC);
- if (!value) {
- kfree(name);
- return;
- }
-
- /* How many parameters? */
- count = frame[1];
-
- pr_debug("%s(), got %d parameters\n", __func__ , count);
-
- ptr = frame+2;
-
- /* For all parameters */
- for (i=0; i<count;i++) {
- ret = irlan_extract_param(ptr, name, value, &val_len);
- if (ret < 0) {
- pr_debug("%s(), IrLAN, Error!\n", __func__);
- break;
- }
- ptr += ret;
- irlan_check_response_param(self, name, value, val_len);
- }
- /* Cleanup */
- kfree(name);
- kfree(value);
-}
-
-/*
- * Function irlan_check_response_param (self, param, value, val_len)
- *
- * Check which parameter is received and update local variables
- *
- */
-static void irlan_check_response_param(struct irlan_cb *self, char *param,
- char *value, int val_len)
-{
- __u16 tmp_cpu; /* Temporary value in host order */
- __u8 *bytes;
- int i;
-
- pr_debug("%s(), parm=%s\n", __func__ , param);
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-
- /* Media type */
- if (strcmp(param, "MEDIA") == 0) {
- if (strcmp(value, "802.3") == 0)
- self->media = MEDIA_802_3;
- else
- self->media = MEDIA_802_5;
- return;
- }
- if (strcmp(param, "FILTER_TYPE") == 0) {
- if (strcmp(value, "DIRECTED") == 0)
- self->client.filter_type |= IRLAN_DIRECTED;
- else if (strcmp(value, "FUNCTIONAL") == 0)
- self->client.filter_type |= IRLAN_FUNCTIONAL;
- else if (strcmp(value, "GROUP") == 0)
- self->client.filter_type |= IRLAN_GROUP;
- else if (strcmp(value, "MAC_FRAME") == 0)
- self->client.filter_type |= IRLAN_MAC_FRAME;
- else if (strcmp(value, "MULTICAST") == 0)
- self->client.filter_type |= IRLAN_MULTICAST;
- else if (strcmp(value, "BROADCAST") == 0)
- self->client.filter_type |= IRLAN_BROADCAST;
- else if (strcmp(value, "IPX_SOCKET") == 0)
- self->client.filter_type |= IRLAN_IPX_SOCKET;
-
- }
- if (strcmp(param, "ACCESS_TYPE") == 0) {
- if (strcmp(value, "DIRECT") == 0)
- self->client.access_type = ACCESS_DIRECT;
- else if (strcmp(value, "PEER") == 0)
- self->client.access_type = ACCESS_PEER;
- else if (strcmp(value, "HOSTED") == 0)
- self->client.access_type = ACCESS_HOSTED;
- else {
- pr_debug("%s(), unknown access type!\n", __func__);
- }
- }
- /* IRLAN version */
- if (strcmp(param, "IRLAN_VER") == 0) {
- pr_debug("IrLAN version %d.%d\n", (__u8)value[0],
- (__u8)value[1]);
-
- self->version[0] = value[0];
- self->version[1] = value[1];
- return;
- }
- /* Which remote TSAP to use for data channel */
- if (strcmp(param, "DATA_CHAN") == 0) {
- self->dtsap_sel_data = value[0];
- pr_debug("Data TSAP = %02x\n", self->dtsap_sel_data);
- return;
- }
- if (strcmp(param, "CON_ARB") == 0) {
- memcpy(&tmp_cpu, value, 2); /* Align value */
- le16_to_cpus(&tmp_cpu); /* Convert to host order */
- self->client.recv_arb_val = tmp_cpu;
- pr_debug("%s(), receive arb val=%d\n", __func__ ,
- self->client.recv_arb_val);
- }
- if (strcmp(param, "MAX_FRAME") == 0) {
- memcpy(&tmp_cpu, value, 2); /* Align value */
- le16_to_cpus(&tmp_cpu); /* Convert to host order */
- self->client.max_frame = tmp_cpu;
- pr_debug("%s(), max frame=%d\n", __func__ ,
- self->client.max_frame);
- }
-
- /* RECONNECT_KEY, in case the link goes down! */
- if (strcmp(param, "RECONNECT_KEY") == 0) {
- pr_debug("Got reconnect key: ");
- /* for (i = 0; i < val_len; i++) */
-/* printk("%02x", value[i]); */
- memcpy(self->client.reconnect_key, value, val_len);
- self->client.key_len = val_len;
- pr_debug("\n");
- }
- /* FILTER_ENTRY, have we got an ethernet address? */
- if (strcmp(param, "FILTER_ENTRY") == 0) {
- bytes = value;
- pr_debug("Ethernet address = %pM\n", bytes);
- for (i = 0; i < 6; i++)
- self->dev->dev_addr[i] = bytes[i];
- }
-}
-
-/*
- * Function irlan_client_get_value_confirm (obj_id, value)
- *
- * Got results from remote LM-IAS
- *
- */
-void irlan_client_get_value_confirm(int result, __u16 obj_id,
- struct ias_value *value, void *priv)
-{
- struct irlan_cb *self;
-
- IRDA_ASSERT(priv != NULL, return;);
-
- self = priv;
- IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-
- /* We probably don't need to make any more queries */
- iriap_close(self->client.iriap);
- self->client.iriap = NULL;
-
- /* Check if request succeeded */
- if (result != IAS_SUCCESS) {
- pr_debug("%s(), got NULL value!\n", __func__);
- irlan_do_client_event(self, IRLAN_IAS_PROVIDER_NOT_AVAIL,
- NULL);
- return;
- }
-
- switch (value->type) {
- case IAS_INTEGER:
- self->dtsap_sel_ctrl = value->t.integer;
-
- if (value->t.integer != -1) {
- irlan_do_client_event(self, IRLAN_IAS_PROVIDER_AVAIL,
- NULL);
- return;
- }
- irias_delete_value(value);
- break;
- default:
- pr_debug("%s(), unknown type!\n", __func__);
- break;
- }
- irlan_do_client_event(self, IRLAN_IAS_PROVIDER_NOT_AVAIL, NULL);
-}
diff --git a/net/irda/irlan/irlan_client_event.c b/net/irda/irlan/irlan_client_event.c
deleted file mode 100644
index cc93fabbbb19..000000000000
--- a/net/irda/irlan/irlan_client_event.c
+++ /dev/null
@@ -1,511 +0,0 @@
-/*********************************************************************
- *
- * Filename: irlan_client_event.c
- * Version: 0.9
- * Description: IrLAN client state machine
- * Status: Experimental.
- * Author: Dag Brattli <dagb@cs.uit.no>
- * Created at: Sun Aug 31 20:14:37 1997
- * Modified at: Sun Dec 26 21:52:24 1999
- * Modified by: Dag Brattli <dagb@cs.uit.no>
- *
- * Copyright (c) 1998-1999 Dag Brattli <dagb@cs.uit.no>,
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * Neither Dag Brattli nor University of Tromsø admit liability nor
- * provide warranty for any of this software. This material is
- * provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-#include <linux/skbuff.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/timer.h>
-#include <net/irda/irmod.h>
-#include <net/irda/iriap.h>
-#include <net/irda/irlmp.h>
-#include <net/irda/irttp.h>
-
-#include <net/irda/irlan_common.h>
-#include <net/irda/irlan_client.h>
-#include <net/irda/irlan_event.h>
-
-static int irlan_client_state_idle (struct irlan_cb *self, IRLAN_EVENT event,
- struct sk_buff *skb);
-static int irlan_client_state_query(struct irlan_cb *self, IRLAN_EVENT event,
- struct sk_buff *skb);
-static int irlan_client_state_conn (struct irlan_cb *self, IRLAN_EVENT event,
- struct sk_buff *skb);
-static int irlan_client_state_info (struct irlan_cb *self, IRLAN_EVENT event,
- struct sk_buff *skb);
-static int irlan_client_state_media(struct irlan_cb *self, IRLAN_EVENT event,
- struct sk_buff *skb);
-static int irlan_client_state_open (struct irlan_cb *self, IRLAN_EVENT event,
- struct sk_buff *skb);
-static int irlan_client_state_wait (struct irlan_cb *self, IRLAN_EVENT event,
- struct sk_buff *skb);
-static int irlan_client_state_arb (struct irlan_cb *self, IRLAN_EVENT event,
- struct sk_buff *skb);
-static int irlan_client_state_data (struct irlan_cb *self, IRLAN_EVENT event,
- struct sk_buff *skb);
-static int irlan_client_state_close(struct irlan_cb *self, IRLAN_EVENT event,
- struct sk_buff *skb);
-static int irlan_client_state_sync (struct irlan_cb *self, IRLAN_EVENT event,
- struct sk_buff *skb);
-
-static int (*state[])(struct irlan_cb *, IRLAN_EVENT event, struct sk_buff *) =
-{
- irlan_client_state_idle,
- irlan_client_state_query,
- irlan_client_state_conn,
- irlan_client_state_info,
- irlan_client_state_media,
- irlan_client_state_open,
- irlan_client_state_wait,
- irlan_client_state_arb,
- irlan_client_state_data,
- irlan_client_state_close,
- irlan_client_state_sync
-};
-
-void irlan_do_client_event(struct irlan_cb *self, IRLAN_EVENT event,
- struct sk_buff *skb)
-{
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-
- (*state[ self->client.state]) (self, event, skb);
-}
-
-/*
- * Function irlan_client_state_idle (event, skb, info)
- *
- * IDLE, We are waiting for an indication that there is a provider
- * available.
- */
-static int irlan_client_state_idle(struct irlan_cb *self, IRLAN_EVENT event,
- struct sk_buff *skb)
-{
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == IRLAN_MAGIC, return -1;);
-
- switch (event) {
- case IRLAN_DISCOVERY_INDICATION:
- if (self->client.iriap) {
- net_warn_ratelimited("%s(), busy with a previous query\n",
- __func__);
- return -EBUSY;
- }
-
- self->client.iriap = iriap_open(LSAP_ANY, IAS_CLIENT, self,
- irlan_client_get_value_confirm);
- /* Get some values from peer IAS */
- irlan_next_client_state(self, IRLAN_QUERY);
- iriap_getvaluebyclass_request(self->client.iriap,
- self->saddr, self->daddr,
- "IrLAN", "IrDA:TinyTP:LsapSel");
- break;
- case IRLAN_WATCHDOG_TIMEOUT:
- pr_debug("%s(), IRLAN_WATCHDOG_TIMEOUT\n", __func__);
- break;
- default:
- pr_debug("%s(), Unknown event %d\n", __func__ , event);
- break;
- }
- if (skb)
- dev_kfree_skb(skb);
-
- return 0;
-}
-
-/*
- * Function irlan_client_state_query (event, skb, info)
- *
- * QUERY, We have queryed the remote IAS and is ready to connect
- * to provider, just waiting for the confirm.
- *
- */
-static int irlan_client_state_query(struct irlan_cb *self, IRLAN_EVENT event,
- struct sk_buff *skb)
-{
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == IRLAN_MAGIC, return -1;);
-
- switch(event) {
- case IRLAN_IAS_PROVIDER_AVAIL:
- IRDA_ASSERT(self->dtsap_sel_ctrl != 0, return -1;);
-
- self->client.open_retries = 0;
-
- irttp_connect_request(self->client.tsap_ctrl,
- self->dtsap_sel_ctrl,
- self->saddr, self->daddr, NULL,
- IRLAN_MTU, NULL);
- irlan_next_client_state(self, IRLAN_CONN);
- break;
- case IRLAN_IAS_PROVIDER_NOT_AVAIL:
- pr_debug("%s(), IAS_PROVIDER_NOT_AVAIL\n", __func__);
- irlan_next_client_state(self, IRLAN_IDLE);
-
- /* Give the client a kick! */
- if ((self->provider.access_type == ACCESS_PEER) &&
- (self->provider.state != IRLAN_IDLE))
- irlan_client_wakeup(self, self->saddr, self->daddr);
- break;
- case IRLAN_LMP_DISCONNECT:
- case IRLAN_LAP_DISCONNECT:
- irlan_next_client_state(self, IRLAN_IDLE);
- break;
- case IRLAN_WATCHDOG_TIMEOUT:
- pr_debug("%s(), IRLAN_WATCHDOG_TIMEOUT\n", __func__);
- break;
- default:
- pr_debug("%s(), Unknown event %d\n", __func__ , event);
- break;
- }
- if (skb)
- dev_kfree_skb(skb);
-
- return 0;
-}
-
-/*
- * Function irlan_client_state_conn (event, skb, info)
- *
- * CONN, We have connected to a provider but has not issued any
- * commands yet.
- *
- */
-static int irlan_client_state_conn(struct irlan_cb *self, IRLAN_EVENT event,
- struct sk_buff *skb)
-{
- IRDA_ASSERT(self != NULL, return -1;);
-
- switch (event) {
- case IRLAN_CONNECT_COMPLETE:
- /* Send getinfo cmd */
- irlan_get_provider_info(self);
- irlan_next_client_state(self, IRLAN_INFO);
- break;
- case IRLAN_LMP_DISCONNECT:
- case IRLAN_LAP_DISCONNECT:
- irlan_next_client_state(self, IRLAN_IDLE);
- break;
- case IRLAN_WATCHDOG_TIMEOUT:
- pr_debug("%s(), IRLAN_WATCHDOG_TIMEOUT\n", __func__);
- break;
- default:
- pr_debug("%s(), Unknown event %d\n", __func__ , event);
- break;
- }
- if (skb)
- dev_kfree_skb(skb);
-
- return 0;
-}
-
-/*
- * Function irlan_client_state_info (self, event, skb, info)
- *
- * INFO, We have issued a GetInfo command and is awaiting a reply.
- */
-static int irlan_client_state_info(struct irlan_cb *self, IRLAN_EVENT event,
- struct sk_buff *skb)
-{
- IRDA_ASSERT(self != NULL, return -1;);
-
- switch (event) {
- case IRLAN_DATA_INDICATION:
- IRDA_ASSERT(skb != NULL, return -1;);
-
- irlan_client_parse_response(self, skb);
-
- irlan_next_client_state(self, IRLAN_MEDIA);
-
- irlan_get_media_char(self);
- break;
-
- case IRLAN_LMP_DISCONNECT:
- case IRLAN_LAP_DISCONNECT:
- irlan_next_client_state(self, IRLAN_IDLE);
- break;
- case IRLAN_WATCHDOG_TIMEOUT:
- pr_debug("%s(), IRLAN_WATCHDOG_TIMEOUT\n", __func__);
- break;
- default:
- pr_debug("%s(), Unknown event %d\n", __func__ , event);
- break;
- }
- if (skb)
- dev_kfree_skb(skb);
-
- return 0;
-}
-
-/*
- * Function irlan_client_state_media (self, event, skb, info)
- *
- * MEDIA, The irlan_client has issued a GetMedia command and is awaiting a
- * reply.
- *
- */
-static int irlan_client_state_media(struct irlan_cb *self, IRLAN_EVENT event,
- struct sk_buff *skb)
-{
- IRDA_ASSERT(self != NULL, return -1;);
-
- switch(event) {
- case IRLAN_DATA_INDICATION:
- irlan_client_parse_response(self, skb);
- irlan_open_data_channel(self);
- irlan_next_client_state(self, IRLAN_OPEN);
- break;
- case IRLAN_LMP_DISCONNECT:
- case IRLAN_LAP_DISCONNECT:
- irlan_next_client_state(self, IRLAN_IDLE);
- break;
- case IRLAN_WATCHDOG_TIMEOUT:
- pr_debug("%s(), IRLAN_WATCHDOG_TIMEOUT\n", __func__);
- break;
- default:
- pr_debug("%s(), Unknown event %d\n", __func__ , event);
- break;
- }
- if (skb)
- dev_kfree_skb(skb);
-
- return 0;
-}
-
-/*
- * Function irlan_client_state_open (self, event, skb, info)
- *
- * OPEN, The irlan_client has issued a OpenData command and is awaiting a
- * reply
- *
- */
-static int irlan_client_state_open(struct irlan_cb *self, IRLAN_EVENT event,
- struct sk_buff *skb)
-{
- struct qos_info qos;
-
- IRDA_ASSERT(self != NULL, return -1;);
-
- switch(event) {
- case IRLAN_DATA_INDICATION:
- irlan_client_parse_response(self, skb);
-
- /*
- * Check if we have got the remote TSAP for data
- * communications
- */
- IRDA_ASSERT(self->dtsap_sel_data != 0, return -1;);
-
- /* Check which access type we are dealing with */
- switch (self->client.access_type) {
- case ACCESS_PEER:
- if (self->provider.state == IRLAN_OPEN) {
-
- irlan_next_client_state(self, IRLAN_ARB);
- irlan_do_client_event(self, IRLAN_CHECK_CON_ARB,
- NULL);
- } else {
-
- irlan_next_client_state(self, IRLAN_WAIT);
- }
- break;
- case ACCESS_DIRECT:
- case ACCESS_HOSTED:
- qos.link_disc_time.bits = 0x01; /* 3 secs */
-
- irttp_connect_request(self->tsap_data,
- self->dtsap_sel_data,
- self->saddr, self->daddr, &qos,
- IRLAN_MTU, NULL);
-
- irlan_next_client_state(self, IRLAN_DATA);
- break;
- default:
- pr_debug("%s(), unknown access type!\n", __func__);
- break;
- }
- break;
- case IRLAN_LMP_DISCONNECT:
- case IRLAN_LAP_DISCONNECT:
- irlan_next_client_state(self, IRLAN_IDLE);
- break;
- case IRLAN_WATCHDOG_TIMEOUT:
- pr_debug("%s(), IRLAN_WATCHDOG_TIMEOUT\n", __func__);
- break;
- default:
- pr_debug("%s(), Unknown event %d\n", __func__ , event);
- break;
- }
-
- if (skb)
- dev_kfree_skb(skb);
-
- return 0;
-}
-
-/*
- * Function irlan_client_state_wait (self, event, skb, info)
- *
- * WAIT, The irlan_client is waiting for the local provider to enter the
- * provider OPEN state.
- *
- */
-static int irlan_client_state_wait(struct irlan_cb *self, IRLAN_EVENT event,
- struct sk_buff *skb)
-{
- IRDA_ASSERT(self != NULL, return -1;);
-
- switch(event) {
- case IRLAN_PROVIDER_SIGNAL:
- irlan_next_client_state(self, IRLAN_ARB);
- irlan_do_client_event(self, IRLAN_CHECK_CON_ARB, NULL);
- break;
- case IRLAN_LMP_DISCONNECT:
- case IRLAN_LAP_DISCONNECT:
- irlan_next_client_state(self, IRLAN_IDLE);
- break;
- case IRLAN_WATCHDOG_TIMEOUT:
- pr_debug("%s(), IRLAN_WATCHDOG_TIMEOUT\n", __func__);
- break;
- default:
- pr_debug("%s(), Unknown event %d\n", __func__ , event);
- break;
- }
- if (skb)
- dev_kfree_skb(skb);
-
- return 0;
-}
-
-static int irlan_client_state_arb(struct irlan_cb *self, IRLAN_EVENT event,
- struct sk_buff *skb)
-{
- struct qos_info qos;
-
- IRDA_ASSERT(self != NULL, return -1;);
-
- switch(event) {
- case IRLAN_CHECK_CON_ARB:
- if (self->client.recv_arb_val == self->provider.send_arb_val) {
- irlan_next_client_state(self, IRLAN_CLOSE);
- irlan_close_data_channel(self);
- } else if (self->client.recv_arb_val <
- self->provider.send_arb_val)
- {
- qos.link_disc_time.bits = 0x01; /* 3 secs */
-
- irlan_next_client_state(self, IRLAN_DATA);
- irttp_connect_request(self->tsap_data,
- self->dtsap_sel_data,
- self->saddr, self->daddr, &qos,
- IRLAN_MTU, NULL);
- } else if (self->client.recv_arb_val >
- self->provider.send_arb_val)
- {
- pr_debug("%s(), lost the battle :-(\n", __func__);
- }
- break;
- case IRLAN_DATA_CONNECT_INDICATION:
- irlan_next_client_state(self, IRLAN_DATA);
- break;
- case IRLAN_LMP_DISCONNECT:
- case IRLAN_LAP_DISCONNECT:
- irlan_next_client_state(self, IRLAN_IDLE);
- break;
- case IRLAN_WATCHDOG_TIMEOUT:
- pr_debug("%s(), IRLAN_WATCHDOG_TIMEOUT\n", __func__);
- break;
- default:
- pr_debug("%s(), Unknown event %d\n", __func__ , event);
- break;
- }
- if (skb)
- dev_kfree_skb(skb);
-
- return 0;
-}
-
-/*
- * Function irlan_client_state_data (self, event, skb, info)
- *
- * DATA, The data channel is connected, allowing data transfers between
- * the local and remote machines.
- *
- */
-static int irlan_client_state_data(struct irlan_cb *self, IRLAN_EVENT event,
- struct sk_buff *skb)
-{
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == IRLAN_MAGIC, return -1;);
-
- switch(event) {
- case IRLAN_DATA_INDICATION:
- irlan_client_parse_response(self, skb);
- break;
- case IRLAN_LMP_DISCONNECT: /* FALLTHROUGH */
- case IRLAN_LAP_DISCONNECT:
- irlan_next_client_state(self, IRLAN_IDLE);
- break;
- default:
- pr_debug("%s(), Unknown event %d\n", __func__ , event);
- break;
- }
- if (skb)
- dev_kfree_skb(skb);
-
- return 0;
-}
-
-/*
- * Function irlan_client_state_close (self, event, skb, info)
- *
- *
- *
- */
-static int irlan_client_state_close(struct irlan_cb *self, IRLAN_EVENT event,
- struct sk_buff *skb)
-{
- if (skb)
- dev_kfree_skb(skb);
-
- return 0;
-}
-
-/*
- * Function irlan_client_state_sync (self, event, skb, info)
- *
- *
- *
- */
-static int irlan_client_state_sync(struct irlan_cb *self, IRLAN_EVENT event,
- struct sk_buff *skb)
-{
- if (skb)
- dev_kfree_skb(skb);
-
- return 0;
-}
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/net/irda/irlan/irlan_common.c b/net/irda/irlan/irlan_common.c
deleted file mode 100644
index 481bbc2a4349..000000000000
--- a/net/irda/irlan/irlan_common.c
+++ /dev/null
@@ -1,1176 +0,0 @@
-/*********************************************************************
- *
- * Filename: irlan_common.c
- * Version: 0.9
- * Description: IrDA LAN Access Protocol Implementation
- * Status: Experimental.
- * Author: Dag Brattli <dagb@cs.uit.no>
- * Created at: Sun Aug 31 20:14:37 1997
- * Modified at: Sun Dec 26 21:53:10 1999
- * Modified by: Dag Brattli <dagb@cs.uit.no>
- *
- * Copyright (c) 1997, 1999 Dag Brattli <dagb@cs.uit.no>,
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * Neither Dag Brattli nor University of Tromsø admit liability nor
- * provide warranty for any of this software. This material is
- * provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-#include <linux/module.h>
-
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/gfp.h>
-#include <linux/init.h>
-#include <linux/errno.h>
-#include <linux/proc_fs.h>
-#include <linux/sched.h>
-#include <linux/seq_file.h>
-#include <linux/random.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/rtnetlink.h>
-#include <linux/moduleparam.h>
-#include <linux/bitops.h>
-
-#include <asm/byteorder.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/irttp.h>
-#include <net/irda/irlmp.h>
-#include <net/irda/iriap.h>
-#include <net/irda/timer.h>
-
-#include <net/irda/irlan_common.h>
-#include <net/irda/irlan_client.h>
-#include <net/irda/irlan_provider.h>
-#include <net/irda/irlan_eth.h>
-#include <net/irda/irlan_filter.h>
-
-
-/* extern char sysctl_devname[]; */
-
-/*
- * Master structure
- */
-static LIST_HEAD(irlans);
-
-static void *ckey;
-static void *skey;
-
-/* Module parameters */
-static bool eth; /* Use "eth" or "irlan" name for devices */
-static int access = ACCESS_PEER; /* PEER, DIRECT or HOSTED */
-
-#ifdef CONFIG_PROC_FS
-static const char *const irlan_access[] = {
- "UNKNOWN",
- "DIRECT",
- "PEER",
- "HOSTED"
-};
-
-static const char *const irlan_media[] = {
- "UNKNOWN",
- "802.3",
- "802.5"
-};
-
-extern struct proc_dir_entry *proc_irda;
-
-static int irlan_seq_open(struct inode *inode, struct file *file);
-
-static const struct file_operations irlan_fops = {
- .owner = THIS_MODULE,
- .open = irlan_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-extern struct proc_dir_entry *proc_irda;
-#endif /* CONFIG_PROC_FS */
-
-static struct irlan_cb __init *irlan_open(__u32 saddr, __u32 daddr);
-static void __irlan_close(struct irlan_cb *self);
-static int __irlan_insert_param(struct sk_buff *skb, char *param, int type,
- __u8 value_byte, __u16 value_short,
- __u8 *value_array, __u16 value_len);
-static void irlan_open_unicast_addr(struct irlan_cb *self);
-static void irlan_get_unicast_addr(struct irlan_cb *self);
-void irlan_close_tsaps(struct irlan_cb *self);
-
-/*
- * Function irlan_init (void)
- *
- * Initialize IrLAN layer
- *
- */
-static int __init irlan_init(void)
-{
- struct irlan_cb *new;
- __u16 hints;
-
-#ifdef CONFIG_PROC_FS
- { struct proc_dir_entry *proc;
- proc = proc_create("irlan", 0, proc_irda, &irlan_fops);
- if (!proc) {
- printk(KERN_ERR "irlan_init: can't create /proc entry!\n");
- return -ENODEV;
- }
- }
-#endif /* CONFIG_PROC_FS */
-
- hints = irlmp_service_to_hint(S_LAN);
-
- /* Register with IrLMP as a client */
- ckey = irlmp_register_client(hints, &irlan_client_discovery_indication,
- NULL, NULL);
- if (!ckey)
- goto err_ckey;
-
- /* Register with IrLMP as a service */
- skey = irlmp_register_service(hints);
- if (!skey)
- goto err_skey;
-
- /* Start the master IrLAN instance (the only one for now) */
- new = irlan_open(DEV_ADDR_ANY, DEV_ADDR_ANY);
- if (!new)
- goto err_open;
-
- /* The master will only open its (listen) control TSAP */
- irlan_provider_open_ctrl_tsap(new);
-
- /* Do some fast discovery! */
- irlmp_discovery_request(DISCOVERY_DEFAULT_SLOTS);
-
- return 0;
-
-err_open:
- irlmp_unregister_service(skey);
-err_skey:
- irlmp_unregister_client(ckey);
-err_ckey:
-#ifdef CONFIG_PROC_FS
- remove_proc_entry("irlan", proc_irda);
-#endif /* CONFIG_PROC_FS */
-
- return -ENOMEM;
-}
-
-static void __exit irlan_cleanup(void)
-{
- struct irlan_cb *self, *next;
-
- irlmp_unregister_client(ckey);
- irlmp_unregister_service(skey);
-
-#ifdef CONFIG_PROC_FS
- remove_proc_entry("irlan", proc_irda);
-#endif /* CONFIG_PROC_FS */
-
- /* Cleanup any leftover network devices */
- rtnl_lock();
- list_for_each_entry_safe(self, next, &irlans, dev_list) {
- __irlan_close(self);
- }
- rtnl_unlock();
-}
-
-/*
- * Function irlan_open (void)
- *
- * Open new instance of a client/provider, we should only register the
- * network device if this instance is ment for a particular client/provider
- */
-static struct irlan_cb __init *irlan_open(__u32 saddr, __u32 daddr)
-{
- struct net_device *dev;
- struct irlan_cb *self;
-
- /* Create network device with irlan */
- dev = alloc_irlandev(eth ? "eth%d" : "irlan%d");
- if (!dev)
- return NULL;
-
- self = netdev_priv(dev);
- self->dev = dev;
-
- /*
- * Initialize local device structure
- */
- self->magic = IRLAN_MAGIC;
- self->saddr = saddr;
- self->daddr = daddr;
-
- /* Provider access can only be PEER, DIRECT, or HOSTED */
- self->provider.access_type = access;
- if (access == ACCESS_DIRECT) {
- /*
- * Since we are emulating an IrLAN sever we will have to
- * give ourself an ethernet address!
- */
- dev->dev_addr[0] = 0x40;
- dev->dev_addr[1] = 0x00;
- dev->dev_addr[2] = 0x00;
- dev->dev_addr[3] = 0x00;
- get_random_bytes(dev->dev_addr+4, 1);
- get_random_bytes(dev->dev_addr+5, 1);
- }
-
- self->media = MEDIA_802_3;
- self->disconnect_reason = LM_USER_REQUEST;
- init_timer(&self->watchdog_timer);
- init_timer(&self->client.kick_timer);
- init_waitqueue_head(&self->open_wait);
-
- skb_queue_head_init(&self->client.txq);
-
- irlan_next_client_state(self, IRLAN_IDLE);
- irlan_next_provider_state(self, IRLAN_IDLE);
-
- if (register_netdev(dev)) {
- pr_debug("%s(), register_netdev() failed!\n",
- __func__);
- self = NULL;
- free_netdev(dev);
- } else {
- rtnl_lock();
- list_add_rcu(&self->dev_list, &irlans);
- rtnl_unlock();
- }
-
- return self;
-}
-/*
- * Function __irlan_close (self)
- *
- * This function closes and deallocates the IrLAN client instances. Be
- * aware that other functions which calls client_close() must
- * remove self from irlans list first.
- */
-static void __irlan_close(struct irlan_cb *self)
-{
- ASSERT_RTNL();
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-
- del_timer_sync(&self->watchdog_timer);
- del_timer_sync(&self->client.kick_timer);
-
- /* Close all open connections and remove TSAPs */
- irlan_close_tsaps(self);
-
- if (self->client.iriap)
- iriap_close(self->client.iriap);
-
- /* Remove frames queued on the control channel */
- skb_queue_purge(&self->client.txq);
-
- /* Unregister and free self via destructor */
- unregister_netdevice(self->dev);
-}
-
-/* Find any instance of irlan, used for client discovery wakeup */
-struct irlan_cb *irlan_get_any(void)
-{
- struct irlan_cb *self;
-
- list_for_each_entry_rcu(self, &irlans, dev_list) {
- return self;
- }
- return NULL;
-}
-
-/*
- * Function irlan_connect_indication (instance, sap, qos, max_sdu_size, skb)
- *
- * Here we receive the connect indication for the data channel
- *
- */
-static void irlan_connect_indication(void *instance, void *sap,
- struct qos_info *qos,
- __u32 max_sdu_size,
- __u8 max_header_size,
- struct sk_buff *skb)
-{
- struct irlan_cb *self;
- struct tsap_cb *tsap;
-
- self = instance;
- tsap = sap;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
- IRDA_ASSERT(tsap == self->tsap_data,return;);
-
- self->max_sdu_size = max_sdu_size;
- self->max_header_size = max_header_size;
-
- pr_debug("%s: We are now connected!\n", __func__);
-
- del_timer(&self->watchdog_timer);
-
- /* If you want to pass the skb to *both* state machines, you will
- * need to skb_clone() it, so that you don't free it twice.
- * As the state machines don't need it, git rid of it here...
- * Jean II */
- if (skb)
- dev_kfree_skb(skb);
-
- irlan_do_provider_event(self, IRLAN_DATA_CONNECT_INDICATION, NULL);
- irlan_do_client_event(self, IRLAN_DATA_CONNECT_INDICATION, NULL);
-
- if (self->provider.access_type == ACCESS_PEER) {
- /*
- * Data channel is open, so we are now allowed to
- * configure the remote filter
- */
- irlan_get_unicast_addr(self);
- irlan_open_unicast_addr(self);
- }
- /* Ready to transfer Ethernet frames (at last) */
- netif_start_queue(self->dev); /* Clear reason */
-}
-
-static void irlan_connect_confirm(void *instance, void *sap,
- struct qos_info *qos,
- __u32 max_sdu_size,
- __u8 max_header_size,
- struct sk_buff *skb)
-{
- struct irlan_cb *self;
-
- self = instance;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-
- self->max_sdu_size = max_sdu_size;
- self->max_header_size = max_header_size;
-
- /* TODO: we could set the MTU depending on the max_sdu_size */
-
- pr_debug("%s: We are now connected!\n", __func__);
- del_timer(&self->watchdog_timer);
-
- /*
- * Data channel is open, so we are now allowed to configure the remote
- * filter
- */
- irlan_get_unicast_addr(self);
- irlan_open_unicast_addr(self);
-
- /* Open broadcast and multicast filter by default */
- irlan_set_broadcast_filter(self, TRUE);
- irlan_set_multicast_filter(self, TRUE);
-
- /* Ready to transfer Ethernet frames */
- netif_start_queue(self->dev);
- self->disconnect_reason = 0; /* Clear reason */
- wake_up_interruptible(&self->open_wait);
-}
-
-/*
- * Function irlan_client_disconnect_indication (handle)
- *
- * Callback function for the IrTTP layer. Indicates a disconnection of
- * the specified connection (handle)
- */
-static void irlan_disconnect_indication(void *instance,
- void *sap, LM_REASON reason,
- struct sk_buff *userdata)
-{
- struct irlan_cb *self;
- struct tsap_cb *tsap;
-
- pr_debug("%s(), reason=%d\n", __func__ , reason);
-
- self = instance;
- tsap = sap;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
- IRDA_ASSERT(tsap != NULL, return;);
- IRDA_ASSERT(tsap->magic == TTP_TSAP_MAGIC, return;);
-
- IRDA_ASSERT(tsap == self->tsap_data, return;);
-
- pr_debug("IrLAN, data channel disconnected by peer!\n");
-
- /* Save reason so we know if we should try to reconnect or not */
- self->disconnect_reason = reason;
-
- switch (reason) {
- case LM_USER_REQUEST: /* User request */
- pr_debug("%s(), User requested\n", __func__);
- break;
- case LM_LAP_DISCONNECT: /* Unexpected IrLAP disconnect */
- pr_debug("%s(), Unexpected IrLAP disconnect\n", __func__);
- break;
- case LM_CONNECT_FAILURE: /* Failed to establish IrLAP connection */
- pr_debug("%s(), IrLAP connect failed\n", __func__);
- break;
- case LM_LAP_RESET: /* IrLAP reset */
- pr_debug("%s(), IrLAP reset\n", __func__);
- break;
- case LM_INIT_DISCONNECT:
- pr_debug("%s(), IrLMP connect failed\n", __func__);
- break;
- default:
- net_err_ratelimited("%s(), Unknown disconnect reason\n",
- __func__);
- break;
- }
-
- /* If you want to pass the skb to *both* state machines, you will
- * need to skb_clone() it, so that you don't free it twice.
- * As the state machines don't need it, git rid of it here...
- * Jean II */
- if (userdata)
- dev_kfree_skb(userdata);
-
- irlan_do_client_event(self, IRLAN_LMP_DISCONNECT, NULL);
- irlan_do_provider_event(self, IRLAN_LMP_DISCONNECT, NULL);
-
- wake_up_interruptible(&self->open_wait);
-}
-
-void irlan_open_data_tsap(struct irlan_cb *self)
-{
- struct tsap_cb *tsap;
- notify_t notify;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-
- /* Check if already open */
- if (self->tsap_data)
- return;
-
- irda_notify_init(&notify);
-
- notify.data_indication = irlan_eth_receive;
- notify.udata_indication = irlan_eth_receive;
- notify.connect_indication = irlan_connect_indication;
- notify.connect_confirm = irlan_connect_confirm;
- notify.flow_indication = irlan_eth_flow_indication;
- notify.disconnect_indication = irlan_disconnect_indication;
- notify.instance = self;
- strlcpy(notify.name, "IrLAN data", sizeof(notify.name));
-
- tsap = irttp_open_tsap(LSAP_ANY, DEFAULT_INITIAL_CREDIT, &notify);
- if (!tsap) {
- pr_debug("%s(), Got no tsap!\n", __func__);
- return;
- }
- self->tsap_data = tsap;
-
- /*
- * This is the data TSAP selector which we will pass to the client
- * when the client ask for it.
- */
- self->stsap_sel_data = self->tsap_data->stsap_sel;
-}
-
-void irlan_close_tsaps(struct irlan_cb *self)
-{
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-
- /* Disconnect and close all open TSAP connections */
- if (self->tsap_data) {
- irttp_disconnect_request(self->tsap_data, NULL, P_NORMAL);
- irttp_close_tsap(self->tsap_data);
- self->tsap_data = NULL;
- }
- if (self->client.tsap_ctrl) {
- irttp_disconnect_request(self->client.tsap_ctrl, NULL,
- P_NORMAL);
- irttp_close_tsap(self->client.tsap_ctrl);
- self->client.tsap_ctrl = NULL;
- }
- if (self->provider.tsap_ctrl) {
- irttp_disconnect_request(self->provider.tsap_ctrl, NULL,
- P_NORMAL);
- irttp_close_tsap(self->provider.tsap_ctrl);
- self->provider.tsap_ctrl = NULL;
- }
- self->disconnect_reason = LM_USER_REQUEST;
-}
-
-/*
- * Function irlan_ias_register (self, tsap_sel)
- *
- * Register with LM-IAS
- *
- */
-void irlan_ias_register(struct irlan_cb *self, __u8 tsap_sel)
-{
- struct ias_object *obj;
- struct ias_value *new_value;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-
- /*
- * Check if object has already been registered by a previous provider.
- * If that is the case, we just change the value of the attribute
- */
- if (!irias_find_object("IrLAN")) {
- obj = irias_new_object("IrLAN", IAS_IRLAN_ID);
- irias_add_integer_attrib(obj, "IrDA:TinyTP:LsapSel", tsap_sel,
- IAS_KERNEL_ATTR);
- irias_insert_object(obj);
- } else {
- new_value = irias_new_integer_value(tsap_sel);
- irias_object_change_attribute("IrLAN", "IrDA:TinyTP:LsapSel",
- new_value);
- }
-
- /* Register PnP object only if not registered before */
- if (!irias_find_object("PnP")) {
- obj = irias_new_object("PnP", IAS_PNP_ID);
-#if 0
- irias_add_string_attrib(obj, "Name", sysctl_devname,
- IAS_KERNEL_ATTR);
-#else
- irias_add_string_attrib(obj, "Name", "Linux", IAS_KERNEL_ATTR);
-#endif
- irias_add_string_attrib(obj, "DeviceID", "HWP19F0",
- IAS_KERNEL_ATTR);
- irias_add_integer_attrib(obj, "CompCnt", 1, IAS_KERNEL_ATTR);
- if (self->provider.access_type == ACCESS_PEER)
- irias_add_string_attrib(obj, "Comp#01", "PNP8389",
- IAS_KERNEL_ATTR);
- else
- irias_add_string_attrib(obj, "Comp#01", "PNP8294",
- IAS_KERNEL_ATTR);
-
- irias_add_string_attrib(obj, "Manufacturer",
- "Linux-IrDA Project", IAS_KERNEL_ATTR);
- irias_insert_object(obj);
- }
-}
-
-/*
- * Function irlan_run_ctrl_tx_queue (self)
- *
- * Try to send the next command in the control transmit queue
- *
- */
-int irlan_run_ctrl_tx_queue(struct irlan_cb *self)
-{
- struct sk_buff *skb;
-
- if (irda_lock(&self->client.tx_busy) == FALSE)
- return -EBUSY;
-
- skb = skb_dequeue(&self->client.txq);
- if (!skb) {
- self->client.tx_busy = FALSE;
- return 0;
- }
-
- /* Check that it's really possible to send commands */
- if ((self->client.tsap_ctrl == NULL) ||
- (self->client.state == IRLAN_IDLE))
- {
- self->client.tx_busy = FALSE;
- dev_kfree_skb(skb);
- return -1;
- }
- pr_debug("%s(), sending ...\n", __func__);
-
- return irttp_data_request(self->client.tsap_ctrl, skb);
-}
-
-/*
- * Function irlan_ctrl_data_request (self, skb)
- *
- * This function makes sure that commands on the control channel is being
- * sent in a command/response fashion
- */
-static void irlan_ctrl_data_request(struct irlan_cb *self, struct sk_buff *skb)
-{
- /* Queue command */
- skb_queue_tail(&self->client.txq, skb);
-
- /* Try to send command */
- irlan_run_ctrl_tx_queue(self);
-}
-
-/*
- * Function irlan_get_provider_info (self)
- *
- * Send Get Provider Information command to peer IrLAN layer
- *
- */
-void irlan_get_provider_info(struct irlan_cb *self)
-{
- struct sk_buff *skb;
- __u8 *frame;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-
- skb = alloc_skb(IRLAN_MAX_HEADER + IRLAN_CMD_HEADER,
- GFP_ATOMIC);
- if (!skb)
- return;
-
- /* Reserve space for TTP, LMP, and LAP header */
- skb_reserve(skb, self->client.max_header_size);
- skb_put(skb, 2);
-
- frame = skb->data;
-
- frame[0] = CMD_GET_PROVIDER_INFO;
- frame[1] = 0x00; /* Zero parameters */
-
- irlan_ctrl_data_request(self, skb);
-}
-
-/*
- * Function irlan_open_data_channel (self)
- *
- * Send an Open Data Command to provider
- *
- */
-void irlan_open_data_channel(struct irlan_cb *self)
-{
- struct sk_buff *skb;
- __u8 *frame;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-
- skb = alloc_skb(IRLAN_MAX_HEADER + IRLAN_CMD_HEADER +
- IRLAN_STRING_PARAMETER_LEN("MEDIA", "802.3") +
- IRLAN_STRING_PARAMETER_LEN("ACCESS_TYPE", "DIRECT"),
- GFP_ATOMIC);
- if (!skb)
- return;
-
- skb_reserve(skb, self->client.max_header_size);
- skb_put(skb, 2);
-
- frame = skb->data;
-
- /* Build frame */
- frame[0] = CMD_OPEN_DATA_CHANNEL;
- frame[1] = 0x02; /* Two parameters */
-
- irlan_insert_string_param(skb, "MEDIA", "802.3");
- irlan_insert_string_param(skb, "ACCESS_TYPE", "DIRECT");
- /* irlan_insert_string_param(skb, "MODE", "UNRELIABLE"); */
-
-/* self->use_udata = TRUE; */
-
- irlan_ctrl_data_request(self, skb);
-}
-
-void irlan_close_data_channel(struct irlan_cb *self)
-{
- struct sk_buff *skb;
- __u8 *frame;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-
- /* Check if the TSAP is still there */
- if (self->client.tsap_ctrl == NULL)
- return;
-
- skb = alloc_skb(IRLAN_MAX_HEADER + IRLAN_CMD_HEADER +
- IRLAN_BYTE_PARAMETER_LEN("DATA_CHAN"),
- GFP_ATOMIC);
- if (!skb)
- return;
-
- skb_reserve(skb, self->client.max_header_size);
- skb_put(skb, 2);
-
- frame = skb->data;
-
- /* Build frame */
- frame[0] = CMD_CLOSE_DATA_CHAN;
- frame[1] = 0x01; /* One parameter */
-
- irlan_insert_byte_param(skb, "DATA_CHAN", self->dtsap_sel_data);
-
- irlan_ctrl_data_request(self, skb);
-}
-
-/*
- * Function irlan_open_unicast_addr (self)
- *
- * Make IrLAN provider accept ethernet frames addressed to the unicast
- * address.
- *
- */
-static void irlan_open_unicast_addr(struct irlan_cb *self)
-{
- struct sk_buff *skb;
- __u8 *frame;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-
- skb = alloc_skb(IRLAN_MAX_HEADER + IRLAN_CMD_HEADER +
- IRLAN_BYTE_PARAMETER_LEN("DATA_CHAN") +
- IRLAN_STRING_PARAMETER_LEN("FILTER_TYPE", "DIRECTED") +
- IRLAN_STRING_PARAMETER_LEN("FILTER_MODE", "FILTER"),
- GFP_ATOMIC);
- if (!skb)
- return;
-
- /* Reserve space for TTP, LMP, and LAP header */
- skb_reserve(skb, self->max_header_size);
- skb_put(skb, 2);
-
- frame = skb->data;
-
- frame[0] = CMD_FILTER_OPERATION;
- frame[1] = 0x03; /* Three parameters */
- irlan_insert_byte_param(skb, "DATA_CHAN" , self->dtsap_sel_data);
- irlan_insert_string_param(skb, "FILTER_TYPE", "DIRECTED");
- irlan_insert_string_param(skb, "FILTER_MODE", "FILTER");
-
- irlan_ctrl_data_request(self, skb);
-}
-
-/*
- * Function irlan_set_broadcast_filter (self, status)
- *
- * Make IrLAN provider accept ethernet frames addressed to the broadcast
- * address. Be careful with the use of this one, since there may be a lot
- * of broadcast traffic out there. We can still function without this
- * one but then _we_ have to initiate all communication with other
- * hosts, since ARP request for this host will not be answered.
- */
-void irlan_set_broadcast_filter(struct irlan_cb *self, int status)
-{
- struct sk_buff *skb;
- __u8 *frame;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-
- skb = alloc_skb(IRLAN_MAX_HEADER + IRLAN_CMD_HEADER +
- IRLAN_BYTE_PARAMETER_LEN("DATA_CHAN") +
- IRLAN_STRING_PARAMETER_LEN("FILTER_TYPE", "BROADCAST") +
- /* We may waste one byte here...*/
- IRLAN_STRING_PARAMETER_LEN("FILTER_MODE", "FILTER"),
- GFP_ATOMIC);
- if (!skb)
- return;
-
- /* Reserve space for TTP, LMP, and LAP header */
- skb_reserve(skb, self->client.max_header_size);
- skb_put(skb, 2);
-
- frame = skb->data;
-
- frame[0] = CMD_FILTER_OPERATION;
- frame[1] = 0x03; /* Three parameters */
- irlan_insert_byte_param(skb, "DATA_CHAN", self->dtsap_sel_data);
- irlan_insert_string_param(skb, "FILTER_TYPE", "BROADCAST");
- if (status)
- irlan_insert_string_param(skb, "FILTER_MODE", "FILTER");
- else
- irlan_insert_string_param(skb, "FILTER_MODE", "NONE");
-
- irlan_ctrl_data_request(self, skb);
-}
-
-/*
- * Function irlan_set_multicast_filter (self, status)
- *
- * Make IrLAN provider accept ethernet frames addressed to the multicast
- * address.
- *
- */
-void irlan_set_multicast_filter(struct irlan_cb *self, int status)
-{
- struct sk_buff *skb;
- __u8 *frame;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-
- skb = alloc_skb(IRLAN_MAX_HEADER + IRLAN_CMD_HEADER +
- IRLAN_BYTE_PARAMETER_LEN("DATA_CHAN") +
- IRLAN_STRING_PARAMETER_LEN("FILTER_TYPE", "MULTICAST") +
- /* We may waste one byte here...*/
- IRLAN_STRING_PARAMETER_LEN("FILTER_MODE", "NONE"),
- GFP_ATOMIC);
- if (!skb)
- return;
-
- /* Reserve space for TTP, LMP, and LAP header */
- skb_reserve(skb, self->client.max_header_size);
- skb_put(skb, 2);
-
- frame = skb->data;
-
- frame[0] = CMD_FILTER_OPERATION;
- frame[1] = 0x03; /* Three parameters */
- irlan_insert_byte_param(skb, "DATA_CHAN", self->dtsap_sel_data);
- irlan_insert_string_param(skb, "FILTER_TYPE", "MULTICAST");
- if (status)
- irlan_insert_string_param(skb, "FILTER_MODE", "ALL");
- else
- irlan_insert_string_param(skb, "FILTER_MODE", "NONE");
-
- irlan_ctrl_data_request(self, skb);
-}
-
-/*
- * Function irlan_get_unicast_addr (self)
- *
- * Retrieves the unicast address from the IrLAN provider. This address
- * will be inserted into the devices structure, so the ethernet layer
- * can construct its packets.
- *
- */
-static void irlan_get_unicast_addr(struct irlan_cb *self)
-{
- struct sk_buff *skb;
- __u8 *frame;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-
- skb = alloc_skb(IRLAN_MAX_HEADER + IRLAN_CMD_HEADER +
- IRLAN_BYTE_PARAMETER_LEN("DATA_CHAN") +
- IRLAN_STRING_PARAMETER_LEN("FILTER_TYPE", "DIRECTED") +
- IRLAN_STRING_PARAMETER_LEN("FILTER_OPERATION",
- "DYNAMIC"),
- GFP_ATOMIC);
- if (!skb)
- return;
-
- /* Reserve space for TTP, LMP, and LAP header */
- skb_reserve(skb, self->client.max_header_size);
- skb_put(skb, 2);
-
- frame = skb->data;
-
- frame[0] = CMD_FILTER_OPERATION;
- frame[1] = 0x03; /* Three parameters */
- irlan_insert_byte_param(skb, "DATA_CHAN", self->dtsap_sel_data);
- irlan_insert_string_param(skb, "FILTER_TYPE", "DIRECTED");
- irlan_insert_string_param(skb, "FILTER_OPERATION", "DYNAMIC");
-
- irlan_ctrl_data_request(self, skb);
-}
-
-/*
- * Function irlan_get_media_char (self)
- *
- *
- *
- */
-void irlan_get_media_char(struct irlan_cb *self)
-{
- struct sk_buff *skb;
- __u8 *frame;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-
- skb = alloc_skb(IRLAN_MAX_HEADER + IRLAN_CMD_HEADER +
- IRLAN_STRING_PARAMETER_LEN("MEDIA", "802.3"),
- GFP_ATOMIC);
-
- if (!skb)
- return;
-
- /* Reserve space for TTP, LMP, and LAP header */
- skb_reserve(skb, self->client.max_header_size);
- skb_put(skb, 2);
-
- frame = skb->data;
-
- /* Build frame */
- frame[0] = CMD_GET_MEDIA_CHAR;
- frame[1] = 0x01; /* One parameter */
-
- irlan_insert_string_param(skb, "MEDIA", "802.3");
- irlan_ctrl_data_request(self, skb);
-}
-
-/*
- * Function insert_byte_param (skb, param, value)
- *
- * Insert byte parameter into frame
- *
- */
-int irlan_insert_byte_param(struct sk_buff *skb, char *param, __u8 value)
-{
- return __irlan_insert_param(skb, param, IRLAN_BYTE, value, 0, NULL, 0);
-}
-
-int irlan_insert_short_param(struct sk_buff *skb, char *param, __u16 value)
-{
- return __irlan_insert_param(skb, param, IRLAN_SHORT, 0, value, NULL, 0);
-}
-
-/*
- * Function insert_string (skb, param, value)
- *
- * Insert string parameter into frame
- *
- */
-int irlan_insert_string_param(struct sk_buff *skb, char *param, char *string)
-{
- int string_len = strlen(string);
-
- return __irlan_insert_param(skb, param, IRLAN_ARRAY, 0, 0, string,
- string_len);
-}
-
-/*
- * Function insert_array_param(skb, param, value, len_value)
- *
- * Insert array parameter into frame
- *
- */
-int irlan_insert_array_param(struct sk_buff *skb, char *name, __u8 *array,
- __u16 array_len)
-{
- return __irlan_insert_param(skb, name, IRLAN_ARRAY, 0, 0, array,
- array_len);
-}
-
-/*
- * Function insert_param (skb, param, value, byte)
- *
- * Insert parameter at end of buffer, structure of a parameter is:
- *
- * -----------------------------------------------------------------------
- * | Name Length[1] | Param Name[1..255] | Val Length[2] | Value[0..1016]|
- * -----------------------------------------------------------------------
- */
-static int __irlan_insert_param(struct sk_buff *skb, char *param, int type,
- __u8 value_byte, __u16 value_short,
- __u8 *value_array, __u16 value_len)
-{
- __u8 *frame;
- __u8 param_len;
- __le16 tmp_le; /* Temporary value in little endian format */
- int n=0;
-
- if (skb == NULL) {
- pr_debug("%s(), Got NULL skb\n", __func__);
- return 0;
- }
-
- param_len = strlen(param);
- switch (type) {
- case IRLAN_BYTE:
- value_len = 1;
- break;
- case IRLAN_SHORT:
- value_len = 2;
- break;
- case IRLAN_ARRAY:
- IRDA_ASSERT(value_array != NULL, return 0;);
- IRDA_ASSERT(value_len > 0, return 0;);
- break;
- default:
- pr_debug("%s(), Unknown parameter type!\n", __func__);
- return 0;
- }
-
- /* Insert at end of sk-buffer */
- frame = skb_tail_pointer(skb);
-
- /* Make space for data */
- if (skb_tailroom(skb) < (param_len+value_len+3)) {
- pr_debug("%s(), No more space at end of skb\n", __func__);
- return 0;
- }
- skb_put(skb, param_len+value_len+3);
-
- /* Insert parameter length */
- frame[n++] = param_len;
-
- /* Insert parameter */
- memcpy(frame+n, param, param_len); n += param_len;
-
- /* Insert value length (2 byte little endian format, LSB first) */
- tmp_le = cpu_to_le16(value_len);
- memcpy(frame+n, &tmp_le, 2); n += 2; /* To avoid alignment problems */
-
- /* Insert value */
- switch (type) {
- case IRLAN_BYTE:
- frame[n++] = value_byte;
- break;
- case IRLAN_SHORT:
- tmp_le = cpu_to_le16(value_short);
- memcpy(frame+n, &tmp_le, 2); n += 2;
- break;
- case IRLAN_ARRAY:
- memcpy(frame+n, value_array, value_len); n+=value_len;
- break;
- default:
- break;
- }
- IRDA_ASSERT(n == (param_len+value_len+3), return 0;);
-
- return param_len+value_len+3;
-}
-
-/*
- * Function irlan_extract_param (buf, name, value, len)
- *
- * Extracts a single parameter name/value pair from buffer and updates
- * the buffer pointer to point to the next name/value pair.
- */
-int irlan_extract_param(__u8 *buf, char *name, char *value, __u16 *len)
-{
- __u8 name_len;
- __u16 val_len;
- int n=0;
-
- /* get length of parameter name (1 byte) */
- name_len = buf[n++];
-
- if (name_len > 254) {
- pr_debug("%s(), name_len > 254\n", __func__);
- return -RSP_INVALID_COMMAND_FORMAT;
- }
-
- /* get parameter name */
- memcpy(name, buf+n, name_len);
- name[name_len] = '\0';
- n+=name_len;
-
- /*
- * Get length of parameter value (2 bytes in little endian
- * format)
- */
- memcpy(&val_len, buf+n, 2); /* To avoid alignment problems */
- le16_to_cpus(&val_len); n+=2;
-
- if (val_len >= 1016) {
- pr_debug("%s(), parameter length to long\n", __func__);
- return -RSP_INVALID_COMMAND_FORMAT;
- }
- *len = val_len;
-
- /* get parameter value */
- memcpy(value, buf+n, val_len);
- value[val_len] = '\0';
- n+=val_len;
-
- pr_debug("Parameter: %s ", name);
- pr_debug("Value: %s\n", value);
-
- return n;
-}
-
-#ifdef CONFIG_PROC_FS
-
-/*
- * Start of reading /proc entries.
- * Return entry at pos,
- * or start_token to indicate print header line
- * or NULL if end of file
- */
-static void *irlan_seq_start(struct seq_file *seq, loff_t *pos)
-{
- rcu_read_lock();
- return seq_list_start_head(&irlans, *pos);
-}
-
-/* Return entry after v, and increment pos */
-static void *irlan_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- return seq_list_next(v, &irlans, pos);
-}
-
-/* End of reading /proc file */
-static void irlan_seq_stop(struct seq_file *seq, void *v)
-{
- rcu_read_unlock();
-}
-
-
-/*
- * Show one entry in /proc file.
- */
-static int irlan_seq_show(struct seq_file *seq, void *v)
-{
- if (v == &irlans)
- seq_puts(seq, "IrLAN instances:\n");
- else {
- struct irlan_cb *self = list_entry(v, struct irlan_cb, dev_list);
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == IRLAN_MAGIC, return -1;);
-
- seq_printf(seq,"ifname: %s,\n",
- self->dev->name);
- seq_printf(seq,"client state: %s, ",
- irlan_state[ self->client.state]);
- seq_printf(seq,"provider state: %s,\n",
- irlan_state[ self->provider.state]);
- seq_printf(seq,"saddr: %#08x, ",
- self->saddr);
- seq_printf(seq,"daddr: %#08x\n",
- self->daddr);
- seq_printf(seq,"version: %d.%d,\n",
- self->version[1], self->version[0]);
- seq_printf(seq,"access type: %s\n",
- irlan_access[self->client.access_type]);
- seq_printf(seq,"media: %s\n",
- irlan_media[self->media]);
-
- seq_printf(seq,"local filter:\n");
- seq_printf(seq,"remote filter: ");
- irlan_print_filter(seq, self->client.filter_type);
- seq_printf(seq,"tx busy: %s\n",
- netif_queue_stopped(self->dev) ? "TRUE" : "FALSE");
-
- seq_putc(seq,'\n');
- }
- return 0;
-}
-
-static const struct seq_operations irlan_seq_ops = {
- .start = irlan_seq_start,
- .next = irlan_seq_next,
- .stop = irlan_seq_stop,
- .show = irlan_seq_show,
-};
-
-static int irlan_seq_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &irlan_seq_ops);
-}
-#endif
-
-MODULE_AUTHOR("Dag Brattli <dagb@cs.uit.no>");
-MODULE_DESCRIPTION("The Linux IrDA LAN protocol");
-MODULE_LICENSE("GPL");
-
-module_param(eth, bool, 0);
-MODULE_PARM_DESC(eth, "Name devices ethX (0) or irlanX (1)");
-module_param(access, int, 0);
-MODULE_PARM_DESC(access, "Access type DIRECT=1, PEER=2, HOSTED=3");
-
-module_init(irlan_init);
-module_exit(irlan_cleanup);
-
diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c
deleted file mode 100644
index 3be852808a9d..000000000000
--- a/net/irda/irlan/irlan_eth.c
+++ /dev/null
@@ -1,340 +0,0 @@
-/*********************************************************************
- *
- * Filename: irlan_eth.c
- * Version:
- * Description:
- * Status: Experimental.
- * Author: Dag Brattli <dagb@cs.uit.no>
- * Created at: Thu Oct 15 08:37:58 1998
- * Modified at: Tue Mar 21 09:06:41 2000
- * Modified by: Dag Brattli <dagb@cs.uit.no>
- * Sources: skeleton.c by Donald Becker <becker@CESDIS.gsfc.nasa.gov>
- * slip.c by Laurence Culhane, <loz@holmes.demon.co.uk>
- * Fred N. van Kempen, <waltje@uwalt.nl.mugnet.org>
- *
- * Copyright (c) 1998-2000 Dag Brattli, All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * Neither Dag Brattli nor University of Tromsø admit liability nor
- * provide warranty for any of this software. This material is
- * provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/inetdevice.h>
-#include <linux/if_arp.h>
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <net/arp.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/irmod.h>
-#include <net/irda/irlan_common.h>
-#include <net/irda/irlan_client.h>
-#include <net/irda/irlan_event.h>
-#include <net/irda/irlan_eth.h>
-
-static int irlan_eth_open(struct net_device *dev);
-static int irlan_eth_close(struct net_device *dev);
-static netdev_tx_t irlan_eth_xmit(struct sk_buff *skb,
- struct net_device *dev);
-static void irlan_eth_set_multicast_list(struct net_device *dev);
-
-static const struct net_device_ops irlan_eth_netdev_ops = {
- .ndo_open = irlan_eth_open,
- .ndo_stop = irlan_eth_close,
- .ndo_start_xmit = irlan_eth_xmit,
- .ndo_set_rx_mode = irlan_eth_set_multicast_list,
- .ndo_validate_addr = eth_validate_addr,
-};
-
-/*
- * Function irlan_eth_setup (dev)
- *
- * The network device initialization function.
- *
- */
-static void irlan_eth_setup(struct net_device *dev)
-{
- ether_setup(dev);
-
- dev->netdev_ops = &irlan_eth_netdev_ops;
- dev->needs_free_netdev = true;
- dev->min_mtu = 0;
- dev->max_mtu = ETH_MAX_MTU;
-
- /*
- * Lets do all queueing in IrTTP instead of this device driver.
- * Queueing here as well can introduce some strange latency
- * problems, which we will avoid by setting the queue size to 0.
- */
- /*
- * The bugs in IrTTP and IrLAN that created this latency issue
- * have now been fixed, and we can propagate flow control properly
- * to the network layer. However, this requires a minimal queue of
- * packets for the device.
- * Without flow control, the Tx Queue is 14 (ttp) + 0 (dev) = 14
- * With flow control, the Tx Queue is 7 (ttp) + 4 (dev) = 11
- * See irlan_eth_flow_indication()...
- * Note : this number was randomly selected and would need to
- * be adjusted.
- * Jean II */
- dev->tx_queue_len = 4;
-}
-
-/*
- * Function alloc_irlandev
- *
- * Allocate network device and control block
- *
- */
-struct net_device *alloc_irlandev(const char *name)
-{
- return alloc_netdev(sizeof(struct irlan_cb), name, NET_NAME_UNKNOWN,
- irlan_eth_setup);
-}
-
-/*
- * Function irlan_eth_open (dev)
- *
- * Network device has been opened by user
- *
- */
-static int irlan_eth_open(struct net_device *dev)
-{
- struct irlan_cb *self = netdev_priv(dev);
-
- /* Ready to play! */
- netif_stop_queue(dev); /* Wait until data link is ready */
-
- /* We are now open, so time to do some work */
- self->disconnect_reason = 0;
- irlan_client_wakeup(self, self->saddr, self->daddr);
-
- /* Make sure we have a hardware address before we return,
- so DHCP clients gets happy */
- return wait_event_interruptible(self->open_wait,
- !self->tsap_data->connected);
-}
-
-/*
- * Function irlan_eth_close (dev)
- *
- * Stop the ether network device, his function will usually be called by
- * ifconfig down. We should now disconnect the link, We start the
- * close timer, so that the instance will be removed if we are unable
- * to discover the remote device after the disconnect.
- */
-static int irlan_eth_close(struct net_device *dev)
-{
- struct irlan_cb *self = netdev_priv(dev);
-
- /* Stop device */
- netif_stop_queue(dev);
-
- irlan_close_data_channel(self);
- irlan_close_tsaps(self);
-
- irlan_do_client_event(self, IRLAN_LMP_DISCONNECT, NULL);
- irlan_do_provider_event(self, IRLAN_LMP_DISCONNECT, NULL);
-
- /* Remove frames queued on the control channel */
- skb_queue_purge(&self->client.txq);
-
- self->client.tx_busy = 0;
-
- return 0;
-}
-
-/*
- * Function irlan_eth_tx (skb)
- *
- * Transmits ethernet frames over IrDA link.
- *
- */
-static netdev_tx_t irlan_eth_xmit(struct sk_buff *skb,
- struct net_device *dev)
-{
- struct irlan_cb *self = netdev_priv(dev);
- int ret;
- unsigned int len;
-
- /* skb headroom large enough to contain all IrDA-headers? */
- if ((skb_headroom(skb) < self->max_header_size) || (skb_shared(skb))) {
- struct sk_buff *new_skb =
- skb_realloc_headroom(skb, self->max_header_size);
-
- /* We have to free the original skb anyway */
- dev_kfree_skb(skb);
-
- /* Did the realloc succeed? */
- if (new_skb == NULL)
- return NETDEV_TX_OK;
-
- /* Use the new skb instead */
- skb = new_skb;
- }
-
- netif_trans_update(dev);
-
- len = skb->len;
- /* Now queue the packet in the transport layer */
- if (self->use_udata)
- ret = irttp_udata_request(self->tsap_data, skb);
- else
- ret = irttp_data_request(self->tsap_data, skb);
-
- if (ret < 0) {
- /*
- * IrTTPs tx queue is full, so we just have to
- * drop the frame! You might think that we should
- * just return -1 and don't deallocate the frame,
- * but that is dangerous since it's possible that
- * we have replaced the original skb with a new
- * one with larger headroom, and that would really
- * confuse do_dev_queue_xmit() in dev.c! I have
- * tried :-) DB
- */
- /* irttp_data_request already free the packet */
- dev->stats.tx_dropped++;
- } else {
- dev->stats.tx_packets++;
- dev->stats.tx_bytes += len;
- }
-
- return NETDEV_TX_OK;
-}
-
-/*
- * Function irlan_eth_receive (handle, skb)
- *
- * This function gets the data that is received on the data channel
- *
- */
-int irlan_eth_receive(void *instance, void *sap, struct sk_buff *skb)
-{
- struct irlan_cb *self = instance;
- struct net_device *dev = self->dev;
-
- if (skb == NULL) {
- dev->stats.rx_dropped++;
- return 0;
- }
- if (skb->len < ETH_HLEN) {
- pr_debug("%s() : IrLAN frame too short (%d)\n",
- __func__, skb->len);
- dev->stats.rx_dropped++;
- dev_kfree_skb(skb);
- return 0;
- }
-
- /*
- * Adopt this frame! Important to set all these fields since they
- * might have been previously set by the low level IrDA network
- * device driver
- */
- skb->protocol = eth_type_trans(skb, dev); /* Remove eth header */
-
- dev->stats.rx_packets++;
- dev->stats.rx_bytes += skb->len;
-
- netif_rx(skb); /* Eat it! */
-
- return 0;
-}
-
-/*
- * Function irlan_eth_flow (status)
- *
- * Do flow control between IP/Ethernet and IrLAN/IrTTP. This is done by
- * controlling the queue stop/start.
- *
- * The IrDA link layer has the advantage to have flow control, and
- * IrTTP now properly handles that. Flow controlling the higher layers
- * prevent us to drop Tx packets in here (up to 15% for a TCP socket,
- * more for UDP socket).
- * Also, this allow us to reduce the overall transmit queue, which means
- * less latency in case of mixed traffic.
- * Jean II
- */
-void irlan_eth_flow_indication(void *instance, void *sap, LOCAL_FLOW flow)
-{
- struct irlan_cb *self;
- struct net_device *dev;
-
- self = instance;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-
- dev = self->dev;
-
- IRDA_ASSERT(dev != NULL, return;);
-
- pr_debug("%s() : flow %s ; running %d\n", __func__,
- flow == FLOW_STOP ? "FLOW_STOP" : "FLOW_START",
- netif_running(dev));
-
- switch (flow) {
- case FLOW_STOP:
- /* IrTTP is full, stop higher layers */
- netif_stop_queue(dev);
- break;
- case FLOW_START:
- default:
- /* Tell upper layers that its time to transmit frames again */
- /* Schedule network layer */
- netif_wake_queue(dev);
- break;
- }
-}
-
-/*
- * Function set_multicast_list (dev)
- *
- * Configure the filtering of the device
- *
- */
-#define HW_MAX_ADDRS 4 /* Must query to get it! */
-static void irlan_eth_set_multicast_list(struct net_device *dev)
-{
- struct irlan_cb *self = netdev_priv(dev);
-
- /* Check if data channel has been connected yet */
- if (self->client.state != IRLAN_DATA) {
- pr_debug("%s(), delaying!\n", __func__);
- return;
- }
-
- if (dev->flags & IFF_PROMISC) {
- /* Enable promiscuous mode */
- net_warn_ratelimited("Promiscuous mode not implemented by IrLAN!\n");
- } else if ((dev->flags & IFF_ALLMULTI) ||
- netdev_mc_count(dev) > HW_MAX_ADDRS) {
- /* Disable promiscuous mode, use normal mode. */
- pr_debug("%s(), Setting multicast filter\n", __func__);
- /* hardware_set_filter(NULL); */
-
- irlan_set_multicast_filter(self, TRUE);
- } else if (!netdev_mc_empty(dev)) {
- pr_debug("%s(), Setting multicast filter\n", __func__);
- /* Walk the address list, and load the filter */
- /* hardware_set_filter(dev->mc_list); */
-
- irlan_set_multicast_filter(self, TRUE);
- } else {
- pr_debug("%s(), Clearing multicast filter\n", __func__);
- irlan_set_multicast_filter(self, FALSE);
- }
-
- if (dev->flags & IFF_BROADCAST)
- irlan_set_broadcast_filter(self, TRUE);
- else
- irlan_set_broadcast_filter(self, FALSE);
-}
diff --git a/net/irda/irlan/irlan_event.c b/net/irda/irlan/irlan_event.c
deleted file mode 100644
index 9a1cc11c16f6..000000000000
--- a/net/irda/irlan/irlan_event.c
+++ /dev/null
@@ -1,60 +0,0 @@
-/*********************************************************************
- *
- * Filename: irlan_event.c
- * Version:
- * Description:
- * Status: Experimental.
- * Author: Dag Brattli <dagb@cs.uit.no>
- * Created at: Tue Oct 20 09:10:16 1998
- * Modified at: Sat Oct 30 12:59:01 1999
- * Modified by: Dag Brattli <dagb@cs.uit.no>
- *
- * Copyright (c) 1998-1999 Dag Brattli, All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * Neither Dag Brattli nor University of Tromsø admit liability nor
- * provide warranty for any of this software. This material is
- * provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-#include <net/irda/irlan_event.h>
-
-const char * const irlan_state[] = {
- "IRLAN_IDLE",
- "IRLAN_QUERY",
- "IRLAN_CONN",
- "IRLAN_INFO",
- "IRLAN_MEDIA",
- "IRLAN_OPEN",
- "IRLAN_WAIT",
- "IRLAN_ARB",
- "IRLAN_DATA",
- "IRLAN_CLOSE",
- "IRLAN_SYNC",
-};
-
-void irlan_next_client_state(struct irlan_cb *self, IRLAN_STATE state)
-{
- pr_debug("%s(), %s\n", __func__ , irlan_state[state]);
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-
- self->client.state = state;
-}
-
-void irlan_next_provider_state(struct irlan_cb *self, IRLAN_STATE state)
-{
- pr_debug("%s(), %s\n", __func__ , irlan_state[state]);
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-
- self->provider.state = state;
-}
-
diff --git a/net/irda/irlan/irlan_filter.c b/net/irda/irlan/irlan_filter.c
deleted file mode 100644
index e755e90b2f26..000000000000
--- a/net/irda/irlan/irlan_filter.c
+++ /dev/null
@@ -1,240 +0,0 @@
-/*********************************************************************
- *
- * Filename: irlan_filter.c
- * Version:
- * Description:
- * Status: Experimental.
- * Author: Dag Brattli <dagb@cs.uit.no>
- * Created at: Fri Jan 29 11:16:38 1999
- * Modified at: Sat Oct 30 12:58:45 1999
- * Modified by: Dag Brattli <dagb@cs.uit.no>
- *
- * Copyright (c) 1998-1999 Dag Brattli, All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * Neither Dag Brattli nor University of Tromsø admit liability nor
- * provide warranty for any of this software. This material is
- * provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-#include <linux/skbuff.h>
-#include <linux/random.h>
-#include <linux/seq_file.h>
-
-#include <net/irda/irlan_common.h>
-#include <net/irda/irlan_filter.h>
-
-/*
- * Function irlan_filter_request (self, skb)
- *
- * Handle filter request from client peer device
- *
- */
-void irlan_filter_request(struct irlan_cb *self, struct sk_buff *skb)
-{
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-
- if ((self->provider.filter_type == IRLAN_DIRECTED) &&
- (self->provider.filter_operation == DYNAMIC))
- {
- pr_debug("Giving peer a dynamic Ethernet address\n");
- self->provider.mac_address[0] = 0x40;
- self->provider.mac_address[1] = 0x00;
- self->provider.mac_address[2] = 0x00;
- self->provider.mac_address[3] = 0x00;
-
- /* Use arbitration value to generate MAC address */
- if (self->provider.access_type == ACCESS_PEER) {
- self->provider.mac_address[4] =
- self->provider.send_arb_val & 0xff;
- self->provider.mac_address[5] =
- (self->provider.send_arb_val >> 8) & 0xff;
- } else {
- /* Just generate something for now */
- get_random_bytes(self->provider.mac_address+4, 1);
- get_random_bytes(self->provider.mac_address+5, 1);
- }
-
- skb->data[0] = 0x00; /* Success */
- skb->data[1] = 0x03;
- irlan_insert_string_param(skb, "FILTER_MODE", "NONE");
- irlan_insert_short_param(skb, "MAX_ENTRY", 0x0001);
- irlan_insert_array_param(skb, "FILTER_ENTRY",
- self->provider.mac_address, 6);
- return;
- }
-
- if ((self->provider.filter_type == IRLAN_DIRECTED) &&
- (self->provider.filter_mode == FILTER))
- {
- pr_debug("Directed filter on\n");
- skb->data[0] = 0x00; /* Success */
- skb->data[1] = 0x00;
- return;
- }
- if ((self->provider.filter_type == IRLAN_DIRECTED) &&
- (self->provider.filter_mode == NONE))
- {
- pr_debug("Directed filter off\n");
- skb->data[0] = 0x00; /* Success */
- skb->data[1] = 0x00;
- return;
- }
-
- if ((self->provider.filter_type == IRLAN_BROADCAST) &&
- (self->provider.filter_mode == FILTER))
- {
- pr_debug("Broadcast filter on\n");
- skb->data[0] = 0x00; /* Success */
- skb->data[1] = 0x00;
- return;
- }
- if ((self->provider.filter_type == IRLAN_BROADCAST) &&
- (self->provider.filter_mode == NONE))
- {
- pr_debug("Broadcast filter off\n");
- skb->data[0] = 0x00; /* Success */
- skb->data[1] = 0x00;
- return;
- }
- if ((self->provider.filter_type == IRLAN_MULTICAST) &&
- (self->provider.filter_mode == FILTER))
- {
- pr_debug("Multicast filter on\n");
- skb->data[0] = 0x00; /* Success */
- skb->data[1] = 0x00;
- return;
- }
- if ((self->provider.filter_type == IRLAN_MULTICAST) &&
- (self->provider.filter_mode == NONE))
- {
- pr_debug("Multicast filter off\n");
- skb->data[0] = 0x00; /* Success */
- skb->data[1] = 0x00;
- return;
- }
- if ((self->provider.filter_type == IRLAN_MULTICAST) &&
- (self->provider.filter_operation == GET))
- {
- pr_debug("Multicast filter get\n");
- skb->data[0] = 0x00; /* Success? */
- skb->data[1] = 0x02;
- irlan_insert_string_param(skb, "FILTER_MODE", "NONE");
- irlan_insert_short_param(skb, "MAX_ENTRY", 16);
- return;
- }
- skb->data[0] = 0x00; /* Command not supported */
- skb->data[1] = 0x00;
-
- pr_debug("Not implemented!\n");
-}
-
-/*
- * Function check_request_param (self, param, value)
- *
- * Check parameters in request from peer device
- *
- */
-void irlan_check_command_param(struct irlan_cb *self, char *param, char *value)
-{
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-
- pr_debug("%s, %s\n", param, value);
-
- /*
- * This is experimental!! DB.
- */
- if (strcmp(param, "MODE") == 0) {
- self->use_udata = TRUE;
- return;
- }
-
- /*
- * FILTER_TYPE
- */
- if (strcmp(param, "FILTER_TYPE") == 0) {
- if (strcmp(value, "DIRECTED") == 0) {
- self->provider.filter_type = IRLAN_DIRECTED;
- return;
- }
- if (strcmp(value, "MULTICAST") == 0) {
- self->provider.filter_type = IRLAN_MULTICAST;
- return;
- }
- if (strcmp(value, "BROADCAST") == 0) {
- self->provider.filter_type = IRLAN_BROADCAST;
- return;
- }
- }
- /*
- * FILTER_MODE
- */
- if (strcmp(param, "FILTER_MODE") == 0) {
- if (strcmp(value, "ALL") == 0) {
- self->provider.filter_mode = ALL;
- return;
- }
- if (strcmp(value, "FILTER") == 0) {
- self->provider.filter_mode = FILTER;
- return;
- }
- if (strcmp(value, "NONE") == 0) {
- self->provider.filter_mode = FILTER;
- return;
- }
- }
- /*
- * FILTER_OPERATION
- */
- if (strcmp(param, "FILTER_OPERATION") == 0) {
- if (strcmp(value, "DYNAMIC") == 0) {
- self->provider.filter_operation = DYNAMIC;
- return;
- }
- if (strcmp(value, "GET") == 0) {
- self->provider.filter_operation = GET;
- return;
- }
- }
-}
-
-/*
- * Function irlan_print_filter (filter_type, buf)
- *
- * Print status of filter. Used by /proc file system
- *
- */
-#ifdef CONFIG_PROC_FS
-#define MASK2STR(m,s) { .mask = m, .str = s }
-
-void irlan_print_filter(struct seq_file *seq, int filter_type)
-{
- static struct {
- int mask;
- const char *str;
- } filter_mask2str[] = {
- MASK2STR(IRLAN_DIRECTED, "DIRECTED"),
- MASK2STR(IRLAN_FUNCTIONAL, "FUNCTIONAL"),
- MASK2STR(IRLAN_GROUP, "GROUP"),
- MASK2STR(IRLAN_MAC_FRAME, "MAC_FRAME"),
- MASK2STR(IRLAN_MULTICAST, "MULTICAST"),
- MASK2STR(IRLAN_BROADCAST, "BROADCAST"),
- MASK2STR(IRLAN_IPX_SOCKET, "IPX_SOCKET"),
- MASK2STR(0, NULL)
- }, *p;
-
- for (p = filter_mask2str; p->str; p++) {
- if (filter_type & p->mask)
- seq_printf(seq, "%s ", p->str);
- }
- seq_putc(seq, '\n');
-}
-#undef MASK2STR
-#endif
diff --git a/net/irda/irlan/irlan_provider.c b/net/irda/irlan/irlan_provider.c
deleted file mode 100644
index 15c292cf2644..000000000000
--- a/net/irda/irlan/irlan_provider.c
+++ /dev/null
@@ -1,408 +0,0 @@
-/*********************************************************************
- *
- * Filename: irlan_provider.c
- * Version: 0.9
- * Description: IrDA LAN Access Protocol Implementation
- * Status: Experimental.
- * Author: Dag Brattli <dagb@cs.uit.no>
- * Created at: Sun Aug 31 20:14:37 1997
- * Modified at: Sat Oct 30 12:52:10 1999
- * Modified by: Dag Brattli <dagb@cs.uit.no>
- * Sources: skeleton.c by Donald Becker <becker@CESDIS.gsfc.nasa.gov>
- * slip.c by Laurence Culhane, <loz@holmes.demon.co.uk>
- * Fred N. van Kempen, <waltje@uwalt.nl.mugnet.org>
- *
- * Copyright (c) 1998-1999 Dag Brattli <dagb@cs.uit.no>,
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * Neither Dag Brattli nor University of Tromsø admit liability nor
- * provide warranty for any of this software. This material is
- * provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/init.h>
-#include <linux/random.h>
-#include <linux/bitops.h>
-#include <linux/slab.h>
-
-#include <asm/byteorder.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/irttp.h>
-#include <net/irda/irlmp.h>
-#include <net/irda/irias_object.h>
-#include <net/irda/iriap.h>
-#include <net/irda/timer.h>
-
-#include <net/irda/irlan_common.h>
-#include <net/irda/irlan_eth.h>
-#include <net/irda/irlan_event.h>
-#include <net/irda/irlan_provider.h>
-#include <net/irda/irlan_filter.h>
-#include <net/irda/irlan_client.h>
-
-static void irlan_provider_connect_indication(void *instance, void *sap,
- struct qos_info *qos,
- __u32 max_sdu_size,
- __u8 max_header_size,
- struct sk_buff *skb);
-
-/*
- * Function irlan_provider_control_data_indication (handle, skb)
- *
- * This function gets the data that is received on the control channel
- *
- */
-static int irlan_provider_data_indication(void *instance, void *sap,
- struct sk_buff *skb)
-{
- struct irlan_cb *self;
- __u8 code;
-
- self = instance;
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == IRLAN_MAGIC, return -1;);
-
- IRDA_ASSERT(skb != NULL, return -1;);
-
- code = skb->data[0];
- switch(code) {
- case CMD_GET_PROVIDER_INFO:
- pr_debug("Got GET_PROVIDER_INFO command!\n");
- irlan_do_provider_event(self, IRLAN_GET_INFO_CMD, skb);
- break;
-
- case CMD_GET_MEDIA_CHAR:
- pr_debug("Got GET_MEDIA_CHAR command!\n");
- irlan_do_provider_event(self, IRLAN_GET_MEDIA_CMD, skb);
- break;
- case CMD_OPEN_DATA_CHANNEL:
- pr_debug("Got OPEN_DATA_CHANNEL command!\n");
- irlan_do_provider_event(self, IRLAN_OPEN_DATA_CMD, skb);
- break;
- case CMD_FILTER_OPERATION:
- pr_debug("Got FILTER_OPERATION command!\n");
- irlan_do_provider_event(self, IRLAN_FILTER_CONFIG_CMD, skb);
- break;
- case CMD_RECONNECT_DATA_CHAN:
- pr_debug("%s(), Got RECONNECT_DATA_CHAN command\n", __func__);
- pr_debug("%s(), NOT IMPLEMENTED\n", __func__);
- break;
- case CMD_CLOSE_DATA_CHAN:
- pr_debug("Got CLOSE_DATA_CHAN command!\n");
- pr_debug("%s(), NOT IMPLEMENTED\n", __func__);
- break;
- default:
- pr_debug("%s(), Unknown command!\n", __func__);
- break;
- }
- return 0;
-}
-
-/*
- * Function irlan_provider_connect_indication (handle, skb, priv)
- *
- * Got connection from peer IrLAN client
- *
- */
-static void irlan_provider_connect_indication(void *instance, void *sap,
- struct qos_info *qos,
- __u32 max_sdu_size,
- __u8 max_header_size,
- struct sk_buff *skb)
-{
- struct irlan_cb *self;
- struct tsap_cb *tsap;
-
- self = instance;
- tsap = sap;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-
- IRDA_ASSERT(tsap == self->provider.tsap_ctrl,return;);
- IRDA_ASSERT(self->provider.state == IRLAN_IDLE, return;);
-
- self->provider.max_sdu_size = max_sdu_size;
- self->provider.max_header_size = max_header_size;
-
- irlan_do_provider_event(self, IRLAN_CONNECT_INDICATION, NULL);
-
- /*
- * If we are in peer mode, the client may not have got the discovery
- * indication it needs to make progress. If the client is still in
- * IDLE state, we must kick it.
- */
- if ((self->provider.access_type == ACCESS_PEER) &&
- (self->client.state == IRLAN_IDLE))
- {
- irlan_client_wakeup(self, self->saddr, self->daddr);
- }
-}
-
-/*
- * Function irlan_provider_connect_response (handle)
- *
- * Accept incoming connection
- *
- */
-void irlan_provider_connect_response(struct irlan_cb *self,
- struct tsap_cb *tsap)
-{
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-
- /* Just accept */
- irttp_connect_response(tsap, IRLAN_MTU, NULL);
-}
-
-static void irlan_provider_disconnect_indication(void *instance, void *sap,
- LM_REASON reason,
- struct sk_buff *userdata)
-{
- struct irlan_cb *self;
- struct tsap_cb *tsap;
-
- pr_debug("%s(), reason=%d\n", __func__ , reason);
-
- self = instance;
- tsap = sap;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
- IRDA_ASSERT(tsap != NULL, return;);
- IRDA_ASSERT(tsap->magic == TTP_TSAP_MAGIC, return;);
-
- IRDA_ASSERT(tsap == self->provider.tsap_ctrl, return;);
-
- irlan_do_provider_event(self, IRLAN_LMP_DISCONNECT, NULL);
-}
-
-/*
- * Function irlan_parse_open_data_cmd (self, skb)
- *
- *
- *
- */
-int irlan_parse_open_data_cmd(struct irlan_cb *self, struct sk_buff *skb)
-{
- int ret;
-
- ret = irlan_provider_parse_command(self, CMD_OPEN_DATA_CHANNEL, skb);
-
- /* Open data channel */
- irlan_open_data_tsap(self);
-
- return ret;
-}
-
-/*
- * Function parse_command (skb)
- *
- * Extract all parameters from received buffer, then feed them to
- * check_params for parsing
- *
- */
-int irlan_provider_parse_command(struct irlan_cb *self, int cmd,
- struct sk_buff *skb)
-{
- __u8 *frame;
- __u8 *ptr;
- int count;
- __u16 val_len;
- int i;
- char *name;
- char *value;
- int ret = RSP_SUCCESS;
-
- IRDA_ASSERT(skb != NULL, return -RSP_PROTOCOL_ERROR;);
-
- pr_debug("%s(), skb->len=%d\n", __func__ , (int)skb->len);
-
- IRDA_ASSERT(self != NULL, return -RSP_PROTOCOL_ERROR;);
- IRDA_ASSERT(self->magic == IRLAN_MAGIC, return -RSP_PROTOCOL_ERROR;);
-
- if (!skb)
- return -RSP_PROTOCOL_ERROR;
-
- frame = skb->data;
-
- name = kmalloc(255, GFP_ATOMIC);
- if (!name)
- return -RSP_INSUFFICIENT_RESOURCES;
- value = kmalloc(1016, GFP_ATOMIC);
- if (!value) {
- kfree(name);
- return -RSP_INSUFFICIENT_RESOURCES;
- }
-
- /* How many parameters? */
- count = frame[1];
-
- pr_debug("Got %d parameters\n", count);
-
- ptr = frame+2;
-
- /* For all parameters */
- for (i=0; i<count;i++) {
- ret = irlan_extract_param(ptr, name, value, &val_len);
- if (ret < 0) {
- pr_debug("%s(), IrLAN, Error!\n", __func__);
- break;
- }
- ptr+=ret;
- ret = RSP_SUCCESS;
- irlan_check_command_param(self, name, value);
- }
- /* Cleanup */
- kfree(name);
- kfree(value);
-
- return ret;
-}
-
-/*
- * Function irlan_provider_send_reply (self, info)
- *
- * Send reply to query to peer IrLAN layer
- *
- */
-void irlan_provider_send_reply(struct irlan_cb *self, int command,
- int ret_code)
-{
- struct sk_buff *skb;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-
- skb = alloc_skb(IRLAN_MAX_HEADER + IRLAN_CMD_HEADER +
- /* Bigger param length comes from CMD_GET_MEDIA_CHAR */
- IRLAN_STRING_PARAMETER_LEN("FILTER_TYPE", "DIRECTED") +
- IRLAN_STRING_PARAMETER_LEN("FILTER_TYPE", "BROADCAST") +
- IRLAN_STRING_PARAMETER_LEN("FILTER_TYPE", "MULTICAST") +
- IRLAN_STRING_PARAMETER_LEN("ACCESS_TYPE", "HOSTED"),
- GFP_ATOMIC);
-
- if (!skb)
- return;
-
- /* Reserve space for TTP, LMP, and LAP header */
- skb_reserve(skb, self->provider.max_header_size);
- skb_put(skb, 2);
-
- switch (command) {
- case CMD_GET_PROVIDER_INFO:
- skb->data[0] = 0x00; /* Success */
- skb->data[1] = 0x02; /* 2 parameters */
- switch (self->media) {
- case MEDIA_802_3:
- irlan_insert_string_param(skb, "MEDIA", "802.3");
- break;
- case MEDIA_802_5:
- irlan_insert_string_param(skb, "MEDIA", "802.5");
- break;
- default:
- pr_debug("%s(), unknown media type!\n", __func__);
- break;
- }
- irlan_insert_short_param(skb, "IRLAN_VER", 0x0101);
- break;
-
- case CMD_GET_MEDIA_CHAR:
- skb->data[0] = 0x00; /* Success */
- skb->data[1] = 0x05; /* 5 parameters */
- irlan_insert_string_param(skb, "FILTER_TYPE", "DIRECTED");
- irlan_insert_string_param(skb, "FILTER_TYPE", "BROADCAST");
- irlan_insert_string_param(skb, "FILTER_TYPE", "MULTICAST");
-
- switch (self->provider.access_type) {
- case ACCESS_DIRECT:
- irlan_insert_string_param(skb, "ACCESS_TYPE", "DIRECT");
- break;
- case ACCESS_PEER:
- irlan_insert_string_param(skb, "ACCESS_TYPE", "PEER");
- break;
- case ACCESS_HOSTED:
- irlan_insert_string_param(skb, "ACCESS_TYPE", "HOSTED");
- break;
- default:
- pr_debug("%s(), Unknown access type\n", __func__);
- break;
- }
- irlan_insert_short_param(skb, "MAX_FRAME", 0x05ee);
- break;
- case CMD_OPEN_DATA_CHANNEL:
- skb->data[0] = 0x00; /* Success */
- if (self->provider.send_arb_val) {
- skb->data[1] = 0x03; /* 3 parameters */
- irlan_insert_short_param(skb, "CON_ARB",
- self->provider.send_arb_val);
- } else
- skb->data[1] = 0x02; /* 2 parameters */
- irlan_insert_byte_param(skb, "DATA_CHAN", self->stsap_sel_data);
- irlan_insert_string_param(skb, "RECONNECT_KEY", "LINUX RULES!");
- break;
- case CMD_FILTER_OPERATION:
- irlan_filter_request(self, skb);
- break;
- default:
- pr_debug("%s(), Unknown command!\n", __func__);
- break;
- }
-
- irttp_data_request(self->provider.tsap_ctrl, skb);
-}
-
-/*
- * Function irlan_provider_register(void)
- *
- * Register provider support so we can accept incoming connections.
- *
- */
-int irlan_provider_open_ctrl_tsap(struct irlan_cb *self)
-{
- struct tsap_cb *tsap;
- notify_t notify;
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == IRLAN_MAGIC, return -1;);
-
- /* Check if already open */
- if (self->provider.tsap_ctrl)
- return -1;
-
- /*
- * First register well known control TSAP
- */
- irda_notify_init(&notify);
- notify.data_indication = irlan_provider_data_indication;
- notify.connect_indication = irlan_provider_connect_indication;
- notify.disconnect_indication = irlan_provider_disconnect_indication;
- notify.instance = self;
- strlcpy(notify.name, "IrLAN ctrl (p)", sizeof(notify.name));
-
- tsap = irttp_open_tsap(LSAP_ANY, 1, &notify);
- if (!tsap) {
- pr_debug("%s(), Got no tsap!\n", __func__);
- return -1;
- }
- self->provider.tsap_ctrl = tsap;
-
- /* Register with LM-IAS */
- irlan_ias_register(self, tsap->stsap_sel);
-
- return 0;
-}
-
diff --git a/net/irda/irlan/irlan_provider_event.c b/net/irda/irlan/irlan_provider_event.c
deleted file mode 100644
index 9c4f7f51d6b5..000000000000
--- a/net/irda/irlan/irlan_provider_event.c
+++ /dev/null
@@ -1,233 +0,0 @@
-/*********************************************************************
- *
- * Filename: irlan_provider_event.c
- * Version: 0.9
- * Description: IrLAN provider state machine)
- * Status: Experimental.
- * Author: Dag Brattli <dagb@cs.uit.no>
- * Created at: Sun Aug 31 20:14:37 1997
- * Modified at: Sat Oct 30 12:52:41 1999
- * Modified by: Dag Brattli <dagb@cs.uit.no>
- *
- * Copyright (c) 1998-1999 Dag Brattli <dagb@cs.uit.no>, All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * Neither Dag Brattli nor University of Tromsø admit liability nor
- * provide warranty for any of this software. This material is
- * provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-#include <net/irda/irda.h>
-#include <net/irda/iriap.h>
-#include <net/irda/irlmp.h>
-#include <net/irda/irttp.h>
-
-#include <net/irda/irlan_provider.h>
-#include <net/irda/irlan_event.h>
-
-static int irlan_provider_state_idle(struct irlan_cb *self, IRLAN_EVENT event,
- struct sk_buff *skb);
-static int irlan_provider_state_info(struct irlan_cb *self, IRLAN_EVENT event,
- struct sk_buff *skb);
-static int irlan_provider_state_open(struct irlan_cb *self, IRLAN_EVENT event,
- struct sk_buff *skb);
-static int irlan_provider_state_data(struct irlan_cb *self, IRLAN_EVENT event,
- struct sk_buff *skb);
-
-static int (*state[])(struct irlan_cb *self, IRLAN_EVENT event,
- struct sk_buff *skb) =
-{
- irlan_provider_state_idle,
- NULL, /* Query */
- NULL, /* Info */
- irlan_provider_state_info,
- NULL, /* Media */
- irlan_provider_state_open,
- NULL, /* Wait */
- NULL, /* Arb */
- irlan_provider_state_data,
- NULL, /* Close */
- NULL, /* Sync */
-};
-
-void irlan_do_provider_event(struct irlan_cb *self, IRLAN_EVENT event,
- struct sk_buff *skb)
-{
- IRDA_ASSERT(*state[ self->provider.state] != NULL, return;);
-
- (*state[self->provider.state]) (self, event, skb);
-}
-
-/*
- * Function irlan_provider_state_idle (event, skb, info)
- *
- * IDLE, We are waiting for an indication that there is a provider
- * available.
- */
-static int irlan_provider_state_idle(struct irlan_cb *self, IRLAN_EVENT event,
- struct sk_buff *skb)
-{
- IRDA_ASSERT(self != NULL, return -1;);
-
- switch(event) {
- case IRLAN_CONNECT_INDICATION:
- irlan_provider_connect_response( self, self->provider.tsap_ctrl);
- irlan_next_provider_state( self, IRLAN_INFO);
- break;
- default:
- pr_debug("%s(), Unknown event %d\n", __func__ , event);
- break;
- }
- if (skb)
- dev_kfree_skb(skb);
-
- return 0;
-}
-
-/*
- * Function irlan_provider_state_info (self, event, skb, info)
- *
- * INFO, We have issued a GetInfo command and is awaiting a reply.
- */
-static int irlan_provider_state_info(struct irlan_cb *self, IRLAN_EVENT event,
- struct sk_buff *skb)
-{
- int ret;
-
- IRDA_ASSERT(self != NULL, return -1;);
-
- switch(event) {
- case IRLAN_GET_INFO_CMD:
- /* Be sure to use 802.3 in case of peer mode */
- if (self->provider.access_type == ACCESS_PEER) {
- self->media = MEDIA_802_3;
-
- /* Check if client has started yet */
- if (self->client.state == IRLAN_IDLE) {
- /* This should get the client going */
- irlmp_discovery_request(8);
- }
- }
-
- irlan_provider_send_reply(self, CMD_GET_PROVIDER_INFO,
- RSP_SUCCESS);
- /* Keep state */
- break;
- case IRLAN_GET_MEDIA_CMD:
- irlan_provider_send_reply(self, CMD_GET_MEDIA_CHAR,
- RSP_SUCCESS);
- /* Keep state */
- break;
- case IRLAN_OPEN_DATA_CMD:
- ret = irlan_parse_open_data_cmd(self, skb);
- if (self->provider.access_type == ACCESS_PEER) {
- /* FIXME: make use of random functions! */
- self->provider.send_arb_val = (jiffies & 0xffff);
- }
- irlan_provider_send_reply(self, CMD_OPEN_DATA_CHANNEL, ret);
-
- if (ret == RSP_SUCCESS) {
- irlan_next_provider_state(self, IRLAN_OPEN);
-
- /* Signal client that we are now open */
- irlan_do_client_event(self, IRLAN_PROVIDER_SIGNAL, NULL);
- }
- break;
- case IRLAN_LMP_DISCONNECT: /* FALLTHROUGH */
- case IRLAN_LAP_DISCONNECT:
- irlan_next_provider_state(self, IRLAN_IDLE);
- break;
- default:
- pr_debug("%s(), Unknown event %d\n", __func__ , event);
- break;
- }
- if (skb)
- dev_kfree_skb(skb);
-
- return 0;
-}
-
-/*
- * Function irlan_provider_state_open (self, event, skb, info)
- *
- * OPEN, The client has issued a OpenData command and is awaiting a
- * reply
- *
- */
-static int irlan_provider_state_open(struct irlan_cb *self, IRLAN_EVENT event,
- struct sk_buff *skb)
-{
- IRDA_ASSERT(self != NULL, return -1;);
-
- switch(event) {
- case IRLAN_FILTER_CONFIG_CMD:
- irlan_provider_parse_command(self, CMD_FILTER_OPERATION, skb);
- irlan_provider_send_reply(self, CMD_FILTER_OPERATION,
- RSP_SUCCESS);
- /* Keep state */
- break;
- case IRLAN_DATA_CONNECT_INDICATION:
- irlan_next_provider_state(self, IRLAN_DATA);
- irlan_provider_connect_response(self, self->tsap_data);
- break;
- case IRLAN_LMP_DISCONNECT: /* FALLTHROUGH */
- case IRLAN_LAP_DISCONNECT:
- irlan_next_provider_state(self, IRLAN_IDLE);
- break;
- default:
- pr_debug("%s(), Unknown event %d\n", __func__ , event);
- break;
- }
- if (skb)
- dev_kfree_skb(skb);
-
- return 0;
-}
-
-/*
- * Function irlan_provider_state_data (self, event, skb, info)
- *
- * DATA, The data channel is connected, allowing data transfers between
- * the local and remote machines.
- *
- */
-static int irlan_provider_state_data(struct irlan_cb *self, IRLAN_EVENT event,
- struct sk_buff *skb)
-{
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == IRLAN_MAGIC, return -1;);
-
- switch(event) {
- case IRLAN_FILTER_CONFIG_CMD:
- irlan_provider_parse_command(self, CMD_FILTER_OPERATION, skb);
- irlan_provider_send_reply(self, CMD_FILTER_OPERATION,
- RSP_SUCCESS);
- break;
- case IRLAN_LMP_DISCONNECT: /* FALLTHROUGH */
- case IRLAN_LAP_DISCONNECT:
- irlan_next_provider_state(self, IRLAN_IDLE);
- break;
- default:
- pr_debug("%s(), Unknown event %d\n", __func__ , event);
- break;
- }
- if (skb)
- dev_kfree_skb(skb);
-
- return 0;
-}
-
-
-
-
-
-
-
-
-
-
diff --git a/net/irda/irlap.c b/net/irda/irlap.c
deleted file mode 100644
index 1cde711bcab5..000000000000
--- a/net/irda/irlap.c
+++ /dev/null
@@ -1,1207 +0,0 @@
-/*********************************************************************
- *
- * Filename: irlap.c
- * Version: 1.0
- * Description: IrLAP implementation for Linux
- * Status: Stable
- * Author: Dag Brattli <dagb@cs.uit.no>
- * Created at: Mon Aug 4 20:40:53 1997
- * Modified at: Tue Dec 14 09:26:44 1999
- * Modified by: Dag Brattli <dagb@cs.uit.no>
- *
- * Copyright (c) 1998-1999 Dag Brattli, All Rights Reserved.
- * Copyright (c) 2000-2003 Jean Tourrilhes <jt@hpl.hp.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
- ********************************************************************/
-
-#include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/skbuff.h>
-#include <linux/delay.h>
-#include <linux/proc_fs.h>
-#include <linux/init.h>
-#include <linux/random.h>
-#include <linux/module.h>
-#include <linux/seq_file.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/irda_device.h>
-#include <net/irda/irqueue.h>
-#include <net/irda/irlmp.h>
-#include <net/irda/irlmp_frame.h>
-#include <net/irda/irlap_frame.h>
-#include <net/irda/irlap.h>
-#include <net/irda/timer.h>
-#include <net/irda/qos.h>
-
-static hashbin_t *irlap = NULL;
-int sysctl_slot_timeout = SLOT_TIMEOUT * 1000 / HZ;
-
-/* This is the delay of missed pf period before generating an event
- * to the application. The spec mandate 3 seconds, but in some cases
- * it's way too long. - Jean II */
-int sysctl_warn_noreply_time = 3;
-
-extern void irlap_queue_xmit(struct irlap_cb *self, struct sk_buff *skb);
-static void __irlap_close(struct irlap_cb *self);
-static void irlap_init_qos_capabilities(struct irlap_cb *self,
- struct qos_info *qos_user);
-
-static const char *const lap_reasons[] __maybe_unused = {
- "ERROR, NOT USED",
- "LAP_DISC_INDICATION",
- "LAP_NO_RESPONSE",
- "LAP_RESET_INDICATION",
- "LAP_FOUND_NONE",
- "LAP_MEDIA_BUSY",
- "LAP_PRIMARY_CONFLICT",
- "ERROR, NOT USED",
-};
-
-int __init irlap_init(void)
-{
- /* Check if the compiler did its job properly.
- * May happen on some ARM configuration, check with Russell King. */
- IRDA_ASSERT(sizeof(struct xid_frame) == 14, ;);
- IRDA_ASSERT(sizeof(struct test_frame) == 10, ;);
- IRDA_ASSERT(sizeof(struct ua_frame) == 10, ;);
- IRDA_ASSERT(sizeof(struct snrm_frame) == 11, ;);
-
- /* Allocate master array */
- irlap = hashbin_new(HB_LOCK);
- if (irlap == NULL) {
- net_err_ratelimited("%s: can't allocate irlap hashbin!\n",
- __func__);
- return -ENOMEM;
- }
-
- return 0;
-}
-
-void irlap_cleanup(void)
-{
- IRDA_ASSERT(irlap != NULL, return;);
-
- hashbin_delete(irlap, (FREE_FUNC) __irlap_close);
-}
-
-/*
- * Function irlap_open (driver)
- *
- * Initialize IrLAP layer
- *
- */
-struct irlap_cb *irlap_open(struct net_device *dev, struct qos_info *qos,
- const char *hw_name)
-{
- struct irlap_cb *self;
-
- /* Initialize the irlap structure. */
- self = kzalloc(sizeof(struct irlap_cb), GFP_KERNEL);
- if (self == NULL)
- return NULL;
-
- self->magic = LAP_MAGIC;
-
- /* Make a binding between the layers */
- self->netdev = dev;
- self->qos_dev = qos;
- /* Copy hardware name */
- if(hw_name != NULL) {
- strlcpy(self->hw_name, hw_name, sizeof(self->hw_name));
- } else {
- self->hw_name[0] = '\0';
- }
-
- /* FIXME: should we get our own field? */
- dev->atalk_ptr = self;
-
- self->state = LAP_OFFLINE;
-
- /* Initialize transmit queue */
- skb_queue_head_init(&self->txq);
- skb_queue_head_init(&self->txq_ultra);
- skb_queue_head_init(&self->wx_list);
-
- /* My unique IrLAP device address! */
- /* We don't want the broadcast address, neither the NULL address
- * (most often used to signify "invalid"), and we don't want an
- * address already in use (otherwise connect won't be able
- * to select the proper link). - Jean II */
- do {
- get_random_bytes(&self->saddr, sizeof(self->saddr));
- } while ((self->saddr == 0x0) || (self->saddr == BROADCAST) ||
- (hashbin_lock_find(irlap, self->saddr, NULL)) );
- /* Copy to the driver */
- memcpy(dev->dev_addr, &self->saddr, 4);
-
- init_timer(&self->slot_timer);
- init_timer(&self->query_timer);
- init_timer(&self->discovery_timer);
- init_timer(&self->final_timer);
- init_timer(&self->poll_timer);
- init_timer(&self->wd_timer);
- init_timer(&self->backoff_timer);
- init_timer(&self->media_busy_timer);
-
- irlap_apply_default_connection_parameters(self);
-
- self->N3 = 3; /* # connections attempts to try before giving up */
-
- self->state = LAP_NDM;
-
- hashbin_insert(irlap, (irda_queue_t *) self, self->saddr, NULL);
-
- irlmp_register_link(self, self->saddr, &self->notify);
-
- return self;
-}
-EXPORT_SYMBOL(irlap_open);
-
-/*
- * Function __irlap_close (self)
- *
- * Remove IrLAP and all allocated memory. Stop any pending timers.
- *
- */
-static void __irlap_close(struct irlap_cb *self)
-{
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
- /* Stop timers */
- del_timer(&self->slot_timer);
- del_timer(&self->query_timer);
- del_timer(&self->discovery_timer);
- del_timer(&self->final_timer);
- del_timer(&self->poll_timer);
- del_timer(&self->wd_timer);
- del_timer(&self->backoff_timer);
- del_timer(&self->media_busy_timer);
-
- irlap_flush_all_queues(self);
-
- self->magic = 0;
-
- kfree(self);
-}
-
-/*
- * Function irlap_close (self)
- *
- * Remove IrLAP instance
- *
- */
-void irlap_close(struct irlap_cb *self)
-{
- struct irlap_cb *lap;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
- /* We used to send a LAP_DISC_INDICATION here, but this was
- * racy. This has been move within irlmp_unregister_link()
- * itself. Jean II */
-
- /* Kill the LAP and all LSAPs on top of it */
- irlmp_unregister_link(self->saddr);
- self->notify.instance = NULL;
-
- /* Be sure that we manage to remove ourself from the hash */
- lap = hashbin_remove(irlap, self->saddr, NULL);
- if (!lap) {
- pr_debug("%s(), Didn't find myself!\n", __func__);
- return;
- }
- __irlap_close(lap);
-}
-EXPORT_SYMBOL(irlap_close);
-
-/*
- * Function irlap_connect_indication (self, skb)
- *
- * Another device is attempting to make a connection
- *
- */
-void irlap_connect_indication(struct irlap_cb *self, struct sk_buff *skb)
-{
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
- irlap_init_qos_capabilities(self, NULL); /* No user QoS! */
-
- irlmp_link_connect_indication(self->notify.instance, self->saddr,
- self->daddr, &self->qos_tx, skb);
-}
-
-/*
- * Function irlap_connect_response (self, skb)
- *
- * Service user has accepted incoming connection
- *
- */
-void irlap_connect_response(struct irlap_cb *self, struct sk_buff *userdata)
-{
- irlap_do_event(self, CONNECT_RESPONSE, userdata, NULL);
-}
-
-/*
- * Function irlap_connect_request (self, daddr, qos_user, sniff)
- *
- * Request connection with another device, sniffing is not implemented
- * yet.
- *
- */
-void irlap_connect_request(struct irlap_cb *self, __u32 daddr,
- struct qos_info *qos_user, int sniff)
-{
- pr_debug("%s(), daddr=0x%08x\n", __func__, daddr);
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
- self->daddr = daddr;
-
- /*
- * If the service user specifies QoS values for this connection,
- * then use them
- */
- irlap_init_qos_capabilities(self, qos_user);
-
- if ((self->state == LAP_NDM) && !self->media_busy)
- irlap_do_event(self, CONNECT_REQUEST, NULL, NULL);
- else
- self->connect_pending = TRUE;
-}
-
-/*
- * Function irlap_connect_confirm (self, skb)
- *
- * Connection request has been accepted
- *
- */
-void irlap_connect_confirm(struct irlap_cb *self, struct sk_buff *skb)
-{
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
- irlmp_link_connect_confirm(self->notify.instance, &self->qos_tx, skb);
-}
-
-/*
- * Function irlap_data_indication (self, skb)
- *
- * Received data frames from IR-port, so we just pass them up to
- * IrLMP for further processing
- *
- */
-void irlap_data_indication(struct irlap_cb *self, struct sk_buff *skb,
- int unreliable)
-{
- /* Hide LAP header from IrLMP layer */
- skb_pull(skb, LAP_ADDR_HEADER+LAP_CTRL_HEADER);
-
- irlmp_link_data_indication(self->notify.instance, skb, unreliable);
-}
-
-
-/*
- * Function irlap_data_request (self, skb)
- *
- * Queue data for transmission, must wait until XMIT state
- *
- */
-void irlap_data_request(struct irlap_cb *self, struct sk_buff *skb,
- int unreliable)
-{
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
- IRDA_ASSERT(skb_headroom(skb) >= (LAP_ADDR_HEADER+LAP_CTRL_HEADER),
- return;);
- skb_push(skb, LAP_ADDR_HEADER+LAP_CTRL_HEADER);
-
- /*
- * Must set frame format now so that the rest of the code knows
- * if its dealing with an I or an UI frame
- */
- if (unreliable)
- skb->data[1] = UI_FRAME;
- else
- skb->data[1] = I_FRAME;
-
- /* Don't forget to refcount it - see irlmp_connect_request(). */
- skb_get(skb);
-
- /* Add at the end of the queue (keep ordering) - Jean II */
- skb_queue_tail(&self->txq, skb);
-
- /*
- * Send event if this frame only if we are in the right state
- * FIXME: udata should be sent first! (skb_queue_head?)
- */
- if ((self->state == LAP_XMIT_P) || (self->state == LAP_XMIT_S)) {
- /* If we are not already processing the Tx queue, trigger
- * transmission immediately - Jean II */
- if((skb_queue_len(&self->txq) <= 1) && (!self->local_busy))
- irlap_do_event(self, DATA_REQUEST, skb, NULL);
- /* Otherwise, the packets will be sent normally at the
- * next pf-poll - Jean II */
- }
-}
-
-/*
- * Function irlap_unitdata_request (self, skb)
- *
- * Send Ultra data. This is data that must be sent outside any connection
- *
- */
-#ifdef CONFIG_IRDA_ULTRA
-void irlap_unitdata_request(struct irlap_cb *self, struct sk_buff *skb)
-{
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
- IRDA_ASSERT(skb_headroom(skb) >= (LAP_ADDR_HEADER+LAP_CTRL_HEADER),
- return;);
- skb_push(skb, LAP_ADDR_HEADER+LAP_CTRL_HEADER);
-
- skb->data[0] = CBROADCAST;
- skb->data[1] = UI_FRAME;
-
- /* Don't need to refcount, see irlmp_connless_data_request() */
-
- skb_queue_tail(&self->txq_ultra, skb);
-
- irlap_do_event(self, SEND_UI_FRAME, NULL, NULL);
-}
-#endif /*CONFIG_IRDA_ULTRA */
-
-/*
- * Function irlap_udata_indication (self, skb)
- *
- * Receive Ultra data. This is data that is received outside any connection
- *
- */
-#ifdef CONFIG_IRDA_ULTRA
-void irlap_unitdata_indication(struct irlap_cb *self, struct sk_buff *skb)
-{
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
- IRDA_ASSERT(skb != NULL, return;);
-
- /* Hide LAP header from IrLMP layer */
- skb_pull(skb, LAP_ADDR_HEADER+LAP_CTRL_HEADER);
-
- irlmp_link_unitdata_indication(self->notify.instance, skb);
-}
-#endif /* CONFIG_IRDA_ULTRA */
-
-/*
- * Function irlap_disconnect_request (void)
- *
- * Request to disconnect connection by service user
- */
-void irlap_disconnect_request(struct irlap_cb *self)
-{
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
- /* Don't disconnect until all data frames are successfully sent */
- if (!skb_queue_empty(&self->txq)) {
- self->disconnect_pending = TRUE;
- return;
- }
-
- /* Check if we are in the right state for disconnecting */
- switch (self->state) {
- case LAP_XMIT_P: /* FALLTHROUGH */
- case LAP_XMIT_S: /* FALLTHROUGH */
- case LAP_CONN: /* FALLTHROUGH */
- case LAP_RESET_WAIT: /* FALLTHROUGH */
- case LAP_RESET_CHECK:
- irlap_do_event(self, DISCONNECT_REQUEST, NULL, NULL);
- break;
- default:
- pr_debug("%s(), disconnect pending!\n", __func__);
- self->disconnect_pending = TRUE;
- break;
- }
-}
-
-/*
- * Function irlap_disconnect_indication (void)
- *
- * Disconnect request from other device
- *
- */
-void irlap_disconnect_indication(struct irlap_cb *self, LAP_REASON reason)
-{
- pr_debug("%s(), reason=%s\n", __func__, lap_reasons[reason]);
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
- /* Flush queues */
- irlap_flush_all_queues(self);
-
- switch (reason) {
- case LAP_RESET_INDICATION:
- pr_debug("%s(), Sending reset request!\n", __func__);
- irlap_do_event(self, RESET_REQUEST, NULL, NULL);
- break;
- case LAP_NO_RESPONSE: /* FALLTHROUGH */
- case LAP_DISC_INDICATION: /* FALLTHROUGH */
- case LAP_FOUND_NONE: /* FALLTHROUGH */
- case LAP_MEDIA_BUSY:
- irlmp_link_disconnect_indication(self->notify.instance, self,
- reason, NULL);
- break;
- default:
- net_err_ratelimited("%s: Unknown reason %d\n",
- __func__, reason);
- }
-}
-
-/*
- * Function irlap_discovery_request (gen_addr_bit)
- *
- * Start one single discovery operation.
- *
- */
-void irlap_discovery_request(struct irlap_cb *self, discovery_t *discovery)
-{
- struct irlap_info info;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
- IRDA_ASSERT(discovery != NULL, return;);
-
- pr_debug("%s(), nslots = %d\n", __func__, discovery->nslots);
-
- IRDA_ASSERT((discovery->nslots == 1) || (discovery->nslots == 6) ||
- (discovery->nslots == 8) || (discovery->nslots == 16),
- return;);
-
- /* Discovery is only possible in NDM mode */
- if (self->state != LAP_NDM) {
- pr_debug("%s(), discovery only possible in NDM mode\n",
- __func__);
- irlap_discovery_confirm(self, NULL);
- /* Note : in theory, if we are not in NDM, we could postpone
- * the discovery like we do for connection request.
- * In practice, it's not worth it. If the media was busy,
- * it's likely next time around it won't be busy. If we are
- * in REPLY state, we will get passive discovery info & event.
- * Jean II */
- return;
- }
-
- /* Check if last discovery request finished in time, or if
- * it was aborted due to the media busy flag. */
- if (self->discovery_log != NULL) {
- hashbin_delete(self->discovery_log, (FREE_FUNC) kfree);
- self->discovery_log = NULL;
- }
-
- /* All operations will occur at predictable time, no need to lock */
- self->discovery_log = hashbin_new(HB_NOLOCK);
-
- if (self->discovery_log == NULL) {
- net_warn_ratelimited("%s(), Unable to allocate discovery log!\n",
- __func__);
- return;
- }
-
- info.S = discovery->nslots; /* Number of slots */
- info.s = 0; /* Current slot */
-
- self->discovery_cmd = discovery;
- info.discovery = discovery;
-
- /* sysctl_slot_timeout bounds are checked in irsysctl.c - Jean II */
- self->slot_timeout = msecs_to_jiffies(sysctl_slot_timeout);
-
- irlap_do_event(self, DISCOVERY_REQUEST, NULL, &info);
-}
-
-/*
- * Function irlap_discovery_confirm (log)
- *
- * A device has been discovered in front of this station, we
- * report directly to LMP.
- */
-void irlap_discovery_confirm(struct irlap_cb *self, hashbin_t *discovery_log)
-{
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
- IRDA_ASSERT(self->notify.instance != NULL, return;);
-
- /*
- * Check for successful discovery, since we are then allowed to clear
- * the media busy condition (IrLAP 6.13.4 - p.94). This should allow
- * us to make connection attempts much faster and easier (i.e. no
- * collisions).
- * Setting media busy to false will also generate an event allowing
- * to process pending events in NDM state machine.
- * Note : the spec doesn't define what's a successful discovery is.
- * If we want Ultra to work, it's successful even if there is
- * nobody discovered - Jean II
- */
- if (discovery_log)
- irda_device_set_media_busy(self->netdev, FALSE);
-
- /* Inform IrLMP */
- irlmp_link_discovery_confirm(self->notify.instance, discovery_log);
-}
-
-/*
- * Function irlap_discovery_indication (log)
- *
- * Somebody is trying to discover us!
- *
- */
-void irlap_discovery_indication(struct irlap_cb *self, discovery_t *discovery)
-{
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
- IRDA_ASSERT(discovery != NULL, return;);
-
- IRDA_ASSERT(self->notify.instance != NULL, return;);
-
- /* A device is very likely to connect immediately after it performs
- * a successful discovery. This means that in our case, we are much
- * more likely to receive a connection request over the medium.
- * So, we backoff to avoid collisions.
- * IrLAP spec 6.13.4 suggest 100ms...
- * Note : this little trick actually make a *BIG* difference. If I set
- * my Linux box with discovery enabled and one Ultra frame sent every
- * second, my Palm has no trouble connecting to it every time !
- * Jean II */
- irda_device_set_media_busy(self->netdev, SMALL);
-
- irlmp_link_discovery_indication(self->notify.instance, discovery);
-}
-
-/*
- * Function irlap_status_indication (quality_of_link)
- */
-void irlap_status_indication(struct irlap_cb *self, int quality_of_link)
-{
- switch (quality_of_link) {
- case STATUS_NO_ACTIVITY:
- net_info_ratelimited("IrLAP, no activity on link!\n");
- break;
- case STATUS_NOISY:
- net_info_ratelimited("IrLAP, noisy link!\n");
- break;
- default:
- break;
- }
- irlmp_status_indication(self->notify.instance,
- quality_of_link, LOCK_NO_CHANGE);
-}
-
-/*
- * Function irlap_reset_indication (void)
- */
-void irlap_reset_indication(struct irlap_cb *self)
-{
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
- if (self->state == LAP_RESET_WAIT)
- irlap_do_event(self, RESET_REQUEST, NULL, NULL);
- else
- irlap_do_event(self, RESET_RESPONSE, NULL, NULL);
-}
-
-/*
- * Function irlap_reset_confirm (void)
- */
-void irlap_reset_confirm(void)
-{
-}
-
-/*
- * Function irlap_generate_rand_time_slot (S, s)
- *
- * Generate a random time slot between s and S-1 where
- * S = Number of slots (0 -> S-1)
- * s = Current slot
- */
-int irlap_generate_rand_time_slot(int S, int s)
-{
- static int rand;
- int slot;
-
- IRDA_ASSERT((S - s) > 0, return 0;);
-
- rand += jiffies;
- rand ^= (rand << 12);
- rand ^= (rand >> 20);
-
- slot = s + rand % (S-s);
-
- IRDA_ASSERT((slot >= s) || (slot < S), return 0;);
-
- return slot;
-}
-
-/*
- * Function irlap_update_nr_received (nr)
- *
- * Remove all acknowledged frames in current window queue. This code is
- * not intuitive and you should not try to change it. If you think it
- * contains bugs, please mail a patch to the author instead.
- */
-void irlap_update_nr_received(struct irlap_cb *self, int nr)
-{
- struct sk_buff *skb = NULL;
- int count = 0;
-
- /*
- * Remove all the ack-ed frames from the window queue.
- */
-
- /*
- * Optimize for the common case. It is most likely that the receiver
- * will acknowledge all the frames we have sent! So in that case we
- * delete all frames stored in window.
- */
- if (nr == self->vs) {
- while ((skb = skb_dequeue(&self->wx_list)) != NULL) {
- dev_kfree_skb(skb);
- }
- /* The last acked frame is the next to send minus one */
- self->va = nr - 1;
- } else {
- /* Remove all acknowledged frames in current window */
- while ((skb_peek(&self->wx_list) != NULL) &&
- (((self->va+1) % 8) != nr))
- {
- skb = skb_dequeue(&self->wx_list);
- dev_kfree_skb(skb);
-
- self->va = (self->va + 1) % 8;
- count++;
- }
- }
-
- /* Advance window */
- self->window = self->window_size - skb_queue_len(&self->wx_list);
-}
-
-/*
- * Function irlap_validate_ns_received (ns)
- *
- * Validate the next to send (ns) field from received frame.
- */
-int irlap_validate_ns_received(struct irlap_cb *self, int ns)
-{
- /* ns as expected? */
- if (ns == self->vr)
- return NS_EXPECTED;
- /*
- * Stations are allowed to treat invalid NS as unexpected NS
- * IrLAP, Recv ... with-invalid-Ns. p. 84
- */
- return NS_UNEXPECTED;
-
- /* return NR_INVALID; */
-}
-/*
- * Function irlap_validate_nr_received (nr)
- *
- * Validate the next to receive (nr) field from received frame.
- *
- */
-int irlap_validate_nr_received(struct irlap_cb *self, int nr)
-{
- /* nr as expected? */
- if (nr == self->vs) {
- pr_debug("%s(), expected!\n", __func__);
- return NR_EXPECTED;
- }
-
- /*
- * unexpected nr? (but within current window), first we check if the
- * ns numbers of the frames in the current window wrap.
- */
- if (self->va < self->vs) {
- if ((nr >= self->va) && (nr <= self->vs))
- return NR_UNEXPECTED;
- } else {
- if ((nr >= self->va) || (nr <= self->vs))
- return NR_UNEXPECTED;
- }
-
- /* Invalid nr! */
- return NR_INVALID;
-}
-
-/*
- * Function irlap_initiate_connection_state ()
- *
- * Initialize the connection state parameters
- *
- */
-void irlap_initiate_connection_state(struct irlap_cb *self)
-{
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
- /* Next to send and next to receive */
- self->vs = self->vr = 0;
-
- /* Last frame which got acked (0 - 1) % 8 */
- self->va = 7;
-
- self->window = 1;
-
- self->remote_busy = FALSE;
- self->retry_count = 0;
-}
-
-/*
- * Function irlap_wait_min_turn_around (self, qos)
- *
- * Wait negotiated minimum turn around time, this function actually sets
- * the number of BOS's that must be sent before the next transmitted
- * frame in order to delay for the specified amount of time. This is
- * done to avoid using timers, and the forbidden udelay!
- */
-void irlap_wait_min_turn_around(struct irlap_cb *self, struct qos_info *qos)
-{
- __u32 min_turn_time;
- __u32 speed;
-
- /* Get QoS values. */
- speed = qos->baud_rate.value;
- min_turn_time = qos->min_turn_time.value;
-
- /* No need to calculate XBOFs for speeds over 115200 bps */
- if (speed > 115200) {
- self->mtt_required = min_turn_time;
- return;
- }
-
- /*
- * Send additional BOF's for the next frame for the requested
- * min turn time, so now we must calculate how many chars (XBOF's) we
- * must send for the requested time period (min turn time)
- */
- self->xbofs_delay = irlap_min_turn_time_in_bytes(speed, min_turn_time);
-}
-
-/*
- * Function irlap_flush_all_queues (void)
- *
- * Flush all queues
- *
- */
-void irlap_flush_all_queues(struct irlap_cb *self)
-{
- struct sk_buff* skb;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
- /* Free transmission queue */
- while ((skb = skb_dequeue(&self->txq)) != NULL)
- dev_kfree_skb(skb);
-
- while ((skb = skb_dequeue(&self->txq_ultra)) != NULL)
- dev_kfree_skb(skb);
-
- /* Free sliding window buffered packets */
- while ((skb = skb_dequeue(&self->wx_list)) != NULL)
- dev_kfree_skb(skb);
-}
-
-/*
- * Function irlap_setspeed (self, speed)
- *
- * Change the speed of the IrDA port
- *
- */
-static void irlap_change_speed(struct irlap_cb *self, __u32 speed, int now)
-{
- struct sk_buff *skb;
-
- pr_debug("%s(), setting speed to %d\n", __func__, speed);
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
- self->speed = speed;
-
- /* Change speed now, or just piggyback speed on frames */
- if (now) {
- /* Send down empty frame to trigger speed change */
- skb = alloc_skb(0, GFP_ATOMIC);
- if (skb)
- irlap_queue_xmit(self, skb);
- }
-}
-
-/*
- * Function irlap_init_qos_capabilities (self, qos)
- *
- * Initialize QoS for this IrLAP session, What we do is to compute the
- * intersection of the QoS capabilities for the user, driver and for
- * IrLAP itself. Normally, IrLAP will not specify any values, but it can
- * be used to restrict certain values.
- */
-static void irlap_init_qos_capabilities(struct irlap_cb *self,
- struct qos_info *qos_user)
-{
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
- IRDA_ASSERT(self->netdev != NULL, return;);
-
- /* Start out with the maximum QoS support possible */
- irda_init_max_qos_capabilies(&self->qos_rx);
-
- /* Apply drivers QoS capabilities */
- irda_qos_compute_intersection(&self->qos_rx, self->qos_dev);
-
- /*
- * Check for user supplied QoS parameters. The service user is only
- * allowed to supply these values. We check each parameter since the
- * user may not have set all of them.
- */
- if (qos_user) {
- pr_debug("%s(), Found user specified QoS!\n", __func__);
-
- if (qos_user->baud_rate.bits)
- self->qos_rx.baud_rate.bits &= qos_user->baud_rate.bits;
-
- if (qos_user->max_turn_time.bits)
- self->qos_rx.max_turn_time.bits &= qos_user->max_turn_time.bits;
- if (qos_user->data_size.bits)
- self->qos_rx.data_size.bits &= qos_user->data_size.bits;
-
- if (qos_user->link_disc_time.bits)
- self->qos_rx.link_disc_time.bits &= qos_user->link_disc_time.bits;
- }
-
- /* Use 500ms in IrLAP for now */
- self->qos_rx.max_turn_time.bits &= 0x01;
-
- /* Set data size */
- /*self->qos_rx.data_size.bits &= 0x03;*/
-
- irda_qos_bits_to_value(&self->qos_rx);
-}
-
-/*
- * Function irlap_apply_default_connection_parameters (void, now)
- *
- * Use the default connection and transmission parameters
- */
-void irlap_apply_default_connection_parameters(struct irlap_cb *self)
-{
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
- /* xbofs : Default value in NDM */
- self->next_bofs = 12;
- self->bofs_count = 12;
-
- /* NDM Speed is 9600 */
- irlap_change_speed(self, 9600, TRUE);
-
- /* Set mbusy when going to NDM state */
- irda_device_set_media_busy(self->netdev, TRUE);
-
- /*
- * Generate random connection address for this session, which must
- * be 7 bits wide and different from 0x00 and 0xfe
- */
- while ((self->caddr == 0x00) || (self->caddr == 0xfe)) {
- get_random_bytes(&self->caddr, sizeof(self->caddr));
- self->caddr &= 0xfe;
- }
-
- /* Use default values until connection has been negitiated */
- self->slot_timeout = sysctl_slot_timeout;
- self->final_timeout = FINAL_TIMEOUT;
- self->poll_timeout = POLL_TIMEOUT;
- self->wd_timeout = WD_TIMEOUT;
-
- /* Set some default values */
- self->qos_tx.baud_rate.value = 9600;
- self->qos_rx.baud_rate.value = 9600;
- self->qos_tx.max_turn_time.value = 0;
- self->qos_rx.max_turn_time.value = 0;
- self->qos_tx.min_turn_time.value = 0;
- self->qos_rx.min_turn_time.value = 0;
- self->qos_tx.data_size.value = 64;
- self->qos_rx.data_size.value = 64;
- self->qos_tx.window_size.value = 1;
- self->qos_rx.window_size.value = 1;
- self->qos_tx.additional_bofs.value = 12;
- self->qos_rx.additional_bofs.value = 12;
- self->qos_tx.link_disc_time.value = 0;
- self->qos_rx.link_disc_time.value = 0;
-
- irlap_flush_all_queues(self);
-
- self->disconnect_pending = FALSE;
- self->connect_pending = FALSE;
-}
-
-/*
- * Function irlap_apply_connection_parameters (qos, now)
- *
- * Initialize IrLAP with the negotiated QoS values
- *
- * If 'now' is false, the speed and xbofs will be changed after the next
- * frame is sent.
- * If 'now' is true, the speed and xbofs is changed immediately
- */
-void irlap_apply_connection_parameters(struct irlap_cb *self, int now)
-{
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
- /* Set the negotiated xbofs value */
- self->next_bofs = self->qos_tx.additional_bofs.value;
- if (now)
- self->bofs_count = self->next_bofs;
-
- /* Set the negotiated link speed (may need the new xbofs value) */
- irlap_change_speed(self, self->qos_tx.baud_rate.value, now);
-
- self->window_size = self->qos_tx.window_size.value;
- self->window = self->qos_tx.window_size.value;
-
-#ifdef CONFIG_IRDA_DYNAMIC_WINDOW
- /*
- * Calculate how many bytes it is possible to transmit before the
- * link must be turned around
- */
- self->line_capacity =
- irlap_max_line_capacity(self->qos_tx.baud_rate.value,
- self->qos_tx.max_turn_time.value);
- self->bytes_left = self->line_capacity;
-#endif /* CONFIG_IRDA_DYNAMIC_WINDOW */
-
-
- /*
- * Initialize timeout values, some of the rules are listed on
- * page 92 in IrLAP.
- */
- IRDA_ASSERT(self->qos_tx.max_turn_time.value != 0, return;);
- IRDA_ASSERT(self->qos_rx.max_turn_time.value != 0, return;);
- /* The poll timeout applies only to the primary station.
- * It defines the maximum time the primary stay in XMIT mode
- * before timeout and turning the link around (sending a RR).
- * Or, this is how much we can keep the pf bit in primary mode.
- * Therefore, it must be lower or equal than our *OWN* max turn around.
- * Jean II */
- self->poll_timeout = msecs_to_jiffies(
- self->qos_tx.max_turn_time.value);
- /* The Final timeout applies only to the primary station.
- * It defines the maximum time the primary wait (mostly in RECV mode)
- * for an answer from the secondary station before polling it again.
- * Therefore, it must be greater or equal than our *PARTNER*
- * max turn around time - Jean II */
- self->final_timeout = msecs_to_jiffies(
- self->qos_rx.max_turn_time.value);
- /* The Watchdog Bit timeout applies only to the secondary station.
- * It defines the maximum time the secondary wait (mostly in RECV mode)
- * for poll from the primary station before getting annoyed.
- * Therefore, it must be greater or equal than our *PARTNER*
- * max turn around time - Jean II */
- self->wd_timeout = self->final_timeout * 2;
-
- /*
- * N1 and N2 are maximum retry count for *both* the final timer
- * and the wd timer (with a factor 2) as defined above.
- * After N1 retry of a timer, we give a warning to the user.
- * After N2 retry, we consider the link dead and disconnect it.
- * Jean II
- */
-
- /*
- * Set N1 to 0 if Link Disconnect/Threshold Time = 3 and set it to
- * 3 seconds otherwise. See page 71 in IrLAP for more details.
- * Actually, it's not always 3 seconds, as we allow to set
- * it via sysctl... Max maxtt is 500ms, and N1 need to be multiple
- * of 2, so 1 second is minimum we can allow. - Jean II
- */
- if (self->qos_tx.link_disc_time.value == sysctl_warn_noreply_time)
- /*
- * If we set N1 to 0, it will trigger immediately, which is
- * not what we want. What we really want is to disable it,
- * Jean II
- */
- self->N1 = -2; /* Disable - Need to be multiple of 2*/
- else
- self->N1 = sysctl_warn_noreply_time * 1000 /
- self->qos_rx.max_turn_time.value;
-
- pr_debug("Setting N1 = %d\n", self->N1);
-
- /* Set N2 to match our own disconnect time */
- self->N2 = self->qos_tx.link_disc_time.value * 1000 /
- self->qos_rx.max_turn_time.value;
- pr_debug("Setting N2 = %d\n", self->N2);
-}
-
-#ifdef CONFIG_PROC_FS
-struct irlap_iter_state {
- int id;
-};
-
-static void *irlap_seq_start(struct seq_file *seq, loff_t *pos)
-{
- struct irlap_iter_state *iter = seq->private;
- struct irlap_cb *self;
-
- /* Protect our access to the tsap list */
- spin_lock_irq(&irlap->hb_spinlock);
- iter->id = 0;
-
- for (self = (struct irlap_cb *) hashbin_get_first(irlap);
- self; self = (struct irlap_cb *) hashbin_get_next(irlap)) {
- if (iter->id == *pos)
- break;
- ++iter->id;
- }
-
- return self;
-}
-
-static void *irlap_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- struct irlap_iter_state *iter = seq->private;
-
- ++*pos;
- ++iter->id;
- return (void *) hashbin_get_next(irlap);
-}
-
-static void irlap_seq_stop(struct seq_file *seq, void *v)
-{
- spin_unlock_irq(&irlap->hb_spinlock);
-}
-
-static int irlap_seq_show(struct seq_file *seq, void *v)
-{
- const struct irlap_iter_state *iter = seq->private;
- const struct irlap_cb *self = v;
-
- IRDA_ASSERT(self->magic == LAP_MAGIC, return -EINVAL;);
-
- seq_printf(seq, "irlap%d ", iter->id);
- seq_printf(seq, "state: %s\n",
- irlap_state[self->state]);
-
- seq_printf(seq, " device name: %s, ",
- (self->netdev) ? self->netdev->name : "bug");
- seq_printf(seq, "hardware name: %s\n", self->hw_name);
-
- seq_printf(seq, " caddr: %#02x, ", self->caddr);
- seq_printf(seq, "saddr: %#08x, ", self->saddr);
- seq_printf(seq, "daddr: %#08x\n", self->daddr);
-
- seq_printf(seq, " win size: %d, ",
- self->window_size);
- seq_printf(seq, "win: %d, ", self->window);
-#ifdef CONFIG_IRDA_DYNAMIC_WINDOW
- seq_printf(seq, "line capacity: %d, ",
- self->line_capacity);
- seq_printf(seq, "bytes left: %d\n", self->bytes_left);
-#endif /* CONFIG_IRDA_DYNAMIC_WINDOW */
- seq_printf(seq, " tx queue len: %d ",
- skb_queue_len(&self->txq));
- seq_printf(seq, "win queue len: %d ",
- skb_queue_len(&self->wx_list));
- seq_printf(seq, "rbusy: %s", self->remote_busy ?
- "TRUE" : "FALSE");
- seq_printf(seq, " mbusy: %s\n", self->media_busy ?
- "TRUE" : "FALSE");
-
- seq_printf(seq, " retrans: %d ", self->retry_count);
- seq_printf(seq, "vs: %d ", self->vs);
- seq_printf(seq, "vr: %d ", self->vr);
- seq_printf(seq, "va: %d\n", self->va);
-
- seq_printf(seq, " qos\tbps\tmaxtt\tdsize\twinsize\taddbofs\tmintt\tldisc\tcomp\n");
-
- seq_printf(seq, " tx\t%d\t",
- self->qos_tx.baud_rate.value);
- seq_printf(seq, "%d\t",
- self->qos_tx.max_turn_time.value);
- seq_printf(seq, "%d\t",
- self->qos_tx.data_size.value);
- seq_printf(seq, "%d\t",
- self->qos_tx.window_size.value);
- seq_printf(seq, "%d\t",
- self->qos_tx.additional_bofs.value);
- seq_printf(seq, "%d\t",
- self->qos_tx.min_turn_time.value);
- seq_printf(seq, "%d\t",
- self->qos_tx.link_disc_time.value);
- seq_printf(seq, "\n");
-
- seq_printf(seq, " rx\t%d\t",
- self->qos_rx.baud_rate.value);
- seq_printf(seq, "%d\t",
- self->qos_rx.max_turn_time.value);
- seq_printf(seq, "%d\t",
- self->qos_rx.data_size.value);
- seq_printf(seq, "%d\t",
- self->qos_rx.window_size.value);
- seq_printf(seq, "%d\t",
- self->qos_rx.additional_bofs.value);
- seq_printf(seq, "%d\t",
- self->qos_rx.min_turn_time.value);
- seq_printf(seq, "%d\n",
- self->qos_rx.link_disc_time.value);
-
- return 0;
-}
-
-static const struct seq_operations irlap_seq_ops = {
- .start = irlap_seq_start,
- .next = irlap_seq_next,
- .stop = irlap_seq_stop,
- .show = irlap_seq_show,
-};
-
-static int irlap_seq_open(struct inode *inode, struct file *file)
-{
- if (irlap == NULL)
- return -EINVAL;
-
- return seq_open_private(file, &irlap_seq_ops,
- sizeof(struct irlap_iter_state));
-}
-
-const struct file_operations irlap_seq_fops = {
- .owner = THIS_MODULE,
- .open = irlap_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_private,
-};
-
-#endif /* CONFIG_PROC_FS */
diff --git a/net/irda/irlap_event.c b/net/irda/irlap_event.c
deleted file mode 100644
index 0e1b4d79f745..000000000000
--- a/net/irda/irlap_event.c
+++ /dev/null
@@ -1,2316 +0,0 @@
-/*********************************************************************
- *
- * Filename: irlap_event.c
- * Version: 0.9
- * Description: IrLAP state machine implementation
- * Status: Experimental.
- * Author: Dag Brattli <dag@brattli.net>
- * Created at: Sat Aug 16 00:59:29 1997
- * Modified at: Sat Dec 25 21:07:57 1999
- * Modified by: Dag Brattli <dag@brattli.net>
- *
- * Copyright (c) 1998-2000 Dag Brattli <dag@brattli.net>,
- * Copyright (c) 1998 Thomas Davis <ratbert@radiks.net>
- * All Rights Reserved.
- * Copyright (c) 2000-2003 Jean Tourrilhes <jt@hpl.hp.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * Neither Dag Brattli nor University of Tromsø admit liability nor
- * provide warranty for any of this software. This material is
- * provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/delay.h>
-#include <linux/skbuff.h>
-#include <linux/slab.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/irlap_event.h>
-
-#include <net/irda/timer.h>
-#include <net/irda/irlap.h>
-#include <net/irda/irlap_frame.h>
-#include <net/irda/qos.h>
-#include <net/irda/parameters.h>
-#include <net/irda/irlmp.h> /* irlmp_flow_indication(), ... */
-
-#include <net/irda/irda_device.h>
-
-#ifdef CONFIG_IRDA_FAST_RR
-int sysctl_fast_poll_increase = 50;
-#endif
-
-static int irlap_state_ndm (struct irlap_cb *self, IRLAP_EVENT event,
- struct sk_buff *skb, struct irlap_info *info);
-static int irlap_state_query (struct irlap_cb *self, IRLAP_EVENT event,
- struct sk_buff *skb, struct irlap_info *info);
-static int irlap_state_reply (struct irlap_cb *self, IRLAP_EVENT event,
- struct sk_buff *skb, struct irlap_info *info);
-static int irlap_state_conn (struct irlap_cb *self, IRLAP_EVENT event,
- struct sk_buff *skb, struct irlap_info *info);
-static int irlap_state_setup (struct irlap_cb *self, IRLAP_EVENT event,
- struct sk_buff *skb, struct irlap_info *info);
-static int irlap_state_offline(struct irlap_cb *self, IRLAP_EVENT event,
- struct sk_buff *skb, struct irlap_info *info);
-static int irlap_state_xmit_p (struct irlap_cb *self, IRLAP_EVENT event,
- struct sk_buff *skb, struct irlap_info *info);
-static int irlap_state_pclose (struct irlap_cb *self, IRLAP_EVENT event,
- struct sk_buff *skb, struct irlap_info *info);
-static int irlap_state_nrm_p (struct irlap_cb *self, IRLAP_EVENT event,
- struct sk_buff *skb, struct irlap_info *info);
-static int irlap_state_reset_wait(struct irlap_cb *self, IRLAP_EVENT event,
- struct sk_buff *skb, struct irlap_info *info);
-static int irlap_state_reset (struct irlap_cb *self, IRLAP_EVENT event,
- struct sk_buff *skb, struct irlap_info *info);
-static int irlap_state_nrm_s (struct irlap_cb *self, IRLAP_EVENT event,
- struct sk_buff *skb, struct irlap_info *info);
-static int irlap_state_xmit_s (struct irlap_cb *self, IRLAP_EVENT event,
- struct sk_buff *skb, struct irlap_info *info);
-static int irlap_state_sclose (struct irlap_cb *self, IRLAP_EVENT event,
- struct sk_buff *skb, struct irlap_info *info);
-static int irlap_state_reset_check(struct irlap_cb *, IRLAP_EVENT event,
- struct sk_buff *, struct irlap_info *);
-
-static const char *const irlap_event[] __maybe_unused = {
- "DISCOVERY_REQUEST",
- "CONNECT_REQUEST",
- "CONNECT_RESPONSE",
- "DISCONNECT_REQUEST",
- "DATA_REQUEST",
- "RESET_REQUEST",
- "RESET_RESPONSE",
- "SEND_I_CMD",
- "SEND_UI_FRAME",
- "RECV_DISCOVERY_XID_CMD",
- "RECV_DISCOVERY_XID_RSP",
- "RECV_SNRM_CMD",
- "RECV_TEST_CMD",
- "RECV_TEST_RSP",
- "RECV_UA_RSP",
- "RECV_DM_RSP",
- "RECV_RD_RSP",
- "RECV_I_CMD",
- "RECV_I_RSP",
- "RECV_UI_FRAME",
- "RECV_FRMR_RSP",
- "RECV_RR_CMD",
- "RECV_RR_RSP",
- "RECV_RNR_CMD",
- "RECV_RNR_RSP",
- "RECV_REJ_CMD",
- "RECV_REJ_RSP",
- "RECV_SREJ_CMD",
- "RECV_SREJ_RSP",
- "RECV_DISC_CMD",
- "SLOT_TIMER_EXPIRED",
- "QUERY_TIMER_EXPIRED",
- "FINAL_TIMER_EXPIRED",
- "POLL_TIMER_EXPIRED",
- "DISCOVERY_TIMER_EXPIRED",
- "WD_TIMER_EXPIRED",
- "BACKOFF_TIMER_EXPIRED",
- "MEDIA_BUSY_TIMER_EXPIRED",
-};
-
-const char *const irlap_state[] = {
- "LAP_NDM",
- "LAP_QUERY",
- "LAP_REPLY",
- "LAP_CONN",
- "LAP_SETUP",
- "LAP_OFFLINE",
- "LAP_XMIT_P",
- "LAP_PCLOSE",
- "LAP_NRM_P",
- "LAP_RESET_WAIT",
- "LAP_RESET",
- "LAP_NRM_S",
- "LAP_XMIT_S",
- "LAP_SCLOSE",
- "LAP_RESET_CHECK",
-};
-
-static int (*state[])(struct irlap_cb *self, IRLAP_EVENT event,
- struct sk_buff *skb, struct irlap_info *info) =
-{
- irlap_state_ndm,
- irlap_state_query,
- irlap_state_reply,
- irlap_state_conn,
- irlap_state_setup,
- irlap_state_offline,
- irlap_state_xmit_p,
- irlap_state_pclose,
- irlap_state_nrm_p,
- irlap_state_reset_wait,
- irlap_state_reset,
- irlap_state_nrm_s,
- irlap_state_xmit_s,
- irlap_state_sclose,
- irlap_state_reset_check,
-};
-
-/*
- * Function irda_poll_timer_expired (data)
- *
- * Poll timer has expired. Normally we must now send a RR frame to the
- * remote device
- */
-static void irlap_poll_timer_expired(void *data)
-{
- struct irlap_cb *self = (struct irlap_cb *) data;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
- irlap_do_event(self, POLL_TIMER_EXPIRED, NULL, NULL);
-}
-
-/*
- * Calculate and set time before we will have to send back the pf bit
- * to the peer. Use in primary.
- * Make sure that state is XMIT_P/XMIT_S when calling this function
- * (and that nobody messed up with the state). - Jean II
- */
-static void irlap_start_poll_timer(struct irlap_cb *self, int timeout)
-{
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
-#ifdef CONFIG_IRDA_FAST_RR
- /*
- * Send out the RR frames faster if our own transmit queue is empty, or
- * if the peer is busy. The effect is a much faster conversation
- */
- if (skb_queue_empty(&self->txq) || self->remote_busy) {
- if (self->fast_RR == TRUE) {
- /*
- * Assert that the fast poll timer has not reached the
- * normal poll timer yet
- */
- if (self->fast_RR_timeout < timeout) {
- /*
- * FIXME: this should be a more configurable
- * function
- */
- self->fast_RR_timeout +=
- (sysctl_fast_poll_increase * HZ/1000);
-
- /* Use this fast(er) timeout instead */
- timeout = self->fast_RR_timeout;
- }
- } else {
- self->fast_RR = TRUE;
-
- /* Start with just 0 ms */
- self->fast_RR_timeout = 0;
- timeout = 0;
- }
- } else
- self->fast_RR = FALSE;
-
- pr_debug("%s(), timeout=%d (%ld)\n", __func__, timeout, jiffies);
-#endif /* CONFIG_IRDA_FAST_RR */
-
- if (timeout == 0)
- irlap_do_event(self, POLL_TIMER_EXPIRED, NULL, NULL);
- else
- irda_start_timer(&self->poll_timer, timeout, self,
- irlap_poll_timer_expired);
-}
-
-/*
- * Function irlap_do_event (event, skb, info)
- *
- * Rushes through the state machine without any delay. If state == XMIT
- * then send queued data frames.
- */
-void irlap_do_event(struct irlap_cb *self, IRLAP_EVENT event,
- struct sk_buff *skb, struct irlap_info *info)
-{
- int ret;
-
- if (!self || self->magic != LAP_MAGIC)
- return;
-
- pr_debug("%s(), event = %s, state = %s\n", __func__,
- irlap_event[event], irlap_state[self->state]);
-
- ret = (*state[self->state])(self, event, skb, info);
-
- /*
- * Check if there are any pending events that needs to be executed
- */
- switch (self->state) {
- case LAP_XMIT_P: /* FALLTHROUGH */
- case LAP_XMIT_S:
- /*
- * We just received the pf bit and are at the beginning
- * of a new LAP transmit window.
- * Check if there are any queued data frames, and do not
- * try to disconnect link if we send any data frames, since
- * that will change the state away form XMIT
- */
- pr_debug("%s() : queue len = %d\n", __func__,
- skb_queue_len(&self->txq));
-
- if (!skb_queue_empty(&self->txq)) {
- /* Prevent race conditions with irlap_data_request() */
- self->local_busy = TRUE;
-
- /* Theory of operation.
- * We send frames up to when we fill the window or
- * reach line capacity. Those frames will queue up
- * in the device queue, and the driver will slowly
- * send them.
- * After each frame that we send, we poll the higher
- * layer for more data. It's the right time to do
- * that because the link layer need to perform the mtt
- * and then send the first frame, so we can afford
- * to send a bit of time in kernel space.
- * The explicit flow indication allow to minimise
- * buffers (== lower latency), to avoid higher layer
- * polling via timers (== less context switches) and
- * to implement a crude scheduler - Jean II */
-
- /* Try to send away all queued data frames */
- while ((skb = skb_dequeue(&self->txq)) != NULL) {
- /* Send one frame */
- ret = (*state[self->state])(self, SEND_I_CMD,
- skb, NULL);
- /* Drop reference count.
- * It will be increase as needed in
- * irlap_send_data_xxx() */
- kfree_skb(skb);
-
- /* Poll the higher layers for one more frame */
- irlmp_flow_indication(self->notify.instance,
- FLOW_START);
-
- if (ret == -EPROTO)
- break; /* Try again later! */
- }
- /* Finished transmitting */
- self->local_busy = FALSE;
- } else if (self->disconnect_pending) {
- self->disconnect_pending = FALSE;
-
- ret = (*state[self->state])(self, DISCONNECT_REQUEST,
- NULL, NULL);
- }
- break;
-/* case LAP_NDM: */
-/* case LAP_CONN: */
-/* case LAP_RESET_WAIT: */
-/* case LAP_RESET_CHECK: */
- default:
- break;
- }
-}
-
-/*
- * Function irlap_state_ndm (event, skb, frame)
- *
- * NDM (Normal Disconnected Mode) state
- *
- */
-static int irlap_state_ndm(struct irlap_cb *self, IRLAP_EVENT event,
- struct sk_buff *skb, struct irlap_info *info)
-{
- discovery_t *discovery_rsp;
- int ret = 0;
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;);
-
- switch (event) {
- case CONNECT_REQUEST:
- IRDA_ASSERT(self->netdev != NULL, return -1;);
-
- if (self->media_busy) {
- /* Note : this will never happen, because we test
- * media busy in irlap_connect_request() and
- * postpone the event... - Jean II */
- pr_debug("%s(), CONNECT_REQUEST: media busy!\n",
- __func__);
-
- /* Always switch state before calling upper layers */
- irlap_next_state(self, LAP_NDM);
-
- irlap_disconnect_indication(self, LAP_MEDIA_BUSY);
- } else {
- irlap_send_snrm_frame(self, &self->qos_rx);
-
- /* Start Final-bit timer */
- irlap_start_final_timer(self, self->final_timeout);
-
- self->retry_count = 0;
- irlap_next_state(self, LAP_SETUP);
- }
- break;
- case RECV_SNRM_CMD:
- /* Check if the frame contains and I field */
- if (info) {
- self->daddr = info->daddr;
- self->caddr = info->caddr;
-
- irlap_next_state(self, LAP_CONN);
-
- irlap_connect_indication(self, skb);
- } else {
- pr_debug("%s(), SNRM frame does not contain an I field!\n",
- __func__);
- }
- break;
- case DISCOVERY_REQUEST:
- IRDA_ASSERT(info != NULL, return -1;);
-
- if (self->media_busy) {
- pr_debug("%s(), DISCOVERY_REQUEST: media busy!\n",
- __func__);
- /* irlap->log.condition = MEDIA_BUSY; */
-
- /* This will make IrLMP try again */
- irlap_discovery_confirm(self, NULL);
- /* Note : the discovery log is not cleaned up here,
- * it will be done in irlap_discovery_request()
- * Jean II */
- return 0;
- }
-
- self->S = info->S;
- self->s = info->s;
- irlap_send_discovery_xid_frame(self, info->S, info->s, TRUE,
- info->discovery);
- self->frame_sent = FALSE;
- self->s++;
-
- irlap_start_slot_timer(self, self->slot_timeout);
- irlap_next_state(self, LAP_QUERY);
- break;
- case RECV_DISCOVERY_XID_CMD:
- IRDA_ASSERT(info != NULL, return -1;);
-
- /* Assert that this is not the final slot */
- if (info->s <= info->S) {
- self->slot = irlap_generate_rand_time_slot(info->S,
- info->s);
- if (self->slot == info->s) {
- discovery_rsp = irlmp_get_discovery_response();
- discovery_rsp->data.daddr = info->daddr;
-
- irlap_send_discovery_xid_frame(self, info->S,
- self->slot,
- FALSE,
- discovery_rsp);
- self->frame_sent = TRUE;
- } else
- self->frame_sent = FALSE;
-
- /*
- * Go to reply state until end of discovery to
- * inhibit our own transmissions. Set the timer
- * to not stay forever there... Jean II
- */
- irlap_start_query_timer(self, info->S, info->s);
- irlap_next_state(self, LAP_REPLY);
- } else {
- /* This is the final slot. How is it possible ?
- * This would happen is both discoveries are just slightly
- * offset (if they are in sync, all packets are lost).
- * Most often, all the discovery requests will be received
- * in QUERY state (see my comment there), except for the
- * last frame that will come here.
- * The big trouble when it happen is that active discovery
- * doesn't happen, because nobody answer the discoveries
- * frame of the other guy, so the log shows up empty.
- * What should we do ?
- * Not much. It's too late to answer those discovery frames,
- * so we just pass the info to IrLMP who will put it in the
- * log (and post an event).
- * Another cause would be devices that do discovery much
- * slower than us, however the latest fixes should minimise
- * those cases...
- * Jean II
- */
- pr_debug("%s(), Receiving final discovery request, missed the discovery slots :-(\n",
- __func__);
-
- /* Last discovery request -> in the log */
- irlap_discovery_indication(self, info->discovery);
- }
- break;
- case MEDIA_BUSY_TIMER_EXPIRED:
- /* A bunch of events may be postponed because the media is
- * busy (usually immediately after we close a connection),
- * or while we are doing discovery (state query/reply).
- * In all those cases, the media busy flag will be cleared
- * when it's OK for us to process those postponed events.
- * This event is not mentioned in the state machines in the
- * IrLAP spec. It's because they didn't consider Ultra and
- * postponing connection request is optional.
- * Jean II */
-#ifdef CONFIG_IRDA_ULTRA
- /* Send any pending Ultra frames if any */
- if (!skb_queue_empty(&self->txq_ultra)) {
- /* We don't send the frame, just post an event.
- * Also, previously this code was in timer.c...
- * Jean II */
- ret = (*state[self->state])(self, SEND_UI_FRAME,
- NULL, NULL);
- }
-#endif /* CONFIG_IRDA_ULTRA */
- /* Check if we should try to connect.
- * This code was previously in irlap_do_event() */
- if (self->connect_pending) {
- self->connect_pending = FALSE;
-
- /* This one *should* not pend in this state, except
- * if a socket try to connect and immediately
- * disconnect. - clear - Jean II */
- if (self->disconnect_pending)
- irlap_disconnect_indication(self, LAP_DISC_INDICATION);
- else
- ret = (*state[self->state])(self,
- CONNECT_REQUEST,
- NULL, NULL);
- self->disconnect_pending = FALSE;
- }
- /* Note : one way to test if this code works well (including
- * media busy and small busy) is to create a user space
- * application generating an Ultra packet every 3.05 sec (or
- * 2.95 sec) and to see how it interact with discovery.
- * It's fairly easy to check that no packet is lost, that the
- * packets are postponed during discovery and that after
- * discovery indication you have a 100ms "gap".
- * As connection request and Ultra are now processed the same
- * way, this avoid the tedious job of trying IrLAP connection
- * in all those cases...
- * Jean II */
- break;
-#ifdef CONFIG_IRDA_ULTRA
- case SEND_UI_FRAME:
- {
- int i;
- /* Only allowed to repeat an operation twice */
- for (i=0; ((i<2) && (self->media_busy == FALSE)); i++) {
- skb = skb_dequeue(&self->txq_ultra);
- if (skb)
- irlap_send_ui_frame(self, skb, CBROADCAST,
- CMD_FRAME);
- else
- break;
- /* irlap_send_ui_frame() won't increase skb reference
- * count, so no dev_kfree_skb() - Jean II */
- }
- if (i == 2) {
- /* Force us to listen 500 ms again */
- irda_device_set_media_busy(self->netdev, TRUE);
- }
- break;
- }
- case RECV_UI_FRAME:
- /* Only accept broadcast frames in NDM mode */
- if (info->caddr != CBROADCAST) {
- pr_debug("%s(), not a broadcast frame!\n",
- __func__);
- } else
- irlap_unitdata_indication(self, skb);
- break;
-#endif /* CONFIG_IRDA_ULTRA */
- case RECV_TEST_CMD:
- /* Remove test frame header */
- skb_pull(skb, sizeof(struct test_frame));
-
- /*
- * Send response. This skb will not be sent out again, and
- * will only be used to send out the same info as the cmd
- */
- irlap_send_test_frame(self, CBROADCAST, info->daddr, skb);
- break;
- case RECV_TEST_RSP:
- pr_debug("%s() not implemented!\n", __func__);
- break;
- default:
- pr_debug("%s(), Unknown event %s\n", __func__,
- irlap_event[event]);
-
- ret = -1;
- break;
- }
- return ret;
-}
-
-/*
- * Function irlap_state_query (event, skb, info)
- *
- * QUERY state
- *
- */
-static int irlap_state_query(struct irlap_cb *self, IRLAP_EVENT event,
- struct sk_buff *skb, struct irlap_info *info)
-{
- int ret = 0;
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;);
-
- switch (event) {
- case RECV_DISCOVERY_XID_RSP:
- IRDA_ASSERT(info != NULL, return -1;);
- IRDA_ASSERT(info->discovery != NULL, return -1;);
-
- pr_debug("%s(), daddr=%08x\n", __func__,
- info->discovery->data.daddr);
-
- if (!self->discovery_log) {
- net_warn_ratelimited("%s: discovery log is gone! maybe the discovery timeout has been set too short?\n",
- __func__);
- break;
- }
- hashbin_insert(self->discovery_log,
- (irda_queue_t *) info->discovery,
- info->discovery->data.daddr, NULL);
-
- /* Keep state */
- /* irlap_next_state(self, LAP_QUERY); */
-
- break;
- case RECV_DISCOVERY_XID_CMD:
- /* Yes, it is possible to receive those frames in this mode.
- * Note that most often the last discovery request won't
- * occur here but in NDM state (see my comment there).
- * What should we do ?
- * Not much. We are currently performing our own discovery,
- * therefore we can't answer those frames. We don't want
- * to change state either. We just pass the info to
- * IrLMP who will put it in the log (and post an event).
- * Jean II
- */
-
- IRDA_ASSERT(info != NULL, return -1;);
-
- pr_debug("%s(), Receiving discovery request (s = %d) while performing discovery :-(\n",
- __func__, info->s);
-
- /* Last discovery request ? */
- if (info->s == 0xff)
- irlap_discovery_indication(self, info->discovery);
- break;
- case SLOT_TIMER_EXPIRED:
- /*
- * Wait a little longer if we detect an incoming frame. This
- * is not mentioned in the spec, but is a good thing to do,
- * since we want to work even with devices that violate the
- * timing requirements.
- */
- if (irda_device_is_receiving(self->netdev) && !self->add_wait) {
- pr_debug("%s(), device is slow to answer, waiting some more!\n",
- __func__);
- irlap_start_slot_timer(self, msecs_to_jiffies(10));
- self->add_wait = TRUE;
- return ret;
- }
- self->add_wait = FALSE;
-
- if (self->s < self->S) {
- irlap_send_discovery_xid_frame(self, self->S,
- self->s, TRUE,
- self->discovery_cmd);
- self->s++;
- irlap_start_slot_timer(self, self->slot_timeout);
-
- /* Keep state */
- irlap_next_state(self, LAP_QUERY);
- } else {
- /* This is the final slot! */
- irlap_send_discovery_xid_frame(self, self->S, 0xff,
- TRUE,
- self->discovery_cmd);
-
- /* Always switch state before calling upper layers */
- irlap_next_state(self, LAP_NDM);
-
- /*
- * We are now finished with the discovery procedure,
- * so now we must return the results
- */
- irlap_discovery_confirm(self, self->discovery_log);
-
- /* IrLMP should now have taken care of the log */
- self->discovery_log = NULL;
- }
- break;
- default:
- pr_debug("%s(), Unknown event %s\n", __func__,
- irlap_event[event]);
-
- ret = -1;
- break;
- }
- return ret;
-}
-
-/*
- * Function irlap_state_reply (self, event, skb, info)
- *
- * REPLY, we have received a XID discovery frame from a device and we
- * are waiting for the right time slot to send a response XID frame
- *
- */
-static int irlap_state_reply(struct irlap_cb *self, IRLAP_EVENT event,
- struct sk_buff *skb, struct irlap_info *info)
-{
- discovery_t *discovery_rsp;
- int ret=0;
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;);
-
- switch (event) {
- case QUERY_TIMER_EXPIRED:
- pr_debug("%s(), QUERY_TIMER_EXPIRED <%ld>\n",
- __func__, jiffies);
- irlap_next_state(self, LAP_NDM);
- break;
- case RECV_DISCOVERY_XID_CMD:
- IRDA_ASSERT(info != NULL, return -1;);
- /* Last frame? */
- if (info->s == 0xff) {
- del_timer(&self->query_timer);
-
- /* info->log.condition = REMOTE; */
-
- /* Always switch state before calling upper layers */
- irlap_next_state(self, LAP_NDM);
-
- irlap_discovery_indication(self, info->discovery);
- } else {
- /* If it's our slot, send our reply */
- if ((info->s >= self->slot) && (!self->frame_sent)) {
- discovery_rsp = irlmp_get_discovery_response();
- discovery_rsp->data.daddr = info->daddr;
-
- irlap_send_discovery_xid_frame(self, info->S,
- self->slot,
- FALSE,
- discovery_rsp);
-
- self->frame_sent = TRUE;
- }
- /* Readjust our timer to accommodate devices
- * doing faster or slower discovery than us...
- * Jean II */
- irlap_start_query_timer(self, info->S, info->s);
-
- /* Keep state */
- //irlap_next_state(self, LAP_REPLY);
- }
- break;
- default:
- pr_debug("%s(), Unknown event %d, %s\n", __func__,
- event, irlap_event[event]);
-
- ret = -1;
- break;
- }
- return ret;
-}
-
-/*
- * Function irlap_state_conn (event, skb, info)
- *
- * CONN, we have received a SNRM command and is waiting for the upper
- * layer to accept or refuse connection
- *
- */
-static int irlap_state_conn(struct irlap_cb *self, IRLAP_EVENT event,
- struct sk_buff *skb, struct irlap_info *info)
-{
- int ret = 0;
-
- pr_debug("%s(), event=%s\n", __func__, irlap_event[event]);
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;);
-
- switch (event) {
- case CONNECT_RESPONSE:
- skb_pull(skb, sizeof(struct snrm_frame));
-
- IRDA_ASSERT(self->netdev != NULL, return -1;);
-
- irlap_qos_negotiate(self, skb);
-
- irlap_initiate_connection_state(self);
-
- /*
- * Applying the parameters now will make sure we change speed
- * *after* we have sent the next frame
- */
- irlap_apply_connection_parameters(self, FALSE);
-
- /*
- * Sending this frame will force a speed change after it has
- * been sent (i.e. the frame will be sent at 9600).
- */
- irlap_send_ua_response_frame(self, &self->qos_rx);
-
-#if 0
- /*
- * We are allowed to send two frames, but this may increase
- * the connect latency, so lets not do it for now.
- */
- /* This is full of good intentions, but doesn't work in
- * practice.
- * After sending the first UA response, we switch the
- * dongle to the negotiated speed, which is usually
- * different than 9600 kb/s.
- * From there, there is two solutions :
- * 1) The other end has received the first UA response :
- * it will set up the connection, move to state LAP_NRM_P,
- * and will ignore and drop the second UA response.
- * Actually, it's even worse : the other side will almost
- * immediately send a RR that will likely collide with the
- * UA response (depending on negotiated turnaround).
- * 2) The other end has not received the first UA response,
- * will stay at 9600 and will never see the second UA response.
- * Jean II */
- irlap_send_ua_response_frame(self, &self->qos_rx);
-#endif
-
- /*
- * The WD-timer could be set to the duration of the P-timer
- * for this case, but it is recommended to use twice the
- * value (note 3 IrLAP p. 60).
- */
- irlap_start_wd_timer(self, self->wd_timeout);
- irlap_next_state(self, LAP_NRM_S);
-
- break;
- case RECV_DISCOVERY_XID_CMD:
- pr_debug("%s(), event RECV_DISCOVER_XID_CMD!\n",
- __func__);
- irlap_next_state(self, LAP_NDM);
-
- break;
- case DISCONNECT_REQUEST:
- pr_debug("%s(), Disconnect request!\n", __func__);
- irlap_send_dm_frame(self);
- irlap_next_state( self, LAP_NDM);
- irlap_disconnect_indication(self, LAP_DISC_INDICATION);
- break;
- default:
- pr_debug("%s(), Unknown event %d, %s\n", __func__,
- event, irlap_event[event]);
-
- ret = -1;
- break;
- }
-
- return ret;
-}
-
-/*
- * Function irlap_state_setup (event, skb, frame)
- *
- * SETUP state, The local layer has transmitted a SNRM command frame to
- * a remote peer layer and is awaiting a reply .
- *
- */
-static int irlap_state_setup(struct irlap_cb *self, IRLAP_EVENT event,
- struct sk_buff *skb, struct irlap_info *info)
-{
- int ret = 0;
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;);
-
- switch (event) {
- case FINAL_TIMER_EXPIRED:
- if (self->retry_count < self->N3) {
-/*
- * Perform random backoff, Wait a random number of time units, minimum
- * duration half the time taken to transmitt a SNRM frame, maximum duration
- * 1.5 times the time taken to transmit a SNRM frame. So this time should
- * between 15 msecs and 45 msecs.
- */
- irlap_start_backoff_timer(self, msecs_to_jiffies(20 +
- (jiffies % 30)));
- } else {
- /* Always switch state before calling upper layers */
- irlap_next_state(self, LAP_NDM);
-
- irlap_disconnect_indication(self, LAP_FOUND_NONE);
- }
- break;
- case BACKOFF_TIMER_EXPIRED:
- irlap_send_snrm_frame(self, &self->qos_rx);
- irlap_start_final_timer(self, self->final_timeout);
- self->retry_count++;
- break;
- case RECV_SNRM_CMD:
- pr_debug("%s(), SNRM battle!\n", __func__);
-
- IRDA_ASSERT(skb != NULL, return 0;);
- IRDA_ASSERT(info != NULL, return 0;);
-
- /*
- * The device with the largest device address wins the battle
- * (both have sent a SNRM command!)
- */
- if (info &&(info->daddr > self->saddr)) {
- del_timer(&self->final_timer);
- irlap_initiate_connection_state(self);
-
- IRDA_ASSERT(self->netdev != NULL, return -1;);
-
- skb_pull(skb, sizeof(struct snrm_frame));
-
- irlap_qos_negotiate(self, skb);
-
- /* Send UA frame and then change link settings */
- irlap_apply_connection_parameters(self, FALSE);
- irlap_send_ua_response_frame(self, &self->qos_rx);
-
- irlap_next_state(self, LAP_NRM_S);
- irlap_connect_confirm(self, skb);
-
- /*
- * The WD-timer could be set to the duration of the
- * P-timer for this case, but it is recommended
- * to use twice the value (note 3 IrLAP p. 60).
- */
- irlap_start_wd_timer(self, self->wd_timeout);
- } else {
- /* We just ignore the other device! */
- irlap_next_state(self, LAP_SETUP);
- }
- break;
- case RECV_UA_RSP:
- /* Stop F-timer */
- del_timer(&self->final_timer);
-
- /* Initiate connection state */
- irlap_initiate_connection_state(self);
-
- /* Negotiate connection parameters */
- IRDA_ASSERT(skb->len > 10, return -1;);
-
- skb_pull(skb, sizeof(struct ua_frame));
-
- IRDA_ASSERT(self->netdev != NULL, return -1;);
-
- irlap_qos_negotiate(self, skb);
-
- /* Set the new link setting *now* (before the rr frame) */
- irlap_apply_connection_parameters(self, TRUE);
- self->retry_count = 0;
-
- /* Wait for turnaround time to give a chance to the other
- * device to be ready to receive us.
- * Note : the time to switch speed is typically larger
- * than the turnaround time, but as we don't have the other
- * side speed switch time, that's our best guess...
- * Jean II */
- irlap_wait_min_turn_around(self, &self->qos_tx);
-
- /* This frame will actually be sent at the new speed */
- irlap_send_rr_frame(self, CMD_FRAME);
-
- /* The timer is set to half the normal timer to quickly
- * detect a failure to negotiate the new connection
- * parameters. IrLAP 6.11.3.2, note 3.
- * Note that currently we don't process this failure
- * properly, as we should do a quick disconnect.
- * Jean II */
- irlap_start_final_timer(self, self->final_timeout/2);
- irlap_next_state(self, LAP_NRM_P);
-
- irlap_connect_confirm(self, skb);
- break;
- case RECV_DM_RSP: /* FALLTHROUGH */
- case RECV_DISC_CMD:
- del_timer(&self->final_timer);
- irlap_next_state(self, LAP_NDM);
-
- irlap_disconnect_indication(self, LAP_DISC_INDICATION);
- break;
- default:
- pr_debug("%s(), Unknown event %d, %s\n", __func__,
- event, irlap_event[event]);
-
- ret = -1;
- break;
- }
- return ret;
-}
-
-/*
- * Function irlap_state_offline (self, event, skb, info)
- *
- * OFFLINE state, not used for now!
- *
- */
-static int irlap_state_offline(struct irlap_cb *self, IRLAP_EVENT event,
- struct sk_buff *skb, struct irlap_info *info)
-{
- pr_debug("%s(), Unknown event\n", __func__);
-
- return -1;
-}
-
-/*
- * Function irlap_state_xmit_p (self, event, skb, info)
- *
- * XMIT, Only the primary station has right to transmit, and we
- * therefore do not expect to receive any transmissions from other
- * stations.
- *
- */
-static int irlap_state_xmit_p(struct irlap_cb *self, IRLAP_EVENT event,
- struct sk_buff *skb, struct irlap_info *info)
-{
- int ret = 0;
-
- switch (event) {
- case SEND_I_CMD:
- /*
- * Only send frame if send-window > 0.
- */
- if ((self->window > 0) && (!self->remote_busy)) {
- int nextfit;
-#ifdef CONFIG_IRDA_DYNAMIC_WINDOW
- struct sk_buff *skb_next;
-
- /* With DYNAMIC_WINDOW, we keep the window size
- * maximum, and adapt on the packets we are sending.
- * At 115k, we can send only 2 packets of 2048 bytes
- * in a 500 ms turnaround. Without this option, we
- * would always limit the window to 2. With this
- * option, if we send smaller packets, we can send
- * up to 7 of them (always depending on QoS).
- * Jean II */
-
- /* Look at the next skb. This is safe, as we are
- * the only consumer of the Tx queue (if we are not,
- * we have other problems) - Jean II */
- skb_next = skb_peek(&self->txq);
-
- /* Check if a subsequent skb exist and would fit in
- * the current window (with respect to turnaround
- * time).
- * This allow us to properly mark the current packet
- * with the pf bit, to avoid falling back on the
- * second test below, and avoid waiting the
- * end of the window and sending a extra RR.
- * Note : (skb_next != NULL) <=> (skb_queue_len() > 0)
- * Jean II */
- nextfit = ((skb_next != NULL) &&
- ((skb_next->len + skb->len) <=
- self->bytes_left));
-
- /*
- * The current packet may not fit ! Because of test
- * above, this should not happen any more !!!
- * Test if we have transmitted more bytes over the
- * link than its possible to do with the current
- * speed and turn-around-time.
- */
- if((!nextfit) && (skb->len > self->bytes_left)) {
- pr_debug("%s(), Not allowed to transmit more bytes!\n",
- __func__);
- /* Requeue the skb */
- skb_queue_head(&self->txq, skb_get(skb));
- /*
- * We should switch state to LAP_NRM_P, but
- * that is not possible since we must be sure
- * that we poll the other side. Since we have
- * used up our time, the poll timer should
- * trigger anyway now, so we just wait for it
- * DB
- */
- /*
- * Sorry, but that's not totally true. If
- * we send 2000B packets, we may wait another
- * 1000B until our turnaround expire. That's
- * why we need to be proactive in avoiding
- * coming here. - Jean II
- */
- return -EPROTO;
- }
-
- /* Subtract space used by this skb */
- self->bytes_left -= skb->len;
-#else /* CONFIG_IRDA_DYNAMIC_WINDOW */
- /* Window has been adjusted for the max packet
- * size, so much simpler... - Jean II */
- nextfit = !skb_queue_empty(&self->txq);
-#endif /* CONFIG_IRDA_DYNAMIC_WINDOW */
- /*
- * Send data with poll bit cleared only if window > 1
- * and there is more frames after this one to be sent
- */
- if ((self->window > 1) && (nextfit)) {
- /* More packet to send in current window */
- irlap_send_data_primary(self, skb);
- irlap_next_state(self, LAP_XMIT_P);
- } else {
- /* Final packet of window */
- irlap_send_data_primary_poll(self, skb);
-
- /*
- * Make sure state machine does not try to send
- * any more frames
- */
- ret = -EPROTO;
- }
-#ifdef CONFIG_IRDA_FAST_RR
- /* Peer may want to reply immediately */
- self->fast_RR = FALSE;
-#endif /* CONFIG_IRDA_FAST_RR */
- } else {
- pr_debug("%s(), Unable to send! remote busy?\n",
- __func__);
- skb_queue_head(&self->txq, skb_get(skb));
-
- /*
- * The next ret is important, because it tells
- * irlap_next_state _not_ to deliver more frames
- */
- ret = -EPROTO;
- }
- break;
- case POLL_TIMER_EXPIRED:
- pr_debug("%s(), POLL_TIMER_EXPIRED <%ld>\n",
- __func__, jiffies);
- irlap_send_rr_frame(self, CMD_FRAME);
- /* Return to NRM properly - Jean II */
- self->window = self->window_size;
-#ifdef CONFIG_IRDA_DYNAMIC_WINDOW
- /* Allowed to transmit a maximum number of bytes again. */
- self->bytes_left = self->line_capacity;
-#endif /* CONFIG_IRDA_DYNAMIC_WINDOW */
- irlap_start_final_timer(self, self->final_timeout);
- irlap_next_state(self, LAP_NRM_P);
- break;
- case DISCONNECT_REQUEST:
- del_timer(&self->poll_timer);
- irlap_wait_min_turn_around(self, &self->qos_tx);
- irlap_send_disc_frame(self);
- irlap_flush_all_queues(self);
- irlap_start_final_timer(self, self->final_timeout);
- self->retry_count = 0;
- irlap_next_state(self, LAP_PCLOSE);
- break;
- case DATA_REQUEST:
- /* Nothing to do, irlap_do_event() will send the packet
- * when we return... - Jean II */
- break;
- default:
- pr_debug("%s(), Unknown event %s\n",
- __func__, irlap_event[event]);
-
- ret = -EINVAL;
- break;
- }
- return ret;
-}
-
-/*
- * Function irlap_state_pclose (event, skb, info)
- *
- * PCLOSE state
- */
-static int irlap_state_pclose(struct irlap_cb *self, IRLAP_EVENT event,
- struct sk_buff *skb, struct irlap_info *info)
-{
- int ret = 0;
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;);
-
- switch (event) {
- case RECV_UA_RSP: /* FALLTHROUGH */
- case RECV_DM_RSP:
- del_timer(&self->final_timer);
-
- /* Set new link parameters */
- irlap_apply_default_connection_parameters(self);
-
- /* Always switch state before calling upper layers */
- irlap_next_state(self, LAP_NDM);
-
- irlap_disconnect_indication(self, LAP_DISC_INDICATION);
- break;
- case FINAL_TIMER_EXPIRED:
- if (self->retry_count < self->N3) {
- irlap_wait_min_turn_around(self, &self->qos_tx);
- irlap_send_disc_frame(self);
- irlap_start_final_timer(self, self->final_timeout);
- self->retry_count++;
- /* Keep state */
- } else {
- irlap_apply_default_connection_parameters(self);
-
- /* Always switch state before calling upper layers */
- irlap_next_state(self, LAP_NDM);
-
- irlap_disconnect_indication(self, LAP_NO_RESPONSE);
- }
- break;
- default:
- pr_debug("%s(), Unknown event %d\n", __func__, event);
-
- ret = -1;
- break;
- }
- return ret;
-}
-
-/*
- * Function irlap_state_nrm_p (self, event, skb, info)
- *
- * NRM_P (Normal Response Mode as Primary), The primary station has given
- * permissions to a secondary station to transmit IrLAP resonse frames
- * (by sending a frame with the P bit set). The primary station will not
- * transmit any frames and is expecting to receive frames only from the
- * secondary to which transmission permissions has been given.
- */
-static int irlap_state_nrm_p(struct irlap_cb *self, IRLAP_EVENT event,
- struct sk_buff *skb, struct irlap_info *info)
-{
- int ret = 0;
- int ns_status;
- int nr_status;
-
- switch (event) {
- case RECV_I_RSP: /* Optimize for the common case */
- if (unlikely(skb->len <= LAP_ADDR_HEADER + LAP_CTRL_HEADER)) {
- /*
- * Input validation check: a stir4200/mcp2150
- * combination sometimes results in an empty i:rsp.
- * This makes no sense; we can just ignore the frame
- * and send an rr:cmd immediately. This happens before
- * changing nr or ns so triggers a retransmit
- */
- irlap_wait_min_turn_around(self, &self->qos_tx);
- irlap_send_rr_frame(self, CMD_FRAME);
- /* Keep state */
- break;
- }
- /* FIXME: must check for remote_busy below */
-#ifdef CONFIG_IRDA_FAST_RR
- /*
- * Reset the fast_RR so we can use the fast RR code with
- * full speed the next time since peer may have more frames
- * to transmitt
- */
- self->fast_RR = FALSE;
-#endif /* CONFIG_IRDA_FAST_RR */
- IRDA_ASSERT( info != NULL, return -1;);
-
- ns_status = irlap_validate_ns_received(self, info->ns);
- nr_status = irlap_validate_nr_received(self, info->nr);
-
- /*
- * Check for expected I(nformation) frame
- */
- if ((ns_status == NS_EXPECTED) && (nr_status == NR_EXPECTED)) {
-
- /* Update Vr (next frame for us to receive) */
- self->vr = (self->vr + 1) % 8;
-
- /* Update Nr received, cleanup our retry queue */
- irlap_update_nr_received(self, info->nr);
-
- /*
- * Got expected NR, so reset the
- * retry_count. This is not done by IrLAP spec,
- * which is strange!
- */
- self->retry_count = 0;
- self->ack_required = TRUE;
-
- /* poll bit cleared? */
- if (!info->pf) {
- /* Keep state, do not move this line */
- irlap_next_state(self, LAP_NRM_P);
-
- irlap_data_indication(self, skb, FALSE);
- } else {
- /* No longer waiting for pf */
- del_timer(&self->final_timer);
-
- irlap_wait_min_turn_around(self, &self->qos_tx);
-
- /* Call higher layer *before* changing state
- * to give them a chance to send data in the
- * next LAP frame.
- * Jean II */
- irlap_data_indication(self, skb, FALSE);
-
- /* XMIT states are the most dangerous state
- * to be in, because user requests are
- * processed directly and may change state.
- * On the other hand, in NDM_P, those
- * requests are queued and we will process
- * them when we return to irlap_do_event().
- * Jean II
- */
- irlap_next_state(self, LAP_XMIT_P);
-
- /* This is the last frame.
- * Make sure it's always called in XMIT state.
- * - Jean II */
- irlap_start_poll_timer(self, self->poll_timeout);
- }
- break;
-
- }
- /* Unexpected next to send (Ns) */
- if ((ns_status == NS_UNEXPECTED) && (nr_status == NR_EXPECTED))
- {
- if (!info->pf) {
- irlap_update_nr_received(self, info->nr);
-
- /*
- * Wait until the last frame before doing
- * anything
- */
-
- /* Keep state */
- irlap_next_state(self, LAP_NRM_P);
- } else {
- pr_debug("%s(), missing or duplicate frame!\n",
- __func__);
-
- /* Update Nr received */
- irlap_update_nr_received(self, info->nr);
-
- irlap_wait_min_turn_around(self, &self->qos_tx);
- irlap_send_rr_frame(self, CMD_FRAME);
-
- self->ack_required = FALSE;
-
- irlap_start_final_timer(self, self->final_timeout);
- irlap_next_state(self, LAP_NRM_P);
- }
- break;
- }
- /*
- * Unexpected next to receive (Nr)
- */
- if ((ns_status == NS_EXPECTED) && (nr_status == NR_UNEXPECTED))
- {
- if (info->pf) {
- self->vr = (self->vr + 1) % 8;
-
- /* Update Nr received */
- irlap_update_nr_received(self, info->nr);
-
- /* Resend rejected frames */
- irlap_resend_rejected_frames(self, CMD_FRAME);
-
- self->ack_required = FALSE;
-
- /* Make sure we account for the time
- * to transmit our frames. See comemnts
- * in irlap_send_data_primary_poll().
- * Jean II */
- irlap_start_final_timer(self, 2 * self->final_timeout);
-
- /* Keep state, do not move this line */
- irlap_next_state(self, LAP_NRM_P);
-
- irlap_data_indication(self, skb, FALSE);
- } else {
- /*
- * Do not resend frames until the last
- * frame has arrived from the other
- * device. This is not documented in
- * IrLAP!!
- */
- self->vr = (self->vr + 1) % 8;
-
- /* Update Nr received */
- irlap_update_nr_received(self, info->nr);
-
- self->ack_required = FALSE;
-
- /* Keep state, do not move this line!*/
- irlap_next_state(self, LAP_NRM_P);
-
- irlap_data_indication(self, skb, FALSE);
- }
- break;
- }
- /*
- * Unexpected next to send (Ns) and next to receive (Nr)
- * Not documented by IrLAP!
- */
- if ((ns_status == NS_UNEXPECTED) &&
- (nr_status == NR_UNEXPECTED))
- {
- pr_debug("%s(), unexpected nr and ns!\n",
- __func__);
- if (info->pf) {
- /* Resend rejected frames */
- irlap_resend_rejected_frames(self, CMD_FRAME);
-
- /* Give peer some time to retransmit!
- * But account for our own Tx. */
- irlap_start_final_timer(self, 2 * self->final_timeout);
-
- /* Keep state, do not move this line */
- irlap_next_state(self, LAP_NRM_P);
- } else {
- /* Update Nr received */
- /* irlap_update_nr_received( info->nr); */
-
- self->ack_required = FALSE;
- }
- break;
- }
-
- /*
- * Invalid NR or NS
- */
- if ((nr_status == NR_INVALID) || (ns_status == NS_INVALID)) {
- if (info->pf) {
- del_timer(&self->final_timer);
-
- irlap_next_state(self, LAP_RESET_WAIT);
-
- irlap_disconnect_indication(self, LAP_RESET_INDICATION);
- self->xmitflag = TRUE;
- } else {
- del_timer(&self->final_timer);
-
- irlap_disconnect_indication(self, LAP_RESET_INDICATION);
-
- self->xmitflag = FALSE;
- }
- break;
- }
- pr_debug("%s(), Not implemented!\n", __func__);
- pr_debug("%s(), event=%s, ns_status=%d, nr_status=%d\n",
- __func__, irlap_event[event], ns_status, nr_status);
- break;
- case RECV_UI_FRAME:
- /* Poll bit cleared? */
- if (!info->pf) {
- irlap_data_indication(self, skb, TRUE);
- irlap_next_state(self, LAP_NRM_P);
- } else {
- del_timer(&self->final_timer);
- irlap_data_indication(self, skb, TRUE);
- irlap_next_state(self, LAP_XMIT_P);
- pr_debug("%s: RECV_UI_FRAME: next state %s\n",
- __func__, irlap_state[self->state]);
- irlap_start_poll_timer(self, self->poll_timeout);
- }
- break;
- case RECV_RR_RSP:
- /*
- * If you get a RR, the remote isn't busy anymore,
- * no matter what the NR
- */
- self->remote_busy = FALSE;
-
- /* Stop final timer */
- del_timer(&self->final_timer);
-
- /*
- * Nr as expected?
- */
- ret = irlap_validate_nr_received(self, info->nr);
- if (ret == NR_EXPECTED) {
- /* Update Nr received */
- irlap_update_nr_received(self, info->nr);
-
- /*
- * Got expected NR, so reset the retry_count. This
- * is not done by the IrLAP standard , which is
- * strange! DB.
- */
- self->retry_count = 0;
- irlap_wait_min_turn_around(self, &self->qos_tx);
-
- irlap_next_state(self, LAP_XMIT_P);
-
- /* Start poll timer */
- irlap_start_poll_timer(self, self->poll_timeout);
- } else if (ret == NR_UNEXPECTED) {
- IRDA_ASSERT(info != NULL, return -1;);
- /*
- * Unexpected nr!
- */
-
- /* Update Nr received */
- irlap_update_nr_received(self, info->nr);
-
- pr_debug("RECV_RR_FRAME: Retrans:%d, nr=%d, va=%d, vs=%d, vr=%d\n",
- self->retry_count, info->nr, self->va,
- self->vs, self->vr);
-
- /* Resend rejected frames */
- irlap_resend_rejected_frames(self, CMD_FRAME);
- irlap_start_final_timer(self, self->final_timeout * 2);
-
- irlap_next_state(self, LAP_NRM_P);
- } else if (ret == NR_INVALID) {
- pr_debug("%s(), Received RR with invalid nr !\n",
- __func__);
-
- irlap_next_state(self, LAP_RESET_WAIT);
-
- irlap_disconnect_indication(self, LAP_RESET_INDICATION);
- self->xmitflag = TRUE;
- }
- break;
- case RECV_RNR_RSP:
- IRDA_ASSERT(info != NULL, return -1;);
-
- /* Stop final timer */
- del_timer(&self->final_timer);
- self->remote_busy = TRUE;
-
- /* Update Nr received */
- irlap_update_nr_received(self, info->nr);
- irlap_next_state(self, LAP_XMIT_P);
-
- /* Start poll timer */
- irlap_start_poll_timer(self, self->poll_timeout);
- break;
- case RECV_FRMR_RSP:
- del_timer(&self->final_timer);
- self->xmitflag = TRUE;
- irlap_next_state(self, LAP_RESET_WAIT);
- irlap_reset_indication(self);
- break;
- case FINAL_TIMER_EXPIRED:
- /*
- * We are allowed to wait for additional 300 ms if
- * final timer expires when we are in the middle
- * of receiving a frame (page 45, IrLAP). Check that
- * we only do this once for each frame.
- */
- if (irda_device_is_receiving(self->netdev) && !self->add_wait) {
- pr_debug("FINAL_TIMER_EXPIRED when receiving a frame! Waiting a little bit more!\n");
- irlap_start_final_timer(self, msecs_to_jiffies(300));
-
- /*
- * Don't allow this to happen one more time in a row,
- * or else we can get a pretty tight loop here if
- * if we only receive half a frame. DB.
- */
- self->add_wait = TRUE;
- break;
- }
- self->add_wait = FALSE;
-
- /* N2 is the disconnect timer. Until we reach it, we retry */
- if (self->retry_count < self->N2) {
- if (skb_peek(&self->wx_list) == NULL) {
- /* Retry sending the pf bit to the secondary */
- pr_debug("nrm_p: resending rr");
- irlap_wait_min_turn_around(self, &self->qos_tx);
- irlap_send_rr_frame(self, CMD_FRAME);
- } else {
- pr_debug("nrm_p: resend frames");
- irlap_resend_rejected_frames(self, CMD_FRAME);
- }
-
- irlap_start_final_timer(self, self->final_timeout);
- self->retry_count++;
- pr_debug("irlap_state_nrm_p: FINAL_TIMER_EXPIRED: retry_count=%d\n",
- self->retry_count);
-
- /* Early warning event. I'm using a pretty liberal
- * interpretation of the spec and generate an event
- * every time the timer is multiple of N1 (and not
- * only the first time). This allow application
- * to know precisely if connectivity restart...
- * Jean II */
- if((self->retry_count % self->N1) == 0)
- irlap_status_indication(self,
- STATUS_NO_ACTIVITY);
-
- /* Keep state */
- } else {
- irlap_apply_default_connection_parameters(self);
-
- /* Always switch state before calling upper layers */
- irlap_next_state(self, LAP_NDM);
- irlap_disconnect_indication(self, LAP_NO_RESPONSE);
- }
- break;
- case RECV_REJ_RSP:
- irlap_update_nr_received(self, info->nr);
- if (self->remote_busy) {
- irlap_wait_min_turn_around(self, &self->qos_tx);
- irlap_send_rr_frame(self, CMD_FRAME);
- } else
- irlap_resend_rejected_frames(self, CMD_FRAME);
- irlap_start_final_timer(self, 2 * self->final_timeout);
- break;
- case RECV_SREJ_RSP:
- irlap_update_nr_received(self, info->nr);
- if (self->remote_busy) {
- irlap_wait_min_turn_around(self, &self->qos_tx);
- irlap_send_rr_frame(self, CMD_FRAME);
- } else
- irlap_resend_rejected_frame(self, CMD_FRAME);
- irlap_start_final_timer(self, 2 * self->final_timeout);
- break;
- case RECV_RD_RSP:
- pr_debug("%s(), RECV_RD_RSP\n", __func__);
-
- irlap_flush_all_queues(self);
- irlap_next_state(self, LAP_XMIT_P);
- /* Call back the LAP state machine to do a proper disconnect */
- irlap_disconnect_request(self);
- break;
- default:
- pr_debug("%s(), Unknown event %s\n",
- __func__, irlap_event[event]);
-
- ret = -1;
- break;
- }
- return ret;
-}
-
-/*
- * Function irlap_state_reset_wait (event, skb, info)
- *
- * We have informed the service user of a reset condition, and is
- * awaiting reset of disconnect request.
- *
- */
-static int irlap_state_reset_wait(struct irlap_cb *self, IRLAP_EVENT event,
- struct sk_buff *skb, struct irlap_info *info)
-{
- int ret = 0;
-
- pr_debug("%s(), event = %s\n", __func__, irlap_event[event]);
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;);
-
- switch (event) {
- case RESET_REQUEST:
- if (self->xmitflag) {
- irlap_wait_min_turn_around(self, &self->qos_tx);
- irlap_send_snrm_frame(self, NULL);
- irlap_start_final_timer(self, self->final_timeout);
- irlap_next_state(self, LAP_RESET);
- } else {
- irlap_start_final_timer(self, self->final_timeout);
- irlap_next_state(self, LAP_RESET);
- }
- break;
- case DISCONNECT_REQUEST:
- irlap_wait_min_turn_around( self, &self->qos_tx);
- irlap_send_disc_frame( self);
- irlap_flush_all_queues( self);
- irlap_start_final_timer( self, self->final_timeout);
- self->retry_count = 0;
- irlap_next_state( self, LAP_PCLOSE);
- break;
- default:
- pr_debug("%s(), Unknown event %s\n", __func__,
- irlap_event[event]);
-
- ret = -1;
- break;
- }
- return ret;
-}
-
-/*
- * Function irlap_state_reset (self, event, skb, info)
- *
- * We have sent a SNRM reset command to the peer layer, and is awaiting
- * reply.
- *
- */
-static int irlap_state_reset(struct irlap_cb *self, IRLAP_EVENT event,
- struct sk_buff *skb, struct irlap_info *info)
-{
- int ret = 0;
-
- pr_debug("%s(), event = %s\n", __func__, irlap_event[event]);
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;);
-
- switch (event) {
- case RECV_DISC_CMD:
- del_timer(&self->final_timer);
-
- irlap_apply_default_connection_parameters(self);
-
- /* Always switch state before calling upper layers */
- irlap_next_state(self, LAP_NDM);
-
- irlap_disconnect_indication(self, LAP_NO_RESPONSE);
-
- break;
- case RECV_UA_RSP:
- del_timer(&self->final_timer);
-
- /* Initiate connection state */
- irlap_initiate_connection_state(self);
-
- irlap_reset_confirm();
-
- self->remote_busy = FALSE;
-
- irlap_next_state(self, LAP_XMIT_P);
-
- irlap_start_poll_timer(self, self->poll_timeout);
-
- break;
- case FINAL_TIMER_EXPIRED:
- if (self->retry_count < 3) {
- irlap_wait_min_turn_around(self, &self->qos_tx);
-
- IRDA_ASSERT(self->netdev != NULL, return -1;);
- irlap_send_snrm_frame(self, self->qos_dev);
-
- self->retry_count++; /* Experimental!! */
-
- irlap_start_final_timer(self, self->final_timeout);
- irlap_next_state(self, LAP_RESET);
- } else if (self->retry_count >= self->N3) {
- irlap_apply_default_connection_parameters(self);
-
- /* Always switch state before calling upper layers */
- irlap_next_state(self, LAP_NDM);
-
- irlap_disconnect_indication(self, LAP_NO_RESPONSE);
- }
- break;
- case RECV_SNRM_CMD:
- /*
- * SNRM frame is not allowed to contain an I-field in this
- * state
- */
- if (!info) {
- pr_debug("%s(), RECV_SNRM_CMD\n", __func__);
- irlap_initiate_connection_state(self);
- irlap_wait_min_turn_around(self, &self->qos_tx);
- irlap_send_ua_response_frame(self, &self->qos_rx);
- irlap_reset_confirm();
- irlap_start_wd_timer(self, self->wd_timeout);
- irlap_next_state(self, LAP_NDM);
- } else {
- pr_debug("%s(), SNRM frame contained an I field!\n",
- __func__);
- }
- break;
- default:
- pr_debug("%s(), Unknown event %s\n",
- __func__, irlap_event[event]);
-
- ret = -1;
- break;
- }
- return ret;
-}
-
-/*
- * Function irlap_state_xmit_s (event, skb, info)
- *
- * XMIT_S, The secondary station has been given the right to transmit,
- * and we therefore do not expect to receive any transmissions from other
- * stations.
- */
-static int irlap_state_xmit_s(struct irlap_cb *self, IRLAP_EVENT event,
- struct sk_buff *skb, struct irlap_info *info)
-{
- int ret = 0;
-
- pr_debug("%s(), event=%s\n", __func__, irlap_event[event]);
-
- IRDA_ASSERT(self != NULL, return -ENODEV;);
- IRDA_ASSERT(self->magic == LAP_MAGIC, return -EBADR;);
-
- switch (event) {
- case SEND_I_CMD:
- /*
- * Send frame only if send window > 0
- */
- if ((self->window > 0) && (!self->remote_busy)) {
- int nextfit;
-#ifdef CONFIG_IRDA_DYNAMIC_WINDOW
- struct sk_buff *skb_next;
-
- /*
- * Same deal as in irlap_state_xmit_p(), so see
- * the comments at that point.
- * We are the secondary, so there are only subtle
- * differences. - Jean II
- */
-
- /* Check if a subsequent skb exist and would fit in
- * the current window (with respect to turnaround
- * time). - Jean II */
- skb_next = skb_peek(&self->txq);
- nextfit = ((skb_next != NULL) &&
- ((skb_next->len + skb->len) <=
- self->bytes_left));
-
- /*
- * Test if we have transmitted more bytes over the
- * link than its possible to do with the current
- * speed and turn-around-time.
- */
- if((!nextfit) && (skb->len > self->bytes_left)) {
- pr_debug("%s(), Not allowed to transmit more bytes!\n",
- __func__);
- /* Requeue the skb */
- skb_queue_head(&self->txq, skb_get(skb));
-
- /*
- * Switch to NRM_S, this is only possible
- * when we are in secondary mode, since we
- * must be sure that we don't miss any RR
- * frames
- */
- self->window = self->window_size;
- self->bytes_left = self->line_capacity;
- irlap_start_wd_timer(self, self->wd_timeout);
-
- irlap_next_state(self, LAP_NRM_S);
- /* Slight difference with primary :
- * here we would wait for the other side to
- * expire the turnaround. - Jean II */
-
- return -EPROTO; /* Try again later */
- }
- /* Subtract space used by this skb */
- self->bytes_left -= skb->len;
-#else /* CONFIG_IRDA_DYNAMIC_WINDOW */
- /* Window has been adjusted for the max packet
- * size, so much simpler... - Jean II */
- nextfit = !skb_queue_empty(&self->txq);
-#endif /* CONFIG_IRDA_DYNAMIC_WINDOW */
- /*
- * Send data with final bit cleared only if window > 1
- * and there is more frames to be sent
- */
- if ((self->window > 1) && (nextfit)) {
- irlap_send_data_secondary(self, skb);
- irlap_next_state(self, LAP_XMIT_S);
- } else {
- irlap_send_data_secondary_final(self, skb);
- irlap_next_state(self, LAP_NRM_S);
-
- /*
- * Make sure state machine does not try to send
- * any more frames
- */
- ret = -EPROTO;
- }
- } else {
- pr_debug("%s(), Unable to send!\n", __func__);
- skb_queue_head(&self->txq, skb_get(skb));
- ret = -EPROTO;
- }
- break;
- case DISCONNECT_REQUEST:
- irlap_send_rd_frame(self);
- irlap_flush_all_queues(self);
- irlap_start_wd_timer(self, self->wd_timeout);
- irlap_next_state(self, LAP_SCLOSE);
- break;
- case DATA_REQUEST:
- /* Nothing to do, irlap_do_event() will send the packet
- * when we return... - Jean II */
- break;
- default:
- pr_debug("%s(), Unknown event %s\n", __func__,
- irlap_event[event]);
-
- ret = -EINVAL;
- break;
- }
- return ret;
-}
-
-/*
- * Function irlap_state_nrm_s (event, skb, info)
- *
- * NRM_S (Normal Response Mode as Secondary) state, in this state we are
- * expecting to receive frames from the primary station
- *
- */
-static int irlap_state_nrm_s(struct irlap_cb *self, IRLAP_EVENT event,
- struct sk_buff *skb, struct irlap_info *info)
-{
- int ns_status;
- int nr_status;
- int ret = 0;
-
- pr_debug("%s(), event=%s\n", __func__, irlap_event[event]);
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;);
-
- switch (event) {
- case RECV_I_CMD: /* Optimize for the common case */
- /* FIXME: must check for remote_busy below */
- pr_debug("%s(), event=%s nr=%d, vs=%d, ns=%d, vr=%d, pf=%d\n",
- __func__, irlap_event[event], info->nr,
- self->vs, info->ns, self->vr, info->pf);
-
- self->retry_count = 0;
-
- ns_status = irlap_validate_ns_received(self, info->ns);
- nr_status = irlap_validate_nr_received(self, info->nr);
- /*
- * Check for expected I(nformation) frame
- */
- if ((ns_status == NS_EXPECTED) && (nr_status == NR_EXPECTED)) {
-
- /* Update Vr (next frame for us to receive) */
- self->vr = (self->vr + 1) % 8;
-
- /* Update Nr received */
- irlap_update_nr_received(self, info->nr);
-
- /*
- * poll bit cleared?
- */
- if (!info->pf) {
-
- self->ack_required = TRUE;
-
- /*
- * Starting WD-timer here is optional, but
- * not recommended. Note 6 IrLAP p. 83
- */
-#if 0
- irda_start_timer(WD_TIMER, self->wd_timeout);
-#endif
- /* Keep state, do not move this line */
- irlap_next_state(self, LAP_NRM_S);
-
- irlap_data_indication(self, skb, FALSE);
- break;
- } else {
- /*
- * We should wait before sending RR, and
- * also before changing to XMIT_S
- * state. (note 1, IrLAP p. 82)
- */
- irlap_wait_min_turn_around(self, &self->qos_tx);
-
- /*
- * Give higher layers a chance to
- * immediately reply with some data before
- * we decide if we should send a RR frame
- * or not
- */
- irlap_data_indication(self, skb, FALSE);
-
- /* Any pending data requests? */
- if (!skb_queue_empty(&self->txq) &&
- (self->window > 0))
- {
- self->ack_required = TRUE;
-
- del_timer(&self->wd_timer);
-
- irlap_next_state(self, LAP_XMIT_S);
- } else {
- irlap_send_rr_frame(self, RSP_FRAME);
- irlap_start_wd_timer(self,
- self->wd_timeout);
-
- /* Keep the state */
- irlap_next_state(self, LAP_NRM_S);
- }
- break;
- }
- }
- /*
- * Check for Unexpected next to send (Ns)
- */
- if ((ns_status == NS_UNEXPECTED) && (nr_status == NR_EXPECTED))
- {
- /* Unexpected next to send, with final bit cleared */
- if (!info->pf) {
- irlap_update_nr_received(self, info->nr);
-
- irlap_start_wd_timer(self, self->wd_timeout);
- } else {
- /* Update Nr received */
- irlap_update_nr_received(self, info->nr);
-
- irlap_wait_min_turn_around(self, &self->qos_tx);
- irlap_send_rr_frame(self, RSP_FRAME);
-
- irlap_start_wd_timer(self, self->wd_timeout);
- }
- break;
- }
-
- /*
- * Unexpected Next to Receive(NR) ?
- */
- if ((ns_status == NS_EXPECTED) && (nr_status == NR_UNEXPECTED))
- {
- if (info->pf) {
- pr_debug("RECV_I_RSP: frame(s) lost\n");
-
- self->vr = (self->vr + 1) % 8;
-
- /* Update Nr received */
- irlap_update_nr_received(self, info->nr);
-
- /* Resend rejected frames */
- irlap_resend_rejected_frames(self, RSP_FRAME);
-
- /* Keep state, do not move this line */
- irlap_next_state(self, LAP_NRM_S);
-
- irlap_data_indication(self, skb, FALSE);
- irlap_start_wd_timer(self, self->wd_timeout);
- break;
- }
- /*
- * This is not documented in IrLAP!! Unexpected NR
- * with poll bit cleared
- */
- if (!info->pf) {
- self->vr = (self->vr + 1) % 8;
-
- /* Update Nr received */
- irlap_update_nr_received(self, info->nr);
-
- /* Keep state, do not move this line */
- irlap_next_state(self, LAP_NRM_S);
-
- irlap_data_indication(self, skb, FALSE);
- irlap_start_wd_timer(self, self->wd_timeout);
- }
- break;
- }
-
- if (ret == NR_INVALID) {
- pr_debug("NRM_S, NR_INVALID not implemented!\n");
- }
- if (ret == NS_INVALID) {
- pr_debug("NRM_S, NS_INVALID not implemented!\n");
- }
- break;
- case RECV_UI_FRAME:
- /*
- * poll bit cleared?
- */
- if (!info->pf) {
- irlap_data_indication(self, skb, TRUE);
- irlap_next_state(self, LAP_NRM_S); /* Keep state */
- } else {
- /*
- * Any pending data requests?
- */
- if (!skb_queue_empty(&self->txq) &&
- (self->window > 0) && !self->remote_busy)
- {
- irlap_data_indication(self, skb, TRUE);
-
- del_timer(&self->wd_timer);
-
- irlap_next_state(self, LAP_XMIT_S);
- } else {
- irlap_data_indication(self, skb, TRUE);
-
- irlap_wait_min_turn_around(self, &self->qos_tx);
-
- irlap_send_rr_frame(self, RSP_FRAME);
- self->ack_required = FALSE;
-
- irlap_start_wd_timer(self, self->wd_timeout);
-
- /* Keep the state */
- irlap_next_state(self, LAP_NRM_S);
- }
- }
- break;
- case RECV_RR_CMD:
- self->retry_count = 0;
-
- /*
- * Nr as expected?
- */
- nr_status = irlap_validate_nr_received(self, info->nr);
- if (nr_status == NR_EXPECTED) {
- if (!skb_queue_empty(&self->txq) &&
- (self->window > 0)) {
- self->remote_busy = FALSE;
-
- /* Update Nr received */
- irlap_update_nr_received(self, info->nr);
- del_timer(&self->wd_timer);
-
- irlap_wait_min_turn_around(self, &self->qos_tx);
- irlap_next_state(self, LAP_XMIT_S);
- } else {
- self->remote_busy = FALSE;
- /* Update Nr received */
- irlap_update_nr_received(self, info->nr);
- irlap_wait_min_turn_around(self, &self->qos_tx);
- irlap_start_wd_timer(self, self->wd_timeout);
-
- /* Note : if the link is idle (this case),
- * we never go in XMIT_S, so we never get a
- * chance to process any DISCONNECT_REQUEST.
- * Do it now ! - Jean II */
- if (self->disconnect_pending) {
- /* Disconnect */
- irlap_send_rd_frame(self);
- irlap_flush_all_queues(self);
-
- irlap_next_state(self, LAP_SCLOSE);
- } else {
- /* Just send back pf bit */
- irlap_send_rr_frame(self, RSP_FRAME);
-
- irlap_next_state(self, LAP_NRM_S);
- }
- }
- } else if (nr_status == NR_UNEXPECTED) {
- self->remote_busy = FALSE;
- irlap_update_nr_received(self, info->nr);
- irlap_resend_rejected_frames(self, RSP_FRAME);
-
- irlap_start_wd_timer(self, self->wd_timeout);
-
- /* Keep state */
- irlap_next_state(self, LAP_NRM_S);
- } else {
- pr_debug("%s(), invalid nr not implemented!\n",
- __func__);
- }
- break;
- case RECV_SNRM_CMD:
- /* SNRM frame is not allowed to contain an I-field */
- if (!info) {
- del_timer(&self->wd_timer);
- pr_debug("%s(), received SNRM cmd\n", __func__);
- irlap_next_state(self, LAP_RESET_CHECK);
-
- irlap_reset_indication(self);
- } else {
- pr_debug("%s(), SNRM frame contained an I-field!\n",
- __func__);
-
- }
- break;
- case RECV_REJ_CMD:
- irlap_update_nr_received(self, info->nr);
- if (self->remote_busy) {
- irlap_wait_min_turn_around(self, &self->qos_tx);
- irlap_send_rr_frame(self, RSP_FRAME);
- } else
- irlap_resend_rejected_frames(self, RSP_FRAME);
- irlap_start_wd_timer(self, self->wd_timeout);
- break;
- case RECV_SREJ_CMD:
- irlap_update_nr_received(self, info->nr);
- if (self->remote_busy) {
- irlap_wait_min_turn_around(self, &self->qos_tx);
- irlap_send_rr_frame(self, RSP_FRAME);
- } else
- irlap_resend_rejected_frame(self, RSP_FRAME);
- irlap_start_wd_timer(self, self->wd_timeout);
- break;
- case WD_TIMER_EXPIRED:
- /*
- * Wait until retry_count * n matches negotiated threshold/
- * disconnect time (note 2 in IrLAP p. 82)
- *
- * Similar to irlap_state_nrm_p() -> FINAL_TIMER_EXPIRED
- * Note : self->wd_timeout = (self->final_timeout * 2),
- * which explain why we use (self->N2 / 2) here !!!
- * Jean II
- */
- pr_debug("%s(), retry_count = %d\n", __func__,
- self->retry_count);
-
- if (self->retry_count < (self->N2 / 2)) {
- /* No retry, just wait for primary */
- irlap_start_wd_timer(self, self->wd_timeout);
- self->retry_count++;
-
- if((self->retry_count % (self->N1 / 2)) == 0)
- irlap_status_indication(self,
- STATUS_NO_ACTIVITY);
- } else {
- irlap_apply_default_connection_parameters(self);
-
- /* Always switch state before calling upper layers */
- irlap_next_state(self, LAP_NDM);
- irlap_disconnect_indication(self, LAP_NO_RESPONSE);
- }
- break;
- case RECV_DISC_CMD:
- /* Always switch state before calling upper layers */
- irlap_next_state(self, LAP_NDM);
-
- /* Send disconnect response */
- irlap_wait_min_turn_around(self, &self->qos_tx);
- irlap_send_ua_response_frame(self, NULL);
-
- del_timer(&self->wd_timer);
- irlap_flush_all_queues(self);
- /* Set default link parameters */
- irlap_apply_default_connection_parameters(self);
-
- irlap_disconnect_indication(self, LAP_DISC_INDICATION);
- break;
- case RECV_DISCOVERY_XID_CMD:
- irlap_wait_min_turn_around(self, &self->qos_tx);
- irlap_send_rr_frame(self, RSP_FRAME);
- self->ack_required = TRUE;
- irlap_start_wd_timer(self, self->wd_timeout);
- irlap_next_state(self, LAP_NRM_S);
-
- break;
- case RECV_TEST_CMD:
- /* Remove test frame header (only LAP header in NRM) */
- skb_pull(skb, LAP_ADDR_HEADER + LAP_CTRL_HEADER);
-
- irlap_wait_min_turn_around(self, &self->qos_tx);
- irlap_start_wd_timer(self, self->wd_timeout);
-
- /* Send response (info will be copied) */
- irlap_send_test_frame(self, self->caddr, info->daddr, skb);
- break;
- default:
- pr_debug("%s(), Unknown event %d, (%s)\n", __func__,
- event, irlap_event[event]);
-
- ret = -EINVAL;
- break;
- }
- return ret;
-}
-
-/*
- * Function irlap_state_sclose (self, event, skb, info)
- */
-static int irlap_state_sclose(struct irlap_cb *self, IRLAP_EVENT event,
- struct sk_buff *skb, struct irlap_info *info)
-{
- IRDA_ASSERT(self != NULL, return -ENODEV;);
- IRDA_ASSERT(self->magic == LAP_MAGIC, return -EBADR;);
-
- switch (event) {
- case RECV_DISC_CMD:
- /* Always switch state before calling upper layers */
- irlap_next_state(self, LAP_NDM);
-
- /* Send disconnect response */
- irlap_wait_min_turn_around(self, &self->qos_tx);
- irlap_send_ua_response_frame(self, NULL);
-
- del_timer(&self->wd_timer);
- /* Set default link parameters */
- irlap_apply_default_connection_parameters(self);
-
- irlap_disconnect_indication(self, LAP_DISC_INDICATION);
- break;
- case RECV_DM_RSP:
- /* IrLAP-1.1 p.82: in SCLOSE, S and I type RSP frames
- * shall take us down into default NDM state, like DM_RSP
- */
- case RECV_RR_RSP:
- case RECV_RNR_RSP:
- case RECV_REJ_RSP:
- case RECV_SREJ_RSP:
- case RECV_I_RSP:
- /* Always switch state before calling upper layers */
- irlap_next_state(self, LAP_NDM);
-
- del_timer(&self->wd_timer);
- irlap_apply_default_connection_parameters(self);
-
- irlap_disconnect_indication(self, LAP_DISC_INDICATION);
- break;
- case WD_TIMER_EXPIRED:
- /* Always switch state before calling upper layers */
- irlap_next_state(self, LAP_NDM);
-
- irlap_apply_default_connection_parameters(self);
-
- irlap_disconnect_indication(self, LAP_DISC_INDICATION);
- break;
- default:
- /* IrLAP-1.1 p.82: in SCLOSE, basically any received frame
- * with pf=1 shall restart the wd-timer and resend the rd:rsp
- */
- if (info != NULL && info->pf) {
- del_timer(&self->wd_timer);
- irlap_wait_min_turn_around(self, &self->qos_tx);
- irlap_send_rd_frame(self);
- irlap_start_wd_timer(self, self->wd_timeout);
- break; /* stay in SCLOSE */
- }
-
- pr_debug("%s(), Unknown event %d, (%s)\n", __func__,
- event, irlap_event[event]);
-
- break;
- }
-
- return -1;
-}
-
-static int irlap_state_reset_check( struct irlap_cb *self, IRLAP_EVENT event,
- struct sk_buff *skb,
- struct irlap_info *info)
-{
- int ret = 0;
-
- pr_debug("%s(), event=%s\n", __func__, irlap_event[event]);
-
- IRDA_ASSERT(self != NULL, return -ENODEV;);
- IRDA_ASSERT(self->magic == LAP_MAGIC, return -EBADR;);
-
- switch (event) {
- case RESET_RESPONSE:
- irlap_send_ua_response_frame(self, &self->qos_rx);
- irlap_initiate_connection_state(self);
- irlap_start_wd_timer(self, WD_TIMEOUT);
- irlap_flush_all_queues(self);
-
- irlap_next_state(self, LAP_NRM_S);
- break;
- case DISCONNECT_REQUEST:
- irlap_wait_min_turn_around(self, &self->qos_tx);
- irlap_send_rd_frame(self);
- irlap_start_wd_timer(self, WD_TIMEOUT);
- irlap_next_state(self, LAP_SCLOSE);
- break;
- default:
- pr_debug("%s(), Unknown event %d, (%s)\n", __func__,
- event, irlap_event[event]);
-
- ret = -EINVAL;
- break;
- }
- return ret;
-}
diff --git a/net/irda/irlap_frame.c b/net/irda/irlap_frame.c
deleted file mode 100644
index debda3de4726..000000000000
--- a/net/irda/irlap_frame.c
+++ /dev/null
@@ -1,1407 +0,0 @@
-/*********************************************************************
- *
- * Filename: irlap_frame.c
- * Version: 1.0
- * Description: Build and transmit IrLAP frames
- * Status: Stable
- * Author: Dag Brattli <dagb@cs.uit.no>
- * Created at: Tue Aug 19 10:27:26 1997
- * Modified at: Wed Jan 5 08:59:04 2000
- * Modified by: Dag Brattli <dagb@cs.uit.no>
- *
- * Copyright (c) 1998-2000 Dag Brattli <dagb@cs.uit.no>,
- * All Rights Reserved.
- * Copyright (c) 2000-2003 Jean Tourrilhes <jt@hpl.hp.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * Neither Dag Brattli nor University of Tromsø admit liability nor
- * provide warranty for any of this software. This material is
- * provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-#include <linux/skbuff.h>
-#include <linux/if.h>
-#include <linux/if_ether.h>
-#include <linux/netdevice.h>
-#include <linux/irda.h>
-#include <linux/slab.h>
-
-#include <net/pkt_sched.h>
-#include <net/sock.h>
-
-#include <asm/byteorder.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/irda_device.h>
-#include <net/irda/irlap.h>
-#include <net/irda/wrapper.h>
-#include <net/irda/timer.h>
-#include <net/irda/irlap_frame.h>
-#include <net/irda/qos.h>
-
-static void irlap_send_i_frame(struct irlap_cb *self, struct sk_buff *skb,
- int command);
-
-/*
- * Function irlap_insert_info (self, skb)
- *
- * Insert minimum turnaround time and speed information into the skb. We
- * need to do this since it's per packet relevant information. Safe to
- * have this function inlined since it's only called from one place
- */
-static inline void irlap_insert_info(struct irlap_cb *self,
- struct sk_buff *skb)
-{
- struct irda_skb_cb *cb = (struct irda_skb_cb *) skb->cb;
-
- /*
- * Insert MTT (min. turn time) and speed into skb, so that the
- * device driver knows which settings to use
- */
- cb->magic = LAP_MAGIC;
- cb->mtt = self->mtt_required;
- cb->next_speed = self->speed;
-
- /* Reset */
- self->mtt_required = 0;
-
- /*
- * Delay equals negotiated BOFs count, plus the number of BOFs to
- * force the negotiated minimum turnaround time
- */
- cb->xbofs = self->bofs_count;
- cb->next_xbofs = self->next_bofs;
- cb->xbofs_delay = self->xbofs_delay;
-
- /* Reset XBOF's delay (used only for getting min turn time) */
- self->xbofs_delay = 0;
- /* Put the correct xbofs value for the next packet */
- self->bofs_count = self->next_bofs;
-}
-
-/*
- * Function irlap_queue_xmit (self, skb)
- *
- * A little wrapper for dev_queue_xmit, so we can insert some common
- * code into it.
- */
-void irlap_queue_xmit(struct irlap_cb *self, struct sk_buff *skb)
-{
- /* Some common init stuff */
- skb->dev = self->netdev;
- skb_reset_mac_header(skb);
- skb_reset_network_header(skb);
- skb_reset_transport_header(skb);
- skb->protocol = htons(ETH_P_IRDA);
- skb->priority = TC_PRIO_BESTEFFORT;
-
- irlap_insert_info(self, skb);
-
- if (unlikely(self->mode & IRDA_MODE_MONITOR)) {
- pr_debug("%s(): %s is in monitor mode\n", __func__,
- self->netdev->name);
- dev_kfree_skb(skb);
- return;
- }
-
- dev_queue_xmit(skb);
-}
-
-/*
- * Function irlap_send_snrm_cmd (void)
- *
- * Transmits a connect SNRM command frame
- */
-void irlap_send_snrm_frame(struct irlap_cb *self, struct qos_info *qos)
-{
- struct sk_buff *tx_skb;
- struct snrm_frame *frame;
- int ret;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
- /* Allocate frame */
- tx_skb = alloc_skb(sizeof(struct snrm_frame) +
- IRLAP_NEGOCIATION_PARAMS_LEN,
- GFP_ATOMIC);
- if (!tx_skb)
- return;
-
- frame = skb_put(tx_skb, 2);
-
- /* Insert connection address field */
- if (qos)
- frame->caddr = CMD_FRAME | CBROADCAST;
- else
- frame->caddr = CMD_FRAME | self->caddr;
-
- /* Insert control field */
- frame->control = SNRM_CMD | PF_BIT;
-
- /*
- * If we are establishing a connection then insert QoS parameters
- */
- if (qos) {
- skb_put(tx_skb, 9); /* 25 left */
- frame->saddr = cpu_to_le32(self->saddr);
- frame->daddr = cpu_to_le32(self->daddr);
-
- frame->ncaddr = self->caddr;
-
- ret = irlap_insert_qos_negotiation_params(self, tx_skb);
- if (ret < 0) {
- dev_kfree_skb(tx_skb);
- return;
- }
- }
- irlap_queue_xmit(self, tx_skb);
-}
-
-/*
- * Function irlap_recv_snrm_cmd (skb, info)
- *
- * Received SNRM (Set Normal Response Mode) command frame
- *
- */
-static void irlap_recv_snrm_cmd(struct irlap_cb *self, struct sk_buff *skb,
- struct irlap_info *info)
-{
- struct snrm_frame *frame;
-
- if (pskb_may_pull(skb,sizeof(struct snrm_frame))) {
- frame = (struct snrm_frame *) skb->data;
-
- /* Copy the new connection address ignoring the C/R bit */
- info->caddr = frame->ncaddr & 0xFE;
-
- /* Check if the new connection address is valid */
- if ((info->caddr == 0x00) || (info->caddr == 0xfe)) {
- pr_debug("%s(), invalid connection address!\n",
- __func__);
- return;
- }
-
- /* Copy peer device address */
- info->daddr = le32_to_cpu(frame->saddr);
- info->saddr = le32_to_cpu(frame->daddr);
-
- /* Only accept if addressed directly to us */
- if (info->saddr != self->saddr) {
- pr_debug("%s(), not addressed to us!\n",
- __func__);
- return;
- }
- irlap_do_event(self, RECV_SNRM_CMD, skb, info);
- } else {
- /* Signal that this SNRM frame does not contain and I-field */
- irlap_do_event(self, RECV_SNRM_CMD, skb, NULL);
- }
-}
-
-/*
- * Function irlap_send_ua_response_frame (qos)
- *
- * Send UA (Unnumbered Acknowledgement) frame
- *
- */
-void irlap_send_ua_response_frame(struct irlap_cb *self, struct qos_info *qos)
-{
- struct sk_buff *tx_skb;
- struct ua_frame *frame;
- int ret;
-
- pr_debug("%s() <%ld>\n", __func__, jiffies);
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
- /* Allocate frame */
- tx_skb = alloc_skb(sizeof(struct ua_frame) +
- IRLAP_NEGOCIATION_PARAMS_LEN,
- GFP_ATOMIC);
- if (!tx_skb)
- return;
-
- frame = skb_put(tx_skb, 10);
-
- /* Build UA response */
- frame->caddr = self->caddr;
- frame->control = UA_RSP | PF_BIT;
-
- frame->saddr = cpu_to_le32(self->saddr);
- frame->daddr = cpu_to_le32(self->daddr);
-
- /* Should we send QoS negotiation parameters? */
- if (qos) {
- ret = irlap_insert_qos_negotiation_params(self, tx_skb);
- if (ret < 0) {
- dev_kfree_skb(tx_skb);
- return;
- }
- }
-
- irlap_queue_xmit(self, tx_skb);
-}
-
-
-/*
- * Function irlap_send_dm_frame (void)
- *
- * Send disconnected mode (DM) frame
- *
- */
-void irlap_send_dm_frame( struct irlap_cb *self)
-{
- struct sk_buff *tx_skb = NULL;
- struct dm_frame *frame;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
- tx_skb = alloc_skb(sizeof(struct dm_frame), GFP_ATOMIC);
- if (!tx_skb)
- return;
-
- frame = skb_put(tx_skb, 2);
-
- if (self->state == LAP_NDM)
- frame->caddr = CBROADCAST;
- else
- frame->caddr = self->caddr;
-
- frame->control = DM_RSP | PF_BIT;
-
- irlap_queue_xmit(self, tx_skb);
-}
-
-/*
- * Function irlap_send_disc_frame (void)
- *
- * Send disconnect (DISC) frame
- *
- */
-void irlap_send_disc_frame(struct irlap_cb *self)
-{
- struct sk_buff *tx_skb = NULL;
- struct disc_frame *frame;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
- tx_skb = alloc_skb(sizeof(struct disc_frame), GFP_ATOMIC);
- if (!tx_skb)
- return;
-
- frame = skb_put(tx_skb, 2);
-
- frame->caddr = self->caddr | CMD_FRAME;
- frame->control = DISC_CMD | PF_BIT;
-
- irlap_queue_xmit(self, tx_skb);
-}
-
-/*
- * Function irlap_send_discovery_xid_frame (S, s, command)
- *
- * Build and transmit a XID (eXchange station IDentifier) discovery
- * frame.
- */
-void irlap_send_discovery_xid_frame(struct irlap_cb *self, int S, __u8 s,
- __u8 command, discovery_t *discovery)
-{
- struct sk_buff *tx_skb = NULL;
- struct xid_frame *frame;
- __u32 bcast = BROADCAST;
- __u8 *info;
-
- pr_debug("%s(), s=%d, S=%d, command=%d\n", __func__,
- s, S, command);
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
- IRDA_ASSERT(discovery != NULL, return;);
-
- tx_skb = alloc_skb(sizeof(struct xid_frame) + IRLAP_DISCOVERY_INFO_LEN,
- GFP_ATOMIC);
- if (!tx_skb)
- return;
-
- skb_put(tx_skb, 14);
- frame = (struct xid_frame *) tx_skb->data;
-
- if (command) {
- frame->caddr = CBROADCAST | CMD_FRAME;
- frame->control = XID_CMD | PF_BIT;
- } else {
- frame->caddr = CBROADCAST;
- frame->control = XID_RSP | PF_BIT;
- }
- frame->ident = XID_FORMAT;
-
- frame->saddr = cpu_to_le32(self->saddr);
-
- if (command)
- frame->daddr = cpu_to_le32(bcast);
- else
- frame->daddr = cpu_to_le32(discovery->data.daddr);
-
- switch (S) {
- case 1:
- frame->flags = 0x00;
- break;
- case 6:
- frame->flags = 0x01;
- break;
- case 8:
- frame->flags = 0x02;
- break;
- case 16:
- frame->flags = 0x03;
- break;
- default:
- frame->flags = 0x02;
- break;
- }
-
- frame->slotnr = s;
- frame->version = 0x00;
-
- /*
- * Provide info for final slot only in commands, and for all
- * responses. Send the second byte of the hint only if the
- * EXTENSION bit is set in the first byte.
- */
- if (!command || (frame->slotnr == 0xff)) {
- int len;
-
- if (discovery->data.hints[0] & HINT_EXTENSION) {
- info = skb_put(tx_skb, 2);
- info[0] = discovery->data.hints[0];
- info[1] = discovery->data.hints[1];
- } else {
- info = skb_put(tx_skb, 1);
- info[0] = discovery->data.hints[0];
- }
- info = skb_put(tx_skb, 1);
- info[0] = discovery->data.charset;
-
- len = IRDA_MIN(discovery->name_len, skb_tailroom(tx_skb));
- skb_put_data(tx_skb, discovery->data.info, len);
- }
- irlap_queue_xmit(self, tx_skb);
-}
-
-/*
- * Function irlap_recv_discovery_xid_rsp (skb, info)
- *
- * Received a XID discovery response
- *
- */
-static void irlap_recv_discovery_xid_rsp(struct irlap_cb *self,
- struct sk_buff *skb,
- struct irlap_info *info)
-{
- struct xid_frame *xid;
- discovery_t *discovery = NULL;
- __u8 *discovery_info;
- char *text;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
- if (!pskb_may_pull(skb, sizeof(struct xid_frame))) {
- net_err_ratelimited("%s: frame too short!\n", __func__);
- return;
- }
-
- xid = (struct xid_frame *) skb->data;
-
- info->daddr = le32_to_cpu(xid->saddr);
- info->saddr = le32_to_cpu(xid->daddr);
-
- /* Make sure frame is addressed to us */
- if ((info->saddr != self->saddr) && (info->saddr != BROADCAST)) {
- pr_debug("%s(), frame is not addressed to us!\n",
- __func__);
- return;
- }
-
- if ((discovery = kzalloc(sizeof(discovery_t), GFP_ATOMIC)) == NULL) {
- net_warn_ratelimited("%s: kmalloc failed!\n", __func__);
- return;
- }
-
- discovery->data.daddr = info->daddr;
- discovery->data.saddr = self->saddr;
- discovery->timestamp = jiffies;
-
- pr_debug("%s(), daddr=%08x\n", __func__,
- discovery->data.daddr);
-
- discovery_info = skb_pull(skb, sizeof(struct xid_frame));
-
- /* Get info returned from peer */
- discovery->data.hints[0] = discovery_info[0];
- if (discovery_info[0] & HINT_EXTENSION) {
- pr_debug("EXTENSION\n");
- discovery->data.hints[1] = discovery_info[1];
- discovery->data.charset = discovery_info[2];
- text = (char *) &discovery_info[3];
- } else {
- discovery->data.hints[1] = 0;
- discovery->data.charset = discovery_info[1];
- text = (char *) &discovery_info[2];
- }
- /*
- * Terminate info string, should be safe since this is where the
- * FCS bytes resides.
- */
- skb->data[skb->len] = '\0';
- strncpy(discovery->data.info, text, NICKNAME_MAX_LEN);
- discovery->name_len = strlen(discovery->data.info);
-
- info->discovery = discovery;
-
- irlap_do_event(self, RECV_DISCOVERY_XID_RSP, skb, info);
-}
-
-/*
- * Function irlap_recv_discovery_xid_cmd (skb, info)
- *
- * Received a XID discovery command
- *
- */
-static void irlap_recv_discovery_xid_cmd(struct irlap_cb *self,
- struct sk_buff *skb,
- struct irlap_info *info)
-{
- struct xid_frame *xid;
- discovery_t *discovery = NULL;
- __u8 *discovery_info;
- char *text;
-
- if (!pskb_may_pull(skb, sizeof(struct xid_frame))) {
- net_err_ratelimited("%s: frame too short!\n", __func__);
- return;
- }
-
- xid = (struct xid_frame *) skb->data;
-
- info->daddr = le32_to_cpu(xid->saddr);
- info->saddr = le32_to_cpu(xid->daddr);
-
- /* Make sure frame is addressed to us */
- if ((info->saddr != self->saddr) && (info->saddr != BROADCAST)) {
- pr_debug("%s(), frame is not addressed to us!\n",
- __func__);
- return;
- }
-
- switch (xid->flags & 0x03) {
- case 0x00:
- info->S = 1;
- break;
- case 0x01:
- info->S = 6;
- break;
- case 0x02:
- info->S = 8;
- break;
- case 0x03:
- info->S = 16;
- break;
- default:
- /* Error!! */
- return;
- }
- info->s = xid->slotnr;
-
- discovery_info = skb_pull(skb, sizeof(struct xid_frame));
-
- /*
- * Check if last frame
- */
- if (info->s == 0xff) {
- /* Check if things are sane at this point... */
- if((discovery_info == NULL) ||
- !pskb_may_pull(skb, 3)) {
- net_err_ratelimited("%s: discovery frame too short!\n",
- __func__);
- return;
- }
-
- /*
- * We now have some discovery info to deliver!
- */
- discovery = kzalloc(sizeof(discovery_t), GFP_ATOMIC);
- if (!discovery)
- return;
-
- discovery->data.daddr = info->daddr;
- discovery->data.saddr = self->saddr;
- discovery->timestamp = jiffies;
-
- discovery->data.hints[0] = discovery_info[0];
- if (discovery_info[0] & HINT_EXTENSION) {
- discovery->data.hints[1] = discovery_info[1];
- discovery->data.charset = discovery_info[2];
- text = (char *) &discovery_info[3];
- } else {
- discovery->data.hints[1] = 0;
- discovery->data.charset = discovery_info[1];
- text = (char *) &discovery_info[2];
- }
- /*
- * Terminate string, should be safe since this is where the
- * FCS bytes resides.
- */
- skb->data[skb->len] = '\0';
- strncpy(discovery->data.info, text, NICKNAME_MAX_LEN);
- discovery->name_len = strlen(discovery->data.info);
-
- info->discovery = discovery;
- } else
- info->discovery = NULL;
-
- irlap_do_event(self, RECV_DISCOVERY_XID_CMD, skb, info);
-}
-
-/*
- * Function irlap_send_rr_frame (self, command)
- *
- * Build and transmit RR (Receive Ready) frame. Notice that it is currently
- * only possible to send RR frames with the poll bit set.
- */
-void irlap_send_rr_frame(struct irlap_cb *self, int command)
-{
- struct sk_buff *tx_skb;
- struct rr_frame *frame;
-
- tx_skb = alloc_skb(sizeof(struct rr_frame), GFP_ATOMIC);
- if (!tx_skb)
- return;
-
- frame = skb_put(tx_skb, 2);
-
- frame->caddr = self->caddr;
- frame->caddr |= (command) ? CMD_FRAME : 0;
-
- frame->control = RR | PF_BIT | (self->vr << 5);
-
- irlap_queue_xmit(self, tx_skb);
-}
-
-/*
- * Function irlap_send_rd_frame (self)
- *
- * Request disconnect. Used by a secondary station to request the
- * disconnection of the link.
- */
-void irlap_send_rd_frame(struct irlap_cb *self)
-{
- struct sk_buff *tx_skb;
- struct rd_frame *frame;
-
- tx_skb = alloc_skb(sizeof(struct rd_frame), GFP_ATOMIC);
- if (!tx_skb)
- return;
-
- frame = skb_put(tx_skb, 2);
-
- frame->caddr = self->caddr;
- frame->control = RD_RSP | PF_BIT;
-
- irlap_queue_xmit(self, tx_skb);
-}
-
-/*
- * Function irlap_recv_rr_frame (skb, info)
- *
- * Received RR (Receive Ready) frame from peer station, no harm in
- * making it inline since its called only from one single place
- * (irlap_driver_rcv).
- */
-static inline void irlap_recv_rr_frame(struct irlap_cb *self,
- struct sk_buff *skb,
- struct irlap_info *info, int command)
-{
- info->nr = skb->data[1] >> 5;
-
- /* Check if this is a command or a response frame */
- if (command)
- irlap_do_event(self, RECV_RR_CMD, skb, info);
- else
- irlap_do_event(self, RECV_RR_RSP, skb, info);
-}
-
-/*
- * Function irlap_recv_rnr_frame (self, skb, info)
- *
- * Received RNR (Receive Not Ready) frame from peer station
- *
- */
-static void irlap_recv_rnr_frame(struct irlap_cb *self, struct sk_buff *skb,
- struct irlap_info *info, int command)
-{
- info->nr = skb->data[1] >> 5;
-
- pr_debug("%s(), nr=%d, %ld\n", __func__, info->nr, jiffies);
-
- if (command)
- irlap_do_event(self, RECV_RNR_CMD, skb, info);
- else
- irlap_do_event(self, RECV_RNR_RSP, skb, info);
-}
-
-static void irlap_recv_rej_frame(struct irlap_cb *self, struct sk_buff *skb,
- struct irlap_info *info, int command)
-{
- info->nr = skb->data[1] >> 5;
-
- /* Check if this is a command or a response frame */
- if (command)
- irlap_do_event(self, RECV_REJ_CMD, skb, info);
- else
- irlap_do_event(self, RECV_REJ_RSP, skb, info);
-}
-
-static void irlap_recv_srej_frame(struct irlap_cb *self, struct sk_buff *skb,
- struct irlap_info *info, int command)
-{
- info->nr = skb->data[1] >> 5;
-
- /* Check if this is a command or a response frame */
- if (command)
- irlap_do_event(self, RECV_SREJ_CMD, skb, info);
- else
- irlap_do_event(self, RECV_SREJ_RSP, skb, info);
-}
-
-static void irlap_recv_disc_frame(struct irlap_cb *self, struct sk_buff *skb,
- struct irlap_info *info, int command)
-{
- /* Check if this is a command or a response frame */
- if (command)
- irlap_do_event(self, RECV_DISC_CMD, skb, info);
- else
- irlap_do_event(self, RECV_RD_RSP, skb, info);
-}
-
-/*
- * Function irlap_recv_ua_frame (skb, frame)
- *
- * Received UA (Unnumbered Acknowledgement) frame
- *
- */
-static inline void irlap_recv_ua_frame(struct irlap_cb *self,
- struct sk_buff *skb,
- struct irlap_info *info)
-{
- irlap_do_event(self, RECV_UA_RSP, skb, info);
-}
-
-/*
- * Function irlap_send_data_primary(self, skb)
- *
- * Send I-frames as the primary station but without the poll bit set
- *
- */
-void irlap_send_data_primary(struct irlap_cb *self, struct sk_buff *skb)
-{
- struct sk_buff *tx_skb;
-
- if (skb->data[1] == I_FRAME) {
-
- /*
- * Insert frame sequence number (Vs) in control field before
- * inserting into transmit window queue.
- */
- skb->data[1] = I_FRAME | (self->vs << 1);
-
- /*
- * Insert frame in store, in case of retransmissions
- * Increase skb reference count, see irlap_do_event()
- */
- skb_get(skb);
- skb_queue_tail(&self->wx_list, skb);
-
- /* Copy buffer */
- tx_skb = skb_clone(skb, GFP_ATOMIC);
- if (tx_skb == NULL) {
- return;
- }
-
- self->vs = (self->vs + 1) % 8;
- self->ack_required = FALSE;
- self->window -= 1;
-
- irlap_send_i_frame( self, tx_skb, CMD_FRAME);
- } else {
- pr_debug("%s(), sending unreliable frame\n", __func__);
- irlap_send_ui_frame(self, skb_get(skb), self->caddr, CMD_FRAME);
- self->window -= 1;
- }
-}
-/*
- * Function irlap_send_data_primary_poll (self, skb)
- *
- * Send I(nformation) frame as primary with poll bit set
- */
-void irlap_send_data_primary_poll(struct irlap_cb *self, struct sk_buff *skb)
-{
- struct sk_buff *tx_skb;
- int transmission_time;
-
- /* Stop P timer */
- del_timer(&self->poll_timer);
-
- /* Is this reliable or unreliable data? */
- if (skb->data[1] == I_FRAME) {
-
- /*
- * Insert frame sequence number (Vs) in control field before
- * inserting into transmit window queue.
- */
- skb->data[1] = I_FRAME | (self->vs << 1);
-
- /*
- * Insert frame in store, in case of retransmissions
- * Increase skb reference count, see irlap_do_event()
- */
- skb_get(skb);
- skb_queue_tail(&self->wx_list, skb);
-
- /* Copy buffer */
- tx_skb = skb_clone(skb, GFP_ATOMIC);
- if (tx_skb == NULL) {
- return;
- }
-
- /*
- * Set poll bit if necessary. We do this to the copied
- * skb, since retransmitted need to set or clear the poll
- * bit depending on when they are sent.
- */
- tx_skb->data[1] |= PF_BIT;
-
- self->vs = (self->vs + 1) % 8;
- self->ack_required = FALSE;
-
- irlap_next_state(self, LAP_NRM_P);
- irlap_send_i_frame(self, tx_skb, CMD_FRAME);
- } else {
- pr_debug("%s(), sending unreliable frame\n", __func__);
-
- if (self->ack_required) {
- irlap_send_ui_frame(self, skb_get(skb), self->caddr, CMD_FRAME);
- irlap_next_state(self, LAP_NRM_P);
- irlap_send_rr_frame(self, CMD_FRAME);
- self->ack_required = FALSE;
- } else {
- skb->data[1] |= PF_BIT;
- irlap_next_state(self, LAP_NRM_P);
- irlap_send_ui_frame(self, skb_get(skb), self->caddr, CMD_FRAME);
- }
- }
-
- /* How much time we took for transmission of all frames.
- * We don't know, so let assume we used the full window. Jean II */
- transmission_time = self->final_timeout;
-
- /* Reset parameter so that we can fill next window */
- self->window = self->window_size;
-
-#ifdef CONFIG_IRDA_DYNAMIC_WINDOW
- /* Remove what we have not used. Just do a prorata of the
- * bytes left in window to window capacity.
- * See max_line_capacities[][] in qos.c for details. Jean II */
- transmission_time -= (self->final_timeout * self->bytes_left
- / self->line_capacity);
- pr_debug("%s() adjusting transmission_time : ft=%d, bl=%d, lc=%d -> tt=%d\n",
- __func__, self->final_timeout, self->bytes_left,
- self->line_capacity, transmission_time);
-
- /* We are allowed to transmit a maximum number of bytes again. */
- self->bytes_left = self->line_capacity;
-#endif /* CONFIG_IRDA_DYNAMIC_WINDOW */
-
- /*
- * The network layer has a intermediate buffer between IrLAP
- * and the IrDA driver which can contain 8 frames. So, even
- * though IrLAP is currently sending the *last* frame of the
- * tx-window, the driver most likely has only just started
- * sending the *first* frame of the same tx-window.
- * I.e. we are always at the very beginning of or Tx window.
- * Now, we are supposed to set the final timer from the end
- * of our tx-window to let the other peer reply. So, we need
- * to add extra time to compensate for the fact that we
- * are really at the start of tx-window, otherwise the final timer
- * might expire before he can answer...
- * Jean II
- */
- irlap_start_final_timer(self, self->final_timeout + transmission_time);
-
- /*
- * The clever amongst you might ask why we do this adjustement
- * only here, and not in all the other cases in irlap_event.c.
- * In all those other case, we only send a very short management
- * frame (few bytes), so the adjustement would be lost in the
- * noise...
- * The exception of course is irlap_resend_rejected_frame().
- * Jean II */
-}
-
-/*
- * Function irlap_send_data_secondary_final (self, skb)
- *
- * Send I(nformation) frame as secondary with final bit set
- *
- */
-void irlap_send_data_secondary_final(struct irlap_cb *self,
- struct sk_buff *skb)
-{
- struct sk_buff *tx_skb = NULL;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
- IRDA_ASSERT(skb != NULL, return;);
-
- /* Is this reliable or unreliable data? */
- if (skb->data[1] == I_FRAME) {
-
- /*
- * Insert frame sequence number (Vs) in control field before
- * inserting into transmit window queue.
- */
- skb->data[1] = I_FRAME | (self->vs << 1);
-
- /*
- * Insert frame in store, in case of retransmissions
- * Increase skb reference count, see irlap_do_event()
- */
- skb_get(skb);
- skb_queue_tail(&self->wx_list, skb);
-
- tx_skb = skb_clone(skb, GFP_ATOMIC);
- if (tx_skb == NULL) {
- return;
- }
-
- tx_skb->data[1] |= PF_BIT;
-
- self->vs = (self->vs + 1) % 8;
- self->ack_required = FALSE;
-
- irlap_send_i_frame(self, tx_skb, RSP_FRAME);
- } else {
- if (self->ack_required) {
- irlap_send_ui_frame(self, skb_get(skb), self->caddr, RSP_FRAME);
- irlap_send_rr_frame(self, RSP_FRAME);
- self->ack_required = FALSE;
- } else {
- skb->data[1] |= PF_BIT;
- irlap_send_ui_frame(self, skb_get(skb), self->caddr, RSP_FRAME);
- }
- }
-
- self->window = self->window_size;
-#ifdef CONFIG_IRDA_DYNAMIC_WINDOW
- /* We are allowed to transmit a maximum number of bytes again. */
- self->bytes_left = self->line_capacity;
-#endif /* CONFIG_IRDA_DYNAMIC_WINDOW */
-
- irlap_start_wd_timer(self, self->wd_timeout);
-}
-
-/*
- * Function irlap_send_data_secondary (self, skb)
- *
- * Send I(nformation) frame as secondary without final bit set
- *
- */
-void irlap_send_data_secondary(struct irlap_cb *self, struct sk_buff *skb)
-{
- struct sk_buff *tx_skb = NULL;
-
- /* Is this reliable or unreliable data? */
- if (skb->data[1] == I_FRAME) {
-
- /*
- * Insert frame sequence number (Vs) in control field before
- * inserting into transmit window queue.
- */
- skb->data[1] = I_FRAME | (self->vs << 1);
-
- /*
- * Insert frame in store, in case of retransmissions
- * Increase skb reference count, see irlap_do_event()
- */
- skb_get(skb);
- skb_queue_tail(&self->wx_list, skb);
-
- tx_skb = skb_clone(skb, GFP_ATOMIC);
- if (tx_skb == NULL) {
- return;
- }
-
- self->vs = (self->vs + 1) % 8;
- self->ack_required = FALSE;
- self->window -= 1;
-
- irlap_send_i_frame(self, tx_skb, RSP_FRAME);
- } else {
- irlap_send_ui_frame(self, skb_get(skb), self->caddr, RSP_FRAME);
- self->window -= 1;
- }
-}
-
-/*
- * Function irlap_resend_rejected_frames (nr)
- *
- * Resend frames which has not been acknowledged. Should be safe to
- * traverse the list without locking it since this function will only be
- * called from interrupt context (BH)
- */
-void irlap_resend_rejected_frames(struct irlap_cb *self, int command)
-{
- struct sk_buff *tx_skb;
- struct sk_buff *skb;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
- /* Resend unacknowledged frame(s) */
- skb_queue_walk(&self->wx_list, skb) {
- irlap_wait_min_turn_around(self, &self->qos_tx);
-
- /* We copy the skb to be retransmitted since we will have to
- * modify it. Cloning will confuse packet sniffers
- */
- /* tx_skb = skb_clone( skb, GFP_ATOMIC); */
- tx_skb = skb_copy(skb, GFP_ATOMIC);
- if (!tx_skb) {
- pr_debug("%s(), unable to copy\n", __func__);
- return;
- }
-
- /* Clear old Nr field + poll bit */
- tx_skb->data[1] &= 0x0f;
-
- /*
- * Set poll bit on the last frame retransmitted
- */
- if (skb_queue_is_last(&self->wx_list, skb))
- tx_skb->data[1] |= PF_BIT; /* Set p/f bit */
- else
- tx_skb->data[1] &= ~PF_BIT; /* Clear p/f bit */
-
- irlap_send_i_frame(self, tx_skb, command);
- }
-#if 0 /* Not yet */
- /*
- * We can now fill the window with additional data frames
- */
- while (!skb_queue_empty(&self->txq)) {
-
- pr_debug("%s(), sending additional frames!\n", __func__);
- if (self->window > 0) {
- skb = skb_dequeue( &self->txq);
- IRDA_ASSERT(skb != NULL, return;);
-
- /*
- * If send window > 1 then send frame with pf
- * bit cleared
- */
- if ((self->window > 1) &&
- !skb_queue_empty(&self->txq)) {
- irlap_send_data_primary(self, skb);
- } else {
- irlap_send_data_primary_poll(self, skb);
- }
- kfree_skb(skb);
- }
- }
-#endif
-}
-
-void irlap_resend_rejected_frame(struct irlap_cb *self, int command)
-{
- struct sk_buff *tx_skb;
- struct sk_buff *skb;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
- /* Resend unacknowledged frame(s) */
- skb = skb_peek(&self->wx_list);
- if (skb != NULL) {
- irlap_wait_min_turn_around(self, &self->qos_tx);
-
- /* We copy the skb to be retransmitted since we will have to
- * modify it. Cloning will confuse packet sniffers
- */
- /* tx_skb = skb_clone( skb, GFP_ATOMIC); */
- tx_skb = skb_copy(skb, GFP_ATOMIC);
- if (!tx_skb) {
- pr_debug("%s(), unable to copy\n", __func__);
- return;
- }
-
- /* Clear old Nr field + poll bit */
- tx_skb->data[1] &= 0x0f;
-
- /* Set poll/final bit */
- tx_skb->data[1] |= PF_BIT; /* Set p/f bit */
-
- irlap_send_i_frame(self, tx_skb, command);
- }
-}
-
-/*
- * Function irlap_send_ui_frame (self, skb, command)
- *
- * Contruct and transmit an Unnumbered Information (UI) frame
- *
- */
-void irlap_send_ui_frame(struct irlap_cb *self, struct sk_buff *skb,
- __u8 caddr, int command)
-{
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
- IRDA_ASSERT(skb != NULL, return;);
-
- /* Insert connection address */
- skb->data[0] = caddr | ((command) ? CMD_FRAME : 0);
-
- irlap_queue_xmit(self, skb);
-}
-
-/*
- * Function irlap_send_i_frame (skb)
- *
- * Contruct and transmit Information (I) frame
- */
-static void irlap_send_i_frame(struct irlap_cb *self, struct sk_buff *skb,
- int command)
-{
- /* Insert connection address */
- skb->data[0] = self->caddr;
- skb->data[0] |= (command) ? CMD_FRAME : 0;
-
- /* Insert next to receive (Vr) */
- skb->data[1] |= (self->vr << 5); /* insert nr */
-
- irlap_queue_xmit(self, skb);
-}
-
-/*
- * Function irlap_recv_i_frame (skb, frame)
- *
- * Receive and parse an I (Information) frame, no harm in making it inline
- * since it's called only from one single place (irlap_driver_rcv).
- */
-static inline void irlap_recv_i_frame(struct irlap_cb *self,
- struct sk_buff *skb,
- struct irlap_info *info, int command)
-{
- info->nr = skb->data[1] >> 5; /* Next to receive */
- info->pf = skb->data[1] & PF_BIT; /* Final bit */
- info->ns = (skb->data[1] >> 1) & 0x07; /* Next to send */
-
- /* Check if this is a command or a response frame */
- if (command)
- irlap_do_event(self, RECV_I_CMD, skb, info);
- else
- irlap_do_event(self, RECV_I_RSP, skb, info);
-}
-
-/*
- * Function irlap_recv_ui_frame (self, skb, info)
- *
- * Receive and parse an Unnumbered Information (UI) frame
- *
- */
-static void irlap_recv_ui_frame(struct irlap_cb *self, struct sk_buff *skb,
- struct irlap_info *info)
-{
- info->pf = skb->data[1] & PF_BIT; /* Final bit */
-
- irlap_do_event(self, RECV_UI_FRAME, skb, info);
-}
-
-/*
- * Function irlap_recv_frmr_frame (skb, frame)
- *
- * Received Frame Reject response.
- *
- */
-static void irlap_recv_frmr_frame(struct irlap_cb *self, struct sk_buff *skb,
- struct irlap_info *info)
-{
- __u8 *frame;
- int w, x, y, z;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
- IRDA_ASSERT(skb != NULL, return;);
- IRDA_ASSERT(info != NULL, return;);
-
- if (!pskb_may_pull(skb, 4)) {
- net_err_ratelimited("%s: frame too short!\n", __func__);
- return;
- }
-
- frame = skb->data;
-
- info->nr = frame[2] >> 5; /* Next to receive */
- info->pf = frame[2] & PF_BIT; /* Final bit */
- info->ns = (frame[2] >> 1) & 0x07; /* Next to send */
-
- w = frame[3] & 0x01;
- x = frame[3] & 0x02;
- y = frame[3] & 0x04;
- z = frame[3] & 0x08;
-
- if (w) {
- pr_debug("Rejected control field is undefined or not implemented\n");
- }
- if (x) {
- pr_debug("Rejected control field was invalid because it contained a non permitted I field\n");
- }
- if (y) {
- pr_debug("Received I field exceeded the maximum negotiated for the existing connection or exceeded the maximum this station supports if no connection exists\n");
- }
- if (z) {
- pr_debug("Rejected control field control field contained an invalid Nr count\n");
- }
- irlap_do_event(self, RECV_FRMR_RSP, skb, info);
-}
-
-/*
- * Function irlap_send_test_frame (self, daddr)
- *
- * Send a test frame response
- *
- */
-void irlap_send_test_frame(struct irlap_cb *self, __u8 caddr, __u32 daddr,
- struct sk_buff *cmd)
-{
- struct sk_buff *tx_skb;
- struct test_frame *frame;
-
- tx_skb = alloc_skb(cmd->len + sizeof(struct test_frame), GFP_ATOMIC);
- if (!tx_skb)
- return;
-
- /* Broadcast frames must include saddr and daddr fields */
- if (caddr == CBROADCAST) {
- frame = skb_put(tx_skb, sizeof(struct test_frame));
-
- /* Insert the swapped addresses */
- frame->saddr = cpu_to_le32(self->saddr);
- frame->daddr = cpu_to_le32(daddr);
- } else
- frame = skb_put(tx_skb, LAP_ADDR_HEADER + LAP_CTRL_HEADER);
-
- frame->caddr = caddr;
- frame->control = TEST_RSP | PF_BIT;
-
- /* Copy info */
- skb_put_data(tx_skb, cmd->data, cmd->len);
-
- /* Return to sender */
- irlap_wait_min_turn_around(self, &self->qos_tx);
- irlap_queue_xmit(self, tx_skb);
-}
-
-/*
- * Function irlap_recv_test_frame (self, skb)
- *
- * Receive a test frame
- *
- */
-static void irlap_recv_test_frame(struct irlap_cb *self, struct sk_buff *skb,
- struct irlap_info *info, int command)
-{
- struct test_frame *frame;
-
- if (!pskb_may_pull(skb, sizeof(*frame))) {
- net_err_ratelimited("%s: frame too short!\n", __func__);
- return;
- }
- frame = (struct test_frame *) skb->data;
-
- /* Broadcast frames must carry saddr and daddr fields */
- if (info->caddr == CBROADCAST) {
- if (skb->len < sizeof(struct test_frame)) {
- pr_debug("%s() test frame too short!\n",
- __func__);
- return;
- }
-
- /* Read and swap addresses */
- info->daddr = le32_to_cpu(frame->saddr);
- info->saddr = le32_to_cpu(frame->daddr);
-
- /* Make sure frame is addressed to us */
- if ((info->saddr != self->saddr) &&
- (info->saddr != BROADCAST)) {
- return;
- }
- }
-
- if (command)
- irlap_do_event(self, RECV_TEST_CMD, skb, info);
- else
- irlap_do_event(self, RECV_TEST_RSP, skb, info);
-}
-
-/*
- * Function irlap_driver_rcv (skb, netdev, ptype)
- *
- * Called when a frame is received. Dispatches the right receive function
- * for processing of the frame.
- *
- * Note on skb management :
- * After calling the higher layers of the IrDA stack, we always
- * kfree() the skb, which drop the reference count (and potentially
- * destroy it).
- * If a higher layer of the stack want to keep the skb around (to put
- * in a queue or pass it to the higher layer), it will need to use
- * skb_get() to keep a reference on it. This is usually done at the
- * LMP level in irlmp.c.
- * Jean II
- */
-int irlap_driver_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *ptype, struct net_device *orig_dev)
-{
- struct irlap_info info;
- struct irlap_cb *self;
- int command;
- __u8 control;
- int ret = -1;
-
- if (!net_eq(dev_net(dev), &init_net))
- goto out;
-
- /* FIXME: should we get our own field? */
- self = (struct irlap_cb *) dev->atalk_ptr;
-
- /* If the net device is down, then IrLAP is gone! */
- if (!self || self->magic != LAP_MAGIC)
- goto err;
-
- /* We are no longer an "old" protocol, so we need to handle
- * share and non linear skbs. This should never happen, so
- * we don't need to be clever about it. Jean II */
- if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) {
- net_err_ratelimited("%s: can't clone shared skb!\n", __func__);
- goto err;
- }
-
- /* Check if frame is large enough for parsing */
- if (!pskb_may_pull(skb, 2)) {
- net_err_ratelimited("%s: frame too short!\n", __func__);
- goto err;
- }
-
- command = skb->data[0] & CMD_FRAME;
- info.caddr = skb->data[0] & CBROADCAST;
-
- info.pf = skb->data[1] & PF_BIT;
- info.control = skb->data[1] & ~PF_BIT; /* Mask away poll/final bit */
-
- control = info.control;
-
- /* First we check if this frame has a valid connection address */
- if ((info.caddr != self->caddr) && (info.caddr != CBROADCAST)) {
- pr_debug("%s(), wrong connection address!\n",
- __func__);
- goto out;
- }
- /*
- * Optimize for the common case and check if the frame is an
- * I(nformation) frame. Only I-frames have bit 0 set to 0
- */
- if (~control & 0x01) {
- irlap_recv_i_frame(self, skb, &info, command);
- goto out;
- }
- /*
- * We now check is the frame is an S(upervisory) frame. Only
- * S-frames have bit 0 set to 1 and bit 1 set to 0
- */
- if (~control & 0x02) {
- /*
- * Received S(upervisory) frame, check which frame type it is
- * only the first nibble is of interest
- */
- switch (control & 0x0f) {
- case RR:
- irlap_recv_rr_frame(self, skb, &info, command);
- break;
- case RNR:
- irlap_recv_rnr_frame(self, skb, &info, command);
- break;
- case REJ:
- irlap_recv_rej_frame(self, skb, &info, command);
- break;
- case SREJ:
- irlap_recv_srej_frame(self, skb, &info, command);
- break;
- default:
- net_warn_ratelimited("%s: Unknown S-frame %02x received!\n",
- __func__, info.control);
- break;
- }
- goto out;
- }
- /*
- * This must be a C(ontrol) frame
- */
- switch (control) {
- case XID_RSP:
- irlap_recv_discovery_xid_rsp(self, skb, &info);
- break;
- case XID_CMD:
- irlap_recv_discovery_xid_cmd(self, skb, &info);
- break;
- case SNRM_CMD:
- irlap_recv_snrm_cmd(self, skb, &info);
- break;
- case DM_RSP:
- irlap_do_event(self, RECV_DM_RSP, skb, &info);
- break;
- case DISC_CMD: /* And RD_RSP since they have the same value */
- irlap_recv_disc_frame(self, skb, &info, command);
- break;
- case TEST_CMD:
- irlap_recv_test_frame(self, skb, &info, command);
- break;
- case UA_RSP:
- irlap_recv_ua_frame(self, skb, &info);
- break;
- case FRMR_RSP:
- irlap_recv_frmr_frame(self, skb, &info);
- break;
- case UI_FRAME:
- irlap_recv_ui_frame(self, skb, &info);
- break;
- default:
- net_warn_ratelimited("%s: Unknown frame %02x received!\n",
- __func__, info.control);
- break;
- }
-out:
- ret = 0;
-err:
- /* Always drop our reference on the skb */
- dev_kfree_skb(skb);
- return ret;
-}
diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c
deleted file mode 100644
index 43964594aa12..000000000000
--- a/net/irda/irlmp.c
+++ /dev/null
@@ -1,1996 +0,0 @@
-/*********************************************************************
- *
- * Filename: irlmp.c
- * Version: 1.0
- * Description: IrDA Link Management Protocol (LMP) layer
- * Status: Stable.
- * Author: Dag Brattli <dagb@cs.uit.no>
- * Created at: Sun Aug 17 20:54:32 1997
- * Modified at: Wed Jan 5 11:26:03 2000
- * Modified by: Dag Brattli <dagb@cs.uit.no>
- *
- * Copyright (c) 1998-2000 Dag Brattli <dagb@cs.uit.no>,
- * All Rights Reserved.
- * Copyright (c) 2000-2003 Jean Tourrilhes <jt@hpl.hp.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * Neither Dag Brattli nor University of Tromsø admit liability nor
- * provide warranty for any of this software. This material is
- * provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/skbuff.h>
-#include <linux/types.h>
-#include <linux/proc_fs.h>
-#include <linux/init.h>
-#include <linux/kmod.h>
-#include <linux/random.h>
-#include <linux/seq_file.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/timer.h>
-#include <net/irda/qos.h>
-#include <net/irda/irlap.h>
-#include <net/irda/iriap.h>
-#include <net/irda/irlmp.h>
-#include <net/irda/irlmp_frame.h>
-
-#include <asm/unaligned.h>
-
-static __u8 irlmp_find_free_slsap(void);
-static int irlmp_slsap_inuse(__u8 slsap_sel);
-
-/* Master structure */
-struct irlmp_cb *irlmp = NULL;
-
-/* These can be altered by the sysctl interface */
-int sysctl_discovery = 0;
-int sysctl_discovery_timeout = 3; /* 3 seconds by default */
-int sysctl_discovery_slots = 6; /* 6 slots by default */
-int sysctl_lap_keepalive_time = LM_IDLE_TIMEOUT * 1000 / HZ;
-char sysctl_devname[65];
-
-static const char *irlmp_reasons[] = {
- "ERROR, NOT USED",
- "LM_USER_REQUEST",
- "LM_LAP_DISCONNECT",
- "LM_CONNECT_FAILURE",
- "LM_LAP_RESET",
- "LM_INIT_DISCONNECT",
- "ERROR, NOT USED",
- "UNKNOWN",
-};
-
-const char *irlmp_reason_str(LM_REASON reason)
-{
- reason = min_t(size_t, reason, ARRAY_SIZE(irlmp_reasons) - 1);
- return irlmp_reasons[reason];
-}
-
-/*
- * Function irlmp_init (void)
- *
- * Create (allocate) the main IrLMP structure
- *
- */
-int __init irlmp_init(void)
-{
- /* Initialize the irlmp structure. */
- irlmp = kzalloc( sizeof(struct irlmp_cb), GFP_KERNEL);
- if (irlmp == NULL)
- return -ENOMEM;
-
- irlmp->magic = LMP_MAGIC;
-
- irlmp->clients = hashbin_new(HB_LOCK);
- irlmp->services = hashbin_new(HB_LOCK);
- irlmp->links = hashbin_new(HB_LOCK);
- irlmp->unconnected_lsaps = hashbin_new(HB_LOCK);
- irlmp->cachelog = hashbin_new(HB_NOLOCK);
-
- if ((irlmp->clients == NULL) ||
- (irlmp->services == NULL) ||
- (irlmp->links == NULL) ||
- (irlmp->unconnected_lsaps == NULL) ||
- (irlmp->cachelog == NULL)) {
- return -ENOMEM;
- }
-
- spin_lock_init(&irlmp->cachelog->hb_spinlock);
-
- irlmp->last_lsap_sel = 0x0f; /* Reserved 0x00-0x0f */
- strcpy(sysctl_devname, "Linux");
-
- init_timer(&irlmp->discovery_timer);
-
- /* Do discovery every 3 seconds, conditionally */
- if (sysctl_discovery)
- irlmp_start_discovery_timer(irlmp,
- sysctl_discovery_timeout*HZ);
-
- return 0;
-}
-
-/*
- * Function irlmp_cleanup (void)
- *
- * Remove IrLMP layer
- *
- */
-void irlmp_cleanup(void)
-{
- /* Check for main structure */
- IRDA_ASSERT(irlmp != NULL, return;);
- IRDA_ASSERT(irlmp->magic == LMP_MAGIC, return;);
-
- del_timer(&irlmp->discovery_timer);
-
- hashbin_delete(irlmp->links, (FREE_FUNC) kfree);
- hashbin_delete(irlmp->unconnected_lsaps, (FREE_FUNC) kfree);
- hashbin_delete(irlmp->clients, (FREE_FUNC) kfree);
- hashbin_delete(irlmp->services, (FREE_FUNC) kfree);
- hashbin_delete(irlmp->cachelog, (FREE_FUNC) kfree);
-
- /* De-allocate main structure */
- kfree(irlmp);
- irlmp = NULL;
-}
-
-/*
- * Function irlmp_open_lsap (slsap, notify)
- *
- * Register with IrLMP and create a local LSAP,
- * returns handle to LSAP.
- */
-struct lsap_cb *irlmp_open_lsap(__u8 slsap_sel, notify_t *notify, __u8 pid)
-{
- struct lsap_cb *self;
-
- IRDA_ASSERT(notify != NULL, return NULL;);
- IRDA_ASSERT(irlmp != NULL, return NULL;);
- IRDA_ASSERT(irlmp->magic == LMP_MAGIC, return NULL;);
- IRDA_ASSERT(notify->instance != NULL, return NULL;);
-
- /* Does the client care which Source LSAP selector it gets? */
- if (slsap_sel == LSAP_ANY) {
- slsap_sel = irlmp_find_free_slsap();
- if (!slsap_sel)
- return NULL;
- } else if (irlmp_slsap_inuse(slsap_sel))
- return NULL;
-
- /* Allocate new instance of a LSAP connection */
- self = kzalloc(sizeof(struct lsap_cb), GFP_ATOMIC);
- if (self == NULL)
- return NULL;
-
- self->magic = LMP_LSAP_MAGIC;
- self->slsap_sel = slsap_sel;
-
- /* Fix connectionless LSAP's */
- if (slsap_sel == LSAP_CONNLESS) {
-#ifdef CONFIG_IRDA_ULTRA
- self->dlsap_sel = LSAP_CONNLESS;
- self->pid = pid;
-#endif /* CONFIG_IRDA_ULTRA */
- } else
- self->dlsap_sel = LSAP_ANY;
- /* self->connected = FALSE; -> already NULL via memset() */
-
- init_timer(&self->watchdog_timer);
-
- self->notify = *notify;
-
- self->lsap_state = LSAP_DISCONNECTED;
-
- /* Insert into queue of unconnected LSAPs */
- hashbin_insert(irlmp->unconnected_lsaps, (irda_queue_t *) self,
- (long) self, NULL);
-
- return self;
-}
-EXPORT_SYMBOL(irlmp_open_lsap);
-
-/*
- * Function __irlmp_close_lsap (self)
- *
- * Remove an instance of LSAP
- */
-static void __irlmp_close_lsap(struct lsap_cb *self)
-{
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return;);
-
- /*
- * Set some of the variables to preset values
- */
- self->magic = 0;
- del_timer(&self->watchdog_timer); /* Important! */
-
- if (self->conn_skb)
- dev_kfree_skb(self->conn_skb);
-
- kfree(self);
-}
-
-/*
- * Function irlmp_close_lsap (self)
- *
- * Close and remove LSAP
- *
- */
-void irlmp_close_lsap(struct lsap_cb *self)
-{
- struct lap_cb *lap;
- struct lsap_cb *lsap = NULL;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return;);
-
- /*
- * Find out if we should remove this LSAP from a link or from the
- * list of unconnected lsaps (not associated with a link)
- */
- lap = self->lap;
- if (lap) {
- IRDA_ASSERT(lap->magic == LMP_LAP_MAGIC, return;);
- /* We might close a LSAP before it has completed the
- * connection setup. In those case, higher layers won't
- * send a proper disconnect request. Harmless, except
- * that we will forget to close LAP... - Jean II */
- if(self->lsap_state != LSAP_DISCONNECTED) {
- self->lsap_state = LSAP_DISCONNECTED;
- irlmp_do_lap_event(self->lap,
- LM_LAP_DISCONNECT_REQUEST, NULL);
- }
- /* Now, remove from the link */
- lsap = hashbin_remove(lap->lsaps, (long) self, NULL);
-#ifdef CONFIG_IRDA_CACHE_LAST_LSAP
- lap->cache.valid = FALSE;
-#endif
- }
- self->lap = NULL;
- /* Check if we found the LSAP! If not then try the unconnected lsaps */
- if (!lsap) {
- lsap = hashbin_remove(irlmp->unconnected_lsaps, (long) self,
- NULL);
- }
- if (!lsap) {
- pr_debug("%s(), Looks like somebody has removed me already!\n",
- __func__);
- return;
- }
- __irlmp_close_lsap(self);
-}
-EXPORT_SYMBOL(irlmp_close_lsap);
-
-/*
- * Function irlmp_register_irlap (saddr, notify)
- *
- * Register IrLAP layer with IrLMP. There is possible to have multiple
- * instances of the IrLAP layer, each connected to different IrDA ports
- *
- */
-void irlmp_register_link(struct irlap_cb *irlap, __u32 saddr, notify_t *notify)
-{
- struct lap_cb *lap;
-
- IRDA_ASSERT(irlmp != NULL, return;);
- IRDA_ASSERT(irlmp->magic == LMP_MAGIC, return;);
- IRDA_ASSERT(notify != NULL, return;);
-
- /*
- * Allocate new instance of a LSAP connection
- */
- lap = kzalloc(sizeof(struct lap_cb), GFP_KERNEL);
- if (lap == NULL)
- return;
-
- lap->irlap = irlap;
- lap->magic = LMP_LAP_MAGIC;
- lap->saddr = saddr;
- lap->daddr = DEV_ADDR_ANY;
-#ifdef CONFIG_IRDA_CACHE_LAST_LSAP
- lap->cache.valid = FALSE;
-#endif
- lap->lsaps = hashbin_new(HB_LOCK);
- if (lap->lsaps == NULL) {
- net_warn_ratelimited("%s(), unable to kmalloc lsaps\n",
- __func__);
- kfree(lap);
- return;
- }
-
- lap->lap_state = LAP_STANDBY;
-
- init_timer(&lap->idle_timer);
-
- /*
- * Insert into queue of LMP links
- */
- hashbin_insert(irlmp->links, (irda_queue_t *) lap, lap->saddr, NULL);
-
- /*
- * We set only this variable so IrLAP can tell us on which link the
- * different events happened on
- */
- irda_notify_init(notify);
- notify->instance = lap;
-}
-
-/*
- * Function irlmp_unregister_irlap (saddr)
- *
- * IrLAP layer has been removed!
- *
- */
-void irlmp_unregister_link(__u32 saddr)
-{
- struct lap_cb *link;
-
- /* We must remove ourselves from the hashbin *first*. This ensure
- * that no more LSAPs will be open on this link and no discovery
- * will be triggered anymore. Jean II */
- link = hashbin_remove(irlmp->links, saddr, NULL);
- if (link) {
- IRDA_ASSERT(link->magic == LMP_LAP_MAGIC, return;);
-
- /* Kill all the LSAPs on this link. Jean II */
- link->reason = LAP_DISC_INDICATION;
- link->daddr = DEV_ADDR_ANY;
- irlmp_do_lap_event(link, LM_LAP_DISCONNECT_INDICATION, NULL);
-
- /* Remove all discoveries discovered at this link */
- irlmp_expire_discoveries(irlmp->cachelog, link->saddr, TRUE);
-
- /* Final cleanup */
- del_timer(&link->idle_timer);
- link->magic = 0;
- hashbin_delete(link->lsaps, (FREE_FUNC) __irlmp_close_lsap);
- kfree(link);
- }
-}
-
-/*
- * Function irlmp_connect_request (handle, dlsap, userdata)
- *
- * Connect with a peer LSAP
- *
- */
-int irlmp_connect_request(struct lsap_cb *self, __u8 dlsap_sel,
- __u32 saddr, __u32 daddr,
- struct qos_info *qos, struct sk_buff *userdata)
-{
- struct sk_buff *tx_skb = userdata;
- struct lap_cb *lap;
- struct lsap_cb *lsap;
- int ret;
-
- IRDA_ASSERT(self != NULL, return -EBADR;);
- IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return -EBADR;);
-
- pr_debug("%s(), slsap_sel=%02x, dlsap_sel=%02x, saddr=%08x, daddr=%08x\n",
- __func__, self->slsap_sel, dlsap_sel, saddr, daddr);
-
- if (test_bit(0, &self->connected)) {
- ret = -EISCONN;
- goto err;
- }
-
- /* Client must supply destination device address */
- if (!daddr) {
- ret = -EINVAL;
- goto err;
- }
-
- /* Any userdata? */
- if (tx_skb == NULL) {
- tx_skb = alloc_skb(LMP_MAX_HEADER, GFP_ATOMIC);
- if (!tx_skb)
- return -ENOMEM;
-
- skb_reserve(tx_skb, LMP_MAX_HEADER);
- }
-
- /* Make room for MUX control header (3 bytes) */
- IRDA_ASSERT(skb_headroom(tx_skb) >= LMP_CONTROL_HEADER, return -1;);
- skb_push(tx_skb, LMP_CONTROL_HEADER);
-
- self->dlsap_sel = dlsap_sel;
-
- /*
- * Find the link to where we should try to connect since there may
- * be more than one IrDA port on this machine. If the client has
- * passed us the saddr (and already knows which link to use), then
- * we use that to find the link, if not then we have to look in the
- * discovery log and check if any of the links has discovered a
- * device with the given daddr
- */
- if ((!saddr) || (saddr == DEV_ADDR_ANY)) {
- discovery_t *discovery;
- unsigned long flags;
-
- spin_lock_irqsave(&irlmp->cachelog->hb_spinlock, flags);
- if (daddr != DEV_ADDR_ANY)
- discovery = hashbin_find(irlmp->cachelog, daddr, NULL);
- else {
- pr_debug("%s(), no daddr\n", __func__);
- discovery = (discovery_t *)
- hashbin_get_first(irlmp->cachelog);
- }
-
- if (discovery) {
- saddr = discovery->data.saddr;
- daddr = discovery->data.daddr;
- }
- spin_unlock_irqrestore(&irlmp->cachelog->hb_spinlock, flags);
- }
- lap = hashbin_lock_find(irlmp->links, saddr, NULL);
- if (lap == NULL) {
- pr_debug("%s(), Unable to find a usable link!\n", __func__);
- ret = -EHOSTUNREACH;
- goto err;
- }
-
- /* Check if LAP is disconnected or already connected */
- if (lap->daddr == DEV_ADDR_ANY)
- lap->daddr = daddr;
- else if (lap->daddr != daddr) {
- /* Check if some LSAPs are active on this LAP */
- if (HASHBIN_GET_SIZE(lap->lsaps) == 0) {
- /* No active connection, but LAP hasn't been
- * disconnected yet (waiting for timeout in LAP).
- * Maybe we could give LAP a bit of help in this case.
- */
- pr_debug("%s(), sorry, but I'm waiting for LAP to timeout!\n",
- __func__);
- ret = -EAGAIN;
- goto err;
- }
-
- /* LAP is already connected to a different node, and LAP
- * can only talk to one node at a time */
- pr_debug("%s(), sorry, but link is busy!\n", __func__);
- ret = -EBUSY;
- goto err;
- }
-
- self->lap = lap;
-
- /*
- * Remove LSAP from list of unconnected LSAPs and insert it into the
- * list of connected LSAPs for the particular link
- */
- lsap = hashbin_remove(irlmp->unconnected_lsaps, (long) self, NULL);
-
- IRDA_ASSERT(lsap != NULL, return -1;);
- IRDA_ASSERT(lsap->magic == LMP_LSAP_MAGIC, return -1;);
- IRDA_ASSERT(lsap->lap != NULL, return -1;);
- IRDA_ASSERT(lsap->lap->magic == LMP_LAP_MAGIC, return -1;);
-
- hashbin_insert(self->lap->lsaps, (irda_queue_t *) self, (long) self,
- NULL);
-
- set_bit(0, &self->connected); /* TRUE */
-
- /*
- * User supplied qos specifications?
- */
- if (qos)
- self->qos = *qos;
-
- irlmp_do_lsap_event(self, LM_CONNECT_REQUEST, tx_skb);
-
- /* Drop reference count - see irlap_data_request(). */
- dev_kfree_skb(tx_skb);
-
- return 0;
-
-err:
- /* Cleanup */
- if(tx_skb)
- dev_kfree_skb(tx_skb);
- return ret;
-}
-EXPORT_SYMBOL(irlmp_connect_request);
-
-/*
- * Function irlmp_connect_indication (self)
- *
- * Incoming connection
- *
- */
-void irlmp_connect_indication(struct lsap_cb *self, struct sk_buff *skb)
-{
- int max_seg_size;
- int lap_header_size;
- int max_header_size;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return;);
- IRDA_ASSERT(skb != NULL, return;);
- IRDA_ASSERT(self->lap != NULL, return;);
-
- pr_debug("%s(), slsap_sel=%02x, dlsap_sel=%02x\n",
- __func__, self->slsap_sel, self->dlsap_sel);
-
- /* Note : self->lap is set in irlmp_link_data_indication(),
- * (case CONNECT_CMD:) because we have no way to set it here.
- * Similarly, self->dlsap_sel is usually set in irlmp_find_lsap().
- * Jean II */
-
- self->qos = *self->lap->qos;
-
- max_seg_size = self->lap->qos->data_size.value-LMP_HEADER;
- lap_header_size = IRLAP_GET_HEADER_SIZE(self->lap->irlap);
- max_header_size = LMP_HEADER + lap_header_size;
-
- /* Hide LMP_CONTROL_HEADER header from layer above */
- skb_pull(skb, LMP_CONTROL_HEADER);
-
- if (self->notify.connect_indication) {
- /* Don't forget to refcount it - see irlap_driver_rcv(). */
- skb_get(skb);
- self->notify.connect_indication(self->notify.instance, self,
- &self->qos, max_seg_size,
- max_header_size, skb);
- }
-}
-
-/*
- * Function irlmp_connect_response (handle, userdata)
- *
- * Service user is accepting connection
- *
- */
-int irlmp_connect_response(struct lsap_cb *self, struct sk_buff *userdata)
-{
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return -1;);
- IRDA_ASSERT(userdata != NULL, return -1;);
-
- /* We set the connected bit and move the lsap to the connected list
- * in the state machine itself. Jean II */
-
- pr_debug("%s(), slsap_sel=%02x, dlsap_sel=%02x\n",
- __func__, self->slsap_sel, self->dlsap_sel);
-
- /* Make room for MUX control header (3 bytes) */
- IRDA_ASSERT(skb_headroom(userdata) >= LMP_CONTROL_HEADER, return -1;);
- skb_push(userdata, LMP_CONTROL_HEADER);
-
- irlmp_do_lsap_event(self, LM_CONNECT_RESPONSE, userdata);
-
- /* Drop reference count - see irlap_data_request(). */
- dev_kfree_skb(userdata);
-
- return 0;
-}
-EXPORT_SYMBOL(irlmp_connect_response);
-
-/*
- * Function irlmp_connect_confirm (handle, skb)
- *
- * LSAP connection confirmed peer device!
- */
-void irlmp_connect_confirm(struct lsap_cb *self, struct sk_buff *skb)
-{
- int max_header_size;
- int lap_header_size;
- int max_seg_size;
-
- IRDA_ASSERT(skb != NULL, return;);
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return;);
- IRDA_ASSERT(self->lap != NULL, return;);
-
- self->qos = *self->lap->qos;
-
- max_seg_size = self->lap->qos->data_size.value-LMP_HEADER;
- lap_header_size = IRLAP_GET_HEADER_SIZE(self->lap->irlap);
- max_header_size = LMP_HEADER + lap_header_size;
-
- pr_debug("%s(), max_header_size=%d\n",
- __func__, max_header_size);
-
- /* Hide LMP_CONTROL_HEADER header from layer above */
- skb_pull(skb, LMP_CONTROL_HEADER);
-
- if (self->notify.connect_confirm) {
- /* Don't forget to refcount it - see irlap_driver_rcv() */
- skb_get(skb);
- self->notify.connect_confirm(self->notify.instance, self,
- &self->qos, max_seg_size,
- max_header_size, skb);
- }
-}
-
-/*
- * Function irlmp_dup (orig, instance)
- *
- * Duplicate LSAP, can be used by servers to confirm a connection on a
- * new LSAP so it can keep listening on the old one.
- *
- */
-struct lsap_cb *irlmp_dup(struct lsap_cb *orig, void *instance)
-{
- struct lsap_cb *new;
- unsigned long flags;
-
- spin_lock_irqsave(&irlmp->unconnected_lsaps->hb_spinlock, flags);
-
- /* Only allowed to duplicate unconnected LSAP's, and only LSAPs
- * that have received a connect indication. Jean II */
- if ((!hashbin_find(irlmp->unconnected_lsaps, (long) orig, NULL)) ||
- (orig->lap == NULL)) {
- pr_debug("%s(), invalid LSAP (wrong state)\n",
- __func__);
- spin_unlock_irqrestore(&irlmp->unconnected_lsaps->hb_spinlock,
- flags);
- return NULL;
- }
-
- /* Allocate a new instance */
- new = kmemdup(orig, sizeof(*new), GFP_ATOMIC);
- if (!new) {
- pr_debug("%s(), unable to kmalloc\n", __func__);
- spin_unlock_irqrestore(&irlmp->unconnected_lsaps->hb_spinlock,
- flags);
- return NULL;
- }
- /* new->lap = orig->lap; => done in the memcpy() */
- /* new->slsap_sel = orig->slsap_sel; => done in the memcpy() */
- new->conn_skb = NULL;
-
- spin_unlock_irqrestore(&irlmp->unconnected_lsaps->hb_spinlock, flags);
-
- /* Not everything is the same */
- new->notify.instance = instance;
-
- init_timer(&new->watchdog_timer);
-
- hashbin_insert(irlmp->unconnected_lsaps, (irda_queue_t *) new,
- (long) new, NULL);
-
-#ifdef CONFIG_IRDA_CACHE_LAST_LSAP
- /* Make sure that we invalidate the LSAP cache */
- new->lap->cache.valid = FALSE;
-#endif /* CONFIG_IRDA_CACHE_LAST_LSAP */
-
- return new;
-}
-
-/*
- * Function irlmp_disconnect_request (handle, userdata)
- *
- * The service user is requesting disconnection, this will not remove the
- * LSAP, but only mark it as disconnected
- */
-int irlmp_disconnect_request(struct lsap_cb *self, struct sk_buff *userdata)
-{
- struct lsap_cb *lsap;
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return -1;);
- IRDA_ASSERT(userdata != NULL, return -1;);
-
- /* Already disconnected ?
- * There is a race condition between irlmp_disconnect_indication()
- * and us that might mess up the hashbins below. This fixes it.
- * Jean II */
- if (! test_and_clear_bit(0, &self->connected)) {
- pr_debug("%s(), already disconnected!\n", __func__);
- dev_kfree_skb(userdata);
- return -1;
- }
-
- skb_push(userdata, LMP_CONTROL_HEADER);
-
- /*
- * Do the event before the other stuff since we must know
- * which lap layer that the frame should be transmitted on
- */
- irlmp_do_lsap_event(self, LM_DISCONNECT_REQUEST, userdata);
-
- /* Drop reference count - see irlap_data_request(). */
- dev_kfree_skb(userdata);
-
- /*
- * Remove LSAP from list of connected LSAPs for the particular link
- * and insert it into the list of unconnected LSAPs
- */
- IRDA_ASSERT(self->lap != NULL, return -1;);
- IRDA_ASSERT(self->lap->magic == LMP_LAP_MAGIC, return -1;);
- IRDA_ASSERT(self->lap->lsaps != NULL, return -1;);
-
- lsap = hashbin_remove(self->lap->lsaps, (long) self, NULL);
-#ifdef CONFIG_IRDA_CACHE_LAST_LSAP
- self->lap->cache.valid = FALSE;
-#endif
-
- IRDA_ASSERT(lsap != NULL, return -1;);
- IRDA_ASSERT(lsap->magic == LMP_LSAP_MAGIC, return -1;);
- IRDA_ASSERT(lsap == self, return -1;);
-
- hashbin_insert(irlmp->unconnected_lsaps, (irda_queue_t *) self,
- (long) self, NULL);
-
- /* Reset some values */
- self->dlsap_sel = LSAP_ANY;
- self->lap = NULL;
-
- return 0;
-}
-EXPORT_SYMBOL(irlmp_disconnect_request);
-
-/*
- * Function irlmp_disconnect_indication (reason, userdata)
- *
- * LSAP is being closed!
- */
-void irlmp_disconnect_indication(struct lsap_cb *self, LM_REASON reason,
- struct sk_buff *skb)
-{
- struct lsap_cb *lsap;
-
- pr_debug("%s(), reason=%s [%d]\n", __func__,
- irlmp_reason_str(reason), reason);
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return;);
-
- pr_debug("%s(), slsap_sel=%02x, dlsap_sel=%02x\n",
- __func__, self->slsap_sel, self->dlsap_sel);
-
- /* Already disconnected ?
- * There is a race condition between irlmp_disconnect_request()
- * and us that might mess up the hashbins below. This fixes it.
- * Jean II */
- if (! test_and_clear_bit(0, &self->connected)) {
- pr_debug("%s(), already disconnected!\n", __func__);
- return;
- }
-
- /*
- * Remove association between this LSAP and the link it used
- */
- IRDA_ASSERT(self->lap != NULL, return;);
- IRDA_ASSERT(self->lap->lsaps != NULL, return;);
-
- lsap = hashbin_remove(self->lap->lsaps, (long) self, NULL);
-#ifdef CONFIG_IRDA_CACHE_LAST_LSAP
- self->lap->cache.valid = FALSE;
-#endif
-
- IRDA_ASSERT(lsap != NULL, return;);
- IRDA_ASSERT(lsap == self, return;);
- hashbin_insert(irlmp->unconnected_lsaps, (irda_queue_t *) lsap,
- (long) lsap, NULL);
-
- self->dlsap_sel = LSAP_ANY;
- self->lap = NULL;
-
- /*
- * Inform service user
- */
- if (self->notify.disconnect_indication) {
- /* Don't forget to refcount it - see irlap_driver_rcv(). */
- if(skb)
- skb_get(skb);
- self->notify.disconnect_indication(self->notify.instance,
- self, reason, skb);
- } else {
- pr_debug("%s(), no handler\n", __func__);
- }
-}
-
-/*
- * Function irlmp_do_expiry (void)
- *
- * Do a cleanup of the discovery log (remove old entries)
- *
- * Note : separate from irlmp_do_discovery() so that we can handle
- * passive discovery properly.
- */
-void irlmp_do_expiry(void)
-{
- struct lap_cb *lap;
-
- /*
- * Expire discovery on all links which are *not* connected.
- * On links which are connected, we can't do discovery
- * anymore and can't refresh the log, so we freeze the
- * discovery log to keep info about the device we are
- * connected to.
- * This info is mandatory if we want irlmp_connect_request()
- * to work properly. - Jean II
- */
- lap = (struct lap_cb *) hashbin_get_first(irlmp->links);
- while (lap != NULL) {
- IRDA_ASSERT(lap->magic == LMP_LAP_MAGIC, return;);
-
- if (lap->lap_state == LAP_STANDBY) {
- /* Expire discoveries discovered on this link */
- irlmp_expire_discoveries(irlmp->cachelog, lap->saddr,
- FALSE);
- }
- lap = (struct lap_cb *) hashbin_get_next(irlmp->links);
- }
-}
-
-/*
- * Function irlmp_do_discovery (nslots)
- *
- * Do some discovery on all links
- *
- * Note : log expiry is done above.
- */
-void irlmp_do_discovery(int nslots)
-{
- struct lap_cb *lap;
- __u16 *data_hintsp;
-
- /* Make sure the value is sane */
- if ((nslots != 1) && (nslots != 6) && (nslots != 8) && (nslots != 16)){
- net_warn_ratelimited("%s: invalid value for number of slots!\n",
- __func__);
- nslots = sysctl_discovery_slots = 8;
- }
-
- /* Construct new discovery info to be used by IrLAP, */
- data_hintsp = (__u16 *) irlmp->discovery_cmd.data.hints;
- put_unaligned(irlmp->hints.word, data_hintsp);
-
- /*
- * Set character set for device name (we use ASCII), and
- * copy device name. Remember to make room for a \0 at the
- * end
- */
- irlmp->discovery_cmd.data.charset = CS_ASCII;
- strncpy(irlmp->discovery_cmd.data.info, sysctl_devname,
- NICKNAME_MAX_LEN);
- irlmp->discovery_cmd.name_len = strlen(irlmp->discovery_cmd.data.info);
- irlmp->discovery_cmd.nslots = nslots;
-
- /*
- * Try to send discovery packets on all links
- */
- lap = (struct lap_cb *) hashbin_get_first(irlmp->links);
- while (lap != NULL) {
- IRDA_ASSERT(lap->magic == LMP_LAP_MAGIC, return;);
-
- if (lap->lap_state == LAP_STANDBY) {
- /* Try to discover */
- irlmp_do_lap_event(lap, LM_LAP_DISCOVERY_REQUEST,
- NULL);
- }
- lap = (struct lap_cb *) hashbin_get_next(irlmp->links);
- }
-}
-
-/*
- * Function irlmp_discovery_request (nslots)
- *
- * Do a discovery of devices in front of the computer
- *
- * If the caller has registered a client discovery callback, this
- * allow him to receive the full content of the discovery log through
- * this callback (as normally he will receive only new discoveries).
- */
-void irlmp_discovery_request(int nslots)
-{
- /* Return current cached discovery log (in full) */
- irlmp_discovery_confirm(irlmp->cachelog, DISCOVERY_LOG);
-
- /*
- * Start a single discovery operation if discovery is not already
- * running
- */
- if (!sysctl_discovery) {
- /* Check if user wants to override the default */
- if (nslots == DISCOVERY_DEFAULT_SLOTS)
- nslots = sysctl_discovery_slots;
-
- irlmp_do_discovery(nslots);
- /* Note : we never do expiry here. Expiry will run on the
- * discovery timer regardless of the state of sysctl_discovery
- * Jean II */
- }
-}
-EXPORT_SYMBOL(irlmp_discovery_request);
-
-/*
- * Function irlmp_get_discoveries (pn, mask, slots)
- *
- * Return the current discovery log
- *
- * If discovery is not enabled, you should call this function again
- * after 1 or 2 seconds (i.e. after discovery has been done).
- */
-struct irda_device_info *irlmp_get_discoveries(int *pn, __u16 mask, int nslots)
-{
- /* If discovery is not enabled, it's likely that the discovery log
- * will be empty. So, we trigger a single discovery, so that next
- * time the user call us there might be some results in the log.
- * Jean II
- */
- if (!sysctl_discovery) {
- /* Check if user wants to override the default */
- if (nslots == DISCOVERY_DEFAULT_SLOTS)
- nslots = sysctl_discovery_slots;
-
- /* Start discovery - will complete sometime later */
- irlmp_do_discovery(nslots);
- /* Note : we never do expiry here. Expiry will run on the
- * discovery timer regardless of the state of sysctl_discovery
- * Jean II */
- }
-
- /* Return current cached discovery log */
- return irlmp_copy_discoveries(irlmp->cachelog, pn, mask, TRUE);
-}
-EXPORT_SYMBOL(irlmp_get_discoveries);
-
-/*
- * Function irlmp_notify_client (log)
- *
- * Notify all about discovered devices
- *
- * Clients registered with IrLMP are :
- * o IrComm
- * o IrLAN
- * o Any socket (in any state - ouch, that may be a lot !)
- * The client may have defined a callback to be notified in case of
- * partial/selective discovery based on the hints that it passed to IrLMP.
- */
-static inline void
-irlmp_notify_client(irlmp_client_t *client,
- hashbin_t *log, DISCOVERY_MODE mode)
-{
- discinfo_t *discoveries; /* Copy of the discovery log */
- int number; /* Number of nodes in the log */
- int i;
-
- /* Check if client wants or not partial/selective log (optimisation) */
- if (!client->disco_callback)
- return;
-
- /*
- * Locking notes :
- * the old code was manipulating the log directly, which was
- * very racy. Now, we use copy_discoveries, that protects
- * itself while dumping the log for us.
- * The overhead of the copy is compensated by the fact that
- * we only pass new discoveries in normal mode and don't
- * pass the same old entry every 3s to the caller as we used
- * to do (virtual function calling is expensive).
- * Jean II
- */
-
- /*
- * Now, check all discovered devices (if any), and notify client
- * only about the services that the client is interested in
- * We also notify only about the new devices unless the caller
- * explicitly request a dump of the log. Jean II
- */
- discoveries = irlmp_copy_discoveries(log, &number,
- client->hint_mask.word,
- (mode == DISCOVERY_LOG));
- /* Check if the we got some results */
- if (discoveries == NULL)
- return; /* No nodes discovered */
-
- /* Pass all entries to the listener */
- for(i = 0; i < number; i++)
- client->disco_callback(&(discoveries[i]), mode, client->priv);
-
- /* Free up our buffer */
- kfree(discoveries);
-}
-
-/*
- * Function irlmp_discovery_confirm ( self, log)
- *
- * Some device(s) answered to our discovery request! Check to see which
- * device it is, and give indication to the client(s)
- *
- */
-void irlmp_discovery_confirm(hashbin_t *log, DISCOVERY_MODE mode)
-{
- irlmp_client_t *client;
- irlmp_client_t *client_next;
-
- IRDA_ASSERT(log != NULL, return;);
-
- if (!(HASHBIN_GET_SIZE(log)))
- return;
-
- /* For each client - notify callback may touch client list */
- client = (irlmp_client_t *) hashbin_get_first(irlmp->clients);
- while (NULL != hashbin_find_next(irlmp->clients, (long) client, NULL,
- (void *) &client_next) ) {
- /* Check if we should notify client */
- irlmp_notify_client(client, log, mode);
-
- client = client_next;
- }
-}
-
-/*
- * Function irlmp_discovery_expiry (expiry)
- *
- * This device is no longer been discovered, and therefore it is being
- * purged from the discovery log. Inform all clients who have
- * registered for this event...
- *
- * Note : called exclusively from discovery.c
- * Note : this is no longer called under discovery spinlock, so the
- * client can do whatever he wants in the callback.
- */
-void irlmp_discovery_expiry(discinfo_t *expiries, int number)
-{
- irlmp_client_t *client;
- irlmp_client_t *client_next;
- int i;
-
- IRDA_ASSERT(expiries != NULL, return;);
-
- /* For each client - notify callback may touch client list */
- client = (irlmp_client_t *) hashbin_get_first(irlmp->clients);
- while (NULL != hashbin_find_next(irlmp->clients, (long) client, NULL,
- (void *) &client_next) ) {
-
- /* Pass all entries to the listener */
- for(i = 0; i < number; i++) {
- /* Check if we should notify client */
- if ((client->expir_callback) &&
- (client->hint_mask.word &
- get_unaligned((__u16 *)expiries[i].hints)
- & 0x7f7f) )
- client->expir_callback(&(expiries[i]),
- EXPIRY_TIMEOUT,
- client->priv);
- }
-
- /* Next client */
- client = client_next;
- }
-}
-
-/*
- * Function irlmp_get_discovery_response ()
- *
- * Used by IrLAP to get the discovery info it needs when answering
- * discovery requests by other devices.
- */
-discovery_t *irlmp_get_discovery_response(void)
-{
- IRDA_ASSERT(irlmp != NULL, return NULL;);
-
- put_unaligned(irlmp->hints.word, (__u16 *)irlmp->discovery_rsp.data.hints);
-
- /*
- * Set character set for device name (we use ASCII), and
- * copy device name. Remember to make room for a \0 at the
- * end
- */
- irlmp->discovery_rsp.data.charset = CS_ASCII;
-
- strncpy(irlmp->discovery_rsp.data.info, sysctl_devname,
- NICKNAME_MAX_LEN);
- irlmp->discovery_rsp.name_len = strlen(irlmp->discovery_rsp.data.info);
-
- return &irlmp->discovery_rsp;
-}
-
-/*
- * Function irlmp_data_request (self, skb)
- *
- * Send some data to peer device
- *
- * Note on skb management :
- * After calling the lower layers of the IrDA stack, we always
- * kfree() the skb, which drop the reference count (and potentially
- * destroy it).
- * IrLMP and IrLAP may queue the packet, and in those cases will need
- * to use skb_get() to keep it around.
- * Jean II
- */
-int irlmp_data_request(struct lsap_cb *self, struct sk_buff *userdata)
-{
- int ret;
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return -1;);
-
- /* Make room for MUX header */
- IRDA_ASSERT(skb_headroom(userdata) >= LMP_HEADER, return -1;);
- skb_push(userdata, LMP_HEADER);
-
- ret = irlmp_do_lsap_event(self, LM_DATA_REQUEST, userdata);
-
- /* Drop reference count - see irlap_data_request(). */
- dev_kfree_skb(userdata);
-
- return ret;
-}
-EXPORT_SYMBOL(irlmp_data_request);
-
-/*
- * Function irlmp_data_indication (handle, skb)
- *
- * Got data from LAP layer so pass it up to upper layer
- *
- */
-void irlmp_data_indication(struct lsap_cb *self, struct sk_buff *skb)
-{
- /* Hide LMP header from layer above */
- skb_pull(skb, LMP_HEADER);
-
- if (self->notify.data_indication) {
- /* Don't forget to refcount it - see irlap_driver_rcv(). */
- skb_get(skb);
- self->notify.data_indication(self->notify.instance, self, skb);
- }
-}
-
-/*
- * Function irlmp_udata_request (self, skb)
- */
-int irlmp_udata_request(struct lsap_cb *self, struct sk_buff *userdata)
-{
- int ret;
-
- IRDA_ASSERT(userdata != NULL, return -1;);
-
- /* Make room for MUX header */
- IRDA_ASSERT(skb_headroom(userdata) >= LMP_HEADER, return -1;);
- skb_push(userdata, LMP_HEADER);
-
- ret = irlmp_do_lsap_event(self, LM_UDATA_REQUEST, userdata);
-
- /* Drop reference count - see irlap_data_request(). */
- dev_kfree_skb(userdata);
-
- return ret;
-}
-
-/*
- * Function irlmp_udata_indication (self, skb)
- *
- * Send unreliable data (but still within the connection)
- *
- */
-void irlmp_udata_indication(struct lsap_cb *self, struct sk_buff *skb)
-{
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return;);
- IRDA_ASSERT(skb != NULL, return;);
-
- /* Hide LMP header from layer above */
- skb_pull(skb, LMP_HEADER);
-
- if (self->notify.udata_indication) {
- /* Don't forget to refcount it - see irlap_driver_rcv(). */
- skb_get(skb);
- self->notify.udata_indication(self->notify.instance, self,
- skb);
- }
-}
-
-/*
- * Function irlmp_connless_data_request (self, skb)
- */
-#ifdef CONFIG_IRDA_ULTRA
-int irlmp_connless_data_request(struct lsap_cb *self, struct sk_buff *userdata,
- __u8 pid)
-{
- struct sk_buff *clone_skb;
- struct lap_cb *lap;
-
- IRDA_ASSERT(userdata != NULL, return -1;);
-
- /* Make room for MUX and PID header */
- IRDA_ASSERT(skb_headroom(userdata) >= LMP_HEADER+LMP_PID_HEADER,
- return -1;);
-
- /* Insert protocol identifier */
- skb_push(userdata, LMP_PID_HEADER);
- if(self != NULL)
- userdata->data[0] = self->pid;
- else
- userdata->data[0] = pid;
-
- /* Connectionless sockets must use 0x70 */
- skb_push(userdata, LMP_HEADER);
- userdata->data[0] = userdata->data[1] = LSAP_CONNLESS;
-
- /* Try to send Connectionless packets out on all links */
- lap = (struct lap_cb *) hashbin_get_first(irlmp->links);
- while (lap != NULL) {
- IRDA_ASSERT(lap->magic == LMP_LAP_MAGIC, return -1;);
-
- clone_skb = skb_clone(userdata, GFP_ATOMIC);
- if (!clone_skb) {
- dev_kfree_skb(userdata);
- return -ENOMEM;
- }
-
- irlap_unitdata_request(lap->irlap, clone_skb);
- /* irlap_unitdata_request() don't increase refcount,
- * so no dev_kfree_skb() - Jean II */
-
- lap = (struct lap_cb *) hashbin_get_next(irlmp->links);
- }
- dev_kfree_skb(userdata);
-
- return 0;
-}
-#endif /* CONFIG_IRDA_ULTRA */
-
-/*
- * Function irlmp_connless_data_indication (self, skb)
- *
- * Receive unreliable data outside any connection. Mostly used by Ultra
- *
- */
-#ifdef CONFIG_IRDA_ULTRA
-void irlmp_connless_data_indication(struct lsap_cb *self, struct sk_buff *skb)
-{
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return;);
- IRDA_ASSERT(skb != NULL, return;);
-
- /* Hide LMP and PID header from layer above */
- skb_pull(skb, LMP_HEADER+LMP_PID_HEADER);
-
- if (self->notify.udata_indication) {
- /* Don't forget to refcount it - see irlap_driver_rcv(). */
- skb_get(skb);
- self->notify.udata_indication(self->notify.instance, self,
- skb);
- }
-}
-#endif /* CONFIG_IRDA_ULTRA */
-
-/*
- * Propagate status indication from LAP to LSAPs (via LMP)
- * This don't trigger any change of state in lap_cb, lmp_cb or lsap_cb,
- * and the event is stateless, therefore we can bypass both state machines
- * and send the event direct to the LSAP user.
- * Jean II
- */
-void irlmp_status_indication(struct lap_cb *self,
- LINK_STATUS link, LOCK_STATUS lock)
-{
- struct lsap_cb *next;
- struct lsap_cb *curr;
-
- /* Send status_indication to all LSAPs using this link */
- curr = (struct lsap_cb *) hashbin_get_first( self->lsaps);
- while (NULL != hashbin_find_next(self->lsaps, (long) curr, NULL,
- (void *) &next) ) {
- IRDA_ASSERT(curr->magic == LMP_LSAP_MAGIC, return;);
- /*
- * Inform service user if he has requested it
- */
- if (curr->notify.status_indication != NULL)
- curr->notify.status_indication(curr->notify.instance,
- link, lock);
- else
- pr_debug("%s(), no handler\n", __func__);
-
- curr = next;
- }
-}
-
-/*
- * Receive flow control indication from LAP.
- * LAP want us to send it one more frame. We implement a simple round
- * robin scheduler between the active sockets so that we get a bit of
- * fairness. Note that the round robin is far from perfect, but it's
- * better than nothing.
- * We then poll the selected socket so that we can do synchronous
- * refilling of IrLAP (which allow to minimise the number of buffers).
- * Jean II
- */
-void irlmp_flow_indication(struct lap_cb *self, LOCAL_FLOW flow)
-{
- struct lsap_cb *next;
- struct lsap_cb *curr;
- int lsap_todo;
-
- IRDA_ASSERT(self->magic == LMP_LAP_MAGIC, return;);
- IRDA_ASSERT(flow == FLOW_START, return;);
-
- /* Get the number of lsap. That's the only safe way to know
- * that we have looped around... - Jean II */
- lsap_todo = HASHBIN_GET_SIZE(self->lsaps);
- pr_debug("%s() : %d lsaps to scan\n", __func__, lsap_todo);
-
- /* Poll lsap in order until the queue is full or until we
- * tried them all.
- * Most often, the current LSAP will have something to send,
- * so we will go through this loop only once. - Jean II */
- while((lsap_todo--) &&
- (IRLAP_GET_TX_QUEUE_LEN(self->irlap) < LAP_HIGH_THRESHOLD)) {
- /* Try to find the next lsap we should poll. */
- next = self->flow_next;
- /* If we have no lsap, restart from first one */
- if(next == NULL)
- next = (struct lsap_cb *) hashbin_get_first(self->lsaps);
- /* Verify current one and find the next one */
- curr = hashbin_find_next(self->lsaps, (long) next, NULL,
- (void *) &self->flow_next);
- /* Uh-oh... Paranoia */
- if(curr == NULL)
- break;
- pr_debug("%s() : curr is %p, next was %p and is now %p, still %d to go - queue len = %d\n",
- __func__, curr, next, self->flow_next, lsap_todo,
- IRLAP_GET_TX_QUEUE_LEN(self->irlap));
-
- /* Inform lsap user that it can send one more packet. */
- if (curr->notify.flow_indication != NULL)
- curr->notify.flow_indication(curr->notify.instance,
- curr, flow);
- else
- pr_debug("%s(), no handler\n", __func__);
- }
-}
-
-#if 0
-/*
- * Function irlmp_hint_to_service (hint)
- *
- * Returns a list of all servics contained in the given hint bits. This
- * function assumes that the hint bits have the size of two bytes only
- */
-__u8 *irlmp_hint_to_service(__u8 *hint)
-{
- __u8 *service;
- int i = 0;
-
- /*
- * Allocate array to store services in. 16 entries should be safe
- * since we currently only support 2 hint bytes
- */
- service = kmalloc(16, GFP_ATOMIC);
- if (!service)
- return NULL;
-
- if (!hint[0]) {
- pr_debug("<None>\n");
- kfree(service);
- return NULL;
- }
- if (hint[0] & HINT_PNP)
- pr_debug("PnP Compatible ");
- if (hint[0] & HINT_PDA)
- pr_debug("PDA/Palmtop ");
- if (hint[0] & HINT_COMPUTER)
- pr_debug("Computer ");
- if (hint[0] & HINT_PRINTER) {
- pr_debug("Printer ");
- service[i++] = S_PRINTER;
- }
- if (hint[0] & HINT_MODEM)
- pr_debug("Modem ");
- if (hint[0] & HINT_FAX)
- pr_debug("Fax ");
- if (hint[0] & HINT_LAN) {
- pr_debug("LAN Access ");
- service[i++] = S_LAN;
- }
- /*
- * Test if extension byte exists. This byte will usually be
- * there, but this is not really required by the standard.
- * (IrLMP p. 29)
- */
- if (hint[0] & HINT_EXTENSION) {
- if (hint[1] & HINT_TELEPHONY) {
- pr_debug("Telephony ");
- service[i++] = S_TELEPHONY;
- }
- if (hint[1] & HINT_FILE_SERVER)
- pr_debug("File Server ");
-
- if (hint[1] & HINT_COMM) {
- pr_debug("IrCOMM ");
- service[i++] = S_COMM;
- }
- if (hint[1] & HINT_OBEX) {
- pr_debug("IrOBEX ");
- service[i++] = S_OBEX;
- }
- }
- pr_debug("\n");
-
- /* So that client can be notified about any discovery */
- service[i++] = S_ANY;
-
- service[i] = S_END;
-
- return service;
-}
-#endif
-
-static const __u16 service_hint_mapping[S_END][2] = {
- { HINT_PNP, 0 }, /* S_PNP */
- { HINT_PDA, 0 }, /* S_PDA */
- { HINT_COMPUTER, 0 }, /* S_COMPUTER */
- { HINT_PRINTER, 0 }, /* S_PRINTER */
- { HINT_MODEM, 0 }, /* S_MODEM */
- { HINT_FAX, 0 }, /* S_FAX */
- { HINT_LAN, 0 }, /* S_LAN */
- { HINT_EXTENSION, HINT_TELEPHONY }, /* S_TELEPHONY */
- { HINT_EXTENSION, HINT_COMM }, /* S_COMM */
- { HINT_EXTENSION, HINT_OBEX }, /* S_OBEX */
- { 0xFF, 0xFF }, /* S_ANY */
-};
-
-/*
- * Function irlmp_service_to_hint (service)
- *
- * Converts a service type, to a hint bit
- *
- * Returns: a 16 bit hint value, with the service bit set
- */
-__u16 irlmp_service_to_hint(int service)
-{
- __u16_host_order hint;
-
- hint.byte[0] = service_hint_mapping[service][0];
- hint.byte[1] = service_hint_mapping[service][1];
-
- return hint.word;
-}
-EXPORT_SYMBOL(irlmp_service_to_hint);
-
-/*
- * Function irlmp_register_service (service)
- *
- * Register local service with IrLMP
- *
- */
-void *irlmp_register_service(__u16 hints)
-{
- irlmp_service_t *service;
-
- pr_debug("%s(), hints = %04x\n", __func__, hints);
-
- /* Make a new registration */
- service = kmalloc(sizeof(irlmp_service_t), GFP_ATOMIC);
- if (!service)
- return NULL;
-
- service->hints.word = hints;
- hashbin_insert(irlmp->services, (irda_queue_t *) service,
- (long) service, NULL);
-
- irlmp->hints.word |= hints;
-
- return (void *)service;
-}
-EXPORT_SYMBOL(irlmp_register_service);
-
-/*
- * Function irlmp_unregister_service (handle)
- *
- * Unregister service with IrLMP.
- *
- * Returns: 0 on success, -1 on error
- */
-int irlmp_unregister_service(void *handle)
-{
- irlmp_service_t *service;
- unsigned long flags;
-
- if (!handle)
- return -1;
-
- /* Caller may call with invalid handle (it's legal) - Jean II */
- service = hashbin_lock_find(irlmp->services, (long) handle, NULL);
- if (!service) {
- pr_debug("%s(), Unknown service!\n", __func__);
- return -1;
- }
-
- hashbin_remove_this(irlmp->services, (irda_queue_t *) service);
- kfree(service);
-
- /* Remove old hint bits */
- irlmp->hints.word = 0;
-
- /* Refresh current hint bits */
- spin_lock_irqsave(&irlmp->services->hb_spinlock, flags);
- service = (irlmp_service_t *) hashbin_get_first(irlmp->services);
- while (service) {
- irlmp->hints.word |= service->hints.word;
-
- service = (irlmp_service_t *)hashbin_get_next(irlmp->services);
- }
- spin_unlock_irqrestore(&irlmp->services->hb_spinlock, flags);
- return 0;
-}
-EXPORT_SYMBOL(irlmp_unregister_service);
-
-/*
- * Function irlmp_register_client (hint_mask, callback1, callback2)
- *
- * Register a local client with IrLMP
- * First callback is selective discovery (based on hints)
- * Second callback is for selective discovery expiries
- *
- * Returns: handle > 0 on success, 0 on error
- */
-void *irlmp_register_client(__u16 hint_mask, DISCOVERY_CALLBACK1 disco_clb,
- DISCOVERY_CALLBACK2 expir_clb, void *priv)
-{
- irlmp_client_t *client;
-
- IRDA_ASSERT(irlmp != NULL, return NULL;);
-
- /* Make a new registration */
- client = kmalloc(sizeof(irlmp_client_t), GFP_ATOMIC);
- if (!client)
- return NULL;
-
- /* Register the details */
- client->hint_mask.word = hint_mask;
- client->disco_callback = disco_clb;
- client->expir_callback = expir_clb;
- client->priv = priv;
-
- hashbin_insert(irlmp->clients, (irda_queue_t *) client,
- (long) client, NULL);
-
- return (void *) client;
-}
-EXPORT_SYMBOL(irlmp_register_client);
-
-/*
- * Function irlmp_update_client (handle, hint_mask, callback1, callback2)
- *
- * Updates specified client (handle) with possibly new hint_mask and
- * callback
- *
- * Returns: 0 on success, -1 on error
- */
-int irlmp_update_client(void *handle, __u16 hint_mask,
- DISCOVERY_CALLBACK1 disco_clb,
- DISCOVERY_CALLBACK2 expir_clb, void *priv)
-{
- irlmp_client_t *client;
-
- if (!handle)
- return -1;
-
- client = hashbin_lock_find(irlmp->clients, (long) handle, NULL);
- if (!client) {
- pr_debug("%s(), Unknown client!\n", __func__);
- return -1;
- }
-
- client->hint_mask.word = hint_mask;
- client->disco_callback = disco_clb;
- client->expir_callback = expir_clb;
- client->priv = priv;
-
- return 0;
-}
-EXPORT_SYMBOL(irlmp_update_client);
-
-/*
- * Function irlmp_unregister_client (handle)
- *
- * Returns: 0 on success, -1 on error
- *
- */
-int irlmp_unregister_client(void *handle)
-{
- struct irlmp_client *client;
-
- if (!handle)
- return -1;
-
- /* Caller may call with invalid handle (it's legal) - Jean II */
- client = hashbin_lock_find(irlmp->clients, (long) handle, NULL);
- if (!client) {
- pr_debug("%s(), Unknown client!\n", __func__);
- return -1;
- }
-
- pr_debug("%s(), removing client!\n", __func__);
- hashbin_remove_this(irlmp->clients, (irda_queue_t *) client);
- kfree(client);
-
- return 0;
-}
-EXPORT_SYMBOL(irlmp_unregister_client);
-
-/*
- * Function irlmp_slsap_inuse (slsap)
- *
- * Check if the given source LSAP selector is in use
- *
- * This function is clearly not very efficient. On the mitigating side, the
- * stack make sure that in 99% of the cases, we are called only once
- * for each socket allocation. We could probably keep a bitmap
- * of the allocated LSAP, but I'm not sure the complexity is worth it.
- * Jean II
- */
-static int irlmp_slsap_inuse(__u8 slsap_sel)
-{
- struct lsap_cb *self;
- struct lap_cb *lap;
- unsigned long flags;
-
- IRDA_ASSERT(irlmp != NULL, return TRUE;);
- IRDA_ASSERT(irlmp->magic == LMP_MAGIC, return TRUE;);
- IRDA_ASSERT(slsap_sel != LSAP_ANY, return TRUE;);
-
-#ifdef CONFIG_IRDA_ULTRA
- /* Accept all bindings to the connectionless LSAP */
- if (slsap_sel == LSAP_CONNLESS)
- return FALSE;
-#endif /* CONFIG_IRDA_ULTRA */
-
- /* Valid values are between 0 and 127 (0x0-0x6F) */
- if (slsap_sel > LSAP_MAX)
- return TRUE;
-
- /*
- * Check if slsap is already in use. To do this we have to loop over
- * every IrLAP connection and check every LSAP associated with each
- * the connection.
- */
- spin_lock_irqsave_nested(&irlmp->links->hb_spinlock, flags,
- SINGLE_DEPTH_NESTING);
- lap = (struct lap_cb *) hashbin_get_first(irlmp->links);
- while (lap != NULL) {
- IRDA_ASSERT(lap->magic == LMP_LAP_MAGIC, goto errlap;);
-
- /* Careful for priority inversions here !
- * irlmp->links is never taken while another IrDA
- * spinlock is held, so we are safe. Jean II */
- spin_lock(&lap->lsaps->hb_spinlock);
-
- /* For this IrLAP, check all the LSAPs */
- self = (struct lsap_cb *) hashbin_get_first(lap->lsaps);
- while (self != NULL) {
- IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC,
- goto errlsap;);
-
- if ((self->slsap_sel == slsap_sel)) {
- pr_debug("Source LSAP selector=%02x in use\n",
- self->slsap_sel);
- goto errlsap;
- }
- self = (struct lsap_cb*) hashbin_get_next(lap->lsaps);
- }
- spin_unlock(&lap->lsaps->hb_spinlock);
-
- /* Next LAP */
- lap = (struct lap_cb *) hashbin_get_next(irlmp->links);
- }
- spin_unlock_irqrestore(&irlmp->links->hb_spinlock, flags);
-
- /*
- * Server sockets are typically waiting for connections and
- * therefore reside in the unconnected list. We don't want
- * to give out their LSAPs for obvious reasons...
- * Jean II
- */
- spin_lock_irqsave(&irlmp->unconnected_lsaps->hb_spinlock, flags);
-
- self = (struct lsap_cb *) hashbin_get_first(irlmp->unconnected_lsaps);
- while (self != NULL) {
- IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, goto erruncon;);
- if ((self->slsap_sel == slsap_sel)) {
- pr_debug("Source LSAP selector=%02x in use (unconnected)\n",
- self->slsap_sel);
- goto erruncon;
- }
- self = (struct lsap_cb*) hashbin_get_next(irlmp->unconnected_lsaps);
- }
- spin_unlock_irqrestore(&irlmp->unconnected_lsaps->hb_spinlock, flags);
-
- return FALSE;
-
- /* Error exit from within one of the two nested loops.
- * Make sure we release the right spinlock in the righ order.
- * Jean II */
-errlsap:
- spin_unlock(&lap->lsaps->hb_spinlock);
-IRDA_ASSERT_LABEL(errlap:)
- spin_unlock_irqrestore(&irlmp->links->hb_spinlock, flags);
- return TRUE;
-
- /* Error exit from within the unconnected loop.
- * Just one spinlock to release... Jean II */
-erruncon:
- spin_unlock_irqrestore(&irlmp->unconnected_lsaps->hb_spinlock, flags);
- return TRUE;
-}
-
-/*
- * Function irlmp_find_free_slsap ()
- *
- * Find a free source LSAP to use. This function is called if the service
- * user has requested a source LSAP equal to LM_ANY
- */
-static __u8 irlmp_find_free_slsap(void)
-{
- __u8 lsap_sel;
- int wrapped = 0;
-
- IRDA_ASSERT(irlmp != NULL, return -1;);
- IRDA_ASSERT(irlmp->magic == LMP_MAGIC, return -1;);
-
- /* Most users don't really care which LSAPs they are given,
- * and therefore we automatically give them a free LSAP.
- * This function try to find a suitable LSAP, i.e. which is
- * not in use and is within the acceptable range. Jean II */
-
- do {
- /* Always increment to LSAP number before using it.
- * In theory, we could reuse the last LSAP number, as long
- * as it is no longer in use. Some IrDA stack do that.
- * However, the previous socket may be half closed, i.e.
- * we closed it, we think it's no longer in use, but the
- * other side did not receive our close and think it's
- * active and still send data on it.
- * This is similar to what is done with PIDs and TCP ports.
- * Also, this reduce the number of calls to irlmp_slsap_inuse()
- * which is an expensive function to call.
- * Jean II */
- irlmp->last_lsap_sel++;
-
- /* Check if we need to wraparound (0x70-0x7f are reserved) */
- if (irlmp->last_lsap_sel > LSAP_MAX) {
- /* 0x00-0x10 are also reserved for well know ports */
- irlmp->last_lsap_sel = 0x10;
-
- /* Make sure we terminate the loop */
- if (wrapped++) {
- net_err_ratelimited("%s: no more free LSAPs !\n",
- __func__);
- return 0;
- }
- }
-
- /* If the LSAP is in use, try the next one.
- * Despite the autoincrement, we need to check if the lsap
- * is really in use or not, first because LSAP may be
- * directly allocated in irlmp_open_lsap(), and also because
- * we may wraparound on old sockets. Jean II */
- } while (irlmp_slsap_inuse(irlmp->last_lsap_sel));
-
- /* Got it ! */
- lsap_sel = irlmp->last_lsap_sel;
- pr_debug("%s(), found free lsap_sel=%02x\n",
- __func__, lsap_sel);
-
- return lsap_sel;
-}
-
-/*
- * Function irlmp_convert_lap_reason (lap_reason)
- *
- * Converts IrLAP disconnect reason codes to IrLMP disconnect reason
- * codes
- *
- */
-LM_REASON irlmp_convert_lap_reason( LAP_REASON lap_reason)
-{
- int reason = LM_LAP_DISCONNECT;
-
- switch (lap_reason) {
- case LAP_DISC_INDICATION: /* Received a disconnect request from peer */
- pr_debug("%s(), LAP_DISC_INDICATION\n", __func__);
- reason = LM_USER_REQUEST;
- break;
- case LAP_NO_RESPONSE: /* To many retransmits without response */
- pr_debug("%s(), LAP_NO_RESPONSE\n", __func__);
- reason = LM_LAP_DISCONNECT;
- break;
- case LAP_RESET_INDICATION:
- pr_debug("%s(), LAP_RESET_INDICATION\n", __func__);
- reason = LM_LAP_RESET;
- break;
- case LAP_FOUND_NONE:
- case LAP_MEDIA_BUSY:
- case LAP_PRIMARY_CONFLICT:
- pr_debug("%s(), LAP_FOUND_NONE, LAP_MEDIA_BUSY or LAP_PRIMARY_CONFLICT\n",
- __func__);
- reason = LM_CONNECT_FAILURE;
- break;
- default:
- pr_debug("%s(), Unknown IrLAP disconnect reason %d!\n",
- __func__, lap_reason);
- reason = LM_LAP_DISCONNECT;
- break;
- }
-
- return reason;
-}
-
-#ifdef CONFIG_PROC_FS
-
-struct irlmp_iter_state {
- hashbin_t *hashbin;
-};
-
-#define LSAP_START_TOKEN ((void *)1)
-#define LINK_START_TOKEN ((void *)2)
-
-static void *irlmp_seq_hb_idx(struct irlmp_iter_state *iter, loff_t *off)
-{
- void *element;
-
- spin_lock_irq(&iter->hashbin->hb_spinlock);
- for (element = hashbin_get_first(iter->hashbin);
- element != NULL;
- element = hashbin_get_next(iter->hashbin)) {
- if (!off || (*off)-- == 0) {
- /* NB: hashbin left locked */
- return element;
- }
- }
- spin_unlock_irq(&iter->hashbin->hb_spinlock);
- iter->hashbin = NULL;
- return NULL;
-}
-
-
-static void *irlmp_seq_start(struct seq_file *seq, loff_t *pos)
-{
- struct irlmp_iter_state *iter = seq->private;
- void *v;
- loff_t off = *pos;
-
- iter->hashbin = NULL;
- if (off-- == 0)
- return LSAP_START_TOKEN;
-
- iter->hashbin = irlmp->unconnected_lsaps;
- v = irlmp_seq_hb_idx(iter, &off);
- if (v)
- return v;
-
- if (off-- == 0)
- return LINK_START_TOKEN;
-
- iter->hashbin = irlmp->links;
- return irlmp_seq_hb_idx(iter, &off);
-}
-
-static void *irlmp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- struct irlmp_iter_state *iter = seq->private;
-
- ++*pos;
-
- if (v == LSAP_START_TOKEN) { /* start of list of lsaps */
- iter->hashbin = irlmp->unconnected_lsaps;
- v = irlmp_seq_hb_idx(iter, NULL);
- return v ? v : LINK_START_TOKEN;
- }
-
- if (v == LINK_START_TOKEN) { /* start of list of links */
- iter->hashbin = irlmp->links;
- return irlmp_seq_hb_idx(iter, NULL);
- }
-
- v = hashbin_get_next(iter->hashbin);
-
- if (v == NULL) { /* no more in this hash bin */
- spin_unlock_irq(&iter->hashbin->hb_spinlock);
-
- if (iter->hashbin == irlmp->unconnected_lsaps)
- v = LINK_START_TOKEN;
-
- iter->hashbin = NULL;
- }
- return v;
-}
-
-static void irlmp_seq_stop(struct seq_file *seq, void *v)
-{
- struct irlmp_iter_state *iter = seq->private;
-
- if (iter->hashbin)
- spin_unlock_irq(&iter->hashbin->hb_spinlock);
-}
-
-static int irlmp_seq_show(struct seq_file *seq, void *v)
-{
- const struct irlmp_iter_state *iter = seq->private;
- struct lsap_cb *self = v;
-
- if (v == LSAP_START_TOKEN)
- seq_puts(seq, "Unconnected LSAPs:\n");
- else if (v == LINK_START_TOKEN)
- seq_puts(seq, "\nRegistered Link Layers:\n");
- else if (iter->hashbin == irlmp->unconnected_lsaps) {
- self = v;
- IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return -EINVAL; );
- seq_printf(seq, "lsap state: %s, ",
- irlsap_state[ self->lsap_state]);
- seq_printf(seq,
- "slsap_sel: %#02x, dlsap_sel: %#02x, ",
- self->slsap_sel, self->dlsap_sel);
- seq_printf(seq, "(%s)", self->notify.name);
- seq_printf(seq, "\n");
- } else if (iter->hashbin == irlmp->links) {
- struct lap_cb *lap = v;
-
- seq_printf(seq, "lap state: %s, ",
- irlmp_state[lap->lap_state]);
-
- seq_printf(seq, "saddr: %#08x, daddr: %#08x, ",
- lap->saddr, lap->daddr);
- seq_printf(seq, "num lsaps: %d",
- HASHBIN_GET_SIZE(lap->lsaps));
- seq_printf(seq, "\n");
-
- /* Careful for priority inversions here !
- * All other uses of attrib spinlock are independent of
- * the object spinlock, so we are safe. Jean II */
- spin_lock(&lap->lsaps->hb_spinlock);
-
- seq_printf(seq, "\n Connected LSAPs:\n");
- for (self = (struct lsap_cb *) hashbin_get_first(lap->lsaps);
- self != NULL;
- self = (struct lsap_cb *)hashbin_get_next(lap->lsaps)) {
- IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC,
- goto outloop;);
- seq_printf(seq, " lsap state: %s, ",
- irlsap_state[ self->lsap_state]);
- seq_printf(seq,
- "slsap_sel: %#02x, dlsap_sel: %#02x, ",
- self->slsap_sel, self->dlsap_sel);
- seq_printf(seq, "(%s)", self->notify.name);
- seq_putc(seq, '\n');
-
- }
- IRDA_ASSERT_LABEL(outloop:)
- spin_unlock(&lap->lsaps->hb_spinlock);
- seq_putc(seq, '\n');
- } else
- return -EINVAL;
-
- return 0;
-}
-
-static const struct seq_operations irlmp_seq_ops = {
- .start = irlmp_seq_start,
- .next = irlmp_seq_next,
- .stop = irlmp_seq_stop,
- .show = irlmp_seq_show,
-};
-
-static int irlmp_seq_open(struct inode *inode, struct file *file)
-{
- IRDA_ASSERT(irlmp != NULL, return -EINVAL;);
-
- return seq_open_private(file, &irlmp_seq_ops,
- sizeof(struct irlmp_iter_state));
-}
-
-const struct file_operations irlmp_seq_fops = {
- .owner = THIS_MODULE,
- .open = irlmp_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_private,
-};
-
-#endif /* PROC_FS */
diff --git a/net/irda/irlmp_event.c b/net/irda/irlmp_event.c
deleted file mode 100644
index e306cf2c1e04..000000000000
--- a/net/irda/irlmp_event.c
+++ /dev/null
@@ -1,886 +0,0 @@
-/*********************************************************************
- *
- * Filename: irlmp_event.c
- * Version: 0.8
- * Description: An IrDA LMP event driver for Linux
- * Status: Experimental.
- * Author: Dag Brattli <dagb@cs.uit.no>
- * Created at: Mon Aug 4 20:40:53 1997
- * Modified at: Tue Dec 14 23:04:16 1999
- * Modified by: Dag Brattli <dagb@cs.uit.no>
- *
- * Copyright (c) 1998-1999 Dag Brattli <dagb@cs.uit.no>,
- * All Rights Reserved.
- * Copyright (c) 2000-2003 Jean Tourrilhes <jt@hpl.hp.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * Neither Dag Brattli nor University of Tromsø admit liability nor
- * provide warranty for any of this software. This material is
- * provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-#include <linux/kernel.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/timer.h>
-#include <net/irda/irlap.h>
-#include <net/irda/irlmp.h>
-#include <net/irda/irlmp_frame.h>
-#include <net/irda/irlmp_event.h>
-
-const char *const irlmp_state[] = {
- "LAP_STANDBY",
- "LAP_U_CONNECT",
- "LAP_ACTIVE",
-};
-
-const char *const irlsap_state[] = {
- "LSAP_DISCONNECTED",
- "LSAP_CONNECT",
- "LSAP_CONNECT_PEND",
- "LSAP_DATA_TRANSFER_READY",
- "LSAP_SETUP",
- "LSAP_SETUP_PEND",
-};
-
-static const char *const irlmp_event[] __maybe_unused = {
- "LM_CONNECT_REQUEST",
- "LM_CONNECT_CONFIRM",
- "LM_CONNECT_RESPONSE",
- "LM_CONNECT_INDICATION",
-
- "LM_DISCONNECT_INDICATION",
- "LM_DISCONNECT_REQUEST",
-
- "LM_DATA_REQUEST",
- "LM_UDATA_REQUEST",
- "LM_DATA_INDICATION",
- "LM_UDATA_INDICATION",
-
- "LM_WATCHDOG_TIMEOUT",
-
- /* IrLAP events */
- "LM_LAP_CONNECT_REQUEST",
- "LM_LAP_CONNECT_INDICATION",
- "LM_LAP_CONNECT_CONFIRM",
- "LM_LAP_DISCONNECT_INDICATION",
- "LM_LAP_DISCONNECT_REQUEST",
- "LM_LAP_DISCOVERY_REQUEST",
- "LM_LAP_DISCOVERY_CONFIRM",
- "LM_LAP_IDLE_TIMEOUT",
-};
-
-/* LAP Connection control proto declarations */
-static void irlmp_state_standby (struct lap_cb *, IRLMP_EVENT,
- struct sk_buff *);
-static void irlmp_state_u_connect(struct lap_cb *, IRLMP_EVENT,
- struct sk_buff *);
-static void irlmp_state_active (struct lap_cb *, IRLMP_EVENT,
- struct sk_buff *);
-
-/* LSAP Connection control proto declarations */
-static int irlmp_state_disconnected(struct lsap_cb *, IRLMP_EVENT,
- struct sk_buff *);
-static int irlmp_state_connect (struct lsap_cb *, IRLMP_EVENT,
- struct sk_buff *);
-static int irlmp_state_connect_pend(struct lsap_cb *, IRLMP_EVENT,
- struct sk_buff *);
-static int irlmp_state_dtr (struct lsap_cb *, IRLMP_EVENT,
- struct sk_buff *);
-static int irlmp_state_setup (struct lsap_cb *, IRLMP_EVENT,
- struct sk_buff *);
-static int irlmp_state_setup_pend (struct lsap_cb *, IRLMP_EVENT,
- struct sk_buff *);
-
-static void (*lap_state[]) (struct lap_cb *, IRLMP_EVENT, struct sk_buff *) =
-{
- irlmp_state_standby,
- irlmp_state_u_connect,
- irlmp_state_active,
-};
-
-static int (*lsap_state[])( struct lsap_cb *, IRLMP_EVENT, struct sk_buff *) =
-{
- irlmp_state_disconnected,
- irlmp_state_connect,
- irlmp_state_connect_pend,
- irlmp_state_dtr,
- irlmp_state_setup,
- irlmp_state_setup_pend
-};
-
-static inline void irlmp_next_lap_state(struct lap_cb *self,
- IRLMP_STATE state)
-{
- /*
- pr_debug("%s(), LMP LAP = %s\n", __func__, irlmp_state[state]);
- */
- self->lap_state = state;
-}
-
-static inline void irlmp_next_lsap_state(struct lsap_cb *self,
- LSAP_STATE state)
-{
- /*
- IRDA_ASSERT(self != NULL, return;);
- pr_debug("%s(), LMP LSAP = %s\n", __func__, irlsap_state[state]);
- */
- self->lsap_state = state;
-}
-
-/* Do connection control events */
-int irlmp_do_lsap_event(struct lsap_cb *self, IRLMP_EVENT event,
- struct sk_buff *skb)
-{
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return -1;);
-
- pr_debug("%s(), EVENT = %s, STATE = %s\n",
- __func__, irlmp_event[event], irlsap_state[self->lsap_state]);
-
- return (*lsap_state[self->lsap_state]) (self, event, skb);
-}
-
-/*
- * Function do_lap_event (event, skb, info)
- *
- * Do IrLAP control events
- *
- */
-void irlmp_do_lap_event(struct lap_cb *self, IRLMP_EVENT event,
- struct sk_buff *skb)
-{
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == LMP_LAP_MAGIC, return;);
-
- pr_debug("%s(), EVENT = %s, STATE = %s\n", __func__,
- irlmp_event[event],
- irlmp_state[self->lap_state]);
-
- (*lap_state[self->lap_state]) (self, event, skb);
-}
-
-void irlmp_discovery_timer_expired(void *data)
-{
- /* We always cleanup the log (active & passive discovery) */
- irlmp_do_expiry();
-
- irlmp_do_discovery(sysctl_discovery_slots);
-
- /* Restart timer */
- irlmp_start_discovery_timer(irlmp, sysctl_discovery_timeout * HZ);
-}
-
-void irlmp_watchdog_timer_expired(void *data)
-{
- struct lsap_cb *self = (struct lsap_cb *) data;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return;);
-
- irlmp_do_lsap_event(self, LM_WATCHDOG_TIMEOUT, NULL);
-}
-
-void irlmp_idle_timer_expired(void *data)
-{
- struct lap_cb *self = (struct lap_cb *) data;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == LMP_LAP_MAGIC, return;);
-
- irlmp_do_lap_event(self, LM_LAP_IDLE_TIMEOUT, NULL);
-}
-
-/*
- * Send an event on all LSAPs attached to this LAP.
- */
-static inline void
-irlmp_do_all_lsap_event(hashbin_t * lsap_hashbin,
- IRLMP_EVENT event)
-{
- struct lsap_cb *lsap;
- struct lsap_cb *lsap_next;
-
- /* Note : this function use the new hashbin_find_next()
- * function, instead of the old hashbin_get_next().
- * This make sure that we are always pointing one lsap
- * ahead, so that if the current lsap is removed as the
- * result of sending the event, we don't care.
- * Also, as we store the context ourselves, if an enumeration
- * of the same lsap hashbin happens as the result of sending the
- * event, we don't care.
- * The only problem is if the next lsap is removed. In that case,
- * hashbin_find_next() will return NULL and we will abort the
- * enumeration. - Jean II */
-
- /* Also : we don't accept any skb in input. We can *NOT* pass
- * the same skb to multiple clients safely, we would need to
- * skb_clone() it. - Jean II */
-
- lsap = (struct lsap_cb *) hashbin_get_first(lsap_hashbin);
-
- while (NULL != hashbin_find_next(lsap_hashbin,
- (long) lsap,
- NULL,
- (void *) &lsap_next) ) {
- irlmp_do_lsap_event(lsap, event, NULL);
- lsap = lsap_next;
- }
-}
-
-/*********************************************************************
- *
- * LAP connection control states
- *
- ********************************************************************/
-
-/*
- * Function irlmp_state_standby (event, skb, info)
- *
- * STANDBY, The IrLAP connection does not exist.
- *
- */
-static void irlmp_state_standby(struct lap_cb *self, IRLMP_EVENT event,
- struct sk_buff *skb)
-{
- IRDA_ASSERT(self->irlap != NULL, return;);
-
- switch (event) {
- case LM_LAP_DISCOVERY_REQUEST:
- /* irlmp_next_station_state( LMP_DISCOVER); */
-
- irlap_discovery_request(self->irlap, &irlmp->discovery_cmd);
- break;
- case LM_LAP_CONNECT_INDICATION:
- /* It's important to switch state first, to avoid IrLMP to
- * think that the link is free since IrLMP may then start
- * discovery before the connection is properly set up. DB.
- */
- irlmp_next_lap_state(self, LAP_ACTIVE);
-
- /* Just accept connection TODO, this should be fixed */
- irlap_connect_response(self->irlap, skb);
- break;
- case LM_LAP_CONNECT_REQUEST:
- pr_debug("%s() LS_CONNECT_REQUEST\n", __func__);
-
- irlmp_next_lap_state(self, LAP_U_CONNECT);
-
- /* FIXME: need to set users requested QoS */
- irlap_connect_request(self->irlap, self->daddr, NULL, 0);
- break;
- case LM_LAP_DISCONNECT_INDICATION:
- pr_debug("%s(), Error LM_LAP_DISCONNECT_INDICATION\n",
- __func__);
-
- irlmp_next_lap_state(self, LAP_STANDBY);
- break;
- default:
- pr_debug("%s(), Unknown event %s\n",
- __func__, irlmp_event[event]);
- break;
- }
-}
-
-/*
- * Function irlmp_state_u_connect (event, skb, info)
- *
- * U_CONNECT, The layer above has tried to open an LSAP connection but
- * since the IrLAP connection does not exist, we must first start an
- * IrLAP connection. We are now waiting response from IrLAP.
- * */
-static void irlmp_state_u_connect(struct lap_cb *self, IRLMP_EVENT event,
- struct sk_buff *skb)
-{
- pr_debug("%s(), event=%s\n", __func__, irlmp_event[event]);
-
- switch (event) {
- case LM_LAP_CONNECT_INDICATION:
- /* It's important to switch state first, to avoid IrLMP to
- * think that the link is free since IrLMP may then start
- * discovery before the connection is properly set up. DB.
- */
- irlmp_next_lap_state(self, LAP_ACTIVE);
-
- /* Just accept connection TODO, this should be fixed */
- irlap_connect_response(self->irlap, skb);
-
- /* Tell LSAPs that they can start sending data */
- irlmp_do_all_lsap_event(self->lsaps, LM_LAP_CONNECT_CONFIRM);
-
- /* Note : by the time we get there (LAP retries and co),
- * the lsaps may already have gone. This avoid getting stuck
- * forever in LAP_ACTIVE state - Jean II */
- if (HASHBIN_GET_SIZE(self->lsaps) == 0) {
- pr_debug("%s() NO LSAPs !\n", __func__);
- irlmp_start_idle_timer(self, LM_IDLE_TIMEOUT);
- }
- break;
- case LM_LAP_CONNECT_REQUEST:
- /* Already trying to connect */
- break;
- case LM_LAP_CONNECT_CONFIRM:
- /* For all lsap_ce E Associated do LS_Connect_confirm */
- irlmp_next_lap_state(self, LAP_ACTIVE);
-
- /* Tell LSAPs that they can start sending data */
- irlmp_do_all_lsap_event(self->lsaps, LM_LAP_CONNECT_CONFIRM);
-
- /* Note : by the time we get there (LAP retries and co),
- * the lsaps may already have gone. This avoid getting stuck
- * forever in LAP_ACTIVE state - Jean II */
- if (HASHBIN_GET_SIZE(self->lsaps) == 0) {
- pr_debug("%s() NO LSAPs !\n", __func__);
- irlmp_start_idle_timer(self, LM_IDLE_TIMEOUT);
- }
- break;
- case LM_LAP_DISCONNECT_INDICATION:
- pr_debug("%s(), LM_LAP_DISCONNECT_INDICATION\n", __func__);
- irlmp_next_lap_state(self, LAP_STANDBY);
-
- /* Send disconnect event to all LSAPs using this link */
- irlmp_do_all_lsap_event(self->lsaps,
- LM_LAP_DISCONNECT_INDICATION);
- break;
- case LM_LAP_DISCONNECT_REQUEST:
- pr_debug("%s(), LM_LAP_DISCONNECT_REQUEST\n", __func__);
-
- /* One of the LSAP did timeout or was closed, if it was
- * the last one, try to get out of here - Jean II */
- if (HASHBIN_GET_SIZE(self->lsaps) <= 1) {
- irlap_disconnect_request(self->irlap);
- }
- break;
- default:
- pr_debug("%s(), Unknown event %s\n",
- __func__, irlmp_event[event]);
- break;
- }
-}
-
-/*
- * Function irlmp_state_active (event, skb, info)
- *
- * ACTIVE, IrLAP connection is active
- *
- */
-static void irlmp_state_active(struct lap_cb *self, IRLMP_EVENT event,
- struct sk_buff *skb)
-{
- switch (event) {
- case LM_LAP_CONNECT_REQUEST:
- pr_debug("%s(), LS_CONNECT_REQUEST\n", __func__);
-
- /*
- * IrLAP may have a pending disconnect. We tried to close
- * IrLAP, but it was postponed because the link was
- * busy or we were still sending packets. As we now
- * need it, make sure it stays on. Jean II
- */
- irlap_clear_disconnect(self->irlap);
-
- /*
- * LAP connection already active, just bounce back! Since we
- * don't know which LSAP that tried to do this, we have to
- * notify all LSAPs using this LAP, but that should be safe to
- * do anyway.
- */
- irlmp_do_all_lsap_event(self->lsaps, LM_LAP_CONNECT_CONFIRM);
-
- /* Needed by connect indication */
- irlmp_do_all_lsap_event(irlmp->unconnected_lsaps,
- LM_LAP_CONNECT_CONFIRM);
- /* Keep state */
- break;
- case LM_LAP_DISCONNECT_REQUEST:
- /*
- * Need to find out if we should close IrLAP or not. If there
- * is only one LSAP connection left on this link, that LSAP
- * must be the one that tries to close IrLAP. It will be
- * removed later and moved to the list of unconnected LSAPs
- */
- if (HASHBIN_GET_SIZE(self->lsaps) > 0) {
- /* Timer value is checked in irsysctl - Jean II */
- irlmp_start_idle_timer(self, sysctl_lap_keepalive_time * HZ / 1000);
- } else {
- /* No more connections, so close IrLAP */
-
- /* We don't want to change state just yet, because
- * we want to reflect accurately the real state of
- * the LAP, not the state we wish it was in,
- * so that we don't lose LM_LAP_CONNECT_REQUEST.
- * In some cases, IrLAP won't close the LAP
- * immediately. For example, it might still be
- * retrying packets or waiting for the pf bit.
- * As the LAP always send a DISCONNECT_INDICATION
- * in PCLOSE or SCLOSE, just change state on that.
- * Jean II */
- irlap_disconnect_request(self->irlap);
- }
- break;
- case LM_LAP_IDLE_TIMEOUT:
- if (HASHBIN_GET_SIZE(self->lsaps) == 0) {
- /* Same reasoning as above - keep state */
- irlap_disconnect_request(self->irlap);
- }
- break;
- case LM_LAP_DISCONNECT_INDICATION:
- irlmp_next_lap_state(self, LAP_STANDBY);
-
- /* In some case, at this point our side has already closed
- * all lsaps, and we are waiting for the idle_timer to
- * expire. If another device reconnect immediately, the
- * idle timer will expire in the midle of the connection
- * initialisation, screwing up things a lot...
- * Therefore, we must stop the timer... */
- irlmp_stop_idle_timer(self);
-
- /*
- * Inform all connected LSAP's using this link
- */
- irlmp_do_all_lsap_event(self->lsaps,
- LM_LAP_DISCONNECT_INDICATION);
-
- /* Force an expiry of the discovery log.
- * Now that the LAP is free, the system may attempt to
- * connect to another device. Unfortunately, our entries
- * are stale. There is a small window (<3s) before the
- * normal discovery will run and where irlmp_connect_request()
- * can get the wrong info, so make sure things get
- * cleaned *NOW* ;-) - Jean II */
- irlmp_do_expiry();
- break;
- default:
- pr_debug("%s(), Unknown event %s\n",
- __func__, irlmp_event[event]);
- break;
- }
-}
-
-/*********************************************************************
- *
- * LSAP connection control states
- *
- ********************************************************************/
-
-/*
- * Function irlmp_state_disconnected (event, skb, info)
- *
- * DISCONNECTED
- *
- */
-static int irlmp_state_disconnected(struct lsap_cb *self, IRLMP_EVENT event,
- struct sk_buff *skb)
-{
- int ret = 0;
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return -1;);
-
- switch (event) {
-#ifdef CONFIG_IRDA_ULTRA
- case LM_UDATA_INDICATION:
- /* This is most bizarre. Those packets are aka unreliable
- * connected, aka IrLPT or SOCK_DGRAM/IRDAPROTO_UNITDATA.
- * Why do we pass them as Ultra ??? Jean II */
- irlmp_connless_data_indication(self, skb);
- break;
-#endif /* CONFIG_IRDA_ULTRA */
- case LM_CONNECT_REQUEST:
- pr_debug("%s(), LM_CONNECT_REQUEST\n", __func__);
-
- if (self->conn_skb) {
- net_warn_ratelimited("%s: busy with another request!\n",
- __func__);
- return -EBUSY;
- }
- /* Don't forget to refcount it (see irlmp_connect_request()) */
- skb_get(skb);
- self->conn_skb = skb;
-
- irlmp_next_lsap_state(self, LSAP_SETUP_PEND);
-
- /* Start watchdog timer (5 secs for now) */
- irlmp_start_watchdog_timer(self, 5*HZ);
-
- irlmp_do_lap_event(self->lap, LM_LAP_CONNECT_REQUEST, NULL);
- break;
- case LM_CONNECT_INDICATION:
- if (self->conn_skb) {
- net_warn_ratelimited("%s: busy with another request!\n",
- __func__);
- return -EBUSY;
- }
- /* Don't forget to refcount it (see irlap_driver_rcv()) */
- skb_get(skb);
- self->conn_skb = skb;
-
- irlmp_next_lsap_state(self, LSAP_CONNECT_PEND);
-
- /* Start watchdog timer
- * This is not mentionned in the spec, but there is a rare
- * race condition that can get the socket stuck.
- * If we receive this event while our LAP is closing down,
- * the LM_LAP_CONNECT_REQUEST get lost and we get stuck in
- * CONNECT_PEND state forever.
- * The other cause of getting stuck down there is if the
- * higher layer never reply to the CONNECT_INDICATION.
- * Anyway, it make sense to make sure that we always have
- * a backup plan. 1 second is plenty (should be immediate).
- * Jean II */
- irlmp_start_watchdog_timer(self, 1*HZ);
-
- irlmp_do_lap_event(self->lap, LM_LAP_CONNECT_REQUEST, NULL);
- break;
- default:
- pr_debug("%s(), Unknown event %s on LSAP %#02x\n",
- __func__, irlmp_event[event], self->slsap_sel);
- break;
- }
- return ret;
-}
-
-/*
- * Function irlmp_state_connect (self, event, skb)
- *
- * CONNECT
- *
- */
-static int irlmp_state_connect(struct lsap_cb *self, IRLMP_EVENT event,
- struct sk_buff *skb)
-{
- struct lsap_cb *lsap;
- int ret = 0;
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return -1;);
-
- switch (event) {
- case LM_CONNECT_RESPONSE:
- /*
- * Bind this LSAP to the IrLAP link where the connect was
- * received
- */
- lsap = hashbin_remove(irlmp->unconnected_lsaps, (long) self,
- NULL);
-
- IRDA_ASSERT(lsap == self, return -1;);
- IRDA_ASSERT(self->lap != NULL, return -1;);
- IRDA_ASSERT(self->lap->lsaps != NULL, return -1;);
-
- hashbin_insert(self->lap->lsaps, (irda_queue_t *) self,
- (long) self, NULL);
-
- set_bit(0, &self->connected); /* TRUE */
-
- irlmp_send_lcf_pdu(self->lap, self->dlsap_sel,
- self->slsap_sel, CONNECT_CNF, skb);
-
- del_timer(&self->watchdog_timer);
-
- irlmp_next_lsap_state(self, LSAP_DATA_TRANSFER_READY);
- break;
- case LM_WATCHDOG_TIMEOUT:
- /* May happen, who knows...
- * Jean II */
- pr_debug("%s() WATCHDOG_TIMEOUT!\n", __func__);
-
- /* Disconnect, get out... - Jean II */
- self->lap = NULL;
- self->dlsap_sel = LSAP_ANY;
- irlmp_next_lsap_state(self, LSAP_DISCONNECTED);
- break;
- default:
- /* LM_LAP_DISCONNECT_INDICATION : Should never happen, we
- * are *not* yet bound to the IrLAP link. Jean II */
- pr_debug("%s(), Unknown event %s on LSAP %#02x\n",
- __func__, irlmp_event[event], self->slsap_sel);
- break;
- }
- return ret;
-}
-
-/*
- * Function irlmp_state_connect_pend (event, skb, info)
- *
- * CONNECT_PEND
- *
- */
-static int irlmp_state_connect_pend(struct lsap_cb *self, IRLMP_EVENT event,
- struct sk_buff *skb)
-{
- struct sk_buff *tx_skb;
- int ret = 0;
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return -1;);
-
- switch (event) {
- case LM_CONNECT_REQUEST:
- /* Keep state */
- break;
- case LM_CONNECT_RESPONSE:
- pr_debug("%s(), LM_CONNECT_RESPONSE, no indication issued yet\n",
- __func__);
- /* Keep state */
- break;
- case LM_DISCONNECT_REQUEST:
- pr_debug("%s(), LM_DISCONNECT_REQUEST, not yet bound to IrLAP connection\n",
- __func__);
- /* Keep state */
- break;
- case LM_LAP_CONNECT_CONFIRM:
- pr_debug("%s(), LS_CONNECT_CONFIRM\n", __func__);
- irlmp_next_lsap_state(self, LSAP_CONNECT);
-
- tx_skb = self->conn_skb;
- self->conn_skb = NULL;
-
- irlmp_connect_indication(self, tx_skb);
- /* Drop reference count - see irlmp_connect_indication(). */
- dev_kfree_skb(tx_skb);
- break;
- case LM_WATCHDOG_TIMEOUT:
- /* Will happen in some rare cases because of a race condition.
- * Just make sure we don't stay there forever...
- * Jean II */
- pr_debug("%s() WATCHDOG_TIMEOUT!\n", __func__);
-
- /* Go back to disconnected mode, keep the socket waiting */
- self->lap = NULL;
- self->dlsap_sel = LSAP_ANY;
- if(self->conn_skb)
- dev_kfree_skb(self->conn_skb);
- self->conn_skb = NULL;
- irlmp_next_lsap_state(self, LSAP_DISCONNECTED);
- break;
- default:
- /* LM_LAP_DISCONNECT_INDICATION : Should never happen, we
- * are *not* yet bound to the IrLAP link. Jean II */
- pr_debug("%s(), Unknown event %s on LSAP %#02x\n",
- __func__, irlmp_event[event], self->slsap_sel);
- break;
- }
- return ret;
-}
-
-/*
- * Function irlmp_state_dtr (self, event, skb)
- *
- * DATA_TRANSFER_READY
- *
- */
-static int irlmp_state_dtr(struct lsap_cb *self, IRLMP_EVENT event,
- struct sk_buff *skb)
-{
- LM_REASON reason;
- int ret = 0;
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return -1;);
- IRDA_ASSERT(self->lap != NULL, return -1;);
-
- switch (event) {
- case LM_DATA_REQUEST: /* Optimize for the common case */
- irlmp_send_data_pdu(self->lap, self->dlsap_sel,
- self->slsap_sel, FALSE, skb);
- break;
- case LM_DATA_INDICATION: /* Optimize for the common case */
- irlmp_data_indication(self, skb);
- break;
- case LM_UDATA_REQUEST:
- IRDA_ASSERT(skb != NULL, return -1;);
- irlmp_send_data_pdu(self->lap, self->dlsap_sel,
- self->slsap_sel, TRUE, skb);
- break;
- case LM_UDATA_INDICATION:
- irlmp_udata_indication(self, skb);
- break;
- case LM_CONNECT_REQUEST:
- pr_debug("%s(), LM_CONNECT_REQUEST, error, LSAP already connected\n",
- __func__);
- /* Keep state */
- break;
- case LM_CONNECT_RESPONSE:
- pr_debug("%s(), LM_CONNECT_RESPONSE, error, LSAP already connected\n",
- __func__);
- /* Keep state */
- break;
- case LM_DISCONNECT_REQUEST:
- irlmp_send_lcf_pdu(self->lap, self->dlsap_sel, self->slsap_sel,
- DISCONNECT, skb);
- irlmp_next_lsap_state(self, LSAP_DISCONNECTED);
- /* Called only from irlmp_disconnect_request(), will
- * unbind from LAP over there. Jean II */
-
- /* Try to close the LAP connection if its still there */
- if (self->lap) {
- pr_debug("%s(), trying to close IrLAP\n",
- __func__);
- irlmp_do_lap_event(self->lap,
- LM_LAP_DISCONNECT_REQUEST,
- NULL);
- }
- break;
- case LM_LAP_DISCONNECT_INDICATION:
- irlmp_next_lsap_state(self, LSAP_DISCONNECTED);
-
- reason = irlmp_convert_lap_reason(self->lap->reason);
-
- irlmp_disconnect_indication(self, reason, NULL);
- break;
- case LM_DISCONNECT_INDICATION:
- irlmp_next_lsap_state(self, LSAP_DISCONNECTED);
-
- IRDA_ASSERT(self->lap != NULL, return -1;);
- IRDA_ASSERT(self->lap->magic == LMP_LAP_MAGIC, return -1;);
-
- IRDA_ASSERT(skb != NULL, return -1;);
- IRDA_ASSERT(skb->len > 3, return -1;);
- reason = skb->data[3];
-
- /* Try to close the LAP connection */
- pr_debug("%s(), trying to close IrLAP\n", __func__);
- irlmp_do_lap_event(self->lap, LM_LAP_DISCONNECT_REQUEST, NULL);
-
- irlmp_disconnect_indication(self, reason, skb);
- break;
- default:
- pr_debug("%s(), Unknown event %s on LSAP %#02x\n",
- __func__, irlmp_event[event], self->slsap_sel);
- break;
- }
- return ret;
-}
-
-/*
- * Function irlmp_state_setup (event, skb, info)
- *
- * SETUP, Station Control has set up the underlying IrLAP connection.
- * An LSAP connection request has been transmitted to the peer
- * LSAP-Connection Control FSM and we are awaiting reply.
- */
-static int irlmp_state_setup(struct lsap_cb *self, IRLMP_EVENT event,
- struct sk_buff *skb)
-{
- LM_REASON reason;
- int ret = 0;
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return -1;);
-
- switch (event) {
- case LM_CONNECT_CONFIRM:
- irlmp_next_lsap_state(self, LSAP_DATA_TRANSFER_READY);
-
- del_timer(&self->watchdog_timer);
-
- irlmp_connect_confirm(self, skb);
- break;
- case LM_DISCONNECT_INDICATION:
- irlmp_next_lsap_state(self, LSAP_DISCONNECTED);
-
- IRDA_ASSERT(self->lap != NULL, return -1;);
- IRDA_ASSERT(self->lap->magic == LMP_LAP_MAGIC, return -1;);
-
- IRDA_ASSERT(skb != NULL, return -1;);
- IRDA_ASSERT(skb->len > 3, return -1;);
- reason = skb->data[3];
-
- /* Try to close the LAP connection */
- pr_debug("%s(), trying to close IrLAP\n", __func__);
- irlmp_do_lap_event(self->lap, LM_LAP_DISCONNECT_REQUEST, NULL);
-
- irlmp_disconnect_indication(self, reason, skb);
- break;
- case LM_LAP_DISCONNECT_INDICATION:
- irlmp_next_lsap_state(self, LSAP_DISCONNECTED);
-
- del_timer(&self->watchdog_timer);
-
- IRDA_ASSERT(self->lap != NULL, return -1;);
- IRDA_ASSERT(self->lap->magic == LMP_LAP_MAGIC, return -1;);
-
- reason = irlmp_convert_lap_reason(self->lap->reason);
-
- irlmp_disconnect_indication(self, reason, skb);
- break;
- case LM_WATCHDOG_TIMEOUT:
- pr_debug("%s() WATCHDOG_TIMEOUT!\n", __func__);
-
- IRDA_ASSERT(self->lap != NULL, return -1;);
- irlmp_do_lap_event(self->lap, LM_LAP_DISCONNECT_REQUEST, NULL);
- irlmp_next_lsap_state(self, LSAP_DISCONNECTED);
-
- irlmp_disconnect_indication(self, LM_CONNECT_FAILURE, NULL);
- break;
- default:
- pr_debug("%s(), Unknown event %s on LSAP %#02x\n",
- __func__, irlmp_event[event], self->slsap_sel);
- break;
- }
- return ret;
-}
-
-/*
- * Function irlmp_state_setup_pend (event, skb, info)
- *
- * SETUP_PEND, An LM_CONNECT_REQUEST has been received from the service
- * user to set up an LSAP connection. A request has been sent to the
- * LAP FSM to set up the underlying IrLAP connection, and we
- * are awaiting confirm.
- */
-static int irlmp_state_setup_pend(struct lsap_cb *self, IRLMP_EVENT event,
- struct sk_buff *skb)
-{
- struct sk_buff *tx_skb;
- LM_REASON reason;
- int ret = 0;
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(irlmp != NULL, return -1;);
-
- switch (event) {
- case LM_LAP_CONNECT_CONFIRM:
- IRDA_ASSERT(self->conn_skb != NULL, return -1;);
-
- tx_skb = self->conn_skb;
- self->conn_skb = NULL;
-
- irlmp_send_lcf_pdu(self->lap, self->dlsap_sel,
- self->slsap_sel, CONNECT_CMD, tx_skb);
- /* Drop reference count - see irlap_data_request(). */
- dev_kfree_skb(tx_skb);
-
- irlmp_next_lsap_state(self, LSAP_SETUP);
- break;
- case LM_WATCHDOG_TIMEOUT:
- pr_debug("%s() : WATCHDOG_TIMEOUT !\n", __func__);
-
- IRDA_ASSERT(self->lap != NULL, return -1;);
- irlmp_do_lap_event(self->lap, LM_LAP_DISCONNECT_REQUEST, NULL);
- irlmp_next_lsap_state(self, LSAP_DISCONNECTED);
-
- irlmp_disconnect_indication(self, LM_CONNECT_FAILURE, NULL);
- break;
- case LM_LAP_DISCONNECT_INDICATION: /* LS_Disconnect.indication */
- del_timer( &self->watchdog_timer);
-
- irlmp_next_lsap_state(self, LSAP_DISCONNECTED);
-
- reason = irlmp_convert_lap_reason(self->lap->reason);
-
- irlmp_disconnect_indication(self, reason, NULL);
- break;
- default:
- pr_debug("%s(), Unknown event %s on LSAP %#02x\n",
- __func__, irlmp_event[event], self->slsap_sel);
- break;
- }
- return ret;
-}
diff --git a/net/irda/irlmp_frame.c b/net/irda/irlmp_frame.c
deleted file mode 100644
index 38b0f994bc7b..000000000000
--- a/net/irda/irlmp_frame.c
+++ /dev/null
@@ -1,476 +0,0 @@
-/*********************************************************************
- *
- * Filename: irlmp_frame.c
- * Version: 0.9
- * Description: IrLMP frame implementation
- * Status: Experimental.
- * Author: Dag Brattli <dagb@cs.uit.no>
- * Created at: Tue Aug 19 02:09:59 1997
- * Modified at: Mon Dec 13 13:41:12 1999
- * Modified by: Dag Brattli <dagb@cs.uit.no>
- *
- * Copyright (c) 1998-1999 Dag Brattli <dagb@cs.uit.no>
- * All Rights Reserved.
- * Copyright (c) 2000-2003 Jean Tourrilhes <jt@hpl.hp.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * Neither Dag Brattli nor University of Tromsø admit liability nor
- * provide warranty for any of this software. This material is
- * provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-#include <linux/skbuff.h>
-#include <linux/kernel.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/irlap.h>
-#include <net/irda/timer.h>
-#include <net/irda/irlmp.h>
-#include <net/irda/irlmp_frame.h>
-#include <net/irda/discovery.h>
-
-static struct lsap_cb *irlmp_find_lsap(struct lap_cb *self, __u8 dlsap,
- __u8 slsap, int status, hashbin_t *);
-
-inline void irlmp_send_data_pdu(struct lap_cb *self, __u8 dlsap, __u8 slsap,
- int expedited, struct sk_buff *skb)
-{
- skb->data[0] = dlsap;
- skb->data[1] = slsap;
-
- if (expedited) {
- pr_debug("%s(), sending expedited data\n", __func__);
- irlap_data_request(self->irlap, skb, TRUE);
- } else
- irlap_data_request(self->irlap, skb, FALSE);
-}
-
-/*
- * Function irlmp_send_lcf_pdu (dlsap, slsap, opcode,skb)
- *
- * Send Link Control Frame to IrLAP
- */
-void irlmp_send_lcf_pdu(struct lap_cb *self, __u8 dlsap, __u8 slsap,
- __u8 opcode, struct sk_buff *skb)
-{
- __u8 *frame;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == LMP_LAP_MAGIC, return;);
- IRDA_ASSERT(skb != NULL, return;);
-
- frame = skb->data;
-
- frame[0] = dlsap | CONTROL_BIT;
- frame[1] = slsap;
-
- frame[2] = opcode;
-
- if (opcode == DISCONNECT)
- frame[3] = 0x01; /* Service user request */
- else
- frame[3] = 0x00; /* rsvd */
-
- irlap_data_request(self->irlap, skb, FALSE);
-}
-
-/*
- * Function irlmp_input (skb)
- *
- * Used by IrLAP to pass received data frames to IrLMP layer
- *
- */
-void irlmp_link_data_indication(struct lap_cb *self, struct sk_buff *skb,
- int unreliable)
-{
- struct lsap_cb *lsap;
- __u8 slsap_sel; /* Source (this) LSAP address */
- __u8 dlsap_sel; /* Destination LSAP address */
- __u8 *fp;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == LMP_LAP_MAGIC, return;);
- IRDA_ASSERT(skb->len > 2, return;);
-
- fp = skb->data;
-
- /*
- * The next statements may be confusing, but we do this so that
- * destination LSAP of received frame is source LSAP in our view
- */
- slsap_sel = fp[0] & LSAP_MASK;
- dlsap_sel = fp[1];
-
- /*
- * Check if this is an incoming connection, since we must deal with
- * it in a different way than other established connections.
- */
- if ((fp[0] & CONTROL_BIT) && (fp[2] == CONNECT_CMD)) {
- pr_debug("%s(), incoming connection, source LSAP=%d, dest LSAP=%d\n",
- __func__, slsap_sel, dlsap_sel);
-
- /* Try to find LSAP among the unconnected LSAPs */
- lsap = irlmp_find_lsap(self, dlsap_sel, slsap_sel, CONNECT_CMD,
- irlmp->unconnected_lsaps);
-
- /* Maybe LSAP was already connected, so try one more time */
- if (!lsap) {
- pr_debug("%s(), incoming connection for LSAP already connected\n",
- __func__);
- lsap = irlmp_find_lsap(self, dlsap_sel, slsap_sel, 0,
- self->lsaps);
- }
- } else
- lsap = irlmp_find_lsap(self, dlsap_sel, slsap_sel, 0,
- self->lsaps);
-
- if (lsap == NULL) {
- pr_debug("IrLMP, Sorry, no LSAP for received frame!\n");
- pr_debug("%s(), slsap_sel = %02x, dlsap_sel = %02x\n",
- __func__, slsap_sel, dlsap_sel);
- if (fp[0] & CONTROL_BIT) {
- pr_debug("%s(), received control frame %02x\n",
- __func__, fp[2]);
- } else {
- pr_debug("%s(), received data frame\n", __func__);
- }
- return;
- }
-
- /*
- * Check if we received a control frame?
- */
- if (fp[0] & CONTROL_BIT) {
- switch (fp[2]) {
- case CONNECT_CMD:
- lsap->lap = self;
- irlmp_do_lsap_event(lsap, LM_CONNECT_INDICATION, skb);
- break;
- case CONNECT_CNF:
- irlmp_do_lsap_event(lsap, LM_CONNECT_CONFIRM, skb);
- break;
- case DISCONNECT:
- pr_debug("%s(), Disconnect indication!\n",
- __func__);
- irlmp_do_lsap_event(lsap, LM_DISCONNECT_INDICATION,
- skb);
- break;
- case ACCESSMODE_CMD:
- pr_debug("Access mode cmd not implemented!\n");
- break;
- case ACCESSMODE_CNF:
- pr_debug("Access mode cnf not implemented!\n");
- break;
- default:
- pr_debug("%s(), Unknown control frame %02x\n",
- __func__, fp[2]);
- break;
- }
- } else if (unreliable) {
- /* Optimize and bypass the state machine if possible */
- if (lsap->lsap_state == LSAP_DATA_TRANSFER_READY)
- irlmp_udata_indication(lsap, skb);
- else
- irlmp_do_lsap_event(lsap, LM_UDATA_INDICATION, skb);
- } else {
- /* Optimize and bypass the state machine if possible */
- if (lsap->lsap_state == LSAP_DATA_TRANSFER_READY)
- irlmp_data_indication(lsap, skb);
- else
- irlmp_do_lsap_event(lsap, LM_DATA_INDICATION, skb);
- }
-}
-
-/*
- * Function irlmp_link_unitdata_indication (self, skb)
- *
- *
- *
- */
-#ifdef CONFIG_IRDA_ULTRA
-void irlmp_link_unitdata_indication(struct lap_cb *self, struct sk_buff *skb)
-{
- struct lsap_cb *lsap;
- __u8 slsap_sel; /* Source (this) LSAP address */
- __u8 dlsap_sel; /* Destination LSAP address */
- __u8 pid; /* Protocol identifier */
- __u8 *fp;
- unsigned long flags;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == LMP_LAP_MAGIC, return;);
- IRDA_ASSERT(skb->len > 2, return;);
-
- fp = skb->data;
-
- /*
- * The next statements may be confusing, but we do this so that
- * destination LSAP of received frame is source LSAP in our view
- */
- slsap_sel = fp[0] & LSAP_MASK;
- dlsap_sel = fp[1];
- pid = fp[2];
-
- if (pid & 0x80) {
- pr_debug("%s(), extension in PID not supp!\n",
- __func__);
- return;
- }
-
- /* Check if frame is addressed to the connectionless LSAP */
- if ((slsap_sel != LSAP_CONNLESS) || (dlsap_sel != LSAP_CONNLESS)) {
- pr_debug("%s(), dropping frame!\n", __func__);
- return;
- }
-
- /* Search the connectionless LSAP */
- spin_lock_irqsave(&irlmp->unconnected_lsaps->hb_spinlock, flags);
- lsap = (struct lsap_cb *) hashbin_get_first(irlmp->unconnected_lsaps);
- while (lsap != NULL) {
- /*
- * Check if source LSAP and dest LSAP selectors and PID match.
- */
- if ((lsap->slsap_sel == slsap_sel) &&
- (lsap->dlsap_sel == dlsap_sel) &&
- (lsap->pid == pid))
- {
- break;
- }
- lsap = (struct lsap_cb *) hashbin_get_next(irlmp->unconnected_lsaps);
- }
- spin_unlock_irqrestore(&irlmp->unconnected_lsaps->hb_spinlock, flags);
-
- if (lsap)
- irlmp_connless_data_indication(lsap, skb);
- else {
- pr_debug("%s(), found no matching LSAP!\n", __func__);
- }
-}
-#endif /* CONFIG_IRDA_ULTRA */
-
-/*
- * Function irlmp_link_disconnect_indication (reason, userdata)
- *
- * IrLAP has disconnected
- *
- */
-void irlmp_link_disconnect_indication(struct lap_cb *lap,
- struct irlap_cb *irlap,
- LAP_REASON reason,
- struct sk_buff *skb)
-{
- IRDA_ASSERT(lap != NULL, return;);
- IRDA_ASSERT(lap->magic == LMP_LAP_MAGIC, return;);
-
- lap->reason = reason;
- lap->daddr = DEV_ADDR_ANY;
-
- /* FIXME: must do something with the skb if any */
-
- /*
- * Inform station state machine
- */
- irlmp_do_lap_event(lap, LM_LAP_DISCONNECT_INDICATION, NULL);
-}
-
-/*
- * Function irlmp_link_connect_indication (qos)
- *
- * Incoming LAP connection!
- *
- */
-void irlmp_link_connect_indication(struct lap_cb *self, __u32 saddr,
- __u32 daddr, struct qos_info *qos,
- struct sk_buff *skb)
-{
- /* Copy QoS settings for this session */
- self->qos = qos;
-
- /* Update destination device address */
- self->daddr = daddr;
- IRDA_ASSERT(self->saddr == saddr, return;);
-
- irlmp_do_lap_event(self, LM_LAP_CONNECT_INDICATION, skb);
-}
-
-/*
- * Function irlmp_link_connect_confirm (qos)
- *
- * LAP connection confirmed!
- *
- */
-void irlmp_link_connect_confirm(struct lap_cb *self, struct qos_info *qos,
- struct sk_buff *skb)
-{
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == LMP_LAP_MAGIC, return;);
- IRDA_ASSERT(qos != NULL, return;);
-
- /* Don't need use the skb for now */
-
- /* Copy QoS settings for this session */
- self->qos = qos;
-
- irlmp_do_lap_event(self, LM_LAP_CONNECT_CONFIRM, NULL);
-}
-
-/*
- * Function irlmp_link_discovery_indication (self, log)
- *
- * Device is discovering us
- *
- * It's not an answer to our own discoveries, just another device trying
- * to perform discovery, but we don't want to miss the opportunity
- * to exploit this information, because :
- * o We may not actively perform discovery (just passive discovery)
- * o This type of discovery is much more reliable. In some cases, it
- * seem that less than 50% of our discoveries get an answer, while
- * we always get ~100% of these.
- * o Make faster discovery, statistically divide time of discovery
- * events by 2 (important for the latency aspect and user feel)
- * o Even is we do active discovery, the other node might not
- * answer our discoveries (ex: Palm). The Palm will just perform
- * one active discovery and connect directly to us.
- *
- * However, when both devices discover each other, they might attempt to
- * connect to each other following the discovery event, and it would create
- * collisions on the medium (SNRM battle).
- * The "fix" for that is to disable all connection requests in IrLAP
- * for 100ms after a discovery indication by setting the media_busy flag.
- * Previously, we used to postpone the event which was quite ugly. Now
- * that IrLAP takes care of this problem, just pass the event up...
- *
- * Jean II
- */
-void irlmp_link_discovery_indication(struct lap_cb *self,
- discovery_t *discovery)
-{
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == LMP_LAP_MAGIC, return;);
-
- /* Add to main log, cleanup */
- irlmp_add_discovery(irlmp->cachelog, discovery);
-
- /* Just handle it the same way as a discovery confirm,
- * bypass the LM_LAP state machine (see below) */
- irlmp_discovery_confirm(irlmp->cachelog, DISCOVERY_PASSIVE);
-}
-
-/*
- * Function irlmp_link_discovery_confirm (self, log)
- *
- * Called by IrLAP with a list of discoveries after the discovery
- * request has been carried out. A NULL log is received if IrLAP
- * was unable to carry out the discovery request
- *
- */
-void irlmp_link_discovery_confirm(struct lap_cb *self, hashbin_t *log)
-{
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == LMP_LAP_MAGIC, return;);
-
- /* Add to main log, cleanup */
- irlmp_add_discovery_log(irlmp->cachelog, log);
-
- /* Propagate event to various LSAPs registered for it.
- * We bypass the LM_LAP state machine because
- * 1) We do it regardless of the LM_LAP state
- * 2) It doesn't affect the LM_LAP state
- * 3) Faster, slimer, simpler, ...
- * Jean II */
- irlmp_discovery_confirm(irlmp->cachelog, DISCOVERY_ACTIVE);
-}
-
-#ifdef CONFIG_IRDA_CACHE_LAST_LSAP
-static inline void irlmp_update_cache(struct lap_cb *lap,
- struct lsap_cb *lsap)
-{
- /* Prevent concurrent read to get garbage */
- lap->cache.valid = FALSE;
- /* Update cache entry */
- lap->cache.dlsap_sel = lsap->dlsap_sel;
- lap->cache.slsap_sel = lsap->slsap_sel;
- lap->cache.lsap = lsap;
- lap->cache.valid = TRUE;
-}
-#endif
-
-/*
- * Function irlmp_find_handle (self, dlsap_sel, slsap_sel, status, queue)
- *
- * Find handle associated with destination and source LSAP
- *
- * Any IrDA connection (LSAP/TSAP) is uniquely identified by
- * 3 parameters, the local lsap, the remote lsap and the remote address.
- * We may initiate multiple connections to the same remote service
- * (they will have different local lsap), a remote device may initiate
- * multiple connections to the same local service (they will have
- * different remote lsap), or multiple devices may connect to the same
- * service and may use the same remote lsap (and they will have
- * different remote address).
- * So, where is the remote address ? Each LAP connection is made with
- * a single remote device, so imply a specific remote address.
- * Jean II
- */
-static struct lsap_cb *irlmp_find_lsap(struct lap_cb *self, __u8 dlsap_sel,
- __u8 slsap_sel, int status,
- hashbin_t *queue)
-{
- struct lsap_cb *lsap;
- unsigned long flags;
-
- /*
- * Optimize for the common case. We assume that the last frame
- * received is in the same connection as the last one, so check in
- * cache first to avoid the linear search
- */
-#ifdef CONFIG_IRDA_CACHE_LAST_LSAP
- if ((self->cache.valid) &&
- (self->cache.slsap_sel == slsap_sel) &&
- (self->cache.dlsap_sel == dlsap_sel))
- {
- return self->cache.lsap;
- }
-#endif
-
- spin_lock_irqsave(&queue->hb_spinlock, flags);
-
- lsap = (struct lsap_cb *) hashbin_get_first(queue);
- while (lsap != NULL) {
- /*
- * If this is an incoming connection, then the destination
- * LSAP selector may have been specified as LM_ANY so that
- * any client can connect. In that case we only need to check
- * if the source LSAP (in our view!) match!
- */
- if ((status == CONNECT_CMD) &&
- (lsap->slsap_sel == slsap_sel) &&
- (lsap->dlsap_sel == LSAP_ANY)) {
- /* This is where the dest lsap sel is set on incoming
- * lsaps */
- lsap->dlsap_sel = dlsap_sel;
- break;
- }
- /*
- * Check if source LSAP and dest LSAP selectors match.
- */
- if ((lsap->slsap_sel == slsap_sel) &&
- (lsap->dlsap_sel == dlsap_sel))
- break;
-
- lsap = (struct lsap_cb *) hashbin_get_next(queue);
- }
-#ifdef CONFIG_IRDA_CACHE_LAST_LSAP
- if(lsap)
- irlmp_update_cache(self, lsap);
-#endif
- spin_unlock_irqrestore(&queue->hb_spinlock, flags);
-
- /* Return what we've found or NULL */
- return lsap;
-}
diff --git a/net/irda/irmod.c b/net/irda/irmod.c
deleted file mode 100644
index c5e35b85c477..000000000000
--- a/net/irda/irmod.c
+++ /dev/null
@@ -1,199 +0,0 @@
-/*********************************************************************
- *
- * Filename: irmod.c
- * Version: 0.9
- * Description: IrDA stack main entry points
- * Status: Experimental.
- * Author: Dag Brattli <dagb@cs.uit.no>
- * Created at: Mon Dec 15 13:55:39 1997
- * Modified at: Wed Jan 5 15:12:41 2000
- * Modified by: Dag Brattli <dagb@cs.uit.no>
- *
- * Copyright (c) 1997, 1999-2000 Dag Brattli, All Rights Reserved.
- * Copyright (c) 2000-2004 Jean Tourrilhes <jt@hpl.hp.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * Neither Dag Brattli nor University of Tromsø admit liability nor
- * provide warranty for any of this software. This material is
- * provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-/*
- * This file contains the main entry points of the IrDA stack.
- * They are in this file and not af_irda.c because some developpers
- * are using the IrDA stack without the socket API (compiling out
- * af_irda.c).
- * Jean II
- */
-
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/irmod.h> /* notify_t */
-#include <net/irda/irlap.h> /* irlap_init */
-#include <net/irda/irlmp.h> /* irlmp_init */
-#include <net/irda/iriap.h> /* iriap_init */
-#include <net/irda/irttp.h> /* irttp_init */
-#include <net/irda/irda_device.h> /* irda_device_init */
-
-/* Packet type handler.
- * Tell the kernel how IrDA packets should be handled.
- */
-static struct packet_type irda_packet_type __read_mostly = {
- .type = cpu_to_be16(ETH_P_IRDA),
- .func = irlap_driver_rcv, /* Packet type handler irlap_frame.c */
-};
-
-/*
- * Function irda_notify_init (notify)
- *
- * Used for initializing the notify structure
- *
- */
-void irda_notify_init(notify_t *notify)
-{
- notify->data_indication = NULL;
- notify->udata_indication = NULL;
- notify->connect_confirm = NULL;
- notify->connect_indication = NULL;
- notify->disconnect_indication = NULL;
- notify->flow_indication = NULL;
- notify->status_indication = NULL;
- notify->instance = NULL;
- strlcpy(notify->name, "Unknown", sizeof(notify->name));
-}
-EXPORT_SYMBOL(irda_notify_init);
-
-/*
- * Function irda_init (void)
- *
- * Protocol stack initialisation entry point.
- * Initialise the various components of the IrDA stack
- */
-static int __init irda_init(void)
-{
- int ret = 0;
-
- /* Lower layer of the stack */
- irlmp_init();
- irlap_init();
-
- /* Driver/dongle support */
- irda_device_init();
-
- /* Higher layers of the stack */
- iriap_init();
- irttp_init();
- ret = irsock_init();
- if (ret < 0)
- goto out_err_1;
-
- /* Add IrDA packet type (Start receiving packets) */
- dev_add_pack(&irda_packet_type);
-
- /* External APIs */
-#ifdef CONFIG_PROC_FS
- irda_proc_register();
-#endif
-#ifdef CONFIG_SYSCTL
- ret = irda_sysctl_register();
- if (ret < 0)
- goto out_err_2;
-#endif
-
- ret = irda_nl_register();
- if (ret < 0)
- goto out_err_3;
-
- return 0;
-
- out_err_3:
-#ifdef CONFIG_SYSCTL
- irda_sysctl_unregister();
- out_err_2:
-#endif
-#ifdef CONFIG_PROC_FS
- irda_proc_unregister();
-#endif
-
- /* Remove IrDA packet type (stop receiving packets) */
- dev_remove_pack(&irda_packet_type);
-
- /* Remove higher layers */
- irsock_cleanup();
- out_err_1:
- irttp_cleanup();
- iriap_cleanup();
-
- /* Remove lower layers */
- irda_device_cleanup();
- irlap_cleanup(); /* Must be done before irlmp_cleanup()! DB */
-
- /* Remove middle layer */
- irlmp_cleanup();
-
-
- return ret;
-}
-
-/*
- * Function irda_cleanup (void)
- *
- * Protocol stack cleanup/removal entry point.
- * Cleanup the various components of the IrDA stack
- */
-static void __exit irda_cleanup(void)
-{
- /* Remove External APIs */
- irda_nl_unregister();
-
-#ifdef CONFIG_SYSCTL
- irda_sysctl_unregister();
-#endif
-#ifdef CONFIG_PROC_FS
- irda_proc_unregister();
-#endif
-
- /* Remove IrDA packet type (stop receiving packets) */
- dev_remove_pack(&irda_packet_type);
-
- /* Remove higher layers */
- irsock_cleanup();
- irttp_cleanup();
- iriap_cleanup();
-
- /* Remove lower layers */
- irda_device_cleanup();
- irlap_cleanup(); /* Must be done before irlmp_cleanup()! DB */
-
- /* Remove middle layer */
- irlmp_cleanup();
-}
-
-/*
- * The IrDA stack must be initialised *before* drivers get initialised,
- * and *before* higher protocols (IrLAN/IrCOMM/IrNET) get initialised,
- * otherwise bad things will happen (hashbins will be NULL for example).
- * Those modules are at module_init()/device_initcall() level.
- *
- * On the other hand, it needs to be initialised *after* the basic
- * networking, the /proc/net filesystem and sysctl module. Those are
- * currently initialised in .../init/main.c (before initcalls).
- * Also, IrDA drivers needs to be initialised *after* the random number
- * generator (main stack and higher layer init don't need it anymore).
- *
- * Jean II
- */
-subsys_initcall(irda_init);
-module_exit(irda_cleanup);
-
-MODULE_AUTHOR("Dag Brattli <dagb@cs.uit.no> & Jean Tourrilhes <jt@hpl.hp.com>");
-MODULE_DESCRIPTION("The Linux IrDA Protocol Stack");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_NETPROTO(PF_IRDA);
diff --git a/net/irda/irnet/Kconfig b/net/irda/irnet/Kconfig
deleted file mode 100644
index 28c557f0fdd2..000000000000
--- a/net/irda/irnet/Kconfig
+++ /dev/null
@@ -1,13 +0,0 @@
-config IRNET
- tristate "IrNET protocol"
- depends on IRDA && PPP
- help
- Say Y here if you want to build support for the IrNET protocol.
- To compile it as a module, choose M here: the module will be
- called irnet. IrNET is a PPP driver, so you will also need a
- working PPP subsystem (driver, daemon and config)...
-
- IrNET is an alternate way to transfer TCP/IP traffic over IrDA. It
- uses synchronous PPP over a set of point to point IrDA sockets. You
- can use it between Linux machine or with W2k.
-
diff --git a/net/irda/irnet/Makefile b/net/irda/irnet/Makefile
deleted file mode 100644
index 61c365c8a2a0..000000000000
--- a/net/irda/irnet/Makefile
+++ /dev/null
@@ -1,7 +0,0 @@
-#
-# Makefile for the Linux IrDA IrNET protocol layer.
-#
-
-obj-$(CONFIG_IRNET) += irnet.o
-
-irnet-y := irnet_ppp.o irnet_irda.o
diff --git a/net/irda/irnet/irnet.h b/net/irda/irnet/irnet.h
deleted file mode 100644
index 9d451f8ed47a..000000000000
--- a/net/irda/irnet/irnet.h
+++ /dev/null
@@ -1,522 +0,0 @@
-/*
- * IrNET protocol module : Synchronous PPP over an IrDA socket.
- *
- * Jean II - HPL `00 - <jt@hpl.hp.com>
- *
- * This file contains definitions and declarations global to the IrNET module,
- * all grouped in one place...
- * This file is a *private* header, so other modules don't want to know
- * what's in there...
- *
- * Note : as most part of the Linux kernel, this module is available
- * under the GNU General Public License (GPL).
- */
-
-#ifndef IRNET_H
-#define IRNET_H
-
-/************************** DOCUMENTATION ***************************/
-/*
- * What is IrNET
- * -------------
- * IrNET is a protocol allowing to carry TCP/IP traffic between two
- * IrDA peers in an efficient fashion. It is a thin layer, passing PPP
- * packets to IrTTP and vice versa. It uses PPP in synchronous mode,
- * because IrTTP offer a reliable sequenced packet service (as opposed
- * to a byte stream). In fact, you could see IrNET as carrying TCP/IP
- * in a IrDA socket, using PPP to provide the glue.
- *
- * The main difference with traditional PPP over IrCOMM is that we
- * avoid the framing and serial emulation which are a performance
- * bottleneck. It also allows multipoint communications in a sensible
- * fashion.
- *
- * The main difference with IrLAN is that we use PPP for the link
- * management, which is more standard, interoperable and flexible than
- * the IrLAN protocol. For example, PPP adds authentication,
- * encryption, compression, header compression and automated routing
- * setup. And, as IrNET let PPP do the hard work, the implementation
- * is much simpler than IrLAN.
- *
- * The Linux implementation
- * ------------------------
- * IrNET is written on top of the Linux-IrDA stack, and interface with
- * the generic Linux PPP driver. Because IrNET depend on recent
- * changes of the PPP driver interface, IrNET will work only with very
- * recent kernel (2.3.99-pre6 and up).
- *
- * The present implementation offer the following features :
- * o simple user interface using pppd
- * o efficient implementation (interface directly to PPP and IrTTP)
- * o addressing (you can specify the name of the IrNET recipient)
- * o multipoint operation (limited by IrLAP specification)
- * o information in /proc/net/irda/irnet
- * o IrNET events on /dev/irnet (for user space daemon)
- * o IrNET daemon (irnetd) to automatically handle incoming requests
- * o Windows 2000 compatibility (tested, but need more work)
- * Currently missing :
- * o Lot's of testing (that's your job)
- * o Connection retries (may be too hard to do)
- * o Check pppd persist mode
- * o User space daemon (to automatically handle incoming requests)
- *
- * The setup is not currently the most easy, but this should get much
- * better when everything will get integrated...
- *
- * Acknowledgements
- * ----------------
- * This module is based on :
- * o The PPP driver (ppp_synctty/ppp_generic) by Paul Mackerras
- * o The IrLAN protocol (irlan_common/XXX) by Dag Brattli
- * o The IrSock interface (af_irda) by Dag Brattli
- * o Some other bits from the kernel and my drivers...
- * Infinite thanks to those brave souls for providing the infrastructure
- * upon which IrNET is built.
- *
- * Thanks to all my colleagues in HP for helping me. In particular,
- * thanks to Salil Pradhan and Bill Serra for W2k testing...
- * Thanks to Luiz Magalhaes for irnetd and much testing...
- *
- * Thanks to Alan Cox for answering lot's of my stupid questions, and
- * to Paul Mackerras answering my questions on how to best integrate
- * IrNET and pppd.
- *
- * Jean II
- *
- * Note on some implementations choices...
- * ------------------------------------
- * 1) Direct interface vs tty/socket
- * I could have used a tty interface to hook to ppp and use the full
- * socket API to connect to IrDA. The code would have been easier to
- * maintain, and maybe the code would have been smaller...
- * Instead, we hook directly to ppp_generic and to IrTTP, which make
- * things more complicated...
- *
- * The first reason is flexibility : this allow us to create IrNET
- * instances on demand (no /dev/ircommX crap) and to allow linkname
- * specification on pppd command line...
- *
- * Second reason is speed optimisation. If you look closely at the
- * transmit and receive paths, you will notice that they are "super lean"
- * (that's why they look ugly), with no function calls and as little data
- * copy and modification as I could...
- *
- * 2) irnetd in user space
- * irnetd is implemented in user space, which is necessary to call pppd.
- * This also give maximum benefits in term of flexibility and customability,
- * and allow to offer the event channel, useful for other stuff like debug.
- *
- * On the other hand, this require a loose coordination between the
- * present module and irnetd. One critical area is how incoming request
- * are handled.
- * When irnet receive an incoming request, it send an event to irnetd and
- * drop the incoming IrNET socket.
- * irnetd start a pppd instance, which create a new IrNET socket. This new
- * socket is then connected in the originating node to the pppd instance.
- * At this point, in the originating node, the first socket is closed.
- *
- * I admit, this is a bit messy and waste some resources. The alternative
- * is caching incoming socket, and that's also quite messy and waste
- * resources.
- * We also make connection time slower. For example, on a 115 kb/s link it
- * adds 60ms to the connection time (770 ms). However, this is slower than
- * the time it takes to fire up pppd on my P133...
- *
- *
- * History :
- * -------
- *
- * v1 - 15.5.00 - Jean II
- * o Basic IrNET (hook to ppp_generic & IrTTP - incl. multipoint)
- * o control channel on /dev/irnet (set name/address)
- * o event channel on /dev/irnet (for user space daemon)
- *
- * v2 - 5.6.00 - Jean II
- * o Enable DROP_NOT_READY to avoid PPP timeouts & other weirdness...
- * o Add DISCONNECT_TO event and rename DISCONNECT_FROM.
- * o Set official device number alloaction on /dev/irnet
- *
- * v3 - 30.8.00 - Jean II
- * o Update to latest Linux-IrDA changes :
- * - queue_t => irda_queue_t
- * o Update to ppp-2.4.0 :
- * - move irda_irnet_connect from PPPIOCATTACH to TIOCSETD
- * o Add EXPIRE event (depend on new IrDA-Linux patch)
- * o Switch from `hashbin_remove' to `hashbin_remove_this' to fix
- * a multilink bug... (depend on new IrDA-Linux patch)
- * o fix a self->daddr to self->raddr in irda_irnet_connect to fix
- * another multilink bug (darn !)
- * o Remove LINKNAME_IOCTL cruft
- *
- * v3b - 31.8.00 - Jean II
- * o Dump discovery log at event channel startup
- *
- * v4 - 28.9.00 - Jean II
- * o Fix interaction between poll/select and dump discovery log
- * o Add IRNET_BLOCKED_LINK event (depend on new IrDA-Linux patch)
- * o Add IRNET_NOANSWER_FROM event (mostly to help support)
- * o Release flow control in disconnect_indication
- * o Block packets while connecting (speed up connections)
- *
- * v5 - 11.01.01 - Jean II
- * o Init self->max_header_size, just in case...
- * o Set up ap->chan.hdrlen, to get zero copy on tx side working.
- * o avoid tx->ttp->flow->ppp->tx->... loop, by checking flow state
- * Thanks to Christian Gennerat for finding this bug !
- * ---
- * o Declare the proper MTU/MRU that we can support
- * (but PPP doesn't read the MTU value :-()
- * o Declare hashbin HB_NOLOCK instead of HB_LOCAL to avoid
- * disabling and enabling irq twice
- *
- * v6 - 31.05.01 - Jean II
- * o Print source address in Found, Discovery, Expiry & Request events
- * o Print requested source address in /proc/net/irnet
- * o Change control channel input. Allow multiple commands in one line.
- * o Add saddr command to change ap->rsaddr (and use that in IrDA)
- * ---
- * o Make the IrDA connection procedure totally asynchronous.
- * Heavy rewrite of the IAS query code and the whole connection
- * procedure. Now, irnet_connect() no longer need to be called from
- * a process context...
- * o Enable IrDA connect retries in ppp_irnet_send(). The good thing
- * is that IrDA connect retries are directly driven by PPP LCP
- * retries (we retry for each LCP packet), so that everything
- * is transparently controlled from pppd lcp-max-configure.
- * o Add ttp_connect flag to prevent rentry on the connect procedure
- * o Test and fixups to eliminate side effects of retries
- *
- * v7 - 22.08.01 - Jean II
- * o Cleanup : Change "saddr = 0x0" to "saddr = DEV_ADDR_ANY"
- * o Fix bug in BLOCK_WHEN_CONNECT introduced in v6 : due to the
- * asynchronous IAS query, self->tsap is NULL when PPP send the
- * first packet. This was preventing "connect-delay 0" to work.
- * Change the test in ppp_irnet_send() to self->ttp_connect.
- *
- * v8 - 1.11.01 - Jean II
- * o Tighten the use of self->ttp_connect and self->ttp_open to
- * prevent various race conditions.
- * o Avoid leaking discovery log and skb
- * o Replace "self" with "server" in irnet_connect_indication() to
- * better detect cut'n'paste error ;-)
- *
- * v9 - 29.11.01 - Jean II
- * o Fix event generation in disconnect indication that I broke in v8
- * It was always generation "No-Answer" because I was testing ttp_open
- * just after clearing it. *blush*.
- * o Use newly created irttp_listen() to fix potential crash when LAP
- * destroyed before irnet module removed.
- *
- * v10 - 4.3.2 - Jean II
- * o When receiving a disconnect indication, don't reenable the
- * PPP Tx queue, this will trigger a reconnect. Instead, close
- * the channel, which will kill pppd...
- *
- * v11 - 20.3.02 - Jean II
- * o Oops ! v10 fix disabled IrNET retries and passive behaviour.
- * Better fix in irnet_disconnect_indication() :
- * - if connected, kill pppd via hangup.
- * - if not connected, reenable ppp Tx, which trigger IrNET retry.
- *
- * v12 - 10.4.02 - Jean II
- * o Fix race condition in irnet_connect_indication().
- * If the socket was already trying to connect, drop old connection
- * and use new one only if acting as primary. See comments.
- *
- * v13 - 30.5.02 - Jean II
- * o Update module init code
- *
- * v14 - 20.2.03 - Jean II
- * o Add discovery hint bits in the control channel.
- * o Remove obsolete MOD_INC/DEC_USE_COUNT in favor of .owner
- *
- * v15 - 7.4.03 - Jean II
- * o Replace spin_lock_irqsave() with spin_lock_bh() so that we can
- * use ppp_unit_number(). It's probably also better overall...
- * o Disable call to ppp_unregister_channel(), because we can't do it.
- */
-
-/***************************** INCLUDES *****************************/
-
-#include <linux/module.h>
-
-#include <linux/kernel.h>
-#include <linux/skbuff.h>
-#include <linux/tty.h>
-#include <linux/proc_fs.h>
-#include <linux/netdevice.h>
-#include <linux/poll.h>
-#include <linux/capability.h>
-#include <linux/ctype.h> /* isspace() */
-#include <linux/string.h> /* skip_spaces() */
-#include <linux/uaccess.h>
-#include <linux/init.h>
-
-#include <linux/ppp_defs.h>
-#include <linux/ppp-ioctl.h>
-#include <linux/ppp_channel.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/iriap.h>
-#include <net/irda/irias_object.h>
-#include <net/irda/irlmp.h>
-#include <net/irda/irttp.h>
-#include <net/irda/discovery.h>
-
-/***************************** OPTIONS *****************************/
-/*
- * Define or undefine to compile or not some optional part of the
- * IrNET driver...
- * Note : the present defaults make sense, play with that at your
- * own risk...
- */
-/* IrDA side of the business... */
-#define DISCOVERY_NOMASK /* To enable W2k compatibility... */
-#define ADVERTISE_HINT /* Advertise IrLAN hint bit */
-#define ALLOW_SIMULT_CONNECT /* This seem to work, cross fingers... */
-#define DISCOVERY_EVENTS /* Query the discovery log to post events */
-#define INITIAL_DISCOVERY /* Dump current discovery log as events */
-#undef STREAM_COMPAT /* Not needed - potentially messy */
-#undef CONNECT_INDIC_KICK /* Might mess IrDA, not needed */
-#undef FAIL_SEND_DISCONNECT /* Might mess IrDA, not needed */
-#undef PASS_CONNECT_PACKETS /* Not needed ? Safe */
-#undef MISSING_PPP_API /* Stuff I wish I could do */
-
-/* PPP side of the business */
-#define BLOCK_WHEN_CONNECT /* Block packets when connecting */
-#define CONNECT_IN_SEND /* Retry IrDA connection procedure */
-#undef FLUSH_TO_PPP /* Not sure about this one, let's play safe */
-#undef SECURE_DEVIRNET /* Bah... */
-
-/****************************** DEBUG ******************************/
-
-/*
- * This set of flags enable and disable all the various warning,
- * error and debug message of this driver.
- * Each section can be enabled and disabled independently
- */
-/* In the PPP part */
-#define DEBUG_CTRL_TRACE 0 /* Control channel */
-#define DEBUG_CTRL_INFO 0 /* various info */
-#define DEBUG_CTRL_ERROR 1 /* problems */
-#define DEBUG_FS_TRACE 0 /* filesystem callbacks */
-#define DEBUG_FS_INFO 0 /* various info */
-#define DEBUG_FS_ERROR 1 /* problems */
-#define DEBUG_PPP_TRACE 0 /* PPP related functions */
-#define DEBUG_PPP_INFO 0 /* various info */
-#define DEBUG_PPP_ERROR 1 /* problems */
-#define DEBUG_MODULE_TRACE 0 /* module insertion/removal */
-#define DEBUG_MODULE_ERROR 1 /* problems */
-
-/* In the IrDA part */
-#define DEBUG_IRDA_SR_TRACE 0 /* IRDA subroutines */
-#define DEBUG_IRDA_SR_INFO 0 /* various info */
-#define DEBUG_IRDA_SR_ERROR 1 /* problems */
-#define DEBUG_IRDA_SOCK_TRACE 0 /* IRDA main socket functions */
-#define DEBUG_IRDA_SOCK_INFO 0 /* various info */
-#define DEBUG_IRDA_SOCK_ERROR 1 /* problems */
-#define DEBUG_IRDA_SERV_TRACE 0 /* The IrNET server */
-#define DEBUG_IRDA_SERV_INFO 0 /* various info */
-#define DEBUG_IRDA_SERV_ERROR 1 /* problems */
-#define DEBUG_IRDA_TCB_TRACE 0 /* IRDA IrTTP callbacks */
-#define DEBUG_IRDA_CB_INFO 0 /* various info */
-#define DEBUG_IRDA_CB_ERROR 1 /* problems */
-#define DEBUG_IRDA_OCB_TRACE 0 /* IRDA other callbacks */
-#define DEBUG_IRDA_OCB_INFO 0 /* various info */
-#define DEBUG_IRDA_OCB_ERROR 1 /* problems */
-
-#define DEBUG_ASSERT 0 /* Verify all assertions */
-
-/*
- * These are the macros we are using to actually print the debug
- * statements. Don't look at it, it's ugly...
- *
- * One of the trick is that, as the DEBUG_XXX are constant, the
- * compiler will optimise away the if() in all cases.
- */
-/* All error messages (will show up in the normal logs) */
-#define DERROR(dbg, format, args...) \
- {if(DEBUG_##dbg) \
- printk(KERN_INFO "irnet: %s(): " format, __func__ , ##args);}
-
-/* Normal debug message (will show up in /var/log/debug) */
-#define DEBUG(dbg, format, args...) \
- {if(DEBUG_##dbg) \
- printk(KERN_DEBUG "irnet: %s(): " format, __func__ , ##args);}
-
-/* Entering a function (trace) */
-#define DENTER(dbg, format, args...) \
- {if(DEBUG_##dbg) \
- printk(KERN_DEBUG "irnet: -> %s" format, __func__ , ##args);}
-
-/* Entering and exiting a function in one go (trace) */
-#define DPASS(dbg, format, args...) \
- {if(DEBUG_##dbg) \
- printk(KERN_DEBUG "irnet: <>%s" format, __func__ , ##args);}
-
-/* Exiting a function (trace) */
-#define DEXIT(dbg, format, args...) \
- {if(DEBUG_##dbg) \
- printk(KERN_DEBUG "irnet: <-%s()" format, __func__ , ##args);}
-
-/* Exit a function with debug */
-#define DRETURN(ret, dbg, args...) \
- {DEXIT(dbg, ": " args);\
- return ret; }
-
-/* Exit a function on failed condition */
-#define DABORT(cond, ret, dbg, args...) \
- {if(cond) {\
- DERROR(dbg, args);\
- return ret; }}
-
-/* Invalid assertion, print out an error and exit... */
-#define DASSERT(cond, ret, dbg, args...) \
- {if((DEBUG_ASSERT) && !(cond)) {\
- DERROR(dbg, "Invalid assertion: " args);\
- return ret; }}
-
-/************************ CONSTANTS & MACROS ************************/
-
-/* Paranoia */
-#define IRNET_MAGIC 0xB00754
-
-/* Number of control events in the control channel buffer... */
-#define IRNET_MAX_EVENTS 8 /* Should be more than enough... */
-
-/****************************** TYPES ******************************/
-
-/*
- * This is the main structure where we store all the data pertaining to
- * one instance of irnet.
- * Note : in irnet functions, a pointer this structure is usually called
- * "ap" or "self". If the code is borrowed from the IrDA stack, it tend
- * to be called "self", and if it is borrowed from the PPP driver it is
- * "ap". Apart from that, it's exactly the same structure ;-)
- */
-typedef struct irnet_socket
-{
- /* ------------------- Instance management ------------------- */
- /* We manage a linked list of IrNET socket instances */
- irda_queue_t q; /* Must be first - for hasbin */
- int magic; /* Paranoia */
-
- /* --------------------- FileSystem part --------------------- */
- /* "pppd" interact directly with us on a /dev/ file */
- struct file * file; /* File descriptor of this instance */
- /* TTY stuff - to keep "pppd" happy */
- struct ktermios termios; /* Various tty flags */
- /* Stuff for the control channel */
- int event_index; /* Last read in the event log */
-
- /* ------------------------- PPP part ------------------------- */
- /* We interface directly to the ppp_generic driver in the kernel */
- int ppp_open; /* registered with ppp_generic */
- struct ppp_channel chan; /* Interface to generic ppp layer */
-
- int mru; /* Max size of PPP payload */
- u32 xaccm[8]; /* Asynchronous character map (just */
- u32 raccm; /* to please pppd - dummy) */
- unsigned int flags; /* PPP flags (compression, ...) */
- unsigned int rbits; /* Unused receive flags ??? */
- struct work_struct disconnect_work; /* Process context disconnection */
- /* ------------------------ IrTTP part ------------------------ */
- /* We create a pseudo "socket" over the IrDA tranport */
- unsigned long ttp_open; /* Set when IrTTP is ready */
- unsigned long ttp_connect; /* Set when IrTTP is connecting */
- struct tsap_cb * tsap; /* IrTTP instance (the connection) */
-
- char rname[NICKNAME_MAX_LEN + 1];
- /* IrDA nickname of destination */
- __u32 rdaddr; /* Requested peer IrDA address */
- __u32 rsaddr; /* Requested local IrDA address */
- __u32 daddr; /* actual peer IrDA address */
- __u32 saddr; /* my local IrDA address */
- __u8 dtsap_sel; /* Remote TSAP selector */
- __u8 stsap_sel; /* Local TSAP selector */
-
- __u32 max_sdu_size_rx;/* Socket parameters used for IrTTP */
- __u32 max_sdu_size_tx;
- __u32 max_data_size;
- __u8 max_header_size;
- LOCAL_FLOW tx_flow; /* State of the Tx path in IrTTP */
-
- /* ------------------- IrLMP and IrIAS part ------------------- */
- /* Used for IrDA Discovery and socket name resolution */
- void * ckey; /* IrLMP client handle */
- __u16 mask; /* Hint bits mask (filter discov.)*/
- int nslots; /* Number of slots for discovery */
-
- struct iriap_cb * iriap; /* Used to query remote IAS */
- int errno; /* status of the IAS query */
-
- /* -------------------- Discovery log part -------------------- */
- /* Used by initial discovery on the control channel
- * and by irnet_discover_daddr_and_lsap_sel() */
- struct irda_device_info *discoveries; /* Copy of the discovery log */
- int disco_index; /* Last read in the discovery log */
- int disco_number; /* Size of the discovery log */
-
- struct mutex lock;
-
-} irnet_socket;
-
-/*
- * This is the various event that we will generate on the control channel
- */
-typedef enum irnet_event
-{
- IRNET_DISCOVER, /* New IrNET node discovered */
- IRNET_EXPIRE, /* IrNET node expired */
- IRNET_CONNECT_TO, /* IrNET socket has connected to other node */
- IRNET_CONNECT_FROM, /* Other node has connected to IrNET socket */
- IRNET_REQUEST_FROM, /* Non satisfied connection request */
- IRNET_NOANSWER_FROM, /* Failed connection request */
- IRNET_BLOCKED_LINK, /* Link (IrLAP) is blocked for > 3s */
- IRNET_DISCONNECT_FROM, /* IrNET socket has disconnected */
- IRNET_DISCONNECT_TO /* Closing IrNET socket */
-} irnet_event;
-
-/*
- * This is the storage for an event and its arguments
- */
-typedef struct irnet_log
-{
- irnet_event event;
- int unit;
- __u32 saddr;
- __u32 daddr;
- char name[NICKNAME_MAX_LEN + 1]; /* 21 + 1 */
- __u16_host_order hints; /* Discovery hint bits */
-} irnet_log;
-
-/*
- * This is the storage for all events and related stuff...
- */
-typedef struct irnet_ctrl_channel
-{
- irnet_log log[IRNET_MAX_EVENTS]; /* Event log */
- int index; /* Current index in log */
- spinlock_t spinlock; /* Serialize access to the event log */
- wait_queue_head_t rwait; /* processes blocked on read (or poll) */
-} irnet_ctrl_channel;
-
-/**************************** PROTOTYPES ****************************/
-/*
- * Global functions of the IrNET module
- * Note : we list here also functions called from one file to the other.
- */
-
-/* -------------------------- IRDA PART -------------------------- */
-int irda_irnet_create(irnet_socket *); /* Initialise an IrNET socket */
-int irda_irnet_connect(irnet_socket *); /* Try to connect over IrDA */
-void irda_irnet_destroy(irnet_socket *); /* Teardown an IrNET socket */
-int irda_irnet_init(void); /* Initialise IrDA part of IrNET */
-void irda_irnet_cleanup(void); /* Teardown IrDA part of IrNET */
-
-/**************************** VARIABLES ****************************/
-
-/* Control channel stuff - allocated in irnet_irda.h */
-extern struct irnet_ctrl_channel irnet_events;
-
-#endif /* IRNET_H */
diff --git a/net/irda/irnet/irnet_irda.c b/net/irda/irnet/irnet_irda.c
deleted file mode 100644
index e390bceeb2f8..000000000000
--- a/net/irda/irnet/irnet_irda.c
+++ /dev/null
@@ -1,1885 +0,0 @@
-/*
- * IrNET protocol module : Synchronous PPP over an IrDA socket.
- *
- * Jean II - HPL `00 - <jt@hpl.hp.com>
- *
- * This file implement the IRDA interface of IrNET.
- * Basically, we sit on top of IrTTP. We set up IrTTP, IrIAS properly,
- * and exchange frames with IrTTP.
- */
-
-#include "irnet_irda.h" /* Private header */
-#include <linux/sched.h>
-#include <linux/seq_file.h>
-#include <linux/slab.h>
-#include <asm/unaligned.h>
-
-/*
- * PPP disconnect work: we need to make sure we're in
- * process context when calling ppp_unregister_channel().
- */
-static void irnet_ppp_disconnect(struct work_struct *work)
-{
- irnet_socket * self =
- container_of(work, irnet_socket, disconnect_work);
-
- if (self == NULL)
- return;
- /*
- * If we were connected, cleanup & close the PPP
- * channel, which will kill pppd (hangup) and the rest.
- */
- if (self->ppp_open && !self->ttp_open && !self->ttp_connect) {
- ppp_unregister_channel(&self->chan);
- self->ppp_open = 0;
- }
-}
-
-/************************* CONTROL CHANNEL *************************/
-/*
- * When ppp is not active, /dev/irnet act as a control channel.
- * Writing allow to set up the IrDA destination of the IrNET channel,
- * and any application may be read events happening on IrNET...
- */
-
-/*------------------------------------------------------------------*/
-/*
- * Post an event to the control channel...
- * Put the event in the log, and then wait all process blocked on read
- * so they can read the log...
- */
-static void
-irnet_post_event(irnet_socket * ap,
- irnet_event event,
- __u32 saddr,
- __u32 daddr,
- char * name,
- __u16 hints)
-{
- int index; /* In the log */
-
- DENTER(CTRL_TRACE, "(ap=0x%p, event=%d, daddr=%08x, name=``%s'')\n",
- ap, event, daddr, name);
-
- /* Protect this section via spinlock.
- * Note : as we are the only event producer, we only need to exclude
- * ourself when touching the log, which is nice and easy.
- */
- spin_lock_bh(&irnet_events.spinlock);
-
- /* Copy the event in the log */
- index = irnet_events.index;
- irnet_events.log[index].event = event;
- irnet_events.log[index].daddr = daddr;
- irnet_events.log[index].saddr = saddr;
- /* Try to copy IrDA nickname */
- if(name)
- strcpy(irnet_events.log[index].name, name);
- else
- irnet_events.log[index].name[0] = '\0';
- /* Copy hints */
- irnet_events.log[index].hints.word = hints;
- /* Try to get ppp unit number */
- if((ap != (irnet_socket *) NULL) && (ap->ppp_open))
- irnet_events.log[index].unit = ppp_unit_number(&ap->chan);
- else
- irnet_events.log[index].unit = -1;
-
- /* Increment the index
- * Note that we increment the index only after the event is written,
- * to make sure that the readers don't get garbage... */
- irnet_events.index = (index + 1) % IRNET_MAX_EVENTS;
-
- DEBUG(CTRL_INFO, "New event index is %d\n", irnet_events.index);
-
- /* Spin lock end */
- spin_unlock_bh(&irnet_events.spinlock);
-
- /* Now : wake up everybody waiting for events... */
- wake_up_interruptible_all(&irnet_events.rwait);
-
- DEXIT(CTRL_TRACE, "\n");
-}
-
-/************************* IRDA SUBROUTINES *************************/
-/*
- * These are a bunch of subroutines called from other functions
- * down there, mostly common code or to improve readability...
- *
- * Note : we duplicate quite heavily some routines of af_irda.c,
- * because our input structure (self) is quite different
- * (struct irnet instead of struct irda_sock), which make sharing
- * the same code impossible (at least, without templates).
- */
-
-/*------------------------------------------------------------------*/
-/*
- * Function irda_open_tsap (self)
- *
- * Open local Transport Service Access Point (TSAP)
- *
- * Create a IrTTP instance for us and set all the IrTTP callbacks.
- */
-static inline int
-irnet_open_tsap(irnet_socket * self)
-{
- notify_t notify; /* Callback structure */
-
- DENTER(IRDA_SR_TRACE, "(self=0x%p)\n", self);
-
- DABORT(self->tsap != NULL, -EBUSY, IRDA_SR_ERROR, "Already busy !\n");
-
- /* Initialize IrTTP callbacks to be used by the IrDA stack */
- irda_notify_init(&notify);
- notify.connect_confirm = irnet_connect_confirm;
- notify.connect_indication = irnet_connect_indication;
- notify.disconnect_indication = irnet_disconnect_indication;
- notify.data_indication = irnet_data_indication;
- /*notify.udata_indication = NULL;*/
- notify.flow_indication = irnet_flow_indication;
- notify.status_indication = irnet_status_indication;
- notify.instance = self;
- strlcpy(notify.name, IRNET_NOTIFY_NAME, sizeof(notify.name));
-
- /* Open an IrTTP instance */
- self->tsap = irttp_open_tsap(LSAP_ANY, DEFAULT_INITIAL_CREDIT,
- &notify);
- DABORT(self->tsap == NULL, -ENOMEM,
- IRDA_SR_ERROR, "Unable to allocate TSAP !\n");
-
- /* Remember which TSAP selector we actually got */
- self->stsap_sel = self->tsap->stsap_sel;
-
- DEXIT(IRDA_SR_TRACE, " - tsap=0x%p, sel=0x%X\n",
- self->tsap, self->stsap_sel);
- return 0;
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Function irnet_ias_to_tsap (self, result, value)
- *
- * Examine an IAS object and extract TSAP
- *
- * We do an IAP query to find the TSAP associated with the IrNET service.
- * When IrIAP pass us the result of the query, this function look at
- * the return values to check for failures and extract the TSAP if
- * possible.
- * Also deallocate value
- * The failure is in self->errno
- * Return TSAP or -1
- */
-static inline __u8
-irnet_ias_to_tsap(irnet_socket * self,
- int result,
- struct ias_value * value)
-{
- __u8 dtsap_sel = 0; /* TSAP we are looking for */
-
- DENTER(IRDA_SR_TRACE, "(self=0x%p)\n", self);
-
- /* By default, no error */
- self->errno = 0;
-
- /* Check if request succeeded */
- switch(result)
- {
- /* Standard errors : service not available */
- case IAS_CLASS_UNKNOWN:
- case IAS_ATTRIB_UNKNOWN:
- DEBUG(IRDA_SR_INFO, "IAS object doesn't exist ! (%d)\n", result);
- self->errno = -EADDRNOTAVAIL;
- break;
-
- /* Other errors, most likely IrDA stack failure */
- default :
- DEBUG(IRDA_SR_INFO, "IAS query failed ! (%d)\n", result);
- self->errno = -EHOSTUNREACH;
- break;
-
- /* Success : we got what we wanted */
- case IAS_SUCCESS:
- break;
- }
-
- /* Check what was returned to us */
- if(value != NULL)
- {
- /* What type of argument have we got ? */
- switch(value->type)
- {
- case IAS_INTEGER:
- DEBUG(IRDA_SR_INFO, "result=%d\n", value->t.integer);
- if(value->t.integer != -1)
- /* Get the remote TSAP selector */
- dtsap_sel = value->t.integer;
- else
- self->errno = -EADDRNOTAVAIL;
- break;
- default:
- self->errno = -EADDRNOTAVAIL;
- DERROR(IRDA_SR_ERROR, "bad type ! (0x%X)\n", value->type);
- break;
- }
-
- /* Cleanup */
- irias_delete_value(value);
- }
- else /* value == NULL */
- {
- /* Nothing returned to us - usually result != SUCCESS */
- if(!(self->errno))
- {
- DERROR(IRDA_SR_ERROR,
- "IrDA bug : result == SUCCESS && value == NULL\n");
- self->errno = -EHOSTUNREACH;
- }
- }
- DEXIT(IRDA_SR_TRACE, "\n");
-
- /* Return the TSAP */
- return dtsap_sel;
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Function irnet_find_lsap_sel (self)
- *
- * Try to lookup LSAP selector in remote LM-IAS
- *
- * Basically, we start a IAP query, and then go to sleep. When the query
- * return, irnet_getvalue_confirm will wake us up, and we can examine the
- * result of the query...
- * Note that in some case, the query fail even before we go to sleep,
- * creating some races...
- */
-static inline int
-irnet_find_lsap_sel(irnet_socket * self)
-{
- DENTER(IRDA_SR_TRACE, "(self=0x%p)\n", self);
-
- /* This should not happen */
- DABORT(self->iriap, -EBUSY, IRDA_SR_ERROR, "busy with a previous query.\n");
-
- /* Create an IAP instance, will be closed in irnet_getvalue_confirm() */
- self->iriap = iriap_open(LSAP_ANY, IAS_CLIENT, self,
- irnet_getvalue_confirm);
-
- /* Treat unexpected signals as disconnect */
- self->errno = -EHOSTUNREACH;
-
- /* Query remote LM-IAS */
- iriap_getvaluebyclass_request(self->iriap, self->rsaddr, self->daddr,
- IRNET_SERVICE_NAME, IRNET_IAS_VALUE);
-
- /* The above request is non-blocking.
- * After a while, IrDA will call us back in irnet_getvalue_confirm()
- * We will then call irnet_ias_to_tsap() and finish the
- * connection procedure */
-
- DEXIT(IRDA_SR_TRACE, "\n");
- return 0;
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Function irnet_connect_tsap (self)
- *
- * Initialise the TTP socket and initiate TTP connection
- *
- */
-static inline int
-irnet_connect_tsap(irnet_socket * self)
-{
- int err;
-
- DENTER(IRDA_SR_TRACE, "(self=0x%p)\n", self);
-
- /* Open a local TSAP (an IrTTP instance) */
- err = irnet_open_tsap(self);
- if(err != 0)
- {
- clear_bit(0, &self->ttp_connect);
- DERROR(IRDA_SR_ERROR, "connect aborted!\n");
- return err;
- }
-
- /* Connect to remote device */
- err = irttp_connect_request(self->tsap, self->dtsap_sel,
- self->rsaddr, self->daddr, NULL,
- self->max_sdu_size_rx, NULL);
- if(err != 0)
- {
- clear_bit(0, &self->ttp_connect);
- DERROR(IRDA_SR_ERROR, "connect aborted!\n");
- return err;
- }
-
- /* The above call is non-blocking.
- * After a while, the IrDA stack will either call us back in
- * irnet_connect_confirm() or irnet_disconnect_indication()
- * See you there ;-) */
-
- DEXIT(IRDA_SR_TRACE, "\n");
- return err;
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Function irnet_discover_next_daddr (self)
- *
- * Query the IrNET TSAP of the next device in the log.
- *
- * Used in the TSAP discovery procedure.
- */
-static inline int
-irnet_discover_next_daddr(irnet_socket * self)
-{
- /* Close the last instance of IrIAP, and open a new one.
- * We can't reuse the IrIAP instance in the IrIAP callback */
- if(self->iriap)
- {
- iriap_close(self->iriap);
- self->iriap = NULL;
- }
- /* Create a new IAP instance */
- self->iriap = iriap_open(LSAP_ANY, IAS_CLIENT, self,
- irnet_discovervalue_confirm);
- if(self->iriap == NULL)
- return -ENOMEM;
-
- /* Next discovery - before the call to avoid races */
- self->disco_index++;
-
- /* Check if we have one more address to try */
- if(self->disco_index < self->disco_number)
- {
- /* Query remote LM-IAS */
- iriap_getvaluebyclass_request(self->iriap,
- self->discoveries[self->disco_index].saddr,
- self->discoveries[self->disco_index].daddr,
- IRNET_SERVICE_NAME, IRNET_IAS_VALUE);
- /* The above request is non-blocking.
- * After a while, IrDA will call us back in irnet_discovervalue_confirm()
- * We will then call irnet_ias_to_tsap() and come back here again... */
- return 0;
- }
- else
- return 1;
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Function irnet_discover_daddr_and_lsap_sel (self)
- *
- * This try to find a device with the requested service.
- *
- * Initiate a TSAP discovery procedure.
- * It basically look into the discovery log. For each address in the list,
- * it queries the LM-IAS of the device to find if this device offer
- * the requested service.
- * If there is more than one node supporting the service, we complain
- * to the user (it should move devices around).
- * If we find one node which have the requested TSAP, we connect to it.
- *
- * This function just start the whole procedure. It request the discovery
- * log and submit the first IAS query.
- * The bulk of the job is handled in irnet_discovervalue_confirm()
- *
- * Note : this procedure fails if there is more than one device in range
- * on the same dongle, because IrLMP doesn't disconnect the LAP when the
- * last LSAP is closed. Moreover, we would need to wait the LAP
- * disconnection...
- */
-static inline int
-irnet_discover_daddr_and_lsap_sel(irnet_socket * self)
-{
- int ret;
-
- DENTER(IRDA_SR_TRACE, "(self=0x%p)\n", self);
-
- /* Ask lmp for the current discovery log */
- self->discoveries = irlmp_get_discoveries(&self->disco_number, self->mask,
- DISCOVERY_DEFAULT_SLOTS);
-
- /* Check if the we got some results */
- if(self->discoveries == NULL)
- {
- self->disco_number = -1;
- clear_bit(0, &self->ttp_connect);
- DRETURN(-ENETUNREACH, IRDA_SR_INFO, "No Cachelog...\n");
- }
- DEBUG(IRDA_SR_INFO, "Got the log (0x%p), size is %d\n",
- self->discoveries, self->disco_number);
-
- /* Start with the first discovery */
- self->disco_index = -1;
- self->daddr = DEV_ADDR_ANY;
-
- /* This will fail if the log is empty - this is non-blocking */
- ret = irnet_discover_next_daddr(self);
- if(ret)
- {
- /* Close IAP */
- if(self->iriap)
- iriap_close(self->iriap);
- self->iriap = NULL;
-
- /* Cleanup our copy of the discovery log */
- kfree(self->discoveries);
- self->discoveries = NULL;
-
- clear_bit(0, &self->ttp_connect);
- DRETURN(-ENETUNREACH, IRDA_SR_INFO, "Cachelog empty...\n");
- }
-
- /* Follow me in irnet_discovervalue_confirm() */
-
- DEXIT(IRDA_SR_TRACE, "\n");
- return 0;
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Function irnet_dname_to_daddr (self)
- *
- * Convert an IrDA nickname to a valid IrDA address
- *
- * It basically look into the discovery log until there is a match.
- */
-static inline int
-irnet_dname_to_daddr(irnet_socket * self)
-{
- struct irda_device_info *discoveries; /* Copy of the discovery log */
- int number; /* Number of nodes in the log */
- int i;
-
- DENTER(IRDA_SR_TRACE, "(self=0x%p)\n", self);
-
- /* Ask lmp for the current discovery log */
- discoveries = irlmp_get_discoveries(&number, 0xffff,
- DISCOVERY_DEFAULT_SLOTS);
- /* Check if the we got some results */
- if(discoveries == NULL)
- DRETURN(-ENETUNREACH, IRDA_SR_INFO, "Cachelog empty...\n");
-
- /*
- * Now, check all discovered devices (if any), and connect
- * client only about the services that the client is
- * interested in...
- */
- for(i = 0; i < number; i++)
- {
- /* Does the name match ? */
- if(!strncmp(discoveries[i].info, self->rname, NICKNAME_MAX_LEN))
- {
- /* Yes !!! Get it.. */
- self->daddr = discoveries[i].daddr;
- DEBUG(IRDA_SR_INFO, "discovered device ``%s'' at address 0x%08x.\n",
- self->rname, self->daddr);
- kfree(discoveries);
- DEXIT(IRDA_SR_TRACE, "\n");
- return 0;
- }
- }
- /* No luck ! */
- DEBUG(IRDA_SR_INFO, "cannot discover device ``%s'' !!!\n", self->rname);
- kfree(discoveries);
- return -EADDRNOTAVAIL;
-}
-
-
-/************************* SOCKET ROUTINES *************************/
-/*
- * This are the main operations on IrNET sockets, basically to create
- * and destroy IrNET sockets. These are called from the PPP part...
- */
-
-/*------------------------------------------------------------------*/
-/*
- * Create a IrNET instance : just initialise some parameters...
- */
-int
-irda_irnet_create(irnet_socket * self)
-{
- DENTER(IRDA_SOCK_TRACE, "(self=0x%p)\n", self);
-
- self->magic = IRNET_MAGIC; /* Paranoia */
-
- self->ttp_open = 0; /* Prevent higher layer from accessing IrTTP */
- self->ttp_connect = 0; /* Not connecting yet */
- self->rname[0] = '\0'; /* May be set via control channel */
- self->rdaddr = DEV_ADDR_ANY; /* May be set via control channel */
- self->rsaddr = DEV_ADDR_ANY; /* May be set via control channel */
- self->daddr = DEV_ADDR_ANY; /* Until we get connected */
- self->saddr = DEV_ADDR_ANY; /* Until we get connected */
- self->max_sdu_size_rx = TTP_SAR_UNBOUND;
-
- /* Register as a client with IrLMP */
- self->ckey = irlmp_register_client(0, NULL, NULL, NULL);
-#ifdef DISCOVERY_NOMASK
- self->mask = 0xffff; /* For W2k compatibility */
-#else /* DISCOVERY_NOMASK */
- self->mask = irlmp_service_to_hint(S_LAN);
-#endif /* DISCOVERY_NOMASK */
- self->tx_flow = FLOW_START; /* Flow control from IrTTP */
-
- INIT_WORK(&self->disconnect_work, irnet_ppp_disconnect);
-
- DEXIT(IRDA_SOCK_TRACE, "\n");
- return 0;
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Connect to the other side :
- * o convert device name to an address
- * o find the socket number (dlsap)
- * o Establish the connection
- *
- * Note : We no longer mimic af_irda. The IAS query for finding the TSAP
- * is done asynchronously, like the TTP connection. This allow us to
- * call this function from any context (not only process).
- * The downside is that following what's happening in there is tricky
- * because it involve various functions all over the place...
- */
-int
-irda_irnet_connect(irnet_socket * self)
-{
- int err;
-
- DENTER(IRDA_SOCK_TRACE, "(self=0x%p)\n", self);
-
- /* Check if we are already trying to connect.
- * Because irda_irnet_connect() can be called directly by pppd plus
- * packet retries in ppp_generic and connect may take time, plus we may
- * race with irnet_connect_indication(), we need to be careful there... */
- if(test_and_set_bit(0, &self->ttp_connect))
- DRETURN(-EBUSY, IRDA_SOCK_INFO, "Already connecting...\n");
- if((self->iriap != NULL) || (self->tsap != NULL))
- DERROR(IRDA_SOCK_ERROR, "Socket not cleaned up...\n");
-
- /* Insert ourselves in the hashbin so that the IrNET server can find us.
- * Notes : 4th arg is string of 32 char max and must be null terminated
- * When 4th arg is used (string), 3rd arg isn't (int)
- * Can't re-insert (MUST remove first) so check for that... */
- if((irnet_server.running) && (self->q.q_next == NULL))
- {
- spin_lock_bh(&irnet_server.spinlock);
- hashbin_insert(irnet_server.list, (irda_queue_t *) self, 0, self->rname);
- spin_unlock_bh(&irnet_server.spinlock);
- DEBUG(IRDA_SOCK_INFO, "Inserted ``%s'' in hashbin...\n", self->rname);
- }
-
- /* If we don't have anything (no address, no name) */
- if((self->rdaddr == DEV_ADDR_ANY) && (self->rname[0] == '\0'))
- {
- /* Try to find a suitable address */
- if((err = irnet_discover_daddr_and_lsap_sel(self)) != 0)
- DRETURN(err, IRDA_SOCK_INFO, "auto-connect failed!\n");
- /* In most cases, the call above is non-blocking */
- }
- else
- {
- /* If we have only the name (no address), try to get an address */
- if(self->rdaddr == DEV_ADDR_ANY)
- {
- if((err = irnet_dname_to_daddr(self)) != 0)
- DRETURN(err, IRDA_SOCK_INFO, "name connect failed!\n");
- }
- else
- /* Use the requested destination address */
- self->daddr = self->rdaddr;
-
- /* Query remote LM-IAS to find LSAP selector */
- irnet_find_lsap_sel(self);
- /* The above call is non blocking */
- }
-
- /* At this point, we are waiting for the IrDA stack to call us back,
- * or we have already failed.
- * We will finish the connection procedure in irnet_connect_tsap().
- */
- DEXIT(IRDA_SOCK_TRACE, "\n");
- return 0;
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Function irda_irnet_destroy(self)
- *
- * Destroy irnet instance
- *
- * Note : this need to be called from a process context.
- */
-void
-irda_irnet_destroy(irnet_socket * self)
-{
- DENTER(IRDA_SOCK_TRACE, "(self=0x%p)\n", self);
- if(self == NULL)
- return;
-
- /* Remove ourselves from hashbin (if we are queued in hashbin)
- * Note : `irnet_server.running' protect us from calls in hashbin_delete() */
- if((irnet_server.running) && (self->q.q_next != NULL))
- {
- struct irnet_socket * entry;
- DEBUG(IRDA_SOCK_INFO, "Removing from hash..\n");
- spin_lock_bh(&irnet_server.spinlock);
- entry = hashbin_remove_this(irnet_server.list, (irda_queue_t *) self);
- self->q.q_next = NULL;
- spin_unlock_bh(&irnet_server.spinlock);
- DASSERT(entry == self, , IRDA_SOCK_ERROR, "Can't remove from hash.\n");
- }
-
- /* If we were connected, post a message */
- if(test_bit(0, &self->ttp_open))
- {
- /* Note : as the disconnect comes from ppp_generic, the unit number
- * doesn't exist anymore when we post the event, so we need to pass
- * NULL as the first arg... */
- irnet_post_event(NULL, IRNET_DISCONNECT_TO,
- self->saddr, self->daddr, self->rname, 0);
- }
-
- /* Prevent various IrDA callbacks from messing up things
- * Need to be first */
- clear_bit(0, &self->ttp_connect);
-
- /* Prevent higher layer from accessing IrTTP */
- clear_bit(0, &self->ttp_open);
-
- /* Unregister with IrLMP */
- irlmp_unregister_client(self->ckey);
-
- /* Unregister with LM-IAS */
- if(self->iriap)
- {
- iriap_close(self->iriap);
- self->iriap = NULL;
- }
-
- /* Cleanup eventual discoveries from connection attempt or control channel */
- if(self->discoveries != NULL)
- {
- /* Cleanup our copy of the discovery log */
- kfree(self->discoveries);
- self->discoveries = NULL;
- }
-
- /* Close our IrTTP connection */
- if(self->tsap)
- {
- DEBUG(IRDA_SOCK_INFO, "Closing our TTP connection.\n");
- irttp_disconnect_request(self->tsap, NULL, P_NORMAL);
- irttp_close_tsap(self->tsap);
- self->tsap = NULL;
- }
- self->stsap_sel = 0;
-
- DEXIT(IRDA_SOCK_TRACE, "\n");
-}
-
-
-/************************** SERVER SOCKET **************************/
-/*
- * The IrNET service is composed of one server socket and a variable
- * number of regular IrNET sockets. The server socket is supposed to
- * handle incoming connections and redirect them to one IrNET sockets.
- * It's a superset of the regular IrNET socket, but has a very distinct
- * behaviour...
- */
-
-/*------------------------------------------------------------------*/
-/*
- * Function irnet_daddr_to_dname (self)
- *
- * Convert an IrDA address to a IrDA nickname
- *
- * It basically look into the discovery log until there is a match.
- */
-static inline int
-irnet_daddr_to_dname(irnet_socket * self)
-{
- struct irda_device_info *discoveries; /* Copy of the discovery log */
- int number; /* Number of nodes in the log */
- int i;
-
- DENTER(IRDA_SERV_TRACE, "(self=0x%p)\n", self);
-
- /* Ask lmp for the current discovery log */
- discoveries = irlmp_get_discoveries(&number, 0xffff,
- DISCOVERY_DEFAULT_SLOTS);
- /* Check if the we got some results */
- if (discoveries == NULL)
- DRETURN(-ENETUNREACH, IRDA_SERV_INFO, "Cachelog empty...\n");
-
- /* Now, check all discovered devices (if any) */
- for(i = 0; i < number; i++)
- {
- /* Does the name match ? */
- if(discoveries[i].daddr == self->daddr)
- {
- /* Yes !!! Get it.. */
- strlcpy(self->rname, discoveries[i].info, sizeof(self->rname));
- self->rname[sizeof(self->rname) - 1] = '\0';
- DEBUG(IRDA_SERV_INFO, "Device 0x%08x is in fact ``%s''.\n",
- self->daddr, self->rname);
- kfree(discoveries);
- DEXIT(IRDA_SERV_TRACE, "\n");
- return 0;
- }
- }
- /* No luck ! */
- DEXIT(IRDA_SERV_INFO, ": cannot discover device 0x%08x !!!\n", self->daddr);
- kfree(discoveries);
- return -EADDRNOTAVAIL;
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Function irda_find_socket (self)
- *
- * Find the correct IrNET socket
- *
- * Look into the list of IrNET sockets and finds one with the right
- * properties...
- */
-static inline irnet_socket *
-irnet_find_socket(irnet_socket * self)
-{
- irnet_socket * new = (irnet_socket *) NULL;
- int err;
-
- DENTER(IRDA_SERV_TRACE, "(self=0x%p)\n", self);
-
- /* Get the addresses of the requester */
- self->daddr = irttp_get_daddr(self->tsap);
- self->saddr = irttp_get_saddr(self->tsap);
-
- /* Try to get the IrDA nickname of the requester */
- err = irnet_daddr_to_dname(self);
-
- /* Protect access to the instance list */
- spin_lock_bh(&irnet_server.spinlock);
-
- /* So now, try to get an socket having specifically
- * requested that nickname */
- if(err == 0)
- {
- new = (irnet_socket *) hashbin_find(irnet_server.list,
- 0, self->rname);
- if(new)
- DEBUG(IRDA_SERV_INFO, "Socket 0x%p matches rname ``%s''.\n",
- new, new->rname);
- }
-
- /* If no name matches, try to find an socket by the destination address */
- /* It can be either the requested destination address (set via the
- * control channel), or the current destination address if the
- * socket is in the middle of a connection request */
- if(new == (irnet_socket *) NULL)
- {
- new = (irnet_socket *) hashbin_get_first(irnet_server.list);
- while(new !=(irnet_socket *) NULL)
- {
- /* Does it have the same address ? */
- if((new->rdaddr == self->daddr) || (new->daddr == self->daddr))
- {
- /* Yes !!! Get it.. */
- DEBUG(IRDA_SERV_INFO, "Socket 0x%p matches daddr %#08x.\n",
- new, self->daddr);
- break;
- }
- new = (irnet_socket *) hashbin_get_next(irnet_server.list);
- }
- }
-
- /* If we don't have any socket, get the first unconnected socket */
- if(new == (irnet_socket *) NULL)
- {
- new = (irnet_socket *) hashbin_get_first(irnet_server.list);
- while(new !=(irnet_socket *) NULL)
- {
- /* Is it available ? */
- if(!(test_bit(0, &new->ttp_open)) && (new->rdaddr == DEV_ADDR_ANY) &&
- (new->rname[0] == '\0') && (new->ppp_open))
- {
- /* Yes !!! Get it.. */
- DEBUG(IRDA_SERV_INFO, "Socket 0x%p is free.\n",
- new);
- break;
- }
- new = (irnet_socket *) hashbin_get_next(irnet_server.list);
- }
- }
-
- /* Spin lock end */
- spin_unlock_bh(&irnet_server.spinlock);
-
- DEXIT(IRDA_SERV_TRACE, " - new = 0x%p\n", new);
- return new;
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Function irda_connect_socket (self)
- *
- * Connect an incoming connection to the socket
- *
- */
-static inline int
-irnet_connect_socket(irnet_socket * server,
- irnet_socket * new,
- struct qos_info * qos,
- __u32 max_sdu_size,
- __u8 max_header_size)
-{
- DENTER(IRDA_SERV_TRACE, "(server=0x%p, new=0x%p)\n",
- server, new);
-
- /* Now attach up the new socket */
- new->tsap = irttp_dup(server->tsap, new);
- DABORT(new->tsap == NULL, -1, IRDA_SERV_ERROR, "dup failed!\n");
-
- /* Set up all the relevant parameters on the new socket */
- new->stsap_sel = new->tsap->stsap_sel;
- new->dtsap_sel = new->tsap->dtsap_sel;
- new->saddr = irttp_get_saddr(new->tsap);
- new->daddr = irttp_get_daddr(new->tsap);
-
- new->max_header_size = max_header_size;
- new->max_sdu_size_tx = max_sdu_size;
- new->max_data_size = max_sdu_size;
-#ifdef STREAM_COMPAT
- /* If we want to receive "stream sockets" */
- if(max_sdu_size == 0)
- new->max_data_size = irttp_get_max_seg_size(new->tsap);
-#endif /* STREAM_COMPAT */
-
- /* Clean up the original one to keep it in listen state */
- irttp_listen(server->tsap);
-
- /* Send a connection response on the new socket */
- irttp_connect_response(new->tsap, new->max_sdu_size_rx, NULL);
-
- /* Allow PPP to send its junk over the new socket... */
- set_bit(0, &new->ttp_open);
-
- /* Not connecting anymore, and clean up last possible remains
- * of connection attempts on the socket */
- clear_bit(0, &new->ttp_connect);
- if(new->iriap)
- {
- iriap_close(new->iriap);
- new->iriap = NULL;
- }
- if(new->discoveries != NULL)
- {
- kfree(new->discoveries);
- new->discoveries = NULL;
- }
-
-#ifdef CONNECT_INDIC_KICK
- /* As currently we don't block packets in ppp_irnet_send() while passive,
- * this is not really needed...
- * Also, not doing it give IrDA a chance to finish the setup properly
- * before being swamped with packets... */
- ppp_output_wakeup(&new->chan);
-#endif /* CONNECT_INDIC_KICK */
-
- /* Notify the control channel */
- irnet_post_event(new, IRNET_CONNECT_FROM,
- new->saddr, new->daddr, server->rname, 0);
-
- DEXIT(IRDA_SERV_TRACE, "\n");
- return 0;
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Function irda_disconnect_server (self)
- *
- * Cleanup the server socket when the incoming connection abort
- *
- */
-static inline void
-irnet_disconnect_server(irnet_socket * self,
- struct sk_buff *skb)
-{
- DENTER(IRDA_SERV_TRACE, "(self=0x%p)\n", self);
-
- /* Put the received packet in the black hole */
- kfree_skb(skb);
-
-#ifdef FAIL_SEND_DISCONNECT
- /* Tell the other party we don't want to be connected */
- /* Hum... Is it the right thing to do ? And do we need to send
- * a connect response before ? It looks ok without this... */
- irttp_disconnect_request(self->tsap, NULL, P_NORMAL);
-#endif /* FAIL_SEND_DISCONNECT */
-
- /* Notify the control channel (see irnet_find_socket()) */
- irnet_post_event(NULL, IRNET_REQUEST_FROM,
- self->saddr, self->daddr, self->rname, 0);
-
- /* Clean up the server to keep it in listen state */
- irttp_listen(self->tsap);
-
- DEXIT(IRDA_SERV_TRACE, "\n");
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Function irda_setup_server (self)
- *
- * Create a IrTTP server and set it up...
- *
- * Register the IrLAN hint bit, create a IrTTP instance for us,
- * set all the IrTTP callbacks and create an IrIAS entry...
- */
-static inline int
-irnet_setup_server(void)
-{
- __u16 hints;
-
- DENTER(IRDA_SERV_TRACE, "()\n");
-
- /* Initialise the regular socket part of the server */
- irda_irnet_create(&irnet_server.s);
-
- /* Open a local TSAP (an IrTTP instance) for the server */
- irnet_open_tsap(&irnet_server.s);
-
- /* PPP part setup */
- irnet_server.s.ppp_open = 0;
- irnet_server.s.chan.private = NULL;
- irnet_server.s.file = NULL;
-
- /* Get the hint bit corresponding to IrLAN */
- /* Note : we overload the IrLAN hint bit. As it is only a "hint", and as
- * we provide roughly the same functionality as IrLAN, this is ok.
- * In fact, the situation is similar as JetSend overloading the Obex hint
- */
- hints = irlmp_service_to_hint(S_LAN);
-
-#ifdef ADVERTISE_HINT
- /* Register with IrLMP as a service (advertise our hint bit) */
- irnet_server.skey = irlmp_register_service(hints);
-#endif /* ADVERTISE_HINT */
-
- /* Register with LM-IAS (so that people can connect to us) */
- irnet_server.ias_obj = irias_new_object(IRNET_SERVICE_NAME, jiffies);
- irias_add_integer_attrib(irnet_server.ias_obj, IRNET_IAS_VALUE,
- irnet_server.s.stsap_sel, IAS_KERNEL_ATTR);
- irias_insert_object(irnet_server.ias_obj);
-
-#ifdef DISCOVERY_EVENTS
- /* Tell IrLMP we want to be notified of newly discovered nodes */
- irlmp_update_client(irnet_server.s.ckey, hints,
- irnet_discovery_indication, irnet_expiry_indication,
- (void *) &irnet_server.s);
-#endif
-
- DEXIT(IRDA_SERV_TRACE, " - self=0x%p\n", &irnet_server.s);
- return 0;
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Function irda_destroy_server (self)
- *
- * Destroy the IrTTP server...
- *
- * Reverse of the previous function...
- */
-static inline void
-irnet_destroy_server(void)
-{
- DENTER(IRDA_SERV_TRACE, "()\n");
-
-#ifdef ADVERTISE_HINT
- /* Unregister with IrLMP */
- irlmp_unregister_service(irnet_server.skey);
-#endif /* ADVERTISE_HINT */
-
- /* Unregister with LM-IAS */
- if(irnet_server.ias_obj)
- irias_delete_object(irnet_server.ias_obj);
-
- /* Cleanup the socket part */
- irda_irnet_destroy(&irnet_server.s);
-
- DEXIT(IRDA_SERV_TRACE, "\n");
-}
-
-
-/************************ IRDA-TTP CALLBACKS ************************/
-/*
- * When we create a IrTTP instance, we pass to it a set of callbacks
- * that IrTTP will call in case of various events.
- * We take care of those events here.
- */
-
-/*------------------------------------------------------------------*/
-/*
- * Function irnet_data_indication (instance, sap, skb)
- *
- * Received some data from TinyTP. Just queue it on the receive queue
- *
- */
-static int
-irnet_data_indication(void * instance,
- void * sap,
- struct sk_buff *skb)
-{
- irnet_socket * ap = (irnet_socket *) instance;
- unsigned char * p;
- int code = 0;
-
- DENTER(IRDA_TCB_TRACE, "(self/ap=0x%p, skb=0x%p)\n",
- ap, skb);
- DASSERT(skb != NULL, 0, IRDA_CB_ERROR, "skb is NULL !!!\n");
-
- /* Check is ppp is ready to receive our packet */
- if(!ap->ppp_open)
- {
- DERROR(IRDA_CB_ERROR, "PPP not ready, dropping packet...\n");
- /* When we return error, TTP will need to requeue the skb and
- * will stop the sender. IrTTP will stall until we send it a
- * flow control request... */
- return -ENOMEM;
- }
-
- /* strip address/control field if present */
- p = skb->data;
- if((p[0] == PPP_ALLSTATIONS) && (p[1] == PPP_UI))
- {
- /* chop off address/control */
- if(skb->len < 3)
- goto err_exit;
- p = skb_pull(skb, 2);
- }
-
- /* decompress protocol field if compressed */
- if(p[0] & 1)
- {
- /* protocol is compressed */
- *(u8 *)skb_push(skb, 1) = 0;
- }
- else
- if(skb->len < 2)
- goto err_exit;
-
- /* pass to generic ppp layer */
- /* Note : how do I know if ppp can accept or not the packet ? This is
- * essential if I want to manage flow control smoothly... */
- ppp_input(&ap->chan, skb);
-
- DEXIT(IRDA_TCB_TRACE, "\n");
- return 0;
-
- err_exit:
- DERROR(IRDA_CB_ERROR, "Packet too small, dropping...\n");
- kfree_skb(skb);
- ppp_input_error(&ap->chan, code);
- return 0; /* Don't return an error code, only for flow control... */
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Function irnet_disconnect_indication (instance, sap, reason, skb)
- *
- * Connection has been closed. Chech reason to find out why
- *
- * Note : there are many cases where we come here :
- * o attempted to connect, timeout
- * o connected, link is broken, LAP has timeout
- * o connected, other side close the link
- * o connection request on the server not handled
- */
-static void
-irnet_disconnect_indication(void * instance,
- void * sap,
- LM_REASON reason,
- struct sk_buff *skb)
-{
- irnet_socket * self = (irnet_socket *) instance;
- int test_open;
- int test_connect;
-
- DENTER(IRDA_TCB_TRACE, "(self=0x%p)\n", self);
- DASSERT(self != NULL, , IRDA_CB_ERROR, "Self is NULL !!!\n");
-
- /* Don't care about it, but let's not leak it */
- if(skb)
- dev_kfree_skb(skb);
-
- /* Prevent higher layer from accessing IrTTP */
- test_open = test_and_clear_bit(0, &self->ttp_open);
- /* Not connecting anymore...
- * (note : TSAP is open, so IAP callbacks are no longer pending...) */
- test_connect = test_and_clear_bit(0, &self->ttp_connect);
-
- /* If both self->ttp_open and self->ttp_connect are NULL, it mean that we
- * have a race condition with irda_irnet_destroy() or
- * irnet_connect_indication(), so don't mess up tsap...
- */
- if(!(test_open || test_connect))
- {
- DERROR(IRDA_CB_ERROR, "Race condition detected...\n");
- return;
- }
-
- /* If we were active, notify the control channel */
- if(test_open)
- irnet_post_event(self, IRNET_DISCONNECT_FROM,
- self->saddr, self->daddr, self->rname, 0);
- else
- /* If we were trying to connect, notify the control channel */
- if((self->tsap) && (self != &irnet_server.s))
- irnet_post_event(self, IRNET_NOANSWER_FROM,
- self->saddr, self->daddr, self->rname, 0);
-
- /* Close our IrTTP connection, cleanup tsap */
- if((self->tsap) && (self != &irnet_server.s))
- {
- DEBUG(IRDA_CB_INFO, "Closing our TTP connection.\n");
- irttp_close_tsap(self->tsap);
- self->tsap = NULL;
- }
- /* Cleanup the socket in case we want to reconnect in ppp_output_wakeup() */
- self->stsap_sel = 0;
- self->daddr = DEV_ADDR_ANY;
- self->tx_flow = FLOW_START;
-
- /* Deal with the ppp instance if it's still alive */
- if(self->ppp_open)
- {
- if(test_open)
- {
- /* ppp_unregister_channel() wants a user context. */
- schedule_work(&self->disconnect_work);
- }
- else
- {
- /* If we were trying to connect, flush (drain) ppp_generic
- * Tx queue (most often we have blocked it), which will
- * trigger an other attempt to connect. If we are passive,
- * this will empty the Tx queue after last try. */
- ppp_output_wakeup(&self->chan);
- }
- }
-
- DEXIT(IRDA_TCB_TRACE, "\n");
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Function irnet_connect_confirm (instance, sap, qos, max_sdu_size, skb)
- *
- * Connections has been confirmed by the remote device
- *
- */
-static void
-irnet_connect_confirm(void * instance,
- void * sap,
- struct qos_info *qos,
- __u32 max_sdu_size,
- __u8 max_header_size,
- struct sk_buff *skb)
-{
- irnet_socket * self = (irnet_socket *) instance;
-
- DENTER(IRDA_TCB_TRACE, "(self=0x%p)\n", self);
-
- /* Check if socket is closing down (via irda_irnet_destroy()) */
- if(! test_bit(0, &self->ttp_connect))
- {
- DERROR(IRDA_CB_ERROR, "Socket no longer connecting. Ouch !\n");
- return;
- }
-
- /* How much header space do we need to reserve */
- self->max_header_size = max_header_size;
-
- /* IrTTP max SDU size in transmit direction */
- self->max_sdu_size_tx = max_sdu_size;
- self->max_data_size = max_sdu_size;
-#ifdef STREAM_COMPAT
- if(max_sdu_size == 0)
- self->max_data_size = irttp_get_max_seg_size(self->tsap);
-#endif /* STREAM_COMPAT */
-
- /* At this point, IrLMP has assigned our source address */
- self->saddr = irttp_get_saddr(self->tsap);
-
- /* Allow higher layer to access IrTTP */
- set_bit(0, &self->ttp_open);
- clear_bit(0, &self->ttp_connect); /* Not racy, IrDA traffic is serial */
- /* Give a kick in the ass of ppp_generic so that he sends us some data */
- ppp_output_wakeup(&self->chan);
-
- /* Check size of received packet */
- if(skb->len > 0)
- {
-#ifdef PASS_CONNECT_PACKETS
- DEBUG(IRDA_CB_INFO, "Passing connect packet to PPP.\n");
- /* Try to pass it to PPP */
- irnet_data_indication(instance, sap, skb);
-#else /* PASS_CONNECT_PACKETS */
- DERROR(IRDA_CB_ERROR, "Dropping non empty packet.\n");
- kfree_skb(skb); /* Note : will be optimised with other kfree... */
-#endif /* PASS_CONNECT_PACKETS */
- }
- else
- kfree_skb(skb);
-
- /* Notify the control channel */
- irnet_post_event(self, IRNET_CONNECT_TO,
- self->saddr, self->daddr, self->rname, 0);
-
- DEXIT(IRDA_TCB_TRACE, "\n");
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Function irnet_flow_indication (instance, sap, flow)
- *
- * Used by TinyTP to tell us if it can accept more data or not
- *
- */
-static void
-irnet_flow_indication(void * instance,
- void * sap,
- LOCAL_FLOW flow)
-{
- irnet_socket * self = (irnet_socket *) instance;
- LOCAL_FLOW oldflow = self->tx_flow;
-
- DENTER(IRDA_TCB_TRACE, "(self=0x%p, flow=%d)\n", self, flow);
-
- /* Update our state */
- self->tx_flow = flow;
-
- /* Check what IrTTP want us to do... */
- switch(flow)
- {
- case FLOW_START:
- DEBUG(IRDA_CB_INFO, "IrTTP wants us to start again\n");
- /* Check if we really need to wake up PPP */
- if(oldflow == FLOW_STOP)
- ppp_output_wakeup(&self->chan);
- else
- DEBUG(IRDA_CB_INFO, "But we were already transmitting !!!\n");
- break;
- case FLOW_STOP:
- DEBUG(IRDA_CB_INFO, "IrTTP wants us to slow down\n");
- break;
- default:
- DEBUG(IRDA_CB_INFO, "Unknown flow command!\n");
- break;
- }
-
- DEXIT(IRDA_TCB_TRACE, "\n");
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Function irnet_status_indication (instance, sap, reason, skb)
- *
- * Link (IrLAP) status report.
- *
- */
-static void
-irnet_status_indication(void * instance,
- LINK_STATUS link,
- LOCK_STATUS lock)
-{
- irnet_socket * self = (irnet_socket *) instance;
-
- DENTER(IRDA_TCB_TRACE, "(self=0x%p)\n", self);
- DASSERT(self != NULL, , IRDA_CB_ERROR, "Self is NULL !!!\n");
-
- /* We can only get this event if we are connected */
- switch(link)
- {
- case STATUS_NO_ACTIVITY:
- irnet_post_event(self, IRNET_BLOCKED_LINK,
- self->saddr, self->daddr, self->rname, 0);
- break;
- default:
- DEBUG(IRDA_CB_INFO, "Unknown status...\n");
- }
-
- DEXIT(IRDA_TCB_TRACE, "\n");
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Function irnet_connect_indication(instance, sap, qos, max_sdu_size, userdata)
- *
- * Incoming connection
- *
- * In theory, this function is called only on the server socket.
- * Some other node is attempting to connect to the IrNET service, and has
- * sent a connection request on our server socket.
- * We just redirect the connection to the relevant IrNET socket.
- *
- * Note : we also make sure that between 2 irnet nodes, there can
- * exist only one irnet connection.
- */
-static void
-irnet_connect_indication(void * instance,
- void * sap,
- struct qos_info *qos,
- __u32 max_sdu_size,
- __u8 max_header_size,
- struct sk_buff *skb)
-{
- irnet_socket * server = &irnet_server.s;
- irnet_socket * new = (irnet_socket *) NULL;
-
- DENTER(IRDA_TCB_TRACE, "(server=0x%p)\n", server);
- DASSERT(instance == &irnet_server, , IRDA_CB_ERROR,
- "Invalid instance (0x%p) !!!\n", instance);
- DASSERT(sap == irnet_server.s.tsap, , IRDA_CB_ERROR, "Invalid sap !!!\n");
-
- /* Try to find the most appropriate IrNET socket */
- new = irnet_find_socket(server);
-
- /* After all this hard work, do we have an socket ? */
- if(new == (irnet_socket *) NULL)
- {
- DEXIT(IRDA_CB_INFO, ": No socket waiting for this connection.\n");
- irnet_disconnect_server(server, skb);
- return;
- }
-
- /* Is the socket already busy ? */
- if(test_bit(0, &new->ttp_open))
- {
- DEXIT(IRDA_CB_INFO, ": Socket already connected.\n");
- irnet_disconnect_server(server, skb);
- return;
- }
-
- /* The following code is a bit tricky, so need comments ;-)
- */
- /* If ttp_connect is set, the socket is trying to connect to the other
- * end and may have sent a IrTTP connection request and is waiting for
- * a connection response (that may never come).
- * Now, the pain is that the socket may have opened a tsap and is
- * waiting on it, while the other end is trying to connect to it on
- * another tsap.
- * Because IrNET can be peer to peer, we need to workaround this.
- * Furthermore, the way the irnetd script is implemented, the
- * target will create a second IrNET connection back to the
- * originator and expect the originator to bind this new connection
- * to the original PPPD instance.
- * And of course, if we don't use irnetd, we can have a race when
- * both side try to connect simultaneously, which could leave both
- * connections half closed (yuck).
- * Conclusions :
- * 1) The "originator" must accept the new connection and get rid
- * of the old one so that irnetd works
- * 2) One side must deny the new connection to avoid races,
- * but both side must agree on which side it is...
- * Most often, the originator is primary at the LAP layer.
- * Jean II
- */
- /* Now, let's look at the way I wrote the test...
- * We need to clear up the ttp_connect flag atomically to prevent
- * irnet_disconnect_indication() to mess up the tsap we are going to close.
- * We want to clear the ttp_connect flag only if we close the tsap,
- * otherwise we will never close it, so we need to check for primary
- * *before* doing the test on the flag.
- * And of course, ALLOW_SIMULT_CONNECT can disable this entirely...
- * Jean II
- */
-
- /* Socket already connecting ? On primary ? */
- if(0
-#ifdef ALLOW_SIMULT_CONNECT
- || ((irttp_is_primary(server->tsap) == 1) && /* primary */
- (test_and_clear_bit(0, &new->ttp_connect)))
-#endif /* ALLOW_SIMULT_CONNECT */
- )
- {
- DERROR(IRDA_CB_ERROR, "Socket already connecting, but going to reuse it !\n");
-
- /* Cleanup the old TSAP if necessary - IrIAP will be cleaned up later */
- if(new->tsap != NULL)
- {
- /* Close the old connection the new socket was attempting,
- * so that we can hook it up to the new connection.
- * It's now safe to do it... */
- irttp_close_tsap(new->tsap);
- new->tsap = NULL;
- }
- }
- else
- {
- /* Three options :
- * 1) socket was not connecting or connected : ttp_connect should be 0.
- * 2) we don't want to connect the socket because we are secondary or
- * ALLOW_SIMULT_CONNECT is undefined. ttp_connect should be 1.
- * 3) we are half way in irnet_disconnect_indication(), and it's a
- * nice race condition... Fortunately, we can detect that by checking
- * if tsap is still alive. On the other hand, we can't be in
- * irda_irnet_destroy() otherwise we would not have found this
- * socket in the hashbin.
- * Jean II */
- if((test_bit(0, &new->ttp_connect)) || (new->tsap != NULL))
- {
- /* Don't mess this socket, somebody else in in charge... */
- DERROR(IRDA_CB_ERROR, "Race condition detected, socket in use, abort connect...\n");
- irnet_disconnect_server(server, skb);
- return;
- }
- }
-
- /* So : at this point, we have a socket, and it is idle. Good ! */
- irnet_connect_socket(server, new, qos, max_sdu_size, max_header_size);
-
- /* Check size of received packet */
- if(skb->len > 0)
- {
-#ifdef PASS_CONNECT_PACKETS
- DEBUG(IRDA_CB_INFO, "Passing connect packet to PPP.\n");
- /* Try to pass it to PPP */
- irnet_data_indication(new, new->tsap, skb);
-#else /* PASS_CONNECT_PACKETS */
- DERROR(IRDA_CB_ERROR, "Dropping non empty packet.\n");
- kfree_skb(skb); /* Note : will be optimised with other kfree... */
-#endif /* PASS_CONNECT_PACKETS */
- }
- else
- kfree_skb(skb);
-
- DEXIT(IRDA_TCB_TRACE, "\n");
-}
-
-
-/********************** IRDA-IAS/LMP CALLBACKS **********************/
-/*
- * These are the callbacks called by other layers of the IrDA stack,
- * mainly LMP for discovery and IAS for name queries.
- */
-
-/*------------------------------------------------------------------*/
-/*
- * Function irnet_getvalue_confirm (result, obj_id, value, priv)
- *
- * Got answer from remote LM-IAS, just connect
- *
- * This is the reply to a IAS query we were doing to find the TSAP of
- * the device we want to connect to.
- * If we have found a valid TSAP, just initiate the TTP connection
- * on this TSAP.
- */
-static void
-irnet_getvalue_confirm(int result,
- __u16 obj_id,
- struct ias_value *value,
- void * priv)
-{
- irnet_socket * self = (irnet_socket *) priv;
-
- DENTER(IRDA_OCB_TRACE, "(self=0x%p)\n", self);
- DASSERT(self != NULL, , IRDA_OCB_ERROR, "Self is NULL !!!\n");
-
- /* Check if already connected (via irnet_connect_socket())
- * or socket is closing down (via irda_irnet_destroy()) */
- if(! test_bit(0, &self->ttp_connect))
- {
- DERROR(IRDA_OCB_ERROR, "Socket no longer connecting. Ouch !\n");
- return;
- }
-
- /* We probably don't need to make any more queries */
- iriap_close(self->iriap);
- self->iriap = NULL;
-
- /* Post process the IAS reply */
- self->dtsap_sel = irnet_ias_to_tsap(self, result, value);
-
- /* If error, just go out */
- if(self->errno)
- {
- clear_bit(0, &self->ttp_connect);
- DERROR(IRDA_OCB_ERROR, "IAS connect failed ! (0x%X)\n", self->errno);
- return;
- }
-
- DEBUG(IRDA_OCB_INFO, "daddr = %08x, lsap = %d, starting IrTTP connection\n",
- self->daddr, self->dtsap_sel);
-
- /* Start up TTP - non blocking */
- irnet_connect_tsap(self);
-
- DEXIT(IRDA_OCB_TRACE, "\n");
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Function irnet_discovervalue_confirm (result, obj_id, value, priv)
- *
- * Handle the TSAP discovery procedure state machine.
- * Got answer from remote LM-IAS, try next device
- *
- * We are doing a TSAP discovery procedure, and we got an answer to
- * a IAS query we were doing to find the TSAP on one of the address
- * in the discovery log.
- *
- * If we have found a valid TSAP for the first time, save it. If it's
- * not the first time we found one, complain.
- *
- * If we have more addresses in the log, just initiate a new query.
- * Note that those query may fail (see irnet_discover_daddr_and_lsap_sel())
- *
- * Otherwise, wrap up the procedure (cleanup), check if we have found
- * any device and connect to it.
- */
-static void
-irnet_discovervalue_confirm(int result,
- __u16 obj_id,
- struct ias_value *value,
- void * priv)
-{
- irnet_socket * self = (irnet_socket *) priv;
- __u8 dtsap_sel; /* TSAP we are looking for */
-
- DENTER(IRDA_OCB_TRACE, "(self=0x%p)\n", self);
- DASSERT(self != NULL, , IRDA_OCB_ERROR, "Self is NULL !!!\n");
-
- /* Check if already connected (via irnet_connect_socket())
- * or socket is closing down (via irda_irnet_destroy()) */
- if(! test_bit(0, &self->ttp_connect))
- {
- DERROR(IRDA_OCB_ERROR, "Socket no longer connecting. Ouch !\n");
- return;
- }
-
- /* Post process the IAS reply */
- dtsap_sel = irnet_ias_to_tsap(self, result, value);
-
- /* Have we got something ? */
- if(self->errno == 0)
- {
- /* We found the requested service */
- if(self->daddr != DEV_ADDR_ANY)
- {
- DERROR(IRDA_OCB_ERROR, "More than one device in range supports IrNET...\n");
- }
- else
- {
- /* First time we found that one, save it ! */
- self->daddr = self->discoveries[self->disco_index].daddr;
- self->dtsap_sel = dtsap_sel;
- }
- }
-
- /* If no failure */
- if((self->errno == -EADDRNOTAVAIL) || (self->errno == 0))
- {
- int ret;
-
- /* Search the next node */
- ret = irnet_discover_next_daddr(self);
- if(!ret)
- {
- /* In this case, the above request was non-blocking.
- * We will return here after a while... */
- return;
- }
- /* In this case, we have processed the last discovery item */
- }
-
- /* No more queries to be done (failure or last one) */
-
- /* We probably don't need to make any more queries */
- iriap_close(self->iriap);
- self->iriap = NULL;
-
- /* No more items : remove the log and signal termination */
- DEBUG(IRDA_OCB_INFO, "Cleaning up log (0x%p)\n",
- self->discoveries);
- if(self->discoveries != NULL)
- {
- /* Cleanup our copy of the discovery log */
- kfree(self->discoveries);
- self->discoveries = NULL;
- }
- self->disco_number = -1;
-
- /* Check out what we found */
- if(self->daddr == DEV_ADDR_ANY)
- {
- self->daddr = DEV_ADDR_ANY;
- clear_bit(0, &self->ttp_connect);
- DEXIT(IRDA_OCB_TRACE, ": cannot discover IrNET in any device !!!\n");
- return;
- }
-
- /* We have a valid address - just connect */
-
- DEBUG(IRDA_OCB_INFO, "daddr = %08x, lsap = %d, starting IrTTP connection\n",
- self->daddr, self->dtsap_sel);
-
- /* Start up TTP - non blocking */
- irnet_connect_tsap(self);
-
- DEXIT(IRDA_OCB_TRACE, "\n");
-}
-
-#ifdef DISCOVERY_EVENTS
-/*------------------------------------------------------------------*/
-/*
- * Function irnet_discovery_indication (discovery)
- *
- * Got a discovery indication from IrLMP, post an event
- *
- * Note : IrLMP take care of matching the hint mask for us, and also
- * check if it is a "new" node for us...
- *
- * As IrLMP filter on the IrLAN hint bit, we get both IrLAN and IrNET
- * nodes, so it's only at connection time that we will know if the
- * node support IrNET, IrLAN or both. The other solution is to check
- * in IAS the PNP ids and service name.
- * Note : even if a node support IrNET (or IrLAN), it's no guarantee
- * that we will be able to connect to it, the node might already be
- * busy...
- *
- * One last thing : in some case, this function will trigger duplicate
- * discovery events. On the other hand, we should catch all
- * discoveries properly (i.e. not miss one). Filtering duplicate here
- * is to messy, so we leave that to user space...
- */
-static void
-irnet_discovery_indication(discinfo_t * discovery,
- DISCOVERY_MODE mode,
- void * priv)
-{
- irnet_socket * self = &irnet_server.s;
-
- DENTER(IRDA_OCB_TRACE, "(self=0x%p)\n", self);
- DASSERT(priv == &irnet_server, , IRDA_OCB_ERROR,
- "Invalid instance (0x%p) !!!\n", priv);
-
- DEBUG(IRDA_OCB_INFO, "Discovered new IrNET/IrLAN node %s...\n",
- discovery->info);
-
- /* Notify the control channel */
- irnet_post_event(NULL, IRNET_DISCOVER,
- discovery->saddr, discovery->daddr, discovery->info,
- get_unaligned((__u16 *)discovery->hints));
-
- DEXIT(IRDA_OCB_TRACE, "\n");
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Function irnet_expiry_indication (expiry)
- *
- * Got a expiry indication from IrLMP, post an event
- *
- * Note : IrLMP take care of matching the hint mask for us, we only
- * check if it is a "new" node...
- */
-static void
-irnet_expiry_indication(discinfo_t * expiry,
- DISCOVERY_MODE mode,
- void * priv)
-{
- irnet_socket * self = &irnet_server.s;
-
- DENTER(IRDA_OCB_TRACE, "(self=0x%p)\n", self);
- DASSERT(priv == &irnet_server, , IRDA_OCB_ERROR,
- "Invalid instance (0x%p) !!!\n", priv);
-
- DEBUG(IRDA_OCB_INFO, "IrNET/IrLAN node %s expired...\n",
- expiry->info);
-
- /* Notify the control channel */
- irnet_post_event(NULL, IRNET_EXPIRE,
- expiry->saddr, expiry->daddr, expiry->info,
- get_unaligned((__u16 *)expiry->hints));
-
- DEXIT(IRDA_OCB_TRACE, "\n");
-}
-#endif /* DISCOVERY_EVENTS */
-
-
-/*********************** PROC ENTRY CALLBACKS ***********************/
-/*
- * We create a instance in the /proc filesystem, and here we take care
- * of that...
- */
-
-#ifdef CONFIG_PROC_FS
-static int
-irnet_proc_show(struct seq_file *m, void *v)
-{
- irnet_socket * self;
- char * state;
- int i = 0;
-
- /* Get the IrNET server information... */
- seq_printf(m, "IrNET server - ");
- seq_printf(m, "IrDA state: %s, ",
- (irnet_server.running ? "running" : "dead"));
- seq_printf(m, "stsap_sel: %02x, ", irnet_server.s.stsap_sel);
- seq_printf(m, "dtsap_sel: %02x\n", irnet_server.s.dtsap_sel);
-
- /* Do we need to continue ? */
- if(!irnet_server.running)
- return 0;
-
- /* Protect access to the instance list */
- spin_lock_bh(&irnet_server.spinlock);
-
- /* Get the sockets one by one... */
- self = (irnet_socket *) hashbin_get_first(irnet_server.list);
- while(self != NULL)
- {
- /* Start printing info about the socket. */
- seq_printf(m, "\nIrNET socket %d - ", i++);
-
- /* First, get the requested configuration */
- seq_printf(m, "Requested IrDA name: \"%s\", ", self->rname);
- seq_printf(m, "daddr: %08x, ", self->rdaddr);
- seq_printf(m, "saddr: %08x\n", self->rsaddr);
-
- /* Second, get all the PPP info */
- seq_printf(m, " PPP state: %s",
- (self->ppp_open ? "registered" : "unregistered"));
- if(self->ppp_open)
- {
- seq_printf(m, ", unit: ppp%d",
- ppp_unit_number(&self->chan));
- seq_printf(m, ", channel: %d",
- ppp_channel_index(&self->chan));
- seq_printf(m, ", mru: %d",
- self->mru);
- /* Maybe add self->flags ? Later... */
- }
-
- /* Then, get all the IrDA specific info... */
- if(self->ttp_open)
- state = "connected";
- else
- if(self->tsap != NULL)
- state = "connecting";
- else
- if(self->iriap != NULL)
- state = "searching";
- else
- if(self->ttp_connect)
- state = "weird";
- else
- state = "idle";
- seq_printf(m, "\n IrDA state: %s, ", state);
- seq_printf(m, "daddr: %08x, ", self->daddr);
- seq_printf(m, "stsap_sel: %02x, ", self->stsap_sel);
- seq_printf(m, "dtsap_sel: %02x\n", self->dtsap_sel);
-
- /* Next socket, please... */
- self = (irnet_socket *) hashbin_get_next(irnet_server.list);
- }
-
- /* Spin lock end */
- spin_unlock_bh(&irnet_server.spinlock);
-
- return 0;
-}
-
-static int irnet_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, irnet_proc_show, NULL);
-}
-
-static const struct file_operations irnet_proc_fops = {
- .owner = THIS_MODULE,
- .open = irnet_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-#endif /* PROC_FS */
-
-
-/********************** CONFIGURATION/CLEANUP **********************/
-/*
- * Initialisation and teardown of the IrDA part, called at module
- * insertion and removal...
- */
-
-/*------------------------------------------------------------------*/
-/*
- * Prepare the IrNET layer for operation...
- */
-int __init
-irda_irnet_init(void)
-{
- int err = 0;
-
- DENTER(MODULE_TRACE, "()\n");
-
- /* Pure paranoia - should be redundant */
- memset(&irnet_server, 0, sizeof(struct irnet_root));
-
- /* Setup start of irnet instance list */
- irnet_server.list = hashbin_new(HB_NOLOCK);
- DABORT(irnet_server.list == NULL, -ENOMEM,
- MODULE_ERROR, "Can't allocate hashbin!\n");
- /* Init spinlock for instance list */
- spin_lock_init(&irnet_server.spinlock);
-
- /* Initialise control channel */
- init_waitqueue_head(&irnet_events.rwait);
- irnet_events.index = 0;
- /* Init spinlock for event logging */
- spin_lock_init(&irnet_events.spinlock);
-
-#ifdef CONFIG_PROC_FS
- /* Add a /proc file for irnet infos */
- proc_create("irnet", 0, proc_irda, &irnet_proc_fops);
-#endif /* CONFIG_PROC_FS */
-
- /* Setup the IrNET server */
- err = irnet_setup_server();
-
- if(!err)
- /* We are no longer functional... */
- irnet_server.running = 1;
-
- DEXIT(MODULE_TRACE, "\n");
- return err;
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Cleanup at exit...
- */
-void __exit
-irda_irnet_cleanup(void)
-{
- DENTER(MODULE_TRACE, "()\n");
-
- /* We are no longer there... */
- irnet_server.running = 0;
-
-#ifdef CONFIG_PROC_FS
- /* Remove our /proc file */
- remove_proc_entry("irnet", proc_irda);
-#endif /* CONFIG_PROC_FS */
-
- /* Remove our IrNET server from existence */
- irnet_destroy_server();
-
- /* Remove all instances of IrNET socket still present */
- hashbin_delete(irnet_server.list, (FREE_FUNC) irda_irnet_destroy);
-
- DEXIT(MODULE_TRACE, "\n");
-}
diff --git a/net/irda/irnet/irnet_irda.h b/net/irda/irnet/irnet_irda.h
deleted file mode 100644
index 3e408952a3f1..000000000000
--- a/net/irda/irnet/irnet_irda.h
+++ /dev/null
@@ -1,178 +0,0 @@
-/*
- * IrNET protocol module : Synchronous PPP over an IrDA socket.
- *
- * Jean II - HPL `00 - <jt@hpl.hp.com>
- *
- * This file contains all definitions and declarations necessary for the
- * IRDA part of the IrNET module (dealing with IrTTP, IrIAS and co).
- * This file is a private header, so other modules don't want to know
- * what's in there...
- */
-
-#ifndef IRNET_IRDA_H
-#define IRNET_IRDA_H
-
-/***************************** INCLUDES *****************************/
-/* Please add other headers in irnet.h */
-
-#include "irnet.h" /* Module global include */
-
-/************************ CONSTANTS & MACROS ************************/
-
-/*
- * Name of the service (socket name) used by IrNET
- */
-/* IAS object name (or part of it) */
-#define IRNET_SERVICE_NAME "IrNetv1"
-/* IAS attribute */
-#define IRNET_IAS_VALUE "IrDA:TinyTP:LsapSel"
-/* LMP notify name for client (only for /proc/net/irda/irlmp) */
-#define IRNET_NOTIFY_NAME "IrNET socket"
-/* LMP notify name for server (only for /proc/net/irda/irlmp) */
-#define IRNET_NOTIFY_NAME_SERV "IrNET server"
-
-/****************************** TYPES ******************************/
-
-/*
- * This is the main structure where we store all the data pertaining to
- * the IrNET server (listen for connection requests) and the root
- * of the IrNET socket list
- */
-typedef struct irnet_root
-{
- irnet_socket s; /* To pretend we are a client... */
-
- /* Generic stuff */
- int magic; /* Paranoia */
- int running; /* Are we operational ? */
-
- /* Link list of all IrNET instances opened */
- hashbin_t * list;
- spinlock_t spinlock; /* Serialize access to the list */
- /* Note : the way hashbin has been designed is absolutely not
- * reentrant, beware... So, we blindly protect all with spinlock */
-
- /* Handle for the hint bit advertised in IrLMP */
- void * skey;
-
- /* Server socket part */
- struct ias_object * ias_obj; /* Our service name + lsap in IAS */
-
-} irnet_root;
-
-
-/**************************** PROTOTYPES ****************************/
-
-/* ----------------------- CONTROL CHANNEL ----------------------- */
-static void
- irnet_post_event(irnet_socket *,
- irnet_event,
- __u32,
- __u32,
- char *,
- __u16);
-/* ----------------------- IRDA SUBROUTINES ----------------------- */
-static inline int
- irnet_open_tsap(irnet_socket *);
-static inline __u8
- irnet_ias_to_tsap(irnet_socket *,
- int,
- struct ias_value *);
-static inline int
- irnet_find_lsap_sel(irnet_socket *);
-static inline int
- irnet_connect_tsap(irnet_socket *);
-static inline int
- irnet_discover_next_daddr(irnet_socket *);
-static inline int
- irnet_discover_daddr_and_lsap_sel(irnet_socket *);
-static inline int
- irnet_dname_to_daddr(irnet_socket *);
-/* ------------------------ SERVER SOCKET ------------------------ */
-static inline int
- irnet_daddr_to_dname(irnet_socket *);
-static inline irnet_socket *
- irnet_find_socket(irnet_socket *);
-static inline int
- irnet_connect_socket(irnet_socket *,
- irnet_socket *,
- struct qos_info *,
- __u32,
- __u8);
-static inline void
- irnet_disconnect_server(irnet_socket *,
- struct sk_buff *);
-static inline int
- irnet_setup_server(void);
-static inline void
- irnet_destroy_server(void);
-/* ---------------------- IRDA-TTP CALLBACKS ---------------------- */
-static int
- irnet_data_indication(void *, /* instance */
- void *, /* sap */
- struct sk_buff *);
-static void
- irnet_disconnect_indication(void *,
- void *,
- LM_REASON,
- struct sk_buff *);
-static void
- irnet_connect_confirm(void *,
- void *,
- struct qos_info *,
- __u32,
- __u8,
- struct sk_buff *);
-static void
- irnet_flow_indication(void *,
- void *,
- LOCAL_FLOW);
-static void
- irnet_status_indication(void *,
- LINK_STATUS,
- LOCK_STATUS);
-static void
- irnet_connect_indication(void *,
- void *,
- struct qos_info *,
- __u32,
- __u8,
- struct sk_buff *);
-/* -------------------- IRDA-IAS/LMP CALLBACKS -------------------- */
-static void
- irnet_getvalue_confirm(int,
- __u16,
- struct ias_value *,
- void *);
-static void
- irnet_discovervalue_confirm(int,
- __u16,
- struct ias_value *,
- void *);
-#ifdef DISCOVERY_EVENTS
-static void
- irnet_discovery_indication(discinfo_t *,
- DISCOVERY_MODE,
- void *);
-static void
- irnet_expiry_indication(discinfo_t *,
- DISCOVERY_MODE,
- void *);
-#endif
-
-/**************************** VARIABLES ****************************/
-
-/*
- * The IrNET server. Listen to connection requests and co...
- */
-static struct irnet_root irnet_server;
-
-/* Control channel stuff (note : extern) */
-struct irnet_ctrl_channel irnet_events;
-
-/* The /proc/net/irda directory, defined elsewhere... */
-#ifdef CONFIG_PROC_FS
-extern struct proc_dir_entry *proc_irda;
-#endif /* CONFIG_PROC_FS */
-
-#endif /* IRNET_IRDA_H */
diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c
deleted file mode 100644
index 7025dcb853d0..000000000000
--- a/net/irda/irnet/irnet_ppp.c
+++ /dev/null
@@ -1,1189 +0,0 @@
-/*
- * IrNET protocol module : Synchronous PPP over an IrDA socket.
- *
- * Jean II - HPL `00 - <jt@hpl.hp.com>
- *
- * This file implement the PPP interface and /dev/irnet character device.
- * The PPP interface hook to the ppp_generic module, handle all our
- * relationship to the PPP code in the kernel (and by extension to pppd),
- * and exchange PPP frames with this module (send/receive).
- * The /dev/irnet device is used primarily for 2 functions :
- * 1) as a stub for pppd (the ppp daemon), so that we can appropriately
- * generate PPP sessions (we pretend we are a tty).
- * 2) as a control channel (write commands, read events)
- */
-
-#include <linux/sched/signal.h>
-#include <linux/slab.h>
-
-#include "irnet_ppp.h" /* Private header */
-/* Please put other headers in irnet.h - Thanks */
-
-/* Generic PPP callbacks (to call us) */
-static const struct ppp_channel_ops irnet_ppp_ops = {
- .start_xmit = ppp_irnet_send,
- .ioctl = ppp_irnet_ioctl
-};
-
-/************************* CONTROL CHANNEL *************************/
-/*
- * When a pppd instance is not active on /dev/irnet, it acts as a control
- * channel.
- * Writing allow to set up the IrDA destination of the IrNET channel,
- * and any application may be read events happening in IrNET...
- */
-
-/*------------------------------------------------------------------*/
-/*
- * Write is used to send a command to configure a IrNET channel
- * before it is open by pppd. The syntax is : "command argument"
- * Currently there is only two defined commands :
- * o name : set the requested IrDA nickname of the IrNET peer.
- * o addr : set the requested IrDA address of the IrNET peer.
- * Note : the code is crude, but effective...
- */
-static inline ssize_t
-irnet_ctrl_write(irnet_socket * ap,
- const char __user *buf,
- size_t count)
-{
- char command[IRNET_MAX_COMMAND];
- char * start; /* Current command being processed */
- char * next; /* Next command to process */
- int length; /* Length of current command */
-
- DENTER(CTRL_TRACE, "(ap=0x%p, count=%zd)\n", ap, count);
-
- /* Check for overflow... */
- DABORT(count >= IRNET_MAX_COMMAND, -ENOMEM,
- CTRL_ERROR, "Too much data !!!\n");
-
- /* Get the data in the driver */
- if(copy_from_user(command, buf, count))
- {
- DERROR(CTRL_ERROR, "Invalid user space pointer.\n");
- return -EFAULT;
- }
-
- /* Safe terminate the string */
- command[count] = '\0';
- DEBUG(CTRL_INFO, "Command line received is ``%s'' (%zd).\n",
- command, count);
-
- /* Check every commands in the command line */
- next = command;
- while(next != NULL)
- {
- /* Look at the next command */
- start = next;
-
- /* Scrap whitespaces before the command */
- start = skip_spaces(start);
-
- /* ',' is our command separator */
- next = strchr(start, ',');
- if(next)
- {
- *next = '\0'; /* Terminate command */
- length = next - start; /* Length */
- next++; /* Skip the '\0' */
- }
- else
- length = strlen(start);
-
- DEBUG(CTRL_INFO, "Found command ``%s'' (%d).\n", start, length);
-
- /* Check if we recognised one of the known command
- * We can't use "switch" with strings, so hack with "continue" */
-
- /* First command : name -> Requested IrDA nickname */
- if(!strncmp(start, "name", 4))
- {
- /* Copy the name only if is included and not "any" */
- if((length > 5) && (strcmp(start + 5, "any")))
- {
- /* Strip out trailing whitespaces */
- while(isspace(start[length - 1]))
- length--;
-
- DABORT(length < 5 || length > NICKNAME_MAX_LEN + 5,
- -EINVAL, CTRL_ERROR, "Invalid nickname.\n");
-
- /* Copy the name for later reuse */
- memcpy(ap->rname, start + 5, length - 5);
- ap->rname[length - 5] = '\0';
- }
- else
- ap->rname[0] = '\0';
- DEBUG(CTRL_INFO, "Got rname = ``%s''\n", ap->rname);
-
- /* Restart the loop */
- continue;
- }
-
- /* Second command : addr, daddr -> Requested IrDA destination address
- * Also process : saddr -> Requested IrDA source address */
- if((!strncmp(start, "addr", 4)) ||
- (!strncmp(start, "daddr", 5)) ||
- (!strncmp(start, "saddr", 5)))
- {
- __u32 addr = DEV_ADDR_ANY;
-
- /* Copy the address only if is included and not "any" */
- if((length > 5) && (strcmp(start + 5, "any")))
- {
- char * begp = start + 5;
- char * endp;
-
- /* Scrap whitespaces before the command */
- begp = skip_spaces(begp);
-
- /* Convert argument to a number (last arg is the base) */
- addr = simple_strtoul(begp, &endp, 16);
- /* Has it worked ? (endp should be start + length) */
- DABORT(endp <= (start + 5), -EINVAL,
- CTRL_ERROR, "Invalid address.\n");
- }
- /* Which type of address ? */
- if(start[0] == 's')
- {
- /* Save it */
- ap->rsaddr = addr;
- DEBUG(CTRL_INFO, "Got rsaddr = %08x\n", ap->rsaddr);
- }
- else
- {
- /* Save it */
- ap->rdaddr = addr;
- DEBUG(CTRL_INFO, "Got rdaddr = %08x\n", ap->rdaddr);
- }
-
- /* Restart the loop */
- continue;
- }
-
- /* Other possible command : connect N (number of retries) */
-
- /* No command matched -> Failed... */
- DABORT(1, -EINVAL, CTRL_ERROR, "Not a recognised IrNET command.\n");
- }
-
- /* Success : we have parsed all commands successfully */
- return count;
-}
-
-#ifdef INITIAL_DISCOVERY
-/*------------------------------------------------------------------*/
-/*
- * Function irnet_get_discovery_log (self)
- *
- * Query the content on the discovery log if not done
- *
- * This function query the current content of the discovery log
- * at the startup of the event channel and save it in the internal struct.
- */
-static void
-irnet_get_discovery_log(irnet_socket * ap)
-{
- __u16 mask = irlmp_service_to_hint(S_LAN);
-
- /* Ask IrLMP for the current discovery log */
- ap->discoveries = irlmp_get_discoveries(&ap->disco_number, mask,
- DISCOVERY_DEFAULT_SLOTS);
-
- /* Check if the we got some results */
- if(ap->discoveries == NULL)
- ap->disco_number = -1;
-
- DEBUG(CTRL_INFO, "Got the log (0x%p), size is %d\n",
- ap->discoveries, ap->disco_number);
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Function irnet_read_discovery_log (self, event)
- *
- * Read the content on the discovery log
- *
- * This function dump the current content of the discovery log
- * at the startup of the event channel.
- * Return 1 if wrote an event on the control channel...
- *
- * State of the ap->disco_XXX variables :
- * Socket creation : discoveries = NULL ; disco_index = 0 ; disco_number = 0
- * While reading : discoveries = ptr ; disco_index = X ; disco_number = Y
- * After reading : discoveries = NULL ; disco_index = Y ; disco_number = -1
- */
-static inline int
-irnet_read_discovery_log(irnet_socket *ap, char *event, int buf_size)
-{
- int done_event = 0;
-
- DENTER(CTRL_TRACE, "(ap=0x%p, event=0x%p)\n",
- ap, event);
-
- /* Test if we have some work to do or we have already finished */
- if(ap->disco_number == -1)
- {
- DEBUG(CTRL_INFO, "Already done\n");
- return 0;
- }
-
- /* Test if it's the first time and therefore we need to get the log */
- if(ap->discoveries == NULL)
- irnet_get_discovery_log(ap);
-
- /* Check if we have more item to dump */
- if(ap->disco_index < ap->disco_number)
- {
- /* Write an event */
- snprintf(event, buf_size,
- "Found %08x (%s) behind %08x {hints %02X-%02X}\n",
- ap->discoveries[ap->disco_index].daddr,
- ap->discoveries[ap->disco_index].info,
- ap->discoveries[ap->disco_index].saddr,
- ap->discoveries[ap->disco_index].hints[0],
- ap->discoveries[ap->disco_index].hints[1]);
- DEBUG(CTRL_INFO, "Writing discovery %d : %s\n",
- ap->disco_index, ap->discoveries[ap->disco_index].info);
-
- /* We have an event */
- done_event = 1;
- /* Next discovery */
- ap->disco_index++;
- }
-
- /* Check if we have done the last item */
- if(ap->disco_index >= ap->disco_number)
- {
- /* No more items : remove the log and signal termination */
- DEBUG(CTRL_INFO, "Cleaning up log (0x%p)\n",
- ap->discoveries);
- if(ap->discoveries != NULL)
- {
- /* Cleanup our copy of the discovery log */
- kfree(ap->discoveries);
- ap->discoveries = NULL;
- }
- ap->disco_number = -1;
- }
-
- return done_event;
-}
-#endif /* INITIAL_DISCOVERY */
-
-/*------------------------------------------------------------------*/
-/*
- * Read is used to get IrNET events
- */
-static inline ssize_t
-irnet_ctrl_read(irnet_socket * ap,
- struct file * file,
- char __user * buf,
- size_t count)
-{
- DECLARE_WAITQUEUE(wait, current);
- char event[75];
- ssize_t ret = 0;
-
- DENTER(CTRL_TRACE, "(ap=0x%p, count=%zd)\n", ap, count);
-
-#ifdef INITIAL_DISCOVERY
- /* Check if we have read the log */
- if (irnet_read_discovery_log(ap, event, sizeof(event)))
- {
- count = min(strlen(event), count);
- if (copy_to_user(buf, event, count))
- {
- DERROR(CTRL_ERROR, "Invalid user space pointer.\n");
- return -EFAULT;
- }
-
- DEXIT(CTRL_TRACE, "\n");
- return count;
- }
-#endif /* INITIAL_DISCOVERY */
-
- /* Put ourselves on the wait queue to be woken up */
- add_wait_queue(&irnet_events.rwait, &wait);
- set_current_state(TASK_INTERRUPTIBLE);
- for(;;)
- {
- /* If there is unread events */
- ret = 0;
- if(ap->event_index != irnet_events.index)
- break;
- ret = -EAGAIN;
- if(file->f_flags & O_NONBLOCK)
- break;
- ret = -ERESTARTSYS;
- if(signal_pending(current))
- break;
- /* Yield and wait to be woken up */
- schedule();
- }
- __set_current_state(TASK_RUNNING);
- remove_wait_queue(&irnet_events.rwait, &wait);
-
- /* Did we got it ? */
- if(ret != 0)
- {
- /* No, return the error code */
- DEXIT(CTRL_TRACE, " - ret %zd\n", ret);
- return ret;
- }
-
- /* Which event is it ? */
- switch(irnet_events.log[ap->event_index].event)
- {
- case IRNET_DISCOVER:
- snprintf(event, sizeof(event),
- "Discovered %08x (%s) behind %08x {hints %02X-%02X}\n",
- irnet_events.log[ap->event_index].daddr,
- irnet_events.log[ap->event_index].name,
- irnet_events.log[ap->event_index].saddr,
- irnet_events.log[ap->event_index].hints.byte[0],
- irnet_events.log[ap->event_index].hints.byte[1]);
- break;
- case IRNET_EXPIRE:
- snprintf(event, sizeof(event),
- "Expired %08x (%s) behind %08x {hints %02X-%02X}\n",
- irnet_events.log[ap->event_index].daddr,
- irnet_events.log[ap->event_index].name,
- irnet_events.log[ap->event_index].saddr,
- irnet_events.log[ap->event_index].hints.byte[0],
- irnet_events.log[ap->event_index].hints.byte[1]);
- break;
- case IRNET_CONNECT_TO:
- snprintf(event, sizeof(event), "Connected to %08x (%s) on ppp%d\n",
- irnet_events.log[ap->event_index].daddr,
- irnet_events.log[ap->event_index].name,
- irnet_events.log[ap->event_index].unit);
- break;
- case IRNET_CONNECT_FROM:
- snprintf(event, sizeof(event), "Connection from %08x (%s) on ppp%d\n",
- irnet_events.log[ap->event_index].daddr,
- irnet_events.log[ap->event_index].name,
- irnet_events.log[ap->event_index].unit);
- break;
- case IRNET_REQUEST_FROM:
- snprintf(event, sizeof(event), "Request from %08x (%s) behind %08x\n",
- irnet_events.log[ap->event_index].daddr,
- irnet_events.log[ap->event_index].name,
- irnet_events.log[ap->event_index].saddr);
- break;
- case IRNET_NOANSWER_FROM:
- snprintf(event, sizeof(event), "No-answer from %08x (%s) on ppp%d\n",
- irnet_events.log[ap->event_index].daddr,
- irnet_events.log[ap->event_index].name,
- irnet_events.log[ap->event_index].unit);
- break;
- case IRNET_BLOCKED_LINK:
- snprintf(event, sizeof(event), "Blocked link with %08x (%s) on ppp%d\n",
- irnet_events.log[ap->event_index].daddr,
- irnet_events.log[ap->event_index].name,
- irnet_events.log[ap->event_index].unit);
- break;
- case IRNET_DISCONNECT_FROM:
- snprintf(event, sizeof(event), "Disconnection from %08x (%s) on ppp%d\n",
- irnet_events.log[ap->event_index].daddr,
- irnet_events.log[ap->event_index].name,
- irnet_events.log[ap->event_index].unit);
- break;
- case IRNET_DISCONNECT_TO:
- snprintf(event, sizeof(event), "Disconnected to %08x (%s)\n",
- irnet_events.log[ap->event_index].daddr,
- irnet_events.log[ap->event_index].name);
- break;
- default:
- snprintf(event, sizeof(event), "Bug\n");
- }
- /* Increment our event index */
- ap->event_index = (ap->event_index + 1) % IRNET_MAX_EVENTS;
-
- DEBUG(CTRL_INFO, "Event is :%s", event);
-
- count = min(strlen(event), count);
- if (copy_to_user(buf, event, count))
- {
- DERROR(CTRL_ERROR, "Invalid user space pointer.\n");
- return -EFAULT;
- }
-
- DEXIT(CTRL_TRACE, "\n");
- return count;
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Poll : called when someone do a select on /dev/irnet.
- * Just check if there are new events...
- */
-static inline unsigned int
-irnet_ctrl_poll(irnet_socket * ap,
- struct file * file,
- poll_table * wait)
-{
- unsigned int mask;
-
- DENTER(CTRL_TRACE, "(ap=0x%p)\n", ap);
-
- poll_wait(file, &irnet_events.rwait, wait);
- mask = POLLOUT | POLLWRNORM;
- /* If there is unread events */
- if(ap->event_index != irnet_events.index)
- mask |= POLLIN | POLLRDNORM;
-#ifdef INITIAL_DISCOVERY
- if(ap->disco_number != -1)
- {
- /* Test if it's the first time and therefore we need to get the log */
- if(ap->discoveries == NULL)
- irnet_get_discovery_log(ap);
- /* Recheck */
- if(ap->disco_number != -1)
- mask |= POLLIN | POLLRDNORM;
- }
-#endif /* INITIAL_DISCOVERY */
-
- DEXIT(CTRL_TRACE, " - mask=0x%X\n", mask);
- return mask;
-}
-
-
-/*********************** FILESYSTEM CALLBACKS ***********************/
-/*
- * Implement the usual open, read, write functions that will be called
- * by the file system when some action is performed on /dev/irnet.
- * Most of those actions will in fact be performed by "pppd" or
- * the control channel, we just act as a redirector...
- */
-
-/*------------------------------------------------------------------*/
-/*
- * Open : when somebody open /dev/irnet
- * We basically create a new instance of irnet and initialise it.
- */
-static int
-dev_irnet_open(struct inode * inode,
- struct file * file)
-{
- struct irnet_socket * ap;
- int err;
-
- DENTER(FS_TRACE, "(file=0x%p)\n", file);
-
-#ifdef SECURE_DEVIRNET
- /* This could (should?) be enforced by the permissions on /dev/irnet. */
- if(!capable(CAP_NET_ADMIN))
- return -EPERM;
-#endif /* SECURE_DEVIRNET */
-
- /* Allocate a private structure for this IrNET instance */
- ap = kzalloc(sizeof(*ap), GFP_KERNEL);
- DABORT(ap == NULL, -ENOMEM, FS_ERROR, "Can't allocate struct irnet...\n");
-
- /* initialize the irnet structure */
- ap->file = file;
-
- /* PPP channel setup */
- ap->ppp_open = 0;
- ap->chan.private = ap;
- ap->chan.ops = &irnet_ppp_ops;
- ap->chan.mtu = (2048 - TTP_MAX_HEADER - 2 - PPP_HDRLEN);
- ap->chan.hdrlen = 2 + TTP_MAX_HEADER; /* for A/C + Max IrDA hdr */
- /* PPP parameters */
- ap->mru = (2048 - TTP_MAX_HEADER - 2 - PPP_HDRLEN);
- ap->xaccm[0] = ~0U;
- ap->xaccm[3] = 0x60000000U;
- ap->raccm = ~0U;
-
- /* Setup the IrDA part... */
- err = irda_irnet_create(ap);
- if(err)
- {
- DERROR(FS_ERROR, "Can't setup IrDA link...\n");
- kfree(ap);
-
- return err;
- }
-
- /* For the control channel */
- ap->event_index = irnet_events.index; /* Cancel all past events */
-
- mutex_init(&ap->lock);
-
- /* Put our stuff where we will be able to find it later */
- file->private_data = ap;
-
- DEXIT(FS_TRACE, " - ap=0x%p\n", ap);
-
- return 0;
-}
-
-
-/*------------------------------------------------------------------*/
-/*
- * Close : when somebody close /dev/irnet
- * Destroy the instance of /dev/irnet
- */
-static int
-dev_irnet_close(struct inode * inode,
- struct file * file)
-{
- irnet_socket * ap = file->private_data;
-
- DENTER(FS_TRACE, "(file=0x%p, ap=0x%p)\n",
- file, ap);
- DABORT(ap == NULL, 0, FS_ERROR, "ap is NULL !!!\n");
-
- /* Detach ourselves */
- file->private_data = NULL;
-
- /* Close IrDA stuff */
- irda_irnet_destroy(ap);
-
- /* Disconnect from the generic PPP layer if not already done */
- if(ap->ppp_open)
- {
- DERROR(FS_ERROR, "Channel still registered - deregistering !\n");
- ap->ppp_open = 0;
- ppp_unregister_channel(&ap->chan);
- }
-
- kfree(ap);
-
- DEXIT(FS_TRACE, "\n");
- return 0;
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Write does nothing.
- * (we receive packet from ppp_generic through ppp_irnet_send())
- */
-static ssize_t
-dev_irnet_write(struct file * file,
- const char __user *buf,
- size_t count,
- loff_t * ppos)
-{
- irnet_socket * ap = file->private_data;
-
- DPASS(FS_TRACE, "(file=0x%p, ap=0x%p, count=%zd)\n",
- file, ap, count);
- DABORT(ap == NULL, -ENXIO, FS_ERROR, "ap is NULL !!!\n");
-
- /* If we are connected to ppp_generic, let it handle the job */
- if(ap->ppp_open)
- return -EAGAIN;
- else
- return irnet_ctrl_write(ap, buf, count);
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Read doesn't do much either.
- * (pppd poll us, but ultimately reads through /dev/ppp)
- */
-static ssize_t
-dev_irnet_read(struct file * file,
- char __user * buf,
- size_t count,
- loff_t * ppos)
-{
- irnet_socket * ap = file->private_data;
-
- DPASS(FS_TRACE, "(file=0x%p, ap=0x%p, count=%zd)\n",
- file, ap, count);
- DABORT(ap == NULL, -ENXIO, FS_ERROR, "ap is NULL !!!\n");
-
- /* If we are connected to ppp_generic, let it handle the job */
- if(ap->ppp_open)
- return -EAGAIN;
- else
- return irnet_ctrl_read(ap, file, buf, count);
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Poll : called when someone do a select on /dev/irnet
- */
-static unsigned int
-dev_irnet_poll(struct file * file,
- poll_table * wait)
-{
- irnet_socket * ap = file->private_data;
- unsigned int mask;
-
- DENTER(FS_TRACE, "(file=0x%p, ap=0x%p)\n",
- file, ap);
-
- mask = POLLOUT | POLLWRNORM;
- DABORT(ap == NULL, mask, FS_ERROR, "ap is NULL !!!\n");
-
- /* If we are connected to ppp_generic, let it handle the job */
- if(!ap->ppp_open)
- mask |= irnet_ctrl_poll(ap, file, wait);
-
- DEXIT(FS_TRACE, " - mask=0x%X\n", mask);
- return mask;
-}
-
-/*------------------------------------------------------------------*/
-/*
- * IOCtl : Called when someone does some ioctls on /dev/irnet
- * This is the way pppd configure us and control us while the PPP
- * instance is active.
- */
-static long
-dev_irnet_ioctl(
- struct file * file,
- unsigned int cmd,
- unsigned long arg)
-{
- irnet_socket * ap = file->private_data;
- int err;
- int val;
- void __user *argp = (void __user *)arg;
-
- DENTER(FS_TRACE, "(file=0x%p, ap=0x%p, cmd=0x%X)\n",
- file, ap, cmd);
-
- /* Basic checks... */
- DASSERT(ap != NULL, -ENXIO, PPP_ERROR, "ap is NULL...\n");
-#ifdef SECURE_DEVIRNET
- if(!capable(CAP_NET_ADMIN))
- return -EPERM;
-#endif /* SECURE_DEVIRNET */
-
- err = -EFAULT;
- switch(cmd)
- {
- /* Set discipline (should be N_SYNC_PPP or N_TTY) */
- case TIOCSETD:
- if(get_user(val, (int __user *)argp))
- break;
- if((val == N_SYNC_PPP) || (val == N_PPP))
- {
- DEBUG(FS_INFO, "Entering PPP discipline.\n");
- /* PPP channel setup (ap->chan in configured in dev_irnet_open())*/
- if (mutex_lock_interruptible(&ap->lock))
- return -EINTR;
-
- err = ppp_register_channel(&ap->chan);
- if(err == 0)
- {
- /* Our ppp side is active */
- ap->ppp_open = 1;
-
- DEBUG(FS_INFO, "Trying to establish a connection.\n");
- /* Setup the IrDA link now - may fail... */
- irda_irnet_connect(ap);
- }
- else
- DERROR(FS_ERROR, "Can't setup PPP channel...\n");
-
- mutex_unlock(&ap->lock);
- }
- else
- {
- /* In theory, should be N_TTY */
- DEBUG(FS_INFO, "Exiting PPP discipline.\n");
- /* Disconnect from the generic PPP layer */
- if (mutex_lock_interruptible(&ap->lock))
- return -EINTR;
-
- if(ap->ppp_open)
- {
- ap->ppp_open = 0;
- ppp_unregister_channel(&ap->chan);
- }
- else
- DERROR(FS_ERROR, "Channel not registered !\n");
- err = 0;
-
- mutex_unlock(&ap->lock);
- }
- break;
-
- /* Query PPP channel and unit number */
- case PPPIOCGCHAN:
- if (mutex_lock_interruptible(&ap->lock))
- return -EINTR;
-
- if(ap->ppp_open && !put_user(ppp_channel_index(&ap->chan),
- (int __user *)argp))
- err = 0;
-
- mutex_unlock(&ap->lock);
- break;
- case PPPIOCGUNIT:
- if (mutex_lock_interruptible(&ap->lock))
- return -EINTR;
-
- if(ap->ppp_open && !put_user(ppp_unit_number(&ap->chan),
- (int __user *)argp))
- err = 0;
-
- mutex_unlock(&ap->lock);
- break;
-
- /* All these ioctls can be passed both directly and from ppp_generic,
- * so we just deal with them in one place...
- */
- case PPPIOCGFLAGS:
- case PPPIOCSFLAGS:
- case PPPIOCGASYNCMAP:
- case PPPIOCSASYNCMAP:
- case PPPIOCGRASYNCMAP:
- case PPPIOCSRASYNCMAP:
- case PPPIOCGXASYNCMAP:
- case PPPIOCSXASYNCMAP:
- case PPPIOCGMRU:
- case PPPIOCSMRU:
- DEBUG(FS_INFO, "Standard PPP ioctl.\n");
- if(!capable(CAP_NET_ADMIN))
- err = -EPERM;
- else {
- if (mutex_lock_interruptible(&ap->lock))
- return -EINTR;
-
- err = ppp_irnet_ioctl(&ap->chan, cmd, arg);
-
- mutex_unlock(&ap->lock);
- }
- break;
-
- /* TTY IOCTLs : Pretend that we are a tty, to keep pppd happy */
- /* Get termios */
- case TCGETS:
- DEBUG(FS_INFO, "Get termios.\n");
- if (mutex_lock_interruptible(&ap->lock))
- return -EINTR;
-
-#ifndef TCGETS2
- if(!kernel_termios_to_user_termios((struct termios __user *)argp, &ap->termios))
- err = 0;
-#else
- if(kernel_termios_to_user_termios_1((struct termios __user *)argp, &ap->termios))
- err = 0;
-#endif
-
- mutex_unlock(&ap->lock);
- break;
- /* Set termios */
- case TCSETSF:
- DEBUG(FS_INFO, "Set termios.\n");
- if (mutex_lock_interruptible(&ap->lock))
- return -EINTR;
-
-#ifndef TCGETS2
- if(!user_termios_to_kernel_termios(&ap->termios, (struct termios __user *)argp))
- err = 0;
-#else
- if(!user_termios_to_kernel_termios_1(&ap->termios, (struct termios __user *)argp))
- err = 0;
-#endif
-
- mutex_unlock(&ap->lock);
- break;
-
- /* Set DTR/RTS */
- case TIOCMBIS:
- case TIOCMBIC:
- /* Set exclusive/non-exclusive mode */
- case TIOCEXCL:
- case TIOCNXCL:
- DEBUG(FS_INFO, "TTY compatibility.\n");
- err = 0;
- break;
-
- case TCGETA:
- DEBUG(FS_INFO, "TCGETA\n");
- break;
-
- case TCFLSH:
- DEBUG(FS_INFO, "TCFLSH\n");
- /* Note : this will flush buffers in PPP, so it *must* be done
- * We should also worry that we don't accept junk here and that
- * we get rid of our own buffers */
-#ifdef FLUSH_TO_PPP
- if (mutex_lock_interruptible(&ap->lock))
- return -EINTR;
- ppp_output_wakeup(&ap->chan);
- mutex_unlock(&ap->lock);
-#endif /* FLUSH_TO_PPP */
- err = 0;
- break;
-
- case FIONREAD:
- DEBUG(FS_INFO, "FIONREAD\n");
- val = 0;
- if(put_user(val, (int __user *)argp))
- break;
- err = 0;
- break;
-
- default:
- DERROR(FS_ERROR, "Unsupported ioctl (0x%X)\n", cmd);
- err = -ENOTTY;
- }
-
- DEXIT(FS_TRACE, " - err = 0x%X\n", err);
- return err;
-}
-
-/************************** PPP CALLBACKS **************************/
-/*
- * This are the functions that the generic PPP driver in the kernel
- * will call to communicate to us.
- */
-
-/*------------------------------------------------------------------*/
-/*
- * Prepare the ppp frame for transmission over the IrDA socket.
- * We make sure that the header space is enough, and we change ppp header
- * according to flags passed by pppd.
- * This is not a callback, but just a helper function used in ppp_irnet_send()
- */
-static inline struct sk_buff *
-irnet_prepare_skb(irnet_socket * ap,
- struct sk_buff * skb)
-{
- unsigned char * data;
- int proto; /* PPP protocol */
- int islcp; /* Protocol == LCP */
- int needaddr; /* Need PPP address */
-
- DENTER(PPP_TRACE, "(ap=0x%p, skb=0x%p)\n",
- ap, skb);
-
- /* Extract PPP protocol from the frame */
- data = skb->data;
- proto = (data[0] << 8) + data[1];
-
- /* LCP packets with codes between 1 (configure-request)
- * and 7 (code-reject) must be sent as though no options
- * have been negotiated. */
- islcp = (proto == PPP_LCP) && (1 <= data[2]) && (data[2] <= 7);
-
- /* compress protocol field if option enabled */
- if((data[0] == 0) && (ap->flags & SC_COMP_PROT) && (!islcp))
- skb_pull(skb,1);
-
- /* Check if we need address/control fields */
- needaddr = 2*((ap->flags & SC_COMP_AC) == 0 || islcp);
-
- /* Is the skb headroom large enough to contain all IrDA-headers? */
- if((skb_headroom(skb) < (ap->max_header_size + needaddr)) ||
- (skb_shared(skb)))
- {
- struct sk_buff * new_skb;
-
- DEBUG(PPP_INFO, "Reallocating skb\n");
-
- /* Create a new skb */
- new_skb = skb_realloc_headroom(skb, ap->max_header_size + needaddr);
-
- /* We have to free the original skb anyway */
- dev_kfree_skb(skb);
-
- /* Did the realloc succeed ? */
- DABORT(new_skb == NULL, NULL, PPP_ERROR, "Could not realloc skb\n");
-
- /* Use the new skb instead */
- skb = new_skb;
- }
-
- /* prepend address/control fields if necessary */
- if(needaddr)
- {
- skb_push(skb, 2);
- skb->data[0] = PPP_ALLSTATIONS;
- skb->data[1] = PPP_UI;
- }
-
- DEXIT(PPP_TRACE, "\n");
-
- return skb;
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Send a packet to the peer over the IrTTP connection.
- * Returns 1 iff the packet was accepted.
- * Returns 0 iff packet was not consumed.
- * If the packet was not accepted, we will call ppp_output_wakeup
- * at some later time to reactivate flow control in ppp_generic.
- */
-static int
-ppp_irnet_send(struct ppp_channel * chan,
- struct sk_buff * skb)
-{
- irnet_socket * self = (struct irnet_socket *) chan->private;
- int ret;
-
- DENTER(PPP_TRACE, "(channel=0x%p, ap/self=0x%p)\n",
- chan, self);
-
- /* Check if things are somewhat valid... */
- DASSERT(self != NULL, 0, PPP_ERROR, "Self is NULL !!!\n");
-
- /* Check if we are connected */
- if(!(test_bit(0, &self->ttp_open)))
- {
-#ifdef CONNECT_IN_SEND
- /* Let's try to connect one more time... */
- /* Note : we won't be connected after this call, but we should be
- * ready for next packet... */
- /* If we are already connecting, this will fail */
- irda_irnet_connect(self);
-#endif /* CONNECT_IN_SEND */
-
- DEBUG(PPP_INFO, "IrTTP not ready ! (%ld-%ld)\n",
- self->ttp_open, self->ttp_connect);
-
- /* Note : we can either drop the packet or block the packet.
- *
- * Blocking the packet allow us a better connection time,
- * because by calling ppp_output_wakeup() we can have
- * ppp_generic resending the LCP request immediately to us,
- * rather than waiting for one of pppd periodic transmission of
- * LCP request.
- *
- * On the other hand, if we block all packet, all those periodic
- * transmissions of pppd accumulate in ppp_generic, creating a
- * backlog of LCP request. When we eventually connect later on,
- * we have to transmit all this backlog before we can connect
- * proper (if we don't timeout before).
- *
- * The current strategy is as follow :
- * While we are attempting to connect, we block packets to get
- * a better connection time.
- * If we fail to connect, we drain the queue and start dropping packets
- */
-#ifdef BLOCK_WHEN_CONNECT
- /* If we are attempting to connect */
- if(test_bit(0, &self->ttp_connect))
- {
- /* Blocking packet, ppp_generic will retry later */
- return 0;
- }
-#endif /* BLOCK_WHEN_CONNECT */
-
- /* Dropping packet, pppd will retry later */
- dev_kfree_skb(skb);
- return 1;
- }
-
- /* Check if the queue can accept any packet, otherwise block */
- if(self->tx_flow != FLOW_START)
- DRETURN(0, PPP_INFO, "IrTTP queue full (%d skbs)...\n",
- skb_queue_len(&self->tsap->tx_queue));
-
- /* Prepare ppp frame for transmission */
- skb = irnet_prepare_skb(self, skb);
- DABORT(skb == NULL, 1, PPP_ERROR, "Prepare skb for Tx failed.\n");
-
- /* Send the packet to IrTTP */
- ret = irttp_data_request(self->tsap, skb);
- if(ret < 0)
- {
- /*
- * > IrTTPs tx queue is full, so we just have to
- * > drop the frame! You might think that we should
- * > just return -1 and don't deallocate the frame,
- * > but that is dangerous since it's possible that
- * > we have replaced the original skb with a new
- * > one with larger headroom, and that would really
- * > confuse do_dev_queue_xmit() in dev.c! I have
- * > tried :-) DB
- * Correction : we verify the flow control above (self->tx_flow),
- * so we come here only if IrTTP doesn't like the packet (empty,
- * too large, IrTTP not connected). In those rare cases, it's ok
- * to drop it, we don't want to see it here again...
- * Jean II
- */
- DERROR(PPP_ERROR, "IrTTP doesn't like this packet !!! (0x%X)\n", ret);
- /* irttp_data_request already free the packet */
- }
-
- DEXIT(PPP_TRACE, "\n");
- return 1; /* Packet has been consumed */
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Take care of the ioctls that ppp_generic doesn't want to deal with...
- * Note : we are also called from dev_irnet_ioctl().
- */
-static int
-ppp_irnet_ioctl(struct ppp_channel * chan,
- unsigned int cmd,
- unsigned long arg)
-{
- irnet_socket * ap = (struct irnet_socket *) chan->private;
- int err;
- int val;
- u32 accm[8];
- void __user *argp = (void __user *)arg;
-
- DENTER(PPP_TRACE, "(channel=0x%p, ap=0x%p, cmd=0x%X)\n",
- chan, ap, cmd);
-
- /* Basic checks... */
- DASSERT(ap != NULL, -ENXIO, PPP_ERROR, "ap is NULL...\n");
-
- err = -EFAULT;
- switch(cmd)
- {
- /* PPP flags */
- case PPPIOCGFLAGS:
- val = ap->flags | ap->rbits;
- if(put_user(val, (int __user *) argp))
- break;
- err = 0;
- break;
- case PPPIOCSFLAGS:
- if(get_user(val, (int __user *) argp))
- break;
- ap->flags = val & ~SC_RCV_BITS;
- ap->rbits = val & SC_RCV_BITS;
- err = 0;
- break;
-
- /* Async map stuff - all dummy to please pppd */
- case PPPIOCGASYNCMAP:
- if(put_user(ap->xaccm[0], (u32 __user *) argp))
- break;
- err = 0;
- break;
- case PPPIOCSASYNCMAP:
- if(get_user(ap->xaccm[0], (u32 __user *) argp))
- break;
- err = 0;
- break;
- case PPPIOCGRASYNCMAP:
- if(put_user(ap->raccm, (u32 __user *) argp))
- break;
- err = 0;
- break;
- case PPPIOCSRASYNCMAP:
- if(get_user(ap->raccm, (u32 __user *) argp))
- break;
- err = 0;
- break;
- case PPPIOCGXASYNCMAP:
- if(copy_to_user(argp, ap->xaccm, sizeof(ap->xaccm)))
- break;
- err = 0;
- break;
- case PPPIOCSXASYNCMAP:
- if(copy_from_user(accm, argp, sizeof(accm)))
- break;
- accm[2] &= ~0x40000000U; /* can't escape 0x5e */
- accm[3] |= 0x60000000U; /* must escape 0x7d, 0x7e */
- memcpy(ap->xaccm, accm, sizeof(ap->xaccm));
- err = 0;
- break;
-
- /* Max PPP frame size */
- case PPPIOCGMRU:
- if(put_user(ap->mru, (int __user *) argp))
- break;
- err = 0;
- break;
- case PPPIOCSMRU:
- if(get_user(val, (int __user *) argp))
- break;
- if(val < PPP_MRU)
- val = PPP_MRU;
- ap->mru = val;
- err = 0;
- break;
-
- default:
- DEBUG(PPP_INFO, "Unsupported ioctl (0x%X)\n", cmd);
- err = -ENOIOCTLCMD;
- }
-
- DEXIT(PPP_TRACE, " - err = 0x%X\n", err);
- return err;
-}
-
-/************************** INITIALISATION **************************/
-/*
- * Module initialisation and all that jazz...
- */
-
-/*------------------------------------------------------------------*/
-/*
- * Hook our device callbacks in the filesystem, to connect our code
- * to /dev/irnet
- */
-static inline int __init
-ppp_irnet_init(void)
-{
- int err = 0;
-
- DENTER(MODULE_TRACE, "()\n");
-
- /* Allocate ourselves as a minor in the misc range */
- err = misc_register(&irnet_misc_device);
-
- DEXIT(MODULE_TRACE, "\n");
- return err;
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Cleanup at exit...
- */
-static inline void __exit
-ppp_irnet_cleanup(void)
-{
- DENTER(MODULE_TRACE, "()\n");
-
- /* De-allocate /dev/irnet minor in misc range */
- misc_deregister(&irnet_misc_device);
-
- DEXIT(MODULE_TRACE, "\n");
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Module main entry point
- */
-static int __init
-irnet_init(void)
-{
- int err;
-
- /* Initialise both parts... */
- err = irda_irnet_init();
- if(!err)
- err = ppp_irnet_init();
- return err;
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Module exit
- */
-static void __exit
-irnet_cleanup(void)
-{
- irda_irnet_cleanup();
- ppp_irnet_cleanup();
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Module magic
- */
-module_init(irnet_init);
-module_exit(irnet_cleanup);
-MODULE_AUTHOR("Jean Tourrilhes <jt@hpl.hp.com>");
-MODULE_DESCRIPTION("IrNET : Synchronous PPP over IrDA");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_CHARDEV(10, 187);
diff --git a/net/irda/irnet/irnet_ppp.h b/net/irda/irnet/irnet_ppp.h
deleted file mode 100644
index 32061442cc8e..000000000000
--- a/net/irda/irnet/irnet_ppp.h
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * IrNET protocol module : Synchronous PPP over an IrDA socket.
- *
- * Jean II - HPL `00 - <jt@hpl.hp.com>
- *
- * This file contains all definitions and declarations necessary for the
- * PPP part of the IrNET module.
- * This file is a private header, so other modules don't want to know
- * what's in there...
- */
-
-#ifndef IRNET_PPP_H
-#define IRNET_PPP_H
-
-/***************************** INCLUDES *****************************/
-
-#include "irnet.h" /* Module global include */
-#include <linux/miscdevice.h>
-
-/************************ CONSTANTS & MACROS ************************/
-
-/* IrNET control channel stuff */
-#define IRNET_MAX_COMMAND 256 /* Max length of a command line */
-
-/* PPP hardcore stuff */
-
-/* Bits in rbits (PPP flags in irnet struct) */
-#define SC_RCV_BITS (SC_RCV_B7_1|SC_RCV_B7_0|SC_RCV_ODDP|SC_RCV_EVNP)
-
-/* Bit numbers in busy */
-#define XMIT_BUSY 0
-#define RECV_BUSY 1
-#define XMIT_WAKEUP 2
-#define XMIT_FULL 3
-
-/* Queue management */
-#define PPPSYNC_MAX_RQLEN 32 /* arbitrary */
-
-/****************************** TYPES ******************************/
-
-
-/**************************** PROTOTYPES ****************************/
-
-/* ----------------------- CONTROL CHANNEL ----------------------- */
-static inline ssize_t
- irnet_ctrl_write(irnet_socket *,
- const char *,
- size_t);
-static inline ssize_t
- irnet_ctrl_read(irnet_socket *,
- struct file *,
- char *,
- size_t);
-static inline unsigned int
- irnet_ctrl_poll(irnet_socket *,
- struct file *,
- poll_table *);
-/* ----------------------- CHARACTER DEVICE ----------------------- */
-static int
- dev_irnet_open(struct inode *, /* fs callback : open */
- struct file *),
- dev_irnet_close(struct inode *,
- struct file *);
-static ssize_t
- dev_irnet_write(struct file *,
- const char __user *,
- size_t,
- loff_t *),
- dev_irnet_read(struct file *,
- char __user *,
- size_t,
- loff_t *);
-static unsigned int
- dev_irnet_poll(struct file *,
- poll_table *);
-static long
- dev_irnet_ioctl(struct file *,
- unsigned int,
- unsigned long);
-/* ------------------------ PPP INTERFACE ------------------------ */
-static inline struct sk_buff *
- irnet_prepare_skb(irnet_socket *,
- struct sk_buff *);
-static int
- ppp_irnet_send(struct ppp_channel *,
- struct sk_buff *);
-static int
- ppp_irnet_ioctl(struct ppp_channel *,
- unsigned int,
- unsigned long);
-
-/**************************** VARIABLES ****************************/
-
-/* Filesystem callbacks (to call us) */
-static const struct file_operations irnet_device_fops =
-{
- .owner = THIS_MODULE,
- .read = dev_irnet_read,
- .write = dev_irnet_write,
- .poll = dev_irnet_poll,
- .unlocked_ioctl = dev_irnet_ioctl,
- .open = dev_irnet_open,
- .release = dev_irnet_close,
- .llseek = noop_llseek,
- /* Also : llseek, readdir, mmap, flush, fsync, fasync, lock, readv, writev */
-};
-
-/* Structure so that the misc major (drivers/char/misc.c) take care of us... */
-static struct miscdevice irnet_misc_device =
-{
- .minor = IRNET_MINOR,
- .name = "irnet",
- .fops = &irnet_device_fops
-};
-
-#endif /* IRNET_PPP_H */
diff --git a/net/irda/irnetlink.c b/net/irda/irnetlink.c
deleted file mode 100644
index 7fc340e574cf..000000000000
--- a/net/irda/irnetlink.c
+++ /dev/null
@@ -1,162 +0,0 @@
-/*
- * IrDA netlink layer, for stack configuration.
- *
- * Copyright (c) 2007 Samuel Ortiz <samuel@sortiz.org>
- *
- * Partly based on the 802.11 nelink implementation
- * (see net/wireless/nl80211.c) which is:
- * Copyright 2006 Johannes Berg <johannes@sipsolutions.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- */
-
-#include <linux/socket.h>
-#include <linux/irda.h>
-#include <linux/gfp.h>
-#include <net/net_namespace.h>
-#include <net/sock.h>
-#include <net/irda/irda.h>
-#include <net/irda/irlap.h>
-#include <net/genetlink.h>
-
-
-
-static struct genl_family irda_nl_family;
-
-static struct net_device * ifname_to_netdev(struct net *net, struct genl_info *info)
-{
- char * ifname;
-
- if (!info->attrs[IRDA_NL_ATTR_IFNAME])
- return NULL;
-
- ifname = nla_data(info->attrs[IRDA_NL_ATTR_IFNAME]);
-
- pr_debug("%s(): Looking for %s\n", __func__, ifname);
-
- return dev_get_by_name(net, ifname);
-}
-
-static int irda_nl_set_mode(struct sk_buff *skb, struct genl_info *info)
-{
- struct net_device * dev;
- struct irlap_cb * irlap;
- u32 mode;
-
- if (!info->attrs[IRDA_NL_ATTR_MODE])
- return -EINVAL;
-
- mode = nla_get_u32(info->attrs[IRDA_NL_ATTR_MODE]);
-
- pr_debug("%s(): Switching to mode: %d\n", __func__, mode);
-
- dev = ifname_to_netdev(&init_net, info);
- if (!dev)
- return -ENODEV;
-
- irlap = (struct irlap_cb *)dev->atalk_ptr;
- if (!irlap) {
- dev_put(dev);
- return -ENODEV;
- }
-
- irlap->mode = mode;
-
- dev_put(dev);
-
- return 0;
-}
-
-static int irda_nl_get_mode(struct sk_buff *skb, struct genl_info *info)
-{
- struct net_device * dev;
- struct irlap_cb * irlap;
- struct sk_buff *msg;
- void *hdr;
- int ret = -ENOBUFS;
-
- dev = ifname_to_netdev(&init_net, info);
- if (!dev)
- return -ENODEV;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg) {
- dev_put(dev);
- return -ENOMEM;
- }
-
- irlap = (struct irlap_cb *)dev->atalk_ptr;
- if (!irlap) {
- ret = -ENODEV;
- goto err_out;
- }
-
- hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq,
- &irda_nl_family, 0, IRDA_NL_CMD_GET_MODE);
- if (hdr == NULL) {
- ret = -EMSGSIZE;
- goto err_out;
- }
-
- if(nla_put_string(msg, IRDA_NL_ATTR_IFNAME,
- dev->name))
- goto err_out;
-
- if(nla_put_u32(msg, IRDA_NL_ATTR_MODE, irlap->mode))
- goto err_out;
-
- genlmsg_end(msg, hdr);
-
- return genlmsg_reply(msg, info);
-
- err_out:
- nlmsg_free(msg);
- dev_put(dev);
-
- return ret;
-}
-
-static const struct nla_policy irda_nl_policy[IRDA_NL_ATTR_MAX + 1] = {
- [IRDA_NL_ATTR_IFNAME] = { .type = NLA_NUL_STRING,
- .len = IFNAMSIZ-1 },
- [IRDA_NL_ATTR_MODE] = { .type = NLA_U32 },
-};
-
-static const struct genl_ops irda_nl_ops[] = {
- {
- .cmd = IRDA_NL_CMD_SET_MODE,
- .doit = irda_nl_set_mode,
- .policy = irda_nl_policy,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = IRDA_NL_CMD_GET_MODE,
- .doit = irda_nl_get_mode,
- .policy = irda_nl_policy,
- /* can be retrieved by unprivileged users */
- },
-
-};
-
-static struct genl_family irda_nl_family __ro_after_init = {
- .name = IRDA_NL_NAME,
- .hdrsize = 0,
- .version = IRDA_NL_VERSION,
- .maxattr = IRDA_NL_CMD_MAX,
- .module = THIS_MODULE,
- .ops = irda_nl_ops,
- .n_ops = ARRAY_SIZE(irda_nl_ops),
-};
-
-int __init irda_nl_register(void)
-{
- return genl_register_family(&irda_nl_family);
-}
-
-void irda_nl_unregister(void)
-{
- genl_unregister_family(&irda_nl_family);
-}
diff --git a/net/irda/irproc.c b/net/irda/irproc.c
deleted file mode 100644
index 77cfdde9d82f..000000000000
--- a/net/irda/irproc.c
+++ /dev/null
@@ -1,96 +0,0 @@
-/*********************************************************************
- *
- * Filename: irproc.c
- * Version: 1.0
- * Description: Various entries in the /proc file system
- * Status: Experimental.
- * Author: Thomas Davis, <ratbert@radiks.net>
- * Created at: Sat Feb 21 21:33:24 1998
- * Modified at: Sun Nov 14 08:54:54 1999
- * Modified by: Dag Brattli <dagb@cs.uit.no>
- *
- * Copyright (c) 1998-1999, Dag Brattli <dagb@cs.uit.no>
- * Copyright (c) 1998, Thomas Davis, <ratbert@radiks.net>,
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * I, Thomas Davis, provide no warranty for any of this software.
- * This material is provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <net/net_namespace.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/irlap.h>
-#include <net/irda/irlmp.h>
-
-extern const struct file_operations discovery_seq_fops;
-extern const struct file_operations irlap_seq_fops;
-extern const struct file_operations irlmp_seq_fops;
-extern const struct file_operations irttp_seq_fops;
-extern const struct file_operations irias_seq_fops;
-
-struct irda_entry {
- const char *name;
- const struct file_operations *fops;
-};
-
-struct proc_dir_entry *proc_irda;
-EXPORT_SYMBOL(proc_irda);
-
-static const struct irda_entry irda_dirs[] = {
- {"discovery", &discovery_seq_fops},
- {"irttp", &irttp_seq_fops},
- {"irlmp", &irlmp_seq_fops},
- {"irlap", &irlap_seq_fops},
- {"irias", &irias_seq_fops},
-};
-
-/*
- * Function irda_proc_register (void)
- *
- * Register irda entry in /proc file system
- *
- */
-void __init irda_proc_register(void)
-{
- int i;
-
- proc_irda = proc_mkdir("irda", init_net.proc_net);
- if (proc_irda == NULL)
- return;
-
- for (i = 0; i < ARRAY_SIZE(irda_dirs); i++)
- (void) proc_create(irda_dirs[i].name, 0, proc_irda,
- irda_dirs[i].fops);
-}
-
-/*
- * Function irda_proc_unregister (void)
- *
- * Unregister irda entry in /proc file system
- *
- */
-void irda_proc_unregister(void)
-{
- int i;
-
- if (proc_irda) {
- for (i=0; i<ARRAY_SIZE(irda_dirs); i++)
- remove_proc_entry(irda_dirs[i].name, proc_irda);
-
- remove_proc_entry("irda", init_net.proc_net);
- proc_irda = NULL;
- }
-}
-
-
diff --git a/net/irda/irqueue.c b/net/irda/irqueue.c
deleted file mode 100644
index 160dc89335e2..000000000000
--- a/net/irda/irqueue.c
+++ /dev/null
@@ -1,911 +0,0 @@
-/*********************************************************************
- *
- * Filename: irqueue.c
- * Version: 0.3
- * Description: General queue implementation
- * Status: Experimental.
- * Author: Dag Brattli <dagb@cs.uit.no>
- * Created at: Tue Jun 9 13:29:31 1998
- * Modified at: Sun Dec 12 13:48:22 1999
- * Modified by: Dag Brattli <dagb@cs.uit.no>
- * Modified at: Thu Jan 4 14:29:10 CET 2001
- * Modified by: Marc Zyngier <mzyngier@freesurf.fr>
- *
- * Copyright (C) 1998-1999, Aage Kvalnes <aage@cs.uit.no>
- * Copyright (C) 1998, Dag Brattli,
- * All Rights Reserved.
- *
- * This code is taken from the Vortex Operating System written by Aage
- * Kvalnes. Aage has agreed that this code can use the GPL licence,
- * although he does not use that licence in his own code.
- *
- * This copyright does however _not_ include the ELF hash() function
- * which I currently don't know which licence or copyright it
- * has. Please inform me if you know.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * Neither Dag Brattli nor University of Tromsø admit liability nor
- * provide warranty for any of this software. This material is
- * provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-/*
- * NOTE :
- * There are various problems with this package :
- * o the hash function for ints is pathetic (but could be changed)
- * o locking is sometime suspicious (especially during enumeration)
- * o most users have only a few elements (== overhead)
- * o most users never use search, so don't benefit from hashing
- * Problem already fixed :
- * o not 64 bit compliant (most users do hashv = (int) self)
- * o hashbin_remove() is broken => use hashbin_remove_this()
- * I think most users would be better served by a simple linked list
- * (like include/linux/list.h) with a global spinlock per list.
- * Jean II
- */
-
-/*
- * Notes on the concurrent access to hashbin and other SMP issues
- * -------------------------------------------------------------
- * Hashbins are very often in the IrDA stack a global repository of
- * information, and therefore used in a very asynchronous manner following
- * various events (driver calls, timers, user calls...).
- * Therefore, very often it is highly important to consider the
- * management of concurrent access to the hashbin and how to guarantee the
- * consistency of the operations on it.
- *
- * First, we need to define the objective of locking :
- * 1) Protect user data (content pointed by the hashbin)
- * 2) Protect hashbin structure itself (linked list in each bin)
- *
- * OLD LOCKING
- * -----------
- *
- * The previous locking strategy, either HB_LOCAL or HB_GLOBAL were
- * both inadequate in *both* aspect.
- * o HB_GLOBAL was using a spinlock for each bin (local locking).
- * o HB_LOCAL was disabling irq on *all* CPUs, so use a single
- * global semaphore.
- * The problems were :
- * A) Global irq disabling is no longer supported by the kernel
- * B) No protection for the hashbin struct global data
- * o hashbin_delete()
- * o hb_current
- * C) No protection for user data in some cases
- *
- * A) HB_LOCAL use global irq disabling, so doesn't work on kernel
- * 2.5.X. Even when it is supported (kernel 2.4.X and earlier), its
- * performance is not satisfactory on SMP setups. Most hashbins were
- * HB_LOCAL, so (A) definitely need fixing.
- * B) HB_LOCAL could be modified to fix (B). However, because HB_GLOBAL
- * lock only the individual bins, it will never be able to lock the
- * global data, so can't do (B).
- * C) Some functions return pointer to data that is still in the
- * hashbin :
- * o hashbin_find()
- * o hashbin_get_first()
- * o hashbin_get_next()
- * As the data is still in the hashbin, it may be changed or free'd
- * while the caller is examinimg the data. In those case, locking can't
- * be done within the hashbin, but must include use of the data within
- * the caller.
- * The caller can easily do this with HB_LOCAL (just disable irqs).
- * However, this is impossible with HB_GLOBAL because the caller has no
- * way to know the proper bin, so don't know which spinlock to use.
- *
- * Quick summary : can no longer use HB_LOCAL, and HB_GLOBAL is
- * fundamentally broken and will never work.
- *
- * NEW LOCKING
- * -----------
- *
- * To fix those problems, I've introduce a few changes in the
- * hashbin locking :
- * 1) New HB_LOCK scheme
- * 2) hashbin->hb_spinlock
- * 3) New hashbin usage policy
- *
- * HB_LOCK :
- * -------
- * HB_LOCK is a locking scheme intermediate between the old HB_LOCAL
- * and HB_GLOBAL. It uses a single spinlock to protect the whole content
- * of the hashbin. As it is a single spinlock, it can protect the global
- * data of the hashbin and not only the bins themselves.
- * HB_LOCK can only protect some of the hashbin calls, so it only lock
- * call that can be made 100% safe and leave other call unprotected.
- * HB_LOCK in theory is slower than HB_GLOBAL, but as the hashbin
- * content is always small contention is not high, so it doesn't matter
- * much. HB_LOCK is probably faster than HB_LOCAL.
- *
- * hashbin->hb_spinlock :
- * --------------------
- * The spinlock that HB_LOCK uses is available for caller, so that
- * the caller can protect unprotected calls (see below).
- * If the caller want to do entirely its own locking (HB_NOLOCK), he
- * can do so and may use safely this spinlock.
- * Locking is done like this :
- * spin_lock_irqsave(&hashbin->hb_spinlock, flags);
- * Releasing the lock :
- * spin_unlock_irqrestore(&hashbin->hb_spinlock, flags);
- *
- * Safe & Protected calls :
- * ----------------------
- * The following calls are safe or protected via HB_LOCK :
- * o hashbin_new() -> safe
- * o hashbin_delete()
- * o hashbin_insert()
- * o hashbin_remove_first()
- * o hashbin_remove()
- * o hashbin_remove_this()
- * o HASHBIN_GET_SIZE() -> atomic
- *
- * The following calls only protect the hashbin itself :
- * o hashbin_lock_find()
- * o hashbin_find_next()
- *
- * Unprotected calls :
- * -----------------
- * The following calls need to be protected by the caller :
- * o hashbin_find()
- * o hashbin_get_first()
- * o hashbin_get_next()
- *
- * Locking Policy :
- * --------------
- * If the hashbin is used only in a single thread of execution
- * (explicitly or implicitely), you can use HB_NOLOCK
- * If the calling module already provide concurrent access protection,
- * you may use HB_NOLOCK.
- *
- * In all other cases, you need to use HB_LOCK and lock the hashbin
- * every time before calling one of the unprotected calls. You also must
- * use the pointer returned by the unprotected call within the locked
- * region.
- *
- * Extra care for enumeration :
- * --------------------------
- * hashbin_get_first() and hashbin_get_next() use the hashbin to
- * store the current position, in hb_current.
- * As long as the hashbin remains locked, this is safe. If you unlock
- * the hashbin, the current position may change if anybody else modify
- * or enumerate the hashbin.
- * Summary : do the full enumeration while locked.
- *
- * Alternatively, you may use hashbin_find_next(). But, this will
- * be slower, is more complex to use and doesn't protect the hashbin
- * content. So, care is needed here as well.
- *
- * Other issues :
- * ------------
- * I believe that we are overdoing it by using spin_lock_irqsave()
- * and we should use only spin_lock_bh() or similar. But, I don't have
- * the balls to try it out.
- * Don't believe that because hashbin are now (somewhat) SMP safe
- * that the rest of the code is. Higher layers tend to be safest,
- * but LAP and LMP would need some serious dedicated love.
- *
- * Jean II
- */
-#include <linux/module.h>
-#include <linux/slab.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/irqueue.h>
-
-/************************ QUEUE SUBROUTINES ************************/
-
-/*
- * Hashbin
- */
-#define GET_HASHBIN(x) ( x & HASHBIN_MASK )
-
-/*
- * Function hash (name)
- *
- * This function hash the input string 'name' using the ELF hash
- * function for strings.
- */
-static __u32 hash( const char* name)
-{
- __u32 h = 0;
- __u32 g;
-
- while(*name) {
- h = (h<<4) + *name++;
- if ((g = (h & 0xf0000000)))
- h ^=g>>24;
- h &=~g;
- }
- return h;
-}
-
-/*
- * Function enqueue_first (queue, proc)
- *
- * Insert item first in queue.
- *
- */
-static void enqueue_first(irda_queue_t **queue, irda_queue_t* element)
-{
-
- /*
- * Check if queue is empty.
- */
- if ( *queue == NULL ) {
- /*
- * Queue is empty. Insert one element into the queue.
- */
- element->q_next = element->q_prev = *queue = element;
-
- } else {
- /*
- * Queue is not empty. Insert element into front of queue.
- */
- element->q_next = (*queue);
- (*queue)->q_prev->q_next = element;
- element->q_prev = (*queue)->q_prev;
- (*queue)->q_prev = element;
- (*queue) = element;
- }
-}
-
-
-/*
- * Function dequeue (queue)
- *
- * Remove first entry in queue
- *
- */
-static irda_queue_t *dequeue_first(irda_queue_t **queue)
-{
- irda_queue_t *ret;
-
- pr_debug("dequeue_first()\n");
-
- /*
- * Set return value
- */
- ret = *queue;
-
- if ( *queue == NULL ) {
- /*
- * Queue was empty.
- */
- } else if ( (*queue)->q_next == *queue ) {
- /*
- * Queue only contained a single element. It will now be
- * empty.
- */
- *queue = NULL;
- } else {
- /*
- * Queue contained several element. Remove the first one.
- */
- (*queue)->q_prev->q_next = (*queue)->q_next;
- (*queue)->q_next->q_prev = (*queue)->q_prev;
- *queue = (*queue)->q_next;
- }
-
- /*
- * Return the removed entry (or NULL of queue was empty).
- */
- return ret;
-}
-
-/*
- * Function dequeue_general (queue, element)
- *
- *
- */
-static irda_queue_t *dequeue_general(irda_queue_t **queue, irda_queue_t* element)
-{
- irda_queue_t *ret;
-
- pr_debug("dequeue_general()\n");
-
- /*
- * Set return value
- */
- ret = *queue;
-
- if ( *queue == NULL ) {
- /*
- * Queue was empty.
- */
- } else if ( (*queue)->q_next == *queue ) {
- /*
- * Queue only contained a single element. It will now be
- * empty.
- */
- *queue = NULL;
-
- } else {
- /*
- * Remove specific element.
- */
- element->q_prev->q_next = element->q_next;
- element->q_next->q_prev = element->q_prev;
- if ( (*queue) == element)
- (*queue) = element->q_next;
- }
-
- /*
- * Return the removed entry (or NULL of queue was empty).
- */
- return ret;
-}
-
-/************************ HASHBIN MANAGEMENT ************************/
-
-/*
- * Function hashbin_create ( type, name )
- *
- * Create hashbin!
- *
- */
-hashbin_t *hashbin_new(int type)
-{
- hashbin_t* hashbin;
-
- /*
- * Allocate new hashbin
- */
- hashbin = kzalloc(sizeof(*hashbin), GFP_ATOMIC);
- if (!hashbin)
- return NULL;
-
- /*
- * Initialize structure
- */
- hashbin->hb_type = type;
- hashbin->magic = HB_MAGIC;
- //hashbin->hb_current = NULL;
-
- /* Make sure all spinlock's are unlocked */
- if ( hashbin->hb_type & HB_LOCK ) {
- spin_lock_init(&hashbin->hb_spinlock);
- }
-
- return hashbin;
-}
-EXPORT_SYMBOL(hashbin_new);
-
-
-/*
- * Function hashbin_delete (hashbin, free_func)
- *
- * Destroy hashbin, the free_func can be a user supplied special routine
- * for deallocating this structure if it's complex. If not the user can
- * just supply kfree, which should take care of the job.
- */
-int hashbin_delete( hashbin_t* hashbin, FREE_FUNC free_func)
-{
- irda_queue_t* queue;
- unsigned long flags = 0;
- int i;
-
- IRDA_ASSERT(hashbin != NULL, return -1;);
- IRDA_ASSERT(hashbin->magic == HB_MAGIC, return -1;);
-
- /* Synchronize */
- if (hashbin->hb_type & HB_LOCK)
- spin_lock_irqsave(&hashbin->hb_spinlock, flags);
-
- /*
- * Free the entries in the hashbin, TODO: use hashbin_clear when
- * it has been shown to work
- */
- for (i = 0; i < HASHBIN_SIZE; i ++ ) {
- while (1) {
- queue = dequeue_first((irda_queue_t**) &hashbin->hb_queue[i]);
-
- if (!queue)
- break;
-
- if (free_func) {
- if (hashbin->hb_type & HB_LOCK)
- spin_unlock_irqrestore(&hashbin->hb_spinlock, flags);
- free_func(queue);
- if (hashbin->hb_type & HB_LOCK)
- spin_lock_irqsave(&hashbin->hb_spinlock, flags);
- }
- }
- }
-
- /* Cleanup local data */
- hashbin->hb_current = NULL;
- hashbin->magic = ~HB_MAGIC;
-
- /* Release lock */
- if (hashbin->hb_type & HB_LOCK)
- spin_unlock_irqrestore(&hashbin->hb_spinlock, flags);
-
- /*
- * Free the hashbin structure
- */
- kfree(hashbin);
-
- return 0;
-}
-EXPORT_SYMBOL(hashbin_delete);
-
-/********************* HASHBIN LIST OPERATIONS *********************/
-
-/*
- * Function hashbin_insert (hashbin, entry, name)
- *
- * Insert an entry into the hashbin
- *
- */
-void hashbin_insert(hashbin_t* hashbin, irda_queue_t* entry, long hashv,
- const char* name)
-{
- unsigned long flags = 0;
- int bin;
-
- IRDA_ASSERT( hashbin != NULL, return;);
- IRDA_ASSERT( hashbin->magic == HB_MAGIC, return;);
-
- /*
- * Locate hashbin
- */
- if ( name )
- hashv = hash( name );
- bin = GET_HASHBIN( hashv );
-
- /* Synchronize */
- if ( hashbin->hb_type & HB_LOCK ) {
- spin_lock_irqsave(&hashbin->hb_spinlock, flags);
- } /* Default is no-lock */
-
- /*
- * Store name and key
- */
- entry->q_hash = hashv;
- if ( name )
- strlcpy( entry->q_name, name, sizeof(entry->q_name));
-
- /*
- * Insert new entry first
- */
- enqueue_first( (irda_queue_t**) &hashbin->hb_queue[ bin ],
- entry);
- hashbin->hb_size++;
-
- /* Release lock */
- if ( hashbin->hb_type & HB_LOCK ) {
- spin_unlock_irqrestore(&hashbin->hb_spinlock, flags);
- } /* Default is no-lock */
-}
-EXPORT_SYMBOL(hashbin_insert);
-
-/*
- * Function hashbin_remove_first (hashbin)
- *
- * Remove first entry of the hashbin
- *
- * Note : this function no longer use hashbin_remove(), but does things
- * similar to hashbin_remove_this(), so can be considered safe.
- * Jean II
- */
-void *hashbin_remove_first( hashbin_t *hashbin)
-{
- unsigned long flags = 0;
- irda_queue_t *entry = NULL;
-
- /* Synchronize */
- if ( hashbin->hb_type & HB_LOCK ) {
- spin_lock_irqsave(&hashbin->hb_spinlock, flags);
- } /* Default is no-lock */
-
- entry = hashbin_get_first( hashbin);
- if ( entry != NULL) {
- int bin;
- long hashv;
- /*
- * Locate hashbin
- */
- hashv = entry->q_hash;
- bin = GET_HASHBIN( hashv );
-
- /*
- * Dequeue the entry...
- */
- dequeue_general( (irda_queue_t**) &hashbin->hb_queue[ bin ],
- entry);
- hashbin->hb_size--;
- entry->q_next = NULL;
- entry->q_prev = NULL;
-
- /*
- * Check if this item is the currently selected item, and in
- * that case we must reset hb_current
- */
- if ( entry == hashbin->hb_current)
- hashbin->hb_current = NULL;
- }
-
- /* Release lock */
- if ( hashbin->hb_type & HB_LOCK ) {
- spin_unlock_irqrestore(&hashbin->hb_spinlock, flags);
- } /* Default is no-lock */
-
- return entry;
-}
-
-
-/*
- * Function hashbin_remove (hashbin, hashv, name)
- *
- * Remove entry with the given name
- *
- * The use of this function is highly discouraged, because the whole
- * concept behind hashbin_remove() is broken. In many cases, it's not
- * possible to guarantee the unicity of the index (either hashv or name),
- * leading to removing the WRONG entry.
- * The only simple safe use is :
- * hashbin_remove(hasbin, (int) self, NULL);
- * In other case, you must think hard to guarantee unicity of the index.
- * Jean II
- */
-void* hashbin_remove( hashbin_t* hashbin, long hashv, const char* name)
-{
- int bin, found = FALSE;
- unsigned long flags = 0;
- irda_queue_t* entry;
-
- IRDA_ASSERT( hashbin != NULL, return NULL;);
- IRDA_ASSERT( hashbin->magic == HB_MAGIC, return NULL;);
-
- /*
- * Locate hashbin
- */
- if ( name )
- hashv = hash( name );
- bin = GET_HASHBIN( hashv );
-
- /* Synchronize */
- if ( hashbin->hb_type & HB_LOCK ) {
- spin_lock_irqsave(&hashbin->hb_spinlock, flags);
- } /* Default is no-lock */
-
- /*
- * Search for entry
- */
- entry = hashbin->hb_queue[ bin ];
- if ( entry ) {
- do {
- /*
- * Check for key
- */
- if ( entry->q_hash == hashv ) {
- /*
- * Name compare too?
- */
- if ( name ) {
- if ( strcmp( entry->q_name, name) == 0)
- {
- found = TRUE;
- break;
- }
- } else {
- found = TRUE;
- break;
- }
- }
- entry = entry->q_next;
- } while ( entry != hashbin->hb_queue[ bin ] );
- }
-
- /*
- * If entry was found, dequeue it
- */
- if ( found ) {
- dequeue_general( (irda_queue_t**) &hashbin->hb_queue[ bin ],
- entry);
- hashbin->hb_size--;
-
- /*
- * Check if this item is the currently selected item, and in
- * that case we must reset hb_current
- */
- if ( entry == hashbin->hb_current)
- hashbin->hb_current = NULL;
- }
-
- /* Release lock */
- if ( hashbin->hb_type & HB_LOCK ) {
- spin_unlock_irqrestore(&hashbin->hb_spinlock, flags);
- } /* Default is no-lock */
-
-
- /* Return */
- if ( found )
- return entry;
- else
- return NULL;
-
-}
-EXPORT_SYMBOL(hashbin_remove);
-
-/*
- * Function hashbin_remove_this (hashbin, entry)
- *
- * Remove entry with the given name
- *
- * In some cases, the user of hashbin can't guarantee the unicity
- * of either the hashv or name.
- * In those cases, using the above function is guaranteed to cause troubles,
- * so we use this one instead...
- * And by the way, it's also faster, because we skip the search phase ;-)
- */
-void* hashbin_remove_this( hashbin_t* hashbin, irda_queue_t* entry)
-{
- unsigned long flags = 0;
- int bin;
- long hashv;
-
- IRDA_ASSERT( hashbin != NULL, return NULL;);
- IRDA_ASSERT( hashbin->magic == HB_MAGIC, return NULL;);
- IRDA_ASSERT( entry != NULL, return NULL;);
-
- /* Synchronize */
- if ( hashbin->hb_type & HB_LOCK ) {
- spin_lock_irqsave(&hashbin->hb_spinlock, flags);
- } /* Default is no-lock */
-
- /* Check if valid and not already removed... */
- if((entry->q_next == NULL) || (entry->q_prev == NULL)) {
- entry = NULL;
- goto out;
- }
-
- /*
- * Locate hashbin
- */
- hashv = entry->q_hash;
- bin = GET_HASHBIN( hashv );
-
- /*
- * Dequeue the entry...
- */
- dequeue_general( (irda_queue_t**) &hashbin->hb_queue[ bin ],
- entry);
- hashbin->hb_size--;
- entry->q_next = NULL;
- entry->q_prev = NULL;
-
- /*
- * Check if this item is the currently selected item, and in
- * that case we must reset hb_current
- */
- if ( entry == hashbin->hb_current)
- hashbin->hb_current = NULL;
-out:
- /* Release lock */
- if ( hashbin->hb_type & HB_LOCK ) {
- spin_unlock_irqrestore(&hashbin->hb_spinlock, flags);
- } /* Default is no-lock */
-
- return entry;
-}
-EXPORT_SYMBOL(hashbin_remove_this);
-
-/*********************** HASHBIN ENUMERATION ***********************/
-
-/*
- * Function hashbin_common_find (hashbin, hashv, name)
- *
- * Find item with the given hashv or name
- *
- */
-void* hashbin_find( hashbin_t* hashbin, long hashv, const char* name )
-{
- int bin;
- irda_queue_t* entry;
-
- pr_debug("hashbin_find()\n");
-
- IRDA_ASSERT( hashbin != NULL, return NULL;);
- IRDA_ASSERT( hashbin->magic == HB_MAGIC, return NULL;);
-
- /*
- * Locate hashbin
- */
- if ( name )
- hashv = hash( name );
- bin = GET_HASHBIN( hashv );
-
- /*
- * Search for entry
- */
- entry = hashbin->hb_queue[ bin];
- if ( entry ) {
- do {
- /*
- * Check for key
- */
- if ( entry->q_hash == hashv ) {
- /*
- * Name compare too?
- */
- if ( name ) {
- if ( strcmp( entry->q_name, name ) == 0 ) {
- return entry;
- }
- } else {
- return entry;
- }
- }
- entry = entry->q_next;
- } while ( entry != hashbin->hb_queue[ bin ] );
- }
-
- return NULL;
-}
-EXPORT_SYMBOL(hashbin_find);
-
-/*
- * Function hashbin_lock_find (hashbin, hashv, name)
- *
- * Find item with the given hashv or name
- *
- * Same, but with spinlock protection...
- * I call it safe, but it's only safe with respect to the hashbin, not its
- * content. - Jean II
- */
-void* hashbin_lock_find( hashbin_t* hashbin, long hashv, const char* name )
-{
- unsigned long flags = 0;
- irda_queue_t* entry;
-
- /* Synchronize */
- spin_lock_irqsave(&hashbin->hb_spinlock, flags);
-
- /*
- * Search for entry
- */
- entry = hashbin_find(hashbin, hashv, name);
-
- /* Release lock */
- spin_unlock_irqrestore(&hashbin->hb_spinlock, flags);
-
- return entry;
-}
-EXPORT_SYMBOL(hashbin_lock_find);
-
-/*
- * Function hashbin_find (hashbin, hashv, name, pnext)
- *
- * Find an item with the given hashv or name, and its successor
- *
- * This function allow to do concurrent enumerations without the
- * need to lock over the whole session, because the caller keep the
- * context of the search. On the other hand, it might fail and return
- * NULL if the entry is removed. - Jean II
- */
-void* hashbin_find_next( hashbin_t* hashbin, long hashv, const char* name,
- void ** pnext)
-{
- unsigned long flags = 0;
- irda_queue_t* entry;
-
- /* Synchronize */
- spin_lock_irqsave(&hashbin->hb_spinlock, flags);
-
- /*
- * Search for current entry
- * This allow to check if the current item is still in the
- * hashbin or has been removed.
- */
- entry = hashbin_find(hashbin, hashv, name);
-
- /*
- * Trick hashbin_get_next() to return what we want
- */
- if(entry) {
- hashbin->hb_current = entry;
- *pnext = hashbin_get_next( hashbin );
- } else
- *pnext = NULL;
-
- /* Release lock */
- spin_unlock_irqrestore(&hashbin->hb_spinlock, flags);
-
- return entry;
-}
-
-/*
- * Function hashbin_get_first (hashbin)
- *
- * Get a pointer to first element in hashbin, this function must be
- * called before any calls to hashbin_get_next()!
- *
- */
-irda_queue_t *hashbin_get_first( hashbin_t* hashbin)
-{
- irda_queue_t *entry;
- int i;
-
- IRDA_ASSERT( hashbin != NULL, return NULL;);
- IRDA_ASSERT( hashbin->magic == HB_MAGIC, return NULL;);
-
- if ( hashbin == NULL)
- return NULL;
-
- for ( i = 0; i < HASHBIN_SIZE; i ++ ) {
- entry = hashbin->hb_queue[ i];
- if ( entry) {
- hashbin->hb_current = entry;
- return entry;
- }
- }
- /*
- * Did not find any item in hashbin
- */
- return NULL;
-}
-EXPORT_SYMBOL(hashbin_get_first);
-
-/*
- * Function hashbin_get_next (hashbin)
- *
- * Get next item in hashbin. A series of hashbin_get_next() calls must
- * be started by a call to hashbin_get_first(). The function returns
- * NULL when all items have been traversed
- *
- * The context of the search is stored within the hashbin, so you must
- * protect yourself from concurrent enumerations. - Jean II
- */
-irda_queue_t *hashbin_get_next( hashbin_t *hashbin)
-{
- irda_queue_t* entry;
- int bin;
- int i;
-
- IRDA_ASSERT( hashbin != NULL, return NULL;);
- IRDA_ASSERT( hashbin->magic == HB_MAGIC, return NULL;);
-
- if ( hashbin->hb_current == NULL) {
- IRDA_ASSERT( hashbin->hb_current != NULL, return NULL;);
- return NULL;
- }
- entry = hashbin->hb_current->q_next;
- bin = GET_HASHBIN( entry->q_hash);
-
- /*
- * Make sure that we are not back at the beginning of the queue
- * again
- */
- if ( entry != hashbin->hb_queue[ bin ]) {
- hashbin->hb_current = entry;
-
- return entry;
- }
-
- /*
- * Check that this is not the last queue in hashbin
- */
- if ( bin >= HASHBIN_SIZE)
- return NULL;
-
- /*
- * Move to next queue in hashbin
- */
- bin++;
- for ( i = bin; i < HASHBIN_SIZE; i++ ) {
- entry = hashbin->hb_queue[ i];
- if ( entry) {
- hashbin->hb_current = entry;
-
- return entry;
- }
- }
- return NULL;
-}
-EXPORT_SYMBOL(hashbin_get_next);
diff --git a/net/irda/irsysctl.c b/net/irda/irsysctl.c
deleted file mode 100644
index 873da5e7d428..000000000000
--- a/net/irda/irsysctl.c
+++ /dev/null
@@ -1,258 +0,0 @@
-/*********************************************************************
- *
- * Filename: irsysctl.c
- * Version: 1.0
- * Description: Sysctl interface for IrDA
- * Status: Experimental.
- * Author: Dag Brattli <dagb@cs.uit.no>
- * Created at: Sun May 24 22:12:06 1998
- * Modified at: Fri Jun 4 02:50:15 1999
- * Modified by: Dag Brattli <dagb@cs.uit.no>
- *
- * Copyright (c) 1997, 1999 Dag Brattli, All Rights Reserved.
- * Copyright (c) 2000-2001 Jean Tourrilhes <jt@hpl.hp.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * Neither Dag Brattli nor University of Tromsø admit liability nor
- * provide warranty for any of this software. This material is
- * provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-#include <linux/mm.h>
-#include <linux/ctype.h>
-#include <linux/sysctl.h>
-#include <linux/init.h>
-
-#include <net/irda/irda.h> /* irda_debug */
-#include <net/irda/irlmp.h>
-#include <net/irda/timer.h>
-#include <net/irda/irias_object.h>
-
-extern int sysctl_discovery;
-extern int sysctl_discovery_slots;
-extern int sysctl_discovery_timeout;
-extern int sysctl_slot_timeout;
-extern int sysctl_fast_poll_increase;
-extern char sysctl_devname[];
-extern int sysctl_max_baud_rate;
-extern unsigned int sysctl_min_tx_turn_time;
-extern unsigned int sysctl_max_tx_data_size;
-extern unsigned int sysctl_max_tx_window;
-extern int sysctl_max_noreply_time;
-extern int sysctl_warn_noreply_time;
-extern int sysctl_lap_keepalive_time;
-
-extern struct irlmp_cb *irlmp;
-
-/* this is needed for the proc_dointvec_minmax - Jean II */
-static int max_discovery_slots = 16; /* ??? */
-static int min_discovery_slots = 1;
-/* IrLAP 6.13.2 says 25ms to 10+70ms - allow higher since some devices
- * seems to require it. (from Dag's comment) */
-static int max_slot_timeout = 160;
-static int min_slot_timeout = 20;
-static int max_max_baud_rate = 16000000; /* See qos.c - IrLAP spec */
-static int min_max_baud_rate = 2400;
-static int max_min_tx_turn_time = 10000; /* See qos.c - IrLAP spec */
-static int min_min_tx_turn_time;
-static int max_max_tx_data_size = 2048; /* See qos.c - IrLAP spec */
-static int min_max_tx_data_size = 64;
-static int max_max_tx_window = 7; /* See qos.c - IrLAP spec */
-static int min_max_tx_window = 1;
-static int max_max_noreply_time = 40; /* See qos.c - IrLAP spec */
-static int min_max_noreply_time = 3;
-static int max_warn_noreply_time = 3; /* 3s == standard */
-static int min_warn_noreply_time = 1; /* 1s == min WD_TIMER */
-static int max_lap_keepalive_time = 10000; /* 10s */
-static int min_lap_keepalive_time = 100; /* 100us */
-/* For other sysctl, I've no idea of the range. Maybe Dag could help
- * us on that - Jean II */
-
-static int do_devname(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
-{
- int ret;
-
- ret = proc_dostring(table, write, buffer, lenp, ppos);
- if (ret == 0 && write) {
- struct ias_value *val;
-
- val = irias_new_string_value(sysctl_devname);
- if (val)
- irias_object_change_attribute("Device", "DeviceName", val);
- }
- return ret;
-}
-
-
-static int do_discovery(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
-{
- int ret;
-
- ret = proc_dointvec(table, write, buffer, lenp, ppos);
- if (ret)
- return ret;
-
- if (irlmp == NULL)
- return -ENODEV;
-
- if (sysctl_discovery)
- irlmp_start_discovery_timer(irlmp, sysctl_discovery_timeout*HZ);
- else
- del_timer_sync(&irlmp->discovery_timer);
-
- return ret;
-}
-
-/* One file */
-static struct ctl_table irda_table[] = {
- {
- .procname = "discovery",
- .data = &sysctl_discovery,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = do_discovery,
- },
- {
- .procname = "devname",
- .data = sysctl_devname,
- .maxlen = 65,
- .mode = 0644,
- .proc_handler = do_devname,
- },
-#ifdef CONFIG_IRDA_FAST_RR
- {
- .procname = "fast_poll_increase",
- .data = &sysctl_fast_poll_increase,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
-#endif
- {
- .procname = "discovery_slots",
- .data = &sysctl_discovery_slots,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &min_discovery_slots,
- .extra2 = &max_discovery_slots
- },
- {
- .procname = "discovery_timeout",
- .data = &sysctl_discovery_timeout,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
- .procname = "slot_timeout",
- .data = &sysctl_slot_timeout,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &min_slot_timeout,
- .extra2 = &max_slot_timeout
- },
- {
- .procname = "max_baud_rate",
- .data = &sysctl_max_baud_rate,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &min_max_baud_rate,
- .extra2 = &max_max_baud_rate
- },
- {
- .procname = "min_tx_turn_time",
- .data = &sysctl_min_tx_turn_time,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &min_min_tx_turn_time,
- .extra2 = &max_min_tx_turn_time
- },
- {
- .procname = "max_tx_data_size",
- .data = &sysctl_max_tx_data_size,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &min_max_tx_data_size,
- .extra2 = &max_max_tx_data_size
- },
- {
- .procname = "max_tx_window",
- .data = &sysctl_max_tx_window,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &min_max_tx_window,
- .extra2 = &max_max_tx_window
- },
- {
- .procname = "max_noreply_time",
- .data = &sysctl_max_noreply_time,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &min_max_noreply_time,
- .extra2 = &max_max_noreply_time
- },
- {
- .procname = "warn_noreply_time",
- .data = &sysctl_warn_noreply_time,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &min_warn_noreply_time,
- .extra2 = &max_warn_noreply_time
- },
- {
- .procname = "lap_keepalive_time",
- .data = &sysctl_lap_keepalive_time,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &min_lap_keepalive_time,
- .extra2 = &max_lap_keepalive_time
- },
- { }
-};
-
-static struct ctl_table_header *irda_table_header;
-
-/*
- * Function irda_sysctl_register (void)
- *
- * Register our sysctl interface
- *
- */
-int __init irda_sysctl_register(void)
-{
- irda_table_header = register_net_sysctl(&init_net, "net/irda", irda_table);
- if (!irda_table_header)
- return -ENOMEM;
-
- return 0;
-}
-
-/*
- * Function irda_sysctl_unregister (void)
- *
- * Unregister our sysctl interface
- *
- */
-void irda_sysctl_unregister(void)
-{
- unregister_net_sysctl_table(irda_table_header);
-}
-
-
-
diff --git a/net/irda/irttp.c b/net/irda/irttp.c
deleted file mode 100644
index b6ab41d5b3a3..000000000000
--- a/net/irda/irttp.c
+++ /dev/null
@@ -1,1891 +0,0 @@
-/*********************************************************************
- *
- * Filename: irttp.c
- * Version: 1.2
- * Description: Tiny Transport Protocol (TTP) implementation
- * Status: Stable
- * Author: Dag Brattli <dagb@cs.uit.no>
- * Created at: Sun Aug 31 20:14:31 1997
- * Modified at: Wed Jan 5 11:31:27 2000
- * Modified by: Dag Brattli <dagb@cs.uit.no>
- *
- * Copyright (c) 1998-2000 Dag Brattli <dagb@cs.uit.no>,
- * All Rights Reserved.
- * Copyright (c) 2000-2003 Jean Tourrilhes <jt@hpl.hp.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * Neither Dag Brattli nor University of Tromsø admit liability nor
- * provide warranty for any of this software. This material is
- * provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-#include <linux/skbuff.h>
-#include <linux/init.h>
-#include <linux/fs.h>
-#include <linux/seq_file.h>
-#include <linux/slab.h>
-#include <linux/export.h>
-
-#include <asm/byteorder.h>
-#include <asm/unaligned.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/irlap.h>
-#include <net/irda/irlmp.h>
-#include <net/irda/parameters.h>
-#include <net/irda/irttp.h>
-
-static struct irttp_cb *irttp;
-
-static void __irttp_close_tsap(struct tsap_cb *self);
-
-static int irttp_data_indication(void *instance, void *sap,
- struct sk_buff *skb);
-static int irttp_udata_indication(void *instance, void *sap,
- struct sk_buff *skb);
-static void irttp_disconnect_indication(void *instance, void *sap,
- LM_REASON reason, struct sk_buff *);
-static void irttp_connect_indication(void *instance, void *sap,
- struct qos_info *qos, __u32 max_sdu_size,
- __u8 header_size, struct sk_buff *skb);
-static void irttp_connect_confirm(void *instance, void *sap,
- struct qos_info *qos, __u32 max_sdu_size,
- __u8 header_size, struct sk_buff *skb);
-static void irttp_run_tx_queue(struct tsap_cb *self);
-static void irttp_run_rx_queue(struct tsap_cb *self);
-
-static void irttp_flush_queues(struct tsap_cb *self);
-static void irttp_fragment_skb(struct tsap_cb *self, struct sk_buff *skb);
-static struct sk_buff *irttp_reassemble_skb(struct tsap_cb *self);
-static void irttp_todo_expired(unsigned long data);
-static int irttp_param_max_sdu_size(void *instance, irda_param_t *param,
- int get);
-
-static void irttp_flow_indication(void *instance, void *sap, LOCAL_FLOW flow);
-static void irttp_status_indication(void *instance,
- LINK_STATUS link, LOCK_STATUS lock);
-
-/* Information for parsing parameters in IrTTP */
-static const pi_minor_info_t pi_minor_call_table[] = {
- { NULL, 0 }, /* 0x00 */
- { irttp_param_max_sdu_size, PV_INTEGER | PV_BIG_ENDIAN } /* 0x01 */
-};
-static const pi_major_info_t pi_major_call_table[] = {
- { pi_minor_call_table, 2 }
-};
-static pi_param_info_t param_info = { pi_major_call_table, 1, 0x0f, 4 };
-
-/************************ GLOBAL PROCEDURES ************************/
-
-/*
- * Function irttp_init (void)
- *
- * Initialize the IrTTP layer. Called by module initialization code
- *
- */
-int __init irttp_init(void)
-{
- irttp = kzalloc(sizeof(struct irttp_cb), GFP_KERNEL);
- if (irttp == NULL)
- return -ENOMEM;
-
- irttp->magic = TTP_MAGIC;
-
- irttp->tsaps = hashbin_new(HB_LOCK);
- if (!irttp->tsaps) {
- net_err_ratelimited("%s: can't allocate IrTTP hashbin!\n",
- __func__);
- kfree(irttp);
- return -ENOMEM;
- }
-
- return 0;
-}
-
-/*
- * Function irttp_cleanup (void)
- *
- * Called by module destruction/cleanup code
- *
- */
-void irttp_cleanup(void)
-{
- /* Check for main structure */
- IRDA_ASSERT(irttp->magic == TTP_MAGIC, return;);
-
- /*
- * Delete hashbin and close all TSAP instances in it
- */
- hashbin_delete(irttp->tsaps, (FREE_FUNC) __irttp_close_tsap);
-
- irttp->magic = 0;
-
- /* De-allocate main structure */
- kfree(irttp);
-
- irttp = NULL;
-}
-
-/*************************** SUBROUTINES ***************************/
-
-/*
- * Function irttp_start_todo_timer (self, timeout)
- *
- * Start todo timer.
- *
- * Made it more effient and unsensitive to race conditions - Jean II
- */
-static inline void irttp_start_todo_timer(struct tsap_cb *self, int timeout)
-{
- /* Set new value for timer */
- mod_timer(&self->todo_timer, jiffies + timeout);
-}
-
-/*
- * Function irttp_todo_expired (data)
- *
- * Todo timer has expired!
- *
- * One of the restriction of the timer is that it is run only on the timer
- * interrupt which run every 10ms. This mean that even if you set the timer
- * with a delay of 0, it may take up to 10ms before it's run.
- * So, to minimise latency and keep cache fresh, we try to avoid using
- * it as much as possible.
- * Note : we can't use tasklets, because they can't be asynchronously
- * killed (need user context), and we can't guarantee that here...
- * Jean II
- */
-static void irttp_todo_expired(unsigned long data)
-{
- struct tsap_cb *self = (struct tsap_cb *) data;
-
- /* Check that we still exist */
- if (!self || self->magic != TTP_TSAP_MAGIC)
- return;
-
- pr_debug("%s(instance=%p)\n", __func__, self);
-
- /* Try to make some progress, especially on Tx side - Jean II */
- irttp_run_rx_queue(self);
- irttp_run_tx_queue(self);
-
- /* Check if time for disconnect */
- if (test_bit(0, &self->disconnect_pend)) {
- /* Check if it's possible to disconnect yet */
- if (skb_queue_empty(&self->tx_queue)) {
- /* Make sure disconnect is not pending anymore */
- clear_bit(0, &self->disconnect_pend); /* FALSE */
-
- /* Note : self->disconnect_skb may be NULL */
- irttp_disconnect_request(self, self->disconnect_skb,
- P_NORMAL);
- self->disconnect_skb = NULL;
- } else {
- /* Try again later */
- irttp_start_todo_timer(self, HZ/10);
-
- /* No reason to try and close now */
- return;
- }
- }
-
- /* Check if it's closing time */
- if (self->close_pend)
- /* Finish cleanup */
- irttp_close_tsap(self);
-}
-
-/*
- * Function irttp_flush_queues (self)
- *
- * Flushes (removes all frames) in transitt-buffer (tx_list)
- */
-static void irttp_flush_queues(struct tsap_cb *self)
-{
- struct sk_buff *skb;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return;);
-
- /* Deallocate frames waiting to be sent */
- while ((skb = skb_dequeue(&self->tx_queue)) != NULL)
- dev_kfree_skb(skb);
-
- /* Deallocate received frames */
- while ((skb = skb_dequeue(&self->rx_queue)) != NULL)
- dev_kfree_skb(skb);
-
- /* Deallocate received fragments */
- while ((skb = skb_dequeue(&self->rx_fragments)) != NULL)
- dev_kfree_skb(skb);
-}
-
-/*
- * Function irttp_reassemble (self)
- *
- * Makes a new (continuous) skb of all the fragments in the fragment
- * queue
- *
- */
-static struct sk_buff *irttp_reassemble_skb(struct tsap_cb *self)
-{
- struct sk_buff *skb, *frag;
- int n = 0; /* Fragment index */
-
- IRDA_ASSERT(self != NULL, return NULL;);
- IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return NULL;);
-
- pr_debug("%s(), self->rx_sdu_size=%d\n", __func__,
- self->rx_sdu_size);
-
- skb = dev_alloc_skb(TTP_HEADER + self->rx_sdu_size);
- if (!skb)
- return NULL;
-
- /*
- * Need to reserve space for TTP header in case this skb needs to
- * be requeued in case delivery failes
- */
- skb_reserve(skb, TTP_HEADER);
- skb_put(skb, self->rx_sdu_size);
-
- /*
- * Copy all fragments to a new buffer
- */
- while ((frag = skb_dequeue(&self->rx_fragments)) != NULL) {
- skb_copy_to_linear_data_offset(skb, n, frag->data, frag->len);
- n += frag->len;
-
- dev_kfree_skb(frag);
- }
-
- pr_debug("%s(), frame len=%d, rx_sdu_size=%d, rx_max_sdu_size=%d\n",
- __func__, n, self->rx_sdu_size, self->rx_max_sdu_size);
- /* Note : irttp_run_rx_queue() calculate self->rx_sdu_size
- * by summing the size of all fragments, so we should always
- * have n == self->rx_sdu_size, except in cases where we
- * droped the last fragment (when self->rx_sdu_size exceed
- * self->rx_max_sdu_size), where n < self->rx_sdu_size.
- * Jean II */
- IRDA_ASSERT(n <= self->rx_sdu_size, n = self->rx_sdu_size;);
-
- /* Set the new length */
- skb_trim(skb, n);
-
- self->rx_sdu_size = 0;
-
- return skb;
-}
-
-/*
- * Function irttp_fragment_skb (skb)
- *
- * Fragments a frame and queues all the fragments for transmission
- *
- */
-static inline void irttp_fragment_skb(struct tsap_cb *self,
- struct sk_buff *skb)
-{
- struct sk_buff *frag;
- __u8 *frame;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return;);
- IRDA_ASSERT(skb != NULL, return;);
-
- /*
- * Split frame into a number of segments
- */
- while (skb->len > self->max_seg_size) {
- pr_debug("%s(), fragmenting ...\n", __func__);
-
- /* Make new segment */
- frag = alloc_skb(self->max_seg_size+self->max_header_size,
- GFP_ATOMIC);
- if (!frag)
- return;
-
- skb_reserve(frag, self->max_header_size);
-
- /* Copy data from the original skb into this fragment. */
- skb_copy_from_linear_data(skb, skb_put(frag, self->max_seg_size),
- self->max_seg_size);
-
- /* Insert TTP header, with the more bit set */
- frame = skb_push(frag, TTP_HEADER);
- frame[0] = TTP_MORE;
-
- /* Hide the copied data from the original skb */
- skb_pull(skb, self->max_seg_size);
-
- /* Queue fragment */
- skb_queue_tail(&self->tx_queue, frag);
- }
- /* Queue what is left of the original skb */
- pr_debug("%s(), queuing last segment\n", __func__);
-
- frame = skb_push(skb, TTP_HEADER);
- frame[0] = 0x00; /* Clear more bit */
-
- /* Queue fragment */
- skb_queue_tail(&self->tx_queue, skb);
-}
-
-/*
- * Function irttp_param_max_sdu_size (self, param)
- *
- * Handle the MaxSduSize parameter in the connect frames, this function
- * will be called both when this parameter needs to be inserted into, and
- * extracted from the connect frames
- */
-static int irttp_param_max_sdu_size(void *instance, irda_param_t *param,
- int get)
-{
- struct tsap_cb *self;
-
- self = instance;
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return -1;);
-
- if (get)
- param->pv.i = self->tx_max_sdu_size;
- else
- self->tx_max_sdu_size = param->pv.i;
-
- pr_debug("%s(), MaxSduSize=%d\n", __func__, param->pv.i);
-
- return 0;
-}
-
-/*************************** CLIENT CALLS ***************************/
-/************************** LMP CALLBACKS **************************/
-/* Everything is happily mixed up. Waiting for next clean up - Jean II */
-
-/*
- * Initialization, that has to be done on new tsap
- * instance allocation and on duplication
- */
-static void irttp_init_tsap(struct tsap_cb *tsap)
-{
- spin_lock_init(&tsap->lock);
- init_timer(&tsap->todo_timer);
-
- skb_queue_head_init(&tsap->rx_queue);
- skb_queue_head_init(&tsap->tx_queue);
- skb_queue_head_init(&tsap->rx_fragments);
-}
-
-/*
- * Function irttp_open_tsap (stsap, notify)
- *
- * Create TSAP connection endpoint,
- */
-struct tsap_cb *irttp_open_tsap(__u8 stsap_sel, int credit, notify_t *notify)
-{
- struct tsap_cb *self;
- struct lsap_cb *lsap;
- notify_t ttp_notify;
-
- IRDA_ASSERT(irttp->magic == TTP_MAGIC, return NULL;);
-
- /* The IrLMP spec (IrLMP 1.1 p10) says that we have the right to
- * use only 0x01-0x6F. Of course, we can use LSAP_ANY as well.
- * JeanII */
- if ((stsap_sel != LSAP_ANY) &&
- ((stsap_sel < 0x01) || (stsap_sel >= 0x70))) {
- pr_debug("%s(), invalid tsap!\n", __func__);
- return NULL;
- }
-
- self = kzalloc(sizeof(struct tsap_cb), GFP_ATOMIC);
- if (self == NULL)
- return NULL;
-
- /* Initialize internal objects */
- irttp_init_tsap(self);
-
- /* Initialise todo timer */
- self->todo_timer.data = (unsigned long) self;
- self->todo_timer.function = &irttp_todo_expired;
-
- /* Initialize callbacks for IrLMP to use */
- irda_notify_init(&ttp_notify);
- ttp_notify.connect_confirm = irttp_connect_confirm;
- ttp_notify.connect_indication = irttp_connect_indication;
- ttp_notify.disconnect_indication = irttp_disconnect_indication;
- ttp_notify.data_indication = irttp_data_indication;
- ttp_notify.udata_indication = irttp_udata_indication;
- ttp_notify.flow_indication = irttp_flow_indication;
- if (notify->status_indication != NULL)
- ttp_notify.status_indication = irttp_status_indication;
- ttp_notify.instance = self;
- strncpy(ttp_notify.name, notify->name, NOTIFY_MAX_NAME);
-
- self->magic = TTP_TSAP_MAGIC;
- self->connected = FALSE;
-
- /*
- * Create LSAP at IrLMP layer
- */
- lsap = irlmp_open_lsap(stsap_sel, &ttp_notify, 0);
- if (lsap == NULL) {
- pr_debug("%s: unable to allocate LSAP!!\n", __func__);
- __irttp_close_tsap(self);
- return NULL;
- }
-
- /*
- * If user specified LSAP_ANY as source TSAP selector, then IrLMP
- * will replace it with whatever source selector which is free, so
- * the stsap_sel we have might not be valid anymore
- */
- self->stsap_sel = lsap->slsap_sel;
- pr_debug("%s(), stsap_sel=%02x\n", __func__, self->stsap_sel);
-
- self->notify = *notify;
- self->lsap = lsap;
-
- hashbin_insert(irttp->tsaps, (irda_queue_t *) self, (long) self, NULL);
-
- if (credit > TTP_RX_MAX_CREDIT)
- self->initial_credit = TTP_RX_MAX_CREDIT;
- else
- self->initial_credit = credit;
-
- return self;
-}
-EXPORT_SYMBOL(irttp_open_tsap);
-
-/*
- * Function irttp_close (handle)
- *
- * Remove an instance of a TSAP. This function should only deal with the
- * deallocation of the TSAP, and resetting of the TSAPs values;
- *
- */
-static void __irttp_close_tsap(struct tsap_cb *self)
-{
- /* First make sure we're connected. */
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return;);
-
- irttp_flush_queues(self);
-
- del_timer(&self->todo_timer);
-
- /* This one won't be cleaned up if we are disconnect_pend + close_pend
- * and we receive a disconnect_indication */
- if (self->disconnect_skb)
- dev_kfree_skb(self->disconnect_skb);
-
- self->connected = FALSE;
- self->magic = ~TTP_TSAP_MAGIC;
-
- kfree(self);
-}
-
-/*
- * Function irttp_close (self)
- *
- * Remove TSAP from list of all TSAPs and then deallocate all resources
- * associated with this TSAP
- *
- * Note : because we *free* the tsap structure, it is the responsibility
- * of the caller to make sure we are called only once and to deal with
- * possible race conditions. - Jean II
- */
-int irttp_close_tsap(struct tsap_cb *self)
-{
- struct tsap_cb *tsap;
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return -1;);
-
- /* Make sure tsap has been disconnected */
- if (self->connected) {
- /* Check if disconnect is not pending */
- if (!test_bit(0, &self->disconnect_pend)) {
- net_warn_ratelimited("%s: TSAP still connected!\n",
- __func__);
- irttp_disconnect_request(self, NULL, P_NORMAL);
- }
- self->close_pend = TRUE;
- irttp_start_todo_timer(self, HZ/10);
-
- return 0; /* Will be back! */
- }
-
- tsap = hashbin_remove(irttp->tsaps, (long) self, NULL);
-
- IRDA_ASSERT(tsap == self, return -1;);
-
- /* Close corresponding LSAP */
- if (self->lsap) {
- irlmp_close_lsap(self->lsap);
- self->lsap = NULL;
- }
-
- __irttp_close_tsap(self);
-
- return 0;
-}
-EXPORT_SYMBOL(irttp_close_tsap);
-
-/*
- * Function irttp_udata_request (self, skb)
- *
- * Send unreliable data on this TSAP
- *
- */
-int irttp_udata_request(struct tsap_cb *self, struct sk_buff *skb)
-{
- int ret;
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return -1;);
- IRDA_ASSERT(skb != NULL, return -1;);
-
- /* Take shortcut on zero byte packets */
- if (skb->len == 0) {
- ret = 0;
- goto err;
- }
-
- /* Check that nothing bad happens */
- if (!self->connected) {
- net_warn_ratelimited("%s(), Not connected\n", __func__);
- ret = -ENOTCONN;
- goto err;
- }
-
- if (skb->len > self->max_seg_size) {
- net_err_ratelimited("%s(), UData is too large for IrLAP!\n",
- __func__);
- ret = -EMSGSIZE;
- goto err;
- }
-
- irlmp_udata_request(self->lsap, skb);
- self->stats.tx_packets++;
-
- return 0;
-
-err:
- dev_kfree_skb(skb);
- return ret;
-}
-EXPORT_SYMBOL(irttp_udata_request);
-
-
-/*
- * Function irttp_data_request (handle, skb)
- *
- * Queue frame for transmission. If SAR is enabled, fragement the frame
- * and queue the fragments for transmission
- */
-int irttp_data_request(struct tsap_cb *self, struct sk_buff *skb)
-{
- __u8 *frame;
- int ret;
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return -1;);
- IRDA_ASSERT(skb != NULL, return -1;);
-
- pr_debug("%s() : queue len = %d\n", __func__,
- skb_queue_len(&self->tx_queue));
-
- /* Take shortcut on zero byte packets */
- if (skb->len == 0) {
- ret = 0;
- goto err;
- }
-
- /* Check that nothing bad happens */
- if (!self->connected) {
- net_warn_ratelimited("%s: Not connected\n", __func__);
- ret = -ENOTCONN;
- goto err;
- }
-
- /*
- * Check if SAR is disabled, and the frame is larger than what fits
- * inside an IrLAP frame
- */
- if ((self->tx_max_sdu_size == 0) && (skb->len > self->max_seg_size)) {
- net_err_ratelimited("%s: SAR disabled, and data is too large for IrLAP!\n",
- __func__);
- ret = -EMSGSIZE;
- goto err;
- }
-
- /*
- * Check if SAR is enabled, and the frame is larger than the
- * TxMaxSduSize
- */
- if ((self->tx_max_sdu_size != 0) &&
- (self->tx_max_sdu_size != TTP_SAR_UNBOUND) &&
- (skb->len > self->tx_max_sdu_size)) {
- net_err_ratelimited("%s: SAR enabled, but data is larger than TxMaxSduSize!\n",
- __func__);
- ret = -EMSGSIZE;
- goto err;
- }
- /*
- * Check if transmit queue is full
- */
- if (skb_queue_len(&self->tx_queue) >= TTP_TX_MAX_QUEUE) {
- /*
- * Give it a chance to empty itself
- */
- irttp_run_tx_queue(self);
-
- /* Drop packet. This error code should trigger the caller
- * to resend the data in the client code - Jean II */
- ret = -ENOBUFS;
- goto err;
- }
-
- /* Queue frame, or queue frame segments */
- if ((self->tx_max_sdu_size == 0) || (skb->len < self->max_seg_size)) {
- /* Queue frame */
- IRDA_ASSERT(skb_headroom(skb) >= TTP_HEADER, return -1;);
- frame = skb_push(skb, TTP_HEADER);
- frame[0] = 0x00; /* Clear more bit */
-
- skb_queue_tail(&self->tx_queue, skb);
- } else {
- /*
- * Fragment the frame, this function will also queue the
- * fragments, we don't care about the fact the transmit
- * queue may be overfilled by all the segments for a little
- * while
- */
- irttp_fragment_skb(self, skb);
- }
-
- /* Check if we can accept more data from client */
- if ((!self->tx_sdu_busy) &&
- (skb_queue_len(&self->tx_queue) > TTP_TX_HIGH_THRESHOLD)) {
- /* Tx queue filling up, so stop client. */
- if (self->notify.flow_indication) {
- self->notify.flow_indication(self->notify.instance,
- self, FLOW_STOP);
- }
- /* self->tx_sdu_busy is the state of the client.
- * Update state after notifying client to avoid
- * race condition with irttp_flow_indication().
- * If the queue empty itself after our test but before
- * we set the flag, we will fix ourselves below in
- * irttp_run_tx_queue().
- * Jean II */
- self->tx_sdu_busy = TRUE;
- }
-
- /* Try to make some progress */
- irttp_run_tx_queue(self);
-
- return 0;
-
-err:
- dev_kfree_skb(skb);
- return ret;
-}
-EXPORT_SYMBOL(irttp_data_request);
-
-/*
- * Function irttp_run_tx_queue (self)
- *
- * Transmit packets queued for transmission (if possible)
- *
- */
-static void irttp_run_tx_queue(struct tsap_cb *self)
-{
- struct sk_buff *skb;
- unsigned long flags;
- int n;
-
- pr_debug("%s() : send_credit = %d, queue_len = %d\n",
- __func__,
- self->send_credit, skb_queue_len(&self->tx_queue));
-
- /* Get exclusive access to the tx queue, otherwise don't touch it */
- if (irda_lock(&self->tx_queue_lock) == FALSE)
- return;
-
- /* Try to send out frames as long as we have credits
- * and as long as LAP is not full. If LAP is full, it will
- * poll us through irttp_flow_indication() - Jean II */
- while ((self->send_credit > 0) &&
- (!irlmp_lap_tx_queue_full(self->lsap)) &&
- (skb = skb_dequeue(&self->tx_queue))) {
- /*
- * Since we can transmit and receive frames concurrently,
- * the code below is a critical region and we must assure that
- * nobody messes with the credits while we update them.
- */
- spin_lock_irqsave(&self->lock, flags);
-
- n = self->avail_credit;
- self->avail_credit = 0;
-
- /* Only room for 127 credits in frame */
- if (n > 127) {
- self->avail_credit = n-127;
- n = 127;
- }
- self->remote_credit += n;
- self->send_credit--;
-
- spin_unlock_irqrestore(&self->lock, flags);
-
- /*
- * More bit must be set by the data_request() or fragment()
- * functions
- */
- skb->data[0] |= (n & 0x7f);
-
- /* Detach from socket.
- * The current skb has a reference to the socket that sent
- * it (skb->sk). When we pass it to IrLMP, the skb will be
- * stored in in IrLAP (self->wx_list). When we are within
- * IrLAP, we lose the notion of socket, so we should not
- * have a reference to a socket. So, we drop it here.
- *
- * Why does it matter ?
- * When the skb is freed (kfree_skb), if it is associated
- * with a socket, it release buffer space on the socket
- * (through sock_wfree() and sock_def_write_space()).
- * If the socket no longer exist, we may crash. Hard.
- * When we close a socket, we make sure that associated packets
- * in IrTTP are freed. However, we have no way to cancel
- * the packet that we have passed to IrLAP. So, if a packet
- * remains in IrLAP (retry on the link or else) after we
- * close the socket, we are dead !
- * Jean II */
- if (skb->sk != NULL) {
- /* IrSOCK application, IrOBEX, ... */
- skb_orphan(skb);
- }
- /* IrCOMM over IrTTP, IrLAN, ... */
-
- /* Pass the skb to IrLMP - done */
- irlmp_data_request(self->lsap, skb);
- self->stats.tx_packets++;
- }
-
- /* Check if we can accept more frames from client.
- * We don't want to wait until the todo timer to do that, and we
- * can't use tasklets (grr...), so we are obliged to give control
- * to client. That's ok, this test will be true not too often
- * (max once per LAP window) and we are called from places
- * where we can spend a bit of time doing stuff. - Jean II */
- if ((self->tx_sdu_busy) &&
- (skb_queue_len(&self->tx_queue) < TTP_TX_LOW_THRESHOLD) &&
- (!self->close_pend)) {
- if (self->notify.flow_indication)
- self->notify.flow_indication(self->notify.instance,
- self, FLOW_START);
-
- /* self->tx_sdu_busy is the state of the client.
- * We don't really have a race here, but it's always safer
- * to update our state after the client - Jean II */
- self->tx_sdu_busy = FALSE;
- }
-
- /* Reset lock */
- self->tx_queue_lock = 0;
-}
-
-/*
- * Function irttp_give_credit (self)
- *
- * Send a dataless flowdata TTP-PDU and give available credit to peer
- * TSAP
- */
-static inline void irttp_give_credit(struct tsap_cb *self)
-{
- struct sk_buff *tx_skb = NULL;
- unsigned long flags;
- int n;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return;);
-
- pr_debug("%s() send=%d,avail=%d,remote=%d\n",
- __func__,
- self->send_credit, self->avail_credit, self->remote_credit);
-
- /* Give credit to peer */
- tx_skb = alloc_skb(TTP_MAX_HEADER, GFP_ATOMIC);
- if (!tx_skb)
- return;
-
- /* Reserve space for LMP, and LAP header */
- skb_reserve(tx_skb, LMP_MAX_HEADER);
-
- /*
- * Since we can transmit and receive frames concurrently,
- * the code below is a critical region and we must assure that
- * nobody messes with the credits while we update them.
- */
- spin_lock_irqsave(&self->lock, flags);
-
- n = self->avail_credit;
- self->avail_credit = 0;
-
- /* Only space for 127 credits in frame */
- if (n > 127) {
- self->avail_credit = n - 127;
- n = 127;
- }
- self->remote_credit += n;
-
- spin_unlock_irqrestore(&self->lock, flags);
-
- skb_put(tx_skb, 1);
- tx_skb->data[0] = (__u8) (n & 0x7f);
-
- irlmp_data_request(self->lsap, tx_skb);
- self->stats.tx_packets++;
-}
-
-/*
- * Function irttp_udata_indication (instance, sap, skb)
- *
- * Received some unit-data (unreliable)
- *
- */
-static int irttp_udata_indication(void *instance, void *sap,
- struct sk_buff *skb)
-{
- struct tsap_cb *self;
- int err;
-
- self = instance;
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return -1;);
- IRDA_ASSERT(skb != NULL, return -1;);
-
- self->stats.rx_packets++;
-
- /* Just pass data to layer above */
- if (self->notify.udata_indication) {
- err = self->notify.udata_indication(self->notify.instance,
- self, skb);
- /* Same comment as in irttp_do_data_indication() */
- if (!err)
- return 0;
- }
- /* Either no handler, or handler returns an error */
- dev_kfree_skb(skb);
-
- return 0;
-}
-
-/*
- * Function irttp_data_indication (instance, sap, skb)
- *
- * Receive segment from IrLMP.
- *
- */
-static int irttp_data_indication(void *instance, void *sap,
- struct sk_buff *skb)
-{
- struct tsap_cb *self;
- unsigned long flags;
- int n;
-
- self = instance;
-
- n = skb->data[0] & 0x7f; /* Extract the credits */
-
- self->stats.rx_packets++;
-
- /* Deal with inbound credit
- * Since we can transmit and receive frames concurrently,
- * the code below is a critical region and we must assure that
- * nobody messes with the credits while we update them.
- */
- spin_lock_irqsave(&self->lock, flags);
- self->send_credit += n;
- if (skb->len > 1)
- self->remote_credit--;
- spin_unlock_irqrestore(&self->lock, flags);
-
- /*
- * Data or dataless packet? Dataless frames contains only the
- * TTP_HEADER.
- */
- if (skb->len > 1) {
- /*
- * We don't remove the TTP header, since we must preserve the
- * more bit, so the defragment routing knows what to do
- */
- skb_queue_tail(&self->rx_queue, skb);
- } else {
- /* Dataless flowdata TTP-PDU */
- dev_kfree_skb(skb);
- }
-
-
- /* Push data to the higher layer.
- * We do it synchronously because running the todo timer for each
- * receive packet would be too much overhead and latency.
- * By passing control to the higher layer, we run the risk that
- * it may take time or grab a lock. Most often, the higher layer
- * will only put packet in a queue.
- * Anyway, packets are only dripping through the IrDA, so we can
- * have time before the next packet.
- * Further, we are run from NET_BH, so the worse that can happen is
- * us missing the optimal time to send back the PF bit in LAP.
- * Jean II */
- irttp_run_rx_queue(self);
-
- /* We now give credits to peer in irttp_run_rx_queue().
- * We need to send credit *NOW*, otherwise we are going
- * to miss the next Tx window. The todo timer may take
- * a while before it's run... - Jean II */
-
- /*
- * If the peer device has given us some credits and we didn't have
- * anyone from before, then we need to shedule the tx queue.
- * We need to do that because our Tx have stopped (so we may not
- * get any LAP flow indication) and the user may be stopped as
- * well. - Jean II
- */
- if (self->send_credit == n) {
- /* Restart pushing stuff to LAP */
- irttp_run_tx_queue(self);
- /* Note : we don't want to schedule the todo timer
- * because it has horrible latency. No tasklets
- * because the tasklet API is broken. - Jean II */
- }
-
- return 0;
-}
-
-/*
- * Function irttp_status_indication (self, reason)
- *
- * Status_indication, just pass to the higher layer...
- *
- */
-static void irttp_status_indication(void *instance,
- LINK_STATUS link, LOCK_STATUS lock)
-{
- struct tsap_cb *self;
-
- self = instance;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return;);
-
- /* Check if client has already closed the TSAP and gone away */
- if (self->close_pend)
- return;
-
- /*
- * Inform service user if he has requested it
- */
- if (self->notify.status_indication != NULL)
- self->notify.status_indication(self->notify.instance,
- link, lock);
- else
- pr_debug("%s(), no handler\n", __func__);
-}
-
-/*
- * Function irttp_flow_indication (self, reason)
- *
- * Flow_indication : IrLAP tells us to send more data.
- *
- */
-static void irttp_flow_indication(void *instance, void *sap, LOCAL_FLOW flow)
-{
- struct tsap_cb *self;
-
- self = instance;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return;);
-
- pr_debug("%s(instance=%p)\n", __func__, self);
-
- /* We are "polled" directly from LAP, and the LAP want to fill
- * its Tx window. We want to do our best to send it data, so that
- * we maximise the window. On the other hand, we want to limit the
- * amount of work here so that LAP doesn't hang forever waiting
- * for packets. - Jean II */
-
- /* Try to send some packets. Currently, LAP calls us every time
- * there is one free slot, so we will send only one packet.
- * This allow the scheduler to do its round robin - Jean II */
- irttp_run_tx_queue(self);
-
- /* Note regarding the interraction with higher layer.
- * irttp_run_tx_queue() may call the client when its queue
- * start to empty, via notify.flow_indication(). Initially.
- * I wanted this to happen in a tasklet, to avoid client
- * grabbing the CPU, but we can't use tasklets safely. And timer
- * is definitely too slow.
- * This will happen only once per LAP window, and usually at
- * the third packet (unless window is smaller). LAP is still
- * doing mtt and sending first packet so it's sort of OK
- * to do that. Jean II */
-
- /* If we need to send disconnect. try to do it now */
- if (self->disconnect_pend)
- irttp_start_todo_timer(self, 0);
-}
-
-/*
- * Function irttp_flow_request (self, command)
- *
- * This function could be used by the upper layers to tell IrTTP to stop
- * delivering frames if the receive queues are starting to get full, or
- * to tell IrTTP to start delivering frames again.
- */
-void irttp_flow_request(struct tsap_cb *self, LOCAL_FLOW flow)
-{
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return;);
-
- switch (flow) {
- case FLOW_STOP:
- pr_debug("%s(), flow stop\n", __func__);
- self->rx_sdu_busy = TRUE;
- break;
- case FLOW_START:
- pr_debug("%s(), flow start\n", __func__);
- self->rx_sdu_busy = FALSE;
-
- /* Client say he can accept more data, try to free our
- * queues ASAP - Jean II */
- irttp_run_rx_queue(self);
-
- break;
- default:
- pr_debug("%s(), Unknown flow command!\n", __func__);
- }
-}
-EXPORT_SYMBOL(irttp_flow_request);
-
-/*
- * Function irttp_connect_request (self, dtsap_sel, daddr, qos)
- *
- * Try to connect to remote destination TSAP selector
- *
- */
-int irttp_connect_request(struct tsap_cb *self, __u8 dtsap_sel,
- __u32 saddr, __u32 daddr,
- struct qos_info *qos, __u32 max_sdu_size,
- struct sk_buff *userdata)
-{
- struct sk_buff *tx_skb;
- __u8 *frame;
- __u8 n;
-
- pr_debug("%s(), max_sdu_size=%d\n", __func__, max_sdu_size);
-
- IRDA_ASSERT(self != NULL, return -EBADR;);
- IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return -EBADR;);
-
- if (self->connected) {
- if (userdata)
- dev_kfree_skb(userdata);
- return -EISCONN;
- }
-
- /* Any userdata supplied? */
- if (userdata == NULL) {
- tx_skb = alloc_skb(TTP_MAX_HEADER + TTP_SAR_HEADER,
- GFP_ATOMIC);
- if (!tx_skb)
- return -ENOMEM;
-
- /* Reserve space for MUX_CONTROL and LAP header */
- skb_reserve(tx_skb, TTP_MAX_HEADER + TTP_SAR_HEADER);
- } else {
- tx_skb = userdata;
- /*
- * Check that the client has reserved enough space for
- * headers
- */
- IRDA_ASSERT(skb_headroom(userdata) >= TTP_MAX_HEADER,
- { dev_kfree_skb(userdata); return -1; });
- }
-
- /* Initialize connection parameters */
- self->connected = FALSE;
- self->avail_credit = 0;
- self->rx_max_sdu_size = max_sdu_size;
- self->rx_sdu_size = 0;
- self->rx_sdu_busy = FALSE;
- self->dtsap_sel = dtsap_sel;
-
- n = self->initial_credit;
-
- self->remote_credit = 0;
- self->send_credit = 0;
-
- /*
- * Give away max 127 credits for now
- */
- if (n > 127) {
- self->avail_credit = n - 127;
- n = 127;
- }
-
- self->remote_credit = n;
-
- /* SAR enabled? */
- if (max_sdu_size > 0) {
- IRDA_ASSERT(skb_headroom(tx_skb) >= (TTP_MAX_HEADER + TTP_SAR_HEADER),
- { dev_kfree_skb(tx_skb); return -1; });
-
- /* Insert SAR parameters */
- frame = skb_push(tx_skb, TTP_HEADER + TTP_SAR_HEADER);
-
- frame[0] = TTP_PARAMETERS | n;
- frame[1] = 0x04; /* Length */
- frame[2] = 0x01; /* MaxSduSize */
- frame[3] = 0x02; /* Value length */
-
- put_unaligned(cpu_to_be16((__u16) max_sdu_size),
- (__be16 *)(frame+4));
- } else {
- /* Insert plain TTP header */
- frame = skb_push(tx_skb, TTP_HEADER);
-
- /* Insert initial credit in frame */
- frame[0] = n & 0x7f;
- }
-
- /* Connect with IrLMP. No QoS parameters for now */
- return irlmp_connect_request(self->lsap, dtsap_sel, saddr, daddr, qos,
- tx_skb);
-}
-EXPORT_SYMBOL(irttp_connect_request);
-
-/*
- * Function irttp_connect_confirm (handle, qos, skb)
- *
- * Service user confirms TSAP connection with peer.
- *
- */
-static void irttp_connect_confirm(void *instance, void *sap,
- struct qos_info *qos, __u32 max_seg_size,
- __u8 max_header_size, struct sk_buff *skb)
-{
- struct tsap_cb *self;
- int parameters;
- int ret;
- __u8 plen;
- __u8 n;
-
- self = instance;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return;);
- IRDA_ASSERT(skb != NULL, return;);
-
- self->max_seg_size = max_seg_size - TTP_HEADER;
- self->max_header_size = max_header_size + TTP_HEADER;
-
- /*
- * Check if we have got some QoS parameters back! This should be the
- * negotiated QoS for the link.
- */
- if (qos) {
- pr_debug("IrTTP, Negotiated BAUD_RATE: %02x\n",
- qos->baud_rate.bits);
- pr_debug("IrTTP, Negotiated BAUD_RATE: %d bps.\n",
- qos->baud_rate.value);
- }
-
- n = skb->data[0] & 0x7f;
-
- pr_debug("%s(), Initial send_credit=%d\n", __func__, n);
-
- self->send_credit = n;
- self->tx_max_sdu_size = 0;
- self->connected = TRUE;
-
- parameters = skb->data[0] & 0x80;
-
- IRDA_ASSERT(skb->len >= TTP_HEADER, return;);
- skb_pull(skb, TTP_HEADER);
-
- if (parameters) {
- plen = skb->data[0];
-
- ret = irda_param_extract_all(self, skb->data+1,
- IRDA_MIN(skb->len-1, plen),
- &param_info);
-
- /* Any errors in the parameter list? */
- if (ret < 0) {
- net_warn_ratelimited("%s: error extracting parameters\n",
- __func__);
- dev_kfree_skb(skb);
-
- /* Do not accept this connection attempt */
- return;
- }
- /* Remove parameters */
- skb_pull(skb, IRDA_MIN(skb->len, plen+1));
- }
-
- pr_debug("%s() send=%d,avail=%d,remote=%d\n", __func__,
- self->send_credit, self->avail_credit, self->remote_credit);
-
- pr_debug("%s(), MaxSduSize=%d\n", __func__,
- self->tx_max_sdu_size);
-
- if (self->notify.connect_confirm) {
- self->notify.connect_confirm(self->notify.instance, self, qos,
- self->tx_max_sdu_size,
- self->max_header_size, skb);
- } else
- dev_kfree_skb(skb);
-}
-
-/*
- * Function irttp_connect_indication (handle, skb)
- *
- * Some other device is connecting to this TSAP
- *
- */
-static void irttp_connect_indication(void *instance, void *sap,
- struct qos_info *qos, __u32 max_seg_size, __u8 max_header_size,
- struct sk_buff *skb)
-{
- struct tsap_cb *self;
- struct lsap_cb *lsap;
- int parameters;
- int ret;
- __u8 plen;
- __u8 n;
-
- self = instance;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return;);
- IRDA_ASSERT(skb != NULL, return;);
-
- lsap = sap;
-
- self->max_seg_size = max_seg_size - TTP_HEADER;
- self->max_header_size = max_header_size+TTP_HEADER;
-
- pr_debug("%s(), TSAP sel=%02x\n", __func__, self->stsap_sel);
-
- /* Need to update dtsap_sel if its equal to LSAP_ANY */
- self->dtsap_sel = lsap->dlsap_sel;
-
- n = skb->data[0] & 0x7f;
-
- self->send_credit = n;
- self->tx_max_sdu_size = 0;
-
- parameters = skb->data[0] & 0x80;
-
- IRDA_ASSERT(skb->len >= TTP_HEADER, return;);
- skb_pull(skb, TTP_HEADER);
-
- if (parameters) {
- plen = skb->data[0];
-
- ret = irda_param_extract_all(self, skb->data+1,
- IRDA_MIN(skb->len-1, plen),
- &param_info);
-
- /* Any errors in the parameter list? */
- if (ret < 0) {
- net_warn_ratelimited("%s: error extracting parameters\n",
- __func__);
- dev_kfree_skb(skb);
-
- /* Do not accept this connection attempt */
- return;
- }
-
- /* Remove parameters */
- skb_pull(skb, IRDA_MIN(skb->len, plen+1));
- }
-
- if (self->notify.connect_indication) {
- self->notify.connect_indication(self->notify.instance, self,
- qos, self->tx_max_sdu_size,
- self->max_header_size, skb);
- } else
- dev_kfree_skb(skb);
-}
-
-/*
- * Function irttp_connect_response (handle, userdata)
- *
- * Service user is accepting the connection, just pass it down to
- * IrLMP!
- *
- */
-int irttp_connect_response(struct tsap_cb *self, __u32 max_sdu_size,
- struct sk_buff *userdata)
-{
- struct sk_buff *tx_skb;
- __u8 *frame;
- int ret;
- __u8 n;
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return -1;);
-
- pr_debug("%s(), Source TSAP selector=%02x\n", __func__,
- self->stsap_sel);
-
- /* Any userdata supplied? */
- if (userdata == NULL) {
- tx_skb = alloc_skb(TTP_MAX_HEADER + TTP_SAR_HEADER,
- GFP_ATOMIC);
- if (!tx_skb)
- return -ENOMEM;
-
- /* Reserve space for MUX_CONTROL and LAP header */
- skb_reserve(tx_skb, TTP_MAX_HEADER + TTP_SAR_HEADER);
- } else {
- tx_skb = userdata;
- /*
- * Check that the client has reserved enough space for
- * headers
- */
- IRDA_ASSERT(skb_headroom(userdata) >= TTP_MAX_HEADER,
- { dev_kfree_skb(userdata); return -1; });
- }
-
- self->avail_credit = 0;
- self->remote_credit = 0;
- self->rx_max_sdu_size = max_sdu_size;
- self->rx_sdu_size = 0;
- self->rx_sdu_busy = FALSE;
-
- n = self->initial_credit;
-
- /* Frame has only space for max 127 credits (7 bits) */
- if (n > 127) {
- self->avail_credit = n - 127;
- n = 127;
- }
-
- self->remote_credit = n;
- self->connected = TRUE;
-
- /* SAR enabled? */
- if (max_sdu_size > 0) {
- IRDA_ASSERT(skb_headroom(tx_skb) >= (TTP_MAX_HEADER + TTP_SAR_HEADER),
- { dev_kfree_skb(tx_skb); return -1; });
-
- /* Insert TTP header with SAR parameters */
- frame = skb_push(tx_skb, TTP_HEADER + TTP_SAR_HEADER);
-
- frame[0] = TTP_PARAMETERS | n;
- frame[1] = 0x04; /* Length */
-
- /* irda_param_insert(self, IRTTP_MAX_SDU_SIZE, frame+1, */
-/* TTP_SAR_HEADER, &param_info) */
-
- frame[2] = 0x01; /* MaxSduSize */
- frame[3] = 0x02; /* Value length */
-
- put_unaligned(cpu_to_be16((__u16) max_sdu_size),
- (__be16 *)(frame+4));
- } else {
- /* Insert TTP header */
- frame = skb_push(tx_skb, TTP_HEADER);
-
- frame[0] = n & 0x7f;
- }
-
- ret = irlmp_connect_response(self->lsap, tx_skb);
-
- return ret;
-}
-EXPORT_SYMBOL(irttp_connect_response);
-
-/*
- * Function irttp_dup (self, instance)
- *
- * Duplicate TSAP, can be used by servers to confirm a connection on a
- * new TSAP so it can keep listening on the old one.
- */
-struct tsap_cb *irttp_dup(struct tsap_cb *orig, void *instance)
-{
- struct tsap_cb *new;
- unsigned long flags;
-
- /* Protect our access to the old tsap instance */
- spin_lock_irqsave(&irttp->tsaps->hb_spinlock, flags);
-
- /* Find the old instance */
- if (!hashbin_find(irttp->tsaps, (long) orig, NULL)) {
- pr_debug("%s(), unable to find TSAP\n", __func__);
- spin_unlock_irqrestore(&irttp->tsaps->hb_spinlock, flags);
- return NULL;
- }
-
- /* Allocate a new instance */
- new = kmemdup(orig, sizeof(struct tsap_cb), GFP_ATOMIC);
- if (!new) {
- pr_debug("%s(), unable to kmalloc\n", __func__);
- spin_unlock_irqrestore(&irttp->tsaps->hb_spinlock, flags);
- return NULL;
- }
- spin_lock_init(&new->lock);
-
- /* We don't need the old instance any more */
- spin_unlock_irqrestore(&irttp->tsaps->hb_spinlock, flags);
-
- /* Try to dup the LSAP (may fail if we were too slow) */
- new->lsap = irlmp_dup(orig->lsap, new);
- if (!new->lsap) {
- pr_debug("%s(), dup failed!\n", __func__);
- kfree(new);
- return NULL;
- }
-
- /* Not everything should be copied */
- new->notify.instance = instance;
-
- /* Initialize internal objects */
- irttp_init_tsap(new);
-
- /* This is locked */
- hashbin_insert(irttp->tsaps, (irda_queue_t *) new, (long) new, NULL);
-
- return new;
-}
-EXPORT_SYMBOL(irttp_dup);
-
-/*
- * Function irttp_disconnect_request (self)
- *
- * Close this connection please! If priority is high, the queued data
- * segments, if any, will be deallocated first
- *
- */
-int irttp_disconnect_request(struct tsap_cb *self, struct sk_buff *userdata,
- int priority)
-{
- int ret;
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return -1;);
-
- /* Already disconnected? */
- if (!self->connected) {
- pr_debug("%s(), already disconnected!\n", __func__);
- if (userdata)
- dev_kfree_skb(userdata);
- return -1;
- }
-
- /* Disconnect already pending ?
- * We need to use an atomic operation to prevent reentry. This
- * function may be called from various context, like user, timer
- * for following a disconnect_indication() (i.e. net_bh).
- * Jean II */
- if (test_and_set_bit(0, &self->disconnect_pend)) {
- pr_debug("%s(), disconnect already pending\n",
- __func__);
- if (userdata)
- dev_kfree_skb(userdata);
-
- /* Try to make some progress */
- irttp_run_tx_queue(self);
- return -1;
- }
-
- /*
- * Check if there is still data segments in the transmit queue
- */
- if (!skb_queue_empty(&self->tx_queue)) {
- if (priority == P_HIGH) {
- /*
- * No need to send the queued data, if we are
- * disconnecting right now since the data will
- * not have any usable connection to be sent on
- */
- pr_debug("%s(): High priority!!()\n", __func__);
- irttp_flush_queues(self);
- } else if (priority == P_NORMAL) {
- /*
- * Must delay disconnect until after all data segments
- * have been sent and the tx_queue is empty
- */
- /* We'll reuse this one later for the disconnect */
- self->disconnect_skb = userdata; /* May be NULL */
-
- irttp_run_tx_queue(self);
-
- irttp_start_todo_timer(self, HZ/10);
- return -1;
- }
- }
- /* Note : we don't need to check if self->rx_queue is full and the
- * state of self->rx_sdu_busy because the disconnect response will
- * be sent at the LMP level (so even if the peer has its Tx queue
- * full of data). - Jean II */
-
- pr_debug("%s(), Disconnecting ...\n", __func__);
- self->connected = FALSE;
-
- if (!userdata) {
- struct sk_buff *tx_skb;
- tx_skb = alloc_skb(LMP_MAX_HEADER, GFP_ATOMIC);
- if (!tx_skb)
- return -ENOMEM;
-
- /*
- * Reserve space for MUX and LAP header
- */
- skb_reserve(tx_skb, LMP_MAX_HEADER);
-
- userdata = tx_skb;
- }
- ret = irlmp_disconnect_request(self->lsap, userdata);
-
- /* The disconnect is no longer pending */
- clear_bit(0, &self->disconnect_pend); /* FALSE */
-
- return ret;
-}
-EXPORT_SYMBOL(irttp_disconnect_request);
-
-/*
- * Function irttp_disconnect_indication (self, reason)
- *
- * Disconnect indication, TSAP disconnected by peer?
- *
- */
-static void irttp_disconnect_indication(void *instance, void *sap,
- LM_REASON reason, struct sk_buff *skb)
-{
- struct tsap_cb *self;
-
- self = instance;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return;);
-
- /* Prevent higher layer to send more data */
- self->connected = FALSE;
-
- /* Check if client has already tried to close the TSAP */
- if (self->close_pend) {
- /* In this case, the higher layer is probably gone. Don't
- * bother it and clean up the remains - Jean II */
- if (skb)
- dev_kfree_skb(skb);
- irttp_close_tsap(self);
- return;
- }
-
- /* If we are here, we assume that is the higher layer is still
- * waiting for the disconnect notification and able to process it,
- * even if he tried to disconnect. Otherwise, it would have already
- * attempted to close the tsap and self->close_pend would be TRUE.
- * Jean II */
-
- /* No need to notify the client if has already tried to disconnect */
- if (self->notify.disconnect_indication)
- self->notify.disconnect_indication(self->notify.instance, self,
- reason, skb);
- else
- if (skb)
- dev_kfree_skb(skb);
-}
-
-/*
- * Function irttp_do_data_indication (self, skb)
- *
- * Try to deliver reassembled skb to layer above, and requeue it if that
- * for some reason should fail. We mark rx sdu as busy to apply back
- * pressure is necessary.
- */
-static void irttp_do_data_indication(struct tsap_cb *self, struct sk_buff *skb)
-{
- int err;
-
- /* Check if client has already closed the TSAP and gone away */
- if (self->close_pend) {
- dev_kfree_skb(skb);
- return;
- }
-
- err = self->notify.data_indication(self->notify.instance, self, skb);
-
- /* Usually the layer above will notify that it's input queue is
- * starting to get filled by using the flow request, but this may
- * be difficult, so it can instead just refuse to eat it and just
- * give an error back
- */
- if (err) {
- pr_debug("%s() requeueing skb!\n", __func__);
-
- /* Make sure we take a break */
- self->rx_sdu_busy = TRUE;
-
- /* Need to push the header in again */
- skb_push(skb, TTP_HEADER);
- skb->data[0] = 0x00; /* Make sure MORE bit is cleared */
-
- /* Put skb back on queue */
- skb_queue_head(&self->rx_queue, skb);
- }
-}
-
-/*
- * Function irttp_run_rx_queue (self)
- *
- * Check if we have any frames to be transmitted, or if we have any
- * available credit to give away.
- */
-static void irttp_run_rx_queue(struct tsap_cb *self)
-{
- struct sk_buff *skb;
- int more = 0;
-
- pr_debug("%s() send=%d,avail=%d,remote=%d\n", __func__,
- self->send_credit, self->avail_credit, self->remote_credit);
-
- /* Get exclusive access to the rx queue, otherwise don't touch it */
- if (irda_lock(&self->rx_queue_lock) == FALSE)
- return;
-
- /*
- * Reassemble all frames in receive queue and deliver them
- */
- while (!self->rx_sdu_busy && (skb = skb_dequeue(&self->rx_queue))) {
- /* This bit will tell us if it's the last fragment or not */
- more = skb->data[0] & 0x80;
-
- /* Remove TTP header */
- skb_pull(skb, TTP_HEADER);
-
- /* Add the length of the remaining data */
- self->rx_sdu_size += skb->len;
-
- /*
- * If SAR is disabled, or user has requested no reassembly
- * of received fragments then we just deliver them
- * immediately. This can be requested by clients that
- * implements byte streams without any message boundaries
- */
- if (self->rx_max_sdu_size == TTP_SAR_DISABLE) {
- irttp_do_data_indication(self, skb);
- self->rx_sdu_size = 0;
-
- continue;
- }
-
- /* Check if this is a fragment, and not the last fragment */
- if (more) {
- /*
- * Queue the fragment if we still are within the
- * limits of the maximum size of the rx_sdu
- */
- if (self->rx_sdu_size <= self->rx_max_sdu_size) {
- pr_debug("%s(), queueing frag\n",
- __func__);
- skb_queue_tail(&self->rx_fragments, skb);
- } else {
- /* Free the part of the SDU that is too big */
- dev_kfree_skb(skb);
- }
- continue;
- }
- /*
- * This is the last fragment, so time to reassemble!
- */
- if ((self->rx_sdu_size <= self->rx_max_sdu_size) ||
- (self->rx_max_sdu_size == TTP_SAR_UNBOUND)) {
- /*
- * A little optimizing. Only queue the fragment if
- * there are other fragments. Since if this is the
- * last and only fragment, there is no need to
- * reassemble :-)
- */
- if (!skb_queue_empty(&self->rx_fragments)) {
- skb_queue_tail(&self->rx_fragments,
- skb);
-
- skb = irttp_reassemble_skb(self);
- }
-
- /* Now we can deliver the reassembled skb */
- irttp_do_data_indication(self, skb);
- } else {
- pr_debug("%s(), Truncated frame\n", __func__);
-
- /* Free the part of the SDU that is too big */
- dev_kfree_skb(skb);
-
- /* Deliver only the valid but truncated part of SDU */
- skb = irttp_reassemble_skb(self);
-
- irttp_do_data_indication(self, skb);
- }
- self->rx_sdu_size = 0;
- }
-
- /*
- * It's not trivial to keep track of how many credits are available
- * by incrementing at each packet, because delivery may fail
- * (irttp_do_data_indication() may requeue the frame) and because
- * we need to take care of fragmentation.
- * We want the other side to send up to initial_credit packets.
- * We have some frames in our queues, and we have already allowed it
- * to send remote_credit.
- * No need to spinlock, write is atomic and self correcting...
- * Jean II
- */
- self->avail_credit = (self->initial_credit -
- (self->remote_credit +
- skb_queue_len(&self->rx_queue) +
- skb_queue_len(&self->rx_fragments)));
-
- /* Do we have too much credits to send to peer ? */
- if ((self->remote_credit <= TTP_RX_MIN_CREDIT) &&
- (self->avail_credit > 0)) {
- /* Send explicit credit frame */
- irttp_give_credit(self);
- /* Note : do *NOT* check if tx_queue is non-empty, that
- * will produce deadlocks. I repeat : send a credit frame
- * even if we have something to send in our Tx queue.
- * If we have credits, it means that our Tx queue is blocked.
- *
- * Let's suppose the peer can't keep up with our Tx. He will
- * flow control us by not sending us any credits, and we
- * will stop Tx and start accumulating credits here.
- * Up to the point where the peer will stop its Tx queue,
- * for lack of credits.
- * Let's assume the peer application is single threaded.
- * It will block on Tx and never consume any Rx buffer.
- * Deadlock. Guaranteed. - Jean II
- */
- }
-
- /* Reset lock */
- self->rx_queue_lock = 0;
-}
-
-#ifdef CONFIG_PROC_FS
-struct irttp_iter_state {
- int id;
-};
-
-static void *irttp_seq_start(struct seq_file *seq, loff_t *pos)
-{
- struct irttp_iter_state *iter = seq->private;
- struct tsap_cb *self;
-
- /* Protect our access to the tsap list */
- spin_lock_irq(&irttp->tsaps->hb_spinlock);
- iter->id = 0;
-
- for (self = (struct tsap_cb *) hashbin_get_first(irttp->tsaps);
- self != NULL;
- self = (struct tsap_cb *) hashbin_get_next(irttp->tsaps)) {
- if (iter->id == *pos)
- break;
- ++iter->id;
- }
-
- return self;
-}
-
-static void *irttp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- struct irttp_iter_state *iter = seq->private;
-
- ++*pos;
- ++iter->id;
- return (void *) hashbin_get_next(irttp->tsaps);
-}
-
-static void irttp_seq_stop(struct seq_file *seq, void *v)
-{
- spin_unlock_irq(&irttp->tsaps->hb_spinlock);
-}
-
-static int irttp_seq_show(struct seq_file *seq, void *v)
-{
- const struct irttp_iter_state *iter = seq->private;
- const struct tsap_cb *self = v;
-
- seq_printf(seq, "TSAP %d, ", iter->id);
- seq_printf(seq, "stsap_sel: %02x, ",
- self->stsap_sel);
- seq_printf(seq, "dtsap_sel: %02x\n",
- self->dtsap_sel);
- seq_printf(seq, " connected: %s, ",
- self->connected ? "TRUE" : "FALSE");
- seq_printf(seq, "avail credit: %d, ",
- self->avail_credit);
- seq_printf(seq, "remote credit: %d, ",
- self->remote_credit);
- seq_printf(seq, "send credit: %d\n",
- self->send_credit);
- seq_printf(seq, " tx packets: %lu, ",
- self->stats.tx_packets);
- seq_printf(seq, "rx packets: %lu, ",
- self->stats.rx_packets);
- seq_printf(seq, "tx_queue len: %u ",
- skb_queue_len(&self->tx_queue));
- seq_printf(seq, "rx_queue len: %u\n",
- skb_queue_len(&self->rx_queue));
- seq_printf(seq, " tx_sdu_busy: %s, ",
- self->tx_sdu_busy ? "TRUE" : "FALSE");
- seq_printf(seq, "rx_sdu_busy: %s\n",
- self->rx_sdu_busy ? "TRUE" : "FALSE");
- seq_printf(seq, " max_seg_size: %u, ",
- self->max_seg_size);
- seq_printf(seq, "tx_max_sdu_size: %u, ",
- self->tx_max_sdu_size);
- seq_printf(seq, "rx_max_sdu_size: %u\n",
- self->rx_max_sdu_size);
-
- seq_printf(seq, " Used by (%s)\n\n",
- self->notify.name);
- return 0;
-}
-
-static const struct seq_operations irttp_seq_ops = {
- .start = irttp_seq_start,
- .next = irttp_seq_next,
- .stop = irttp_seq_stop,
- .show = irttp_seq_show,
-};
-
-static int irttp_seq_open(struct inode *inode, struct file *file)
-{
- return seq_open_private(file, &irttp_seq_ops,
- sizeof(struct irttp_iter_state));
-}
-
-const struct file_operations irttp_seq_fops = {
- .owner = THIS_MODULE,
- .open = irttp_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_private,
-};
-
-#endif /* PROC_FS */
diff --git a/net/irda/parameters.c b/net/irda/parameters.c
deleted file mode 100644
index 16ce32ffe004..000000000000
--- a/net/irda/parameters.c
+++ /dev/null
@@ -1,584 +0,0 @@
-/*********************************************************************
- *
- * Filename: parameters.c
- * Version: 1.0
- * Description: A more general way to handle (pi,pl,pv) parameters
- * Status: Experimental.
- * Author: Dag Brattli <dagb@cs.uit.no>
- * Created at: Mon Jun 7 10:25:11 1999
- * Modified at: Sun Jan 30 14:08:39 2000
- * Modified by: Dag Brattli <dagb@cs.uit.no>
- *
- * Copyright (c) 1999-2000 Dag Brattli, All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
- ********************************************************************/
-
-#include <linux/types.h>
-#include <linux/module.h>
-
-#include <asm/unaligned.h>
-#include <asm/byteorder.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/parameters.h>
-
-static int irda_extract_integer(void *self, __u8 *buf, int len, __u8 pi,
- PV_TYPE type, PI_HANDLER func);
-static int irda_extract_string(void *self, __u8 *buf, int len, __u8 pi,
- PV_TYPE type, PI_HANDLER func);
-static int irda_extract_octseq(void *self, __u8 *buf, int len, __u8 pi,
- PV_TYPE type, PI_HANDLER func);
-static int irda_extract_no_value(void *self, __u8 *buf, int len, __u8 pi,
- PV_TYPE type, PI_HANDLER func);
-
-static int irda_insert_integer(void *self, __u8 *buf, int len, __u8 pi,
- PV_TYPE type, PI_HANDLER func);
-static int irda_insert_no_value(void *self, __u8 *buf, int len, __u8 pi,
- PV_TYPE type, PI_HANDLER func);
-
-static int irda_param_unpack(__u8 *buf, char *fmt, ...);
-
-/* Parameter value call table. Must match PV_TYPE */
-static const PV_HANDLER pv_extract_table[] = {
- irda_extract_integer, /* Handler for any length integers */
- irda_extract_integer, /* Handler for 8 bits integers */
- irda_extract_integer, /* Handler for 16 bits integers */
- irda_extract_string, /* Handler for strings */
- irda_extract_integer, /* Handler for 32 bits integers */
- irda_extract_octseq, /* Handler for octet sequences */
- irda_extract_no_value /* Handler for no value parameters */
-};
-
-static const PV_HANDLER pv_insert_table[] = {
- irda_insert_integer, /* Handler for any length integers */
- irda_insert_integer, /* Handler for 8 bits integers */
- irda_insert_integer, /* Handler for 16 bits integers */
- NULL, /* Handler for strings */
- irda_insert_integer, /* Handler for 32 bits integers */
- NULL, /* Handler for octet sequences */
- irda_insert_no_value /* Handler for no value parameters */
-};
-
-/*
- * Function irda_insert_no_value (self, buf, len, pi, type, func)
- */
-static int irda_insert_no_value(void *self, __u8 *buf, int len, __u8 pi,
- PV_TYPE type, PI_HANDLER func)
-{
- irda_param_t p;
- int ret;
-
- p.pi = pi;
- p.pl = 0;
-
- /* Call handler for this parameter */
- ret = (*func)(self, &p, PV_GET);
-
- /* Extract values anyway, since handler may need them */
- irda_param_pack(buf, "bb", p.pi, p.pl);
-
- if (ret < 0)
- return ret;
-
- return 2; /* Inserted pl+2 bytes */
-}
-
-/*
- * Function irda_extract_no_value (self, buf, len, type, func)
- *
- * Extracts a parameter without a pv field (pl=0)
- *
- */
-static int irda_extract_no_value(void *self, __u8 *buf, int len, __u8 pi,
- PV_TYPE type, PI_HANDLER func)
-{
- irda_param_t p;
- int ret;
-
- /* Extract values anyway, since handler may need them */
- irda_param_unpack(buf, "bb", &p.pi, &p.pl);
-
- /* Call handler for this parameter */
- ret = (*func)(self, &p, PV_PUT);
-
- if (ret < 0)
- return ret;
-
- return 2; /* Extracted pl+2 bytes */
-}
-
-/*
- * Function irda_insert_integer (self, buf, len, pi, type, func)
- */
-static int irda_insert_integer(void *self, __u8 *buf, int len, __u8 pi,
- PV_TYPE type, PI_HANDLER func)
-{
- irda_param_t p;
- int n = 0;
- int err;
-
- p.pi = pi; /* In case handler needs to know */
- p.pl = type & PV_MASK; /* The integer type codes the length as well */
- p.pv.i = 0; /* Clear value */
-
- /* Call handler for this parameter */
- err = (*func)(self, &p, PV_GET);
- if (err < 0)
- return err;
-
- /*
- * If parameter length is still 0, then (1) this is an any length
- * integer, and (2) the handler function does not care which length
- * we choose to use, so we pick the one the gives the fewest bytes.
- */
- if (p.pl == 0) {
- if (p.pv.i < 0xff) {
- pr_debug("%s(), using 1 byte\n", __func__);
- p.pl = 1;
- } else if (p.pv.i < 0xffff) {
- pr_debug("%s(), using 2 bytes\n", __func__);
- p.pl = 2;
- } else {
- pr_debug("%s(), using 4 bytes\n", __func__);
- p.pl = 4; /* Default length */
- }
- }
- /* Check if buffer is long enough for insertion */
- if (len < (2+p.pl)) {
- net_warn_ratelimited("%s: buffer too short for insertion!\n",
- __func__);
- return -1;
- }
- pr_debug("%s(), pi=%#x, pl=%d, pi=%d\n", __func__,
- p.pi, p.pl, p.pv.i);
- switch (p.pl) {
- case 1:
- n += irda_param_pack(buf, "bbb", p.pi, p.pl, (__u8) p.pv.i);
- break;
- case 2:
- if (type & PV_BIG_ENDIAN)
- p.pv.i = cpu_to_be16((__u16) p.pv.i);
- else
- p.pv.i = cpu_to_le16((__u16) p.pv.i);
- n += irda_param_pack(buf, "bbs", p.pi, p.pl, (__u16) p.pv.i);
- break;
- case 4:
- if (type & PV_BIG_ENDIAN)
- cpu_to_be32s(&p.pv.i);
- else
- cpu_to_le32s(&p.pv.i);
- n += irda_param_pack(buf, "bbi", p.pi, p.pl, p.pv.i);
-
- break;
- default:
- net_warn_ratelimited("%s: length %d not supported\n",
- __func__, p.pl);
- /* Skip parameter */
- return -1;
- }
-
- return p.pl+2; /* Inserted pl+2 bytes */
-}
-
-/*
- * Function irda_extract integer (self, buf, len, pi, type, func)
- *
- * Extract a possibly variable length integer from buffer, and call
- * handler for processing of the parameter
- */
-static int irda_extract_integer(void *self, __u8 *buf, int len, __u8 pi,
- PV_TYPE type, PI_HANDLER func)
-{
- irda_param_t p;
- int n = 0;
- int extract_len; /* Real length we extract */
- int err;
-
- p.pi = pi; /* In case handler needs to know */
- p.pl = buf[1]; /* Extract length of value */
- p.pv.i = 0; /* Clear value */
- extract_len = p.pl; /* Default : extract all */
-
- /* Check if buffer is long enough for parsing */
- if (len < (2+p.pl)) {
- net_warn_ratelimited("%s: buffer too short for parsing! Need %d bytes, but len is only %d\n",
- __func__, p.pl, len);
- return -1;
- }
-
- /*
- * Check that the integer length is what we expect it to be. If the
- * handler want a 16 bits integer then a 32 bits is not good enough
- * PV_INTEGER means that the handler is flexible.
- */
- if (((type & PV_MASK) != PV_INTEGER) && ((type & PV_MASK) != p.pl)) {
- net_err_ratelimited("%s: invalid parameter length! Expected %d bytes, but value had %d bytes!\n",
- __func__, type & PV_MASK, p.pl);
-
- /* Most parameters are bit/byte fields or little endian,
- * so it's ok to only extract a subset of it (the subset
- * that the handler expect). This is necessary, as some
- * broken implementations seems to add extra undefined bits.
- * If the parameter is shorter than we expect or is big
- * endian, we can't play those tricks. Jean II */
- if((p.pl < (type & PV_MASK)) || (type & PV_BIG_ENDIAN)) {
- /* Skip parameter */
- return p.pl+2;
- } else {
- /* Extract subset of it, fallthrough */
- extract_len = type & PV_MASK;
- }
- }
-
-
- switch (extract_len) {
- case 1:
- n += irda_param_unpack(buf+2, "b", &p.pv.i);
- break;
- case 2:
- n += irda_param_unpack(buf+2, "s", &p.pv.i);
- if (type & PV_BIG_ENDIAN)
- p.pv.i = be16_to_cpu((__u16) p.pv.i);
- else
- p.pv.i = le16_to_cpu((__u16) p.pv.i);
- break;
- case 4:
- n += irda_param_unpack(buf+2, "i", &p.pv.i);
- if (type & PV_BIG_ENDIAN)
- be32_to_cpus(&p.pv.i);
- else
- le32_to_cpus(&p.pv.i);
- break;
- default:
- net_warn_ratelimited("%s: length %d not supported\n",
- __func__, p.pl);
-
- /* Skip parameter */
- return p.pl+2;
- }
-
- pr_debug("%s(), pi=%#x, pl=%d, pi=%d\n", __func__,
- p.pi, p.pl, p.pv.i);
- /* Call handler for this parameter */
- err = (*func)(self, &p, PV_PUT);
- if (err < 0)
- return err;
-
- return p.pl+2; /* Extracted pl+2 bytes */
-}
-
-/*
- * Function irda_extract_string (self, buf, len, type, func)
- */
-static int irda_extract_string(void *self, __u8 *buf, int len, __u8 pi,
- PV_TYPE type, PI_HANDLER func)
-{
- char str[33];
- irda_param_t p;
- int err;
-
- p.pi = pi; /* In case handler needs to know */
- p.pl = buf[1]; /* Extract length of value */
- if (p.pl > 32)
- p.pl = 32;
-
- pr_debug("%s(), pi=%#x, pl=%d\n", __func__,
- p.pi, p.pl);
-
- /* Check if buffer is long enough for parsing */
- if (len < (2+p.pl)) {
- net_warn_ratelimited("%s: buffer too short for parsing! Need %d bytes, but len is only %d\n",
- __func__, p.pl, len);
- return -1;
- }
-
- /* Should be safe to copy string like this since we have already
- * checked that the buffer is long enough */
- strncpy(str, buf+2, p.pl);
-
- pr_debug("%s(), str=0x%02x 0x%02x\n",
- __func__, (__u8)str[0], (__u8)str[1]);
-
- /* Null terminate string */
- str[p.pl] = '\0';
-
- p.pv.c = str; /* Handler will need to take a copy */
-
- /* Call handler for this parameter */
- err = (*func)(self, &p, PV_PUT);
- if (err < 0)
- return err;
-
- return p.pl+2; /* Extracted pl+2 bytes */
-}
-
-/*
- * Function irda_extract_octseq (self, buf, len, type, func)
- */
-static int irda_extract_octseq(void *self, __u8 *buf, int len, __u8 pi,
- PV_TYPE type, PI_HANDLER func)
-{
- irda_param_t p;
-
- p.pi = pi; /* In case handler needs to know */
- p.pl = buf[1]; /* Extract length of value */
-
- /* Check if buffer is long enough for parsing */
- if (len < (2+p.pl)) {
- net_warn_ratelimited("%s: buffer too short for parsing! Need %d bytes, but len is only %d\n",
- __func__, p.pl, len);
- return -1;
- }
-
- pr_debug("%s(), not impl\n", __func__);
-
- return p.pl+2; /* Extracted pl+2 bytes */
-}
-
-/*
- * Function irda_param_pack (skb, fmt, ...)
- *
- * Format:
- * 'i' = 32 bits integer
- * 's' = string
- *
- */
-int irda_param_pack(__u8 *buf, char *fmt, ...)
-{
- irda_pv_t arg;
- va_list args;
- char *p;
- int n = 0;
-
- va_start(args, fmt);
-
- for (p = fmt; *p != '\0'; p++) {
- switch (*p) {
- case 'b': /* 8 bits unsigned byte */
- buf[n++] = (__u8)va_arg(args, int);
- break;
- case 's': /* 16 bits unsigned short */
- arg.i = (__u16)va_arg(args, int);
- put_unaligned((__u16)arg.i, (__u16 *)(buf+n)); n+=2;
- break;
- case 'i': /* 32 bits unsigned integer */
- arg.i = va_arg(args, __u32);
- put_unaligned(arg.i, (__u32 *)(buf+n)); n+=4;
- break;
-#if 0
- case 'c': /* \0 terminated string */
- arg.c = va_arg(args, char *);
- strcpy(buf+n, arg.c);
- n += strlen(arg.c) + 1;
- break;
-#endif
- default:
- va_end(args);
- return -1;
- }
- }
- va_end(args);
-
- return 0;
-}
-EXPORT_SYMBOL(irda_param_pack);
-
-/*
- * Function irda_param_unpack (skb, fmt, ...)
- */
-static int irda_param_unpack(__u8 *buf, char *fmt, ...)
-{
- irda_pv_t arg;
- va_list args;
- char *p;
- int n = 0;
-
- va_start(args, fmt);
-
- for (p = fmt; *p != '\0'; p++) {
- switch (*p) {
- case 'b': /* 8 bits byte */
- arg.ip = va_arg(args, __u32 *);
- *arg.ip = buf[n++];
- break;
- case 's': /* 16 bits short */
- arg.ip = va_arg(args, __u32 *);
- *arg.ip = get_unaligned((__u16 *)(buf+n)); n+=2;
- break;
- case 'i': /* 32 bits unsigned integer */
- arg.ip = va_arg(args, __u32 *);
- *arg.ip = get_unaligned((__u32 *)(buf+n)); n+=4;
- break;
-#if 0
- case 'c': /* \0 terminated string */
- arg.c = va_arg(args, char *);
- strcpy(arg.c, buf+n);
- n += strlen(arg.c) + 1;
- break;
-#endif
- default:
- va_end(args);
- return -1;
- }
-
- }
- va_end(args);
-
- return 0;
-}
-
-/*
- * Function irda_param_insert (self, pi, buf, len, info)
- *
- * Insert the specified parameter (pi) into buffer. Returns number of
- * bytes inserted
- */
-int irda_param_insert(void *self, __u8 pi, __u8 *buf, int len,
- pi_param_info_t *info)
-{
- const pi_minor_info_t *pi_minor_info;
- __u8 pi_minor;
- __u8 pi_major;
- int type;
- int ret = -1;
- int n = 0;
-
- IRDA_ASSERT(buf != NULL, return ret;);
- IRDA_ASSERT(info != NULL, return ret;);
-
- pi_minor = pi & info->pi_mask;
- pi_major = pi >> info->pi_major_offset;
-
- /* Check if the identifier value (pi) is valid */
- if ((pi_major > info->len-1) ||
- (pi_minor > info->tables[pi_major].len-1))
- {
- pr_debug("%s(), no handler for parameter=0x%02x\n",
- __func__, pi);
-
- /* Skip this parameter */
- return -1;
- }
-
- /* Lookup the info on how to parse this parameter */
- pi_minor_info = &info->tables[pi_major].pi_minor_call_table[pi_minor];
-
- /* Find expected data type for this parameter identifier (pi)*/
- type = pi_minor_info->type;
-
- /* Check if handler has been implemented */
- if (!pi_minor_info->func) {
- net_info_ratelimited("%s: no handler for pi=%#x\n",
- __func__, pi);
- /* Skip this parameter */
- return -1;
- }
-
- /* Insert parameter value */
- ret = (*pv_insert_table[type & PV_MASK])(self, buf+n, len, pi, type,
- pi_minor_info->func);
- return ret;
-}
-EXPORT_SYMBOL(irda_param_insert);
-
-/*
- * Function irda_param_extract (self, buf, len, info)
- *
- * Parse all parameters. If len is correct, then everything should be
- * safe. Returns the number of bytes that was parsed
- *
- */
-static int irda_param_extract(void *self, __u8 *buf, int len,
- pi_param_info_t *info)
-{
- const pi_minor_info_t *pi_minor_info;
- __u8 pi_minor;
- __u8 pi_major;
- int type;
- int ret = -1;
- int n = 0;
-
- IRDA_ASSERT(buf != NULL, return ret;);
- IRDA_ASSERT(info != NULL, return ret;);
-
- pi_minor = buf[n] & info->pi_mask;
- pi_major = buf[n] >> info->pi_major_offset;
-
- /* Check if the identifier value (pi) is valid */
- if ((pi_major > info->len-1) ||
- (pi_minor > info->tables[pi_major].len-1))
- {
- pr_debug("%s(), no handler for parameter=0x%02x\n",
- __func__, buf[0]);
-
- /* Skip this parameter */
- return 2 + buf[n + 1]; /* Continue */
- }
-
- /* Lookup the info on how to parse this parameter */
- pi_minor_info = &info->tables[pi_major].pi_minor_call_table[pi_minor];
-
- /* Find expected data type for this parameter identifier (pi)*/
- type = pi_minor_info->type;
-
- pr_debug("%s(), pi=[%d,%d], type=%d\n", __func__,
- pi_major, pi_minor, type);
-
- /* Check if handler has been implemented */
- if (!pi_minor_info->func) {
- net_info_ratelimited("%s: no handler for pi=%#x\n",
- __func__, buf[n]);
- /* Skip this parameter */
- return 2 + buf[n + 1]; /* Continue */
- }
-
- /* Parse parameter value */
- ret = (*pv_extract_table[type & PV_MASK])(self, buf+n, len, buf[n],
- type, pi_minor_info->func);
- return ret;
-}
-
-/*
- * Function irda_param_extract_all (self, buf, len, info)
- *
- * Parse all parameters. If len is correct, then everything should be
- * safe. Returns the number of bytes that was parsed
- *
- */
-int irda_param_extract_all(void *self, __u8 *buf, int len,
- pi_param_info_t *info)
-{
- int ret = -1;
- int n = 0;
-
- IRDA_ASSERT(buf != NULL, return ret;);
- IRDA_ASSERT(info != NULL, return ret;);
-
- /*
- * Parse all parameters. Each parameter must be at least two bytes
- * long or else there is no point in trying to parse it
- */
- while (len > 2) {
- ret = irda_param_extract(self, buf+n, len, info);
- if (ret < 0)
- return ret;
-
- n += ret;
- len -= ret;
- }
- return n;
-}
-EXPORT_SYMBOL(irda_param_extract_all);
diff --git a/net/irda/qos.c b/net/irda/qos.c
deleted file mode 100644
index 25ba8509ad3e..000000000000
--- a/net/irda/qos.c
+++ /dev/null
@@ -1,771 +0,0 @@
-/*********************************************************************
- *
- * Filename: qos.c
- * Version: 1.0
- * Description: IrLAP QoS parameter negotiation
- * Status: Stable
- * Author: Dag Brattli <dagb@cs.uit.no>
- * Created at: Tue Sep 9 00:00:26 1997
- * Modified at: Sun Jan 30 14:29:16 2000
- * Modified by: Dag Brattli <dagb@cs.uit.no>
- *
- * Copyright (c) 1998-2000 Dag Brattli <dagb@cs.uit.no>,
- * All Rights Reserved.
- * Copyright (c) 2000-2001 Jean Tourrilhes <jt@hpl.hp.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
- ********************************************************************/
-
-#include <linux/export.h>
-
-#include <asm/byteorder.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/parameters.h>
-#include <net/irda/qos.h>
-#include <net/irda/irlap.h>
-#include <net/irda/irlap_frame.h>
-
-/*
- * Maximum values of the baud rate we negotiate with the other end.
- * Most often, you don't have to change that, because Linux-IrDA will
- * use the maximum offered by the link layer, which usually works fine.
- * In some very rare cases, you may want to limit it to lower speeds...
- */
-int sysctl_max_baud_rate = 16000000;
-/*
- * Maximum value of the lap disconnect timer we negotiate with the other end.
- * Most often, the value below represent the best compromise, but some user
- * may want to keep the LAP alive longer or shorter in case of link failure.
- * Remember that the threshold time (early warning) is fixed to 3s...
- */
-int sysctl_max_noreply_time = 12;
-/*
- * Minimum turn time to be applied before transmitting to the peer.
- * Nonzero values (usec) are used as lower limit to the per-connection
- * mtt value which was announced by the other end during negotiation.
- * Might be helpful if the peer device provides too short mtt.
- * Default is 10us which means using the unmodified value given by the
- * peer except if it's 0 (0 is likely a bug in the other stack).
- */
-unsigned int sysctl_min_tx_turn_time = 10;
-/*
- * Maximum data size to be used in transmission in payload of LAP frame.
- * There is a bit of confusion in the IrDA spec :
- * The LAP spec defines the payload of a LAP frame (I field) to be
- * 2048 bytes max (IrLAP 1.1, chapt 6.6.5, p40).
- * On the other hand, the PHY mention frames of 2048 bytes max (IrPHY
- * 1.2, chapt 5.3.2.1, p41). But, this number includes the LAP header
- * (2 bytes), and CRC (32 bits at 4 Mb/s). So, for the I field (LAP
- * payload), that's only 2042 bytes. Oups !
- * My nsc-ircc hardware has troubles receiving 2048 bytes frames at 4 Mb/s,
- * so adjust to 2042... I don't know if this bug applies only for 2048
- * bytes frames or all negotiated frame sizes, but you can use the sysctl
- * to play with this value anyway.
- * Jean II */
-unsigned int sysctl_max_tx_data_size = 2042;
-/*
- * Maximum transmit window, i.e. number of LAP frames between turn-around.
- * This allow to override what the peer told us. Some peers are buggy and
- * don't always support what they tell us.
- * Jean II */
-unsigned int sysctl_max_tx_window = 7;
-
-static int irlap_param_baud_rate(void *instance, irda_param_t *param, int get);
-static int irlap_param_link_disconnect(void *instance, irda_param_t *parm,
- int get);
-static int irlap_param_max_turn_time(void *instance, irda_param_t *param,
- int get);
-static int irlap_param_data_size(void *instance, irda_param_t *param, int get);
-static int irlap_param_window_size(void *instance, irda_param_t *param,
- int get);
-static int irlap_param_additional_bofs(void *instance, irda_param_t *parm,
- int get);
-static int irlap_param_min_turn_time(void *instance, irda_param_t *param,
- int get);
-
-#ifndef CONFIG_IRDA_DYNAMIC_WINDOW
-static __u32 irlap_requested_line_capacity(struct qos_info *qos);
-#endif
-
-static __u32 min_turn_times[] = { 10000, 5000, 1000, 500, 100, 50, 10, 0 }; /* us */
-static __u32 baud_rates[] = { 2400, 9600, 19200, 38400, 57600, 115200, 576000,
- 1152000, 4000000, 16000000 }; /* bps */
-static __u32 data_sizes[] = { 64, 128, 256, 512, 1024, 2048 }; /* bytes */
-static __u32 add_bofs[] = { 48, 24, 12, 5, 3, 2, 1, 0 }; /* bytes */
-static __u32 max_turn_times[] = { 500, 250, 100, 50 }; /* ms */
-static __u32 link_disc_times[] = { 3, 8, 12, 16, 20, 25, 30, 40 }; /* secs */
-
-static __u32 max_line_capacities[10][4] = {
- /* 500 ms 250 ms 100 ms 50 ms (max turn time) */
- { 100, 0, 0, 0 }, /* 2400 bps */
- { 400, 0, 0, 0 }, /* 9600 bps */
- { 800, 0, 0, 0 }, /* 19200 bps */
- { 1600, 0, 0, 0 }, /* 38400 bps */
- { 2360, 0, 0, 0 }, /* 57600 bps */
- { 4800, 2400, 960, 480 }, /* 115200 bps */
- { 28800, 11520, 5760, 2880 }, /* 576000 bps */
- { 57600, 28800, 11520, 5760 }, /* 1152000 bps */
- { 200000, 100000, 40000, 20000 }, /* 4000000 bps */
- { 800000, 400000, 160000, 80000 }, /* 16000000 bps */
-};
-
-static const pi_minor_info_t pi_minor_call_table_type_0[] = {
- { NULL, 0 },
-/* 01 */{ irlap_param_baud_rate, PV_INTEGER | PV_LITTLE_ENDIAN },
- { NULL, 0 },
- { NULL, 0 },
- { NULL, 0 },
- { NULL, 0 },
- { NULL, 0 },
- { NULL, 0 },
-/* 08 */{ irlap_param_link_disconnect, PV_INT_8_BITS }
-};
-
-static const pi_minor_info_t pi_minor_call_table_type_1[] = {
- { NULL, 0 },
- { NULL, 0 },
-/* 82 */{ irlap_param_max_turn_time, PV_INT_8_BITS },
-/* 83 */{ irlap_param_data_size, PV_INT_8_BITS },
-/* 84 */{ irlap_param_window_size, PV_INT_8_BITS },
-/* 85 */{ irlap_param_additional_bofs, PV_INT_8_BITS },
-/* 86 */{ irlap_param_min_turn_time, PV_INT_8_BITS },
-};
-
-static const pi_major_info_t pi_major_call_table[] = {
- { pi_minor_call_table_type_0, 9 },
- { pi_minor_call_table_type_1, 7 },
-};
-
-static pi_param_info_t irlap_param_info = { pi_major_call_table, 2, 0x7f, 7 };
-
-/* ---------------------- LOCAL SUBROUTINES ---------------------- */
-/* Note : we start with a bunch of local subroutines.
- * As the compiler is "one pass", this is the only way to get them to
- * inline properly...
- * Jean II
- */
-/*
- * Function value_index (value, array, size)
- *
- * Returns the index to the value in the specified array
- */
-static inline int value_index(__u32 value, __u32 *array, int size)
-{
- int i;
-
- for (i=0; i < size; i++)
- if (array[i] == value)
- break;
- return i;
-}
-
-/*
- * Function index_value (index, array)
- *
- * Returns value to index in array, easy!
- *
- */
-static inline __u32 index_value(int index, __u32 *array)
-{
- return array[index];
-}
-
-/*
- * Function msb_index (word)
- *
- * Returns index to most significant bit (MSB) in word
- *
- */
-static int msb_index (__u16 word)
-{
- __u16 msb = 0x8000;
- int index = 15; /* Current MSB */
-
- /* Check for buggy peers.
- * Note : there is a small probability that it could be us, but I
- * would expect driver authors to catch that pretty early and be
- * able to check precisely what's going on. If a end user sees this,
- * it's very likely the peer. - Jean II */
- if (word == 0) {
- net_warn_ratelimited("%s(), Detected buggy peer, adjust null PV to 0x1!\n",
- __func__);
- /* The only safe choice (we don't know the array size) */
- word = 0x1;
- }
-
- while (msb) {
- if (word & msb)
- break; /* Found it! */
- msb >>=1;
- index--;
- }
- return index;
-}
-
-/*
- * Function value_lower_bits (value, array)
- *
- * Returns a bit field marking all possibility lower than value.
- */
-static inline int value_lower_bits(__u32 value, __u32 *array, int size, __u16 *field)
-{
- int i;
- __u16 mask = 0x1;
- __u16 result = 0x0;
-
- for (i=0; i < size; i++) {
- /* Add the current value to the bit field, shift mask */
- result |= mask;
- mask <<= 1;
- /* Finished ? */
- if (array[i] >= value)
- break;
- }
- /* Send back a valid index */
- if(i >= size)
- i = size - 1; /* Last item */
- *field = result;
- return i;
-}
-
-/*
- * Function value_highest_bit (value, array)
- *
- * Returns a bit field marking the highest possibility lower than value.
- */
-static inline int value_highest_bit(__u32 value, __u32 *array, int size, __u16 *field)
-{
- int i;
- __u16 mask = 0x1;
- __u16 result = 0x0;
-
- for (i=0; i < size; i++) {
- /* Finished ? */
- if (array[i] <= value)
- break;
- /* Shift mask */
- mask <<= 1;
- }
- /* Set the current value to the bit field */
- result |= mask;
- /* Send back a valid index */
- if(i >= size)
- i = size - 1; /* Last item */
- *field = result;
- return i;
-}
-
-/* -------------------------- MAIN CALLS -------------------------- */
-
-/*
- * Function irda_qos_compute_intersection (qos, new)
- *
- * Compute the intersection of the old QoS capabilities with new ones
- *
- */
-void irda_qos_compute_intersection(struct qos_info *qos, struct qos_info *new)
-{
- IRDA_ASSERT(qos != NULL, return;);
- IRDA_ASSERT(new != NULL, return;);
-
- /* Apply */
- qos->baud_rate.bits &= new->baud_rate.bits;
- qos->window_size.bits &= new->window_size.bits;
- qos->min_turn_time.bits &= new->min_turn_time.bits;
- qos->max_turn_time.bits &= new->max_turn_time.bits;
- qos->data_size.bits &= new->data_size.bits;
- qos->link_disc_time.bits &= new->link_disc_time.bits;
- qos->additional_bofs.bits &= new->additional_bofs.bits;
-
- irda_qos_bits_to_value(qos);
-}
-
-/*
- * Function irda_init_max_qos_capabilies (qos)
- *
- * The purpose of this function is for layers and drivers to be able to
- * set the maximum QoS possible and then "and in" their own limitations
- *
- */
-void irda_init_max_qos_capabilies(struct qos_info *qos)
-{
- int i;
- /*
- * These are the maximum supported values as specified on pages
- * 39-43 in IrLAP
- */
-
- /* Use sysctl to set some configurable values... */
- /* Set configured max speed */
- i = value_lower_bits(sysctl_max_baud_rate, baud_rates, 10,
- &qos->baud_rate.bits);
- sysctl_max_baud_rate = index_value(i, baud_rates);
-
- /* Set configured max disc time */
- i = value_lower_bits(sysctl_max_noreply_time, link_disc_times, 8,
- &qos->link_disc_time.bits);
- sysctl_max_noreply_time = index_value(i, link_disc_times);
-
- /* LSB is first byte, MSB is second byte */
- qos->baud_rate.bits &= 0x03ff;
-
- qos->window_size.bits = 0x7f;
- qos->min_turn_time.bits = 0xff;
- qos->max_turn_time.bits = 0x0f;
- qos->data_size.bits = 0x3f;
- qos->link_disc_time.bits &= 0xff;
- qos->additional_bofs.bits = 0xff;
-}
-EXPORT_SYMBOL(irda_init_max_qos_capabilies);
-
-/*
- * Function irlap_adjust_qos_settings (qos)
- *
- * Adjust QoS settings in case some values are not possible to use because
- * of other settings
- */
-static void irlap_adjust_qos_settings(struct qos_info *qos)
-{
- __u32 line_capacity;
- int index;
-
- /*
- * Make sure the mintt is sensible.
- * Main culprit : Ericsson T39. - Jean II
- */
- if (sysctl_min_tx_turn_time > qos->min_turn_time.value) {
- int i;
-
- net_warn_ratelimited("%s(), Detected buggy peer, adjust mtt to %dus!\n",
- __func__, sysctl_min_tx_turn_time);
-
- /* We don't really need bits, but easier this way */
- i = value_highest_bit(sysctl_min_tx_turn_time, min_turn_times,
- 8, &qos->min_turn_time.bits);
- sysctl_min_tx_turn_time = index_value(i, min_turn_times);
- qos->min_turn_time.value = sysctl_min_tx_turn_time;
- }
-
- /*
- * Not allowed to use a max turn time less than 500 ms if the baudrate
- * is less than 115200
- */
- if ((qos->baud_rate.value < 115200) &&
- (qos->max_turn_time.value < 500))
- {
- pr_debug("%s(), adjusting max turn time from %d to 500 ms\n",
- __func__, qos->max_turn_time.value);
- qos->max_turn_time.value = 500;
- }
-
- /*
- * The data size must be adjusted according to the baud rate and max
- * turn time
- */
- index = value_index(qos->data_size.value, data_sizes, 6);
- line_capacity = irlap_max_line_capacity(qos->baud_rate.value,
- qos->max_turn_time.value);
-
-#ifdef CONFIG_IRDA_DYNAMIC_WINDOW
- while ((qos->data_size.value > line_capacity) && (index > 0)) {
- qos->data_size.value = data_sizes[index--];
- pr_debug("%s(), reducing data size to %d\n",
- __func__, qos->data_size.value);
- }
-#else /* Use method described in section 6.6.11 of IrLAP */
- while (irlap_requested_line_capacity(qos) > line_capacity) {
- IRDA_ASSERT(index != 0, return;);
-
- /* Must be able to send at least one frame */
- if (qos->window_size.value > 1) {
- qos->window_size.value--;
- pr_debug("%s(), reducing window size to %d\n",
- __func__, qos->window_size.value);
- } else if (index > 1) {
- qos->data_size.value = data_sizes[index--];
- pr_debug("%s(), reducing data size to %d\n",
- __func__, qos->data_size.value);
- } else {
- net_warn_ratelimited("%s(), nothing more we can do!\n",
- __func__);
- }
- }
-#endif /* CONFIG_IRDA_DYNAMIC_WINDOW */
- /*
- * Fix tx data size according to user limits - Jean II
- */
- if (qos->data_size.value > sysctl_max_tx_data_size)
- /* Allow non discrete adjustement to avoid losing capacity */
- qos->data_size.value = sysctl_max_tx_data_size;
- /*
- * Override Tx window if user request it. - Jean II
- */
- if (qos->window_size.value > sysctl_max_tx_window)
- qos->window_size.value = sysctl_max_tx_window;
-}
-
-/*
- * Function irlap_negotiate (qos_device, qos_session, skb)
- *
- * Negotiate QoS values, not really that much negotiation :-)
- * We just set the QoS capabilities for the peer station
- *
- */
-int irlap_qos_negotiate(struct irlap_cb *self, struct sk_buff *skb)
-{
- int ret;
-
- ret = irda_param_extract_all(self, skb->data, skb->len,
- &irlap_param_info);
-
- /* Convert the negotiated bits to values */
- irda_qos_bits_to_value(&self->qos_tx);
- irda_qos_bits_to_value(&self->qos_rx);
-
- irlap_adjust_qos_settings(&self->qos_tx);
-
- pr_debug("Setting BAUD_RATE to %d bps.\n",
- self->qos_tx.baud_rate.value);
- pr_debug("Setting DATA_SIZE to %d bytes\n",
- self->qos_tx.data_size.value);
- pr_debug("Setting WINDOW_SIZE to %d\n",
- self->qos_tx.window_size.value);
- pr_debug("Setting XBOFS to %d\n",
- self->qos_tx.additional_bofs.value);
- pr_debug("Setting MAX_TURN_TIME to %d ms.\n",
- self->qos_tx.max_turn_time.value);
- pr_debug("Setting MIN_TURN_TIME to %d usecs.\n",
- self->qos_tx.min_turn_time.value);
- pr_debug("Setting LINK_DISC to %d secs.\n",
- self->qos_tx.link_disc_time.value);
- return ret;
-}
-
-/*
- * Function irlap_insert_negotiation_params (qos, fp)
- *
- * Insert QoS negotiaion pararameters into frame
- *
- */
-int irlap_insert_qos_negotiation_params(struct irlap_cb *self,
- struct sk_buff *skb)
-{
- int ret;
-
- /* Insert data rate */
- ret = irda_param_insert(self, PI_BAUD_RATE, skb_tail_pointer(skb),
- skb_tailroom(skb), &irlap_param_info);
- if (ret < 0)
- return ret;
- skb_put(skb, ret);
-
- /* Insert max turnaround time */
- ret = irda_param_insert(self, PI_MAX_TURN_TIME, skb_tail_pointer(skb),
- skb_tailroom(skb), &irlap_param_info);
- if (ret < 0)
- return ret;
- skb_put(skb, ret);
-
- /* Insert data size */
- ret = irda_param_insert(self, PI_DATA_SIZE, skb_tail_pointer(skb),
- skb_tailroom(skb), &irlap_param_info);
- if (ret < 0)
- return ret;
- skb_put(skb, ret);
-
- /* Insert window size */
- ret = irda_param_insert(self, PI_WINDOW_SIZE, skb_tail_pointer(skb),
- skb_tailroom(skb), &irlap_param_info);
- if (ret < 0)
- return ret;
- skb_put(skb, ret);
-
- /* Insert additional BOFs */
- ret = irda_param_insert(self, PI_ADD_BOFS, skb_tail_pointer(skb),
- skb_tailroom(skb), &irlap_param_info);
- if (ret < 0)
- return ret;
- skb_put(skb, ret);
-
- /* Insert minimum turnaround time */
- ret = irda_param_insert(self, PI_MIN_TURN_TIME, skb_tail_pointer(skb),
- skb_tailroom(skb), &irlap_param_info);
- if (ret < 0)
- return ret;
- skb_put(skb, ret);
-
- /* Insert link disconnect/threshold time */
- ret = irda_param_insert(self, PI_LINK_DISC, skb_tail_pointer(skb),
- skb_tailroom(skb), &irlap_param_info);
- if (ret < 0)
- return ret;
- skb_put(skb, ret);
-
- return 0;
-}
-
-/*
- * Function irlap_param_baud_rate (instance, param, get)
- *
- * Negotiate data-rate
- *
- */
-static int irlap_param_baud_rate(void *instance, irda_param_t *param, int get)
-{
- __u16 final;
-
- struct irlap_cb *self = (struct irlap_cb *) instance;
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;);
-
- if (get) {
- param->pv.i = self->qos_rx.baud_rate.bits;
- pr_debug("%s(), baud rate = 0x%02x\n",
- __func__, param->pv.i);
- } else {
- /*
- * Stations must agree on baud rate, so calculate
- * intersection
- */
- pr_debug("Requested BAUD_RATE: 0x%04x\n", (__u16)param->pv.i);
- final = (__u16) param->pv.i & self->qos_rx.baud_rate.bits;
-
- pr_debug("Final BAUD_RATE: 0x%04x\n", final);
- self->qos_tx.baud_rate.bits = final;
- self->qos_rx.baud_rate.bits = final;
- }
-
- return 0;
-}
-
-/*
- * Function irlap_param_link_disconnect (instance, param, get)
- *
- * Negotiate link disconnect/threshold time.
- *
- */
-static int irlap_param_link_disconnect(void *instance, irda_param_t *param,
- int get)
-{
- __u16 final;
-
- struct irlap_cb *self = (struct irlap_cb *) instance;
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;);
-
- if (get)
- param->pv.i = self->qos_rx.link_disc_time.bits;
- else {
- /*
- * Stations must agree on link disconnect/threshold
- * time.
- */
- pr_debug("LINK_DISC: %02x\n", (__u8)param->pv.i);
- final = (__u8) param->pv.i & self->qos_rx.link_disc_time.bits;
-
- pr_debug("Final LINK_DISC: %02x\n", final);
- self->qos_tx.link_disc_time.bits = final;
- self->qos_rx.link_disc_time.bits = final;
- }
- return 0;
-}
-
-/*
- * Function irlap_param_max_turn_time (instance, param, get)
- *
- * Negotiate the maximum turnaround time. This is a type 1 parameter and
- * will be negotiated independently for each station
- *
- */
-static int irlap_param_max_turn_time(void *instance, irda_param_t *param,
- int get)
-{
- struct irlap_cb *self = (struct irlap_cb *) instance;
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;);
-
- if (get)
- param->pv.i = self->qos_rx.max_turn_time.bits;
- else
- self->qos_tx.max_turn_time.bits = (__u8) param->pv.i;
-
- return 0;
-}
-
-/*
- * Function irlap_param_data_size (instance, param, get)
- *
- * Negotiate the data size. This is a type 1 parameter and
- * will be negotiated independently for each station
- *
- */
-static int irlap_param_data_size(void *instance, irda_param_t *param, int get)
-{
- struct irlap_cb *self = (struct irlap_cb *) instance;
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;);
-
- if (get)
- param->pv.i = self->qos_rx.data_size.bits;
- else
- self->qos_tx.data_size.bits = (__u8) param->pv.i;
-
- return 0;
-}
-
-/*
- * Function irlap_param_window_size (instance, param, get)
- *
- * Negotiate the window size. This is a type 1 parameter and
- * will be negotiated independently for each station
- *
- */
-static int irlap_param_window_size(void *instance, irda_param_t *param,
- int get)
-{
- struct irlap_cb *self = (struct irlap_cb *) instance;
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;);
-
- if (get)
- param->pv.i = self->qos_rx.window_size.bits;
- else
- self->qos_tx.window_size.bits = (__u8) param->pv.i;
-
- return 0;
-}
-
-/*
- * Function irlap_param_additional_bofs (instance, param, get)
- *
- * Negotiate additional BOF characters. This is a type 1 parameter and
- * will be negotiated independently for each station.
- */
-static int irlap_param_additional_bofs(void *instance, irda_param_t *param, int get)
-{
- struct irlap_cb *self = (struct irlap_cb *) instance;
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;);
-
- if (get)
- param->pv.i = self->qos_rx.additional_bofs.bits;
- else
- self->qos_tx.additional_bofs.bits = (__u8) param->pv.i;
-
- return 0;
-}
-
-/*
- * Function irlap_param_min_turn_time (instance, param, get)
- *
- * Negotiate the minimum turn around time. This is a type 1 parameter and
- * will be negotiated independently for each station
- */
-static int irlap_param_min_turn_time(void *instance, irda_param_t *param,
- int get)
-{
- struct irlap_cb *self = (struct irlap_cb *) instance;
-
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;);
-
- if (get)
- param->pv.i = self->qos_rx.min_turn_time.bits;
- else
- self->qos_tx.min_turn_time.bits = (__u8) param->pv.i;
-
- return 0;
-}
-
-/*
- * Function irlap_max_line_capacity (speed, max_turn_time, min_turn_time)
- *
- * Calculate the maximum line capacity
- *
- */
-__u32 irlap_max_line_capacity(__u32 speed, __u32 max_turn_time)
-{
- __u32 line_capacity;
- int i,j;
-
- pr_debug("%s(), speed=%d, max_turn_time=%d\n",
- __func__, speed, max_turn_time);
-
- i = value_index(speed, baud_rates, 10);
- j = value_index(max_turn_time, max_turn_times, 4);
-
- IRDA_ASSERT(((i >=0) && (i <10)), return 0;);
- IRDA_ASSERT(((j >=0) && (j <4)), return 0;);
-
- line_capacity = max_line_capacities[i][j];
-
- pr_debug("%s(), line capacity=%d bytes\n",
- __func__, line_capacity);
-
- return line_capacity;
-}
-
-#ifndef CONFIG_IRDA_DYNAMIC_WINDOW
-static __u32 irlap_requested_line_capacity(struct qos_info *qos)
-{
- __u32 line_capacity;
-
- line_capacity = qos->window_size.value *
- (qos->data_size.value + 6 + qos->additional_bofs.value) +
- irlap_min_turn_time_in_bytes(qos->baud_rate.value,
- qos->min_turn_time.value);
-
- pr_debug("%s(), requested line capacity=%d\n",
- __func__, line_capacity);
-
- return line_capacity;
-}
-#endif
-
-void irda_qos_bits_to_value(struct qos_info *qos)
-{
- int index;
-
- IRDA_ASSERT(qos != NULL, return;);
-
- index = msb_index(qos->baud_rate.bits);
- qos->baud_rate.value = baud_rates[index];
-
- index = msb_index(qos->data_size.bits);
- qos->data_size.value = data_sizes[index];
-
- index = msb_index(qos->window_size.bits);
- qos->window_size.value = index+1;
-
- index = msb_index(qos->min_turn_time.bits);
- qos->min_turn_time.value = min_turn_times[index];
-
- index = msb_index(qos->max_turn_time.bits);
- qos->max_turn_time.value = max_turn_times[index];
-
- index = msb_index(qos->link_disc_time.bits);
- qos->link_disc_time.value = link_disc_times[index];
-
- index = msb_index(qos->additional_bofs.bits);
- qos->additional_bofs.value = add_bofs[index];
-}
-EXPORT_SYMBOL(irda_qos_bits_to_value);
diff --git a/net/irda/timer.c b/net/irda/timer.c
deleted file mode 100644
index f2280f73b057..000000000000
--- a/net/irda/timer.c
+++ /dev/null
@@ -1,231 +0,0 @@
-/*********************************************************************
- *
- * Filename: timer.c
- * Version:
- * Description:
- * Status: Experimental.
- * Author: Dag Brattli <dagb@cs.uit.no>
- * Created at: Sat Aug 16 00:59:29 1997
- * Modified at: Wed Dec 8 12:50:34 1999
- * Modified by: Dag Brattli <dagb@cs.uit.no>
- *
- * Copyright (c) 1997, 1999 Dag Brattli <dagb@cs.uit.no>,
- * All Rights Reserved.
- * Copyright (c) 2000-2002 Jean Tourrilhes <jt@hpl.hp.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * Neither Dag Brattli nor University of Tromsø admit liability nor
- * provide warranty for any of this software. This material is
- * provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-#include <linux/delay.h>
-
-#include <net/irda/timer.h>
-#include <net/irda/irda.h>
-#include <net/irda/irda_device.h>
-#include <net/irda/irlap.h>
-#include <net/irda/irlmp.h>
-
-extern int sysctl_slot_timeout;
-
-static void irlap_slot_timer_expired(void* data);
-static void irlap_query_timer_expired(void* data);
-static void irlap_final_timer_expired(void* data);
-static void irlap_wd_timer_expired(void* data);
-static void irlap_backoff_timer_expired(void* data);
-static void irlap_media_busy_expired(void* data);
-
-void irlap_start_slot_timer(struct irlap_cb *self, int timeout)
-{
- irda_start_timer(&self->slot_timer, timeout, (void *) self,
- irlap_slot_timer_expired);
-}
-
-void irlap_start_query_timer(struct irlap_cb *self, int S, int s)
-{
- int timeout;
-
- /* Calculate when the peer discovery should end. Normally, we
- * get the end-of-discovery frame, so this is just in case
- * we miss it.
- * Basically, we multiply the number of remaining slots by our
- * slot time, plus add some extra time to properly receive the last
- * discovery packet (which is longer due to extra discovery info),
- * to avoid messing with for incoming connections requests and
- * to accommodate devices that perform discovery slower than us.
- * Jean II */
- timeout = msecs_to_jiffies(sysctl_slot_timeout) * (S - s)
- + XIDEXTRA_TIMEOUT + SMALLBUSY_TIMEOUT;
-
- /* Set or re-set the timer. We reset the timer for each received
- * discovery query, which allow us to automatically adjust to
- * the speed of the peer discovery (faster or slower). Jean II */
- irda_start_timer( &self->query_timer, timeout, (void *) self,
- irlap_query_timer_expired);
-}
-
-void irlap_start_final_timer(struct irlap_cb *self, int timeout)
-{
- irda_start_timer(&self->final_timer, timeout, (void *) self,
- irlap_final_timer_expired);
-}
-
-void irlap_start_wd_timer(struct irlap_cb *self, int timeout)
-{
- irda_start_timer(&self->wd_timer, timeout, (void *) self,
- irlap_wd_timer_expired);
-}
-
-void irlap_start_backoff_timer(struct irlap_cb *self, int timeout)
-{
- irda_start_timer(&self->backoff_timer, timeout, (void *) self,
- irlap_backoff_timer_expired);
-}
-
-void irlap_start_mbusy_timer(struct irlap_cb *self, int timeout)
-{
- irda_start_timer(&self->media_busy_timer, timeout,
- (void *) self, irlap_media_busy_expired);
-}
-
-void irlap_stop_mbusy_timer(struct irlap_cb *self)
-{
- /* If timer is activated, kill it! */
- del_timer(&self->media_busy_timer);
-
- /* If we are in NDM, there is a bunch of events in LAP that
- * that be pending due to the media_busy condition, such as
- * CONNECT_REQUEST and SEND_UI_FRAME. If we don't generate
- * an event, they will wait forever...
- * Jean II */
- if (self->state == LAP_NDM)
- irlap_do_event(self, MEDIA_BUSY_TIMER_EXPIRED, NULL, NULL);
-}
-
-void irlmp_start_watchdog_timer(struct lsap_cb *self, int timeout)
-{
- irda_start_timer(&self->watchdog_timer, timeout, (void *) self,
- irlmp_watchdog_timer_expired);
-}
-
-void irlmp_start_discovery_timer(struct irlmp_cb *self, int timeout)
-{
- irda_start_timer(&self->discovery_timer, timeout, (void *) self,
- irlmp_discovery_timer_expired);
-}
-
-void irlmp_start_idle_timer(struct lap_cb *self, int timeout)
-{
- irda_start_timer(&self->idle_timer, timeout, (void *) self,
- irlmp_idle_timer_expired);
-}
-
-void irlmp_stop_idle_timer(struct lap_cb *self)
-{
- /* If timer is activated, kill it! */
- del_timer(&self->idle_timer);
-}
-
-/*
- * Function irlap_slot_timer_expired (data)
- *
- * IrLAP slot timer has expired
- *
- */
-static void irlap_slot_timer_expired(void *data)
-{
- struct irlap_cb *self = (struct irlap_cb *) data;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
- irlap_do_event(self, SLOT_TIMER_EXPIRED, NULL, NULL);
-}
-
-/*
- * Function irlap_query_timer_expired (data)
- *
- * IrLAP query timer has expired
- *
- */
-static void irlap_query_timer_expired(void *data)
-{
- struct irlap_cb *self = (struct irlap_cb *) data;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
- irlap_do_event(self, QUERY_TIMER_EXPIRED, NULL, NULL);
-}
-
-/*
- * Function irda_final_timer_expired (data)
- *
- *
- *
- */
-static void irlap_final_timer_expired(void *data)
-{
- struct irlap_cb *self = (struct irlap_cb *) data;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
- irlap_do_event(self, FINAL_TIMER_EXPIRED, NULL, NULL);
-}
-
-/*
- * Function irda_wd_timer_expired (data)
- *
- *
- *
- */
-static void irlap_wd_timer_expired(void *data)
-{
- struct irlap_cb *self = (struct irlap_cb *) data;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
- irlap_do_event(self, WD_TIMER_EXPIRED, NULL, NULL);
-}
-
-/*
- * Function irda_backoff_timer_expired (data)
- *
- *
- *
- */
-static void irlap_backoff_timer_expired(void *data)
-{
- struct irlap_cb *self = (struct irlap_cb *) data;
-
- IRDA_ASSERT(self != NULL, return;);
- IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
- irlap_do_event(self, BACKOFF_TIMER_EXPIRED, NULL, NULL);
-}
-
-
-/*
- * Function irtty_media_busy_expired (data)
- *
- *
- */
-static void irlap_media_busy_expired(void *data)
-{
- struct irlap_cb *self = (struct irlap_cb *) data;
-
- IRDA_ASSERT(self != NULL, return;);
-
- irda_device_set_media_busy(self->netdev, FALSE);
- /* Note : the LAP event will be send in irlap_stop_mbusy_timer(),
- * to catch other cases where the flag is cleared (for example
- * after a discovery) - Jean II */
-}
diff --git a/net/irda/wrapper.c b/net/irda/wrapper.c
deleted file mode 100644
index 40a0f993bf13..000000000000
--- a/net/irda/wrapper.c
+++ /dev/null
@@ -1,492 +0,0 @@
-/*********************************************************************
- *
- * Filename: wrapper.c
- * Version: 1.2
- * Description: IrDA SIR async wrapper layer
- * Status: Stable
- * Author: Dag Brattli <dagb@cs.uit.no>
- * Created at: Mon Aug 4 20:40:53 1997
- * Modified at: Fri Jan 28 13:21:09 2000
- * Modified by: Dag Brattli <dagb@cs.uit.no>
- * Modified at: Fri May 28 3:11 CST 1999
- * Modified by: Horst von Brand <vonbrand@sleipnir.valparaiso.cl>
- *
- * Copyright (c) 1998-2000 Dag Brattli <dagb@cs.uit.no>,
- * All Rights Reserved.
- * Copyright (c) 2000-2002 Jean Tourrilhes <jt@hpl.hp.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * Neither Dag Brattli nor University of Tromsø admit liability nor
- * provide warranty for any of this software. This material is
- * provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-#include <linux/skbuff.h>
-#include <linux/string.h>
-#include <linux/module.h>
-#include <asm/byteorder.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/wrapper.h>
-#include <net/irda/crc.h>
-#include <net/irda/irlap.h>
-#include <net/irda/irlap_frame.h>
-#include <net/irda/irda_device.h>
-
-/************************** FRAME WRAPPING **************************/
-/*
- * Unwrap and unstuff SIR frames
- *
- * Note : at FIR and MIR, HDLC framing is used and usually handled
- * by the controller, so we come here only for SIR... Jean II
- */
-
-/*
- * Function stuff_byte (byte, buf)
- *
- * Byte stuff one single byte and put the result in buffer pointed to by
- * buf. The buffer must at all times be able to have two bytes inserted.
- *
- * This is in a tight loop, better inline it, so need to be prior to callers.
- * (2000 bytes on P6 200MHz, non-inlined ~370us, inline ~170us) - Jean II
- */
-static inline int stuff_byte(__u8 byte, __u8 *buf)
-{
- switch (byte) {
- case BOF: /* FALLTHROUGH */
- case EOF: /* FALLTHROUGH */
- case CE:
- /* Insert transparently coded */
- buf[0] = CE; /* Send link escape */
- buf[1] = byte^IRDA_TRANS; /* Complement bit 5 */
- return 2;
- /* break; */
- default:
- /* Non-special value, no transparency required */
- buf[0] = byte;
- return 1;
- /* break; */
- }
-}
-
-/*
- * Function async_wrap (skb, *tx_buff, buffsize)
- *
- * Makes a new buffer with wrapping and stuffing, should check that
- * we don't get tx buffer overflow.
- */
-int async_wrap_skb(struct sk_buff *skb, __u8 *tx_buff, int buffsize)
-{
- struct irda_skb_cb *cb = (struct irda_skb_cb *) skb->cb;
- int xbofs;
- int i;
- int n;
- union {
- __u16 value;
- __u8 bytes[2];
- } fcs;
-
- /* Initialize variables */
- fcs.value = INIT_FCS;
- n = 0;
-
- /*
- * Send XBOF's for required min. turn time and for the negotiated
- * additional XBOFS
- */
-
- if (cb->magic != LAP_MAGIC) {
- /*
- * This will happen for all frames sent from user-space.
- * Nothing to worry about, but we set the default number of
- * BOF's
- */
- pr_debug("%s(), wrong magic in skb!\n", __func__);
- xbofs = 10;
- } else
- xbofs = cb->xbofs + cb->xbofs_delay;
-
- pr_debug("%s(), xbofs=%d\n", __func__, xbofs);
-
- /* Check that we never use more than 115 + 48 xbofs */
- if (xbofs > 163) {
- pr_debug("%s(), too many xbofs (%d)\n", __func__,
- xbofs);
- xbofs = 163;
- }
-
- memset(tx_buff + n, XBOF, xbofs);
- n += xbofs;
-
- /* Start of packet character BOF */
- tx_buff[n++] = BOF;
-
- /* Insert frame and calc CRC */
- for (i=0; i < skb->len; i++) {
- /*
- * Check for the possibility of tx buffer overflow. We use
- * bufsize-5 since the maximum number of bytes that can be
- * transmitted after this point is 5.
- */
- if(n >= (buffsize-5)) {
- net_err_ratelimited("%s(), tx buffer overflow (n=%d)\n",
- __func__, n);
- return n;
- }
-
- n += stuff_byte(skb->data[i], tx_buff+n);
- fcs.value = irda_fcs(fcs.value, skb->data[i]);
- }
-
- /* Insert CRC in little endian format (LSB first) */
- fcs.value = ~fcs.value;
-#ifdef __LITTLE_ENDIAN
- n += stuff_byte(fcs.bytes[0], tx_buff+n);
- n += stuff_byte(fcs.bytes[1], tx_buff+n);
-#else /* ifdef __BIG_ENDIAN */
- n += stuff_byte(fcs.bytes[1], tx_buff+n);
- n += stuff_byte(fcs.bytes[0], tx_buff+n);
-#endif
- tx_buff[n++] = EOF;
-
- return n;
-}
-EXPORT_SYMBOL(async_wrap_skb);
-
-/************************* FRAME UNWRAPPING *************************/
-/*
- * Unwrap and unstuff SIR frames
- *
- * Complete rewrite by Jean II :
- * More inline, faster, more compact, more logical. Jean II
- * (16 bytes on P6 200MHz, old 5 to 7 us, new 4 to 6 us)
- * (24 bytes on P6 200MHz, old 9 to 10 us, new 7 to 8 us)
- * (for reference, 115200 b/s is 1 byte every 69 us)
- * And reduce wrapper.o by ~900B in the process ;-)
- *
- * Then, we have the addition of ZeroCopy, which is optional
- * (i.e. the driver must initiate it) and improve final processing.
- * (2005 B frame + EOF on P6 200MHz, without 30 to 50 us, with 10 to 25 us)
- *
- * Note : at FIR and MIR, HDLC framing is used and usually handled
- * by the controller, so we come here only for SIR... Jean II
- */
-
-/*
- * We can also choose where we want to do the CRC calculation. We can
- * do it "inline", as we receive the bytes, or "postponed", when
- * receiving the End-Of-Frame.
- * (16 bytes on P6 200MHz, inlined 4 to 6 us, postponed 4 to 5 us)
- * (24 bytes on P6 200MHz, inlined 7 to 8 us, postponed 5 to 7 us)
- * With ZeroCopy :
- * (2005 B frame on P6 200MHz, inlined 10 to 25 us, postponed 140 to 180 us)
- * Without ZeroCopy :
- * (2005 B frame on P6 200MHz, inlined 30 to 50 us, postponed 150 to 180 us)
- * (Note : numbers taken with irq disabled)
- *
- * From those numbers, it's not clear which is the best strategy, because
- * we end up running through a lot of data one way or another (i.e. cache
- * misses). I personally prefer to avoid the huge latency spike of the
- * "postponed" solution, because it come just at the time when we have
- * lot's of protocol processing to do and it will hurt our ability to
- * reach low link turnaround times... Jean II
- */
-//#define POSTPONE_RX_CRC
-
-/*
- * Function async_bump (buf, len, stats)
- *
- * Got a frame, make a copy of it, and pass it up the stack! We can try
- * to inline it since it's only called from state_inside_frame
- */
-static inline void
-async_bump(struct net_device *dev,
- struct net_device_stats *stats,
- iobuff_t *rx_buff)
-{
- struct sk_buff *newskb;
- struct sk_buff *dataskb;
- int docopy;
-
- /* Check if we need to copy the data to a new skb or not.
- * If the driver doesn't use ZeroCopy Rx, we have to do it.
- * With ZeroCopy Rx, the rx_buff already point to a valid
- * skb. But, if the frame is small, it is more efficient to
- * copy it to save memory (copy will be fast anyway - that's
- * called Rx-copy-break). Jean II */
- docopy = ((rx_buff->skb == NULL) ||
- (rx_buff->len < IRDA_RX_COPY_THRESHOLD));
-
- /* Allocate a new skb */
- newskb = dev_alloc_skb(docopy ? rx_buff->len + 1 : rx_buff->truesize);
- if (!newskb) {
- stats->rx_dropped++;
- /* We could deliver the current skb if doing ZeroCopy Rx,
- * but this would stall the Rx path. Better drop the
- * packet... Jean II */
- return;
- }
-
- /* Align IP header to 20 bytes (i.e. increase skb->data)
- * Note this is only useful with IrLAN, as PPP has a variable
- * header size (2 or 1 bytes) - Jean II */
- skb_reserve(newskb, 1);
-
- if(docopy) {
- /* Copy data without CRC (length already checked) */
- skb_copy_to_linear_data(newskb, rx_buff->data,
- rx_buff->len - 2);
- /* Deliver this skb */
- dataskb = newskb;
- } else {
- /* We are using ZeroCopy. Deliver old skb */
- dataskb = rx_buff->skb;
- /* And hook the new skb to the rx_buff */
- rx_buff->skb = newskb;
- rx_buff->head = newskb->data; /* NOT newskb->head */
- //printk(KERN_DEBUG "ZeroCopy : len = %d, dataskb = %p, newskb = %p\n", rx_buff->len, dataskb, newskb);
- }
-
- /* Set proper length on skb (without CRC) */
- skb_put(dataskb, rx_buff->len - 2);
-
- /* Feed it to IrLAP layer */
- dataskb->dev = dev;
- skb_reset_mac_header(dataskb);
- dataskb->protocol = htons(ETH_P_IRDA);
-
- netif_rx(dataskb);
-
- stats->rx_packets++;
- stats->rx_bytes += rx_buff->len;
-
- /* Clean up rx_buff (redundant with async_unwrap_bof() ???) */
- rx_buff->data = rx_buff->head;
- rx_buff->len = 0;
-}
-
-/*
- * Function async_unwrap_bof(dev, byte)
- *
- * Handle Beginning Of Frame character received within a frame
- *
- */
-static inline void
-async_unwrap_bof(struct net_device *dev,
- struct net_device_stats *stats,
- iobuff_t *rx_buff, __u8 byte)
-{
- switch(rx_buff->state) {
- case LINK_ESCAPE:
- case INSIDE_FRAME:
- /* Not supposed to happen, the previous frame is not
- * finished - Jean II */
- pr_debug("%s(), Discarding incomplete frame\n",
- __func__);
- stats->rx_errors++;
- stats->rx_missed_errors++;
- irda_device_set_media_busy(dev, TRUE);
- break;
-
- case OUTSIDE_FRAME:
- case BEGIN_FRAME:
- default:
- /* We may receive multiple BOF at the start of frame */
- break;
- }
-
- /* Now receiving frame */
- rx_buff->state = BEGIN_FRAME;
- rx_buff->in_frame = TRUE;
-
- /* Time to initialize receive buffer */
- rx_buff->data = rx_buff->head;
- rx_buff->len = 0;
- rx_buff->fcs = INIT_FCS;
-}
-
-/*
- * Function async_unwrap_eof(dev, byte)
- *
- * Handle End Of Frame character received within a frame
- *
- */
-static inline void
-async_unwrap_eof(struct net_device *dev,
- struct net_device_stats *stats,
- iobuff_t *rx_buff, __u8 byte)
-{
-#ifdef POSTPONE_RX_CRC
- int i;
-#endif
-
- switch(rx_buff->state) {
- case OUTSIDE_FRAME:
- /* Probably missed the BOF */
- stats->rx_errors++;
- stats->rx_missed_errors++;
- irda_device_set_media_busy(dev, TRUE);
- break;
-
- case BEGIN_FRAME:
- case LINK_ESCAPE:
- case INSIDE_FRAME:
- default:
- /* Note : in the case of BEGIN_FRAME and LINK_ESCAPE,
- * the fcs will most likely not match and generate an
- * error, as expected - Jean II */
- rx_buff->state = OUTSIDE_FRAME;
- rx_buff->in_frame = FALSE;
-
-#ifdef POSTPONE_RX_CRC
- /* If we haven't done the CRC as we receive bytes, we
- * must do it now... Jean II */
- for(i = 0; i < rx_buff->len; i++)
- rx_buff->fcs = irda_fcs(rx_buff->fcs,
- rx_buff->data[i]);
-#endif
-
- /* Test FCS and signal success if the frame is good */
- if (rx_buff->fcs == GOOD_FCS) {
- /* Deliver frame */
- async_bump(dev, stats, rx_buff);
- break;
- } else {
- /* Wrong CRC, discard frame! */
- irda_device_set_media_busy(dev, TRUE);
-
- pr_debug("%s(), crc error\n", __func__);
- stats->rx_errors++;
- stats->rx_crc_errors++;
- }
- break;
- }
-}
-
-/*
- * Function async_unwrap_ce(dev, byte)
- *
- * Handle Character Escape character received within a frame
- *
- */
-static inline void
-async_unwrap_ce(struct net_device *dev,
- struct net_device_stats *stats,
- iobuff_t *rx_buff, __u8 byte)
-{
- switch(rx_buff->state) {
- case OUTSIDE_FRAME:
- /* Activate carrier sense */
- irda_device_set_media_busy(dev, TRUE);
- break;
-
- case LINK_ESCAPE:
- net_warn_ratelimited("%s: state not defined\n", __func__);
- break;
-
- case BEGIN_FRAME:
- case INSIDE_FRAME:
- default:
- /* Stuffed byte coming */
- rx_buff->state = LINK_ESCAPE;
- break;
- }
-}
-
-/*
- * Function async_unwrap_other(dev, byte)
- *
- * Handle other characters received within a frame
- *
- */
-static inline void
-async_unwrap_other(struct net_device *dev,
- struct net_device_stats *stats,
- iobuff_t *rx_buff, __u8 byte)
-{
- switch(rx_buff->state) {
- /* This is on the critical path, case are ordered by
- * probability (most frequent first) - Jean II */
- case INSIDE_FRAME:
- /* Must be the next byte of the frame */
- if (rx_buff->len < rx_buff->truesize) {
- rx_buff->data[rx_buff->len++] = byte;
-#ifndef POSTPONE_RX_CRC
- rx_buff->fcs = irda_fcs(rx_buff->fcs, byte);
-#endif
- } else {
- pr_debug("%s(), Rx buffer overflow, aborting\n",
- __func__);
- rx_buff->state = OUTSIDE_FRAME;
- }
- break;
-
- case LINK_ESCAPE:
- /*
- * Stuffed char, complement bit 5 of byte
- * following CE, IrLAP p.114
- */
- byte ^= IRDA_TRANS;
- if (rx_buff->len < rx_buff->truesize) {
- rx_buff->data[rx_buff->len++] = byte;
-#ifndef POSTPONE_RX_CRC
- rx_buff->fcs = irda_fcs(rx_buff->fcs, byte);
-#endif
- rx_buff->state = INSIDE_FRAME;
- } else {
- pr_debug("%s(), Rx buffer overflow, aborting\n",
- __func__);
- rx_buff->state = OUTSIDE_FRAME;
- }
- break;
-
- case OUTSIDE_FRAME:
- /* Activate carrier sense */
- if(byte != XBOF)
- irda_device_set_media_busy(dev, TRUE);
- break;
-
- case BEGIN_FRAME:
- default:
- rx_buff->data[rx_buff->len++] = byte;
-#ifndef POSTPONE_RX_CRC
- rx_buff->fcs = irda_fcs(rx_buff->fcs, byte);
-#endif
- rx_buff->state = INSIDE_FRAME;
- break;
- }
-}
-
-/*
- * Function async_unwrap_char (dev, rx_buff, byte)
- *
- * Parse and de-stuff frame received from the IrDA-port
- *
- * This is the main entry point for SIR drivers.
- */
-void async_unwrap_char(struct net_device *dev,
- struct net_device_stats *stats,
- iobuff_t *rx_buff, __u8 byte)
-{
- switch(byte) {
- case CE:
- async_unwrap_ce(dev, stats, rx_buff, byte);
- break;
- case BOF:
- async_unwrap_bof(dev, stats, rx_buff, byte);
- break;
- case EOF:
- async_unwrap_eof(dev, stats, rx_buff, byte);
- break;
- default:
- async_unwrap_other(dev, stats, rx_buff, byte);
- break;
- }
-}
-EXPORT_SYMBOL(async_unwrap_char);
-
diff --git a/net/kcm/kcmproc.c b/net/kcm/kcmproc.c
index c343ac60bf50..bd5723315069 100644
--- a/net/kcm/kcmproc.c
+++ b/net/kcm/kcmproc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/list.h>
@@ -155,8 +156,8 @@ static void kcm_format_psock(struct kcm_psock *psock, struct seq_file *seq,
seq_printf(seq,
" psock-%-5u %-10llu %-16llu %-10llu %-16llu %-8d %-8d %-8d %-8d ",
psock->index,
- psock->strp.stats.rx_msgs,
- psock->strp.stats.rx_bytes,
+ psock->strp.stats.msgs,
+ psock->strp.stats.bytes,
psock->stats.tx_msgs,
psock->stats.tx_bytes,
psock->sk->sk_receive_queue.qlen,
@@ -170,22 +171,22 @@ static void kcm_format_psock(struct kcm_psock *psock, struct seq_file *seq,
if (psock->tx_stopped)
seq_puts(seq, "TxStop ");
- if (psock->strp.rx_stopped)
+ if (psock->strp.stopped)
seq_puts(seq, "RxStop ");
if (psock->tx_kcm)
seq_printf(seq, "Rsvd-%d ", psock->tx_kcm->index);
- if (!psock->strp.rx_paused && !psock->ready_rx_msg) {
+ if (!psock->strp.paused && !psock->ready_rx_msg) {
if (psock->sk->sk_receive_queue.qlen) {
- if (psock->strp.rx_need_bytes)
+ if (psock->strp.need_bytes)
seq_printf(seq, "RxWait=%u ",
- psock->strp.rx_need_bytes);
+ psock->strp.need_bytes);
else
seq_printf(seq, "RxWait ");
}
} else {
- if (psock->strp.rx_paused)
+ if (psock->strp.paused)
seq_puts(seq, "RxPause ");
if (psock->ready_rx_msg)
@@ -371,20 +372,20 @@ static int kcm_stats_seq_show(struct seq_file *seq, void *v)
seq_printf(seq,
"%-8s %-10llu %-16llu %-10llu %-16llu %-10llu %-10llu %-10u %-10u %-10u %-10u %-10u %-10u %-10u %-10u %-10u\n",
"",
- strp_stats.rx_msgs,
- strp_stats.rx_bytes,
+ strp_stats.msgs,
+ strp_stats.bytes,
psock_stats.tx_msgs,
psock_stats.tx_bytes,
psock_stats.reserved,
psock_stats.unreserved,
- strp_stats.rx_aborts,
- strp_stats.rx_interrupted,
- strp_stats.rx_unrecov_intr,
- strp_stats.rx_mem_fail,
- strp_stats.rx_need_more_hdr,
- strp_stats.rx_bad_hdr_len,
- strp_stats.rx_msg_too_big,
- strp_stats.rx_msg_timeouts,
+ strp_stats.aborts,
+ strp_stats.interrupted,
+ strp_stats.unrecov_intr,
+ strp_stats.mem_fail,
+ strp_stats.need_more_hdr,
+ strp_stats.bad_hdr_len,
+ strp_stats.msg_too_big,
+ strp_stats.msg_timeouts,
psock_stats.tx_aborts);
return 0;
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index da49191f7ad0..0b750a22c4b9 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -96,12 +96,12 @@ static void kcm_update_rx_mux_stats(struct kcm_mux *mux,
struct kcm_psock *psock)
{
STRP_STATS_ADD(mux->stats.rx_bytes,
- psock->strp.stats.rx_bytes -
+ psock->strp.stats.bytes -
psock->saved_rx_bytes);
mux->stats.rx_msgs +=
- psock->strp.stats.rx_msgs - psock->saved_rx_msgs;
- psock->saved_rx_msgs = psock->strp.stats.rx_msgs;
- psock->saved_rx_bytes = psock->strp.stats.rx_bytes;
+ psock->strp.stats.msgs - psock->saved_rx_msgs;
+ psock->saved_rx_msgs = psock->strp.stats.msgs;
+ psock->saved_rx_bytes = psock->strp.stats.bytes;
}
static void kcm_update_tx_mux_stats(struct kcm_mux *mux,
@@ -1118,7 +1118,7 @@ static int kcm_recvmsg(struct socket *sock, struct msghdr *msg,
struct kcm_sock *kcm = kcm_sk(sk);
int err = 0;
long timeo;
- struct strp_rx_msg *rxm;
+ struct strp_msg *stm;
int copied = 0;
struct sk_buff *skb;
@@ -1132,26 +1132,26 @@ static int kcm_recvmsg(struct socket *sock, struct msghdr *msg,
/* Okay, have a message on the receive queue */
- rxm = strp_rx_msg(skb);
+ stm = strp_msg(skb);
- if (len > rxm->full_len)
- len = rxm->full_len;
+ if (len > stm->full_len)
+ len = stm->full_len;
- err = skb_copy_datagram_msg(skb, rxm->offset, msg, len);
+ err = skb_copy_datagram_msg(skb, stm->offset, msg, len);
if (err < 0)
goto out;
copied = len;
if (likely(!(flags & MSG_PEEK))) {
KCM_STATS_ADD(kcm->stats.rx_bytes, copied);
- if (copied < rxm->full_len) {
+ if (copied < stm->full_len) {
if (sock->type == SOCK_DGRAM) {
/* Truncated message */
msg->msg_flags |= MSG_TRUNC;
goto msg_finished;
}
- rxm->offset += copied;
- rxm->full_len -= copied;
+ stm->offset += copied;
+ stm->full_len -= copied;
} else {
msg_finished:
/* Finished with message */
@@ -1175,7 +1175,7 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos,
struct sock *sk = sock->sk;
struct kcm_sock *kcm = kcm_sk(sk);
long timeo;
- struct strp_rx_msg *rxm;
+ struct strp_msg *stm;
int err = 0;
ssize_t copied;
struct sk_buff *skb;
@@ -1192,12 +1192,12 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos,
/* Okay, have a message on the receive queue */
- rxm = strp_rx_msg(skb);
+ stm = strp_msg(skb);
- if (len > rxm->full_len)
- len = rxm->full_len;
+ if (len > stm->full_len)
+ len = stm->full_len;
- copied = skb_splice_bits(skb, sk, rxm->offset, pipe, len, flags);
+ copied = skb_splice_bits(skb, sk, stm->offset, pipe, len, flags);
if (copied < 0) {
err = copied;
goto err_out;
@@ -1205,8 +1205,8 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos,
KCM_STATS_ADD(kcm->stats.rx_bytes, copied);
- rxm->offset += copied;
- rxm->full_len -= copied;
+ stm->offset += copied;
+ stm->full_len -= copied;
/* We have no way to return MSG_EOR. If all the bytes have been
* read we still leave the message in the receive socket buffer.
@@ -1376,13 +1376,21 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
struct kcm_psock *psock = NULL, *tpsock;
struct list_head *head;
int index = 0;
- struct strp_callbacks cb;
+ static const struct strp_callbacks cb = {
+ .rcv_msg = kcm_rcv_strparser,
+ .parse_msg = kcm_parse_func_strparser,
+ .read_sock_done = kcm_read_sock_done,
+ };
int err;
csk = csock->sk;
if (!csk)
return -EINVAL;
+ /* We must prevent loops or risk deadlock ! */
+ if (csk->sk_family == PF_KCM)
+ return -EOPNOTSUPP;
+
psock = kmem_cache_zalloc(kcm_psockp, GFP_KERNEL);
if (!psock)
return -ENOMEM;
@@ -1391,11 +1399,6 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
psock->sk = csk;
psock->bpf_prog = prog;
- cb.rcv_msg = kcm_rcv_strparser;
- cb.abort_parser = NULL;
- cb.parse_msg = kcm_parse_func_strparser;
- cb.read_sock_done = kcm_read_sock_done;
-
err = strp_init(&psock->strp, csk, &cb);
if (err) {
kmem_cache_free(kcm_psockp, psock);
@@ -1647,7 +1650,7 @@ static int kcm_clone(struct socket *osock, struct kcm_clone *info,
}
newfile = sock_alloc_file(newsock, 0, osock->sk->sk_prot_creator->name);
- if (unlikely(IS_ERR(newfile))) {
+ if (IS_ERR(newfile)) {
err = PTR_ERR(newfile);
goto out_sock_alloc_fail;
}
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 98f4d8211b9a..3dffb892d52c 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -2399,8 +2399,6 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa
out:
xfrm_pol_put(xp);
- if (err == 0)
- xfrm_garbage_collect(net);
return err;
}
@@ -2651,8 +2649,6 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, const struct sadb_
out:
xfrm_pol_put(xp);
- if (delete && err == 0)
- xfrm_garbage_collect(net);
return err;
}
@@ -2752,8 +2748,6 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, const struct sad
int err, err2;
err = xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, true);
- if (!err)
- xfrm_garbage_collect(net);
err2 = unicast_flush_resp(sk, hdr);
if (err || err2) {
if (err == -ESRCH) /* empty table - old silent behavior */
@@ -3851,7 +3845,7 @@ static void __net_exit pfkey_net_exit(struct net *net)
struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id);
pfkey_exit_proc(net);
- BUG_ON(!hlist_empty(&net_pfkey->table));
+ WARN_ON(!hlist_empty(&net_pfkey->table));
}
static struct pernet_operations pfkey_net_ops = {
diff --git a/net/l2tp/Makefile b/net/l2tp/Makefile
index 2870f41ea44d..399a7e5db2f4 100644
--- a/net/l2tp/Makefile
+++ b/net/l2tp/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the L2TP.
#
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index b0c2d4ae781d..115918ad8eca 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -100,8 +100,6 @@ struct l2tp_skb_cb {
#define L2TP_SKB_CB(skb) ((struct l2tp_skb_cb *) &skb->cb[sizeof(struct inet_skb_parm)])
-static atomic_t l2tp_tunnel_count;
-static atomic_t l2tp_session_count;
static struct workqueue_struct *l2tp_wq;
/* per-net private data for this module */
@@ -113,7 +111,6 @@ struct l2tp_net {
spinlock_t l2tp_session_hlist_lock;
};
-static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel);
static inline struct l2tp_tunnel *l2tp_tunnel(struct sock *sk)
{
@@ -127,39 +124,6 @@ static inline struct l2tp_net *l2tp_pernet(const struct net *net)
return net_generic(net, l2tp_net_id);
}
-/* Tunnel reference counts. Incremented per session that is added to
- * the tunnel.
- */
-static inline void l2tp_tunnel_inc_refcount_1(struct l2tp_tunnel *tunnel)
-{
- refcount_inc(&tunnel->ref_count);
-}
-
-static inline void l2tp_tunnel_dec_refcount_1(struct l2tp_tunnel *tunnel)
-{
- if (refcount_dec_and_test(&tunnel->ref_count))
- l2tp_tunnel_free(tunnel);
-}
-#ifdef L2TP_REFCNT_DEBUG
-#define l2tp_tunnel_inc_refcount(_t) \
-do { \
- pr_debug("l2tp_tunnel_inc_refcount: %s:%d %s: cnt=%d\n", \
- __func__, __LINE__, (_t)->name, \
- refcount_read(&_t->ref_count)); \
- l2tp_tunnel_inc_refcount_1(_t); \
-} while (0)
-#define l2tp_tunnel_dec_refcount(_t) \
-do { \
- pr_debug("l2tp_tunnel_dec_refcount: %s:%d %s: cnt=%d\n", \
- __func__, __LINE__, (_t)->name, \
- refcount_read(&_t->ref_count)); \
- l2tp_tunnel_dec_refcount_1(_t); \
-} while (0)
-#else
-#define l2tp_tunnel_inc_refcount(t) l2tp_tunnel_inc_refcount_1(t)
-#define l2tp_tunnel_dec_refcount(t) l2tp_tunnel_dec_refcount_1(t)
-#endif
-
/* Session hash global list for L2TPv3.
* The session_id SHOULD be random according to RFC3931, but several
* L2TP implementations use incrementing session_ids. So we do a real
@@ -229,12 +193,31 @@ l2tp_session_id_hash(struct l2tp_tunnel *tunnel, u32 session_id)
return &tunnel->session_hlist[hash_32(session_id, L2TP_HASH_BITS)];
}
-/* Lookup a session. A new reference is held on the returned session.
- * Optionally calls session->ref() too if do_ref is true.
- */
+/* Lookup a tunnel. A new reference is held on the returned tunnel. */
+struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id)
+{
+ const struct l2tp_net *pn = l2tp_pernet(net);
+ struct l2tp_tunnel *tunnel;
+
+ rcu_read_lock_bh();
+ list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) {
+ if (tunnel->tunnel_id == tunnel_id) {
+ l2tp_tunnel_inc_refcount(tunnel);
+ rcu_read_unlock_bh();
+
+ return tunnel;
+ }
+ }
+ rcu_read_unlock_bh();
+
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(l2tp_tunnel_get);
+
+/* Lookup a session. A new reference is held on the returned session. */
struct l2tp_session *l2tp_session_get(const struct net *net,
struct l2tp_tunnel *tunnel,
- u32 session_id, bool do_ref)
+ u32 session_id)
{
struct hlist_head *session_list;
struct l2tp_session *session;
@@ -248,8 +231,6 @@ struct l2tp_session *l2tp_session_get(const struct net *net,
hlist_for_each_entry_rcu(session, session_list, global_hlist) {
if (session->session_id == session_id) {
l2tp_session_inc_refcount(session);
- if (do_ref && session->ref)
- session->ref(session);
rcu_read_unlock_bh();
return session;
@@ -265,8 +246,6 @@ struct l2tp_session *l2tp_session_get(const struct net *net,
hlist_for_each_entry(session, session_list, hlist) {
if (session->session_id == session_id) {
l2tp_session_inc_refcount(session);
- if (do_ref && session->ref)
- session->ref(session);
read_unlock_bh(&tunnel->hlist_lock);
return session;
@@ -278,8 +257,7 @@ struct l2tp_session *l2tp_session_get(const struct net *net,
}
EXPORT_SYMBOL_GPL(l2tp_session_get);
-struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth,
- bool do_ref)
+struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth)
{
int hash;
struct l2tp_session *session;
@@ -290,8 +268,6 @@ struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth,
hlist_for_each_entry(session, &tunnel->session_hlist[hash], hlist) {
if (++count > nth) {
l2tp_session_inc_refcount(session);
- if (do_ref && session->ref)
- session->ref(session);
read_unlock_bh(&tunnel->hlist_lock);
return session;
}
@@ -308,8 +284,7 @@ EXPORT_SYMBOL_GPL(l2tp_session_get_nth);
* This is very inefficient but is only used by management interfaces.
*/
struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net,
- const char *ifname,
- bool do_ref)
+ const char *ifname)
{
struct l2tp_net *pn = l2tp_pernet(net);
int hash;
@@ -320,8 +295,6 @@ struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net,
hlist_for_each_entry_rcu(session, &pn->l2tp_session_hlist[hash], global_hlist) {
if (!strcmp(session->ifname, ifname)) {
l2tp_session_inc_refcount(session);
- if (do_ref && session->ref)
- session->ref(session);
rcu_read_unlock_bh();
return session;
@@ -335,20 +308,28 @@ struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net,
}
EXPORT_SYMBOL_GPL(l2tp_session_get_by_ifname);
-static int l2tp_session_add_to_tunnel(struct l2tp_tunnel *tunnel,
- struct l2tp_session *session)
+int l2tp_session_register(struct l2tp_session *session,
+ struct l2tp_tunnel *tunnel)
{
struct l2tp_session *session_walk;
struct hlist_head *g_head;
struct hlist_head *head;
struct l2tp_net *pn;
+ int err;
head = l2tp_session_id_hash(tunnel, session->session_id);
write_lock_bh(&tunnel->hlist_lock);
+ if (!tunnel->acpt_newsess) {
+ err = -ENODEV;
+ goto err_tlock;
+ }
+
hlist_for_each_entry(session_walk, head, hlist)
- if (session_walk->session_id == session->session_id)
- goto exist;
+ if (session_walk->session_id == session->session_id) {
+ err = -EEXIST;
+ goto err_tlock;
+ }
if (tunnel->version == L2TP_HDR_VER_3) {
pn = l2tp_pernet(tunnel->l2tp_net);
@@ -356,12 +337,21 @@ static int l2tp_session_add_to_tunnel(struct l2tp_tunnel *tunnel,
session->session_id);
spin_lock_bh(&pn->l2tp_session_hlist_lock);
+
hlist_for_each_entry(session_walk, g_head, global_hlist)
- if (session_walk->session_id == session->session_id)
- goto exist_glob;
+ if (session_walk->session_id == session->session_id) {
+ err = -EEXIST;
+ goto err_tlock_pnlock;
+ }
+ l2tp_tunnel_inc_refcount(tunnel);
+ sock_hold(tunnel->sock);
hlist_add_head_rcu(&session->global_hlist, g_head);
+
spin_unlock_bh(&pn->l2tp_session_hlist_lock);
+ } else {
+ l2tp_tunnel_inc_refcount(tunnel);
+ sock_hold(tunnel->sock);
}
hlist_add_head(&session->hlist, head);
@@ -369,13 +359,14 @@ static int l2tp_session_add_to_tunnel(struct l2tp_tunnel *tunnel,
return 0;
-exist_glob:
+err_tlock_pnlock:
spin_unlock_bh(&pn->l2tp_session_hlist_lock);
-exist:
+err_tlock:
write_unlock_bh(&tunnel->hlist_lock);
- return -EEXIST;
+ return err;
}
+EXPORT_SYMBOL_GPL(l2tp_session_register);
/* Lookup a tunnel by id
*/
@@ -480,9 +471,6 @@ static void l2tp_recv_dequeue_skb(struct l2tp_session *session, struct sk_buff *
(*session->recv_skb)(session, skb, L2TP_SKB_CB(skb)->length);
else
kfree_skb(skb);
-
- if (session->deref)
- (*session->deref)(session);
}
/* Dequeue skbs from the session's reorder_q, subject to packet order.
@@ -511,8 +499,6 @@ start:
session->reorder_skip = 1;
__skb_unlink(skb, &session->reorder_q);
kfree_skb(skb);
- if (session->deref)
- (*session->deref)(session);
continue;
}
@@ -685,9 +671,6 @@ discard:
* a data (not control) frame before coming here. Fields up to the
* session-id have already been parsed and ptr points to the data
* after the session-id.
- *
- * session->ref() must have been called prior to l2tp_recv_common().
- * session->deref() will be called automatically after skb is processed.
*/
void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
unsigned char *ptr, unsigned char *optr, u16 hdrflags,
@@ -854,9 +837,6 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
discard:
atomic_long_inc(&session->stats.rx_errors);
kfree_skb(skb);
-
- if (session->deref)
- (*session->deref)(session);
}
EXPORT_SYMBOL(l2tp_recv_common);
@@ -870,8 +850,6 @@ int l2tp_session_queue_purge(struct l2tp_session *session)
while ((skb = skb_dequeue(&session->reorder_q))) {
atomic_long_inc(&session->stats.rx_errors);
kfree_skb(skb);
- if (session->deref)
- (*session->deref)(session);
}
return 0;
}
@@ -963,13 +941,10 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb,
}
/* Find the session context */
- session = l2tp_session_get(tunnel->l2tp_net, tunnel, session_id, true);
+ session = l2tp_session_get(tunnel->l2tp_net, tunnel, session_id);
if (!session || !session->recv_skb) {
- if (session) {
- if (session->deref)
- session->deref(session);
+ if (session)
l2tp_session_dec_refcount(session);
- }
/* Not found? Pass to userspace to deal with */
l2tp_info(tunnel, L2TP_MSG_DATA,
@@ -1264,16 +1239,14 @@ static void l2tp_tunnel_destruct(struct sock *sk)
/* Remove hooks into tunnel socket */
sk->sk_destruct = tunnel->old_sk_destruct;
sk->sk_user_data = NULL;
- tunnel->sock = NULL;
/* Remove the tunnel struct from the tunnel list */
pn = l2tp_pernet(tunnel->l2tp_net);
spin_lock_bh(&pn->l2tp_tunnel_list_lock);
list_del_rcu(&tunnel->list);
spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
- atomic_dec(&l2tp_tunnel_count);
- l2tp_tunnel_closeall(tunnel);
+ tunnel->sock = NULL;
l2tp_tunnel_dec_refcount(tunnel);
/* Call the original destructor */
@@ -1298,6 +1271,7 @@ void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel)
tunnel->name);
write_lock_bh(&tunnel->hlist_lock);
+ tunnel->acpt_newsess = false;
for (hash = 0; hash < L2TP_HASH_SIZE; hash++) {
again:
hlist_for_each_safe(walk, tmp, &tunnel->session_hlist[hash]) {
@@ -1308,8 +1282,8 @@ again:
hlist_del_init(&session->hlist);
- if (session->ref != NULL)
- (*session->ref)(session);
+ if (test_and_set_bit(0, &session->dead))
+ goto again;
write_unlock_bh(&tunnel->hlist_lock);
@@ -1319,9 +1293,6 @@ again:
if (session->session_close != NULL)
(*session->session_close)(session);
- if (session->deref != NULL)
- (*session->deref)(session);
-
l2tp_session_dec_refcount(session);
write_lock_bh(&tunnel->hlist_lock);
@@ -1348,17 +1319,6 @@ static void l2tp_udp_encap_destroy(struct sock *sk)
}
}
-/* Really kill the tunnel.
- * Come here only when all sessions have been cleared from the tunnel.
- */
-static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel)
-{
- BUG_ON(refcount_read(&tunnel->ref_count) != 0);
- BUG_ON(tunnel->sock != NULL);
- l2tp_info(tunnel, L2TP_MSG_CONTROL, "%s: free...\n", tunnel->name);
- kfree_rcu(tunnel, rcu);
-}
-
/* Workqueue tunnel deletion function */
static void l2tp_tunnel_del_work(struct work_struct *work)
{
@@ -1605,6 +1565,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
tunnel->magic = L2TP_TUNNEL_MAGIC;
sprintf(&tunnel->name[0], "tunl %u", tunnel_id);
rwlock_init(&tunnel->hlist_lock);
+ tunnel->acpt_newsess = true;
/* The net we belong to */
tunnel->l2tp_net = net;
@@ -1662,7 +1623,6 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
/* Add tunnel to our list */
INIT_LIST_HEAD(&tunnel->list);
- atomic_inc(&l2tp_tunnel_count);
/* Bump the reference count. The tunnel context is deleted
* only when this drops to zero. Must be done before list insertion
@@ -1689,14 +1649,12 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_create);
/* This function is used by the netlink TUNNEL_DELETE command.
*/
-int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel)
+void l2tp_tunnel_delete(struct l2tp_tunnel *tunnel)
{
- l2tp_tunnel_inc_refcount(tunnel);
- if (false == queue_work(l2tp_wq, &tunnel->del_work)) {
- l2tp_tunnel_dec_refcount(tunnel);
- return 1;
+ if (!test_and_set_bit(0, &tunnel->dead)) {
+ l2tp_tunnel_inc_refcount(tunnel);
+ queue_work(l2tp_wq, &tunnel->del_work);
}
- return 0;
}
EXPORT_SYMBOL_GPL(l2tp_tunnel_delete);
@@ -1710,8 +1668,6 @@ void l2tp_session_free(struct l2tp_session *session)
if (tunnel) {
BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC);
- if (session->session_id != 0)
- atomic_dec(&l2tp_session_count);
sock_put(tunnel->sock);
session->tunnel = NULL;
l2tp_tunnel_dec_refcount(tunnel);
@@ -1754,15 +1710,16 @@ EXPORT_SYMBOL_GPL(__l2tp_session_unhash);
*/
int l2tp_session_delete(struct l2tp_session *session)
{
- if (session->ref)
- (*session->ref)(session);
+ if (test_and_set_bit(0, &session->dead))
+ return 0;
+
__l2tp_session_unhash(session);
l2tp_session_queue_purge(session);
if (session->session_close != NULL)
(*session->session_close)(session);
- if (session->deref)
- (*session->deref)(session);
+
l2tp_session_dec_refcount(session);
+
return 0;
}
EXPORT_SYMBOL_GPL(l2tp_session_delete);
@@ -1788,7 +1745,6 @@ EXPORT_SYMBOL_GPL(l2tp_session_set_header_len);
struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg)
{
struct l2tp_session *session;
- int err;
session = kzalloc(sizeof(struct l2tp_session) + priv_size, GFP_KERNEL);
if (session != NULL) {
@@ -1844,25 +1800,7 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn
l2tp_session_set_header_len(session, tunnel->version);
- err = l2tp_session_add_to_tunnel(tunnel, session);
- if (err) {
- kfree(session);
-
- return ERR_PTR(err);
- }
-
- /* Bump the reference count. The session context is deleted
- * only when this drops to zero.
- */
refcount_set(&session->ref_count, 1);
- l2tp_tunnel_inc_refcount(tunnel);
-
- /* Ensure tunnel socket isn't deleted */
- sock_hold(tunnel->sock);
-
- /* Ignore management session in session count value */
- if (session->session_id != 0)
- atomic_inc(&l2tp_session_count);
return session;
}
@@ -1895,15 +1833,19 @@ static __net_exit void l2tp_exit_net(struct net *net)
{
struct l2tp_net *pn = l2tp_pernet(net);
struct l2tp_tunnel *tunnel = NULL;
+ int hash;
rcu_read_lock_bh();
list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) {
- (void)l2tp_tunnel_delete(tunnel);
+ l2tp_tunnel_delete(tunnel);
}
rcu_read_unlock_bh();
flush_workqueue(l2tp_wq);
rcu_barrier();
+
+ for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++)
+ WARN_ON_ONCE(!hlist_empty(&pn->l2tp_session_hlist[hash]));
}
static struct pernet_operations l2tp_net_ops = {
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index cdb6e3327f74..9534e16965cc 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -76,6 +76,7 @@ struct l2tp_session_cfg {
struct l2tp_session {
int magic; /* should be
* L2TP_SESSION_MAGIC */
+ long dead;
struct l2tp_tunnel *tunnel; /* back pointer to tunnel
* context */
@@ -128,8 +129,6 @@ struct l2tp_session {
int (*build_header)(struct l2tp_session *session, void *buf);
void (*recv_skb)(struct l2tp_session *session, struct sk_buff *skb, int data_len);
void (*session_close)(struct l2tp_session *session);
- void (*ref)(struct l2tp_session *session);
- void (*deref)(struct l2tp_session *session);
#if IS_ENABLED(CONFIG_L2TP_DEBUGFS)
void (*show)(struct seq_file *m, void *priv);
#endif
@@ -160,8 +159,15 @@ struct l2tp_tunnel_cfg {
struct l2tp_tunnel {
int magic; /* Should be L2TP_TUNNEL_MAGIC */
+
+ unsigned long dead;
+
struct rcu_head rcu;
rwlock_t hlist_lock; /* protect session_hlist */
+ bool acpt_newsess; /* Indicates whether this
+ * tunnel accepts new sessions.
+ * Protected by hlist_lock.
+ */
struct hlist_head session_hlist[L2TP_HASH_SIZE];
/* hashed list of sessions,
* hashed by id */
@@ -197,7 +203,9 @@ struct l2tp_tunnel {
};
struct l2tp_nl_cmd_ops {
- int (*session_create)(struct net *net, u32 tunnel_id, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg);
+ int (*session_create)(struct net *net, struct l2tp_tunnel *tunnel,
+ u32 session_id, u32 peer_session_id,
+ struct l2tp_session_cfg *cfg);
int (*session_delete)(struct l2tp_session *session);
};
@@ -231,14 +239,14 @@ out:
return tunnel;
}
+struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id);
+
struct l2tp_session *l2tp_session_get(const struct net *net,
struct l2tp_tunnel *tunnel,
- u32 session_id, bool do_ref);
-struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth,
- bool do_ref);
+ u32 session_id);
+struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth);
struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net,
- const char *ifname,
- bool do_ref);
+ const char *ifname);
struct l2tp_tunnel *l2tp_tunnel_find(const struct net *net, u32 tunnel_id);
struct l2tp_tunnel *l2tp_tunnel_find_nth(const struct net *net, int nth);
@@ -246,11 +254,14 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id,
u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg,
struct l2tp_tunnel **tunnelp);
void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel);
-int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel);
+void l2tp_tunnel_delete(struct l2tp_tunnel *tunnel);
struct l2tp_session *l2tp_session_create(int priv_size,
struct l2tp_tunnel *tunnel,
u32 session_id, u32 peer_session_id,
struct l2tp_session_cfg *cfg);
+int l2tp_session_register(struct l2tp_session *session,
+ struct l2tp_tunnel *tunnel);
+
void __l2tp_session_unhash(struct l2tp_session *session);
int l2tp_session_delete(struct l2tp_session *session);
void l2tp_session_free(struct l2tp_session *session);
@@ -269,40 +280,31 @@ int l2tp_nl_register_ops(enum l2tp_pwtype pw_type,
void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type);
int l2tp_ioctl(struct sock *sk, int cmd, unsigned long arg);
+static inline void l2tp_tunnel_inc_refcount(struct l2tp_tunnel *tunnel)
+{
+ refcount_inc(&tunnel->ref_count);
+}
+
+static inline void l2tp_tunnel_dec_refcount(struct l2tp_tunnel *tunnel)
+{
+ if (refcount_dec_and_test(&tunnel->ref_count))
+ kfree_rcu(tunnel, rcu);
+}
+
/* Session reference counts. Incremented when code obtains a reference
* to a session.
*/
-static inline void l2tp_session_inc_refcount_1(struct l2tp_session *session)
+static inline void l2tp_session_inc_refcount(struct l2tp_session *session)
{
refcount_inc(&session->ref_count);
}
-static inline void l2tp_session_dec_refcount_1(struct l2tp_session *session)
+static inline void l2tp_session_dec_refcount(struct l2tp_session *session)
{
if (refcount_dec_and_test(&session->ref_count))
l2tp_session_free(session);
}
-#ifdef L2TP_REFCNT_DEBUG
-#define l2tp_session_inc_refcount(_s) \
-do { \
- pr_debug("l2tp_session_inc_refcount: %s:%d %s: cnt=%d\n", \
- __func__, __LINE__, (_s)->name, \
- refcount_read(&_s->ref_count)); \
- l2tp_session_inc_refcount_1(_s); \
-} while (0)
-#define l2tp_session_dec_refcount(_s) \
-do { \
- pr_debug("l2tp_session_dec_refcount: %s:%d %s: cnt=%d\n", \
- __func__, __LINE__, (_s)->name, \
- refcount_read(&_s->ref_count)); \
- l2tp_session_dec_refcount_1(_s); \
-} while (0)
-#else
-#define l2tp_session_inc_refcount(s) l2tp_session_inc_refcount_1(s)
-#define l2tp_session_dec_refcount(s) l2tp_session_dec_refcount_1(s)
-#endif
-
#define l2tp_printk(ptr, type, func, fmt, ...) \
do { \
if (((ptr)->debug) & (type)) \
diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c
index 53bae54c4d6e..eb69411bcb47 100644
--- a/net/l2tp/l2tp_debugfs.c
+++ b/net/l2tp/l2tp_debugfs.c
@@ -53,7 +53,7 @@ static void l2tp_dfs_next_tunnel(struct l2tp_dfs_seq_data *pd)
static void l2tp_dfs_next_session(struct l2tp_dfs_seq_data *pd)
{
- pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx, true);
+ pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx);
pd->session_idx++;
if (pd->session == NULL) {
@@ -241,8 +241,6 @@ static int l2tp_dfs_seq_show(struct seq_file *m, void *v)
l2tp_dfs_seq_tunnel_show(m, pd->tunnel);
} else {
l2tp_dfs_seq_session_show(m, pd->session);
- if (pd->session->deref)
- pd->session->deref(pd->session);
l2tp_session_dec_refcount(pd->session);
}
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 4de2ec94b08c..5c366ecfa1cb 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -41,10 +41,7 @@
/* via netdev_priv() */
struct l2tp_eth {
- struct net_device *dev;
- struct sock *tunnel_sock;
struct l2tp_session *session;
- struct list_head list;
atomic_long_t tx_bytes;
atomic_long_t tx_packets;
atomic_long_t tx_dropped;
@@ -55,26 +52,12 @@ struct l2tp_eth {
/* via l2tp_session_priv() */
struct l2tp_eth_sess {
- struct net_device *dev;
+ struct net_device __rcu *dev;
};
-/* per-net private data for this module */
-static unsigned int l2tp_eth_net_id;
-struct l2tp_eth_net {
- struct list_head l2tp_eth_dev_list;
- spinlock_t l2tp_eth_lock;
-};
-
-static inline struct l2tp_eth_net *l2tp_eth_pernet(struct net *net)
-{
- return net_generic(net, l2tp_eth_net_id);
-}
static int l2tp_eth_dev_init(struct net_device *dev)
{
- struct l2tp_eth *priv = netdev_priv(dev);
-
- priv->dev = dev;
eth_hw_addr_random(dev);
eth_broadcast_addr(dev->broadcast);
netdev_lockdep_set_classes(dev);
@@ -85,12 +68,13 @@ static int l2tp_eth_dev_init(struct net_device *dev)
static void l2tp_eth_dev_uninit(struct net_device *dev)
{
struct l2tp_eth *priv = netdev_priv(dev);
- struct l2tp_eth_net *pn = l2tp_eth_pernet(dev_net(dev));
+ struct l2tp_eth_sess *spriv;
- spin_lock(&pn->l2tp_eth_lock);
- list_del_init(&priv->list);
- spin_unlock(&pn->l2tp_eth_lock);
- dev_put(dev);
+ spriv = l2tp_session_priv(priv->session);
+ RCU_INIT_POINTER(spriv->dev, NULL);
+ /* No need for synchronize_net() here. We're called by
+ * unregister_netdev*(), which does the synchronisation for us.
+ */
}
static int l2tp_eth_dev_xmit(struct sk_buff *skb, struct net_device *dev)
@@ -148,8 +132,8 @@ static void l2tp_eth_dev_setup(struct net_device *dev)
static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb, int data_len)
{
struct l2tp_eth_sess *spriv = l2tp_session_priv(session);
- struct net_device *dev = spriv->dev;
- struct l2tp_eth *priv = netdev_priv(dev);
+ struct net_device *dev;
+ struct l2tp_eth *priv;
if (session->debug & L2TP_MSG_DATA) {
unsigned int length;
@@ -173,16 +157,25 @@ static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb,
skb_dst_drop(skb);
nf_reset(skb);
+ rcu_read_lock();
+ dev = rcu_dereference(spriv->dev);
+ if (!dev)
+ goto error_rcu;
+
+ priv = netdev_priv(dev);
if (dev_forward_skb(dev, skb) == NET_RX_SUCCESS) {
atomic_long_inc(&priv->rx_packets);
atomic_long_add(data_len, &priv->rx_bytes);
} else {
atomic_long_inc(&priv->rx_errors);
}
+ rcu_read_unlock();
+
return;
+error_rcu:
+ rcu_read_unlock();
error:
- atomic_long_inc(&priv->rx_errors);
kfree_skb(skb);
}
@@ -193,11 +186,15 @@ static void l2tp_eth_delete(struct l2tp_session *session)
if (session) {
spriv = l2tp_session_priv(session);
- dev = spriv->dev;
+
+ rtnl_lock();
+ dev = rtnl_dereference(spriv->dev);
if (dev) {
- unregister_netdev(dev);
- spriv->dev = NULL;
+ unregister_netdevice(dev);
+ rtnl_unlock();
module_put(THIS_MODULE);
+ } else {
+ rtnl_unlock();
}
}
}
@@ -207,9 +204,20 @@ static void l2tp_eth_show(struct seq_file *m, void *arg)
{
struct l2tp_session *session = arg;
struct l2tp_eth_sess *spriv = l2tp_session_priv(session);
- struct net_device *dev = spriv->dev;
+ struct net_device *dev;
+
+ rcu_read_lock();
+ dev = rcu_dereference(spriv->dev);
+ if (!dev) {
+ rcu_read_unlock();
+ return;
+ }
+ dev_hold(dev);
+ rcu_read_unlock();
seq_printf(m, " interface %s\n", dev->name);
+
+ dev_put(dev);
}
#endif
@@ -262,23 +270,17 @@ static void l2tp_eth_adjust_mtu(struct l2tp_tunnel *tunnel,
dev->needed_headroom += session->hdr_len;
}
-static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg)
+static int l2tp_eth_create(struct net *net, struct l2tp_tunnel *tunnel,
+ u32 session_id, u32 peer_session_id,
+ struct l2tp_session_cfg *cfg)
{
unsigned char name_assign_type;
struct net_device *dev;
char name[IFNAMSIZ];
- struct l2tp_tunnel *tunnel;
struct l2tp_session *session;
struct l2tp_eth *priv;
struct l2tp_eth_sess *spriv;
int rc;
- struct l2tp_eth_net *pn;
-
- tunnel = l2tp_tunnel_find(net, tunnel_id);
- if (!tunnel) {
- rc = -ENODEV;
- goto out;
- }
if (cfg->ifname) {
strlcpy(name, cfg->ifname, IFNAMSIZ);
@@ -292,14 +294,14 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p
peer_session_id, cfg);
if (IS_ERR(session)) {
rc = PTR_ERR(session);
- goto out;
+ goto err;
}
dev = alloc_netdev(sizeof(*priv), name, name_assign_type,
l2tp_eth_dev_setup);
if (!dev) {
rc = -ENOMEM;
- goto out_del_session;
+ goto err_sess;
}
dev_net_set(dev, net);
@@ -308,11 +310,8 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p
l2tp_eth_adjust_mtu(tunnel, session, dev);
priv = netdev_priv(dev);
- priv->dev = dev;
priv->session = session;
- INIT_LIST_HEAD(&priv->list);
- priv->tunnel_sock = tunnel->sock;
session->recv_skb = l2tp_eth_dev_recv;
session->session_close = l2tp_eth_delete;
#if IS_ENABLED(CONFIG_L2TP_DEBUGFS)
@@ -320,48 +319,50 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p
#endif
spriv = l2tp_session_priv(session);
- spriv->dev = dev;
- rc = register_netdev(dev);
- if (rc < 0)
- goto out_del_dev;
+ l2tp_session_inc_refcount(session);
- __module_get(THIS_MODULE);
- /* Must be done after register_netdev() */
- strlcpy(session->ifname, dev->name, IFNAMSIZ);
+ rtnl_lock();
- dev_hold(dev);
- pn = l2tp_eth_pernet(dev_net(dev));
- spin_lock(&pn->l2tp_eth_lock);
- list_add(&priv->list, &pn->l2tp_eth_dev_list);
- spin_unlock(&pn->l2tp_eth_lock);
+ /* Register both device and session while holding the rtnl lock. This
+ * ensures that l2tp_eth_delete() will see that there's a device to
+ * unregister, even if it happened to run before we assign spriv->dev.
+ */
+ rc = l2tp_session_register(session, tunnel);
+ if (rc < 0) {
+ rtnl_unlock();
+ goto err_sess_dev;
+ }
- return 0;
+ rc = register_netdevice(dev);
+ if (rc < 0) {
+ rtnl_unlock();
+ l2tp_session_delete(session);
+ l2tp_session_dec_refcount(session);
+ free_netdev(dev);
-out_del_dev:
- free_netdev(dev);
- spriv->dev = NULL;
-out_del_session:
- l2tp_session_delete(session);
-out:
- return rc;
-}
+ return rc;
+ }
-static __net_init int l2tp_eth_init_net(struct net *net)
-{
- struct l2tp_eth_net *pn = net_generic(net, l2tp_eth_net_id);
+ strlcpy(session->ifname, dev->name, IFNAMSIZ);
+ rcu_assign_pointer(spriv->dev, dev);
+
+ rtnl_unlock();
+
+ l2tp_session_dec_refcount(session);
- INIT_LIST_HEAD(&pn->l2tp_eth_dev_list);
- spin_lock_init(&pn->l2tp_eth_lock);
+ __module_get(THIS_MODULE);
return 0;
-}
-static struct pernet_operations l2tp_eth_net_ops = {
- .init = l2tp_eth_init_net,
- .id = &l2tp_eth_net_id,
- .size = sizeof(struct l2tp_eth_net),
-};
+err_sess_dev:
+ l2tp_session_dec_refcount(session);
+ free_netdev(dev);
+err_sess:
+ kfree(session);
+err:
+ return rc;
+}
static const struct l2tp_nl_cmd_ops l2tp_eth_nl_cmd_ops = {
@@ -376,25 +377,18 @@ static int __init l2tp_eth_init(void)
err = l2tp_nl_register_ops(L2TP_PWTYPE_ETH, &l2tp_eth_nl_cmd_ops);
if (err)
- goto out;
-
- err = register_pernet_device(&l2tp_eth_net_ops);
- if (err)
- goto out_unreg;
+ goto err;
pr_info("L2TP ethernet pseudowire support (L2TPv3)\n");
return 0;
-out_unreg:
- l2tp_nl_unregister_ops(L2TP_PWTYPE_ETH);
-out:
+err:
return err;
}
static void __exit l2tp_eth_exit(void)
{
- unregister_pernet_device(&l2tp_eth_net_ops);
l2tp_nl_unregister_ops(L2TP_PWTYPE_ETH);
}
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 4d322c1b7233..ff61124fdf59 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -123,6 +123,7 @@ static int l2tp_ip_recv(struct sk_buff *skb)
unsigned char *ptr, *optr;
struct l2tp_session *session;
struct l2tp_tunnel *tunnel = NULL;
+ struct iphdr *iph;
int length;
if (!pskb_may_pull(skb, 4))
@@ -143,7 +144,7 @@ static int l2tp_ip_recv(struct sk_buff *skb)
}
/* Ok, this is a data packet. Lookup the session. */
- session = l2tp_session_get(net, NULL, session_id, true);
+ session = l2tp_session_get(net, NULL, session_id);
if (!session)
goto discard;
@@ -178,24 +179,17 @@ pass_up:
goto discard;
tunnel_id = ntohl(*(__be32 *) &skb->data[4]);
- tunnel = l2tp_tunnel_find(net, tunnel_id);
- if (tunnel) {
- sk = tunnel->sock;
- sock_hold(sk);
- } else {
- struct iphdr *iph = (struct iphdr *) skb_network_header(skb);
-
- read_lock_bh(&l2tp_ip_lock);
- sk = __l2tp_ip_bind_lookup(net, iph->daddr, iph->saddr,
- inet_iif(skb), tunnel_id);
- if (!sk) {
- read_unlock_bh(&l2tp_ip_lock);
- goto discard;
- }
+ iph = (struct iphdr *)skb_network_header(skb);
- sock_hold(sk);
+ read_lock_bh(&l2tp_ip_lock);
+ sk = __l2tp_ip_bind_lookup(net, iph->daddr, iph->saddr, inet_iif(skb),
+ tunnel_id);
+ if (!sk) {
read_unlock_bh(&l2tp_ip_lock);
+ goto discard;
}
+ sock_hold(sk);
+ read_unlock_bh(&l2tp_ip_lock);
if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
goto discard_put;
@@ -205,8 +199,6 @@ pass_up:
return sk_receive_skb(sk, skb, 1);
discard_sess:
- if (session->deref)
- session->deref(session);
l2tp_session_dec_refcount(session);
goto discard;
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 88b397c30d86..192344688c06 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -136,6 +136,7 @@ static int l2tp_ip6_recv(struct sk_buff *skb)
unsigned char *ptr, *optr;
struct l2tp_session *session;
struct l2tp_tunnel *tunnel = NULL;
+ struct ipv6hdr *iph;
int length;
if (!pskb_may_pull(skb, 4))
@@ -156,7 +157,7 @@ static int l2tp_ip6_recv(struct sk_buff *skb)
}
/* Ok, this is a data packet. Lookup the session. */
- session = l2tp_session_get(net, NULL, session_id, true);
+ session = l2tp_session_get(net, NULL, session_id);
if (!session)
goto discard;
@@ -192,24 +193,17 @@ pass_up:
goto discard;
tunnel_id = ntohl(*(__be32 *) &skb->data[4]);
- tunnel = l2tp_tunnel_find(net, tunnel_id);
- if (tunnel) {
- sk = tunnel->sock;
- sock_hold(sk);
- } else {
- struct ipv6hdr *iph = ipv6_hdr(skb);
-
- read_lock_bh(&l2tp_ip6_lock);
- sk = __l2tp_ip6_bind_lookup(net, &iph->daddr, &iph->saddr,
- inet6_iif(skb), tunnel_id);
- if (!sk) {
- read_unlock_bh(&l2tp_ip6_lock);
- goto discard;
- }
+ iph = ipv6_hdr(skb);
- sock_hold(sk);
+ read_lock_bh(&l2tp_ip6_lock);
+ sk = __l2tp_ip6_bind_lookup(net, &iph->daddr, &iph->saddr,
+ inet6_iif(skb), tunnel_id);
+ if (!sk) {
read_unlock_bh(&l2tp_ip6_lock);
+ goto discard;
}
+ sock_hold(sk);
+ read_unlock_bh(&l2tp_ip6_lock);
if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
goto discard_put;
@@ -219,8 +213,6 @@ pass_up:
return sk_receive_skb(sk, skb, 1);
discard_sess:
- if (session->deref)
- session->deref(session);
l2tp_session_dec_refcount(session);
goto discard;
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index 12cfcd0ca807..a1f24fb2be98 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -48,8 +48,7 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq,
/* Accessed under genl lock */
static const struct l2tp_nl_cmd_ops *l2tp_nl_cmd_ops[__L2TP_PWTYPE_MAX];
-static struct l2tp_session *l2tp_nl_session_get(struct genl_info *info,
- bool do_ref)
+static struct l2tp_session *l2tp_nl_session_get(struct genl_info *info)
{
u32 tunnel_id;
u32 session_id;
@@ -60,15 +59,16 @@ static struct l2tp_session *l2tp_nl_session_get(struct genl_info *info,
if (info->attrs[L2TP_ATTR_IFNAME]) {
ifname = nla_data(info->attrs[L2TP_ATTR_IFNAME]);
- session = l2tp_session_get_by_ifname(net, ifname, do_ref);
+ session = l2tp_session_get_by_ifname(net, ifname);
} else if ((info->attrs[L2TP_ATTR_SESSION_ID]) &&
(info->attrs[L2TP_ATTR_CONN_ID])) {
tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
session_id = nla_get_u32(info->attrs[L2TP_ATTR_SESSION_ID]);
- tunnel = l2tp_tunnel_find(net, tunnel_id);
- if (tunnel)
- session = l2tp_session_get(net, tunnel, session_id,
- do_ref);
+ tunnel = l2tp_tunnel_get(net, tunnel_id);
+ if (tunnel) {
+ session = l2tp_session_get(net, tunnel, session_id);
+ l2tp_tunnel_dec_refcount(tunnel);
+ }
}
return session;
@@ -271,8 +271,8 @@ static int l2tp_nl_cmd_tunnel_delete(struct sk_buff *skb, struct genl_info *info
}
tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
- tunnel = l2tp_tunnel_find(net, tunnel_id);
- if (tunnel == NULL) {
+ tunnel = l2tp_tunnel_get(net, tunnel_id);
+ if (!tunnel) {
ret = -ENODEV;
goto out;
}
@@ -280,7 +280,9 @@ static int l2tp_nl_cmd_tunnel_delete(struct sk_buff *skb, struct genl_info *info
l2tp_tunnel_notify(&l2tp_nl_family, info,
tunnel, L2TP_CMD_TUNNEL_DELETE);
- (void) l2tp_tunnel_delete(tunnel);
+ l2tp_tunnel_delete(tunnel);
+
+ l2tp_tunnel_dec_refcount(tunnel);
out:
return ret;
@@ -299,8 +301,8 @@ static int l2tp_nl_cmd_tunnel_modify(struct sk_buff *skb, struct genl_info *info
}
tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
- tunnel = l2tp_tunnel_find(net, tunnel_id);
- if (tunnel == NULL) {
+ tunnel = l2tp_tunnel_get(net, tunnel_id);
+ if (!tunnel) {
ret = -ENODEV;
goto out;
}
@@ -311,6 +313,8 @@ static int l2tp_nl_cmd_tunnel_modify(struct sk_buff *skb, struct genl_info *info
ret = l2tp_tunnel_notify(&l2tp_nl_family, info,
tunnel, L2TP_CMD_TUNNEL_MODIFY);
+ l2tp_tunnel_dec_refcount(tunnel);
+
out:
return ret;
}
@@ -400,7 +404,7 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla
if (nla_put_u16(skb, L2TP_ATTR_UDP_SPORT, ntohs(inet->inet_sport)) ||
nla_put_u16(skb, L2TP_ATTR_UDP_DPORT, ntohs(inet->inet_dport)))
goto nla_put_failure;
- /* NOBREAK */
+ /* fall through */
case L2TP_ENCAPTYPE_IP:
#if IS_ENABLED(CONFIG_IPV6)
if (np) {
@@ -438,34 +442,37 @@ static int l2tp_nl_cmd_tunnel_get(struct sk_buff *skb, struct genl_info *info)
if (!info->attrs[L2TP_ATTR_CONN_ID]) {
ret = -EINVAL;
- goto out;
+ goto err;
}
tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
- tunnel = l2tp_tunnel_find(net, tunnel_id);
- if (tunnel == NULL) {
- ret = -ENODEV;
- goto out;
- }
-
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg) {
ret = -ENOMEM;
- goto out;
+ goto err;
+ }
+
+ tunnel = l2tp_tunnel_get(net, tunnel_id);
+ if (!tunnel) {
+ ret = -ENODEV;
+ goto err_nlmsg;
}
ret = l2tp_nl_tunnel_send(msg, info->snd_portid, info->snd_seq,
NLM_F_ACK, tunnel, L2TP_CMD_TUNNEL_GET);
if (ret < 0)
- goto err_out;
+ goto err_nlmsg_tunnel;
+
+ l2tp_tunnel_dec_refcount(tunnel);
return genlmsg_unicast(net, msg, info->snd_portid);
-err_out:
+err_nlmsg_tunnel:
+ l2tp_tunnel_dec_refcount(tunnel);
+err_nlmsg:
nlmsg_free(msg);
-
-out:
+err:
return ret;
}
@@ -509,8 +516,9 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
ret = -EINVAL;
goto out;
}
+
tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
- tunnel = l2tp_tunnel_find(net, tunnel_id);
+ tunnel = l2tp_tunnel_get(net, tunnel_id);
if (!tunnel) {
ret = -ENODEV;
goto out;
@@ -518,24 +526,24 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
if (!info->attrs[L2TP_ATTR_SESSION_ID]) {
ret = -EINVAL;
- goto out;
+ goto out_tunnel;
}
session_id = nla_get_u32(info->attrs[L2TP_ATTR_SESSION_ID]);
if (!info->attrs[L2TP_ATTR_PEER_SESSION_ID]) {
ret = -EINVAL;
- goto out;
+ goto out_tunnel;
}
peer_session_id = nla_get_u32(info->attrs[L2TP_ATTR_PEER_SESSION_ID]);
if (!info->attrs[L2TP_ATTR_PW_TYPE]) {
ret = -EINVAL;
- goto out;
+ goto out_tunnel;
}
cfg.pw_type = nla_get_u16(info->attrs[L2TP_ATTR_PW_TYPE]);
if (cfg.pw_type >= __L2TP_PWTYPE_MAX) {
ret = -EINVAL;
- goto out;
+ goto out_tunnel;
}
if (tunnel->version > 2) {
@@ -557,7 +565,7 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
u16 len = nla_len(info->attrs[L2TP_ATTR_COOKIE]);
if (len > 8) {
ret = -EINVAL;
- goto out;
+ goto out_tunnel;
}
cfg.cookie_len = len;
memcpy(&cfg.cookie[0], nla_data(info->attrs[L2TP_ATTR_COOKIE]), len);
@@ -566,7 +574,7 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
u16 len = nla_len(info->attrs[L2TP_ATTR_PEER_COOKIE]);
if (len > 8) {
ret = -EINVAL;
- goto out;
+ goto out_tunnel;
}
cfg.peer_cookie_len = len;
memcpy(&cfg.peer_cookie[0], nla_data(info->attrs[L2TP_ATTR_PEER_COOKIE]), len);
@@ -609,7 +617,7 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
if ((l2tp_nl_cmd_ops[cfg.pw_type] == NULL) ||
(l2tp_nl_cmd_ops[cfg.pw_type]->session_create == NULL)) {
ret = -EPROTONOSUPPORT;
- goto out;
+ goto out_tunnel;
}
/* Check that pseudowire-specific params are present */
@@ -619,7 +627,7 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
case L2TP_PWTYPE_ETH_VLAN:
if (!info->attrs[L2TP_ATTR_VLAN_ID]) {
ret = -EINVAL;
- goto out;
+ goto out_tunnel;
}
break;
case L2TP_PWTYPE_ETH:
@@ -633,13 +641,13 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
break;
}
- ret = -EPROTONOSUPPORT;
- if (l2tp_nl_cmd_ops[cfg.pw_type]->session_create)
- ret = (*l2tp_nl_cmd_ops[cfg.pw_type]->session_create)(net, tunnel_id,
- session_id, peer_session_id, &cfg);
+ ret = l2tp_nl_cmd_ops[cfg.pw_type]->session_create(net, tunnel,
+ session_id,
+ peer_session_id,
+ &cfg);
if (ret >= 0) {
- session = l2tp_session_get(net, tunnel, session_id, false);
+ session = l2tp_session_get(net, tunnel, session_id);
if (session) {
ret = l2tp_session_notify(&l2tp_nl_family, info, session,
L2TP_CMD_SESSION_CREATE);
@@ -647,6 +655,8 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
}
}
+out_tunnel:
+ l2tp_tunnel_dec_refcount(tunnel);
out:
return ret;
}
@@ -657,7 +667,7 @@ static int l2tp_nl_cmd_session_delete(struct sk_buff *skb, struct genl_info *inf
struct l2tp_session *session;
u16 pw_type;
- session = l2tp_nl_session_get(info, true);
+ session = l2tp_nl_session_get(info);
if (session == NULL) {
ret = -ENODEV;
goto out;
@@ -671,8 +681,6 @@ static int l2tp_nl_cmd_session_delete(struct sk_buff *skb, struct genl_info *inf
if (l2tp_nl_cmd_ops[pw_type] && l2tp_nl_cmd_ops[pw_type]->session_delete)
ret = (*l2tp_nl_cmd_ops[pw_type]->session_delete)(session);
- if (session->deref)
- session->deref(session);
l2tp_session_dec_refcount(session);
out:
@@ -684,7 +692,7 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf
int ret = 0;
struct l2tp_session *session;
- session = l2tp_nl_session_get(info, false);
+ session = l2tp_nl_session_get(info);
if (session == NULL) {
ret = -ENODEV;
goto out;
@@ -816,7 +824,7 @@ static int l2tp_nl_cmd_session_get(struct sk_buff *skb, struct genl_info *info)
struct sk_buff *msg;
int ret;
- session = l2tp_nl_session_get(info, false);
+ session = l2tp_nl_session_get(info);
if (session == NULL) {
ret = -ENODEV;
goto err;
@@ -862,7 +870,7 @@ static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback
goto out;
}
- session = l2tp_session_get_nth(tunnel, si, false);
+ session = l2tp_session_get_nth(tunnel, si);
if (session == NULL) {
ti++;
tunnel = NULL;
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index f0edb7209079..b412fc3351dc 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -122,10 +122,11 @@
struct pppol2tp_session {
int owner; /* pid that opened the socket */
- struct sock *sock; /* Pointer to the session
+ struct mutex sk_lock; /* Protects .sk */
+ struct sock __rcu *sk; /* Pointer to the session
* PPPoX socket */
- struct sock *tunnel_sock; /* Pointer to the tunnel UDP
- * socket */
+ struct sock *__sk; /* Copy of .sk, for cleanup */
+ struct rcu_head rcu; /* For asynchronous release */
int flags; /* accessed by PPPIOCGFLAGS.
* Unused. */
};
@@ -138,6 +139,24 @@ static const struct ppp_channel_ops pppol2tp_chan_ops = {
static const struct proto_ops pppol2tp_ops;
+/* Retrieves the pppol2tp socket associated to a session.
+ * A reference is held on the returned socket, so this function must be paired
+ * with sock_put().
+ */
+static struct sock *pppol2tp_session_get_sock(struct l2tp_session *session)
+{
+ struct pppol2tp_session *ps = l2tp_session_priv(session);
+ struct sock *sk;
+
+ rcu_read_lock();
+ sk = rcu_dereference(ps->sk);
+ if (sk)
+ sock_hold(sk);
+ rcu_read_unlock();
+
+ return sk;
+}
+
/* Helpers to obtain tunnel/session contexts from sockets.
*/
static inline struct l2tp_session *pppol2tp_sock_to_session(struct sock *sk)
@@ -224,7 +243,8 @@ static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int
/* If the socket is bound, send it in to PPP's input queue. Otherwise
* queue it on the session socket.
*/
- sk = ps->sock;
+ rcu_read_lock();
+ sk = rcu_dereference(ps->sk);
if (sk == NULL)
goto no_sock;
@@ -247,30 +267,16 @@ static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int
kfree_skb(skb);
}
}
+ rcu_read_unlock();
return;
no_sock:
+ rcu_read_unlock();
l2tp_info(session, L2TP_MSG_DATA, "%s: no socket\n", session->name);
kfree_skb(skb);
}
-static void pppol2tp_session_sock_hold(struct l2tp_session *session)
-{
- struct pppol2tp_session *ps = l2tp_session_priv(session);
-
- if (ps->sock)
- sock_hold(ps->sock);
-}
-
-static void pppol2tp_session_sock_put(struct l2tp_session *session)
-{
- struct pppol2tp_session *ps = l2tp_session_priv(session);
-
- if (ps->sock)
- sock_put(ps->sock);
-}
-
/************************************************************************
* Transmit handling
***********************************************************************/
@@ -287,7 +293,6 @@ static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m,
int error;
struct l2tp_session *session;
struct l2tp_tunnel *tunnel;
- struct pppol2tp_session *ps;
int uhlen;
error = -ENOTCONN;
@@ -300,10 +305,7 @@ static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m,
if (session == NULL)
goto error;
- ps = l2tp_session_priv(session);
- tunnel = l2tp_sock_to_tunnel(ps->tunnel_sock);
- if (tunnel == NULL)
- goto error_put_sess;
+ tunnel = session->tunnel;
uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(struct udphdr) : 0;
@@ -314,7 +316,7 @@ static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m,
2 + total_len, /* 2 bytes for PPP_ALLSTATIONS & PPP_UI */
0, GFP_KERNEL);
if (!skb)
- goto error_put_sess_tun;
+ goto error_put_sess;
/* Reserve space for headers. */
skb_reserve(skb, NET_SKB_PAD);
@@ -332,20 +334,17 @@ static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m,
error = memcpy_from_msg(skb_put(skb, total_len), m, total_len);
if (error < 0) {
kfree_skb(skb);
- goto error_put_sess_tun;
+ goto error_put_sess;
}
local_bh_disable();
l2tp_xmit_skb(session, skb, session->hdr_len);
local_bh_enable();
- sock_put(ps->tunnel_sock);
sock_put(sk);
return total_len;
-error_put_sess_tun:
- sock_put(ps->tunnel_sock);
error_put_sess:
sock_put(sk);
error:
@@ -369,10 +368,8 @@ error:
static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
{
struct sock *sk = (struct sock *) chan->private;
- struct sock *sk_tun;
struct l2tp_session *session;
struct l2tp_tunnel *tunnel;
- struct pppol2tp_session *ps;
int uhlen, headroom;
if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED))
@@ -383,13 +380,7 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
if (session == NULL)
goto abort;
- ps = l2tp_session_priv(session);
- sk_tun = ps->tunnel_sock;
- if (sk_tun == NULL)
- goto abort_put_sess;
- tunnel = l2tp_sock_to_tunnel(sk_tun);
- if (tunnel == NULL)
- goto abort_put_sess;
+ tunnel = session->tunnel;
uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(struct udphdr) : 0;
headroom = NET_SKB_PAD +
@@ -398,7 +389,7 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
session->hdr_len + /* L2TP header */
2; /* 2 bytes for PPP_ALLSTATIONS & PPP_UI */
if (skb_cow_head(skb, headroom))
- goto abort_put_sess_tun;
+ goto abort_put_sess;
/* Setup PPP header */
__skb_push(skb, 2);
@@ -409,12 +400,10 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
l2tp_xmit_skb(session, skb, session->hdr_len);
local_bh_enable();
- sock_put(sk_tun);
sock_put(sk);
+
return 1;
-abort_put_sess_tun:
- sock_put(sk_tun);
abort_put_sess:
sock_put(sk);
abort:
@@ -431,16 +420,15 @@ abort:
*/
static void pppol2tp_session_close(struct l2tp_session *session)
{
- struct pppol2tp_session *ps = l2tp_session_priv(session);
- struct sock *sk = ps->sock;
- struct socket *sock = sk->sk_socket;
+ struct sock *sk;
BUG_ON(session->magic != L2TP_SESSION_MAGIC);
- if (sock) {
- inet_shutdown(sock, SEND_SHUTDOWN);
- /* Don't let the session go away before our socket does */
- l2tp_session_inc_refcount(session);
+ sk = pppol2tp_session_get_sock(session);
+ if (sk) {
+ if (sk->sk_socket)
+ inet_shutdown(sk->sk_socket, SEND_SHUTDOWN);
+ sock_put(sk);
}
}
@@ -461,6 +449,14 @@ static void pppol2tp_session_destruct(struct sock *sk)
}
}
+static void pppol2tp_put_sk(struct rcu_head *head)
+{
+ struct pppol2tp_session *ps;
+
+ ps = container_of(head, typeof(*ps), rcu);
+ sock_put(ps->__sk);
+}
+
/* Called when the PPPoX socket (session) is closed.
*/
static int pppol2tp_release(struct socket *sock)
@@ -486,11 +482,23 @@ static int pppol2tp_release(struct socket *sock)
session = pppol2tp_sock_to_session(sk);
- /* Purge any queued data */
if (session != NULL) {
- __l2tp_session_unhash(session);
- l2tp_session_queue_purge(session);
- sock_put(sk);
+ struct pppol2tp_session *ps;
+
+ l2tp_session_delete(session);
+
+ ps = l2tp_session_priv(session);
+ mutex_lock(&ps->sk_lock);
+ ps->__sk = rcu_dereference_protected(ps->sk,
+ lockdep_is_held(&ps->sk_lock));
+ RCU_INIT_POINTER(ps->sk, NULL);
+ mutex_unlock(&ps->sk_lock);
+ call_rcu(&ps->rcu, pppol2tp_put_sk);
+
+ /* Rely on the sock_put() call at the end of the function for
+ * dropping the reference held by pppol2tp_sock_to_session().
+ * The last reference will be dropped by pppol2tp_put_sk().
+ */
}
release_sock(sk);
@@ -557,16 +565,46 @@ out:
static void pppol2tp_show(struct seq_file *m, void *arg)
{
struct l2tp_session *session = arg;
- struct pppol2tp_session *ps = l2tp_session_priv(session);
+ struct sock *sk;
+
+ sk = pppol2tp_session_get_sock(session);
+ if (sk) {
+ struct pppox_sock *po = pppox_sk(sk);
- if (ps) {
- struct pppox_sock *po = pppox_sk(ps->sock);
- if (po)
- seq_printf(m, " interface %s\n", ppp_dev_name(&po->chan));
+ seq_printf(m, " interface %s\n", ppp_dev_name(&po->chan));
+ sock_put(sk);
}
}
#endif
+static void pppol2tp_session_init(struct l2tp_session *session)
+{
+ struct pppol2tp_session *ps;
+ struct dst_entry *dst;
+
+ session->recv_skb = pppol2tp_recv;
+ session->session_close = pppol2tp_session_close;
+#if IS_ENABLED(CONFIG_L2TP_DEBUGFS)
+ session->show = pppol2tp_show;
+#endif
+
+ ps = l2tp_session_priv(session);
+ mutex_init(&ps->sk_lock);
+ ps->owner = current->pid;
+
+ /* If PMTU discovery was enabled, use the MTU that was discovered */
+ dst = sk_dst_get(session->tunnel->sock);
+ if (dst) {
+ u32 pmtu = dst_mtu(dst);
+
+ if (pmtu) {
+ session->mtu = pmtu - PPPOL2TP_HEADER_OVERHEAD;
+ session->mru = pmtu - PPPOL2TP_HEADER_OVERHEAD;
+ }
+ dst_release(dst);
+ }
+}
+
/* connect() handler. Attach a PPPoX socket to a tunnel UDP socket
*/
static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
@@ -578,12 +616,12 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
struct l2tp_session *session = NULL;
struct l2tp_tunnel *tunnel;
struct pppol2tp_session *ps;
- struct dst_entry *dst;
struct l2tp_session_cfg cfg = { 0, };
int error = 0;
u32 tunnel_id, peer_tunnel_id;
u32 session_id, peer_session_id;
bool drop_refcnt = false;
+ bool drop_tunnel = false;
int ver = 2;
int fd;
@@ -652,7 +690,9 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
if (tunnel_id == 0)
goto end;
- tunnel = l2tp_tunnel_find(sock_net(sk), tunnel_id);
+ tunnel = l2tp_tunnel_get(sock_net(sk), tunnel_id);
+ if (tunnel)
+ drop_tunnel = true;
/* Special case: create tunnel context if session_id and
* peer_session_id is 0. Otherwise look up tunnel using supplied
@@ -685,7 +725,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
if (tunnel->peer_tunnel_id == 0)
tunnel->peer_tunnel_id = peer_tunnel_id;
- session = l2tp_session_get(sock_net(sk), tunnel, session_id, false);
+ session = l2tp_session_get(sock_net(sk), tunnel, session_id);
if (session) {
drop_refcnt = true;
ps = l2tp_session_priv(session);
@@ -693,13 +733,10 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
/* Using a pre-existing session is fine as long as it hasn't
* been connected yet.
*/
- if (ps->sock) {
- error = -EEXIST;
- goto end;
- }
-
- /* consistency checks */
- if (ps->tunnel_sock != tunnel->sock) {
+ mutex_lock(&ps->sk_lock);
+ if (rcu_dereference_protected(ps->sk,
+ lockdep_is_held(&ps->sk_lock))) {
+ mutex_unlock(&ps->sk_lock);
error = -EEXIST;
goto end;
}
@@ -715,35 +752,19 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
error = PTR_ERR(session);
goto end;
}
- }
-
- /* Associate session with its PPPoL2TP socket */
- ps = l2tp_session_priv(session);
- ps->owner = current->pid;
- ps->sock = sk;
- ps->tunnel_sock = tunnel->sock;
-
- session->recv_skb = pppol2tp_recv;
- session->session_close = pppol2tp_session_close;
-#if IS_ENABLED(CONFIG_L2TP_DEBUGFS)
- session->show = pppol2tp_show;
-#endif
-
- /* We need to know each time a skb is dropped from the reorder
- * queue.
- */
- session->ref = pppol2tp_session_sock_hold;
- session->deref = pppol2tp_session_sock_put;
- /* If PMTU discovery was enabled, use the MTU that was discovered */
- dst = sk_dst_get(tunnel->sock);
- if (dst != NULL) {
- u32 pmtu = dst_mtu(dst);
+ pppol2tp_session_init(session);
+ ps = l2tp_session_priv(session);
+ l2tp_session_inc_refcount(session);
- if (pmtu != 0)
- session->mtu = session->mru = pmtu -
- PPPOL2TP_HEADER_OVERHEAD;
- dst_release(dst);
+ mutex_lock(&ps->sk_lock);
+ error = l2tp_session_register(session, tunnel);
+ if (error < 0) {
+ mutex_unlock(&ps->sk_lock);
+ kfree(session);
+ goto end;
+ }
+ drop_refcnt = true;
}
/* Special case: if source & dest session_id == 0x0000, this
@@ -768,12 +789,23 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
po->chan.mtu = session->mtu;
error = ppp_register_net_channel(sock_net(sk), &po->chan);
- if (error)
+ if (error) {
+ mutex_unlock(&ps->sk_lock);
goto end;
+ }
out_no_ppp:
/* This is how we get the session context from the socket. */
sk->sk_user_data = session;
+ rcu_assign_pointer(ps->sk, sk);
+ mutex_unlock(&ps->sk_lock);
+
+ /* Keep the reference we've grabbed on the session: sk doesn't expect
+ * the session to disappear. pppol2tp_session_destruct() is responsible
+ * for dropping it.
+ */
+ drop_refcnt = false;
+
sk->sk_state = PPPOX_CONNECTED;
l2tp_info(session, L2TP_MSG_CONTROL, "%s: created\n",
session->name);
@@ -781,6 +813,8 @@ out_no_ppp:
end:
if (drop_refcnt)
l2tp_session_dec_refcount(session);
+ if (drop_tunnel)
+ l2tp_tunnel_dec_refcount(tunnel);
release_sock(sk);
return error;
@@ -788,25 +822,19 @@ end:
#ifdef CONFIG_L2TP_V3
-/* Called when creating sessions via the netlink interface.
- */
-static int pppol2tp_session_create(struct net *net, u32 tunnel_id, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg)
+/* Called when creating sessions via the netlink interface. */
+static int pppol2tp_session_create(struct net *net, struct l2tp_tunnel *tunnel,
+ u32 session_id, u32 peer_session_id,
+ struct l2tp_session_cfg *cfg)
{
int error;
- struct l2tp_tunnel *tunnel;
struct l2tp_session *session;
- struct pppol2tp_session *ps;
-
- tunnel = l2tp_tunnel_find(net, tunnel_id);
-
- /* Error if we can't find the tunnel */
- error = -ENOENT;
- if (tunnel == NULL)
- goto out;
/* Error if tunnel socket is not prepped */
- if (tunnel->sock == NULL)
- goto out;
+ if (!tunnel->sock) {
+ error = -ENOENT;
+ goto err;
+ }
/* Default MTU values. */
if (cfg->mtu == 0)
@@ -820,18 +848,20 @@ static int pppol2tp_session_create(struct net *net, u32 tunnel_id, u32 session_i
peer_session_id, cfg);
if (IS_ERR(session)) {
error = PTR_ERR(session);
- goto out;
+ goto err;
}
- ps = l2tp_session_priv(session);
- ps->tunnel_sock = tunnel->sock;
+ pppol2tp_session_init(session);
- l2tp_info(session, L2TP_MSG_CONTROL, "%s: created\n",
- session->name);
+ error = l2tp_session_register(session, tunnel);
+ if (error < 0)
+ goto err_sess;
- error = 0;
+ return 0;
-out:
+err_sess:
+ kfree(session);
+err:
return error;
}
@@ -862,9 +892,7 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
goto end;
pls = l2tp_session_priv(session);
- tunnel = l2tp_sock_to_tunnel(pls->tunnel_sock);
- if (tunnel == NULL)
- goto end_put_sess;
+ tunnel = session->tunnel;
inet = inet_sk(tunnel->sock);
if ((tunnel->version == 2) && (tunnel->sock->sk_family == AF_INET)) {
@@ -944,8 +972,6 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
*usockaddr_len = len;
error = 0;
- sock_put(pls->tunnel_sock);
-end_put_sess:
sock_put(sk);
end:
return error;
@@ -992,8 +1018,9 @@ static int pppol2tp_session_ioctl(struct l2tp_session *session,
"%s: pppol2tp_session_ioctl(cmd=%#x, arg=%#lx)\n",
session->name, cmd, arg);
- sk = ps->sock;
- sock_hold(sk);
+ sk = pppol2tp_session_get_sock(session);
+ if (!sk)
+ return -EBADR;
switch (cmd) {
case SIOCGIFMTU:
@@ -1140,13 +1167,11 @@ static int pppol2tp_tunnel_ioctl(struct l2tp_tunnel *tunnel,
/* resend to session ioctl handler */
struct l2tp_session *session =
l2tp_session_get(sock_net(sk), tunnel,
- stats.session_id, true);
+ stats.session_id);
if (session) {
err = pppol2tp_session_ioctl(session, cmd,
arg);
- if (session->deref)
- session->deref(session);
l2tp_session_dec_refcount(session);
} else {
err = -EBADR;
@@ -1185,7 +1210,6 @@ static int pppol2tp_ioctl(struct socket *sock, unsigned int cmd,
struct sock *sk = sock->sk;
struct l2tp_session *session;
struct l2tp_tunnel *tunnel;
- struct pppol2tp_session *ps;
int err;
if (!sk)
@@ -1209,16 +1233,10 @@ static int pppol2tp_ioctl(struct socket *sock, unsigned int cmd,
/* Special case: if session's session_id is zero, treat ioctl as a
* tunnel ioctl
*/
- ps = l2tp_session_priv(session);
if ((session->session_id == 0) &&
(session->peer_session_id == 0)) {
- err = -EBADF;
- tunnel = l2tp_sock_to_tunnel(ps->tunnel_sock);
- if (tunnel == NULL)
- goto end_put_sess;
-
+ tunnel = session->tunnel;
err = pppol2tp_tunnel_ioctl(tunnel, cmd, arg);
- sock_put(ps->tunnel_sock);
goto end_put_sess;
}
@@ -1270,7 +1288,6 @@ static int pppol2tp_session_setsockopt(struct sock *sk,
int optname, int val)
{
int err = 0;
- struct pppol2tp_session *ps = l2tp_session_priv(session);
switch (optname) {
case PPPOL2TP_SO_RECVSEQ:
@@ -1291,8 +1308,8 @@ static int pppol2tp_session_setsockopt(struct sock *sk,
}
session->send_seq = !!val;
{
- struct sock *ssk = ps->sock;
- struct pppox_sock *po = pppox_sk(ssk);
+ struct pppox_sock *po = pppox_sk(sk);
+
po->chan.hdrlen = val ? PPPOL2TP_L2TP_HDR_SIZE_SEQ :
PPPOL2TP_L2TP_HDR_SIZE_NOSEQ;
}
@@ -1345,7 +1362,6 @@ static int pppol2tp_setsockopt(struct socket *sock, int level, int optname,
struct sock *sk = sock->sk;
struct l2tp_session *session;
struct l2tp_tunnel *tunnel;
- struct pppol2tp_session *ps;
int val;
int err;
@@ -1370,20 +1386,14 @@ static int pppol2tp_setsockopt(struct socket *sock, int level, int optname,
/* Special case: if session_id == 0x0000, treat as operation on tunnel
*/
- ps = l2tp_session_priv(session);
if ((session->session_id == 0) &&
(session->peer_session_id == 0)) {
- err = -EBADF;
- tunnel = l2tp_sock_to_tunnel(ps->tunnel_sock);
- if (tunnel == NULL)
- goto end_put_sess;
-
+ tunnel = session->tunnel;
err = pppol2tp_tunnel_setsockopt(sk, tunnel, optname, val);
- sock_put(ps->tunnel_sock);
- } else
+ } else {
err = pppol2tp_session_setsockopt(sk, session, optname, val);
+ }
-end_put_sess:
sock_put(sk);
end:
return err;
@@ -1471,7 +1481,6 @@ static int pppol2tp_getsockopt(struct socket *sock, int level, int optname,
struct l2tp_tunnel *tunnel;
int val, len;
int err;
- struct pppol2tp_session *ps;
if (level != SOL_PPPOL2TP)
return -EINVAL;
@@ -1495,16 +1504,10 @@ static int pppol2tp_getsockopt(struct socket *sock, int level, int optname,
goto end;
/* Special case: if session_id == 0x0000, treat as operation on tunnel */
- ps = l2tp_session_priv(session);
if ((session->session_id == 0) &&
(session->peer_session_id == 0)) {
- err = -EBADF;
- tunnel = l2tp_sock_to_tunnel(ps->tunnel_sock);
- if (tunnel == NULL)
- goto end_put_sess;
-
+ tunnel = session->tunnel;
err = pppol2tp_tunnel_getsockopt(sk, tunnel, optname, &val);
- sock_put(ps->tunnel_sock);
if (err)
goto end_put_sess;
} else {
@@ -1563,7 +1566,7 @@ static void pppol2tp_next_tunnel(struct net *net, struct pppol2tp_seq_data *pd)
static void pppol2tp_next_session(struct net *net, struct pppol2tp_seq_data *pd)
{
- pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx, true);
+ pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx);
pd->session_idx++;
if (pd->session == NULL) {
@@ -1631,8 +1634,9 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v)
{
struct l2tp_session *session = v;
struct l2tp_tunnel *tunnel = session->tunnel;
- struct pppol2tp_session *ps = l2tp_session_priv(session);
- struct pppox_sock *po = pppox_sk(ps->sock);
+ unsigned char state;
+ char user_data_ok;
+ struct sock *sk;
u32 ip = 0;
u16 port = 0;
@@ -1642,6 +1646,15 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v)
port = ntohs(inet->inet_sport);
}
+ sk = pppol2tp_session_get_sock(session);
+ if (sk) {
+ state = sk->sk_state;
+ user_data_ok = (session == sk->sk_user_data) ? 'Y' : 'N';
+ } else {
+ state = 0;
+ user_data_ok = 'N';
+ }
+
seq_printf(m, " SESSION '%s' %08X/%d %04X/%04X -> "
"%04X/%04X %d %c\n",
session->name, ip, port,
@@ -1649,9 +1662,7 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v)
session->session_id,
tunnel->peer_tunnel_id,
session->peer_session_id,
- ps->sock->sk_state,
- (session == ps->sock->sk_user_data) ?
- 'Y' : 'N');
+ state, user_data_ok);
seq_printf(m, " %d/%d/%c/%c/%s %08x %u\n",
session->mtu, session->mru,
session->recv_seq ? 'R' : '-',
@@ -1668,8 +1679,12 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v)
atomic_long_read(&session->stats.rx_bytes),
atomic_long_read(&session->stats.rx_errors));
- if (po)
+ if (sk) {
+ struct pppox_sock *po = pppox_sk(sk);
+
seq_printf(m, " interface %s\n", ppp_dev_name(&po->chan));
+ sock_put(sk);
+ }
}
static int pppol2tp_seq_show(struct seq_file *m, void *v)
@@ -1694,8 +1709,6 @@ static int pppol2tp_seq_show(struct seq_file *m, void *v)
pppol2tp_seq_tunnel_show(m, pd->tunnel);
} else {
pppol2tp_seq_session_show(m, pd->session);
- if (pd->session->deref)
- pd->session->deref(pd->session);
l2tp_session_dec_refcount(pd->session);
}
diff --git a/net/lapb/lapb_iface.c b/net/lapb/lapb_iface.c
index e15314e3b464..db6e0afe3a20 100644
--- a/net/lapb/lapb_iface.c
+++ b/net/lapb/lapb_iface.c
@@ -127,8 +127,8 @@ static struct lapb_cb *lapb_create_cb(void)
skb_queue_head_init(&lapb->write_queue);
skb_queue_head_init(&lapb->ack_queue);
- init_timer(&lapb->t1timer);
- init_timer(&lapb->t2timer);
+ timer_setup(&lapb->t1timer, NULL, 0);
+ timer_setup(&lapb->t2timer, NULL, 0);
lapb->t1 = LAPB_DEFAULT_T1;
lapb->t2 = LAPB_DEFAULT_T2;
diff --git a/net/lapb/lapb_timer.c b/net/lapb/lapb_timer.c
index 1a5535bc3b8d..8bb469cb3abe 100644
--- a/net/lapb/lapb_timer.c
+++ b/net/lapb/lapb_timer.c
@@ -35,15 +35,14 @@
#include <linux/interrupt.h>
#include <net/lapb.h>
-static void lapb_t1timer_expiry(unsigned long);
-static void lapb_t2timer_expiry(unsigned long);
+static void lapb_t1timer_expiry(struct timer_list *);
+static void lapb_t2timer_expiry(struct timer_list *);
void lapb_start_t1timer(struct lapb_cb *lapb)
{
del_timer(&lapb->t1timer);
- lapb->t1timer.data = (unsigned long)lapb;
- lapb->t1timer.function = &lapb_t1timer_expiry;
+ lapb->t1timer.function = (TIMER_FUNC_TYPE)lapb_t1timer_expiry;
lapb->t1timer.expires = jiffies + lapb->t1;
add_timer(&lapb->t1timer);
@@ -53,8 +52,7 @@ void lapb_start_t2timer(struct lapb_cb *lapb)
{
del_timer(&lapb->t2timer);
- lapb->t2timer.data = (unsigned long)lapb;
- lapb->t2timer.function = &lapb_t2timer_expiry;
+ lapb->t2timer.function = (TIMER_FUNC_TYPE)lapb_t2timer_expiry;
lapb->t2timer.expires = jiffies + lapb->t2;
add_timer(&lapb->t2timer);
@@ -75,9 +73,9 @@ int lapb_t1timer_running(struct lapb_cb *lapb)
return timer_pending(&lapb->t1timer);
}
-static void lapb_t2timer_expiry(unsigned long param)
+static void lapb_t2timer_expiry(struct timer_list *t)
{
- struct lapb_cb *lapb = (struct lapb_cb *)param;
+ struct lapb_cb *lapb = from_timer(lapb, t, t2timer);
if (lapb->condition & LAPB_ACK_PENDING_CONDITION) {
lapb->condition &= ~LAPB_ACK_PENDING_CONDITION;
@@ -85,9 +83,9 @@ static void lapb_t2timer_expiry(unsigned long param)
}
}
-static void lapb_t1timer_expiry(unsigned long param)
+static void lapb_t1timer_expiry(struct timer_list *t)
{
- struct lapb_cb *lapb = (struct lapb_cb *)param;
+ struct lapb_cb *lapb = from_timer(lapb, t, t1timer);
switch (lapb->state) {
diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c
index ea225bd2672c..f59648018060 100644
--- a/net/llc/llc_c_ac.c
+++ b/net/llc/llc_c_ac.c
@@ -1318,9 +1318,8 @@ static int llc_conn_ac_inc_vs_by_1(struct sock *sk, struct sk_buff *skb)
return 0;
}
-static void llc_conn_tmr_common_cb(unsigned long timeout_data, u8 type)
+static void llc_conn_tmr_common_cb(struct sock *sk, u8 type)
{
- struct sock *sk = (struct sock *)timeout_data;
struct sk_buff *skb = alloc_skb(0, GFP_ATOMIC);
bh_lock_sock(sk);
@@ -1334,24 +1333,32 @@ static void llc_conn_tmr_common_cb(unsigned long timeout_data, u8 type)
bh_unlock_sock(sk);
}
-void llc_conn_pf_cycle_tmr_cb(unsigned long timeout_data)
+void llc_conn_pf_cycle_tmr_cb(struct timer_list *t)
{
- llc_conn_tmr_common_cb(timeout_data, LLC_CONN_EV_TYPE_P_TMR);
+ struct llc_sock *llc = from_timer(llc, t, pf_cycle_timer.timer);
+
+ llc_conn_tmr_common_cb(&llc->sk, LLC_CONN_EV_TYPE_P_TMR);
}
-void llc_conn_busy_tmr_cb(unsigned long timeout_data)
+void llc_conn_busy_tmr_cb(struct timer_list *t)
{
- llc_conn_tmr_common_cb(timeout_data, LLC_CONN_EV_TYPE_BUSY_TMR);
+ struct llc_sock *llc = from_timer(llc, t, busy_state_timer.timer);
+
+ llc_conn_tmr_common_cb(&llc->sk, LLC_CONN_EV_TYPE_BUSY_TMR);
}
-void llc_conn_ack_tmr_cb(unsigned long timeout_data)
+void llc_conn_ack_tmr_cb(struct timer_list *t)
{
- llc_conn_tmr_common_cb(timeout_data, LLC_CONN_EV_TYPE_ACK_TMR);
+ struct llc_sock *llc = from_timer(llc, t, ack_timer.timer);
+
+ llc_conn_tmr_common_cb(&llc->sk, LLC_CONN_EV_TYPE_ACK_TMR);
}
-void llc_conn_rej_tmr_cb(unsigned long timeout_data)
+void llc_conn_rej_tmr_cb(struct timer_list *t)
{
- llc_conn_tmr_common_cb(timeout_data, LLC_CONN_EV_TYPE_REJ_TMR);
+ struct llc_sock *llc = from_timer(llc, t, rej_sent_timer.timer);
+
+ llc_conn_tmr_common_cb(&llc->sk, LLC_CONN_EV_TYPE_REJ_TMR);
}
int llc_conn_ac_rst_vs(struct sock *sk, struct sk_buff *skb)
diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c
index 5e91b47f0d2a..9177dbb16dce 100644
--- a/net/llc/llc_conn.c
+++ b/net/llc/llc_conn.c
@@ -902,20 +902,16 @@ static void llc_sk_init(struct sock *sk)
llc->inc_cntr = llc->dec_cntr = 2;
llc->dec_step = llc->connect_step = 1;
- setup_timer(&llc->ack_timer.timer, llc_conn_ack_tmr_cb,
- (unsigned long)sk);
+ timer_setup(&llc->ack_timer.timer, llc_conn_ack_tmr_cb, 0);
llc->ack_timer.expire = sysctl_llc2_ack_timeout;
- setup_timer(&llc->pf_cycle_timer.timer, llc_conn_pf_cycle_tmr_cb,
- (unsigned long)sk);
+ timer_setup(&llc->pf_cycle_timer.timer, llc_conn_pf_cycle_tmr_cb, 0);
llc->pf_cycle_timer.expire = sysctl_llc2_p_timeout;
- setup_timer(&llc->rej_sent_timer.timer, llc_conn_rej_tmr_cb,
- (unsigned long)sk);
+ timer_setup(&llc->rej_sent_timer.timer, llc_conn_rej_tmr_cb, 0);
llc->rej_sent_timer.expire = sysctl_llc2_rej_timeout;
- setup_timer(&llc->busy_state_timer.timer, llc_conn_busy_tmr_cb,
- (unsigned long)sk);
+ timer_setup(&llc->busy_state_timer.timer, llc_conn_busy_tmr_cb, 0);
llc->busy_state_timer.expire = sysctl_llc2_busy_timeout;
llc->n2 = 2; /* max retransmit */
diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c
index dd3e83328ad5..82cb93f66b9b 100644
--- a/net/llc/llc_input.c
+++ b/net/llc/llc_input.c
@@ -193,7 +193,7 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev,
*/
rcv = rcu_dereference(sap->rcv_func);
dest = llc_pdu_type(skb);
- sap_handler = dest ? ACCESS_ONCE(llc_type_handlers[dest - 1]) : NULL;
+ sap_handler = dest ? READ_ONCE(llc_type_handlers[dest - 1]) : NULL;
if (unlikely(!sap_handler)) {
if (rcv)
rcv(skb, dev, pt, orig_dev);
@@ -214,7 +214,7 @@ drop:
kfree_skb(skb);
goto out;
handle_station:
- sta_handler = ACCESS_ONCE(llc_station_handler);
+ sta_handler = READ_ONCE(llc_station_handler);
if (!sta_handler)
goto drop;
sta_handler(skb);
diff --git a/net/llc/sysctl_net_llc.c b/net/llc/sysctl_net_llc.c
index 799bafc2af39..8443a6d841b0 100644
--- a/net/llc/sysctl_net_llc.c
+++ b/net/llc/sysctl_net_llc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* sysctl_net_llc.c: sysctl interface to LLC net subsystem.
*
diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile
index 282912245938..e3589ade62e0 100644
--- a/net/mac80211/Makefile
+++ b/net/mac80211/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_MAC80211) += mac80211.o
# mac80211 objects
@@ -6,6 +7,7 @@ mac80211-y := \
driver-ops.o \
sta_info.o \
wep.o \
+ aead_api.o \
wpa.o \
scan.o offchannel.o \
ht.o agg-tx.o agg-rx.o \
@@ -15,8 +17,6 @@ mac80211-y := \
rate.o \
michael.o \
tkip.o \
- aes_ccm.o \
- aes_gcm.o \
aes_cmac.o \
aes_gmac.o \
fils_aead.o \
diff --git a/net/mac80211/aes_ccm.c b/net/mac80211/aead_api.c
index a4e0d59a40dd..160f9df30402 100644
--- a/net/mac80211/aes_ccm.c
+++ b/net/mac80211/aead_api.c
@@ -1,6 +1,7 @@
/*
* Copyright 2003-2004, Instant802 Networks, Inc.
* Copyright 2005-2006, Devicescape Software, Inc.
+ * Copyright 2014-2015, Qualcomm Atheros, Inc.
*
* Rewrite: Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
*
@@ -12,30 +13,29 @@
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/err.h>
+#include <linux/scatterlist.h>
#include <crypto/aead.h>
-#include <net/mac80211.h>
-#include "key.h"
-#include "aes_ccm.h"
+#include "aead_api.h"
-int ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
- u8 *data, size_t data_len, u8 *mic,
- size_t mic_len)
+int aead_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, size_t aad_len,
+ u8 *data, size_t data_len, u8 *mic)
{
+ size_t mic_len = crypto_aead_authsize(tfm);
struct scatterlist sg[3];
struct aead_request *aead_req;
int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm);
u8 *__aad;
- aead_req = kzalloc(reqsize + CCM_AAD_LEN, GFP_ATOMIC);
+ aead_req = kzalloc(reqsize + aad_len, GFP_ATOMIC);
if (!aead_req)
return -ENOMEM;
__aad = (u8 *)aead_req + reqsize;
- memcpy(__aad, aad, CCM_AAD_LEN);
+ memcpy(__aad, aad, aad_len);
sg_init_table(sg, 3);
- sg_set_buf(&sg[0], &__aad[2], be16_to_cpup((__be16 *)__aad));
+ sg_set_buf(&sg[0], __aad, aad_len);
sg_set_buf(&sg[1], data, data_len);
sg_set_buf(&sg[2], mic, mic_len);
@@ -49,10 +49,10 @@ int ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
return 0;
}
-int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
- u8 *data, size_t data_len, u8 *mic,
- size_t mic_len)
+int aead_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, size_t aad_len,
+ u8 *data, size_t data_len, u8 *mic)
{
+ size_t mic_len = crypto_aead_authsize(tfm);
struct scatterlist sg[3];
struct aead_request *aead_req;
int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm);
@@ -62,15 +62,15 @@ int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
if (data_len == 0)
return -EINVAL;
- aead_req = kzalloc(reqsize + CCM_AAD_LEN, GFP_ATOMIC);
+ aead_req = kzalloc(reqsize + aad_len, GFP_ATOMIC);
if (!aead_req)
return -ENOMEM;
__aad = (u8 *)aead_req + reqsize;
- memcpy(__aad, aad, CCM_AAD_LEN);
+ memcpy(__aad, aad, aad_len);
sg_init_table(sg, 3);
- sg_set_buf(&sg[0], &__aad[2], be16_to_cpup((__be16 *)__aad));
+ sg_set_buf(&sg[0], __aad, aad_len);
sg_set_buf(&sg[1], data, data_len);
sg_set_buf(&sg[2], mic, mic_len);
@@ -84,14 +84,14 @@ int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
return err;
}
-struct crypto_aead *ieee80211_aes_key_setup_encrypt(const u8 key[],
- size_t key_len,
- size_t mic_len)
+struct crypto_aead *
+aead_key_setup_encrypt(const char *alg, const u8 key[],
+ size_t key_len, size_t mic_len)
{
struct crypto_aead *tfm;
int err;
- tfm = crypto_alloc_aead("ccm(aes)", 0, CRYPTO_ALG_ASYNC);
+ tfm = crypto_alloc_aead(alg, 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(tfm))
return tfm;
@@ -109,7 +109,7 @@ free_aead:
return ERR_PTR(err);
}
-void ieee80211_aes_key_free(struct crypto_aead *tfm)
+void aead_key_free(struct crypto_aead *tfm)
{
crypto_free_aead(tfm);
}
diff --git a/net/mac80211/aead_api.h b/net/mac80211/aead_api.h
new file mode 100644
index 000000000000..5e39ea843bbf
--- /dev/null
+++ b/net/mac80211/aead_api.h
@@ -0,0 +1,27 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _AEAD_API_H
+#define _AEAD_API_H
+
+#include <crypto/aead.h>
+#include <linux/crypto.h>
+
+struct crypto_aead *
+aead_key_setup_encrypt(const char *alg, const u8 key[],
+ size_t key_len, size_t mic_len);
+
+int aead_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
+ size_t aad_len, u8 *data,
+ size_t data_len, u8 *mic);
+
+int aead_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
+ size_t aad_len, u8 *data,
+ size_t data_len, u8 *mic);
+
+void aead_key_free(struct crypto_aead *tfm);
+
+#endif /* _AEAD_API_H */
diff --git a/net/mac80211/aes_ccm.h b/net/mac80211/aes_ccm.h
index fcd3254c5cf0..e9b7ca0bde5b 100644
--- a/net/mac80211/aes_ccm.h
+++ b/net/mac80211/aes_ccm.h
@@ -10,19 +10,39 @@
#ifndef AES_CCM_H
#define AES_CCM_H
-#include <linux/crypto.h>
+#include "aead_api.h"
#define CCM_AAD_LEN 32
-struct crypto_aead *ieee80211_aes_key_setup_encrypt(const u8 key[],
- size_t key_len,
- size_t mic_len);
-int ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
- u8 *data, size_t data_len, u8 *mic,
- size_t mic_len);
-int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
- u8 *data, size_t data_len, u8 *mic,
- size_t mic_len);
-void ieee80211_aes_key_free(struct crypto_aead *tfm);
+static inline struct crypto_aead *
+ieee80211_aes_key_setup_encrypt(const u8 key[], size_t key_len, size_t mic_len)
+{
+ return aead_key_setup_encrypt("ccm(aes)", key, key_len, mic_len);
+}
+
+static inline int
+ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm,
+ u8 *b_0, u8 *aad, u8 *data,
+ size_t data_len, u8 *mic)
+{
+ return aead_encrypt(tfm, b_0, aad + 2,
+ be16_to_cpup((__be16 *)aad),
+ data, data_len, mic);
+}
+
+static inline int
+ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm,
+ u8 *b_0, u8 *aad, u8 *data,
+ size_t data_len, u8 *mic)
+{
+ return aead_decrypt(tfm, b_0, aad + 2,
+ be16_to_cpup((__be16 *)aad),
+ data, data_len, mic);
+}
+
+static inline void ieee80211_aes_key_free(struct crypto_aead *tfm)
+{
+ return aead_key_free(tfm);
+}
#endif /* AES_CCM_H */
diff --git a/net/mac80211/aes_gcm.c b/net/mac80211/aes_gcm.c
deleted file mode 100644
index 8a4397cc1b08..000000000000
--- a/net/mac80211/aes_gcm.c
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * Copyright 2014-2015, Qualcomm Atheros, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/err.h>
-#include <crypto/aead.h>
-
-#include <net/mac80211.h>
-#include "key.h"
-#include "aes_gcm.h"
-
-int ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad,
- u8 *data, size_t data_len, u8 *mic)
-{
- struct scatterlist sg[3];
- struct aead_request *aead_req;
- int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm);
- u8 *__aad;
-
- aead_req = kzalloc(reqsize + GCM_AAD_LEN, GFP_ATOMIC);
- if (!aead_req)
- return -ENOMEM;
-
- __aad = (u8 *)aead_req + reqsize;
- memcpy(__aad, aad, GCM_AAD_LEN);
-
- sg_init_table(sg, 3);
- sg_set_buf(&sg[0], &__aad[2], be16_to_cpup((__be16 *)__aad));
- sg_set_buf(&sg[1], data, data_len);
- sg_set_buf(&sg[2], mic, IEEE80211_GCMP_MIC_LEN);
-
- aead_request_set_tfm(aead_req, tfm);
- aead_request_set_crypt(aead_req, sg, sg, data_len, j_0);
- aead_request_set_ad(aead_req, sg[0].length);
-
- crypto_aead_encrypt(aead_req);
- kzfree(aead_req);
- return 0;
-}
-
-int ieee80211_aes_gcm_decrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad,
- u8 *data, size_t data_len, u8 *mic)
-{
- struct scatterlist sg[3];
- struct aead_request *aead_req;
- int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm);
- u8 *__aad;
- int err;
-
- if (data_len == 0)
- return -EINVAL;
-
- aead_req = kzalloc(reqsize + GCM_AAD_LEN, GFP_ATOMIC);
- if (!aead_req)
- return -ENOMEM;
-
- __aad = (u8 *)aead_req + reqsize;
- memcpy(__aad, aad, GCM_AAD_LEN);
-
- sg_init_table(sg, 3);
- sg_set_buf(&sg[0], &__aad[2], be16_to_cpup((__be16 *)__aad));
- sg_set_buf(&sg[1], data, data_len);
- sg_set_buf(&sg[2], mic, IEEE80211_GCMP_MIC_LEN);
-
- aead_request_set_tfm(aead_req, tfm);
- aead_request_set_crypt(aead_req, sg, sg,
- data_len + IEEE80211_GCMP_MIC_LEN, j_0);
- aead_request_set_ad(aead_req, sg[0].length);
-
- err = crypto_aead_decrypt(aead_req);
- kzfree(aead_req);
-
- return err;
-}
-
-struct crypto_aead *ieee80211_aes_gcm_key_setup_encrypt(const u8 key[],
- size_t key_len)
-{
- struct crypto_aead *tfm;
- int err;
-
- tfm = crypto_alloc_aead("gcm(aes)", 0, CRYPTO_ALG_ASYNC);
- if (IS_ERR(tfm))
- return tfm;
-
- err = crypto_aead_setkey(tfm, key, key_len);
- if (err)
- goto free_aead;
- err = crypto_aead_setauthsize(tfm, IEEE80211_GCMP_MIC_LEN);
- if (err)
- goto free_aead;
-
- return tfm;
-
-free_aead:
- crypto_free_aead(tfm);
- return ERR_PTR(err);
-}
-
-void ieee80211_aes_gcm_key_free(struct crypto_aead *tfm)
-{
- crypto_free_aead(tfm);
-}
diff --git a/net/mac80211/aes_gcm.h b/net/mac80211/aes_gcm.h
index 55aed5352494..d2b096033009 100644
--- a/net/mac80211/aes_gcm.h
+++ b/net/mac80211/aes_gcm.h
@@ -9,16 +9,38 @@
#ifndef AES_GCM_H
#define AES_GCM_H
-#include <linux/crypto.h>
+#include "aead_api.h"
#define GCM_AAD_LEN 32
-int ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad,
- u8 *data, size_t data_len, u8 *mic);
-int ieee80211_aes_gcm_decrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad,
- u8 *data, size_t data_len, u8 *mic);
-struct crypto_aead *ieee80211_aes_gcm_key_setup_encrypt(const u8 key[],
- size_t key_len);
-void ieee80211_aes_gcm_key_free(struct crypto_aead *tfm);
+static inline int ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm,
+ u8 *j_0, u8 *aad, u8 *data,
+ size_t data_len, u8 *mic)
+{
+ return aead_encrypt(tfm, j_0, aad + 2,
+ be16_to_cpup((__be16 *)aad),
+ data, data_len, mic);
+}
+
+static inline int ieee80211_aes_gcm_decrypt(struct crypto_aead *tfm,
+ u8 *j_0, u8 *aad, u8 *data,
+ size_t data_len, u8 *mic)
+{
+ return aead_decrypt(tfm, j_0, aad + 2,
+ be16_to_cpup((__be16 *)aad),
+ data, data_len, mic);
+}
+
+static inline struct crypto_aead *
+ieee80211_aes_gcm_key_setup_encrypt(const u8 key[], size_t key_len)
+{
+ return aead_key_setup_encrypt("gcm(aes)", key,
+ key_len, IEEE80211_GCMP_MIC_LEN);
+}
+
+static inline void ieee80211_aes_gcm_key_free(struct crypto_aead *tfm)
+{
+ return aead_key_free(tfm);
+}
#endif /* AES_GCM_H */
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 2b36eff5d97e..88cc1ae935ea 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -245,10 +245,10 @@ static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *d
ieee80211_tx_skb(sdata, skb);
}
-void __ieee80211_start_rx_ba_session(struct sta_info *sta,
- u8 dialog_token, u16 timeout,
- u16 start_seq_num, u16 ba_policy, u16 tid,
- u16 buf_size, bool tx, bool auto_seq)
+void ___ieee80211_start_rx_ba_session(struct sta_info *sta,
+ u8 dialog_token, u16 timeout,
+ u16 start_seq_num, u16 ba_policy, u16 tid,
+ u16 buf_size, bool tx, bool auto_seq)
{
struct ieee80211_local *local = sta->sdata->local;
struct tid_ampdu_rx *tid_agg_rx;
@@ -267,7 +267,7 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta,
ht_dbg(sta->sdata,
"STA %pM requests BA session on unsupported tid %d\n",
sta->sta.addr, tid);
- goto end_no_lock;
+ goto end;
}
if (!sta->sta.ht_cap.ht_supported) {
@@ -275,14 +275,14 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta,
"STA %pM erroneously requests BA session on tid %d w/o QoS\n",
sta->sta.addr, tid);
/* send a response anyway, it's an error case if we get here */
- goto end_no_lock;
+ goto end;
}
if (test_sta_flag(sta, WLAN_STA_BLOCK_BA)) {
ht_dbg(sta->sdata,
"Suspend in progress - Denying ADDBA request (%pM tid %d)\n",
sta->sta.addr, tid);
- goto end_no_lock;
+ goto end;
}
/* sanity check for incoming parameters:
@@ -296,7 +296,7 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta,
ht_dbg_ratelimited(sta->sdata,
"AddBA Req with bad params from %pM on tid %u. policy %d, buffer size %d\n",
sta->sta.addr, tid, ba_policy, buf_size);
- goto end_no_lock;
+ goto end;
}
/* determine default buffer size */
if (buf_size == 0)
@@ -311,7 +311,7 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta,
buf_size, sta->sta.addr);
/* examine state machine */
- mutex_lock(&sta->ampdu_mlme.mtx);
+ lockdep_assert_held(&sta->ampdu_mlme.mtx);
if (test_bit(tid, sta->ampdu_mlme.agg_session_valid)) {
if (sta->ampdu_mlme.tid_rx_token[tid] == dialog_token) {
@@ -415,15 +415,25 @@ end:
__clear_bit(tid, sta->ampdu_mlme.unexpected_agg);
sta->ampdu_mlme.tid_rx_token[tid] = dialog_token;
}
- mutex_unlock(&sta->ampdu_mlme.mtx);
-end_no_lock:
if (tx)
ieee80211_send_addba_resp(sta->sdata, sta->sta.addr, tid,
dialog_token, status, 1, buf_size,
timeout);
}
+void __ieee80211_start_rx_ba_session(struct sta_info *sta,
+ u8 dialog_token, u16 timeout,
+ u16 start_seq_num, u16 ba_policy, u16 tid,
+ u16 buf_size, bool tx, bool auto_seq)
+{
+ mutex_lock(&sta->ampdu_mlme.mtx);
+ ___ieee80211_start_rx_ba_session(sta, dialog_token, timeout,
+ start_seq_num, ba_policy, tid,
+ buf_size, tx, auto_seq);
+ mutex_unlock(&sta->ampdu_mlme.mtx);
+}
+
void ieee80211_process_addba_request(struct ieee80211_local *local,
struct sta_info *sta,
struct ieee80211_mgmt *mgmt,
@@ -449,7 +459,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
}
void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif,
- const u8 *addr, unsigned int bit)
+ const u8 *addr, unsigned int tid)
{
struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
struct ieee80211_local *local = sdata->local;
@@ -460,7 +470,7 @@ void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif,
if (!sta)
goto unlock;
- set_bit(bit, sta->ampdu_mlme.tid_rx_manage_offl);
+ set_bit(tid, sta->ampdu_mlme.tid_rx_manage_offl);
ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work);
unlock:
rcu_read_unlock();
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index cbd48762256c..bef516ec47f9 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -226,7 +226,11 @@ ieee80211_agg_start_txq(struct sta_info *sta, int tid, bool enable)
clear_bit(IEEE80211_TXQ_AMPDU, &txqi->flags);
clear_bit(IEEE80211_TXQ_STOP, &txqi->flags);
+ local_bh_disable();
+ rcu_read_lock();
drv_wake_tx_queue(sta->sdata->local, txqi);
+ rcu_read_unlock();
+ local_bh_enable();
}
/*
@@ -436,7 +440,7 @@ static void sta_addba_resp_timer_expired(unsigned long data)
test_bit(HT_AGG_STATE_RESPONSE_RECEIVED, &tid_tx->state)) {
rcu_read_unlock();
ht_dbg(sta->sdata,
- "timer expired on %pM tid %d but we are not (or no longer) expecting addBA response there\n",
+ "timer expired on %pM tid %d not expecting addBA response\n",
sta->sta.addr, tid);
return;
}
@@ -639,7 +643,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid,
time_before(jiffies, sta->ampdu_mlme.last_addba_req_time[tid] +
HT_AGG_RETRIES_PERIOD)) {
ht_dbg(sdata,
- "BA request denied - waiting a grace period after %d failed requests on %pM tid %u\n",
+ "BA request denied - %d failed requests on %pM tid %u\n",
sta->ampdu_mlme.addba_req_num[tid], sta->sta.addr, tid);
ret = -EBUSY;
goto err_unlock_sta;
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index a354f1939e49..fb15d3b97cb2 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -2727,12 +2727,6 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy,
if (!ieee80211_sdata_running(sdata))
return -ENETDOWN;
- if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) {
- ret = drv_set_bitrate_mask(local, sdata, mask);
- if (ret)
- return ret;
- }
-
/*
* If active validate the setting and reject it if it doesn't leave
* at least one basic rate usable, since we really have to be able
@@ -2748,6 +2742,12 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy,
return -EINVAL;
}
+ if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) {
+ ret = drv_set_bitrate_mask(local, sdata, mask);
+ if (ret)
+ return ret;
+ }
+
for (i = 0; i < NUM_NL80211_BANDS; i++) {
struct ieee80211_supported_band *sband = wiphy->bands[i];
int j;
diff --git a/net/mac80211/debug.h b/net/mac80211/debug.h
index 1956b3115dd5..d90a8f9cc3fd 100644
--- a/net/mac80211/debug.h
+++ b/net/mac80211/debug.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __MAC80211_DEBUG_H
#define __MAC80211_DEBUG_H
#include <net/cfg80211.h>
diff --git a/net/mac80211/debugfs.h b/net/mac80211/debugfs.h
index 60c35afee29d..d2c424787463 100644
--- a/net/mac80211/debugfs.h
+++ b/net/mac80211/debugfs.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __MAC80211_DEBUGFS_H
#define __MAC80211_DEBUGFS_H
diff --git a/net/mac80211/debugfs_key.h b/net/mac80211/debugfs_key.h
index 32adc77e9c77..1cd7b8bff56c 100644
--- a/net/mac80211/debugfs_key.h
+++ b/net/mac80211/debugfs_key.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __MAC80211_DEBUGFS_KEY_H
#define __MAC80211_DEBUGFS_KEY_H
diff --git a/net/mac80211/debugfs_netdev.h b/net/mac80211/debugfs_netdev.h
index 9f5501a9a795..a7e9d8d518f9 100644
--- a/net/mac80211/debugfs_netdev.h
+++ b/net/mac80211/debugfs_netdev.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* routines exported for debugfs handling */
#ifndef __IEEE80211_DEBUGFS_NETDEV_H
diff --git a/net/mac80211/debugfs_sta.h b/net/mac80211/debugfs_sta.h
index 8b608903259f..d2e7c27ad6d1 100644
--- a/net/mac80211/debugfs_sta.h
+++ b/net/mac80211/debugfs_sta.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __MAC80211_DEBUGFS_STA_H
#define __MAC80211_DEBUGFS_STA_H
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 09f77e4a8a79..c7f93fd9ca7a 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Portions of this file
* Copyright(c) 2016 Intel Deutschland GmbH
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index c92df492e898..41f5e48f8021 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -290,16 +290,36 @@ void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta,
{
int i;
+ mutex_lock(&sta->ampdu_mlme.mtx);
for (i = 0; i < IEEE80211_NUM_TIDS; i++) {
- __ieee80211_stop_tx_ba_session(sta, i, reason);
- __ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT,
- WLAN_REASON_QSTA_LEAVE_QBSS,
- reason != AGG_STOP_DESTROY_STA &&
- reason != AGG_STOP_PEER_REQUEST);
+ ___ieee80211_stop_tx_ba_session(sta, i, reason);
+ ___ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT,
+ WLAN_REASON_QSTA_LEAVE_QBSS,
+ reason != AGG_STOP_DESTROY_STA &&
+ reason != AGG_STOP_PEER_REQUEST);
}
+ mutex_unlock(&sta->ampdu_mlme.mtx);
/* stopping might queue the work again - so cancel only afterwards */
cancel_work_sync(&sta->ampdu_mlme.work);
+
+ /*
+ * In case the tear down is part of a reconfigure due to HW restart
+ * request, it is possible that the low level driver requested to stop
+ * the BA session, so handle it to properly clean tid_tx data.
+ */
+ mutex_lock(&sta->ampdu_mlme.mtx);
+ for (i = 0; i < IEEE80211_NUM_TIDS; i++) {
+ struct tid_ampdu_tx *tid_tx =
+ rcu_dereference_protected_tid_tx(sta, i);
+
+ if (!tid_tx)
+ continue;
+
+ if (test_and_clear_bit(HT_AGG_STATE_STOP_CB, &tid_tx->state))
+ ieee80211_stop_tx_ba_cb(sta, i, tid_tx);
+ }
+ mutex_unlock(&sta->ampdu_mlme.mtx);
}
void ieee80211_ba_session_work(struct work_struct *work)
@@ -333,9 +353,9 @@ void ieee80211_ba_session_work(struct work_struct *work)
if (test_and_clear_bit(tid,
sta->ampdu_mlme.tid_rx_manage_offl))
- __ieee80211_start_rx_ba_session(sta, 0, 0, 0, 1, tid,
- IEEE80211_MAX_AMPDU_BUF,
- false, true);
+ ___ieee80211_start_rx_ba_session(sta, 0, 0, 0, 1, tid,
+ IEEE80211_MAX_AMPDU_BUF,
+ false, true);
if (test_and_clear_bit(tid + IEEE80211_NUM_TIDS,
sta->ampdu_mlme.tid_rx_manage_offl))
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 2197c62a0a6e..68f874e73561 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1760,6 +1760,10 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta,
u8 dialog_token, u16 timeout,
u16 start_seq_num, u16 ba_policy, u16 tid,
u16 buf_size, bool tx, bool auto_seq);
+void ___ieee80211_start_rx_ba_session(struct sta_info *sta,
+ u8 dialog_token, u16 timeout,
+ u16 start_seq_num, u16 ba_policy, u16 tid,
+ u16 buf_size, bool tx, bool auto_seq);
void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta,
enum ieee80211_agg_stop_reason reason);
void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata,
@@ -2005,6 +2009,8 @@ void ieee80211_txq_init(struct ieee80211_sub_if_data *sdata,
struct txq_info *txq, int tid);
void ieee80211_txq_purge(struct ieee80211_local *local,
struct txq_info *txqi);
+void ieee80211_txq_remove_vlan(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata);
void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
u16 transaction, u16 auth_alg, u16 status,
const u8 *extra, size_t extra_len, const u8 *bssid,
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 9228ac73c429..13b16f90e1cf 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -731,7 +731,8 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
sdata->vif.type == NL80211_IFTYPE_AP_VLAN ||
local->ops->wake_tx_queue) {
/* XXX: for AP_VLAN, actually track AP queues */
- netif_tx_start_all_queues(dev);
+ if (dev)
+ netif_tx_start_all_queues(dev);
} else if (dev) {
unsigned long flags;
int n_acs = IEEE80211_NUM_ACS;
@@ -793,7 +794,6 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
bool going_down)
{
struct ieee80211_local *local = sdata->local;
- struct fq *fq = &local->fq;
unsigned long flags;
struct sk_buff *skb, *tmp;
u32 hw_reconf_flags = 0;
@@ -993,8 +993,6 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
skb_queue_purge(&sdata->skb_queue);
}
- sdata->bss = NULL;
-
spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
for (i = 0; i < IEEE80211_MAX_QUEUES; i++) {
skb_queue_walk_safe(&local->pending[i], skb, tmp) {
@@ -1007,13 +1005,10 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
}
spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
- if (sdata->vif.txq) {
- struct txq_info *txqi = to_txq_info(sdata->vif.txq);
+ if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
+ ieee80211_txq_remove_vlan(local, sdata);
- spin_lock_bh(&fq->lock);
- ieee80211_txq_purge(local, txqi);
- spin_unlock_bh(&fq->lock);
- }
+ sdata->bss = NULL;
if (local->open_count == 0)
ieee80211_clear_tx_pending(local);
@@ -1758,7 +1753,9 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
sizeof(void *));
int txq_size = 0;
- if (local->ops->wake_tx_queue)
+ if (local->ops->wake_tx_queue &&
+ type != NL80211_IFTYPE_AP_VLAN &&
+ type != NL80211_IFTYPE_MONITOR)
txq_size += sizeof(struct txq_info) +
local->hw.txq_data_size;
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index a98fc2b5e0dc..938049395f90 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -4,7 +4,7 @@
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2007-2008 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
- * Copyright 2015 Intel Deutschland GmbH
+ * Copyright 2015-2017 Intel Deutschland GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -19,6 +19,7 @@
#include <linux/slab.h>
#include <linux/export.h>
#include <net/mac80211.h>
+#include <crypto/algapi.h>
#include <asm/unaligned.h>
#include "ieee80211_i.h"
#include "driver-ops.h"
@@ -609,6 +610,39 @@ void ieee80211_key_free_unused(struct ieee80211_key *key)
ieee80211_key_free_common(key);
}
+static bool ieee80211_key_identical(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_key *old,
+ struct ieee80211_key *new)
+{
+ u8 tkip_old[WLAN_KEY_LEN_TKIP], tkip_new[WLAN_KEY_LEN_TKIP];
+ u8 *tk_old, *tk_new;
+
+ if (!old || new->conf.keylen != old->conf.keylen)
+ return false;
+
+ tk_old = old->conf.key;
+ tk_new = new->conf.key;
+
+ /*
+ * In station mode, don't compare the TX MIC key, as it's never used
+ * and offloaded rekeying may not care to send it to the host. This
+ * is the case in iwlwifi, for example.
+ */
+ if (sdata->vif.type == NL80211_IFTYPE_STATION &&
+ new->conf.cipher == WLAN_CIPHER_SUITE_TKIP &&
+ new->conf.keylen == WLAN_KEY_LEN_TKIP &&
+ !(new->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE)) {
+ memcpy(tkip_old, tk_old, WLAN_KEY_LEN_TKIP);
+ memcpy(tkip_new, tk_new, WLAN_KEY_LEN_TKIP);
+ memset(tkip_old + NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY, 0, 8);
+ memset(tkip_new + NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY, 0, 8);
+ tk_old = tkip_old;
+ tk_new = tkip_new;
+ }
+
+ return !crypto_memneq(tk_old, tk_new, new->conf.keylen);
+}
+
int ieee80211_key_link(struct ieee80211_key *key,
struct ieee80211_sub_if_data *sdata,
struct sta_info *sta)
@@ -620,9 +654,6 @@ int ieee80211_key_link(struct ieee80211_key *key,
pairwise = key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE;
idx = key->conf.keyidx;
- key->local = sdata->local;
- key->sdata = sdata;
- key->sta = sta;
mutex_lock(&sdata->local->key_mtx);
@@ -633,6 +664,20 @@ int ieee80211_key_link(struct ieee80211_key *key,
else
old_key = key_mtx_dereference(sdata->local, sdata->keys[idx]);
+ /*
+ * Silently accept key re-installation without really installing the
+ * new version of the key to avoid nonce reuse or replay issues.
+ */
+ if (ieee80211_key_identical(sdata, old_key, key)) {
+ ieee80211_key_free_unused(key);
+ ret = 0;
+ goto out;
+ }
+
+ key->local = sdata->local;
+ key->sdata = sdata;
+ key->sta = sta;
+
increment_tailroom_need_count(sdata);
ieee80211_key_replace(sdata, sta, pairwise, old_key, key);
@@ -648,6 +693,7 @@ int ieee80211_key_link(struct ieee80211_key *key,
ret = 0;
}
+ out:
mutex_unlock(&sdata->local->key_mtx);
return ret;
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index a550c707cd8a..7a76c4a6df30 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -675,8 +675,7 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh)
enum nl80211_band band;
u8 *pos;
struct ieee80211_sub_if_data *sdata;
- int hdr_len = offsetof(struct ieee80211_mgmt, u.beacon) +
- sizeof(mgmt->u.beacon);
+ int hdr_len = offsetofend(struct ieee80211_mgmt, u.beacon);
sdata = container_of(ifmsh, struct ieee80211_sub_if_data, u.mesh);
rcu_read_lock();
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index 7e5f271e3c30..465b7853edc0 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -275,6 +275,7 @@ void mesh_neighbour_update(struct ieee80211_sub_if_data *sdata,
u8 *hw_addr, struct ieee802_11_elems *ie);
bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie);
u32 mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata);
+void mesh_plink_timer(struct timer_list *t);
void mesh_plink_broken(struct sta_info *sta);
u32 mesh_plink_deactivate(struct sta_info *sta);
u32 mesh_plink_open(struct sta_info *sta);
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index d8bbd0d2225a..146ec6c0f12f 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -111,8 +111,8 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags,
struct sk_buff *skb;
struct ieee80211_mgmt *mgmt;
u8 *pos, ie_len;
- int hdr_len = offsetof(struct ieee80211_mgmt, u.action.u.mesh_action) +
- sizeof(mgmt->u.action.u.mesh_action);
+ int hdr_len = offsetofend(struct ieee80211_mgmt,
+ u.action.u.mesh_action);
skb = dev_alloc_skb(local->tx_headroom +
hdr_len +
@@ -242,8 +242,8 @@ int mesh_path_error_tx(struct ieee80211_sub_if_data *sdata,
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
struct ieee80211_mgmt *mgmt;
u8 *pos, ie_len;
- int hdr_len = offsetof(struct ieee80211_mgmt, u.action.u.mesh_action) +
- sizeof(mgmt->u.action.u.mesh_action);
+ int hdr_len = offsetofend(struct ieee80211_mgmt,
+ u.action.u.mesh_action);
if (time_before(jiffies, ifmsh->next_perr))
return -EAGAIN;
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index f69c6c38ca43..e2d00cce3c17 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -220,8 +220,7 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
bool include_plid = false;
u16 peering_proto = 0;
u8 *pos, ie_len = 4;
- int hdr_len = offsetof(struct ieee80211_mgmt, u.action.u.self_prot) +
- sizeof(mgmt->u.action.u.self_prot);
+ int hdr_len = offsetofend(struct ieee80211_mgmt, u.action.u.self_prot);
int err = -ENOMEM;
skb = dev_alloc_skb(local->tx_headroom +
@@ -604,8 +603,9 @@ out:
ieee80211_mbss_info_change_notify(sdata, changed);
}
-static void mesh_plink_timer(unsigned long data)
+void mesh_plink_timer(struct timer_list *t)
{
+ struct mesh_sta *mesh = from_timer(mesh, t, plink_timer);
struct sta_info *sta;
u16 reason = 0;
struct ieee80211_sub_if_data *sdata;
@@ -617,7 +617,7 @@ static void mesh_plink_timer(unsigned long data)
* del_timer_sync() this timer after having made sure
* it cannot be readded (by deleting the plink.)
*/
- sta = (struct sta_info *) data;
+ sta = mesh->plink_sta;
if (sta->sdata->local->quiescing)
return;
@@ -697,11 +697,8 @@ static void mesh_plink_timer(unsigned long data)
static inline void mesh_plink_timer_set(struct sta_info *sta, u32 timeout)
{
- sta->mesh->plink_timer.expires = jiffies + msecs_to_jiffies(timeout);
- sta->mesh->plink_timer.data = (unsigned long) sta;
- sta->mesh->plink_timer.function = mesh_plink_timer;
sta->mesh->plink_timeout = timeout;
- add_timer(&sta->mesh->plink_timer);
+ mod_timer(&sta->mesh->plink_timer, jiffies + msecs_to_jiffies(timeout));
}
static bool llid_in_use(struct ieee80211_sub_if_data *sdata,
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index b588e593b0ec..e4ededa1909d 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -145,7 +145,6 @@ static u32
ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
struct ieee80211_supported_band *sband,
struct ieee80211_channel *channel,
- const struct ieee80211_ht_cap *ht_cap,
const struct ieee80211_ht_operation *ht_oper,
const struct ieee80211_vht_operation *vht_oper,
struct cfg80211_chan_def *chandef, bool tracking)
@@ -163,20 +162,13 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
chandef->center_freq1 = channel->center_freq;
chandef->center_freq2 = 0;
- if (!ht_cap || !ht_oper || !sta_ht_cap.ht_supported) {
+ if (!ht_oper || !sta_ht_cap.ht_supported) {
ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT;
goto out;
}
chandef->width = NL80211_CHAN_WIDTH_20;
- if (!(ht_cap->cap_info &
- cpu_to_le16(IEEE80211_HT_CAP_SUP_WIDTH_20_40))) {
- ret = IEEE80211_STA_DISABLE_40MHZ;
- vht_chandef = *chandef;
- goto out;
- }
-
ht_cfreq = ieee80211_channel_to_frequency(ht_oper->primary_chan,
channel->band);
/* check that channel matches the right operating channel */
@@ -344,7 +336,7 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata,
/* calculate new channel (type) based on HT/VHT operation IEs */
flags = ieee80211_determine_chantype(sdata, sband, chan,
- ht_cap, ht_oper, vht_oper,
+ ht_oper, vht_oper,
&chandef, true);
/*
@@ -780,11 +772,12 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
WLAN_EID_SUPPORTED_REGULATORY_CLASSES,
WLAN_EID_HT_CAPABILITY,
WLAN_EID_BSS_COEX_2040,
+ /* luckily this is almost always there */
WLAN_EID_EXT_CAPABILITY,
WLAN_EID_QOS_TRAFFIC_CAPA,
WLAN_EID_TIM_BCAST_REQ,
WLAN_EID_INTERWORKING,
- /* 60GHz doesn't happen right now */
+ /* 60 GHz (Multi-band, DMG, MMS) can't happen */
WLAN_EID_VHT_CAPABILITY,
WLAN_EID_OPMODE_NOTIF,
};
@@ -811,22 +804,16 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
/* if present, add any custom IEs that go before VHT */
if (assoc_data->ie_len) {
static const u8 before_vht[] = {
- WLAN_EID_SSID,
- WLAN_EID_SUPP_RATES,
- WLAN_EID_EXT_SUPP_RATES,
- WLAN_EID_PWR_CAPABILITY,
- WLAN_EID_SUPPORTED_CHANNELS,
- WLAN_EID_RSN,
- WLAN_EID_QOS_CAPA,
- WLAN_EID_RRM_ENABLED_CAPABILITIES,
- WLAN_EID_MOBILITY_DOMAIN,
- WLAN_EID_SUPPORTED_REGULATORY_CLASSES,
- WLAN_EID_HT_CAPABILITY,
+ /*
+ * no need to list the ones split off before HT
+ * or generated here
+ */
WLAN_EID_BSS_COEX_2040,
WLAN_EID_EXT_CAPABILITY,
WLAN_EID_QOS_TRAFFIC_CAPA,
WLAN_EID_TIM_BCAST_REQ,
WLAN_EID_INTERWORKING,
+ /* 60 GHz (Multi-band, DMG, MMS) can't happen */
};
/* RIC already taken above, so no need to handle here anymore */
@@ -3155,7 +3142,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
if (len < 24 + 6)
return;
- reassoc = ieee80211_is_reassoc_req(mgmt->frame_control);
+ reassoc = ieee80211_is_reassoc_resp(mgmt->frame_control);
capab_info = le16_to_cpu(mgmt->u.assoc_resp.capab_info);
status_code = le16_to_cpu(mgmt->u.assoc_resp.status_code);
aid = le16_to_cpu(mgmt->u.assoc_resp.aid);
@@ -4317,7 +4304,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
ifmgd->flags |= ieee80211_determine_chantype(sdata, sband,
cbss->channel,
- ht_cap, ht_oper, vht_oper,
+ ht_oper, vht_oper,
&chandef, false);
sdata->needed_rx_chains = min(ieee80211_ht_vht_rx_chains(sdata, cbss),
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index f8e7a8bbc618..faf4f6055000 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -707,6 +707,8 @@ static int ieee80211_cancel_roc(struct ieee80211_local *local,
if (!cookie)
return -ENOENT;
+ flush_work(&local->hw_roc_start);
+
mutex_lock(&local->mtx);
list_for_each_entry_safe(roc, tmp, &local->roc_list, list) {
if (!mgmt_tx && roc->cookie != cookie)
diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c
index a87d195c4a61..38c45e1dafd8 100644
--- a/net/mac80211/pm.c
+++ b/net/mac80211/pm.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <net/mac80211.h>
#include <net/rtnetlink.h>
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 47d2ed570470..ef2becaade50 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -7,7 +7,7 @@
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2013-2015 Intel Mobile Communications GmbH
- * Copyright 2016 Intel Deutschland GmbH
+ * Copyright 2016-2017 Intel Deutschland GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -183,6 +183,20 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
return bss;
}
+static bool ieee80211_scan_accept_presp(struct ieee80211_sub_if_data *sdata,
+ u32 scan_flags, const u8 *da)
+{
+ if (!sdata)
+ return false;
+ /* accept broadcast for OCE */
+ if (scan_flags & NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP &&
+ is_broadcast_ether_addr(da))
+ return true;
+ if (scan_flags & NL80211_SCAN_FLAG_RANDOM_ADDR)
+ return true;
+ return ether_addr_equal(da, sdata->vif.addr);
+}
+
void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)
{
struct ieee80211_rx_status *rx_status = IEEE80211_SKB_RXCB(skb);
@@ -208,19 +222,24 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)
if (ieee80211_is_probe_resp(mgmt->frame_control)) {
struct cfg80211_scan_request *scan_req;
struct cfg80211_sched_scan_request *sched_scan_req;
+ u32 scan_req_flags = 0, sched_scan_req_flags = 0;
scan_req = rcu_dereference(local->scan_req);
sched_scan_req = rcu_dereference(local->sched_scan_req);
- /* ignore ProbeResp to foreign address unless scanning
- * with randomised address
+ if (scan_req)
+ scan_req_flags = scan_req->flags;
+
+ if (sched_scan_req)
+ sched_scan_req_flags = sched_scan_req->flags;
+
+ /* ignore ProbeResp to foreign address or non-bcast (OCE)
+ * unless scanning with randomised address
*/
- if (!(sdata1 &&
- (ether_addr_equal(mgmt->da, sdata1->vif.addr) ||
- scan_req->flags & NL80211_SCAN_FLAG_RANDOM_ADDR)) &&
- !(sdata2 &&
- (ether_addr_equal(mgmt->da, sdata2->vif.addr) ||
- sched_scan_req->flags & NL80211_SCAN_FLAG_RANDOM_ADDR)))
+ if (!ieee80211_scan_accept_presp(sdata1, scan_req_flags,
+ mgmt->da) &&
+ !ieee80211_scan_accept_presp(sdata2, sched_scan_req_flags,
+ mgmt->da))
return;
elements = mgmt->u.probe_resp.variable;
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 69615016d5bf..a3060e55122c 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -329,10 +329,12 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
sta->mesh = kzalloc(sizeof(*sta->mesh), gfp);
if (!sta->mesh)
goto free;
+ sta->mesh->plink_sta = sta;
spin_lock_init(&sta->mesh->plink_lock);
if (ieee80211_vif_is_mesh(&sdata->vif) &&
!sdata->u.mesh.user_mpm)
- init_timer(&sta->mesh->plink_timer);
+ timer_setup(&sta->mesh->plink_timer, mesh_plink_timer,
+ 0);
sta->mesh->nonpeer_pm = NL80211_MESH_POWER_ACTIVE;
}
#endif
@@ -515,6 +517,31 @@ static int sta_info_insert_drv_state(struct ieee80211_local *local,
return err;
}
+static void
+ieee80211_recalc_p2p_go_ps_allowed(struct ieee80211_sub_if_data *sdata)
+{
+ struct ieee80211_local *local = sdata->local;
+ bool allow_p2p_go_ps = sdata->vif.p2p;
+ struct sta_info *sta;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(sta, &local->sta_list, list) {
+ if (sdata != sta->sdata ||
+ !test_sta_flag(sta, WLAN_STA_ASSOC))
+ continue;
+ if (!sta->sta.support_p2p_ps) {
+ allow_p2p_go_ps = false;
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ if (allow_p2p_go_ps != sdata->vif.bss_conf.allow_p2p_go_ps) {
+ sdata->vif.bss_conf.allow_p2p_go_ps = allow_p2p_go_ps;
+ ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_P2P_PS);
+ }
+}
+
/*
* should be called with sta_mtx locked
* this function replaces the mutex lock
@@ -561,6 +588,13 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU)
goto out_remove;
set_sta_flag(sta, WLAN_STA_INSERTED);
+
+ if (sta->sta_state >= IEEE80211_STA_ASSOC) {
+ ieee80211_recalc_min_chandef(sta->sdata);
+ if (!sta->sta.support_p2p_ps)
+ ieee80211_recalc_p2p_go_ps_allowed(sta->sdata);
+ }
+
/* accept BA sessions now */
clear_sta_flag(sta, WLAN_STA_BLOCK_BA);
@@ -1788,31 +1822,6 @@ void ieee80211_sta_set_buffered(struct ieee80211_sta *pubsta,
}
EXPORT_SYMBOL(ieee80211_sta_set_buffered);
-static void
-ieee80211_recalc_p2p_go_ps_allowed(struct ieee80211_sub_if_data *sdata)
-{
- struct ieee80211_local *local = sdata->local;
- bool allow_p2p_go_ps = sdata->vif.p2p;
- struct sta_info *sta;
-
- rcu_read_lock();
- list_for_each_entry_rcu(sta, &local->sta_list, list) {
- if (sdata != sta->sdata ||
- !test_sta_flag(sta, WLAN_STA_ASSOC))
- continue;
- if (!sta->sta.support_p2p_ps) {
- allow_p2p_go_ps = false;
- break;
- }
- }
- rcu_read_unlock();
-
- if (allow_p2p_go_ps != sdata->vif.bss_conf.allow_p2p_go_ps) {
- sdata->vif.bss_conf.allow_p2p_go_ps = allow_p2p_go_ps;
- ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_P2P_PS);
- }
-}
-
int sta_info_move_state(struct sta_info *sta,
enum ieee80211_sta_state new_state)
{
@@ -2008,7 +2017,7 @@ static void sta_stats_decode_rate(struct ieee80211_local *local, u16 rate,
static int sta_set_rate_info_rx(struct sta_info *sta, struct rate_info *rinfo)
{
- u16 rate = ACCESS_ONCE(sta_get_last_rx_stats(sta)->last_rate);
+ u16 rate = READ_ONCE(sta_get_last_rx_stats(sta)->last_rate);
if (rate == STA_STATS_RATE_INVALID)
return -EINVAL;
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 3acbdfa9f649..5c54acd10562 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -344,6 +344,7 @@ DECLARE_EWMA(mesh_fail_avg, 20, 8)
* @plink_state: peer link state
* @plink_timeout: timeout of peer link
* @plink_timer: peer link watch timer
+ * @plink_sta: peer link watch timer's sta_info
* @t_offset: timing offset relative to this host
* @t_offset_setpoint: reference timing offset of this sta to be used when
* calculating clockdrift
@@ -356,6 +357,7 @@ DECLARE_EWMA(mesh_fail_avg, 20, 8)
*/
struct mesh_sta {
struct timer_list plink_timer;
+ struct sta_info *plink_sta;
s64 t_offset;
s64 t_offset_setpoint;
@@ -398,7 +400,7 @@ struct ieee80211_sta_rx_stats {
u64 msdu[IEEE80211_NUM_TIDS + 1];
};
-/**
+/*
* The bandwidth threshold below which the per-station CoDel parameters will be
* scaled to be more lenient (to prevent starvation of slow stations). This
* value will be scaled by the number of active stations when it is being
diff --git a/net/mac80211/trace.c b/net/mac80211/trace.c
index edfe0c170a1c..837857261b66 100644
--- a/net/mac80211/trace.c
+++ b/net/mac80211/trace.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* bug in tracepoint.h, it should include this */
#include <linux/module.h>
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index 3d9ac17af407..591ad02e1fa4 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Portions of this file
* Copyright(c) 2016 Intel Deutschland GmbH
diff --git a/net/mac80211/trace_msg.h b/net/mac80211/trace_msg.h
index 768f7c22a190..366b9e6f043e 100644
--- a/net/mac80211/trace_msg.h
+++ b/net/mac80211/trace_msg.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifdef CONFIG_MAC80211_MESSAGE_TRACING
#if !defined(__MAC80211_MSG_DRIVER_TRACE) || defined(TRACE_HEADER_MULTI_READ)
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 8858f4f185e9..7b8154474b9e 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1276,11 +1276,6 @@ static void ieee80211_set_skb_enqueue_time(struct sk_buff *skb)
IEEE80211_SKB_CB(skb)->control.enqueue_time = codel_get_time();
}
-static void ieee80211_set_skb_vif(struct sk_buff *skb, struct txq_info *txqi)
-{
- IEEE80211_SKB_CB(skb)->control.vif = txqi->txq.vif;
-}
-
static u32 codel_skb_len_func(const struct sk_buff *skb)
{
return skb->len;
@@ -1401,6 +1396,40 @@ static void ieee80211_txq_enqueue(struct ieee80211_local *local,
fq_flow_get_default_func);
}
+static bool fq_vlan_filter_func(struct fq *fq, struct fq_tin *tin,
+ struct fq_flow *flow, struct sk_buff *skb,
+ void *data)
+{
+ struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+
+ return info->control.vif == data;
+}
+
+void ieee80211_txq_remove_vlan(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata)
+{
+ struct fq *fq = &local->fq;
+ struct txq_info *txqi;
+ struct fq_tin *tin;
+ struct ieee80211_sub_if_data *ap;
+
+ if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_AP_VLAN))
+ return;
+
+ ap = container_of(sdata->bss, struct ieee80211_sub_if_data, u.ap);
+
+ if (!ap->vif.txq)
+ return;
+
+ txqi = to_txq_info(ap->vif.txq);
+ tin = &txqi->tin;
+
+ spin_lock_bh(&fq->lock);
+ fq_tin_filter(fq, tin, fq_vlan_filter_func, &sdata->vif,
+ fq_skb_free_func);
+ spin_unlock_bh(&fq->lock);
+}
+
void ieee80211_txq_init(struct ieee80211_sub_if_data *sdata,
struct sta_info *sta,
struct txq_info *txqi, int tid)
@@ -3414,6 +3443,7 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
struct ieee80211_tx_info *info;
struct ieee80211_tx_data tx;
ieee80211_tx_result r;
+ struct ieee80211_vif *vif;
spin_lock_bh(&fq->lock);
@@ -3430,8 +3460,6 @@ begin:
if (!skb)
goto out;
- ieee80211_set_skb_vif(skb, txqi);
-
hdr = (struct ieee80211_hdr *)skb->data;
info = IEEE80211_SKB_CB(skb);
@@ -3488,6 +3516,34 @@ begin:
}
}
+ switch (tx.sdata->vif.type) {
+ case NL80211_IFTYPE_MONITOR:
+ if (tx.sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE) {
+ vif = &tx.sdata->vif;
+ break;
+ }
+ tx.sdata = rcu_dereference(local->monitor_sdata);
+ if (tx.sdata) {
+ vif = &tx.sdata->vif;
+ info->hw_queue =
+ vif->hw_queue[skb_get_queue_mapping(skb)];
+ } else if (ieee80211_hw_check(&local->hw, QUEUE_CONTROL)) {
+ ieee80211_free_txskb(&local->hw, skb);
+ goto begin;
+ } else {
+ vif = NULL;
+ }
+ break;
+ case NL80211_IFTYPE_AP_VLAN:
+ tx.sdata = container_of(tx.sdata->bss,
+ struct ieee80211_sub_if_data, u.ap);
+ /* fall through */
+ default:
+ vif = &tx.sdata->vif;
+ break;
+ }
+
+ IEEE80211_SKB_CB(skb)->control.vif = vif;
out:
spin_unlock_bh(&fq->lock);
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 259698de569f..d57e5f6bd8b6 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1392,10 +1392,10 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_local *local,
/* insert custom IEs that go before HT */
if (ie && ie_len) {
static const u8 before_ht[] = {
- WLAN_EID_SSID,
- WLAN_EID_SUPP_RATES,
- WLAN_EID_REQUEST,
- WLAN_EID_EXT_SUPP_RATES,
+ /*
+ * no need to list the ones split off already
+ * (or generated here)
+ */
WLAN_EID_DS_PARAMS,
WLAN_EID_SUPPORTED_REGULATORY_CLASSES,
};
@@ -1424,20 +1424,17 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_local *local,
/* insert custom IEs that go before VHT */
if (ie && ie_len) {
static const u8 before_vht[] = {
- WLAN_EID_SSID,
- WLAN_EID_SUPP_RATES,
- WLAN_EID_REQUEST,
- WLAN_EID_EXT_SUPP_RATES,
- WLAN_EID_DS_PARAMS,
- WLAN_EID_SUPPORTED_REGULATORY_CLASSES,
- WLAN_EID_HT_CAPABILITY,
+ /*
+ * no need to list the ones split off already
+ * (or generated here)
+ */
WLAN_EID_BSS_COEX_2040,
WLAN_EID_EXT_CAPABILITY,
WLAN_EID_SSID_LIST,
WLAN_EID_CHANNEL_USAGE,
WLAN_EID_INTERWORKING,
- /* mesh ID can't happen here */
- /* 60 GHz can't happen here right now */
+ WLAN_EID_MESH_ID,
+ /* 60 GHz (Multi-band, DMG, MMS) can't happen */
};
noffset = ieee80211_ie_split(ie, ie_len,
before_vht, ARRAY_SIZE(before_vht),
@@ -2980,8 +2977,8 @@ int ieee80211_send_action_csa(struct ieee80211_sub_if_data *sdata,
struct ieee80211_mgmt *mgmt;
struct ieee80211_local *local = sdata->local;
int freq;
- int hdr_len = offsetof(struct ieee80211_mgmt, u.action.u.chan_switch) +
- sizeof(mgmt->u.action.u.chan_switch);
+ int hdr_len = offsetofend(struct ieee80211_mgmt,
+ u.action.u.chan_switch);
u8 *pos;
if (sdata->vif.type != NL80211_IFTYPE_ADHOC &&
diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
index 19ec2189d3ac..b9276ac849fa 100644
--- a/net/mac80211/vht.c
+++ b/net/mac80211/vht.c
@@ -386,6 +386,16 @@ enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta)
bw = ieee80211_sta_cap_rx_bw(sta);
bw = min(bw, sta->cur_max_bandwidth);
+
+ /* Don't consider AP's bandwidth for TDLS peers, section 11.23.1 of
+ * IEEE80211-2016 specification makes higher bandwidth operation
+ * possible on the TDLS link if the peers have wider bandwidth
+ * capability.
+ */
+ if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) &&
+ test_sta_flag(sta, WLAN_STA_TDLS_WIDER_BW))
+ return bw;
+
bw = min(bw, ieee80211_chan_width_to_rx_bw(bss_width));
return bw;
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 0d722ea98a1b..b58722d9de37 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -464,7 +464,7 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb,
pos += IEEE80211_CCMP_HDR_LEN;
ccmp_special_blocks(skb, pn, b_0, aad);
return ieee80211_aes_ccm_encrypt(key->u.ccmp.tfm, b_0, aad, pos, len,
- skb_put(skb, mic_len), mic_len);
+ skb_put(skb, mic_len));
}
@@ -543,7 +543,7 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx,
key->u.ccmp.tfm, b_0, aad,
skb->data + hdrlen + IEEE80211_CCMP_HDR_LEN,
data_len,
- skb->data + skb->len - mic_len, mic_len))
+ skb->data + skb->len - mic_len))
return RX_DROP_UNUSABLE;
}
diff --git a/net/mac802154/cfg.h b/net/mac802154/cfg.h
index e2718f981e82..3bb089685500 100644
--- a/net/mac802154/cfg.h
+++ b/net/mac802154/cfg.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* mac802154 configuration hooks for cfg802154
*/
diff --git a/net/mac802154/driver-ops.h b/net/mac802154/driver-ops.h
index fd9daf2ecec9..d23f0db98015 100644
--- a/net/mac802154/driver-ops.h
+++ b/net/mac802154/driver-ops.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __MAC802154_DRIVER_OPS
#define __MAC802154_DRIVER_OPS
diff --git a/net/mac802154/llsec.c b/net/mac802154/llsec.c
index 1e1c9b20bab7..2fb703d70803 100644
--- a/net/mac802154/llsec.c
+++ b/net/mac802154/llsec.c
@@ -623,13 +623,18 @@ llsec_do_encrypt_unauth(struct sk_buff *skb, const struct mac802154_llsec *sec,
u8 iv[16];
struct scatterlist src;
SKCIPHER_REQUEST_ON_STACK(req, key->tfm0);
- int err;
+ int err, datalen;
+ unsigned char *data;
llsec_geniv(iv, sec->params.hwaddr, &hdr->sec);
- sg_init_one(&src, skb->data, skb->len);
+ /* Compute data payload offset and data length */
+ data = skb_mac_header(skb) + skb->mac_len;
+ datalen = skb_tail_pointer(skb) - data;
+ sg_init_one(&src, data, datalen);
+
skcipher_request_set_tfm(req, key->tfm0);
skcipher_request_set_callback(req, 0, NULL, NULL);
- skcipher_request_set_crypt(req, &src, &src, skb->len, iv);
+ skcipher_request_set_crypt(req, &src, &src, datalen, iv);
err = crypto_skcipher_encrypt(req);
skcipher_request_zero(req);
return err;
@@ -713,7 +718,8 @@ int mac802154_llsec_encrypt(struct mac802154_llsec *sec, struct sk_buff *skb)
if (hlen < 0 || hdr.fc.type != IEEE802154_FC_TYPE_DATA)
return -EINVAL;
- if (!hdr.fc.security_enabled || hdr.sec.level == 0) {
+ if (!hdr.fc.security_enabled ||
+ (hdr.sec.level == IEEE802154_SCF_SECLEVEL_NONE)) {
skb_push(skb, hlen);
return 0;
}
diff --git a/net/mac802154/trace.c b/net/mac802154/trace.c
index 863e5e6b983d..c36e3d541a42 100644
--- a/net/mac802154/trace.c
+++ b/net/mac802154/trace.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/module.h>
#ifndef __CHECKER__
diff --git a/net/mac802154/trace.h b/net/mac802154/trace.h
index 6f30e0c93a16..2c8a43d3607f 100644
--- a/net/mac802154/trace.h
+++ b/net/mac802154/trace.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Based on net/mac80211/trace.h */
#undef TRACE_SYSTEM
diff --git a/net/mpls/Kconfig b/net/mpls/Kconfig
index 5c467ef97311..801ea9098387 100644
--- a/net/mpls/Kconfig
+++ b/net/mpls/Kconfig
@@ -24,6 +24,7 @@ config NET_MPLS_GSO
config MPLS_ROUTING
tristate "MPLS: routing support"
+ depends on NET_IP_TUNNEL || NET_IP_TUNNEL=n
---help---
Add support for forwarding of mpls packets.
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index ea4f481839dd..8ca9915befc8 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -16,6 +16,7 @@
#include <net/arp.h>
#include <net/ip_fib.h>
#include <net/netevent.h>
+#include <net/ip_tunnels.h>
#include <net/netns/generic.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
@@ -39,6 +40,36 @@ static int one = 1;
static int label_limit = (1 << 20) - 1;
static int ttl_max = 255;
+#if IS_ENABLED(CONFIG_NET_IP_TUNNEL)
+static size_t ipgre_mpls_encap_hlen(struct ip_tunnel_encap *e)
+{
+ return sizeof(struct mpls_shim_hdr);
+}
+
+static const struct ip_tunnel_encap_ops mpls_iptun_ops = {
+ .encap_hlen = ipgre_mpls_encap_hlen,
+};
+
+static int ipgre_tunnel_encap_add_mpls_ops(void)
+{
+ return ip_tunnel_encap_add_ops(&mpls_iptun_ops, TUNNEL_ENCAP_MPLS);
+}
+
+static void ipgre_tunnel_encap_del_mpls_ops(void)
+{
+ ip_tunnel_encap_del_ops(&mpls_iptun_ops, TUNNEL_ENCAP_MPLS);
+}
+#else
+static int ipgre_tunnel_encap_add_mpls_ops(void)
+{
+ return 0;
+}
+
+static void ipgre_tunnel_encap_del_mpls_ops(void)
+{
+}
+#endif
+
static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt,
struct nlmsghdr *nlh, struct net *net, u32 portid,
unsigned int nlm_flags);
@@ -2479,12 +2510,16 @@ static int __init mpls_init(void)
rtnl_af_register(&mpls_af_ops);
- rtnl_register(PF_MPLS, RTM_NEWROUTE, mpls_rtm_newroute, NULL, NULL);
- rtnl_register(PF_MPLS, RTM_DELROUTE, mpls_rtm_delroute, NULL, NULL);
+ rtnl_register(PF_MPLS, RTM_NEWROUTE, mpls_rtm_newroute, NULL, 0);
+ rtnl_register(PF_MPLS, RTM_DELROUTE, mpls_rtm_delroute, NULL, 0);
rtnl_register(PF_MPLS, RTM_GETROUTE, mpls_getroute, mpls_dump_routes,
- NULL);
+ 0);
rtnl_register(PF_MPLS, RTM_GETNETCONF, mpls_netconf_get_devconf,
- mpls_netconf_dump_devconf, NULL);
+ mpls_netconf_dump_devconf, 0);
+ err = ipgre_tunnel_encap_add_mpls_ops();
+ if (err)
+ pr_err("Can't add mpls over gre tunnel ops\n");
+
err = 0;
out:
return err;
@@ -2502,6 +2537,7 @@ static void __exit mpls_exit(void)
dev_remove_pack(&mpls_packet_type);
unregister_netdevice_notifier(&mpls_dev_notifier);
unregister_pernet_subsys(&mpls_net_ops);
+ ipgre_tunnel_encap_del_mpls_ops();
}
module_exit(mpls_exit);
diff --git a/net/mpls/internal.h b/net/mpls/internal.h
index cf65aec2e551..768a302879b4 100644
--- a/net/mpls/internal.h
+++ b/net/mpls/internal.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef MPLS_INTERNAL_H
#define MPLS_INTERNAL_H
#include <net/mpls.h>
diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h
index 1308a56f2591..d30f7bd741d0 100644
--- a/net/ncsi/internal.h
+++ b/net/ncsi/internal.h
@@ -180,6 +180,7 @@ struct ncsi_channel {
#define NCSI_CHANNEL_INACTIVE 1
#define NCSI_CHANNEL_ACTIVE 2
#define NCSI_CHANNEL_INVISIBLE 3
+ bool reconfigure_needed;
spinlock_t lock; /* Protect filters etc */
struct ncsi_package *package;
struct ncsi_channel_version version;
@@ -235,6 +236,9 @@ enum {
ncsi_dev_state_probe_dp,
ncsi_dev_state_config_sp = 0x0301,
ncsi_dev_state_config_cis,
+ ncsi_dev_state_config_clear_vids,
+ ncsi_dev_state_config_svf,
+ ncsi_dev_state_config_ev,
ncsi_dev_state_config_sma,
ncsi_dev_state_config_ebf,
#if IS_ENABLED(CONFIG_IPV6)
@@ -253,6 +257,12 @@ enum {
ncsi_dev_state_suspend_done
};
+struct vlan_vid {
+ struct list_head list;
+ __be16 proto;
+ u16 vid;
+};
+
struct ncsi_dev_priv {
struct ncsi_dev ndev; /* Associated NCSI device */
unsigned int flags; /* NCSI device flags */
@@ -276,6 +286,8 @@ struct ncsi_dev_priv {
struct work_struct work; /* For channel management */
struct packet_type ptype; /* NCSI packet Rx handler */
struct list_head node; /* Form NCSI device list */
+#define NCSI_MAX_VLAN_VIDS 15
+ struct list_head vlan_vids; /* List of active VLAN IDs */
};
struct ncsi_cmd_arg {
diff --git a/net/ncsi/ncsi-aen.c b/net/ncsi/ncsi-aen.c
index 6898e7229285..67e708e98ccf 100644
--- a/net/ncsi/ncsi-aen.c
+++ b/net/ncsi/ncsi-aen.c
@@ -73,6 +73,9 @@ static int ncsi_aen_handler_lsc(struct ncsi_dev_priv *ndp,
ncm->data[2] = data;
ncm->data[4] = ntohl(lsc->oem_status);
+ netdev_info(ndp->ndev.dev, "NCSI: LSC AEN - channel %u state %s\n",
+ nc->id, data & 0x1 ? "up" : "down");
+
chained = !list_empty(&nc->link);
state = nc->state;
spin_unlock_irqrestore(&nc->lock, flags);
@@ -145,6 +148,8 @@ static int ncsi_aen_handler_hncdsc(struct ncsi_dev_priv *ndp,
ncm = &nc->modes[NCSI_MODE_LINK];
hncdsc = (struct ncsi_aen_hncdsc_pkt *)h;
ncm->data[3] = ntohl(hncdsc->status);
+ netdev_info(ndp->ndev.dev, "NCSI: HNCDSC AEN - channel %u state %s\n",
+ nc->id, ncm->data[3] & 0x3 ? "up" : "down");
if (!list_empty(&nc->link) ||
nc->state != NCSI_CHANNEL_ACTIVE) {
spin_unlock_irqrestore(&nc->lock, flags);
@@ -187,7 +192,7 @@ static struct ncsi_aen_handler {
} ncsi_aen_handlers[] = {
{ NCSI_PKT_AEN_LSC, 12, ncsi_aen_handler_lsc },
{ NCSI_PKT_AEN_CR, 4, ncsi_aen_handler_cr },
- { NCSI_PKT_AEN_HNCDSC, 4, ncsi_aen_handler_hncdsc }
+ { NCSI_PKT_AEN_HNCDSC, 8, ncsi_aen_handler_hncdsc }
};
int ncsi_aen_handler(struct ncsi_dev_priv *ndp, struct sk_buff *skb)
@@ -212,10 +217,18 @@ int ncsi_aen_handler(struct ncsi_dev_priv *ndp, struct sk_buff *skb)
}
ret = ncsi_validate_aen_pkt(h, nah->payload);
- if (ret)
+ if (ret) {
+ netdev_warn(ndp->ndev.dev,
+ "NCSI: 'bad' packet ignored for AEN type 0x%x\n",
+ h->type);
goto out;
+ }
ret = nah->handler(ndp, h);
+ if (ret)
+ netdev_err(ndp->ndev.dev,
+ "NCSI: Handler for AEN type 0x%x returned %d\n",
+ h->type, ret);
out:
consume_skb(skb);
return ret;
diff --git a/net/ncsi/ncsi-cmd.c b/net/ncsi/ncsi-cmd.c
index 5e03ed190e18..7567ca63aae2 100644
--- a/net/ncsi/ncsi-cmd.c
+++ b/net/ncsi/ncsi-cmd.c
@@ -139,9 +139,9 @@ static int ncsi_cmd_handler_svf(struct sk_buff *skb,
struct ncsi_cmd_svf_pkt *cmd;
cmd = skb_put_zero(skb, sizeof(*cmd));
- cmd->vlan = htons(nca->words[0]);
- cmd->index = nca->bytes[2];
- cmd->enable = nca->bytes[3];
+ cmd->vlan = htons(nca->words[1]);
+ cmd->index = nca->bytes[6];
+ cmd->enable = nca->bytes[7];
ncsi_cmd_build_header(&cmd->cmd.common, nca);
return 0;
@@ -153,7 +153,7 @@ static int ncsi_cmd_handler_ev(struct sk_buff *skb,
struct ncsi_cmd_ev_pkt *cmd;
cmd = skb_put_zero(skb, sizeof(*cmd));
- cmd->mode = nca->bytes[0];
+ cmd->mode = nca->bytes[3];
ncsi_cmd_build_header(&cmd->cmd.common, nca);
return 0;
@@ -228,7 +228,7 @@ static struct ncsi_cmd_handler {
{ NCSI_PKT_CMD_AE, 8, ncsi_cmd_handler_ae },
{ NCSI_PKT_CMD_SL, 8, ncsi_cmd_handler_sl },
{ NCSI_PKT_CMD_GLS, 0, ncsi_cmd_handler_default },
- { NCSI_PKT_CMD_SVF, 4, ncsi_cmd_handler_svf },
+ { NCSI_PKT_CMD_SVF, 8, ncsi_cmd_handler_svf },
{ NCSI_PKT_CMD_EV, 4, ncsi_cmd_handler_ev },
{ NCSI_PKT_CMD_DV, 0, ncsi_cmd_handler_default },
{ NCSI_PKT_CMD_SMA, 8, ncsi_cmd_handler_sma },
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index a3bd5fa8ad09..a2b904a718c6 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -38,6 +38,25 @@ static inline int ncsi_filter_size(int table)
return sizes[table];
}
+static u32 *ncsi_get_filter(struct ncsi_channel *nc, int table, int index)
+{
+ struct ncsi_channel_filter *ncf;
+ int size;
+
+ ncf = nc->filters[table];
+ if (!ncf)
+ return NULL;
+
+ size = ncsi_filter_size(table);
+ if (size < 0)
+ return NULL;
+
+ return ncf->data + size * index;
+}
+
+/* Find the first active filter in a filter table that matches the given
+ * data parameter. If data is NULL, this returns the first active filter.
+ */
int ncsi_find_filter(struct ncsi_channel *nc, int table, void *data)
{
struct ncsi_channel_filter *ncf;
@@ -58,7 +77,7 @@ int ncsi_find_filter(struct ncsi_channel *nc, int table, void *data)
index = -1;
while ((index = find_next_bit(bitmap, ncf->total, index + 1))
< ncf->total) {
- if (!memcmp(ncf->data + size * index, data, size)) {
+ if (!data || !memcmp(ncf->data + size * index, data, size)) {
spin_unlock_irqrestore(&nc->lock, flags);
return index;
}
@@ -170,6 +189,7 @@ static void ncsi_channel_monitor(unsigned long data)
struct ncsi_channel *nc = (struct ncsi_channel *)data;
struct ncsi_package *np = nc->package;
struct ncsi_dev_priv *ndp = np->ndp;
+ struct ncsi_channel_mode *ncm;
struct ncsi_cmd_arg nca;
bool enabled, chained;
unsigned int monitor_state;
@@ -183,11 +203,15 @@ static void ncsi_channel_monitor(unsigned long data)
monitor_state = nc->monitor.state;
spin_unlock_irqrestore(&nc->lock, flags);
- if (!enabled || chained)
+ if (!enabled || chained) {
+ ncsi_stop_channel_monitor(nc);
return;
+ }
if (state != NCSI_CHANNEL_INACTIVE &&
- state != NCSI_CHANNEL_ACTIVE)
+ state != NCSI_CHANNEL_ACTIVE) {
+ ncsi_stop_channel_monitor(nc);
return;
+ }
switch (monitor_state) {
case NCSI_CHANNEL_MONITOR_START:
@@ -198,28 +222,30 @@ static void ncsi_channel_monitor(unsigned long data)
nca.type = NCSI_PKT_CMD_GLS;
nca.req_flags = 0;
ret = ncsi_xmit_cmd(&nca);
- if (ret) {
+ if (ret)
netdev_err(ndp->ndev.dev, "Error %d sending GLS\n",
ret);
- return;
- }
-
break;
case NCSI_CHANNEL_MONITOR_WAIT ... NCSI_CHANNEL_MONITOR_WAIT_MAX:
break;
default:
- if (!(ndp->flags & NCSI_DEV_HWA) &&
- state == NCSI_CHANNEL_ACTIVE) {
+ netdev_err(ndp->ndev.dev, "NCSI Channel %d timed out!\n",
+ nc->id);
+ if (!(ndp->flags & NCSI_DEV_HWA)) {
ncsi_report_link(ndp, true);
ndp->flags |= NCSI_DEV_RESHUFFLE;
}
+ ncsi_stop_channel_monitor(nc);
+
+ ncm = &nc->modes[NCSI_MODE_LINK];
spin_lock_irqsave(&nc->lock, flags);
nc->state = NCSI_CHANNEL_INVISIBLE;
+ ncm->data[2] &= ~0x1;
spin_unlock_irqrestore(&nc->lock, flags);
spin_lock_irqsave(&ndp->lock, flags);
- nc->state = NCSI_CHANNEL_INACTIVE;
+ nc->state = NCSI_CHANNEL_ACTIVE;
list_add_tail_rcu(&nc->link, &ndp->channel_queue);
spin_unlock_irqrestore(&ndp->lock, flags);
ncsi_process_next_channel(ndp);
@@ -639,6 +665,99 @@ error:
nd->state = ncsi_dev_state_functional;
}
+/* Check the VLAN filter bitmap for a set filter, and construct a
+ * "Set VLAN Filter - Disable" packet if found.
+ */
+static int clear_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc,
+ struct ncsi_cmd_arg *nca)
+{
+ int index;
+ u32 *data;
+ u16 vid;
+
+ index = ncsi_find_filter(nc, NCSI_FILTER_VLAN, NULL);
+ if (index < 0) {
+ /* Filter table empty */
+ return -1;
+ }
+
+ data = ncsi_get_filter(nc, NCSI_FILTER_VLAN, index);
+ if (!data) {
+ netdev_err(ndp->ndev.dev,
+ "NCSI: failed to retrieve filter %d\n", index);
+ /* Set the VLAN id to 0 - this will still disable the entry in
+ * the filter table, but we won't know what it was.
+ */
+ vid = 0;
+ } else {
+ vid = *(u16 *)data;
+ }
+
+ netdev_printk(KERN_DEBUG, ndp->ndev.dev,
+ "NCSI: removed vlan tag %u at index %d\n",
+ vid, index + 1);
+ ncsi_remove_filter(nc, NCSI_FILTER_VLAN, index);
+
+ nca->type = NCSI_PKT_CMD_SVF;
+ nca->words[1] = vid;
+ /* HW filter index starts at 1 */
+ nca->bytes[6] = index + 1;
+ nca->bytes[7] = 0x00;
+ return 0;
+}
+
+/* Find an outstanding VLAN tag and constuct a "Set VLAN Filter - Enable"
+ * packet.
+ */
+static int set_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc,
+ struct ncsi_cmd_arg *nca)
+{
+ struct vlan_vid *vlan = NULL;
+ int index = 0;
+
+ list_for_each_entry_rcu(vlan, &ndp->vlan_vids, list) {
+ index = ncsi_find_filter(nc, NCSI_FILTER_VLAN, &vlan->vid);
+ if (index < 0) {
+ /* New tag to add */
+ netdev_printk(KERN_DEBUG, ndp->ndev.dev,
+ "NCSI: new vlan id to set: %u\n",
+ vlan->vid);
+ break;
+ }
+ netdev_printk(KERN_DEBUG, ndp->ndev.dev,
+ "vid %u already at filter pos %d\n",
+ vlan->vid, index);
+ }
+
+ if (!vlan || index >= 0) {
+ netdev_printk(KERN_DEBUG, ndp->ndev.dev,
+ "no vlan ids left to set\n");
+ return -1;
+ }
+
+ index = ncsi_add_filter(nc, NCSI_FILTER_VLAN, &vlan->vid);
+ if (index < 0) {
+ netdev_err(ndp->ndev.dev,
+ "Failed to add new VLAN tag, error %d\n", index);
+ if (index == -ENOSPC)
+ netdev_err(ndp->ndev.dev,
+ "Channel %u already has all VLAN filters set\n",
+ nc->id);
+ return -1;
+ }
+
+ netdev_printk(KERN_DEBUG, ndp->ndev.dev,
+ "NCSI: set vid %u in packet, index %u\n",
+ vlan->vid, index + 1);
+ nca->type = NCSI_PKT_CMD_SVF;
+ nca->words[1] = vlan->vid;
+ /* HW filter index starts at 1 */
+ nca->bytes[6] = index + 1;
+ nca->bytes[7] = 0x01;
+
+ return 0;
+}
+
static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
{
struct ncsi_dev *nd = &ndp->ndev;
@@ -667,8 +786,11 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
nca.package = np->id;
nca.channel = NCSI_RESERVED_CHANNEL;
ret = ncsi_xmit_cmd(&nca);
- if (ret)
+ if (ret) {
+ netdev_err(ndp->ndev.dev,
+ "NCSI: Failed to transmit CMD_SP\n");
goto error;
+ }
nd->state = ncsi_dev_state_config_cis;
break;
@@ -680,11 +802,17 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
nca.package = np->id;
nca.channel = nc->id;
ret = ncsi_xmit_cmd(&nca);
- if (ret)
+ if (ret) {
+ netdev_err(ndp->ndev.dev,
+ "NCSI: Failed to transmit CMD_CIS\n");
goto error;
+ }
- nd->state = ncsi_dev_state_config_sma;
+ nd->state = ncsi_dev_state_config_clear_vids;
break;
+ case ncsi_dev_state_config_clear_vids:
+ case ncsi_dev_state_config_svf:
+ case ncsi_dev_state_config_ev:
case ncsi_dev_state_config_sma:
case ncsi_dev_state_config_ebf:
#if IS_ENABLED(CONFIG_IPV6)
@@ -699,11 +827,40 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
nca.package = np->id;
nca.channel = nc->id;
+ /* Clear any active filters on the channel before setting */
+ if (nd->state == ncsi_dev_state_config_clear_vids) {
+ ret = clear_one_vid(ndp, nc, &nca);
+ if (ret) {
+ nd->state = ncsi_dev_state_config_svf;
+ schedule_work(&ndp->work);
+ break;
+ }
+ /* Repeat */
+ nd->state = ncsi_dev_state_config_clear_vids;
+ /* Add known VLAN tags to the filter */
+ } else if (nd->state == ncsi_dev_state_config_svf) {
+ ret = set_one_vid(ndp, nc, &nca);
+ if (ret) {
+ nd->state = ncsi_dev_state_config_ev;
+ schedule_work(&ndp->work);
+ break;
+ }
+ /* Repeat */
+ nd->state = ncsi_dev_state_config_svf;
+ /* Enable/Disable the VLAN filter */
+ } else if (nd->state == ncsi_dev_state_config_ev) {
+ if (list_empty(&ndp->vlan_vids)) {
+ nca.type = NCSI_PKT_CMD_DV;
+ } else {
+ nca.type = NCSI_PKT_CMD_EV;
+ nca.bytes[3] = NCSI_CAP_VLAN_NO;
+ }
+ nd->state = ncsi_dev_state_config_sma;
+ } else if (nd->state == ncsi_dev_state_config_sma) {
/* Use first entry in unicast filter table. Note that
* the MAC filter table starts from entry 1 instead of
* 0.
*/
- if (nd->state == ncsi_dev_state_config_sma) {
nca.type = NCSI_PKT_CMD_SMA;
for (index = 0; index < 6; index++)
nca.bytes[index] = dev->dev_addr[index];
@@ -746,17 +903,45 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
}
ret = ncsi_xmit_cmd(&nca);
- if (ret)
+ if (ret) {
+ netdev_err(ndp->ndev.dev,
+ "NCSI: Failed to transmit CMD %x\n",
+ nca.type);
goto error;
+ }
break;
case ncsi_dev_state_config_done:
+ netdev_printk(KERN_DEBUG, ndp->ndev.dev,
+ "NCSI: channel %u config done\n", nc->id);
spin_lock_irqsave(&nc->lock, flags);
+ if (nc->reconfigure_needed) {
+ /* This channel's configuration has been updated
+ * part-way during the config state - start the
+ * channel configuration over
+ */
+ nc->reconfigure_needed = false;
+ nc->state = NCSI_CHANNEL_INACTIVE;
+ spin_unlock_irqrestore(&nc->lock, flags);
+
+ spin_lock_irqsave(&ndp->lock, flags);
+ list_add_tail_rcu(&nc->link, &ndp->channel_queue);
+ spin_unlock_irqrestore(&ndp->lock, flags);
+
+ netdev_printk(KERN_DEBUG, dev,
+ "Dirty NCSI channel state reset\n");
+ ncsi_process_next_channel(ndp);
+ break;
+ }
+
if (nc->modes[NCSI_MODE_LINK].data[2] & 0x1) {
hot_nc = nc;
nc->state = NCSI_CHANNEL_ACTIVE;
} else {
hot_nc = NULL;
nc->state = NCSI_CHANNEL_INACTIVE;
+ netdev_warn(ndp->ndev.dev,
+ "NCSI: channel %u link down after config\n",
+ nc->id);
}
spin_unlock_irqrestore(&nc->lock, flags);
@@ -769,8 +954,8 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
ncsi_process_next_channel(ndp);
break;
default:
- netdev_warn(dev, "Wrong NCSI state 0x%x in config\n",
- nd->state);
+ netdev_alert(dev, "Wrong NCSI state 0x%x in config\n",
+ nd->state);
}
return;
@@ -822,10 +1007,17 @@ static int ncsi_choose_active_channel(struct ncsi_dev_priv *ndp)
}
if (!found) {
+ netdev_warn(ndp->ndev.dev,
+ "NCSI: No channel found with link\n");
ncsi_report_link(ndp, true);
return -ENODEV;
}
+ ncm = &found->modes[NCSI_MODE_LINK];
+ netdev_printk(KERN_DEBUG, ndp->ndev.dev,
+ "NCSI: Channel %u added to queue (link %s)\n",
+ found->id, ncm->data[2] & 0x1 ? "up" : "down");
+
out:
spin_lock_irqsave(&ndp->lock, flags);
list_add_tail_rcu(&found->link, &ndp->channel_queue);
@@ -839,12 +1031,15 @@ static bool ncsi_check_hwa(struct ncsi_dev_priv *ndp)
struct ncsi_package *np;
struct ncsi_channel *nc;
unsigned int cap;
+ bool has_channel = false;
/* The hardware arbitration is disabled if any one channel
* doesn't support explicitly.
*/
NCSI_FOR_EACH_PACKAGE(ndp, np) {
NCSI_FOR_EACH_CHANNEL(np, nc) {
+ has_channel = true;
+
cap = nc->caps[NCSI_CAP_GENERIC].cap;
if (!(cap & NCSI_CAP_GENERIC_HWA) ||
(cap & NCSI_CAP_GENERIC_HWA_MASK) !=
@@ -855,8 +1050,13 @@ static bool ncsi_check_hwa(struct ncsi_dev_priv *ndp)
}
}
- ndp->flags |= NCSI_DEV_HWA;
- return true;
+ if (has_channel) {
+ ndp->flags |= NCSI_DEV_HWA;
+ return true;
+ }
+
+ ndp->flags &= ~NCSI_DEV_HWA;
+ return false;
}
static int ncsi_enable_hwa(struct ncsi_dev_priv *ndp)
@@ -879,6 +1079,8 @@ static int ncsi_enable_hwa(struct ncsi_dev_priv *ndp)
/* We can have no channels in extremely case */
if (list_empty(&ndp->channel_queue)) {
+ netdev_err(ndp->ndev.dev,
+ "NCSI: No available channels for HWA\n");
ncsi_report_link(ndp, false);
return -ENOENT;
}
@@ -1047,6 +1249,9 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
return;
error:
+ netdev_err(ndp->ndev.dev,
+ "NCSI: Failed to transmit cmd 0x%x during probe\n",
+ nca.type);
ncsi_report_link(ndp, true);
}
@@ -1100,10 +1305,14 @@ int ncsi_process_next_channel(struct ncsi_dev_priv *ndp)
switch (old_state) {
case NCSI_CHANNEL_INACTIVE:
ndp->ndev.state = ncsi_dev_state_config;
+ netdev_info(ndp->ndev.dev, "NCSI: configuring channel %u\n",
+ nc->id);
ncsi_configure_channel(ndp);
break;
case NCSI_CHANNEL_ACTIVE:
ndp->ndev.state = ncsi_dev_state_suspend;
+ netdev_info(ndp->ndev.dev, "NCSI: suspending channel %u\n",
+ nc->id);
ncsi_suspend_channel(ndp);
break;
default:
@@ -1123,6 +1332,8 @@ out:
return ncsi_choose_active_channel(ndp);
}
+ netdev_printk(KERN_DEBUG, ndp->ndev.dev,
+ "NCSI: No more channels to process\n");
ncsi_report_link(ndp, false);
return -ENODEV;
}
@@ -1191,6 +1402,147 @@ static struct notifier_block ncsi_inet6addr_notifier = {
};
#endif /* CONFIG_IPV6 */
+static int ncsi_kick_channels(struct ncsi_dev_priv *ndp)
+{
+ struct ncsi_dev *nd = &ndp->ndev;
+ struct ncsi_channel *nc;
+ struct ncsi_package *np;
+ unsigned long flags;
+ unsigned int n = 0;
+
+ NCSI_FOR_EACH_PACKAGE(ndp, np) {
+ NCSI_FOR_EACH_CHANNEL(np, nc) {
+ spin_lock_irqsave(&nc->lock, flags);
+
+ /* Channels may be busy, mark dirty instead of
+ * kicking if;
+ * a) not ACTIVE (configured)
+ * b) in the channel_queue (to be configured)
+ * c) it's ndev is in the config state
+ */
+ if (nc->state != NCSI_CHANNEL_ACTIVE) {
+ if ((ndp->ndev.state & 0xff00) ==
+ ncsi_dev_state_config ||
+ !list_empty(&nc->link)) {
+ netdev_printk(KERN_DEBUG, nd->dev,
+ "NCSI: channel %p marked dirty\n",
+ nc);
+ nc->reconfigure_needed = true;
+ }
+ spin_unlock_irqrestore(&nc->lock, flags);
+ continue;
+ }
+
+ spin_unlock_irqrestore(&nc->lock, flags);
+
+ ncsi_stop_channel_monitor(nc);
+ spin_lock_irqsave(&nc->lock, flags);
+ nc->state = NCSI_CHANNEL_INACTIVE;
+ spin_unlock_irqrestore(&nc->lock, flags);
+
+ spin_lock_irqsave(&ndp->lock, flags);
+ list_add_tail_rcu(&nc->link, &ndp->channel_queue);
+ spin_unlock_irqrestore(&ndp->lock, flags);
+
+ netdev_printk(KERN_DEBUG, nd->dev,
+ "NCSI: kicked channel %p\n", nc);
+ n++;
+ }
+ }
+
+ return n;
+}
+
+int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
+{
+ struct ncsi_dev_priv *ndp;
+ unsigned int n_vids = 0;
+ struct vlan_vid *vlan;
+ struct ncsi_dev *nd;
+ bool found = false;
+
+ if (vid == 0)
+ return 0;
+
+ nd = ncsi_find_dev(dev);
+ if (!nd) {
+ netdev_warn(dev, "NCSI: No net_device?\n");
+ return 0;
+ }
+
+ ndp = TO_NCSI_DEV_PRIV(nd);
+
+ /* Add the VLAN id to our internal list */
+ list_for_each_entry_rcu(vlan, &ndp->vlan_vids, list) {
+ n_vids++;
+ if (vlan->vid == vid) {
+ netdev_printk(KERN_DEBUG, dev,
+ "NCSI: vid %u already registered\n", vid);
+ return 0;
+ }
+ }
+ if (n_vids >= NCSI_MAX_VLAN_VIDS) {
+ netdev_warn(dev,
+ "tried to add vlan id %u but NCSI max already registered (%u)\n",
+ vid, NCSI_MAX_VLAN_VIDS);
+ return -ENOSPC;
+ }
+
+ vlan = kzalloc(sizeof(*vlan), GFP_KERNEL);
+ if (!vlan)
+ return -ENOMEM;
+
+ vlan->proto = proto;
+ vlan->vid = vid;
+ list_add_rcu(&vlan->list, &ndp->vlan_vids);
+
+ netdev_printk(KERN_DEBUG, dev, "NCSI: Added new vid %u\n", vid);
+
+ found = ncsi_kick_channels(ndp) != 0;
+
+ return found ? ncsi_process_next_channel(ndp) : 0;
+}
+EXPORT_SYMBOL_GPL(ncsi_vlan_rx_add_vid);
+
+int ncsi_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
+{
+ struct vlan_vid *vlan, *tmp;
+ struct ncsi_dev_priv *ndp;
+ struct ncsi_dev *nd;
+ bool found = false;
+
+ if (vid == 0)
+ return 0;
+
+ nd = ncsi_find_dev(dev);
+ if (!nd) {
+ netdev_warn(dev, "NCSI: no net_device?\n");
+ return 0;
+ }
+
+ ndp = TO_NCSI_DEV_PRIV(nd);
+
+ /* Remove the VLAN id from our internal list */
+ list_for_each_entry_safe(vlan, tmp, &ndp->vlan_vids, list)
+ if (vlan->vid == vid) {
+ netdev_printk(KERN_DEBUG, dev,
+ "NCSI: vid %u found, removing\n", vid);
+ list_del_rcu(&vlan->list);
+ found = true;
+ kfree(vlan);
+ }
+
+ if (!found) {
+ netdev_err(dev, "NCSI: vid %u wasn't registered!\n", vid);
+ return -EINVAL;
+ }
+
+ found = ncsi_kick_channels(ndp) != 0;
+
+ return found ? ncsi_process_next_channel(ndp) : 0;
+}
+EXPORT_SYMBOL_GPL(ncsi_vlan_rx_kill_vid);
+
struct ncsi_dev *ncsi_register_dev(struct net_device *dev,
void (*handler)(struct ncsi_dev *ndev))
{
@@ -1215,6 +1567,7 @@ struct ncsi_dev *ncsi_register_dev(struct net_device *dev,
nd->handler = handler;
ndp->pending_req_num = 0;
INIT_LIST_HEAD(&ndp->channel_queue);
+ INIT_LIST_HEAD(&ndp->vlan_vids);
INIT_WORK(&ndp->work, ncsi_dev_work);
/* Initialize private NCSI device */
@@ -1263,10 +1616,12 @@ int ncsi_start_dev(struct ncsi_dev *nd)
return 0;
}
- if (ndp->flags & NCSI_DEV_HWA)
+ if (ndp->flags & NCSI_DEV_HWA) {
+ netdev_info(ndp->ndev.dev, "NCSI: Enabling HWA mode\n");
ret = ncsi_enable_hwa(ndp);
- else
+ } else {
ret = ncsi_choose_active_channel(ndp);
+ }
return ret;
}
@@ -1297,6 +1652,7 @@ void ncsi_stop_dev(struct ncsi_dev *nd)
}
}
+ netdev_printk(KERN_DEBUG, ndp->ndev.dev, "NCSI: Stopping device\n");
ncsi_report_link(ndp, true);
}
EXPORT_SYMBOL_GPL(ncsi_stop_dev);
diff --git a/net/ncsi/ncsi-pkt.h b/net/ncsi/ncsi-pkt.h
index 3ea49ed0a935..91b4b66438df 100644
--- a/net/ncsi/ncsi-pkt.h
+++ b/net/ncsi/ncsi-pkt.h
@@ -104,7 +104,7 @@ struct ncsi_cmd_svf_pkt {
unsigned char index; /* VLAN table index */
unsigned char enable; /* Enable or disable */
__be32 checksum; /* Checksum */
- unsigned char pad[14];
+ unsigned char pad[18];
};
/* Enable VLAN */
diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c
index 087db775b3dc..efd933ff5570 100644
--- a/net/ncsi/ncsi-rsp.c
+++ b/net/ncsi/ncsi-rsp.c
@@ -146,7 +146,7 @@ static int ncsi_rsp_handler_ec(struct ncsi_request *nr)
ncm = &nc->modes[NCSI_MODE_ENABLE];
if (ncm->enable)
- return -EBUSY;
+ return 0;
ncm->enable = 1;
return 0;
@@ -173,7 +173,7 @@ static int ncsi_rsp_handler_dc(struct ncsi_request *nr)
ncm = &nc->modes[NCSI_MODE_ENABLE];
if (!ncm->enable)
- return -EBUSY;
+ return 0;
ncm->enable = 0;
return 0;
@@ -217,7 +217,7 @@ static int ncsi_rsp_handler_ecnt(struct ncsi_request *nr)
ncm = &nc->modes[NCSI_MODE_TX_ENABLE];
if (ncm->enable)
- return -EBUSY;
+ return 0;
ncm->enable = 1;
return 0;
@@ -239,7 +239,7 @@ static int ncsi_rsp_handler_dcnt(struct ncsi_request *nr)
ncm = &nc->modes[NCSI_MODE_TX_ENABLE];
if (!ncm->enable)
- return -EBUSY;
+ return 0;
ncm->enable = 1;
return 0;
@@ -263,7 +263,7 @@ static int ncsi_rsp_handler_ae(struct ncsi_request *nr)
/* Check if the AEN has been enabled */
ncm = &nc->modes[NCSI_MODE_AEN];
if (ncm->enable)
- return -EBUSY;
+ return 0;
/* Update to AEN configuration */
cmd = (struct ncsi_cmd_ae_pkt *)skb_network_header(nr->cmd);
@@ -354,7 +354,8 @@ static int ncsi_rsp_handler_svf(struct ncsi_request *nr)
/* Add or remove the VLAN filter */
if (!(cmd->enable & 0x1)) {
- ret = ncsi_remove_filter(nc, NCSI_FILTER_VLAN, cmd->index);
+ /* HW indexes from 1 */
+ ret = ncsi_remove_filter(nc, NCSI_FILTER_VLAN, cmd->index - 1);
} else {
vlan = ntohs(cmd->vlan);
ret = ncsi_add_filter(nc, NCSI_FILTER_VLAN, &vlan);
@@ -381,7 +382,7 @@ static int ncsi_rsp_handler_ev(struct ncsi_request *nr)
/* Check if VLAN mode has been enabled */
ncm = &nc->modes[NCSI_MODE_VLAN];
if (ncm->enable)
- return -EBUSY;
+ return 0;
/* Update to VLAN mode */
cmd = (struct ncsi_cmd_ev_pkt *)skb_network_header(nr->cmd);
@@ -408,7 +409,7 @@ static int ncsi_rsp_handler_dv(struct ncsi_request *nr)
/* Check if VLAN mode has been enabled */
ncm = &nc->modes[NCSI_MODE_VLAN];
if (!ncm->enable)
- return -EBUSY;
+ return 0;
/* Update to VLAN mode */
ncm->enable = 0;
@@ -454,13 +455,10 @@ static int ncsi_rsp_handler_sma(struct ncsi_request *nr)
bitmap = &ncf->bitmap;
if (cmd->at_e & 0x1) {
- if (test_and_set_bit(cmd->index, bitmap))
- return -EBUSY;
+ set_bit(cmd->index, bitmap);
memcpy(ncf->data + 6 * cmd->index, cmd->mac, 6);
} else {
- if (!test_and_clear_bit(cmd->index, bitmap))
- return -EBUSY;
-
+ clear_bit(cmd->index, bitmap);
memset(ncf->data + 6 * cmd->index, 0, 6);
}
@@ -484,7 +482,7 @@ static int ncsi_rsp_handler_ebf(struct ncsi_request *nr)
/* Check if broadcast filter has been enabled */
ncm = &nc->modes[NCSI_MODE_BC];
if (ncm->enable)
- return -EBUSY;
+ return 0;
/* Update to broadcast filter mode */
cmd = (struct ncsi_cmd_ebf_pkt *)skb_network_header(nr->cmd);
@@ -510,7 +508,7 @@ static int ncsi_rsp_handler_dbf(struct ncsi_request *nr)
/* Check if broadcast filter isn't enabled */
ncm = &nc->modes[NCSI_MODE_BC];
if (!ncm->enable)
- return -EBUSY;
+ return 0;
/* Update to broadcast filter mode */
ncm->enable = 0;
@@ -537,7 +535,7 @@ static int ncsi_rsp_handler_egmf(struct ncsi_request *nr)
/* Check if multicast filter has been enabled */
ncm = &nc->modes[NCSI_MODE_MC];
if (ncm->enable)
- return -EBUSY;
+ return 0;
/* Update to multicast filter mode */
cmd = (struct ncsi_cmd_egmf_pkt *)skb_network_header(nr->cmd);
@@ -563,7 +561,7 @@ static int ncsi_rsp_handler_dgmf(struct ncsi_request *nr)
/* Check if multicast filter has been enabled */
ncm = &nc->modes[NCSI_MODE_MC];
if (!ncm->enable)
- return -EBUSY;
+ return 0;
/* Update to multicast filter mode */
ncm->enable = 0;
@@ -590,7 +588,7 @@ static int ncsi_rsp_handler_snfc(struct ncsi_request *nr)
/* Check if flow control has been enabled */
ncm = &nc->modes[NCSI_MODE_FC];
if (ncm->enable)
- return -EBUSY;
+ return 0;
/* Update to flow control mode */
cmd = (struct ncsi_cmd_snfc_pkt *)skb_network_header(nr->cmd);
@@ -693,7 +691,14 @@ static int ncsi_rsp_handler_gc(struct ncsi_request *nr)
ncf->index = i;
ncf->total = cnt;
- ncf->bitmap = 0x0ul;
+ if (i == NCSI_FILTER_VLAN) {
+ /* Set VLAN filters active so they are cleared in
+ * first configuration state
+ */
+ ncf->bitmap = U64_MAX;
+ } else {
+ ncf->bitmap = 0x0ul;
+ }
nc->filters[i] = ncf;
}
@@ -951,7 +956,7 @@ static struct ncsi_rsp_handler {
{ NCSI_PKT_RSP_EGMF, 4, ncsi_rsp_handler_egmf },
{ NCSI_PKT_RSP_DGMF, 4, ncsi_rsp_handler_dgmf },
{ NCSI_PKT_RSP_SNFC, 4, ncsi_rsp_handler_snfc },
- { NCSI_PKT_RSP_GVI, 36, ncsi_rsp_handler_gvi },
+ { NCSI_PKT_RSP_GVI, 40, ncsi_rsp_handler_gvi },
{ NCSI_PKT_RSP_GC, 32, ncsi_rsp_handler_gc },
{ NCSI_PKT_RSP_GP, -1, ncsi_rsp_handler_gp },
{ NCSI_PKT_RSP_GCPS, 172, ncsi_rsp_handler_gcps },
@@ -1024,11 +1029,19 @@ int ncsi_rcv_rsp(struct sk_buff *skb, struct net_device *dev,
if (payload < 0)
payload = ntohs(hdr->length);
ret = ncsi_validate_rsp_pkt(nr, payload);
- if (ret)
+ if (ret) {
+ netdev_warn(ndp->ndev.dev,
+ "NCSI: 'bad' packet ignored for type 0x%x\n",
+ hdr->type);
goto out;
+ }
/* Process the packet */
ret = nrh->handler(nr);
+ if (ret)
+ netdev_err(ndp->ndev.dev,
+ "NCSI: Handler for packet type 0x%x returned %d\n",
+ hdr->type, ret);
out:
ncsi_free_request(nr);
return ret;
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 9b28864cc36a..e4a13cc8a2e7 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -636,6 +636,15 @@ config NFT_FWD_NETDEV
help
This option enables packet forwarding for the "netdev" family.
+config NFT_FIB_NETDEV
+ depends on NFT_FIB_IPV4
+ depends on NFT_FIB_IPV6
+ tristate "Netfilter nf_tables netdev fib lookups support"
+ help
+ This option allows using the FIB expression from the netdev table.
+ The lookup will be delegated to the IPv4 or IPv6 FIB depending
+ on the protocol of the packet.
+
endif # NF_TABLES_NETDEV
endif # NF_TABLES
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 913380919301..f78ed2470831 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o
nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o nf_conntrack_seqadj.o
@@ -100,6 +101,7 @@ obj-$(CONFIG_NFT_REDIR) += nft_redir.o
obj-$(CONFIG_NFT_HASH) += nft_hash.o
obj-$(CONFIG_NFT_FIB) += nft_fib.o
obj-$(CONFIG_NFT_FIB_INET) += nft_fib_inet.o
+obj-$(CONFIG_NFT_FIB_NETDEV) += nft_fib_netdev.o
# nf_tables netdev
obj-$(CONFIG_NFT_DUP_NETDEV) += nft_dup_netdev.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 974cf2a3795a..52cd2901a097 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -21,7 +21,7 @@
#include <linux/inetdevice.h>
#include <linux/proc_fs.h>
#include <linux/mutex.h>
-#include <linux/slab.h>
+#include <linux/mm.h>
#include <linux/rcupdate.h>
#include <net/net_namespace.h>
#include <net/sock.h>
@@ -62,10 +62,182 @@ EXPORT_SYMBOL(nf_hooks_needed);
#endif
static DEFINE_MUTEX(nf_hook_mutex);
+
+/* max hooks per family/hooknum */
+#define MAX_HOOK_COUNT 1024
+
#define nf_entry_dereference(e) \
rcu_dereference_protected(e, lockdep_is_held(&nf_hook_mutex))
-static struct nf_hook_entry __rcu **nf_hook_entry_head(struct net *net, const struct nf_hook_ops *reg)
+static struct nf_hook_entries *allocate_hook_entries_size(u16 num)
+{
+ struct nf_hook_entries *e;
+ size_t alloc = sizeof(*e) +
+ sizeof(struct nf_hook_entry) * num +
+ sizeof(struct nf_hook_ops *) * num;
+
+ if (num == 0)
+ return NULL;
+
+ e = kvzalloc(alloc, GFP_KERNEL);
+ if (e)
+ e->num_hook_entries = num;
+ return e;
+}
+
+static unsigned int accept_all(void *priv,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ return NF_ACCEPT; /* ACCEPT makes nf_hook_slow call next hook */
+}
+
+static const struct nf_hook_ops dummy_ops = {
+ .hook = accept_all,
+ .priority = INT_MIN,
+};
+
+static struct nf_hook_entries *
+nf_hook_entries_grow(const struct nf_hook_entries *old,
+ const struct nf_hook_ops *reg)
+{
+ unsigned int i, alloc_entries, nhooks, old_entries;
+ struct nf_hook_ops **orig_ops = NULL;
+ struct nf_hook_ops **new_ops;
+ struct nf_hook_entries *new;
+ bool inserted = false;
+
+ alloc_entries = 1;
+ old_entries = old ? old->num_hook_entries : 0;
+
+ if (old) {
+ orig_ops = nf_hook_entries_get_hook_ops(old);
+
+ for (i = 0; i < old_entries; i++) {
+ if (orig_ops[i] != &dummy_ops)
+ alloc_entries++;
+ }
+ }
+
+ if (alloc_entries > MAX_HOOK_COUNT)
+ return ERR_PTR(-E2BIG);
+
+ new = allocate_hook_entries_size(alloc_entries);
+ if (!new)
+ return ERR_PTR(-ENOMEM);
+
+ new_ops = nf_hook_entries_get_hook_ops(new);
+
+ i = 0;
+ nhooks = 0;
+ while (i < old_entries) {
+ if (orig_ops[i] == &dummy_ops) {
+ ++i;
+ continue;
+ }
+ if (inserted || reg->priority > orig_ops[i]->priority) {
+ new_ops[nhooks] = (void *)orig_ops[i];
+ new->hooks[nhooks] = old->hooks[i];
+ i++;
+ } else {
+ new_ops[nhooks] = (void *)reg;
+ new->hooks[nhooks].hook = reg->hook;
+ new->hooks[nhooks].priv = reg->priv;
+ inserted = true;
+ }
+ nhooks++;
+ }
+
+ if (!inserted) {
+ new_ops[nhooks] = (void *)reg;
+ new->hooks[nhooks].hook = reg->hook;
+ new->hooks[nhooks].priv = reg->priv;
+ }
+
+ return new;
+}
+
+static void hooks_validate(const struct nf_hook_entries *hooks)
+{
+#ifdef CONFIG_DEBUG_KERNEL
+ struct nf_hook_ops **orig_ops;
+ int prio = INT_MIN;
+ size_t i = 0;
+
+ orig_ops = nf_hook_entries_get_hook_ops(hooks);
+
+ for (i = 0; i < hooks->num_hook_entries; i++) {
+ if (orig_ops[i] == &dummy_ops)
+ continue;
+
+ WARN_ON(orig_ops[i]->priority < prio);
+
+ if (orig_ops[i]->priority > prio)
+ prio = orig_ops[i]->priority;
+ }
+#endif
+}
+
+/*
+ * __nf_hook_entries_try_shrink - try to shrink hook array
+ *
+ * @pp -- location of hook blob
+ *
+ * Hook unregistration must always succeed, so to-be-removed hooks
+ * are replaced by a dummy one that will just move to next hook.
+ *
+ * This counts the current dummy hooks, attempts to allocate new blob,
+ * copies the live hooks, then replaces and discards old one.
+ *
+ * return values:
+ *
+ * Returns address to free, or NULL.
+ */
+static void *__nf_hook_entries_try_shrink(struct nf_hook_entries __rcu **pp)
+{
+ struct nf_hook_entries *old, *new = NULL;
+ unsigned int i, j, skip = 0, hook_entries;
+ struct nf_hook_ops **orig_ops;
+ struct nf_hook_ops **new_ops;
+
+ old = nf_entry_dereference(*pp);
+ if (WARN_ON_ONCE(!old))
+ return NULL;
+
+ orig_ops = nf_hook_entries_get_hook_ops(old);
+ for (i = 0; i < old->num_hook_entries; i++) {
+ if (orig_ops[i] == &dummy_ops)
+ skip++;
+ }
+
+ /* if skip == hook_entries all hooks have been removed */
+ hook_entries = old->num_hook_entries;
+ if (skip == hook_entries)
+ goto out_assign;
+
+ if (skip == 0)
+ return NULL;
+
+ hook_entries -= skip;
+ new = allocate_hook_entries_size(hook_entries);
+ if (!new)
+ return NULL;
+
+ new_ops = nf_hook_entries_get_hook_ops(new);
+ for (i = 0, j = 0; i < old->num_hook_entries; i++) {
+ if (orig_ops[i] == &dummy_ops)
+ continue;
+ new->hooks[j] = old->hooks[i];
+ new_ops[j] = (void *)orig_ops[i];
+ j++;
+ }
+ hooks_validate(new);
+out_assign:
+ rcu_assign_pointer(*pp, new);
+ return old;
+}
+
+static struct nf_hook_entries __rcu **nf_hook_entry_head(struct net *net, const struct nf_hook_ops *reg)
{
if (reg->pf != NFPROTO_NETDEV)
return net->nf.hooks[reg->pf]+reg->hooknum;
@@ -76,13 +248,14 @@ static struct nf_hook_entry __rcu **nf_hook_entry_head(struct net *net, const st
return &reg->dev->nf_hooks_ingress;
}
#endif
+ WARN_ON_ONCE(1);
return NULL;
}
int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
{
- struct nf_hook_entry __rcu **pp;
- struct nf_hook_entry *entry, *p;
+ struct nf_hook_entries *p, *new_hooks;
+ struct nf_hook_entries __rcu **pp;
if (reg->pf == NFPROTO_NETDEV) {
#ifndef CONFIG_NETFILTER_INGRESS
@@ -98,23 +271,19 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
if (!pp)
return -EINVAL;
- entry = kmalloc(sizeof(*entry), GFP_KERNEL);
- if (!entry)
- return -ENOMEM;
-
- nf_hook_entry_init(entry, reg);
-
mutex_lock(&nf_hook_mutex);
- /* Find the spot in the list */
- for (; (p = nf_entry_dereference(*pp)) != NULL; pp = &p->next) {
- if (reg->priority < nf_hook_entry_priority(p))
- break;
- }
- rcu_assign_pointer(entry->next, p);
- rcu_assign_pointer(*pp, entry);
+ p = nf_entry_dereference(*pp);
+ new_hooks = nf_hook_entries_grow(p, reg);
+
+ if (!IS_ERR(new_hooks))
+ rcu_assign_pointer(*pp, new_hooks);
mutex_unlock(&nf_hook_mutex);
+ if (IS_ERR(new_hooks))
+ return PTR_ERR(new_hooks);
+
+ hooks_validate(new_hooks);
#ifdef CONFIG_NETFILTER_INGRESS
if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
net_inc_ingress_queue();
@@ -122,48 +291,74 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
#ifdef HAVE_JUMP_LABEL
static_key_slow_inc(&nf_hooks_needed[reg->pf][reg->hooknum]);
#endif
+ synchronize_net();
+ BUG_ON(p == new_hooks);
+ kvfree(p);
return 0;
}
EXPORT_SYMBOL(nf_register_net_hook);
-static struct nf_hook_entry *
-__nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
+/*
+ * __nf_unregister_net_hook - remove a hook from blob
+ *
+ * @oldp: current address of hook blob
+ * @unreg: hook to unregister
+ *
+ * This cannot fail, hook unregistration must always succeed.
+ * Therefore replace the to-be-removed hook with a dummy hook.
+ */
+static void __nf_unregister_net_hook(struct nf_hook_entries *old,
+ const struct nf_hook_ops *unreg)
{
- struct nf_hook_entry __rcu **pp;
- struct nf_hook_entry *p;
-
- pp = nf_hook_entry_head(net, reg);
- if (WARN_ON_ONCE(!pp))
- return NULL;
+ struct nf_hook_ops **orig_ops;
+ bool found = false;
+ unsigned int i;
- mutex_lock(&nf_hook_mutex);
- for (; (p = nf_entry_dereference(*pp)) != NULL; pp = &p->next) {
- if (nf_hook_entry_ops(p) == reg) {
- rcu_assign_pointer(*pp, p->next);
- break;
- }
- }
- mutex_unlock(&nf_hook_mutex);
- if (!p) {
- WARN(1, "nf_unregister_net_hook: hook not found!\n");
- return NULL;
+ orig_ops = nf_hook_entries_get_hook_ops(old);
+ for (i = 0; i < old->num_hook_entries; i++) {
+ if (orig_ops[i] != unreg)
+ continue;
+ WRITE_ONCE(old->hooks[i].hook, accept_all);
+ WRITE_ONCE(orig_ops[i], &dummy_ops);
+ found = true;
+ break;
}
+
+ if (found) {
#ifdef CONFIG_NETFILTER_INGRESS
- if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
- net_dec_ingress_queue();
+ if (unreg->pf == NFPROTO_NETDEV && unreg->hooknum == NF_NETDEV_INGRESS)
+ net_dec_ingress_queue();
#endif
#ifdef HAVE_JUMP_LABEL
- static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]);
+ static_key_slow_dec(&nf_hooks_needed[unreg->pf][unreg->hooknum]);
#endif
-
- return p;
+ } else {
+ WARN_ONCE(1, "hook not found, pf %d num %d", unreg->pf, unreg->hooknum);
+ }
}
void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
{
- struct nf_hook_entry *p = __nf_unregister_net_hook(net, reg);
+ struct nf_hook_entries __rcu **pp;
+ struct nf_hook_entries *p;
unsigned int nfq;
+ pp = nf_hook_entry_head(net, reg);
+ if (!pp)
+ return;
+
+ mutex_lock(&nf_hook_mutex);
+
+ p = nf_entry_dereference(*pp);
+ if (WARN_ON_ONCE(!p)) {
+ mutex_unlock(&nf_hook_mutex);
+ return;
+ }
+
+ __nf_unregister_net_hook(p, reg);
+
+ p = __nf_hook_entries_try_shrink(pp);
+ mutex_unlock(&nf_hook_mutex);
if (!p)
return;
@@ -173,7 +368,7 @@ void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
nfq = nf_queue_nf_hook_drop(net);
if (nfq)
synchronize_net();
- kfree(p);
+ kvfree(p);
}
EXPORT_SYMBOL(nf_unregister_net_hook);
@@ -200,26 +395,59 @@ EXPORT_SYMBOL(nf_register_net_hooks);
void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg,
unsigned int hookcount)
{
- struct nf_hook_entry *to_free[16];
- unsigned int i, n, nfq;
+ struct nf_hook_entries *to_free[16], *p;
+ struct nf_hook_entries __rcu **pp;
+ unsigned int i, j, n;
+
+ mutex_lock(&nf_hook_mutex);
+ for (i = 0; i < hookcount; i++) {
+ pp = nf_hook_entry_head(net, &reg[i]);
+ if (!pp)
+ continue;
+
+ p = nf_entry_dereference(*pp);
+ if (WARN_ON_ONCE(!p))
+ continue;
+ __nf_unregister_net_hook(p, &reg[i]);
+ }
+ mutex_unlock(&nf_hook_mutex);
do {
n = min_t(unsigned int, hookcount, ARRAY_SIZE(to_free));
- for (i = 0; i < n; i++)
- to_free[i] = __nf_unregister_net_hook(net, &reg[i]);
+ mutex_lock(&nf_hook_mutex);
- synchronize_net();
+ for (i = 0, j = 0; i < hookcount && j < n; i++) {
+ pp = nf_hook_entry_head(net, &reg[i]);
+ if (!pp)
+ continue;
+
+ p = nf_entry_dereference(*pp);
+ if (!p)
+ continue;
+
+ to_free[j] = __nf_hook_entries_try_shrink(pp);
+ if (to_free[j])
+ ++j;
+ }
+
+ mutex_unlock(&nf_hook_mutex);
+
+ if (j) {
+ unsigned int nfq;
- /* need 2nd synchronize_net() if nfqueue is used, skb
- * can get reinjected right before nf_queue_hook_drop()
- */
- nfq = nf_queue_nf_hook_drop(net);
- if (nfq)
synchronize_net();
- for (i = 0; i < n; i++)
- kfree(to_free[i]);
+ /* need 2nd synchronize_net() if nfqueue is used, skb
+ * can get reinjected right before nf_queue_hook_drop()
+ */
+ nfq = nf_queue_nf_hook_drop(net);
+ if (nfq)
+ synchronize_net();
+
+ for (i = 0; i < j; i++)
+ kvfree(to_free[i]);
+ }
reg += n;
hookcount -= n;
@@ -230,16 +458,15 @@ EXPORT_SYMBOL(nf_unregister_net_hooks);
/* Returns 1 if okfn() needs to be executed by the caller,
* -EPERM for NF_DROP, 0 otherwise. Caller must hold rcu_read_lock. */
int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
- struct nf_hook_entry *entry)
+ const struct nf_hook_entries *e, unsigned int s)
{
unsigned int verdict;
int ret;
- do {
- verdict = nf_hook_entry_hookfn(entry, skb, state);
+ for (; s < e->num_hook_entries; s++) {
+ verdict = nf_hook_entry_hookfn(&e->hooks[s], skb, state);
switch (verdict & NF_VERDICT_MASK) {
case NF_ACCEPT:
- entry = rcu_dereference(entry->next);
break;
case NF_DROP:
kfree_skb(skb);
@@ -248,8 +475,8 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
ret = -EPERM;
return ret;
case NF_QUEUE:
- ret = nf_queue(skb, state, &entry, verdict);
- if (ret == 1 && entry)
+ ret = nf_queue(skb, state, e, s, verdict);
+ if (ret == 1)
continue;
return ret;
default:
@@ -258,7 +485,7 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
*/
return 0;
}
- } while (entry);
+ }
return 1;
}
diff --git a/net/netfilter/ipset/Makefile b/net/netfilter/ipset/Makefile
index 28ec148df02d..a445a6bf4f11 100644
--- a/net/netfilter/ipset/Makefile
+++ b/net/netfilter/ipset/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the ipset modules
#
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index 8ad2b52a0b32..5ca18f07683b 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -37,11 +37,11 @@
#define get_ext(set, map, id) ((map)->extensions + ((set)->dsize * (id)))
static void
-mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
+mtype_gc_init(struct ip_set *set, void (*gc)(struct timer_list *t))
{
struct mtype *map = set->data;
- setup_timer(&map->gc, gc, (unsigned long)set);
+ timer_setup(&map->gc, gc, 0);
mod_timer(&map->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ);
}
@@ -272,10 +272,10 @@ out:
}
static void
-mtype_gc(unsigned long ul_set)
+mtype_gc(struct timer_list *t)
{
- struct ip_set *set = (struct ip_set *)ul_set;
- struct mtype *map = set->data;
+ struct mtype *map = from_timer(map, t, gc);
+ struct ip_set *set = map->set;
void *x;
u32 id;
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index 4783efff0bde..d8975a0b4282 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -48,6 +48,7 @@ struct bitmap_ip {
size_t memsize; /* members size */
u8 netmask; /* subnet netmask */
struct timer_list gc; /* garbage collection */
+ struct ip_set *set; /* attached to this ip_set */
unsigned char extensions[0] /* data extensions */
__aligned(__alignof__(u64));
};
@@ -232,6 +233,7 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map,
map->netmask = netmask;
set->timeout = IPSET_NO_TIMEOUT;
+ map->set = set;
set->data = map;
set->family = NFPROTO_IPV4;
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index 9a065f672d3a..4c279fbd2d5d 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -52,6 +52,7 @@ struct bitmap_ipmac {
u32 elements; /* number of max elements in the set */
size_t memsize; /* members size */
struct timer_list gc; /* garbage collector */
+ struct ip_set *set; /* attached to this ip_set */
unsigned char extensions[0] /* MAC + data extensions */
__aligned(__alignof__(u64));
};
@@ -307,6 +308,7 @@ init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map,
map->elements = elements;
set->timeout = IPSET_NO_TIMEOUT;
+ map->set = set;
set->data = map;
set->family = NFPROTO_IPV4;
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index 7f0c733358a4..7f9bbd7c98b5 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -40,6 +40,7 @@ struct bitmap_port {
u32 elements; /* number of max elements in the set */
size_t memsize; /* members size */
struct timer_list gc; /* garbage collection */
+ struct ip_set *set; /* attached to this ip_set */
unsigned char extensions[0] /* data extensions */
__aligned(__alignof__(u64));
};
@@ -214,6 +215,7 @@ init_map_port(struct ip_set *set, struct bitmap_port *map,
map->last_port = last_port;
set->timeout = IPSET_NO_TIMEOUT;
+ map->set = set;
set->data = map;
set->family = NFPROTO_UNSPEC;
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index e495b5e484b1..cf84f7b37cd9 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -1191,14 +1191,17 @@ static int ip_set_swap(struct net *net, struct sock *ctnl, struct sk_buff *skb,
from->family == to->family))
return -IPSET_ERR_TYPE_MISMATCH;
- if (from->ref_netlink || to->ref_netlink)
+ write_lock_bh(&ip_set_ref_lock);
+
+ if (from->ref_netlink || to->ref_netlink) {
+ write_unlock_bh(&ip_set_ref_lock);
return -EBUSY;
+ }
strncpy(from_name, from->name, IPSET_MAXNAMELEN);
strncpy(from->name, to->name, IPSET_MAXNAMELEN);
strncpy(to->name, from_name, IPSET_MAXNAMELEN);
- write_lock_bh(&ip_set_ref_lock);
swap(from->ref, to->ref);
ip_set(inst, from_id) = to;
ip_set(inst, to_id) = from;
@@ -2072,25 +2075,28 @@ static struct pernet_operations ip_set_net_ops = {
static int __init
ip_set_init(void)
{
- int ret = nfnetlink_subsys_register(&ip_set_netlink_subsys);
+ int ret = register_pernet_subsys(&ip_set_net_ops);
+
+ if (ret) {
+ pr_err("ip_set: cannot register pernet_subsys.\n");
+ return ret;
+ }
+ ret = nfnetlink_subsys_register(&ip_set_netlink_subsys);
if (ret != 0) {
pr_err("ip_set: cannot register with nfnetlink.\n");
+ unregister_pernet_subsys(&ip_set_net_ops);
return ret;
}
+
ret = nf_register_sockopt(&so_set);
if (ret != 0) {
pr_err("SO_SET registry failed: %d\n", ret);
nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
+ unregister_pernet_subsys(&ip_set_net_ops);
return ret;
}
- ret = register_pernet_subsys(&ip_set_net_ops);
- if (ret) {
- pr_err("ip_set: cannot register pernet_subsys.\n");
- nf_unregister_sockopt(&so_set);
- nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
- return ret;
- }
+
pr_info("ip_set: protocol %u\n", IPSET_PROTOCOL);
return 0;
}
@@ -2098,9 +2104,10 @@ ip_set_init(void)
static void __exit
ip_set_fini(void)
{
- unregister_pernet_subsys(&ip_set_net_ops);
nf_unregister_sockopt(&so_set);
nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
+
+ unregister_pernet_subsys(&ip_set_net_ops);
pr_debug("these are the famous last words\n");
}
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index f236c0bc7b3f..efffc8eabafe 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -280,6 +280,7 @@ htable_bits(u32 hashsize)
struct htype {
struct htable __rcu *table; /* the hash table */
struct timer_list gc; /* garbage collection when timeout enabled */
+ struct ip_set *set; /* attached to this ip_set */
u32 maxelem; /* max elements in the hash */
u32 initval; /* random jhash init value */
#ifdef IP_SET_HASH_WITH_MARKMASK
@@ -429,11 +430,11 @@ mtype_destroy(struct ip_set *set)
}
static void
-mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
+mtype_gc_init(struct ip_set *set, void (*gc)(struct timer_list *t))
{
struct htype *h = set->data;
- setup_timer(&h->gc, gc, (unsigned long)set);
+ timer_setup(&h->gc, gc, 0);
mod_timer(&h->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ);
pr_debug("gc initialized, run in every %u\n",
IPSET_GC_PERIOD(set->timeout));
@@ -526,10 +527,10 @@ mtype_expire(struct ip_set *set, struct htype *h)
}
static void
-mtype_gc(unsigned long ul_set)
+mtype_gc(struct timer_list *t)
{
- struct ip_set *set = (struct ip_set *)ul_set;
- struct htype *h = set->data;
+ struct htype *h = from_timer(h, t, gc);
+ struct ip_set *set = h->set;
pr_debug("called\n");
spin_lock_bh(&set->lock);
@@ -1041,12 +1042,24 @@ out:
static int
mtype_head(struct ip_set *set, struct sk_buff *skb)
{
- const struct htype *h = set->data;
+ struct htype *h = set->data;
const struct htable *t;
struct nlattr *nested;
size_t memsize;
u8 htable_bits;
+ /* If any members have expired, set->elements will be wrong
+ * mytype_expire function will update it with the right count.
+ * we do not hold set->lock here, so grab it first.
+ * set->elements can still be incorrect in the case of a huge set,
+ * because elements might time out during the listing.
+ */
+ if (SET_WITH_TIMEOUT(set)) {
+ spin_lock_bh(&set->lock);
+ mtype_expire(set, h);
+ spin_unlock_bh(&set->lock);
+ }
+
rcu_read_lock_bh();
t = rcu_dereference_bh_nfnl(h->table);
memsize = mtype_ahash_memsize(h, t) + set->ext_size;
@@ -1302,6 +1315,7 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
t->htable_bits = hbits;
RCU_INIT_POINTER(h->table, t);
+ h->set = set;
set->data = h;
#ifndef IP_SET_PROTO_UNDEF
if (set->family == NFPROTO_IPV4) {
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
index 20bfbd315f61..613eb212cb48 100644
--- a/net/netfilter/ipset/ip_set_hash_ip.c
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -123,13 +123,12 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
return ret;
ip &= ip_set_hostmask(h->netmask);
+ e.ip = htonl(ip);
+ if (e.ip == 0)
+ return -IPSET_ERR_HASH_ELEM;
- if (adt == IPSET_TEST) {
- e.ip = htonl(ip);
- if (e.ip == 0)
- return -IPSET_ERR_HASH_ELEM;
+ if (adt == IPSET_TEST)
return adtfn(set, &e, &ext, &ext, flags);
- }
ip_to = ip;
if (tb[IPSET_ATTR_IP_TO]) {
@@ -148,17 +147,20 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
hosts = h->netmask == 32 ? 1 : 2 << (32 - h->netmask - 1);
- if (retried)
+ if (retried) {
ip = ntohl(h->next.ip);
- for (; !before(ip_to, ip); ip += hosts) {
e.ip = htonl(ip);
- if (e.ip == 0)
- return -IPSET_ERR_HASH_ELEM;
+ }
+ for (; ip <= ip_to;) {
ret = adtfn(set, &e, &ext, &ext, flags);
-
if (ret && !ip_set_eexist(ret, flags))
return ret;
+ ip += hosts;
+ e.ip = htonl(ip);
+ if (e.ip == 0)
+ return 0;
+
ret = 0;
}
return ret;
diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c
index b64cf14e8352..f3ba8348cf9d 100644
--- a/net/netfilter/ipset/ip_set_hash_ipmark.c
+++ b/net/netfilter/ipset/ip_set_hash_ipmark.c
@@ -149,7 +149,7 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
if (retried)
ip = ntohl(h->next.ip);
- for (; !before(ip_to, ip); ip++) {
+ for (; ip <= ip_to; ip++) {
e.ip = htonl(ip);
ret = adtfn(set, &e, &ext, &ext, flags);
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index f438740e6c6a..ddb8039ec1d2 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -178,7 +178,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
if (retried)
ip = ntohl(h->next.ip);
- for (; !before(ip_to, ip); ip++) {
+ for (; ip <= ip_to; ip++) {
p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)
: port;
for (; p <= port_to; p++) {
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index 6215fb898c50..a7f4d7a85420 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -185,7 +185,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
if (retried)
ip = ntohl(h->next.ip);
- for (; !before(ip_to, ip); ip++) {
+ for (; ip <= ip_to; ip++) {
p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)
: port;
for (; p <= port_to; p++) {
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index 5ab1b99a53c2..0f164e986bf1 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -271,7 +271,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
if (retried)
ip = ntohl(h->next.ip);
- for (; !before(ip_to, ip); ip++) {
+ for (; ip <= ip_to; ip++) {
e.ip = htonl(ip);
p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)
: port;
@@ -281,7 +281,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
ip == ntohl(h->next.ip) &&
p == ntohs(h->next.port)
? ntohl(h->next.ip2) : ip2_from;
- while (!after(ip2, ip2_to)) {
+ while (ip2 <= ip2_to) {
e.ip2 = htonl(ip2);
ip2_last = ip_set_range_to_cidr(ip2, ip2_to,
&cidr);
@@ -434,7 +434,7 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
if (unlikely(tb[IPSET_ATTR_IP_TO]))
return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
if (unlikely(tb[IPSET_ATTR_CIDR])) {
- u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+ cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
if (cidr != HOST_MASK)
return -IPSET_ERR_INVALID_CIDR;
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index 5d9e895452e7..1c67a1761e45 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -193,7 +193,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
}
if (retried)
ip = ntohl(h->next.ip);
- while (!after(ip, ip_to)) {
+ while (ip <= ip_to) {
e.ip = htonl(ip);
last = ip_set_range_to_cidr(ip, ip_to, &e.cidr);
ret = adtfn(set, &e, &ext, &ext, flags);
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index 44cf11939c91..d417074f1c1a 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -255,7 +255,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
if (retried)
ip = ntohl(h->next.ip);
- while (!after(ip, ip_to)) {
+ while (ip <= ip_to) {
e.ip = htonl(ip);
last = ip_set_range_to_cidr(ip, ip_to, &e.cidr);
ret = adtfn(set, &e, &ext, &ext, flags);
diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c
index db614e13b193..7f9ae2e9645b 100644
--- a/net/netfilter/ipset/ip_set_hash_netnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netnet.c
@@ -250,13 +250,13 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
if (retried)
ip = ntohl(h->next.ip[0]);
- while (!after(ip, ip_to)) {
+ while (ip <= ip_to) {
e.ip[0] = htonl(ip);
last = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]);
ip2 = (retried &&
ip == ntohl(h->next.ip[0])) ? ntohl(h->next.ip[1])
: ip2_from;
- while (!after(ip2, ip2_to)) {
+ while (ip2 <= ip2_to) {
e.ip[1] = htonl(ip2);
last2 = ip_set_range_to_cidr(ip2, ip2_to, &e.cidr[1]);
ret = adtfn(set, &e, &ext, &ext, flags);
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index 54b64b6cd0cd..e6ef382febe4 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -241,7 +241,7 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
if (retried)
ip = ntohl(h->next.ip);
- while (!after(ip, ip_to)) {
+ while (ip <= ip_to) {
e.ip = htonl(ip);
last = ip_set_range_to_cidr(ip, ip_to, &cidr);
e.cidr = cidr - 1;
diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c
index aff846960ac4..8602f2595a1a 100644
--- a/net/netfilter/ipset/ip_set_hash_netportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netportnet.c
@@ -291,7 +291,7 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
if (retried)
ip = ntohl(h->next.ip[0]);
- while (!after(ip, ip_to)) {
+ while (ip <= ip_to) {
e.ip[0] = htonl(ip);
ip_last = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]);
p = retried && ip == ntohl(h->next.ip[0]) ? ntohs(h->next.port)
@@ -301,7 +301,7 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
ip2 = (retried && ip == ntohl(h->next.ip[0]) &&
p == ntohs(h->next.port)) ? ntohl(h->next.ip[1])
: ip2_from;
- while (!after(ip2, ip2_to)) {
+ while (ip2 <= ip2_to) {
e.ip[1] = htonl(ip2);
ip2_last = ip_set_range_to_cidr(ip2, ip2_to,
&e.cidr[1]);
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index 178d4eba013b..e864681b8dc5 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -44,6 +44,7 @@ struct set_adt_elem {
struct list_set {
u32 size; /* size of set list array */
struct timer_list gc; /* garbage collection */
+ struct ip_set *set; /* attached to this ip_set */
struct net *net; /* namespace */
struct list_head members; /* the set members */
};
@@ -453,7 +454,6 @@ static size_t
list_set_memsize(const struct list_set *map, size_t dsize)
{
struct set_elem *e;
- size_t memsize;
u32 n = 0;
rcu_read_lock();
@@ -461,9 +461,7 @@ list_set_memsize(const struct list_set *map, size_t dsize)
n++;
rcu_read_unlock();
- memsize = sizeof(*map) + n * dsize;
-
- return memsize;
+ return (sizeof(*map) + n * dsize);
}
static int
@@ -571,10 +569,10 @@ static const struct ip_set_type_variant set_variant = {
};
static void
-list_set_gc(unsigned long ul_set)
+list_set_gc(struct timer_list *t)
{
- struct ip_set *set = (struct ip_set *)ul_set;
- struct list_set *map = set->data;
+ struct list_set *map = from_timer(map, t, gc);
+ struct ip_set *set = map->set;
spin_lock_bh(&set->lock);
set_cleanup_entries(set);
@@ -585,11 +583,11 @@ list_set_gc(unsigned long ul_set)
}
static void
-list_set_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
+list_set_gc_init(struct ip_set *set, void (*gc)(struct timer_list *t))
{
struct list_set *map = set->data;
- setup_timer(&map->gc, gc, (unsigned long)set);
+ timer_setup(&map->gc, gc, 0);
mod_timer(&map->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ);
}
@@ -606,6 +604,7 @@ init_list_set(struct net *net, struct ip_set *set, u32 size)
map->size = size;
map->net = net;
+ map->set = set;
INIT_LIST_HEAD(&map->members);
set->data = map;
diff --git a/net/netfilter/ipset/pfxlen.c b/net/netfilter/ipset/pfxlen.c
index 1c8a42c1056c..d5be9c25fad6 100644
--- a/net/netfilter/ipset/pfxlen.c
+++ b/net/netfilter/ipset/pfxlen.c
@@ -3,6 +3,141 @@
/* Prefixlen maps for fast conversions, by Jan Engelhardt. */
+#ifdef E
+#undef E
+#endif
+
+#define PREFIXES_MAP \
+ E(0x00000000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0x80000000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xC0000000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xE0000000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xF0000000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xF8000000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFC000000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFE000000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFF000000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFF800000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFC00000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFE00000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFF00000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFF80000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFC0000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFE0000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFF0000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFF8000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFC000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFE000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFF000, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFF800, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFC00, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFE00, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFF00, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFF80, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFC0, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFE0, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFF0, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFF8, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFC, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFE, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0x80000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xC0000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xE0000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xF0000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xF8000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFC000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFE000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFF000000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFF800000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFC00000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFE00000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFF00000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFF80000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFC0000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFE0000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFF0000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFF8000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFC000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFE000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFF000, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFF800, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFC00, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFE00, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFF00, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFF80, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFC0, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFE0, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFF0, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFF8, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFC, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFE, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0x80000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x80000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE), \
+ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF),
+
#define E(a, b, c, d) \
{.ip6 = { \
htonl(a), htonl(b), \
@@ -13,135 +148,7 @@
* just use prefixlen_netmask_map[prefixlength].ip.
*/
const union nf_inet_addr ip_set_netmask_map[] = {
- E(0x00000000, 0x00000000, 0x00000000, 0x00000000),
- E(0x80000000, 0x00000000, 0x00000000, 0x00000000),
- E(0xC0000000, 0x00000000, 0x00000000, 0x00000000),
- E(0xE0000000, 0x00000000, 0x00000000, 0x00000000),
- E(0xF0000000, 0x00000000, 0x00000000, 0x00000000),
- E(0xF8000000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFC000000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFE000000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFF000000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFF800000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFC00000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFE00000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFF00000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFF80000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFC0000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFE0000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFF0000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFF8000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFC000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFE000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFF000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFF800, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFC00, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFE00, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFF00, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFF80, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFFC0, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFFE0, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFFF0, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFFF8, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFC, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFE, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0x80000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xC0000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xE0000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xF0000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xF8000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFC000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFE000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFF000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFF800000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFC00000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFE00000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFF00000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFF80000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFC0000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFE0000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFF0000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFF8000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFC000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFE000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFF000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFF800, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFC00, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFE00, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFF00, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFF80, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFC0, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFE0, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFF0, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFF8, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFC, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFE, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0x80000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x80000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF),
+ PREFIXES_MAP
};
EXPORT_SYMBOL_GPL(ip_set_netmask_map);
@@ -155,135 +162,7 @@ EXPORT_SYMBOL_GPL(ip_set_netmask_map);
* just use prefixlen_hostmask_map[prefixlength].ip.
*/
const union nf_inet_addr ip_set_hostmask_map[] = {
- E(0x00000000, 0x00000000, 0x00000000, 0x00000000),
- E(0x80000000, 0x00000000, 0x00000000, 0x00000000),
- E(0xC0000000, 0x00000000, 0x00000000, 0x00000000),
- E(0xE0000000, 0x00000000, 0x00000000, 0x00000000),
- E(0xF0000000, 0x00000000, 0x00000000, 0x00000000),
- E(0xF8000000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFC000000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFE000000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFF000000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFF800000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFC00000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFE00000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFF00000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFF80000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFC0000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFE0000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFF0000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFF8000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFC000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFE000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFF000, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFF800, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFC00, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFE00, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFF00, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFF80, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFFC0, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFFE0, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFFF0, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFFF8, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFC, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFE, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0x80000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xC0000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xE0000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xF0000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xF8000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFC000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFE000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFF000000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFF800000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFC00000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFE00000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFF00000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFF80000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFC0000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFE0000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFF0000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFF8000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFC000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFE000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFF000, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFF800, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFC00, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFE00, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFF00, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFF80, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFC0, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFE0, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFF0, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFF8, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFC, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFE, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0x80000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x80000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE),
- E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF),
+ PREFIXES_MAP
};
EXPORT_SYMBOL_GPL(ip_set_hostmask_map);
diff --git a/net/netfilter/ipvs/Makefile b/net/netfilter/ipvs/Makefile
index 67f3f4389602..c552993fa4b9 100644
--- a/net/netfilter/ipvs/Makefile
+++ b/net/netfilter/ipvs/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the IPVS modules on top of IPv4.
#
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 3d2ac71a83ec..3e053cb30070 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -104,7 +104,7 @@ static inline void ct_write_unlock_bh(unsigned int key)
spin_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
}
-static void ip_vs_conn_expire(unsigned long data);
+static void ip_vs_conn_expire(struct timer_list *t);
/*
* Returns hash value for IPVS connection entry
@@ -185,7 +185,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
hlist_add_head_rcu(&cp->c_list, &ip_vs_conn_tab[hash]);
ret = 1;
} else {
- pr_err("%s(): request for already hashed, called from %pF\n",
+ pr_err("%s(): request for already hashed, called from %pS\n",
__func__, __builtin_return_address(0));
ret = 0;
}
@@ -457,7 +457,7 @@ EXPORT_SYMBOL_GPL(ip_vs_conn_out_get_proto);
static void __ip_vs_conn_put_notimer(struct ip_vs_conn *cp)
{
__ip_vs_conn_put(cp);
- ip_vs_conn_expire((unsigned long)cp);
+ ip_vs_conn_expire(&cp->timer);
}
/*
@@ -817,9 +817,9 @@ static void ip_vs_conn_rcu_free(struct rcu_head *head)
kmem_cache_free(ip_vs_conn_cachep, cp);
}
-static void ip_vs_conn_expire(unsigned long data)
+static void ip_vs_conn_expire(struct timer_list *t)
{
- struct ip_vs_conn *cp = (struct ip_vs_conn *)data;
+ struct ip_vs_conn *cp = from_timer(cp, t, timer);
struct netns_ipvs *ipvs = cp->ipvs;
/*
@@ -909,7 +909,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af,
}
INIT_HLIST_NODE(&cp->c_list);
- setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp);
+ timer_setup(&cp->timer, ip_vs_conn_expire, 0);
cp->ipvs = ipvs;
cp->af = p->af;
cp->daf = dest_af;
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index e31956b58aba..5cb7cac9177d 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -125,14 +125,12 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
s->cnt.inbytes += skb->len;
u64_stats_update_end(&s->syncp);
- rcu_read_lock();
svc = rcu_dereference(dest->svc);
s = this_cpu_ptr(svc->stats.cpustats);
u64_stats_update_begin(&s->syncp);
s->cnt.inpkts++;
s->cnt.inbytes += skb->len;
u64_stats_update_end(&s->syncp);
- rcu_read_unlock();
s = this_cpu_ptr(ipvs->tot_stats.cpustats);
u64_stats_update_begin(&s->syncp);
@@ -159,14 +157,12 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
s->cnt.outbytes += skb->len;
u64_stats_update_end(&s->syncp);
- rcu_read_lock();
svc = rcu_dereference(dest->svc);
s = this_cpu_ptr(svc->stats.cpustats);
u64_stats_update_begin(&s->syncp);
s->cnt.outpkts++;
s->cnt.outbytes += skb->len;
u64_stats_update_end(&s->syncp);
- rcu_read_unlock();
s = this_cpu_ptr(ipvs->tot_stats.cpustats);
u64_stats_update_begin(&s->syncp);
@@ -1222,7 +1218,6 @@ static struct ip_vs_conn *__ip_vs_rs_conn_out(unsigned int hooknum,
if (!pptr)
return NULL;
- rcu_read_lock();
dest = ip_vs_find_real_service(ipvs, af, iph->protocol,
&iph->saddr, pptr[0]);
if (dest) {
@@ -1237,7 +1232,6 @@ static struct ip_vs_conn *__ip_vs_rs_conn_out(unsigned int hooknum,
pptr[0], pptr[1]);
}
}
- rcu_read_unlock();
return cp;
}
@@ -1689,11 +1683,9 @@ ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related,
if (dest) {
struct ip_vs_dest_dst *dest_dst;
- rcu_read_lock();
dest_dst = rcu_dereference(dest->dest_dst);
if (dest_dst)
mtu = dst_mtu(dest_dst->dst_cache);
- rcu_read_unlock();
}
if (mtu > 68 + sizeof(struct iphdr))
mtu -= sizeof(struct iphdr);
@@ -2109,7 +2101,7 @@ ip_vs_forward_icmp_v6(void *priv, struct sk_buff *skb,
#endif
-static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
+static const struct nf_hook_ops ip_vs_ops[] = {
/* After packet filtering, change source only for VS/NAT */
{
.hook = ip_vs_reply4,
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 1fa3c2307b6e..fff213eacf2a 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -300,7 +300,7 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc)
unsigned int hash;
if (svc->flags & IP_VS_SVC_F_HASHED) {
- pr_err("%s(): request for already hashed, called from %pF\n",
+ pr_err("%s(): request for already hashed, called from %pS\n",
__func__, __builtin_return_address(0));
return 0;
}
@@ -334,7 +334,7 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc)
static int ip_vs_svc_unhash(struct ip_vs_service *svc)
{
if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
- pr_err("%s(): request for unhash flagged, called from %pF\n",
+ pr_err("%s(): request for unhash flagged, called from %pS\n",
__func__, __builtin_return_address(0));
return 0;
}
@@ -550,18 +550,15 @@ bool ip_vs_has_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol,
/* Check for "full" addressed entries */
hash = ip_vs_rs_hashkey(af, daddr, dport);
- rcu_read_lock();
hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) {
if (dest->port == dport &&
dest->af == af &&
ip_vs_addr_equal(af, &dest->addr, daddr) &&
(dest->protocol == protocol || dest->vfwmark)) {
/* HIT */
- rcu_read_unlock();
return true;
}
}
- rcu_read_unlock();
return false;
}
@@ -1149,9 +1146,9 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
return 0;
}
-static void ip_vs_dest_trash_expire(unsigned long data)
+static void ip_vs_dest_trash_expire(struct timer_list *t)
{
- struct netns_ipvs *ipvs = (struct netns_ipvs *)data;
+ struct netns_ipvs *ipvs = from_timer(ipvs, t, dest_trash_timer);
struct ip_vs_dest *dest, *next;
unsigned long now = jiffies;
@@ -2037,12 +2034,16 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
seq_puts(seq,
" -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
} else {
+ struct net *net = seq_file_net(seq);
+ struct netns_ipvs *ipvs = net_ipvs(net);
const struct ip_vs_service *svc = v;
const struct ip_vs_iter *iter = seq->private;
const struct ip_vs_dest *dest;
struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler);
char *sched_name = sched ? sched->name : "none";
+ if (svc->ipvs != ipvs)
+ return 0;
if (iter->table == ip_vs_svc_table) {
#ifdef CONFIG_IP_VS_IPV6
if (svc->af == AF_INET6)
@@ -4022,8 +4023,7 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs)
INIT_LIST_HEAD(&ipvs->dest_trash);
spin_lock_init(&ipvs->dest_trash_lock);
- setup_timer(&ipvs->dest_trash_timer, ip_vs_dest_trash_expire,
- (unsigned long) ipvs);
+ timer_setup(&ipvs->dest_trash_timer, ip_vs_dest_trash_expire, 0);
atomic_set(&ipvs->ftpsvc_counter, 0);
atomic_set(&ipvs->nullsvc_counter, 0);
atomic_set(&ipvs->conn_out_counter, 0);
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
index 457c6c193e13..489055091a9b 100644
--- a/net/netfilter/ipvs/ip_vs_est.c
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -97,12 +97,12 @@ static void ip_vs_read_cpu_stats(struct ip_vs_kstats *sum,
}
-static void estimation_timer(unsigned long arg)
+static void estimation_timer(struct timer_list *t)
{
struct ip_vs_estimator *e;
struct ip_vs_stats *s;
u64 rate;
- struct netns_ipvs *ipvs = (struct netns_ipvs *)arg;
+ struct netns_ipvs *ipvs = from_timer(ipvs, t, est_timer);
spin_lock(&ipvs->est_lock);
list_for_each_entry(e, &ipvs->est_list, list) {
@@ -192,7 +192,7 @@ int __net_init ip_vs_estimator_net_init(struct netns_ipvs *ipvs)
{
INIT_LIST_HEAD(&ipvs->est_list);
spin_lock_init(&ipvs->est_lock);
- setup_timer(&ipvs->est_timer, estimation_timer, (unsigned long)ipvs);
+ timer_setup(&ipvs->est_timer, estimation_timer, 0);
mod_timer(&ipvs->est_timer, jiffies + 2 * HZ);
return 0;
}
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index fb780be76d15..3e17d32b629d 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -269,13 +269,11 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
* hopefully it will succeed on the retransmitted
* packet.
*/
- rcu_read_lock();
mangled = nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
iph->ihl * 4,
start - data,
end - start,
buf, buf_len);
- rcu_read_unlock();
if (mangled) {
ip_vs_nfct_expect_related(skb, ct, n_cp,
IPPROTO_TCP, 0, 0);
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index b6aa4a970c6e..d625179de485 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -106,6 +106,7 @@ struct ip_vs_lblc_table {
struct rcu_head rcu_head;
struct hlist_head bucket[IP_VS_LBLC_TAB_SIZE]; /* hash bucket */
struct timer_list periodic_timer; /* collect stale entries */
+ struct ip_vs_service *svc; /* pointer back to service */
atomic_t entries; /* number of entries */
int max_size; /* maximum size of entries */
int rover; /* rover for expire check */
@@ -294,10 +295,10 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
* of the table.
* The full expiration check is for this purpose now.
*/
-static void ip_vs_lblc_check_expire(unsigned long data)
+static void ip_vs_lblc_check_expire(struct timer_list *t)
{
- struct ip_vs_service *svc = (struct ip_vs_service *) data;
- struct ip_vs_lblc_table *tbl = svc->sched_data;
+ struct ip_vs_lblc_table *tbl = from_timer(tbl, t, periodic_timer);
+ struct ip_vs_service *svc = tbl->svc;
unsigned long now = jiffies;
int goal;
int i, j;
@@ -369,12 +370,12 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc)
tbl->rover = 0;
tbl->counter = 1;
tbl->dead = 0;
+ tbl->svc = svc;
/*
* Hook periodic timer for garbage collection
*/
- setup_timer(&tbl->periodic_timer, ip_vs_lblc_check_expire,
- (unsigned long)svc);
+ timer_setup(&tbl->periodic_timer, ip_vs_lblc_check_expire, 0);
mod_timer(&tbl->periodic_timer, jiffies + CHECK_EXPIRE_INTERVAL);
return 0;
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index c13ff575f9f7..84c57b62a588 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -278,6 +278,7 @@ struct ip_vs_lblcr_table {
atomic_t entries; /* number of entries */
int max_size; /* maximum size of entries */
struct timer_list periodic_timer; /* collect stale entries */
+ struct ip_vs_service *svc; /* pointer back to service */
int rover; /* rover for expire check */
int counter; /* counter for no expire */
bool dead;
@@ -458,10 +459,10 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)
* of the table.
* The full expiration check is for this purpose now.
*/
-static void ip_vs_lblcr_check_expire(unsigned long data)
+static void ip_vs_lblcr_check_expire(struct timer_list *t)
{
- struct ip_vs_service *svc = (struct ip_vs_service *) data;
- struct ip_vs_lblcr_table *tbl = svc->sched_data;
+ struct ip_vs_lblcr_table *tbl = from_timer(tbl, t, periodic_timer);
+ struct ip_vs_service *svc = tbl->svc;
unsigned long now = jiffies;
int goal;
int i, j;
@@ -532,12 +533,12 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
tbl->rover = 0;
tbl->counter = 1;
tbl->dead = 0;
+ tbl->svc = svc;
/*
* Hook periodic timer for garbage collection
*/
- setup_timer(&tbl->periodic_timer, ip_vs_lblcr_check_expire,
- (unsigned long)svc);
+ timer_setup(&tbl->periodic_timer, ip_vs_lblcr_check_expire, 0);
mod_timer(&tbl->periodic_timer, jiffies + CHECK_EXPIRE_INTERVAL);
return 0;
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 3ffad4adaddf..eff7569824e5 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/ip.h>
#include <linux/sctp.h>
@@ -24,9 +25,13 @@ sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
if (sh) {
sch = skb_header_pointer(skb, iph->len + sizeof(_sctph),
sizeof(_schunkh), &_schunkh);
- if (sch && (sch->type == SCTP_CID_INIT ||
- sysctl_sloppy_sctp(ipvs)))
+ if (sch) {
+ if (sch->type == SCTP_CID_ABORT ||
+ !(sysctl_sloppy_sctp(ipvs) ||
+ sch->type == SCTP_CID_INIT))
+ return 1;
ports = &sh->source;
+ }
}
} else {
ports = skb_header_pointer(
@@ -38,7 +43,6 @@ sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
return 0;
}
- rcu_read_lock();
if (likely(!ip_vs_iph_inverse(iph)))
svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
&iph->daddr, ports[1]);
@@ -53,7 +57,6 @@ sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
* It seems that we are very loaded.
* We have to drop this packet :(
*/
- rcu_read_unlock();
*verdict = NF_DROP;
return 0;
}
@@ -67,11 +70,9 @@ sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
*verdict = ip_vs_leave(svc, skb, pd, iph);
else
*verdict = NF_DROP;
- rcu_read_unlock();
return 0;
}
}
- rcu_read_unlock();
/* NF_ACCEPT */
return 1;
}
@@ -526,12 +527,10 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp)
/* Lookup application incarnations and bind the right one */
hash = sctp_app_hashkey(cp->vport);
- rcu_read_lock();
list_for_each_entry_rcu(inc, &ipvs->sctp_apps[hash], p_list) {
if (inc->port == cp->vport) {
if (unlikely(!ip_vs_app_inc_get(inc)))
break;
- rcu_read_unlock();
IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
"%s:%u to app %s on port %u\n",
@@ -544,11 +543,10 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp)
cp->app = inc;
if (inc->init_conn)
result = inc->init_conn(inc, cp);
- goto out;
+ break;
}
}
- rcu_read_unlock();
-out:
+
return result;
}
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index 12dc8d5bc37d..121a321b91be 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -63,7 +63,6 @@ tcp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
}
/* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */
- rcu_read_lock();
if (likely(!ip_vs_iph_inverse(iph)))
svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
@@ -80,7 +79,6 @@ tcp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
* It seems that we are very loaded.
* We have to drop this packet :(
*/
- rcu_read_unlock();
*verdict = NF_DROP;
return 0;
}
@@ -95,11 +93,9 @@ tcp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
*verdict = ip_vs_leave(svc, skb, pd, iph);
else
*verdict = NF_DROP;
- rcu_read_unlock();
return 0;
}
}
- rcu_read_unlock();
/* NF_ACCEPT */
return 1;
}
@@ -661,12 +657,10 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
/* Lookup application incarnations and bind the right one */
hash = tcp_app_hashkey(cp->vport);
- rcu_read_lock();
list_for_each_entry_rcu(inc, &ipvs->tcp_apps[hash], p_list) {
if (inc->port == cp->vport) {
if (unlikely(!ip_vs_app_inc_get(inc)))
break;
- rcu_read_unlock();
IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
"%s:%u to app %s on port %u\n",
@@ -680,12 +674,10 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
cp->app = inc;
if (inc->init_conn)
result = inc->init_conn(inc, cp);
- goto out;
+ break;
}
}
- rcu_read_unlock();
- out:
return result;
}
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index e494e9a88c7f..30e11cd6aa8a 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -53,7 +53,6 @@ udp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
return 0;
}
- rcu_read_lock();
if (likely(!ip_vs_iph_inverse(iph)))
svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
&iph->daddr, ports[1]);
@@ -69,7 +68,6 @@ udp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
* It seems that we are very loaded.
* We have to drop this packet :(
*/
- rcu_read_unlock();
*verdict = NF_DROP;
return 0;
}
@@ -84,11 +82,9 @@ udp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
*verdict = ip_vs_leave(svc, skb, pd, iph);
else
*verdict = NF_DROP;
- rcu_read_unlock();
return 0;
}
}
- rcu_read_unlock();
/* NF_ACCEPT */
return 1;
}
@@ -410,12 +406,10 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp)
/* Lookup application incarnations and bind the right one */
hash = udp_app_hashkey(cp->vport);
- rcu_read_lock();
list_for_each_entry_rcu(inc, &ipvs->udp_apps[hash], p_list) {
if (inc->port == cp->vport) {
if (unlikely(!ip_vs_app_inc_get(inc)))
break;
- rcu_read_unlock();
IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
"%s:%u to app %s on port %u\n",
@@ -429,12 +423,10 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp)
cp->app = inc;
if (inc->init_conn)
result = inc->init_conn(inc, cp);
- goto out;
+ break;
}
}
- rcu_read_unlock();
- out:
return result;
}
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 0e5b64a75da0..9ee71cb276d7 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* IPVS An implementation of the IP virtual server support for the
* LINUX operating system. IPVS is now implemented as a module
@@ -457,7 +458,7 @@ static inline bool in_persistence(struct ip_vs_conn *cp)
static int ip_vs_sync_conn_needed(struct netns_ipvs *ipvs,
struct ip_vs_conn *cp, int pkts)
{
- unsigned long orig = ACCESS_ONCE(cp->sync_endtime);
+ unsigned long orig = READ_ONCE(cp->sync_endtime);
unsigned long now = jiffies;
unsigned long n = (now + cp->timeout) & ~3UL;
unsigned int sync_refresh_period;
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 2eab1e0400f4..4527921b1c3a 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -678,7 +678,6 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
- rcu_read_lock();
if (__ip_vs_get_out_rt(cp->ipvs, cp->af, skb, NULL, iph->daddr,
IP_VS_RT_MODE_NON_LOCAL, NULL, ipvsh) < 0)
goto tx_error;
@@ -689,14 +688,12 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
skb->ignore_df = 1;
ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
tx_error:
kfree_skb(skb);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
}
@@ -710,7 +707,6 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
- rcu_read_lock();
if (__ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, NULL,
&iph->daddr, NULL,
ipvsh, 0, IP_VS_RT_MODE_NON_LOCAL) < 0)
@@ -720,14 +716,12 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
skb->ignore_df = 1;
ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
tx_error:
kfree_skb(skb);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
}
@@ -746,7 +740,6 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
- rcu_read_lock();
/* check if it is a connection of no-client-port */
if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
__be16 _pt, *p;
@@ -815,14 +808,12 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
skb->ignore_df = 1;
rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local);
- rcu_read_unlock();
LeaveFunction(10);
return rc;
tx_error:
kfree_skb(skb);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
}
@@ -837,7 +828,6 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
- rcu_read_lock();
/* check if it is a connection of no-client-port */
if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT && !ipvsh->fragoffs)) {
__be16 _pt, *p;
@@ -906,7 +896,6 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
skb->ignore_df = 1;
rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local);
- rcu_read_unlock();
LeaveFunction(10);
return rc;
@@ -914,7 +903,6 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
tx_error:
LeaveFunction(10);
kfree_skb(skb);
- rcu_read_unlock();
return NF_STOLEN;
}
#endif
@@ -933,6 +921,7 @@ ip_vs_prepare_tunneled_skb(struct sk_buff *skb, int skb_af,
{
struct sk_buff *new_skb = NULL;
struct iphdr *old_iph = NULL;
+ __u8 old_dsfield;
#ifdef CONFIG_IP_VS_IPV6
struct ipv6hdr *old_ipv6h = NULL;
#endif
@@ -957,7 +946,7 @@ ip_vs_prepare_tunneled_skb(struct sk_buff *skb, int skb_af,
*payload_len =
ntohs(old_ipv6h->payload_len) +
sizeof(*old_ipv6h);
- *dsfield = ipv6_get_dsfield(old_ipv6h);
+ old_dsfield = ipv6_get_dsfield(old_ipv6h);
*ttl = old_ipv6h->hop_limit;
if (df)
*df = 0;
@@ -972,12 +961,15 @@ ip_vs_prepare_tunneled_skb(struct sk_buff *skb, int skb_af,
/* fix old IP header checksum */
ip_send_check(old_iph);
- *dsfield = ipv4_get_dsfield(old_iph);
+ old_dsfield = ipv4_get_dsfield(old_iph);
*ttl = old_iph->ttl;
if (payload_len)
*payload_len = ntohs(old_iph->tot_len);
}
+ /* Implement full-functionality option for ECN encapsulation */
+ *dsfield = INET_ECN_encapsulate(old_dsfield, old_dsfield);
+
return skb;
error:
kfree_skb(skb);
@@ -1035,7 +1027,6 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
- rcu_read_lock();
local = __ip_vs_get_out_rt(ipvs, cp->af, skb, cp->dest, cp->daddr.ip,
IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL |
@@ -1043,10 +1034,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_RT_MODE_TUNNEL, &saddr, ipvsh);
if (local < 0)
goto tx_error;
- if (local) {
- rcu_read_unlock();
+ if (local)
return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1);
- }
rt = skb_rtable(skb);
tdev = rt->dst.dev;
@@ -1095,7 +1084,6 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
ip_local_out(net, skb->sk, skb);
else if (ret == NF_DROP)
kfree_skb(skb);
- rcu_read_unlock();
LeaveFunction(10);
@@ -1104,7 +1092,6 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
tx_error:
if (!IS_ERR(skb))
kfree_skb(skb);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
}
@@ -1127,7 +1114,6 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
- rcu_read_lock();
local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest,
&cp->daddr.in6,
&saddr, ipvsh, 1,
@@ -1136,10 +1122,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_RT_MODE_TUNNEL);
if (local < 0)
goto tx_error;
- if (local) {
- rcu_read_unlock();
+ if (local)
return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1);
- }
rt = (struct rt6_info *) skb_dst(skb);
tdev = rt->dst.dev;
@@ -1185,7 +1169,6 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
ip6_local_out(cp->ipvs->net, skb->sk, skb);
else if (ret == NF_DROP)
kfree_skb(skb);
- rcu_read_unlock();
LeaveFunction(10);
@@ -1194,7 +1177,6 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
tx_error:
if (!IS_ERR(skb))
kfree_skb(skb);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
}
@@ -1213,17 +1195,14 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
- rcu_read_lock();
local = __ip_vs_get_out_rt(cp->ipvs, cp->af, skb, cp->dest, cp->daddr.ip,
IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL |
IP_VS_RT_MODE_KNOWN_NH, NULL, ipvsh);
if (local < 0)
goto tx_error;
- if (local) {
- rcu_read_unlock();
+ if (local)
return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1);
- }
ip_send_check(ip_hdr(skb));
@@ -1231,14 +1210,12 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
skb->ignore_df = 1;
ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
tx_error:
kfree_skb(skb);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
}
@@ -1252,7 +1229,6 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
- rcu_read_lock();
local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest,
&cp->daddr.in6,
NULL, ipvsh, 0,
@@ -1261,23 +1237,19 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_RT_MODE_KNOWN_NH);
if (local < 0)
goto tx_error;
- if (local) {
- rcu_read_unlock();
+ if (local)
return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1);
- }
/* Another hack: avoid icmp_send in ip_fragment */
skb->ignore_df = 1;
ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
tx_error:
kfree_skb(skb);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
}
@@ -1322,7 +1294,6 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
rt_mode = (hooknum != NF_INET_FORWARD) ?
IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
- rcu_read_lock();
local = __ip_vs_get_out_rt(cp->ipvs, cp->af, skb, cp->dest, cp->daddr.ip, rt_mode,
NULL, iph);
if (local < 0)
@@ -1368,12 +1339,10 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
skb->ignore_df = 1;
rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local);
- rcu_read_unlock();
goto out;
tx_error:
kfree_skb(skb);
- rcu_read_unlock();
rc = NF_STOLEN;
out:
LeaveFunction(10);
@@ -1414,7 +1383,6 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
rt_mode = (hooknum != NF_INET_FORWARD) ?
IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
- rcu_read_lock();
local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest,
&cp->daddr.in6, NULL, ipvsh, 0, rt_mode);
if (local < 0)
@@ -1460,12 +1428,10 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
skb->ignore_df = 1;
rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local);
- rcu_read_unlock();
goto out;
tx_error:
kfree_skb(skb);
- rcu_read_unlock();
rc = NF_STOLEN;
out:
LeaveFunction(10);
diff --git a/net/netfilter/nf_conntrack_broadcast.c b/net/netfilter/nf_conntrack_broadcast.c
index 4e99cca61612..ecc3ab784633 100644
--- a/net/netfilter/nf_conntrack_broadcast.c
+++ b/net/netfilter/nf_conntrack_broadcast.c
@@ -40,7 +40,6 @@ int nf_conntrack_broadcast_help(struct sk_buff *skb,
if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
goto out;
- rcu_read_lock();
in_dev = __in_dev_get_rcu(rt->dst.dev);
if (in_dev != NULL) {
for_primary_ifa(in_dev) {
@@ -50,7 +49,6 @@ int nf_conntrack_broadcast_help(struct sk_buff *skb,
}
} endfor_ifa(in_dev);
}
- rcu_read_unlock();
if (mask == 0)
goto out;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 9979f46c81dc..85f643c1e227 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -56,6 +56,8 @@
#include <net/netfilter/nf_nat_helper.h>
#include <net/netns/hash.h>
+#include "nf_internals.h"
+
#define NF_CONNTRACK_VERSION "0.5.0"
int (*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct,
@@ -96,19 +98,26 @@ static struct conntrack_gc_work conntrack_gc_work;
void nf_conntrack_lock(spinlock_t *lock) __acquires(lock)
{
+ /* 1) Acquire the lock */
spin_lock(lock);
- while (unlikely(nf_conntrack_locks_all)) {
- spin_unlock(lock);
- /*
- * Order the 'nf_conntrack_locks_all' load vs. the
- * spin_unlock_wait() loads below, to ensure
- * that 'nf_conntrack_locks_all_lock' is indeed held:
- */
- smp_rmb(); /* spin_lock(&nf_conntrack_locks_all_lock) */
- spin_unlock_wait(&nf_conntrack_locks_all_lock);
- spin_lock(lock);
- }
+ /* 2) read nf_conntrack_locks_all, with ACQUIRE semantics
+ * It pairs with the smp_store_release() in nf_conntrack_all_unlock()
+ */
+ if (likely(smp_load_acquire(&nf_conntrack_locks_all) == false))
+ return;
+
+ /* fast path failed, unlock */
+ spin_unlock(lock);
+
+ /* Slow path 1) get global lock */
+ spin_lock(&nf_conntrack_locks_all_lock);
+
+ /* Slow path 2) get the lock we want */
+ spin_lock(lock);
+
+ /* Slow path 3) release the global lock */
+ spin_unlock(&nf_conntrack_locks_all_lock);
}
EXPORT_SYMBOL_GPL(nf_conntrack_lock);
@@ -149,28 +158,27 @@ static void nf_conntrack_all_lock(void)
int i;
spin_lock(&nf_conntrack_locks_all_lock);
- nf_conntrack_locks_all = true;
- /*
- * Order the above store of 'nf_conntrack_locks_all' against
- * the spin_unlock_wait() loads below, such that if
- * nf_conntrack_lock() observes 'nf_conntrack_locks_all'
- * we must observe nf_conntrack_locks[] held:
- */
- smp_mb(); /* spin_lock(&nf_conntrack_locks_all_lock) */
+ nf_conntrack_locks_all = true;
for (i = 0; i < CONNTRACK_LOCKS; i++) {
- spin_unlock_wait(&nf_conntrack_locks[i]);
+ spin_lock(&nf_conntrack_locks[i]);
+
+ /* This spin_unlock provides the "release" to ensure that
+ * nf_conntrack_locks_all==true is visible to everyone that
+ * acquired spin_lock(&nf_conntrack_locks[]).
+ */
+ spin_unlock(&nf_conntrack_locks[i]);
}
}
static void nf_conntrack_all_unlock(void)
{
- /*
- * All prior stores must be complete before we clear
+ /* All prior stores must be complete before we clear
* 'nf_conntrack_locks_all'. Otherwise nf_conntrack_lock()
* might observe the false value but not the entire
- * critical section:
+ * critical section.
+ * It pairs with the smp_load_acquire() in nf_conntrack_lock()
*/
smp_store_release(&nf_conntrack_locks_all, false);
spin_unlock(&nf_conntrack_locks_all_lock);
@@ -248,8 +256,8 @@ bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff,
u_int16_t l3num,
struct net *net, struct nf_conntrack_tuple *tuple)
{
- struct nf_conntrack_l3proto *l3proto;
- struct nf_conntrack_l4proto *l4proto;
+ const struct nf_conntrack_l3proto *l3proto;
+ const struct nf_conntrack_l4proto *l4proto;
unsigned int protoff;
u_int8_t protonum;
int ret;
@@ -398,22 +406,19 @@ static void
destroy_conntrack(struct nf_conntrack *nfct)
{
struct nf_conn *ct = (struct nf_conn *)nfct;
- struct nf_conntrack_l4proto *l4proto;
+ const struct nf_conntrack_l4proto *l4proto;
pr_debug("destroy_conntrack(%p)\n", ct);
- NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
+ WARN_ON(atomic_read(&nfct->use) != 0);
if (unlikely(nf_ct_is_template(ct))) {
nf_ct_tmpl_free(ct);
return;
}
- rcu_read_lock();
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
if (l4proto->destroy)
l4proto->destroy(ct);
- rcu_read_unlock();
-
local_bh_disable();
/* Expectations will have been removed in clean_from_lists,
* except TFTP can create an expectation on the first packet,
@@ -695,7 +700,7 @@ static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb,
{
/* This is the conntrack entry already in hashes that won race. */
struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
- struct nf_conntrack_l4proto *l4proto;
+ const struct nf_conntrack_l4proto *l4proto;
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
if (l4proto->allow_clash &&
@@ -757,12 +762,11 @@ __nf_conntrack_confirm(struct sk_buff *skb)
* connections for unconfirmed conns. But packet copies and
* REJECT will give spurious warnings here.
*/
- /* NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 1); */
/* No external references means no one else could have
* confirmed us.
*/
- NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
+ WARN_ON(nf_ct_is_confirmed(ct));
pr_debug("Confirming conntrack %p\n", ct);
/* We have to check the DYING flag after unlink to prevent
* a race against nf_ct_get_next_corpse() possibly called from
@@ -1079,12 +1083,12 @@ static void gc_worker(struct work_struct *work)
next_run = gc_work->next_gc_run;
gc_work->last_bucket = i;
gc_work->early_drop = false;
- queue_delayed_work(system_long_wq, &gc_work->dwork, next_run);
+ queue_delayed_work(system_power_efficient_wq, &gc_work->dwork, next_run);
}
static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work)
{
- INIT_DELAYED_WORK(&gc_work->dwork, gc_worker);
+ INIT_DEFERRABLE_WORK(&gc_work->dwork, gc_worker);
gc_work->next_gc_run = HZ;
gc_work->exiting = false;
}
@@ -1161,7 +1165,7 @@ void nf_conntrack_free(struct nf_conn *ct)
/* A freed object has refcnt == 0, that's
* the golden rule for SLAB_TYPESAFE_BY_RCU
*/
- NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 0);
+ WARN_ON(atomic_read(&ct->ct_general.use) != 0);
nf_ct_ext_destroy(ct);
nf_ct_ext_free(ct);
@@ -1177,8 +1181,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_free);
static noinline struct nf_conntrack_tuple_hash *
init_conntrack(struct net *net, struct nf_conn *tmpl,
const struct nf_conntrack_tuple *tuple,
- struct nf_conntrack_l3proto *l3proto,
- struct nf_conntrack_l4proto *l4proto,
+ const struct nf_conntrack_l3proto *l3proto,
+ const struct nf_conntrack_l4proto *l4proto,
struct sk_buff *skb,
unsigned int dataoff, u32 hash)
{
@@ -1289,8 +1293,8 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
unsigned int dataoff,
u_int16_t l3num,
u_int8_t protonum,
- struct nf_conntrack_l3proto *l3proto,
- struct nf_conntrack_l4proto *l4proto)
+ const struct nf_conntrack_l3proto *l3proto,
+ const struct nf_conntrack_l4proto *l4proto)
{
const struct nf_conntrack_zone *zone;
struct nf_conntrack_tuple tuple;
@@ -1345,10 +1349,10 @@ unsigned int
nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
struct sk_buff *skb)
{
+ const struct nf_conntrack_l3proto *l3proto;
+ const struct nf_conntrack_l4proto *l4proto;
struct nf_conn *ct, *tmpl;
enum ip_conntrack_info ctinfo;
- struct nf_conntrack_l3proto *l3proto;
- struct nf_conntrack_l4proto *l4proto;
unsigned int *timeouts;
unsigned int dataoff;
u_int8_t protonum;
@@ -1415,7 +1419,7 @@ repeat:
/* Decide what timeout policy we want to apply to this flow. */
timeouts = nf_ct_timeout_lookup(net, ct, l4proto);
- ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum, timeouts);
+ ret = l4proto->packet(ct, skb, dataoff, ctinfo, timeouts);
if (ret <= 0) {
/* Invalid: inverse of the return code tells
* the netfilter core what to do */
@@ -1469,7 +1473,7 @@ void nf_conntrack_alter_reply(struct nf_conn *ct,
struct nf_conn_help *help = nfct_help(ct);
/* Should be unconfirmed, so not in hash table yet */
- NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
+ WARN_ON(nf_ct_is_confirmed(ct));
pr_debug("Altering reply tuple of %p to ", ct);
nf_ct_dump_tuple(newreply);
@@ -1491,7 +1495,7 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
unsigned long extra_jiffies,
int do_acct)
{
- NF_CT_ASSERT(skb);
+ WARN_ON(!skb);
/* Only update if this is not a fixed timeout */
if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status))
@@ -1559,9 +1563,14 @@ int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[],
}
EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_to_tuple);
-int nf_ct_port_nlattr_tuple_size(void)
+unsigned int nf_ct_port_nlattr_tuple_size(void)
{
- return nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
+ static unsigned int size __read_mostly;
+
+ if (!size)
+ size = nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
+
+ return size;
}
EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_tuple_size);
#endif
@@ -1689,6 +1698,18 @@ __nf_ct_unconfirmed_destroy(struct net *net)
}
}
+void nf_ct_unconfirmed_destroy(struct net *net)
+{
+ might_sleep();
+
+ if (atomic_read(&net->ct.count) > 0) {
+ __nf_ct_unconfirmed_destroy(net);
+ nf_queue_nf_hook_drop(net);
+ synchronize_net();
+ }
+}
+EXPORT_SYMBOL_GPL(nf_ct_unconfirmed_destroy);
+
void nf_ct_iterate_cleanup_net(struct net *net,
int (*iter)(struct nf_conn *i, void *data),
void *data, u32 portid, int report)
@@ -1700,14 +1721,10 @@ void nf_ct_iterate_cleanup_net(struct net *net,
if (atomic_read(&net->ct.count) == 0)
return;
- __nf_ct_unconfirmed_destroy(net);
-
d.iter = iter;
d.data = data;
d.net = net;
- synchronize_net();
-
nf_ct_iterate_cleanup(iter_net_only, &d, portid, report);
}
EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup_net);
@@ -1733,6 +1750,7 @@ nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data), void *data)
if (atomic_read(&net->ct.count) == 0)
continue;
__nf_ct_unconfirmed_destroy(net);
+ nf_queue_nf_hook_drop(net);
}
rtnl_unlock();
@@ -1927,7 +1945,7 @@ int nf_conntrack_hash_resize(unsigned int hashsize)
return 0;
}
-int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
+int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp)
{
unsigned int hashsize;
int rc;
@@ -2071,7 +2089,7 @@ int nf_conntrack_init_start(void)
goto err_proto;
conntrack_gc_work_init(&conntrack_gc_work);
- queue_delayed_work(system_long_wq, &conntrack_gc_work.dwork, HZ);
+ queue_delayed_work(system_power_efficient_wq, &conntrack_gc_work.dwork, HZ);
return 0;
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 899c2c36da13..64778f9a8548 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -51,8 +51,8 @@ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
struct nf_conn_help *master_help = nfct_help(exp->master);
struct net *net = nf_ct_exp_net(exp);
- NF_CT_ASSERT(master_help);
- NF_CT_ASSERT(!timer_pending(&exp->timeout));
+ WARN_ON(!master_help);
+ WARN_ON(timer_pending(&exp->timeout));
hlist_del_rcu(&exp->hnode);
net->ct.expect_count--;
@@ -368,12 +368,6 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
/* two references : one for hash insert, one for the timer */
refcount_add(2, &exp->use);
- hlist_add_head_rcu(&exp->lnode, &master_help->expectations);
- master_help->expecting[exp->class]++;
-
- hlist_add_head_rcu(&exp->hnode, &nf_ct_expect_hash[h]);
- net->ct.expect_count++;
-
setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
(unsigned long)exp);
helper = rcu_dereference_protected(master_help->helper,
@@ -384,6 +378,12 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
}
add_timer(&exp->timeout);
+ hlist_add_head_rcu(&exp->lnode, &master_help->expectations);
+ master_help->expecting[exp->class]++;
+
+ hlist_add_head_rcu(&exp->hnode, &nf_ct_expect_hash[h]);
+ net->ct.expect_count++;
+
NF_CT_STAT_INC(net, expect_create);
}
@@ -474,6 +474,60 @@ out:
}
EXPORT_SYMBOL_GPL(nf_ct_expect_related_report);
+void nf_ct_expect_iterate_destroy(bool (*iter)(struct nf_conntrack_expect *e, void *data),
+ void *data)
+{
+ struct nf_conntrack_expect *exp;
+ const struct hlist_node *next;
+ unsigned int i;
+
+ spin_lock_bh(&nf_conntrack_expect_lock);
+
+ for (i = 0; i < nf_ct_expect_hsize; i++) {
+ hlist_for_each_entry_safe(exp, next,
+ &nf_ct_expect_hash[i],
+ hnode) {
+ if (iter(exp, data) && del_timer(&exp->timeout)) {
+ nf_ct_unlink_expect(exp);
+ nf_ct_expect_put(exp);
+ }
+ }
+ }
+
+ spin_unlock_bh(&nf_conntrack_expect_lock);
+}
+EXPORT_SYMBOL_GPL(nf_ct_expect_iterate_destroy);
+
+void nf_ct_expect_iterate_net(struct net *net,
+ bool (*iter)(struct nf_conntrack_expect *e, void *data),
+ void *data,
+ u32 portid, int report)
+{
+ struct nf_conntrack_expect *exp;
+ const struct hlist_node *next;
+ unsigned int i;
+
+ spin_lock_bh(&nf_conntrack_expect_lock);
+
+ for (i = 0; i < nf_ct_expect_hsize; i++) {
+ hlist_for_each_entry_safe(exp, next,
+ &nf_ct_expect_hash[i],
+ hnode) {
+
+ if (!net_eq(nf_ct_exp_net(exp), net))
+ continue;
+
+ if (iter(exp, data) && del_timer(&exp->timeout)) {
+ nf_ct_unlink_expect_report(exp, portid, report);
+ nf_ct_expect_put(exp);
+ }
+ }
+ }
+
+ spin_unlock_bh(&nf_conntrack_expect_lock);
+}
+EXPORT_SYMBOL_GPL(nf_ct_expect_iterate_net);
+
#ifdef CONFIG_NF_CONNTRACK_PROCFS
struct ct_expect_iter_state {
struct seq_net_private p;
diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c
index 6c605e88ebae..9fe0ddc333fb 100644
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -47,7 +47,7 @@ void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp)
struct nf_ct_ext_type *t;
/* Conntrack must not be confirmed to avoid races on reallocation. */
- NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
+ WARN_ON(nf_ct_is_confirmed(ct));
old = ct->ext;
diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c
index 89b2e46925c4..cf1bf2605c10 100644
--- a/net/netfilter/nf_conntrack_h323_asn1.c
+++ b/net/netfilter/nf_conntrack_h323_asn1.c
@@ -91,41 +91,41 @@ typedef struct field_t {
} field_t;
/* Bit Stream */
-typedef struct {
+struct bitstr {
unsigned char *buf;
unsigned char *beg;
unsigned char *end;
unsigned char *cur;
unsigned int bit;
-} bitstr_t;
+};
/* Tool Functions */
#define INC_BIT(bs) if((++(bs)->bit)>7){(bs)->cur++;(bs)->bit=0;}
#define INC_BITS(bs,b) if(((bs)->bit+=(b))>7){(bs)->cur+=(bs)->bit>>3;(bs)->bit&=7;}
#define BYTE_ALIGN(bs) if((bs)->bit){(bs)->cur++;(bs)->bit=0;}
#define CHECK_BOUND(bs,n) if((bs)->cur+(n)>(bs)->end)return(H323_ERROR_BOUND)
-static unsigned int get_len(bitstr_t *bs);
-static unsigned int get_bit(bitstr_t *bs);
-static unsigned int get_bits(bitstr_t *bs, unsigned int b);
-static unsigned int get_bitmap(bitstr_t *bs, unsigned int b);
-static unsigned int get_uint(bitstr_t *bs, int b);
+static unsigned int get_len(struct bitstr *bs);
+static unsigned int get_bit(struct bitstr *bs);
+static unsigned int get_bits(struct bitstr *bs, unsigned int b);
+static unsigned int get_bitmap(struct bitstr *bs, unsigned int b);
+static unsigned int get_uint(struct bitstr *bs, int b);
/* Decoder Functions */
-static int decode_nul(bitstr_t *bs, const struct field_t *f, char *base, int level);
-static int decode_bool(bitstr_t *bs, const struct field_t *f, char *base, int level);
-static int decode_oid(bitstr_t *bs, const struct field_t *f, char *base, int level);
-static int decode_int(bitstr_t *bs, const struct field_t *f, char *base, int level);
-static int decode_enum(bitstr_t *bs, const struct field_t *f, char *base, int level);
-static int decode_bitstr(bitstr_t *bs, const struct field_t *f, char *base, int level);
-static int decode_numstr(bitstr_t *bs, const struct field_t *f, char *base, int level);
-static int decode_octstr(bitstr_t *bs, const struct field_t *f, char *base, int level);
-static int decode_bmpstr(bitstr_t *bs, const struct field_t *f, char *base, int level);
-static int decode_seq(bitstr_t *bs, const struct field_t *f, char *base, int level);
-static int decode_seqof(bitstr_t *bs, const struct field_t *f, char *base, int level);
-static int decode_choice(bitstr_t *bs, const struct field_t *f, char *base, int level);
+static int decode_nul(struct bitstr *bs, const struct field_t *f, char *base, int level);
+static int decode_bool(struct bitstr *bs, const struct field_t *f, char *base, int level);
+static int decode_oid(struct bitstr *bs, const struct field_t *f, char *base, int level);
+static int decode_int(struct bitstr *bs, const struct field_t *f, char *base, int level);
+static int decode_enum(struct bitstr *bs, const struct field_t *f, char *base, int level);
+static int decode_bitstr(struct bitstr *bs, const struct field_t *f, char *base, int level);
+static int decode_numstr(struct bitstr *bs, const struct field_t *f, char *base, int level);
+static int decode_octstr(struct bitstr *bs, const struct field_t *f, char *base, int level);
+static int decode_bmpstr(struct bitstr *bs, const struct field_t *f, char *base, int level);
+static int decode_seq(struct bitstr *bs, const struct field_t *f, char *base, int level);
+static int decode_seqof(struct bitstr *bs, const struct field_t *f, char *base, int level);
+static int decode_choice(struct bitstr *bs, const struct field_t *f, char *base, int level);
/* Decoder Functions Vector */
-typedef int (*decoder_t)(bitstr_t *, const struct field_t *, char *, int);
+typedef int (*decoder_t)(struct bitstr *, const struct field_t *, char *, int);
static const decoder_t Decoders[] = {
decode_nul,
decode_bool,
@@ -150,7 +150,7 @@ static const decoder_t Decoders[] = {
* Functions
****************************************************************************/
/* Assume bs is aligned && v < 16384 */
-static unsigned int get_len(bitstr_t *bs)
+static unsigned int get_len(struct bitstr *bs)
{
unsigned int v;
@@ -166,7 +166,7 @@ static unsigned int get_len(bitstr_t *bs)
}
/****************************************************************************/
-static unsigned int get_bit(bitstr_t *bs)
+static unsigned int get_bit(struct bitstr *bs)
{
unsigned int b = (*bs->cur) & (0x80 >> bs->bit);
@@ -177,7 +177,7 @@ static unsigned int get_bit(bitstr_t *bs)
/****************************************************************************/
/* Assume b <= 8 */
-static unsigned int get_bits(bitstr_t *bs, unsigned int b)
+static unsigned int get_bits(struct bitstr *bs, unsigned int b)
{
unsigned int v, l;
@@ -203,7 +203,7 @@ static unsigned int get_bits(bitstr_t *bs, unsigned int b)
/****************************************************************************/
/* Assume b <= 32 */
-static unsigned int get_bitmap(bitstr_t *bs, unsigned int b)
+static unsigned int get_bitmap(struct bitstr *bs, unsigned int b)
{
unsigned int v, l, shift, bytes;
@@ -242,7 +242,7 @@ static unsigned int get_bitmap(bitstr_t *bs, unsigned int b)
/****************************************************************************
* Assume bs is aligned and sizeof(unsigned int) == 4
****************************************************************************/
-static unsigned int get_uint(bitstr_t *bs, int b)
+static unsigned int get_uint(struct bitstr *bs, int b)
{
unsigned int v = 0;
@@ -264,7 +264,7 @@ static unsigned int get_uint(bitstr_t *bs, int b)
}
/****************************************************************************/
-static int decode_nul(bitstr_t *bs, const struct field_t *f,
+static int decode_nul(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
@@ -273,7 +273,7 @@ static int decode_nul(bitstr_t *bs, const struct field_t *f,
}
/****************************************************************************/
-static int decode_bool(bitstr_t *bs, const struct field_t *f,
+static int decode_bool(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
@@ -285,7 +285,7 @@ static int decode_bool(bitstr_t *bs, const struct field_t *f,
}
/****************************************************************************/
-static int decode_oid(bitstr_t *bs, const struct field_t *f,
+static int decode_oid(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
int len;
@@ -302,7 +302,7 @@ static int decode_oid(bitstr_t *bs, const struct field_t *f,
}
/****************************************************************************/
-static int decode_int(bitstr_t *bs, const struct field_t *f,
+static int decode_int(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
unsigned int len;
@@ -346,7 +346,7 @@ static int decode_int(bitstr_t *bs, const struct field_t *f,
}
/****************************************************************************/
-static int decode_enum(bitstr_t *bs, const struct field_t *f,
+static int decode_enum(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
@@ -362,7 +362,7 @@ static int decode_enum(bitstr_t *bs, const struct field_t *f,
}
/****************************************************************************/
-static int decode_bitstr(bitstr_t *bs, const struct field_t *f,
+static int decode_bitstr(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
unsigned int len;
@@ -396,7 +396,7 @@ static int decode_bitstr(bitstr_t *bs, const struct field_t *f,
}
/****************************************************************************/
-static int decode_numstr(bitstr_t *bs, const struct field_t *f,
+static int decode_numstr(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
unsigned int len;
@@ -414,7 +414,7 @@ static int decode_numstr(bitstr_t *bs, const struct field_t *f,
}
/****************************************************************************/
-static int decode_octstr(bitstr_t *bs, const struct field_t *f,
+static int decode_octstr(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
unsigned int len;
@@ -463,7 +463,7 @@ static int decode_octstr(bitstr_t *bs, const struct field_t *f,
}
/****************************************************************************/
-static int decode_bmpstr(bitstr_t *bs, const struct field_t *f,
+static int decode_bmpstr(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
unsigned int len;
@@ -489,7 +489,7 @@ static int decode_bmpstr(bitstr_t *bs, const struct field_t *f,
}
/****************************************************************************/
-static int decode_seq(bitstr_t *bs, const struct field_t *f,
+static int decode_seq(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
unsigned int ext, bmp, i, opt, len = 0, bmp2, bmp2_len;
@@ -606,7 +606,7 @@ static int decode_seq(bitstr_t *bs, const struct field_t *f,
}
/****************************************************************************/
-static int decode_seqof(bitstr_t *bs, const struct field_t *f,
+static int decode_seqof(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
unsigned int count, effective_count = 0, i, len = 0;
@@ -696,7 +696,7 @@ static int decode_seqof(bitstr_t *bs, const struct field_t *f,
/****************************************************************************/
-static int decode_choice(bitstr_t *bs, const struct field_t *f,
+static int decode_choice(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
unsigned int type, ext, len = 0;
@@ -772,7 +772,7 @@ int DecodeRasMessage(unsigned char *buf, size_t sz, RasMessage *ras)
FNAME("RasMessage") CHOICE, 5, 24, 32, DECODE | EXT,
0, _RasMessage
};
- bitstr_t bs;
+ struct bitstr bs;
bs.buf = bs.beg = bs.cur = buf;
bs.end = buf + sz;
@@ -789,7 +789,7 @@ static int DecodeH323_UserInformation(unsigned char *buf, unsigned char *beg,
FNAME("H323-UserInformation") SEQ, 1, 2, 2, DECODE | EXT,
0, _H323_UserInformation
};
- bitstr_t bs;
+ struct bitstr bs;
bs.buf = buf;
bs.beg = bs.cur = beg;
@@ -808,7 +808,7 @@ int DecodeMultimediaSystemControlMessage(unsigned char *buf, size_t sz,
FNAME("MultimediaSystemControlMessage") CHOICE, 2, 4, 4,
DECODE | EXT, 0, _MultimediaSystemControlMessage
};
- bitstr_t bs;
+ struct bitstr bs;
bs.buf = bs.beg = bs.cur = buf;
bs.end = buf + sz;
@@ -877,6 +877,7 @@ int DecodeQ931(unsigned char *buf, size_t sz, Q931 *q931)
if (sz < 1)
break;
len = *p++;
+ sz--;
if (sz < len)
break;
p += len;
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 9129bb3b5153..551a1eddf0fa 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -437,12 +437,22 @@ out:
}
EXPORT_SYMBOL_GPL(nf_conntrack_helper_register);
-void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
+static bool expect_iter_me(struct nf_conntrack_expect *exp, void *data)
{
- struct nf_conntrack_expect *exp;
- const struct hlist_node *next;
- unsigned int i;
+ struct nf_conn_help *help = nfct_help(exp->master);
+ const struct nf_conntrack_helper *me = data;
+ const struct nf_conntrack_helper *this;
+
+ if (exp->helper == me)
+ return true;
+ this = rcu_dereference_protected(help->helper,
+ lockdep_is_held(&nf_conntrack_expect_lock));
+ return this == me;
+}
+
+void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
+{
mutex_lock(&nf_ct_helper_mutex);
hlist_del_rcu(&me->hnode);
nf_ct_helper_count--;
@@ -453,21 +463,7 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
*/
synchronize_rcu();
- /* Get rid of expectations */
- spin_lock_bh(&nf_conntrack_expect_lock);
- for (i = 0; i < nf_ct_expect_hsize; i++) {
- hlist_for_each_entry_safe(exp, next,
- &nf_ct_expect_hash[i], hnode) {
- struct nf_conn_help *help = nfct_help(exp->master);
- if ((rcu_dereference_protected(
- help->helper,
- lockdep_is_held(&nf_conntrack_expect_lock)
- ) == me || exp->helper == me))
- nf_ct_remove_expect(exp);
- }
- }
- spin_unlock_bh(&nf_conntrack_expect_lock);
-
+ nf_ct_expect_iterate_destroy(expect_iter_me, NULL);
nf_ct_iterate_destroy(unhelp, me);
}
EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister);
diff --git a/net/netfilter/nf_conntrack_l3proto_generic.c b/net/netfilter/nf_conntrack_l3proto_generic.c
index cf9ace70bece..397e6911214f 100644
--- a/net/netfilter/nf_conntrack_l3proto_generic.c
+++ b/net/netfilter/nf_conntrack_l3proto_generic.c
@@ -49,11 +49,6 @@ static bool generic_invert_tuple(struct nf_conntrack_tuple *tuple,
return true;
}
-static void generic_print_tuple(struct seq_file *s,
- const struct nf_conntrack_tuple *tuple)
-{
-}
-
static int generic_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
unsigned int *dataoff, u_int8_t *protonum)
{
@@ -64,10 +59,8 @@ static int generic_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
struct nf_conntrack_l3proto nf_conntrack_l3proto_generic __read_mostly = {
.l3proto = PF_UNSPEC,
- .name = "unknown",
.pkt_to_tuple = generic_pkt_to_tuple,
.invert_tuple = generic_invert_tuple,
- .print_tuple = generic_print_tuple,
.get_l4proto = generic_get_l4proto,
};
EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_generic);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 7999e70c3bfb..59c08997bfdf 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -61,8 +61,8 @@ MODULE_LICENSE("GPL");
static char __initdata version[] = "0.93";
static int ctnetlink_dump_tuples_proto(struct sk_buff *skb,
- const struct nf_conntrack_tuple *tuple,
- struct nf_conntrack_l4proto *l4proto)
+ const struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_l4proto *l4proto)
{
int ret = 0;
struct nlattr *nest_parms;
@@ -86,7 +86,7 @@ nla_put_failure:
static int ctnetlink_dump_tuples_ip(struct sk_buff *skb,
const struct nf_conntrack_tuple *tuple,
- struct nf_conntrack_l3proto *l3proto)
+ const struct nf_conntrack_l3proto *l3proto)
{
int ret = 0;
struct nlattr *nest_parms;
@@ -109,9 +109,9 @@ nla_put_failure:
static int ctnetlink_dump_tuples(struct sk_buff *skb,
const struct nf_conntrack_tuple *tuple)
{
+ const struct nf_conntrack_l3proto *l3proto;
+ const struct nf_conntrack_l4proto *l4proto;
int ret;
- struct nf_conntrack_l3proto *l3proto;
- struct nf_conntrack_l4proto *l4proto;
rcu_read_lock();
l3proto = __nf_ct_l3proto_find(tuple->src.l3num);
@@ -163,7 +163,7 @@ nla_put_failure:
static int ctnetlink_dump_protoinfo(struct sk_buff *skb, struct nf_conn *ct)
{
- struct nf_conntrack_l4proto *l4proto;
+ const struct nf_conntrack_l4proto *l4proto;
struct nlattr *nest_proto;
int ret;
@@ -533,22 +533,27 @@ nla_put_failure:
return -1;
}
-static inline size_t ctnetlink_proto_size(const struct nf_conn *ct)
+#if defined(CONFIG_NETFILTER_NETLINK_GLUE_CT) || defined(CONFIG_NF_CONNTRACK_EVENTS)
+static size_t ctnetlink_proto_size(const struct nf_conn *ct)
{
- struct nf_conntrack_l3proto *l3proto;
- struct nf_conntrack_l4proto *l4proto;
- size_t len = 0;
+ const struct nf_conntrack_l3proto *l3proto;
+ const struct nf_conntrack_l4proto *l4proto;
+ size_t len, len4 = 0;
- rcu_read_lock();
l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
- len += l3proto->nla_size;
+ len = l3proto->nla_size;
+ len *= 3u; /* ORIG, REPLY, MASTER */
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
len += l4proto->nla_size;
- rcu_read_unlock();
+ if (l4proto->nlattr_tuple_size) {
+ len4 = l4proto->nlattr_tuple_size();
+ len4 *= 3u; /* ORIG, REPLY, MASTER */
+ }
- return len;
+ return len + len4;
}
+#endif
static inline size_t ctnetlink_acct_size(const struct nf_conn *ct)
{
@@ -664,7 +669,6 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
nfmsg->version = NFNETLINK_V0;
nfmsg->res_id = 0;
- rcu_read_lock();
zone = nf_ct_zone(ct);
nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED);
@@ -736,8 +740,6 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
&& ctnetlink_dump_mark(skb, ct) < 0)
goto nla_put_failure;
#endif
- rcu_read_unlock();
-
nlmsg_end(skb, nlh);
err = nfnetlink_send(skb, net, item->portid, group, item->report,
GFP_ATOMIC);
@@ -747,7 +749,6 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
return 0;
nla_put_failure:
- rcu_read_unlock();
nlmsg_cancel(skb, nlh);
nlmsg_failure:
kfree_skb(skb);
@@ -941,8 +942,8 @@ static const struct nla_policy proto_nla_policy[CTA_PROTO_MAX+1] = {
static int ctnetlink_parse_tuple_proto(struct nlattr *attr,
struct nf_conntrack_tuple *tuple)
{
+ const struct nf_conntrack_l4proto *l4proto;
struct nlattr *tb[CTA_PROTO_MAX+1];
- struct nf_conntrack_l4proto *l4proto;
int ret = 0;
ret = nla_parse_nested(tb, CTA_PROTO_MAX, attr, proto_nla_policy,
@@ -1585,8 +1586,8 @@ static int ctnetlink_change_protoinfo(struct nf_conn *ct,
const struct nlattr * const cda[])
{
const struct nlattr *attr = cda[CTA_PROTOINFO];
+ const struct nf_conntrack_l4proto *l4proto;
struct nlattr *tb[CTA_PROTOINFO_MAX+1];
- struct nf_conntrack_l4proto *l4proto;
int err = 0;
err = nla_parse_nested(tb, CTA_PROTOINFO_MAX, attr, protoinfo_policy,
@@ -2213,7 +2214,6 @@ static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct)
const struct nf_conntrack_zone *zone;
struct nlattr *nest_parms;
- rcu_read_lock();
zone = nf_ct_zone(ct);
nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED);
@@ -2272,11 +2272,9 @@ static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct)
#endif
if (ctnetlink_dump_labels(skb, ct) < 0)
goto nla_put_failure;
- rcu_read_unlock();
return 0;
nla_put_failure:
- rcu_read_unlock();
return -ENOSPC;
}
@@ -2483,11 +2481,11 @@ static int ctnetlink_exp_dump_mask(struct sk_buff *skb,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_tuple_mask *mask)
{
- int ret;
- struct nf_conntrack_l3proto *l3proto;
- struct nf_conntrack_l4proto *l4proto;
+ const struct nf_conntrack_l3proto *l3proto;
+ const struct nf_conntrack_l4proto *l4proto;
struct nf_conntrack_tuple m;
struct nlattr *nest_parms;
+ int ret;
memset(&m, 0xFF, sizeof(m));
memcpy(&m.src.u3, &mask->src.u3, sizeof(m.src.u3));
@@ -2661,17 +2659,14 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item)
nfmsg->version = NFNETLINK_V0;
nfmsg->res_id = 0;
- rcu_read_lock();
if (ctnetlink_exp_dump_expect(skb, exp) < 0)
goto nla_put_failure;
- rcu_read_unlock();
nlmsg_end(skb, nlh);
nfnetlink_send(skb, net, item->portid, group, item->report, GFP_ATOMIC);
return 0;
nla_put_failure:
- rcu_read_unlock();
nlmsg_cancel(skb, nlh);
nlmsg_failure:
kfree_skb(skb);
@@ -2910,6 +2905,21 @@ out:
return err == -EAGAIN ? -ENOBUFS : err;
}
+static bool expect_iter_name(struct nf_conntrack_expect *exp, void *data)
+{
+ const struct nf_conn_help *m_help;
+ const char *name = data;
+
+ m_help = nfct_help(exp->master);
+
+ return strcmp(m_help->helper->name, name) == 0;
+}
+
+static bool expect_iter_all(struct nf_conntrack_expect *exp, void *data)
+{
+ return true;
+}
+
static int ctnetlink_del_expect(struct net *net, struct sock *ctnl,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const cda[],
@@ -2918,10 +2928,8 @@ static int ctnetlink_del_expect(struct net *net, struct sock *ctnl,
struct nf_conntrack_expect *exp;
struct nf_conntrack_tuple tuple;
struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- struct hlist_node *next;
u_int8_t u3 = nfmsg->nfgen_family;
struct nf_conntrack_zone zone;
- unsigned int i;
int err;
if (cda[CTA_EXPECT_TUPLE]) {
@@ -2961,49 +2969,15 @@ static int ctnetlink_del_expect(struct net *net, struct sock *ctnl,
nf_ct_expect_put(exp);
} else if (cda[CTA_EXPECT_HELP_NAME]) {
char *name = nla_data(cda[CTA_EXPECT_HELP_NAME]);
- struct nf_conn_help *m_help;
-
- /* delete all expectations for this helper */
- spin_lock_bh(&nf_conntrack_expect_lock);
- for (i = 0; i < nf_ct_expect_hsize; i++) {
- hlist_for_each_entry_safe(exp, next,
- &nf_ct_expect_hash[i],
- hnode) {
-
- if (!net_eq(nf_ct_exp_net(exp), net))
- continue;
- m_help = nfct_help(exp->master);
- if (!strcmp(m_help->helper->name, name) &&
- del_timer(&exp->timeout)) {
- nf_ct_unlink_expect_report(exp,
- NETLINK_CB(skb).portid,
- nlmsg_report(nlh));
- nf_ct_expect_put(exp);
- }
- }
- }
- spin_unlock_bh(&nf_conntrack_expect_lock);
+ nf_ct_expect_iterate_net(net, expect_iter_name, name,
+ NETLINK_CB(skb).portid,
+ nlmsg_report(nlh));
} else {
/* This basically means we have to flush everything*/
- spin_lock_bh(&nf_conntrack_expect_lock);
- for (i = 0; i < nf_ct_expect_hsize; i++) {
- hlist_for_each_entry_safe(exp, next,
- &nf_ct_expect_hash[i],
- hnode) {
-
- if (!net_eq(nf_ct_exp_net(exp), net))
- continue;
-
- if (del_timer(&exp->timeout)) {
- nf_ct_unlink_expect_report(exp,
- NETLINK_CB(skb).portid,
- nlmsg_report(nlh));
- nf_ct_expect_put(exp);
- }
- }
- }
- spin_unlock_bh(&nf_conntrack_expect_lock);
+ nf_ct_expect_iterate_net(net, expect_iter_all, NULL,
+ NETLINK_CB(skb).portid,
+ nlmsg_report(nlh));
}
return 0;
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index 6959e93063d4..11562f2a08bb 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -113,7 +113,6 @@ static void pptp_expectfn(struct nf_conn *ct,
/* Can you see how rusty this code is, compared with the pre-2.6.11
* one? That's what happened to my shiny newnat of 2002 ;( -HW */
- rcu_read_lock();
nf_nat_pptp_expectfn = rcu_dereference(nf_nat_pptp_hook_expectfn);
if (nf_nat_pptp_expectfn && ct->master->status & IPS_NAT_MASK)
nf_nat_pptp_expectfn(ct, exp);
@@ -136,7 +135,6 @@ static void pptp_expectfn(struct nf_conn *ct,
pr_debug("not found\n");
}
}
- rcu_read_unlock();
}
static int destroy_sibling_or_exp(struct net *net, struct nf_conn *ct,
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 1dcad229c3cc..c8e9c9503a08 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -27,6 +27,7 @@
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_log.h>
static struct nf_conntrack_l4proto __rcu **nf_ct_protos[NFPROTO_NUMPROTO] __read_mostly;
struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[NFPROTO_NUMPROTO] __read_mostly;
@@ -63,9 +64,55 @@ nf_ct_unregister_sysctl(struct ctl_table_header **header,
*header = NULL;
*table = NULL;
}
+
+__printf(5, 6)
+void nf_l4proto_log_invalid(const struct sk_buff *skb,
+ struct net *net,
+ u16 pf, u8 protonum,
+ const char *fmt, ...)
+{
+ struct va_format vaf;
+ va_list args;
+
+ if (net->ct.sysctl_log_invalid != protonum ||
+ net->ct.sysctl_log_invalid != IPPROTO_RAW)
+ return;
+
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
+ "nf_ct_proto_%d: %pV ", protonum, &vaf);
+ va_end(args);
+}
+EXPORT_SYMBOL_GPL(nf_l4proto_log_invalid);
+
+__printf(3, 4)
+void nf_ct_l4proto_log_invalid(const struct sk_buff *skb,
+ const struct nf_conn *ct,
+ const char *fmt, ...)
+{
+ struct va_format vaf;
+ struct net *net;
+ va_list args;
+
+ net = nf_ct_net(ct);
+ if (likely(net->ct.sysctl_log_invalid == 0))
+ return;
+
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ nf_l4proto_log_invalid(skb, net, nf_ct_l3num(ct),
+ nf_ct_protonum(ct), "%pV", &vaf);
+ va_end(args);
+}
+EXPORT_SYMBOL_GPL(nf_ct_l4proto_log_invalid);
#endif
-struct nf_conntrack_l4proto *
+const struct nf_conntrack_l4proto *
__nf_ct_l4proto_find(u_int16_t l3proto, u_int8_t l4proto)
{
if (unlikely(l3proto >= NFPROTO_NUMPROTO || nf_ct_protos[l3proto] == NULL))
@@ -77,7 +124,7 @@ EXPORT_SYMBOL_GPL(__nf_ct_l4proto_find);
/* this is guaranteed to always return a valid protocol helper, since
* it falls back to generic_protocol */
-struct nf_conntrack_l3proto *
+const struct nf_conntrack_l3proto *
nf_ct_l3proto_find_get(u_int16_t l3proto)
{
struct nf_conntrack_l3proto *p;
@@ -95,8 +142,8 @@ EXPORT_SYMBOL_GPL(nf_ct_l3proto_find_get);
int
nf_ct_l3proto_try_module_get(unsigned short l3proto)
{
+ const struct nf_conntrack_l3proto *p;
int ret;
- struct nf_conntrack_l3proto *p;
retry: p = nf_ct_l3proto_find_get(l3proto);
if (p == &nf_conntrack_l3proto_generic) {
@@ -125,7 +172,7 @@ void nf_ct_l3proto_module_put(unsigned short l3proto)
}
EXPORT_SYMBOL_GPL(nf_ct_l3proto_module_put);
-int nf_ct_netns_get(struct net *net, u8 nfproto)
+static int nf_ct_netns_do_get(struct net *net, u8 nfproto)
{
const struct nf_conntrack_l3proto *l3proto;
int ret;
@@ -150,9 +197,33 @@ int nf_ct_netns_get(struct net *net, u8 nfproto)
return ret;
}
+
+int nf_ct_netns_get(struct net *net, u8 nfproto)
+{
+ int err;
+
+ if (nfproto == NFPROTO_INET) {
+ err = nf_ct_netns_do_get(net, NFPROTO_IPV4);
+ if (err < 0)
+ goto err1;
+ err = nf_ct_netns_do_get(net, NFPROTO_IPV6);
+ if (err < 0)
+ goto err2;
+ } else {
+ err = nf_ct_netns_do_get(net, nfproto);
+ if (err < 0)
+ goto err1;
+ }
+ return 0;
+
+err2:
+ nf_ct_netns_put(net, NFPROTO_IPV4);
+err1:
+ return err;
+}
EXPORT_SYMBOL_GPL(nf_ct_netns_get);
-void nf_ct_netns_put(struct net *net, u8 nfproto)
+static void nf_ct_netns_do_put(struct net *net, u8 nfproto)
{
const struct nf_conntrack_l3proto *l3proto;
@@ -171,12 +242,21 @@ void nf_ct_netns_put(struct net *net, u8 nfproto)
nf_ct_l3proto_module_put(nfproto);
}
+
+void nf_ct_netns_put(struct net *net, uint8_t nfproto)
+{
+ if (nfproto == NFPROTO_INET) {
+ nf_ct_netns_do_put(net, NFPROTO_IPV4);
+ nf_ct_netns_do_put(net, NFPROTO_IPV6);
+ } else
+ nf_ct_netns_do_put(net, nfproto);
+}
EXPORT_SYMBOL_GPL(nf_ct_netns_put);
-struct nf_conntrack_l4proto *
+const struct nf_conntrack_l4proto *
nf_ct_l4proto_find_get(u_int16_t l3num, u_int8_t l4num)
{
- struct nf_conntrack_l4proto *p;
+ const struct nf_conntrack_l4proto *p;
rcu_read_lock();
p = __nf_ct_l4proto_find(l3num, l4num);
@@ -188,7 +268,7 @@ nf_ct_l4proto_find_get(u_int16_t l3num, u_int8_t l4num)
}
EXPORT_SYMBOL_GPL(nf_ct_l4proto_find_get);
-void nf_ct_l4proto_put(struct nf_conntrack_l4proto *p)
+void nf_ct_l4proto_put(const struct nf_conntrack_l4proto *p)
{
module_put(p->me);
}
@@ -196,28 +276,28 @@ EXPORT_SYMBOL_GPL(nf_ct_l4proto_put);
static int kill_l3proto(struct nf_conn *i, void *data)
{
- return nf_ct_l3num(i) == ((struct nf_conntrack_l3proto *)data)->l3proto;
+ return nf_ct_l3num(i) == ((const struct nf_conntrack_l3proto *)data)->l3proto;
}
static int kill_l4proto(struct nf_conn *i, void *data)
{
- struct nf_conntrack_l4proto *l4proto;
+ const struct nf_conntrack_l4proto *l4proto;
l4proto = data;
return nf_ct_protonum(i) == l4proto->l4proto &&
nf_ct_l3num(i) == l4proto->l3proto;
}
-int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto)
+int nf_ct_l3proto_register(const struct nf_conntrack_l3proto *proto)
{
int ret = 0;
struct nf_conntrack_l3proto *old;
if (proto->l3proto >= NFPROTO_NUMPROTO)
return -EBUSY;
-
- if (proto->tuple_to_nlattr && !proto->nlattr_tuple_size)
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+ if (proto->tuple_to_nlattr && proto->nla_size == 0)
return -EINVAL;
-
+#endif
mutex_lock(&nf_ct_proto_mutex);
old = rcu_dereference_protected(nf_ct_l3protos[proto->l3proto],
lockdep_is_held(&nf_ct_proto_mutex));
@@ -226,9 +306,6 @@ int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto)
goto out_unlock;
}
- if (proto->nlattr_tuple_size)
- proto->nla_size = 3 * proto->nlattr_tuple_size();
-
rcu_assign_pointer(nf_ct_l3protos[proto->l3proto], proto);
out_unlock:
@@ -238,21 +315,7 @@ out_unlock:
}
EXPORT_SYMBOL_GPL(nf_ct_l3proto_register);
-#ifdef CONFIG_SYSCTL
-extern unsigned int nf_conntrack_default_on;
-
-int nf_ct_l3proto_pernet_register(struct net *net,
- struct nf_conntrack_l3proto *proto)
-{
- if (nf_conntrack_default_on == 0)
- return 0;
-
- return proto->net_ns_get ? proto->net_ns_get(net) : 0;
-}
-EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_register);
-#endif
-
-void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto)
+void nf_ct_l3proto_unregister(const struct nf_conntrack_l3proto *proto)
{
BUG_ON(proto->l3proto >= NFPROTO_NUMPROTO);
@@ -266,27 +329,12 @@ void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto)
synchronize_rcu();
/* Remove all contrack entries for this protocol */
- nf_ct_iterate_destroy(kill_l3proto, proto);
+ nf_ct_iterate_destroy(kill_l3proto, (void*)proto);
}
EXPORT_SYMBOL_GPL(nf_ct_l3proto_unregister);
-void nf_ct_l3proto_pernet_unregister(struct net *net,
- struct nf_conntrack_l3proto *proto)
-{
- /*
- * nf_conntrack_default_on *might* have registered hooks.
- * ->net_ns_put must cope with more puts() than get(), i.e.
- * if nf_conntrack_default_on was 0 at time of
- * nf_ct_l3proto_pernet_register invocation this net_ns_put()
- * should be a noop.
- */
- if (proto->net_ns_put)
- proto->net_ns_put(net);
-}
-EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_unregister);
-
static struct nf_proto_net *nf_ct_l4proto_net(struct net *net,
- struct nf_conntrack_l4proto *l4proto)
+ const struct nf_conntrack_l4proto *l4proto)
{
if (l4proto->get_net_proto) {
/* statically built-in protocols use static per-net */
@@ -301,7 +349,7 @@ static struct nf_proto_net *nf_ct_l4proto_net(struct net *net,
static
int nf_ct_l4proto_register_sysctl(struct net *net,
struct nf_proto_net *pn,
- struct nf_conntrack_l4proto *l4proto)
+ const struct nf_conntrack_l4proto *l4proto)
{
int err = 0;
@@ -324,8 +372,8 @@ int nf_ct_l4proto_register_sysctl(struct net *net,
static
void nf_ct_l4proto_unregister_sysctl(struct net *net,
- struct nf_proto_net *pn,
- struct nf_conntrack_l4proto *l4proto)
+ struct nf_proto_net *pn,
+ const struct nf_conntrack_l4proto *l4proto)
{
#ifdef CONFIG_SYSCTL
if (pn->ctl_table_header != NULL)
@@ -383,8 +431,6 @@ int nf_ct_l4proto_register_one(struct nf_conntrack_l4proto *l4proto)
l4proto->nla_size = 0;
if (l4proto->nlattr_size)
l4proto->nla_size += l4proto->nlattr_size();
- if (l4proto->nlattr_tuple_size)
- l4proto->nla_size += 3 * l4proto->nlattr_tuple_size();
rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
l4proto);
@@ -395,7 +441,7 @@ out_unlock:
EXPORT_SYMBOL_GPL(nf_ct_l4proto_register_one);
int nf_ct_l4proto_pernet_register_one(struct net *net,
- struct nf_conntrack_l4proto *l4proto)
+ const struct nf_conntrack_l4proto *l4proto)
{
int ret = 0;
struct nf_proto_net *pn = NULL;
@@ -420,7 +466,7 @@ out:
}
EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register_one);
-static void __nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto)
+static void __nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *l4proto)
{
BUG_ON(l4proto->l3proto >= ARRAY_SIZE(nf_ct_protos));
@@ -433,7 +479,7 @@ static void __nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto)
&nf_conntrack_l4proto_generic);
}
-void nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto)
+void nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *l4proto)
{
mutex_lock(&nf_ct_proto_mutex);
__nf_ct_l4proto_unregister_one(l4proto);
@@ -444,7 +490,7 @@ void nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto)
EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister_one);
void nf_ct_l4proto_pernet_unregister_one(struct net *net,
- struct nf_conntrack_l4proto *l4proto)
+ const struct nf_conntrack_l4proto *l4proto)
{
struct nf_proto_net *pn = nf_ct_l4proto_net(net, l4proto);
@@ -469,8 +515,8 @@ int nf_ct_l4proto_register(struct nf_conntrack_l4proto *l4proto[],
}
if (i != num_proto) {
ver = l4proto[i]->l3proto == PF_INET6 ? 6 : 4;
- pr_err("nf_conntrack_ipv%d: can't register %s%d proto.\n",
- ver, l4proto[i]->name, ver);
+ pr_err("nf_conntrack_ipv%d: can't register l4 %d proto.\n",
+ ver, l4proto[i]->l4proto);
nf_ct_l4proto_unregister(l4proto, i);
}
return ret;
@@ -478,7 +524,7 @@ int nf_ct_l4proto_register(struct nf_conntrack_l4proto *l4proto[],
EXPORT_SYMBOL_GPL(nf_ct_l4proto_register);
int nf_ct_l4proto_pernet_register(struct net *net,
- struct nf_conntrack_l4proto *l4proto[],
+ struct nf_conntrack_l4proto *const l4proto[],
unsigned int num_proto)
{
int ret = -EINVAL;
@@ -490,8 +536,8 @@ int nf_ct_l4proto_pernet_register(struct net *net,
break;
}
if (i != num_proto) {
- pr_err("nf_conntrack_%s%d: pernet registration failed\n",
- l4proto[i]->name,
+ pr_err("nf_conntrack_proto_%d %d: pernet registration failed\n",
+ l4proto[i]->l4proto,
l4proto[i]->l3proto == PF_INET6 ? 6 : 4);
nf_ct_l4proto_pernet_unregister(net, l4proto, i);
}
@@ -514,8 +560,8 @@ void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *l4proto[],
EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister);
void nf_ct_l4proto_pernet_unregister(struct net *net,
- struct nf_conntrack_l4proto *l4proto[],
- unsigned int num_proto)
+ struct nf_conntrack_l4proto *const l4proto[],
+ unsigned int num_proto)
{
while (num_proto-- != 0)
nf_ct_l4proto_pernet_unregister_one(net, l4proto[num_proto]);
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 4707d997558a..2a446f4a554c 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -428,13 +428,13 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
default:
dn = dccp_pernet(net);
if (dn->dccp_loose == 0) {
- msg = "nf_ct_dccp: not picking up existing connection ";
+ msg = "not picking up existing connection ";
goto out_invalid;
}
case CT_DCCP_REQUEST:
break;
case CT_DCCP_INVALID:
- msg = "nf_ct_dccp: invalid state transition ";
+ msg = "invalid state transition ";
goto out_invalid;
}
@@ -447,9 +447,7 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
return true;
out_invalid:
- if (LOG_INVALID(net, IPPROTO_DCCP))
- nf_log_packet(net, nf_ct_l3num(ct), 0, skb, NULL, NULL,
- NULL, "%s", msg);
+ nf_ct_l4proto_log_invalid(skb, ct, "%s", msg);
return false;
}
@@ -469,10 +467,8 @@ static unsigned int *dccp_get_timeouts(struct net *net)
static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
unsigned int dataoff, enum ip_conntrack_info ctinfo,
- u_int8_t pf, unsigned int hooknum,
unsigned int *timeouts)
{
- struct net *net = nf_ct_net(ct);
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
struct dccp_hdr _dh, *dh;
u_int8_t type, old_state, new_state;
@@ -534,15 +530,11 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
ct->proto.dccp.last_pkt = type;
spin_unlock_bh(&ct->lock);
- if (LOG_INVALID(net, IPPROTO_DCCP))
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_dccp: invalid packet ignored ");
+ nf_ct_l4proto_log_invalid(skb, ct, "%s", "invalid packet");
return NF_ACCEPT;
case CT_DCCP_INVALID:
spin_unlock_bh(&ct->lock);
- if (LOG_INVALID(net, IPPROTO_DCCP))
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_dccp: invalid state transition ");
+ nf_ct_l4proto_log_invalid(skb, ct, "%s", "invalid state transition");
return -NF_ACCEPT;
}
@@ -604,8 +596,7 @@ static int dccp_error(struct net *net, struct nf_conn *tmpl,
return NF_ACCEPT;
out_invalid:
- if (LOG_INVALID(net, IPPROTO_DCCP))
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "%s", msg);
+ nf_l4proto_log_invalid(skb, net, pf, IPPROTO_DCCP, "%s", msg);
return -NF_ACCEPT;
}
@@ -623,18 +614,12 @@ static bool dccp_can_early_drop(const struct nf_conn *ct)
return false;
}
-static void dccp_print_tuple(struct seq_file *s,
- const struct nf_conntrack_tuple *tuple)
-{
- seq_printf(s, "sport=%hu dport=%hu ",
- ntohs(tuple->src.u.dccp.port),
- ntohs(tuple->dst.u.dccp.port));
-}
-
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
static void dccp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
{
seq_printf(s, "%s ", dccp_state_names[ct->proto.dccp.state]);
}
+#endif
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
static int dccp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
@@ -880,7 +865,6 @@ static struct nf_proto_net *dccp_get_net_proto(struct net *net)
struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 __read_mostly = {
.l3proto = AF_INET,
.l4proto = IPPROTO_DCCP,
- .name = "dccp",
.pkt_to_tuple = dccp_pkt_to_tuple,
.invert_tuple = dccp_invert_tuple,
.new = dccp_new,
@@ -888,8 +872,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 __read_mostly = {
.get_timeouts = dccp_get_timeouts,
.error = dccp_error,
.can_early_drop = dccp_can_early_drop,
- .print_tuple = dccp_print_tuple,
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = dccp_print_conntrack,
+#endif
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.to_nlattr = dccp_to_nlattr,
.nlattr_size = dccp_nlattr_size,
@@ -916,7 +901,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_dccp4);
struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 __read_mostly = {
.l3proto = AF_INET6,
.l4proto = IPPROTO_DCCP,
- .name = "dccp",
.pkt_to_tuple = dccp_pkt_to_tuple,
.invert_tuple = dccp_invert_tuple,
.new = dccp_new,
@@ -924,8 +908,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 __read_mostly = {
.get_timeouts = dccp_get_timeouts,
.error = dccp_error,
.can_early_drop = dccp_can_early_drop,
- .print_tuple = dccp_print_tuple,
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = dccp_print_conntrack,
+#endif
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.to_nlattr = dccp_to_nlattr,
.nlattr_size = dccp_nlattr_size,
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index d5868bad33a7..1f86ddf6649a 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -17,22 +17,10 @@ static unsigned int nf_ct_generic_timeout __read_mostly = 600*HZ;
static bool nf_generic_should_process(u8 proto)
{
switch (proto) {
-#ifdef CONFIG_NF_CT_PROTO_SCTP_MODULE
- case IPPROTO_SCTP:
- return false;
-#endif
-#ifdef CONFIG_NF_CT_PROTO_DCCP_MODULE
- case IPPROTO_DCCP:
- return false;
-#endif
#ifdef CONFIG_NF_CT_PROTO_GRE_MODULE
case IPPROTO_GRE:
return false;
#endif
-#ifdef CONFIG_NF_CT_PROTO_UDPLITE_MODULE
- case IPPROTO_UDPLITE:
- return false;
-#endif
default:
return true;
}
@@ -62,12 +50,6 @@ static bool generic_invert_tuple(struct nf_conntrack_tuple *tuple,
return true;
}
-/* Print out the per-protocol part of the tuple. */
-static void generic_print_tuple(struct seq_file *s,
- const struct nf_conntrack_tuple *tuple)
-{
-}
-
static unsigned int *generic_get_timeouts(struct net *net)
{
return &(generic_pernet(net)->timeout);
@@ -78,8 +60,6 @@ static int generic_packet(struct nf_conn *ct,
const struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
- u_int8_t pf,
- unsigned int hooknum,
unsigned int *timeout)
{
nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
@@ -187,10 +167,8 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_generic __read_mostly =
{
.l3proto = PF_UNSPEC,
.l4proto = 255,
- .name = "unknown",
.pkt_to_tuple = generic_pkt_to_tuple,
.invert_tuple = generic_invert_tuple,
- .print_tuple = generic_print_tuple,
.packet = generic_packet,
.get_timeouts = generic_get_timeouts,
.new = generic_new,
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 87bb40a3feb5..a2503005d80b 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -224,15 +224,7 @@ static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
return true;
}
-/* print gre part of tuple */
-static void gre_print_tuple(struct seq_file *s,
- const struct nf_conntrack_tuple *tuple)
-{
- seq_printf(s, "srckey=0x%x dstkey=0x%x ",
- ntohs(tuple->src.u.gre.key),
- ntohs(tuple->dst.u.gre.key));
-}
-
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
/* print private data for conntrack */
static void gre_print_conntrack(struct seq_file *s, struct nf_conn *ct)
{
@@ -240,6 +232,7 @@ static void gre_print_conntrack(struct seq_file *s, struct nf_conn *ct)
(ct->proto.gre.timeout / HZ),
(ct->proto.gre.stream_timeout / HZ));
}
+#endif
static unsigned int *gre_get_timeouts(struct net *net)
{
@@ -251,8 +244,6 @@ static int gre_packet(struct nf_conn *ct,
const struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
- u_int8_t pf,
- unsigned int hooknum,
unsigned int *timeouts)
{
/* If we've seen traffic both ways, this is a GRE connection.
@@ -364,11 +355,11 @@ static int gre_init_net(struct net *net, u_int16_t proto)
static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 __read_mostly = {
.l3proto = AF_INET,
.l4proto = IPPROTO_GRE,
- .name = "gre",
.pkt_to_tuple = gre_pkt_to_tuple,
.invert_tuple = gre_invert_tuple,
- .print_tuple = gre_print_tuple,
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = gre_print_conntrack,
+#endif
.get_timeouts = gre_get_timeouts,
.packet = gre_packet,
.new = gre_new,
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 6eef29d2eec4..80faf04ddf15 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -174,20 +174,13 @@ static bool sctp_invert_tuple(struct nf_conntrack_tuple *tuple,
return true;
}
-/* Print out the per-protocol part of the tuple. */
-static void sctp_print_tuple(struct seq_file *s,
- const struct nf_conntrack_tuple *tuple)
-{
- seq_printf(s, "sport=%hu dport=%hu ",
- ntohs(tuple->src.u.sctp.port),
- ntohs(tuple->dst.u.sctp.port));
-}
-
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
/* Print out the private part of the conntrack. */
static void sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
{
seq_printf(s, "%s ", sctp_conntrack_names[ct->proto.sctp.state]);
}
+#endif
#define for_each_sctp_chunk(skb, sch, _sch, offset, dataoff, count) \
for ((offset) = (dataoff) + sizeof(struct sctphdr), (count) = 0; \
@@ -313,8 +306,6 @@ static int sctp_packet(struct nf_conn *ct,
const struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
- u_int8_t pf,
- unsigned int hooknum,
unsigned int *timeouts)
{
enum sctp_conntrack new_state, old_state;
@@ -530,8 +521,7 @@ static int sctp_error(struct net *net, struct nf_conn *tpl, struct sk_buff *skb,
}
return NF_ACCEPT;
out_invalid:
- if (LOG_INVALID(net, IPPROTO_SCTP))
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "%s", logmsg);
+ nf_l4proto_log_invalid(skb, net, pf, IPPROTO_SCTP, "%s", logmsg);
return -NF_ACCEPT;
}
@@ -791,11 +781,11 @@ static struct nf_proto_net *sctp_get_net_proto(struct net *net)
struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = {
.l3proto = PF_INET,
.l4proto = IPPROTO_SCTP,
- .name = "sctp",
.pkt_to_tuple = sctp_pkt_to_tuple,
.invert_tuple = sctp_invert_tuple,
- .print_tuple = sctp_print_tuple,
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = sctp_print_conntrack,
+#endif
.packet = sctp_packet,
.get_timeouts = sctp_get_timeouts,
.new = sctp_new,
@@ -828,11 +818,11 @@ EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_sctp4);
struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = {
.l3proto = PF_INET6,
.l4proto = IPPROTO_SCTP,
- .name = "sctp",
.pkt_to_tuple = sctp_pkt_to_tuple,
.invert_tuple = sctp_invert_tuple,
- .print_tuple = sctp_print_tuple,
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = sctp_print_conntrack,
+#endif
.packet = sctp_packet,
.get_timeouts = sctp_get_timeouts,
.new = sctp_new,
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 9758a7dfd83e..b12fc07111d0 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -301,20 +301,13 @@ static bool tcp_invert_tuple(struct nf_conntrack_tuple *tuple,
return true;
}
-/* Print out the per-protocol part of the tuple. */
-static void tcp_print_tuple(struct seq_file *s,
- const struct nf_conntrack_tuple *tuple)
-{
- seq_printf(s, "sport=%hu dport=%hu ",
- ntohs(tuple->src.u.tcp.port),
- ntohs(tuple->dst.u.tcp.port));
-}
-
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
/* Print out the private part of the conntrack. */
static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
{
seq_printf(s, "%s ", tcp_conntrack_names[ct->proto.tcp.state]);
}
+#endif
static unsigned int get_conntrack_index(const struct tcphdr *tcph)
{
@@ -500,8 +493,7 @@ static bool tcp_in_window(const struct nf_conn *ct,
unsigned int index,
const struct sk_buff *skb,
unsigned int dataoff,
- const struct tcphdr *tcph,
- u_int8_t pf)
+ const struct tcphdr *tcph)
{
struct net *net = nf_ct_net(ct);
struct nf_tcp_net *tn = tcp_pernet(net);
@@ -709,9 +701,9 @@ static bool tcp_in_window(const struct nf_conn *ct,
if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
tn->tcp_be_liberal)
res = true;
- if (!res && LOG_INVALID(net, IPPROTO_TCP))
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_tcp: %s ",
+ if (!res) {
+ nf_ct_l4proto_log_invalid(skb, ct,
+ "%s",
before(seq, sender->td_maxend + 1) ?
in_recv_win ?
before(sack, receiver->td_end + 1) ?
@@ -720,6 +712,7 @@ static bool tcp_in_window(const struct nf_conn *ct,
: "ACK is over the upper bound (ACKed data not seen yet)"
: "SEQ is under the lower bound (already ACKed data retransmitted)"
: "SEQ is over the upper bound (over the window of the receiver)");
+ }
}
pr_debug("tcp_in_window: res=%u sender end=%u maxend=%u maxwin=%u "
@@ -745,6 +738,12 @@ static const u8 tcp_valid_flags[(TCPHDR_FIN|TCPHDR_SYN|TCPHDR_RST|TCPHDR_ACK|
[TCPHDR_ACK|TCPHDR_URG] = 1,
};
+static void tcp_error_log(const struct sk_buff *skb, struct net *net,
+ u8 pf, const char *msg)
+{
+ nf_l4proto_log_invalid(skb, net, pf, IPPROTO_TCP, "%s", msg);
+}
+
/* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c. */
static int tcp_error(struct net *net, struct nf_conn *tmpl,
struct sk_buff *skb,
@@ -760,17 +759,13 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl,
/* Smaller that minimal TCP header? */
th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
if (th == NULL) {
- if (LOG_INVALID(net, IPPROTO_TCP))
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_tcp: short packet ");
+ tcp_error_log(skb, net, pf, "short packet");
return -NF_ACCEPT;
}
/* Not whole TCP header or malformed packet */
if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
- if (LOG_INVALID(net, IPPROTO_TCP))
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_tcp: truncated/malformed packet ");
+ tcp_error_log(skb, net, pf, "truncated packet");
return -NF_ACCEPT;
}
@@ -781,18 +776,14 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl,
/* FIXME: Source route IP option packets --RR */
if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) {
- if (LOG_INVALID(net, IPPROTO_TCP))
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_tcp: bad TCP checksum ");
+ tcp_error_log(skb, net, pf, "bad checksum");
return -NF_ACCEPT;
}
/* Check TCP flags. */
tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH));
if (!tcp_valid_flags[tcpflags]) {
- if (LOG_INVALID(net, IPPROTO_TCP))
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_tcp: invalid TCP flag combination ");
+ tcp_error_log(skb, net, pf, "invalid tcp flag combination");
return -NF_ACCEPT;
}
@@ -809,8 +800,6 @@ static int tcp_packet(struct nf_conn *ct,
const struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
- u_int8_t pf,
- unsigned int hooknum,
unsigned int *timeouts)
{
struct net *net = nf_ct_net(ct);
@@ -947,10 +936,8 @@ static int tcp_packet(struct nf_conn *ct,
IP_CT_EXP_CHALLENGE_ACK;
}
spin_unlock_bh(&ct->lock);
- if (LOG_INVALID(net, IPPROTO_TCP))
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_tcp: invalid packet ignored in "
- "state %s ", tcp_conntrack_names[old_state]);
+ nf_ct_l4proto_log_invalid(skb, ct, "invalid packet ignored in "
+ "state %s ", tcp_conntrack_names[old_state]);
return NF_ACCEPT;
case TCP_CONNTRACK_MAX:
/* Special case for SYN proxy: when the SYN to the server or
@@ -972,9 +959,7 @@ static int tcp_packet(struct nf_conn *ct,
pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
dir, get_conntrack_index(th), old_state);
spin_unlock_bh(&ct->lock);
- if (LOG_INVALID(net, IPPROTO_TCP))
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_tcp: invalid state ");
+ nf_ct_l4proto_log_invalid(skb, ct, "invalid state");
return -NF_ACCEPT;
case TCP_CONNTRACK_TIME_WAIT:
/* RFC5961 compliance cause stack to send "challenge-ACK"
@@ -989,9 +974,7 @@ static int tcp_packet(struct nf_conn *ct,
/* Detected RFC5961 challenge ACK */
ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK;
spin_unlock_bh(&ct->lock);
- if (LOG_INVALID(net, IPPROTO_TCP))
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_tcp: challenge-ACK ignored ");
+ nf_ct_l4proto_log_invalid(skb, ct, "challenge-ack ignored");
return NF_ACCEPT; /* Don't change state */
}
break;
@@ -1001,9 +984,7 @@ static int tcp_packet(struct nf_conn *ct,
&& before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) {
/* Invalid RST */
spin_unlock_bh(&ct->lock);
- if (LOG_INVALID(net, IPPROTO_TCP))
- nf_log_packet(net, pf, 0, skb, NULL, NULL,
- NULL, "nf_ct_tcp: invalid RST ");
+ nf_ct_l4proto_log_invalid(skb, ct, "invalid rst");
return -NF_ACCEPT;
}
if (index == TCP_RST_SET
@@ -1030,7 +1011,7 @@ static int tcp_packet(struct nf_conn *ct,
}
if (!tcp_in_window(ct, &ct->proto.tcp, dir, index,
- skb, dataoff, th, pf)) {
+ skb, dataoff, th)) {
spin_unlock_bh(&ct->lock);
return -NF_ACCEPT;
}
@@ -1296,9 +1277,14 @@ static int tcp_nlattr_size(void)
+ nla_policy_len(tcp_nla_policy, CTA_PROTOINFO_TCP_MAX + 1);
}
-static int tcp_nlattr_tuple_size(void)
+static unsigned int tcp_nlattr_tuple_size(void)
{
- return nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
+ static unsigned int size __read_mostly;
+
+ if (!size)
+ size = nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
+
+ return size;
}
#endif
@@ -1556,11 +1542,11 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
{
.l3proto = PF_INET,
.l4proto = IPPROTO_TCP,
- .name = "tcp",
.pkt_to_tuple = tcp_pkt_to_tuple,
.invert_tuple = tcp_invert_tuple,
- .print_tuple = tcp_print_tuple,
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = tcp_print_conntrack,
+#endif
.packet = tcp_packet,
.get_timeouts = tcp_get_timeouts,
.new = tcp_new,
@@ -1594,11 +1580,11 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly =
{
.l3proto = PF_INET6,
.l4proto = IPPROTO_TCP,
- .name = "tcp",
.pkt_to_tuple = tcp_pkt_to_tuple,
.invert_tuple = tcp_invert_tuple,
- .print_tuple = tcp_print_tuple,
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = tcp_print_conntrack,
+#endif
.packet = tcp_packet,
.get_timeouts = tcp_get_timeouts,
.new = tcp_new,
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index f6ebce6178ca..3a5f727103af 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -63,15 +63,6 @@ static bool udp_invert_tuple(struct nf_conntrack_tuple *tuple,
return true;
}
-/* Print out the per-protocol part of the tuple. */
-static void udp_print_tuple(struct seq_file *s,
- const struct nf_conntrack_tuple *tuple)
-{
- seq_printf(s, "sport=%hu dport=%hu ",
- ntohs(tuple->src.u.udp.port),
- ntohs(tuple->dst.u.udp.port));
-}
-
static unsigned int *udp_get_timeouts(struct net *net)
{
return udp_pernet(net)->timeouts;
@@ -82,8 +73,6 @@ static int udp_packet(struct nf_conn *ct,
const struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
- u_int8_t pf,
- unsigned int hooknum,
unsigned int *timeouts)
{
/* If we've seen traffic both ways, this is some kind of UDP
@@ -109,6 +98,12 @@ static bool udp_new(struct nf_conn *ct, const struct sk_buff *skb,
}
#ifdef CONFIG_NF_CT_PROTO_UDPLITE
+static void udplite_error_log(const struct sk_buff *skb, struct net *net,
+ u8 pf, const char *msg)
+{
+ nf_l4proto_log_invalid(skb, net, pf, IPPROTO_UDPLITE, "%s", msg);
+}
+
static int udplite_error(struct net *net, struct nf_conn *tmpl,
struct sk_buff *skb,
unsigned int dataoff,
@@ -122,9 +117,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl,
/* Header is too small? */
hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
if (!hdr) {
- if (LOG_INVALID(net, IPPROTO_UDPLITE))
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_udplite: short packet ");
+ udplite_error_log(skb, net, pf, "short packet");
return -NF_ACCEPT;
}
@@ -132,17 +125,13 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl,
if (cscov == 0) {
cscov = udplen;
} else if (cscov < sizeof(*hdr) || cscov > udplen) {
- if (LOG_INVALID(net, IPPROTO_UDPLITE))
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_udplite: invalid checksum coverage ");
+ udplite_error_log(skb, net, pf, "invalid checksum coverage");
return -NF_ACCEPT;
}
/* UDPLITE mandates checksums */
if (!hdr->check) {
- if (LOG_INVALID(net, IPPROTO_UDPLITE))
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_udplite: checksum missing ");
+ udplite_error_log(skb, net, pf, "checksum missing");
return -NF_ACCEPT;
}
@@ -150,9 +139,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl,
if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_UDP,
pf)) {
- if (LOG_INVALID(net, IPPROTO_UDPLITE))
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_udplite: bad UDPLite checksum ");
+ udplite_error_log(skb, net, pf, "bad checksum");
return -NF_ACCEPT;
}
@@ -160,6 +147,12 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl,
}
#endif
+static void udp_error_log(const struct sk_buff *skb, struct net *net,
+ u8 pf, const char *msg)
+{
+ nf_l4proto_log_invalid(skb, net, pf, IPPROTO_UDP, "%s", msg);
+}
+
static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
unsigned int dataoff,
u_int8_t pf,
@@ -172,17 +165,13 @@ static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
/* Header is too small? */
hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
if (hdr == NULL) {
- if (LOG_INVALID(net, IPPROTO_UDP))
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_udp: short packet ");
+ udp_error_log(skb, net, pf, "short packet");
return -NF_ACCEPT;
}
/* Truncated/malformed packets */
if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) {
- if (LOG_INVALID(net, IPPROTO_UDP))
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_udp: truncated/malformed packet ");
+ udp_error_log(skb, net, pf, "truncated/malformed packet");
return -NF_ACCEPT;
}
@@ -196,9 +185,7 @@ static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
* FIXME: Source route IP option packets --RR */
if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
nf_checksum(skb, hooknum, dataoff, IPPROTO_UDP, pf)) {
- if (LOG_INVALID(net, IPPROTO_UDP))
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_udp: bad UDP checksum ");
+ udp_error_log(skb, net, pf, "bad checksum");
return -NF_ACCEPT;
}
@@ -313,11 +300,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly =
{
.l3proto = PF_INET,
.l4proto = IPPROTO_UDP,
- .name = "udp",
.allow_clash = true,
.pkt_to_tuple = udp_pkt_to_tuple,
.invert_tuple = udp_invert_tuple,
- .print_tuple = udp_print_tuple,
.packet = udp_packet,
.get_timeouts = udp_get_timeouts,
.new = udp_new,
@@ -347,11 +332,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly =
{
.l3proto = PF_INET,
.l4proto = IPPROTO_UDPLITE,
- .name = "udplite",
.allow_clash = true,
.pkt_to_tuple = udp_pkt_to_tuple,
.invert_tuple = udp_invert_tuple,
- .print_tuple = udp_print_tuple,
.packet = udp_packet,
.get_timeouts = udp_get_timeouts,
.new = udp_new,
@@ -381,11 +364,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly =
{
.l3proto = PF_INET6,
.l4proto = IPPROTO_UDP,
- .name = "udp",
.allow_clash = true,
.pkt_to_tuple = udp_pkt_to_tuple,
.invert_tuple = udp_invert_tuple,
- .print_tuple = udp_print_tuple,
.packet = udp_packet,
.get_timeouts = udp_get_timeouts,
.new = udp_new,
@@ -415,11 +396,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly =
{
.l3proto = PF_INET6,
.l4proto = IPPROTO_UDPLITE,
- .name = "udplite",
.allow_clash = true,
.pkt_to_tuple = udp_pkt_to_tuple,
.invert_tuple = udp_invert_tuple,
- .print_tuple = udp_print_tuple,
.packet = udp_packet,
.get_timeouts = udp_get_timeouts,
.new = udp_new,
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index d38af4274335..4dbb5bad4363 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -884,7 +884,6 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff,
tuple.dst.u3 = *daddr;
tuple.dst.u.udp.port = port;
- rcu_read_lock();
do {
exp = __nf_ct_expect_find(net, nf_ct_zone(ct), &tuple);
@@ -918,10 +917,8 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff,
goto err1;
}
- if (skip_expect) {
- rcu_read_unlock();
+ if (skip_expect)
return NF_ACCEPT;
- }
rtp_exp = nf_ct_expect_alloc(ct);
if (rtp_exp == NULL)
@@ -952,7 +949,6 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff,
err2:
nf_ct_expect_put(rtp_exp);
err1:
- rcu_read_unlock();
return ret;
}
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index ccb5cb9043e0..5a101caa3e12 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -41,8 +41,62 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_l3proto *l3proto,
const struct nf_conntrack_l4proto *l4proto)
{
- l3proto->print_tuple(s, tuple);
- l4proto->print_tuple(s, tuple);
+ switch (l3proto->l3proto) {
+ case NFPROTO_IPV4:
+ seq_printf(s, "src=%pI4 dst=%pI4 ",
+ &tuple->src.u3.ip, &tuple->dst.u3.ip);
+ break;
+ case NFPROTO_IPV6:
+ seq_printf(s, "src=%pI6 dst=%pI6 ",
+ tuple->src.u3.ip6, tuple->dst.u3.ip6);
+ break;
+ default:
+ break;
+ }
+
+ switch (l4proto->l4proto) {
+ case IPPROTO_ICMP:
+ seq_printf(s, "type=%u code=%u id=%u ",
+ tuple->dst.u.icmp.type,
+ tuple->dst.u.icmp.code,
+ ntohs(tuple->src.u.icmp.id));
+ break;
+ case IPPROTO_TCP:
+ seq_printf(s, "sport=%hu dport=%hu ",
+ ntohs(tuple->src.u.tcp.port),
+ ntohs(tuple->dst.u.tcp.port));
+ break;
+ case IPPROTO_UDPLITE: /* fallthrough */
+ case IPPROTO_UDP:
+ seq_printf(s, "sport=%hu dport=%hu ",
+ ntohs(tuple->src.u.udp.port),
+ ntohs(tuple->dst.u.udp.port));
+
+ break;
+ case IPPROTO_DCCP:
+ seq_printf(s, "sport=%hu dport=%hu ",
+ ntohs(tuple->src.u.dccp.port),
+ ntohs(tuple->dst.u.dccp.port));
+ break;
+ case IPPROTO_SCTP:
+ seq_printf(s, "sport=%hu dport=%hu ",
+ ntohs(tuple->src.u.sctp.port),
+ ntohs(tuple->dst.u.sctp.port));
+ break;
+ case IPPROTO_ICMPV6:
+ seq_printf(s, "type=%u code=%u id=%u ",
+ tuple->dst.u.icmp.type,
+ tuple->dst.u.icmp.code,
+ ntohs(tuple->src.u.icmp.id));
+ break;
+ case IPPROTO_GRE:
+ seq_printf(s, "srckey=0x%x dstkey=0x%x ",
+ ntohs(tuple->src.u.gre.key),
+ ntohs(tuple->dst.u.gre.key));
+ break;
+ default:
+ break;
+ }
}
EXPORT_SYMBOL_GPL(print_tuple);
@@ -198,6 +252,31 @@ ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct)
}
#endif
+static const char* l3proto_name(u16 proto)
+{
+ switch (proto) {
+ case AF_INET: return "ipv4";
+ case AF_INET6: return "ipv6";
+ }
+
+ return "unknown";
+}
+
+static const char* l4proto_name(u16 proto)
+{
+ switch (proto) {
+ case IPPROTO_ICMP: return "icmp";
+ case IPPROTO_TCP: return "tcp";
+ case IPPROTO_UDP: return "udp";
+ case IPPROTO_DCCP: return "dccp";
+ case IPPROTO_GRE: return "gre";
+ case IPPROTO_SCTP: return "sctp";
+ case IPPROTO_UDPLITE: return "udplite";
+ }
+
+ return "unknown";
+}
+
/* return 0 on success, 1 in case of error */
static int ct_seq_show(struct seq_file *s, void *v)
{
@@ -208,7 +287,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
struct net *net = seq_file_net(s);
int ret = 0;
- NF_CT_ASSERT(ct);
+ WARN_ON(!ct);
if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use)))
return 0;
@@ -225,14 +304,14 @@ static int ct_seq_show(struct seq_file *s, void *v)
goto release;
l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
- NF_CT_ASSERT(l3proto);
+ WARN_ON(!l3proto);
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
- NF_CT_ASSERT(l4proto);
+ WARN_ON(!l4proto);
ret = -ENOSPC;
seq_printf(s, "%-8s %u %-8s %u %ld ",
- l3proto->name, nf_ct_l3num(ct),
- l4proto->name, nf_ct_protonum(ct),
+ l3proto_name(l3proto->l3proto), nf_ct_l3num(ct),
+ l4proto_name(l4proto->l4proto), nf_ct_protonum(ct),
nf_ct_expires(ct) / HZ);
if (l4proto->print_conntrack)
@@ -452,9 +531,6 @@ static int log_invalid_proto_max __read_mostly = 255;
/* size the user *wants to set */
static unsigned int nf_conntrack_htable_size_user __read_mostly;
-extern unsigned int nf_conntrack_default_on;
-unsigned int nf_conntrack_default_on __read_mostly = 1;
-
static int
nf_conntrack_hash_sysctl(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -520,13 +596,6 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- {
- .procname = "nf_conntrack_default_on",
- .data = &nf_conntrack_default_on,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
{ }
};
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h
index bfa742da83af..44284cd2528d 100644
--- a/net/netfilter/nf_internals.h
+++ b/net/netfilter/nf_internals.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _NF_INTERNALS_H
#define _NF_INTERNALS_H
@@ -5,17 +6,11 @@
#include <linux/skbuff.h>
#include <linux/netdevice.h>
-#ifdef CONFIG_NETFILTER_DEBUG
-#define NFDEBUG(format, args...) printk(KERN_DEBUG format , ## args)
-#else
-#define NFDEBUG(format, args...)
-#endif
-
/* nf_queue.c */
int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
- struct nf_hook_entry **entryp, unsigned int verdict);
+ const struct nf_hook_entries *entries, unsigned int index,
+ unsigned int verdict);
unsigned int nf_queue_nf_hook_drop(struct net *net);
-int __init netfilter_queue_init(void);
/* nf_log.c */
int __init netfilter_log_init(void);
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index eb541786ccb7..6c38421e31f9 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -30,19 +30,17 @@
#include <net/netfilter/nf_conntrack_zones.h>
#include <linux/netfilter/nf_nat.h>
+static spinlock_t nf_nat_locks[CONNTRACK_LOCKS];
+
static DEFINE_MUTEX(nf_nat_proto_mutex);
static const struct nf_nat_l3proto __rcu *nf_nat_l3protos[NFPROTO_NUMPROTO]
__read_mostly;
static const struct nf_nat_l4proto __rcu **nf_nat_l4protos[NFPROTO_NUMPROTO]
__read_mostly;
-struct nf_nat_conn_key {
- const struct net *net;
- const struct nf_conntrack_tuple *tuple;
- const struct nf_conntrack_zone *zone;
-};
-
-static struct rhltable nf_nat_bysource_table;
+static struct hlist_head *nf_nat_bysource __read_mostly;
+static unsigned int nf_nat_htable_size __read_mostly;
+static unsigned int nf_nat_hash_rnd __read_mostly;
inline const struct nf_nat_l3proto *
__nf_nat_l3proto_find(u8 family)
@@ -118,17 +116,19 @@ int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family)
EXPORT_SYMBOL(nf_xfrm_me_harder);
#endif /* CONFIG_XFRM */
-static u32 nf_nat_bysource_hash(const void *data, u32 len, u32 seed)
+/* We keep an extra hash for each conntrack, for fast searching. */
+static unsigned int
+hash_by_src(const struct net *n, const struct nf_conntrack_tuple *tuple)
{
- const struct nf_conntrack_tuple *t;
- const struct nf_conn *ct = data;
+ unsigned int hash;
+
+ get_random_once(&nf_nat_hash_rnd, sizeof(nf_nat_hash_rnd));
- t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
/* Original src, to ensure we map it consistently if poss. */
+ hash = jhash2((u32 *)&tuple->src, sizeof(tuple->src) / sizeof(u32),
+ tuple->dst.protonum ^ nf_nat_hash_rnd ^ net_hash_mix(n));
- seed ^= net_hash_mix(nf_ct_net(ct));
- return jhash2((const u32 *)&t->src, sizeof(t->src) / sizeof(u32),
- t->dst.protonum ^ seed);
+ return reciprocal_scale(hash, nf_nat_htable_size);
}
/* Is this tuple already taken? (not by us) */
@@ -184,28 +184,6 @@ same_src(const struct nf_conn *ct,
t->src.u.all == tuple->src.u.all);
}
-static int nf_nat_bysource_cmp(struct rhashtable_compare_arg *arg,
- const void *obj)
-{
- const struct nf_nat_conn_key *key = arg->key;
- const struct nf_conn *ct = obj;
-
- if (!same_src(ct, key->tuple) ||
- !net_eq(nf_ct_net(ct), key->net) ||
- !nf_ct_zone_equal(ct, key->zone, IP_CT_DIR_ORIGINAL))
- return 1;
-
- return 0;
-}
-
-static struct rhashtable_params nf_nat_bysource_params = {
- .head_offset = offsetof(struct nf_conn, nat_bysource),
- .obj_hashfn = nf_nat_bysource_hash,
- .obj_cmpfn = nf_nat_bysource_cmp,
- .nelem_hint = 256,
- .min_size = 1024,
-};
-
/* Only called for SRC manip */
static int
find_appropriate_src(struct net *net,
@@ -216,26 +194,22 @@ find_appropriate_src(struct net *net,
struct nf_conntrack_tuple *result,
const struct nf_nat_range *range)
{
+ unsigned int h = hash_by_src(net, tuple);
const struct nf_conn *ct;
- struct nf_nat_conn_key key = {
- .net = net,
- .tuple = tuple,
- .zone = zone
- };
- struct rhlist_head *hl, *h;
- hl = rhltable_lookup(&nf_nat_bysource_table, &key,
- nf_nat_bysource_params);
-
- rhl_for_each_entry_rcu(ct, h, hl, nat_bysource) {
- nf_ct_invert_tuplepr(result,
- &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
- result->dst = tuple->dst;
-
- if (in_range(l3proto, l4proto, result, range))
- return 1;
+ hlist_for_each_entry_rcu(ct, &nf_nat_bysource[h], nat_bysource) {
+ if (same_src(ct, tuple) &&
+ net_eq(net, nf_ct_net(ct)) &&
+ nf_ct_zone_equal(ct, zone, IP_CT_DIR_ORIGINAL)) {
+ /* Copy source part from reply tuple. */
+ nf_ct_invert_tuplepr(result,
+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+ result->dst = tuple->dst;
+
+ if (in_range(l3proto, l4proto, result, range))
+ return 1;
+ }
}
-
return 0;
}
@@ -408,15 +382,18 @@ nf_nat_setup_info(struct nf_conn *ct,
const struct nf_nat_range *range,
enum nf_nat_manip_type maniptype)
{
+ struct net *net = nf_ct_net(ct);
struct nf_conntrack_tuple curr_tuple, new_tuple;
/* Can't setup nat info for confirmed ct. */
if (nf_ct_is_confirmed(ct))
return NF_ACCEPT;
- NF_CT_ASSERT(maniptype == NF_NAT_MANIP_SRC ||
- maniptype == NF_NAT_MANIP_DST);
- BUG_ON(nf_nat_initialized(ct, maniptype));
+ WARN_ON(maniptype != NF_NAT_MANIP_SRC &&
+ maniptype != NF_NAT_MANIP_DST);
+
+ if (WARN_ON(nf_nat_initialized(ct, maniptype)))
+ return NF_DROP;
/* What we've got will look like inverse of reply. Normally
* this is what is in the conntrack, except for prior
@@ -441,25 +418,22 @@ nf_nat_setup_info(struct nf_conn *ct,
else
ct->status |= IPS_DST_NAT;
- if (nfct_help(ct))
+ if (nfct_help(ct) && !nfct_seqadj(ct))
if (!nfct_seqadj_ext_add(ct))
return NF_DROP;
}
if (maniptype == NF_NAT_MANIP_SRC) {
- struct nf_nat_conn_key key = {
- .net = nf_ct_net(ct),
- .tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
- .zone = nf_ct_zone(ct),
- };
- int err;
-
- err = rhltable_insert_key(&nf_nat_bysource_table,
- &key,
- &ct->nat_bysource,
- nf_nat_bysource_params);
- if (err)
- return NF_DROP;
+ unsigned int srchash;
+ spinlock_t *lock;
+
+ srchash = hash_by_src(net,
+ &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+ lock = &nf_nat_locks[srchash % CONNTRACK_LOCKS];
+ spin_lock_bh(lock);
+ hlist_add_head_rcu(&ct->nat_bysource,
+ &nf_nat_bysource[srchash]);
+ spin_unlock_bh(lock);
}
/* It's done. */
@@ -553,23 +527,29 @@ static int nf_nat_proto_remove(struct nf_conn *i, void *data)
return i->status & IPS_NAT_MASK ? 1 : 0;
}
+static void __nf_nat_cleanup_conntrack(struct nf_conn *ct)
+{
+ unsigned int h;
+
+ h = hash_by_src(nf_ct_net(ct), &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+ spin_lock_bh(&nf_nat_locks[h % CONNTRACK_LOCKS]);
+ hlist_del_rcu(&ct->nat_bysource);
+ spin_unlock_bh(&nf_nat_locks[h % CONNTRACK_LOCKS]);
+}
+
static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
{
if (nf_nat_proto_remove(ct, data))
return 1;
- if ((ct->status & IPS_SRC_NAT_DONE) == 0)
- return 0;
-
- /* This netns is being destroyed, and conntrack has nat null binding.
+ /* This module is being removed and conntrack has nat null binding.
* Remove it from bysource hash, as the table will be freed soon.
*
* Else, when the conntrack is destoyed, nf_nat_cleanup_conntrack()
* will delete entry from already-freed table.
*/
- clear_bit(IPS_SRC_NAT_DONE_BIT, &ct->status);
- rhltable_remove(&nf_nat_bysource_table, &ct->nat_bysource,
- nf_nat_bysource_params);
+ if (test_and_clear_bit(IPS_SRC_NAT_DONE_BIT, &ct->status))
+ __nf_nat_cleanup_conntrack(ct);
/* don't delete conntrack. Although that would make things a lot
* simpler, we'd end up flushing all conntracks on nat rmmod.
@@ -698,8 +678,7 @@ EXPORT_SYMBOL_GPL(nf_nat_l3proto_unregister);
static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
{
if (ct->status & IPS_SRC_NAT_DONE)
- rhltable_remove(&nf_nat_bysource_table, &ct->nat_bysource,
- nf_nat_bysource_params);
+ __nf_nat_cleanup_conntrack(ct);
}
static struct nf_ct_ext_type nat_extend __read_mostly = {
@@ -821,19 +800,27 @@ static struct nf_ct_helper_expectfn follow_master_nat = {
static int __init nf_nat_init(void)
{
- int ret;
+ int ret, i;
- ret = rhltable_init(&nf_nat_bysource_table, &nf_nat_bysource_params);
- if (ret)
- return ret;
+ /* Leave them the same for the moment. */
+ nf_nat_htable_size = nf_conntrack_htable_size;
+ if (nf_nat_htable_size < CONNTRACK_LOCKS)
+ nf_nat_htable_size = CONNTRACK_LOCKS;
+
+ nf_nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, 0);
+ if (!nf_nat_bysource)
+ return -ENOMEM;
ret = nf_ct_extend_register(&nat_extend);
if (ret < 0) {
- rhltable_destroy(&nf_nat_bysource_table);
+ nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size);
printk(KERN_ERR "nf_nat_core: Unable to register extension\n");
return ret;
}
+ for (i = 0; i < CONNTRACK_LOCKS; i++)
+ spin_lock_init(&nf_nat_locks[i]);
+
nf_ct_helper_expectfn_register(&follow_master_nat);
BUG_ON(nfnetlink_parse_nat_setup_hook != NULL);
@@ -863,8 +850,8 @@ static void __exit nf_nat_cleanup(void)
for (i = 0; i < NFPROTO_NUMPROTO; i++)
kfree(nf_nat_l4protos[i]);
-
- rhltable_destroy(&nf_nat_bysource_table);
+ synchronize_net();
+ nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size);
}
MODULE_LICENSE("GPL");
diff --git a/net/netfilter/nf_nat_ftp.c b/net/netfilter/nf_nat_ftp.c
index e84a578dbe35..d76afafdc699 100644
--- a/net/netfilter/nf_nat_ftp.c
+++ b/net/netfilter/nf_nat_ftp.c
@@ -134,7 +134,7 @@ static int __init nf_nat_ftp_init(void)
}
/* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */
-static int warn_set(const char *val, struct kernel_param *kp)
+static int warn_set(const char *val, const struct kernel_param *kp)
{
printk(KERN_INFO KBUILD_MODNAME
": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
diff --git a/net/netfilter/nf_nat_irc.c b/net/netfilter/nf_nat_irc.c
index 0648cb096bd8..dcb5f6375d9d 100644
--- a/net/netfilter/nf_nat_irc.c
+++ b/net/netfilter/nf_nat_irc.c
@@ -106,7 +106,7 @@ static int __init nf_nat_irc_init(void)
}
/* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */
-static int warn_set(const char *val, struct kernel_param *kp)
+static int warn_set(const char *val, const struct kernel_param *kp)
{
printk(KERN_INFO KBUILD_MODNAME
": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
diff --git a/net/netfilter/nf_nat_redirect.c b/net/netfilter/nf_nat_redirect.c
index 86067560a318..25b06b959118 100644
--- a/net/netfilter/nf_nat_redirect.c
+++ b/net/netfilter/nf_nat_redirect.c
@@ -38,11 +38,11 @@ nf_nat_redirect_ipv4(struct sk_buff *skb,
__be32 newdst;
struct nf_nat_range newrange;
- NF_CT_ASSERT(hooknum == NF_INET_PRE_ROUTING ||
- hooknum == NF_INET_LOCAL_OUT);
+ WARN_ON(hooknum != NF_INET_PRE_ROUTING &&
+ hooknum != NF_INET_LOCAL_OUT);
ct = nf_ct_get(skb, &ctinfo);
- NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
+ WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)));
/* Local packets: make them go to loopback */
if (hooknum == NF_INET_LOCAL_OUT) {
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 043850c9d154..f7e21953b1de 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -109,9 +109,11 @@ unsigned int nf_queue_nf_hook_drop(struct net *net)
return count;
}
+EXPORT_SYMBOL_GPL(nf_queue_nf_hook_drop);
static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
- struct nf_hook_entry *hook_entry, unsigned int queuenum)
+ const struct nf_hook_entries *entries,
+ unsigned int index, unsigned int queuenum)
{
int status = -ENOENT;
struct nf_queue_entry *entry = NULL;
@@ -139,7 +141,7 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
*entry = (struct nf_queue_entry) {
.skb = skb,
.state = *state,
- .hook = hook_entry,
+ .hook_index = index,
.size = sizeof(*entry) + afinfo->route_key_size,
};
@@ -162,18 +164,16 @@ err:
/* Packets leaving via this function must come back through nf_reinject(). */
int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
- struct nf_hook_entry **entryp, unsigned int verdict)
+ const struct nf_hook_entries *entries, unsigned int index,
+ unsigned int verdict)
{
- struct nf_hook_entry *entry = *entryp;
int ret;
- ret = __nf_queue(skb, state, entry, verdict >> NF_VERDICT_QBITS);
+ ret = __nf_queue(skb, state, entries, index, verdict >> NF_VERDICT_QBITS);
if (ret < 0) {
if (ret == -ESRCH &&
- (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS)) {
- *entryp = rcu_dereference(entry->next);
+ (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
return 1;
- }
kfree_skb(skb);
}
@@ -182,33 +182,56 @@ int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
static unsigned int nf_iterate(struct sk_buff *skb,
struct nf_hook_state *state,
- struct nf_hook_entry **entryp)
+ const struct nf_hook_entries *hooks,
+ unsigned int *index)
{
- unsigned int verdict;
+ const struct nf_hook_entry *hook;
+ unsigned int verdict, i = *index;
- do {
+ while (i < hooks->num_hook_entries) {
+ hook = &hooks->hooks[i];
repeat:
- verdict = nf_hook_entry_hookfn((*entryp), skb, state);
+ verdict = nf_hook_entry_hookfn(hook, skb, state);
if (verdict != NF_ACCEPT) {
if (verdict != NF_REPEAT)
return verdict;
goto repeat;
}
- *entryp = rcu_dereference((*entryp)->next);
- } while (*entryp);
+ i++;
+ }
+ *index = i;
return NF_ACCEPT;
}
+/* Caller must hold rcu read-side lock */
void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
{
- struct nf_hook_entry *hook_entry = entry->hook;
+ const struct nf_hook_entry *hook_entry;
+ const struct nf_hook_entries *hooks;
struct sk_buff *skb = entry->skb;
const struct nf_afinfo *afinfo;
+ const struct net *net;
+ unsigned int i;
int err;
+ u8 pf;
+
+ net = entry->state.net;
+ pf = entry->state.pf;
+
+ hooks = rcu_dereference(net->nf.hooks[pf][entry->state.hook]);
nf_queue_entry_release_refs(entry);
+ i = entry->hook_index;
+ if (WARN_ON_ONCE(i >= hooks->num_hook_entries)) {
+ kfree_skb(skb);
+ kfree(entry);
+ return;
+ }
+
+ hook_entry = &hooks->hooks[i];
+
/* Continue traversal iff userspace said ok... */
if (verdict == NF_REPEAT)
verdict = nf_hook_entry_hookfn(hook_entry, skb, &entry->state);
@@ -220,27 +243,22 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
}
if (verdict == NF_ACCEPT) {
- hook_entry = rcu_dereference(hook_entry->next);
- if (hook_entry)
next_hook:
- verdict = nf_iterate(skb, &entry->state, &hook_entry);
+ ++i;
+ verdict = nf_iterate(skb, &entry->state, hooks, &i);
}
switch (verdict & NF_VERDICT_MASK) {
case NF_ACCEPT:
case NF_STOP:
-okfn:
local_bh_disable();
entry->state.okfn(entry->state.net, entry->state.sk, skb);
local_bh_enable();
break;
case NF_QUEUE:
- err = nf_queue(skb, &entry->state, &hook_entry, verdict);
- if (err == 1) {
- if (hook_entry)
- goto next_hook;
- goto okfn;
- }
+ err = nf_queue(skb, &entry->state, hooks, i, verdict);
+ if (err == 1)
+ goto next_hook;
break;
case NF_STOLEN:
break;
diff --git a/net/netfilter/nf_sockopt.c b/net/netfilter/nf_sockopt.c
index c68c1e58b362..46cb3786e0ec 100644
--- a/net/netfilter/nf_sockopt.c
+++ b/net/netfilter/nf_sockopt.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
@@ -33,7 +34,7 @@ int nf_register_sockopt(struct nf_sockopt_ops *reg)
reg->set_optmin, reg->set_optmax)
|| overlap(ops->get_optmin, ops->get_optmax,
reg->get_optmin, reg->get_optmax))) {
- NFDEBUG("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n",
+ pr_debug("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n",
ops->set_optmin, ops->set_optmax,
ops->get_optmin, ops->get_optmax,
reg->set_optmin, reg->set_optmax,
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 7843efa33c59..d8327b43e4dc 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -726,7 +726,10 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
if (table == NULL)
goto err2;
- nla_strlcpy(table->name, name, NFT_TABLE_MAXNAMELEN);
+ table->name = nla_strdup(name, GFP_KERNEL);
+ if (table->name == NULL)
+ goto err3;
+
INIT_LIST_HEAD(&table->chains);
INIT_LIST_HEAD(&table->sets);
INIT_LIST_HEAD(&table->objects);
@@ -735,10 +738,12 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE);
if (err < 0)
- goto err3;
+ goto err4;
list_add_tail_rcu(&table->list, &afi->tables);
return 0;
+err4:
+ kfree(table->name);
err3:
kfree(table);
err2:
@@ -855,6 +860,10 @@ static int nf_tables_deltable(struct net *net, struct sock *nlsk,
if (IS_ERR(table))
return PTR_ERR(table);
+ if (nlh->nlmsg_flags & NLM_F_NONREC &&
+ table->use > 0)
+ return -EBUSY;
+
ctx.afi = afi;
ctx.table = table;
@@ -865,6 +874,7 @@ static void nf_tables_table_destroy(struct nft_ctx *ctx)
{
BUG_ON(ctx->table->use > 0);
+ kfree(ctx->table->name);
kfree(ctx->table);
module_put(ctx->afi->owner);
}
@@ -1038,7 +1048,7 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net,
if (nla_put_string(skb, NFTA_CHAIN_TYPE, basechain->type->name))
goto nla_put_failure;
- if (nft_dump_stats(skb, nft_base_chain(chain)->stats))
+ if (basechain->stats && nft_dump_stats(skb, basechain->stats))
goto nla_put_failure;
}
@@ -1240,10 +1250,14 @@ static void nf_tables_chain_destroy(struct nft_chain *chain)
module_put(basechain->type->owner);
free_percpu(basechain->stats);
+ if (basechain->stats)
+ static_branch_dec(&nft_counters_enabled);
if (basechain->ops[0].dev != NULL)
dev_put(basechain->ops[0].dev);
+ kfree(chain->name);
kfree(basechain);
} else {
+ kfree(chain->name);
kfree(chain);
}
}
@@ -1325,155 +1339,18 @@ static void nft_chain_release_hook(struct nft_chain_hook *hook)
dev_put(hook->dev);
}
-static int nf_tables_newchain(struct net *net, struct sock *nlsk,
- struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const nla[],
- struct netlink_ext_ack *extack)
+static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
+ u8 policy, bool create)
{
- const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- const struct nlattr * uninitialized_var(name);
- struct nft_af_info *afi;
- struct nft_table *table;
+ const struct nlattr * const *nla = ctx->nla;
+ struct nft_table *table = ctx->table;
+ struct nft_af_info *afi = ctx->afi;
+ struct nft_base_chain *basechain;
+ struct nft_stats __percpu *stats;
+ struct net *net = ctx->net;
struct nft_chain *chain;
- struct nft_base_chain *basechain = NULL;
- u8 genmask = nft_genmask_next(net);
- int family = nfmsg->nfgen_family;
- u8 policy = NF_ACCEPT;
- u64 handle = 0;
unsigned int i;
- struct nft_stats __percpu *stats;
int err;
- bool create;
- struct nft_ctx ctx;
-
- create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
-
- afi = nf_tables_afinfo_lookup(net, family, true);
- if (IS_ERR(afi))
- return PTR_ERR(afi);
-
- table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], genmask);
- if (IS_ERR(table))
- return PTR_ERR(table);
-
- chain = NULL;
- name = nla[NFTA_CHAIN_NAME];
-
- if (nla[NFTA_CHAIN_HANDLE]) {
- handle = be64_to_cpu(nla_get_be64(nla[NFTA_CHAIN_HANDLE]));
- chain = nf_tables_chain_lookup_byhandle(table, handle, genmask);
- if (IS_ERR(chain))
- return PTR_ERR(chain);
- } else {
- chain = nf_tables_chain_lookup(table, name, genmask);
- if (IS_ERR(chain)) {
- if (PTR_ERR(chain) != -ENOENT)
- return PTR_ERR(chain);
- chain = NULL;
- }
- }
-
- if (nla[NFTA_CHAIN_POLICY]) {
- if (chain != NULL &&
- !nft_is_base_chain(chain))
- return -EOPNOTSUPP;
-
- if (chain == NULL &&
- nla[NFTA_CHAIN_HOOK] == NULL)
- return -EOPNOTSUPP;
-
- policy = ntohl(nla_get_be32(nla[NFTA_CHAIN_POLICY]));
- switch (policy) {
- case NF_DROP:
- case NF_ACCEPT:
- break;
- default:
- return -EINVAL;
- }
- }
-
- if (chain != NULL) {
- struct nft_stats *stats = NULL;
- struct nft_trans *trans;
-
- if (nlh->nlmsg_flags & NLM_F_EXCL)
- return -EEXIST;
- if (nlh->nlmsg_flags & NLM_F_REPLACE)
- return -EOPNOTSUPP;
-
- if (nla[NFTA_CHAIN_HOOK]) {
- struct nft_base_chain *basechain;
- struct nft_chain_hook hook;
- struct nf_hook_ops *ops;
-
- if (!nft_is_base_chain(chain))
- return -EBUSY;
-
- err = nft_chain_parse_hook(net, nla, afi, &hook,
- create);
- if (err < 0)
- return err;
-
- basechain = nft_base_chain(chain);
- if (basechain->type != hook.type) {
- nft_chain_release_hook(&hook);
- return -EBUSY;
- }
-
- for (i = 0; i < afi->nops; i++) {
- ops = &basechain->ops[i];
- if (ops->hooknum != hook.num ||
- ops->priority != hook.priority ||
- ops->dev != hook.dev) {
- nft_chain_release_hook(&hook);
- return -EBUSY;
- }
- }
- nft_chain_release_hook(&hook);
- }
-
- if (nla[NFTA_CHAIN_HANDLE] && name) {
- struct nft_chain *chain2;
-
- chain2 = nf_tables_chain_lookup(table,
- nla[NFTA_CHAIN_NAME],
- genmask);
- if (IS_ERR(chain2))
- return PTR_ERR(chain2);
- }
-
- if (nla[NFTA_CHAIN_COUNTERS]) {
- if (!nft_is_base_chain(chain))
- return -EOPNOTSUPP;
-
- stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]);
- if (IS_ERR(stats))
- return PTR_ERR(stats);
- }
-
- nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla);
- trans = nft_trans_alloc(&ctx, NFT_MSG_NEWCHAIN,
- sizeof(struct nft_trans_chain));
- if (trans == NULL) {
- free_percpu(stats);
- return -ENOMEM;
- }
-
- nft_trans_chain_stats(trans) = stats;
- nft_trans_chain_update(trans) = true;
-
- if (nla[NFTA_CHAIN_POLICY])
- nft_trans_chain_policy(trans) = policy;
- else
- nft_trans_chain_policy(trans) = -1;
-
- if (nla[NFTA_CHAIN_HANDLE] && name) {
- nla_strlcpy(nft_trans_chain_name(trans), name,
- NFT_CHAIN_MAXNAMELEN);
- }
- list_add_tail(&trans->list, &net->nft.commit_list);
- return 0;
- }
if (table->use == UINT_MAX)
return -EOVERFLOW;
@@ -1504,14 +1381,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
return PTR_ERR(stats);
}
basechain->stats = stats;
- } else {
- stats = netdev_alloc_pcpu_stats(struct nft_stats);
- if (stats == NULL) {
- nft_chain_release_hook(&hook);
- kfree(basechain);
- return -ENOMEM;
- }
- rcu_assign_pointer(basechain->stats, stats);
+ static_branch_inc(&nft_counters_enabled);
}
hookfn = hook.type->hooks[hook.num];
@@ -1539,31 +1409,204 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
if (chain == NULL)
return -ENOMEM;
}
-
INIT_LIST_HEAD(&chain->rules);
chain->handle = nf_tables_alloc_handle(table);
chain->table = table;
- nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN);
+ chain->name = nla_strdup(nla[NFTA_CHAIN_NAME], GFP_KERNEL);
+ if (!chain->name) {
+ err = -ENOMEM;
+ goto err1;
+ }
err = nf_tables_register_hooks(net, table, chain, afi->nops);
if (err < 0)
goto err1;
- nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla);
- err = nft_trans_chain_add(&ctx, NFT_MSG_NEWCHAIN);
+ ctx->chain = chain;
+ err = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN);
if (err < 0)
goto err2;
table->use++;
list_add_tail_rcu(&chain->list, &table->chains);
+
return 0;
err2:
nf_tables_unregister_hooks(net, table, chain, afi->nops);
err1:
nf_tables_chain_destroy(chain);
+
return err;
}
+static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
+ bool create)
+{
+ const struct nlattr * const *nla = ctx->nla;
+ struct nft_table *table = ctx->table;
+ struct nft_chain *chain = ctx->chain;
+ struct nft_af_info *afi = ctx->afi;
+ struct nft_base_chain *basechain;
+ struct nft_stats *stats = NULL;
+ struct nft_chain_hook hook;
+ const struct nlattr *name;
+ struct nf_hook_ops *ops;
+ struct nft_trans *trans;
+ int err, i;
+
+ if (nla[NFTA_CHAIN_HOOK]) {
+ if (!nft_is_base_chain(chain))
+ return -EBUSY;
+
+ err = nft_chain_parse_hook(ctx->net, nla, ctx->afi, &hook,
+ create);
+ if (err < 0)
+ return err;
+
+ basechain = nft_base_chain(chain);
+ if (basechain->type != hook.type) {
+ nft_chain_release_hook(&hook);
+ return -EBUSY;
+ }
+
+ for (i = 0; i < afi->nops; i++) {
+ ops = &basechain->ops[i];
+ if (ops->hooknum != hook.num ||
+ ops->priority != hook.priority ||
+ ops->dev != hook.dev) {
+ nft_chain_release_hook(&hook);
+ return -EBUSY;
+ }
+ }
+ nft_chain_release_hook(&hook);
+ }
+
+ if (nla[NFTA_CHAIN_HANDLE] &&
+ nla[NFTA_CHAIN_NAME]) {
+ struct nft_chain *chain2;
+
+ chain2 = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME],
+ genmask);
+ if (!IS_ERR(chain2))
+ return -EEXIST;
+ }
+
+ if (nla[NFTA_CHAIN_COUNTERS]) {
+ if (!nft_is_base_chain(chain))
+ return -EOPNOTSUPP;
+
+ stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]);
+ if (IS_ERR(stats))
+ return PTR_ERR(stats);
+ }
+
+ trans = nft_trans_alloc(ctx, NFT_MSG_NEWCHAIN,
+ sizeof(struct nft_trans_chain));
+ if (trans == NULL) {
+ free_percpu(stats);
+ return -ENOMEM;
+ }
+
+ nft_trans_chain_stats(trans) = stats;
+ nft_trans_chain_update(trans) = true;
+
+ if (nla[NFTA_CHAIN_POLICY])
+ nft_trans_chain_policy(trans) = policy;
+ else
+ nft_trans_chain_policy(trans) = -1;
+
+ name = nla[NFTA_CHAIN_NAME];
+ if (nla[NFTA_CHAIN_HANDLE] && name) {
+ nft_trans_chain_name(trans) =
+ nla_strdup(name, GFP_KERNEL);
+ if (!nft_trans_chain_name(trans)) {
+ kfree(trans);
+ free_percpu(stats);
+ return -ENOMEM;
+ }
+ }
+ list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+
+ return 0;
+}
+
+static int nf_tables_newchain(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
+ const struct nlattr * const nla[],
+ struct netlink_ext_ack *extack)
+{
+ const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+ const struct nlattr * uninitialized_var(name);
+ u8 genmask = nft_genmask_next(net);
+ int family = nfmsg->nfgen_family;
+ struct nft_af_info *afi;
+ struct nft_table *table;
+ struct nft_chain *chain;
+ u8 policy = NF_ACCEPT;
+ struct nft_ctx ctx;
+ u64 handle = 0;
+ bool create;
+
+ create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
+
+ afi = nf_tables_afinfo_lookup(net, family, true);
+ if (IS_ERR(afi))
+ return PTR_ERR(afi);
+
+ table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], genmask);
+ if (IS_ERR(table))
+ return PTR_ERR(table);
+
+ chain = NULL;
+ name = nla[NFTA_CHAIN_NAME];
+
+ if (nla[NFTA_CHAIN_HANDLE]) {
+ handle = be64_to_cpu(nla_get_be64(nla[NFTA_CHAIN_HANDLE]));
+ chain = nf_tables_chain_lookup_byhandle(table, handle, genmask);
+ if (IS_ERR(chain))
+ return PTR_ERR(chain);
+ } else {
+ chain = nf_tables_chain_lookup(table, name, genmask);
+ if (IS_ERR(chain)) {
+ if (PTR_ERR(chain) != -ENOENT)
+ return PTR_ERR(chain);
+ chain = NULL;
+ }
+ }
+
+ if (nla[NFTA_CHAIN_POLICY]) {
+ if (chain != NULL &&
+ !nft_is_base_chain(chain))
+ return -EOPNOTSUPP;
+
+ if (chain == NULL &&
+ nla[NFTA_CHAIN_HOOK] == NULL)
+ return -EOPNOTSUPP;
+
+ policy = ntohl(nla_get_be32(nla[NFTA_CHAIN_POLICY]));
+ switch (policy) {
+ case NF_DROP:
+ case NF_ACCEPT:
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+
+ nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla);
+
+ if (chain != NULL) {
+ if (nlh->nlmsg_flags & NLM_F_EXCL)
+ return -EEXIST;
+ if (nlh->nlmsg_flags & NLM_F_REPLACE)
+ return -EOPNOTSUPP;
+
+ return nf_tables_updchain(&ctx, genmask, policy, create);
+ }
+
+ return nf_tables_addchain(&ctx, family, genmask, policy, create);
+}
+
static int nf_tables_delchain(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[],
@@ -1574,8 +1617,11 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk,
struct nft_af_info *afi;
struct nft_table *table;
struct nft_chain *chain;
+ struct nft_rule *rule;
int family = nfmsg->nfgen_family;
struct nft_ctx ctx;
+ u32 use;
+ int err;
afi = nf_tables_afinfo_lookup(net, family, false);
if (IS_ERR(afi))
@@ -1588,11 +1634,30 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk,
chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask);
if (IS_ERR(chain))
return PTR_ERR(chain);
- if (chain->use > 0)
+
+ if (nlh->nlmsg_flags & NLM_F_NONREC &&
+ chain->use > 0)
return -EBUSY;
nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla);
+ use = chain->use;
+ list_for_each_entry(rule, &chain->rules, list) {
+ if (!nft_is_active_next(net, rule))
+ continue;
+ use--;
+
+ err = nft_delrule(&ctx, rule);
+ if (err < 0)
+ return err;
+ }
+
+ /* There are rules and elements that are still holding references to us,
+ * we cannot do a recursive removal in this case.
+ */
+ if (use > 0)
+ return -EBUSY;
+
return nft_delchain(&ctx);
}
@@ -1977,8 +2042,8 @@ err:
}
struct nft_rule_dump_ctx {
- char table[NFT_TABLE_MAXNAMELEN];
- char chain[NFT_CHAIN_MAXNAMELEN];
+ char *table;
+ char *chain;
};
static int nf_tables_dump_rules(struct sk_buff *skb,
@@ -2002,7 +2067,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
continue;
list_for_each_entry_rcu(table, &afi->tables, list) {
- if (ctx && ctx->table[0] &&
+ if (ctx && ctx->table &&
strcmp(ctx->table, table->name) != 0)
continue;
@@ -2042,7 +2107,13 @@ done:
static int nf_tables_dump_rules_done(struct netlink_callback *cb)
{
- kfree(cb->data);
+ struct nft_rule_dump_ctx *ctx = cb->data;
+
+ if (ctx) {
+ kfree(ctx->table);
+ kfree(ctx->chain);
+ kfree(ctx);
+ }
return 0;
}
@@ -2074,12 +2145,23 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk,
if (!ctx)
return -ENOMEM;
- if (nla[NFTA_RULE_TABLE])
- nla_strlcpy(ctx->table, nla[NFTA_RULE_TABLE],
- sizeof(ctx->table));
- if (nla[NFTA_RULE_CHAIN])
- nla_strlcpy(ctx->chain, nla[NFTA_RULE_CHAIN],
- sizeof(ctx->chain));
+ if (nla[NFTA_RULE_TABLE]) {
+ ctx->table = nla_strdup(nla[NFTA_RULE_TABLE],
+ GFP_KERNEL);
+ if (!ctx->table) {
+ kfree(ctx);
+ return -ENOMEM;
+ }
+ }
+ if (nla[NFTA_RULE_CHAIN]) {
+ ctx->chain = nla_strdup(nla[NFTA_RULE_CHAIN],
+ GFP_KERNEL);
+ if (!ctx->chain) {
+ kfree(ctx->table);
+ kfree(ctx);
+ return -ENOMEM;
+ }
+ }
c.data = ctx;
}
@@ -2467,14 +2549,9 @@ nft_select_set_ops(const struct nft_ctx *ctx,
case NFT_SET_POL_PERFORMANCE:
if (est.lookup < best.lookup)
break;
- if (est.lookup == best.lookup) {
- if (!desc->size) {
- if (est.space < best.space)
- break;
- } else if (est.size < best.size) {
- break;
- }
- }
+ if (est.lookup == best.lookup &&
+ est.space < best.space)
+ break;
continue;
case NFT_SET_POL_MEMORY:
if (!desc->size) {
@@ -2621,7 +2698,7 @@ static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set,
unsigned long *inuse;
unsigned int n = 0, min = 0;
- p = strnchr(name, NFT_SET_MAXNAMELEN, '%');
+ p = strchr(name, '%');
if (p != NULL) {
if (p[1] != 'd' || strchr(p + 2, '%'))
return -EINVAL;
@@ -2652,12 +2729,17 @@ cont:
free_page((unsigned long)inuse);
}
- snprintf(set->name, sizeof(set->name), name, min + n);
+ set->name = kasprintf(GFP_KERNEL, name, min + n);
+ if (!set->name)
+ return -ENOMEM;
+
list_for_each_entry(i, &ctx->table->sets, list) {
if (!nft_is_active_next(ctx->net, i))
continue;
- if (!strcmp(set->name, i->name))
+ if (!strcmp(set->name, i->name)) {
+ kfree(set->name);
return -ENFILE;
+ }
}
return 0;
}
@@ -2929,7 +3011,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
struct nft_table *table;
struct nft_set *set;
struct nft_ctx ctx;
- char name[NFT_SET_MAXNAMELEN];
+ char *name;
unsigned int size;
bool create;
u64 timeout;
@@ -3075,8 +3157,14 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
goto err1;
}
- nla_strlcpy(name, nla[NFTA_SET_NAME], sizeof(set->name));
+ name = nla_strdup(nla[NFTA_SET_NAME], GFP_KERNEL);
+ if (!name) {
+ err = -ENOMEM;
+ goto err2;
+ }
+
err = nf_tables_set_alloc_name(&ctx, set, name);
+ kfree(name);
if (err < 0)
goto err2;
@@ -3126,6 +3214,7 @@ static void nft_set_destroy(struct nft_set *set)
{
set->ops->destroy(set);
module_put(set->ops->type->owner);
+ kfree(set->name);
kvfree(set);
}
@@ -3159,7 +3248,9 @@ static int nf_tables_delset(struct net *net, struct sock *nlsk,
set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME], genmask);
if (IS_ERR(set))
return PTR_ERR(set);
- if (!list_empty(&set->bindings))
+
+ if (!list_empty(&set->bindings) ||
+ (nlh->nlmsg_flags & NLM_F_NONREC && atomic_read(&set->nelems) > 0))
return -EBUSY;
return nft_delset(&ctx, set);
@@ -3497,45 +3588,6 @@ static int nf_tables_dump_set_done(struct netlink_callback *cb)
return 0;
}
-static int nf_tables_getsetelem(struct net *net, struct sock *nlsk,
- struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const nla[],
- struct netlink_ext_ack *extack)
-{
- u8 genmask = nft_genmask_cur(net);
- const struct nft_set *set;
- struct nft_ctx ctx;
- int err;
-
- err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, genmask);
- if (err < 0)
- return err;
-
- set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET],
- genmask);
- if (IS_ERR(set))
- return PTR_ERR(set);
-
- if (nlh->nlmsg_flags & NLM_F_DUMP) {
- struct netlink_dump_control c = {
- .dump = nf_tables_dump_set,
- .done = nf_tables_dump_set_done,
- };
- struct nft_set_dump_ctx *dump_ctx;
-
- dump_ctx = kmalloc(sizeof(*dump_ctx), GFP_KERNEL);
- if (!dump_ctx)
- return -ENOMEM;
-
- dump_ctx->set = set;
- dump_ctx->ctx = ctx;
-
- c.data = dump_ctx;
- return netlink_dump_start(nlsk, skb, nlh, &c);
- }
- return -EOPNOTSUPP;
-}
-
static int nf_tables_fill_setelem_info(struct sk_buff *skb,
const struct nft_ctx *ctx, u32 seq,
u32 portid, int event, u16 flags,
@@ -3581,6 +3633,135 @@ nla_put_failure:
return -1;
}
+static int nft_setelem_parse_flags(const struct nft_set *set,
+ const struct nlattr *attr, u32 *flags)
+{
+ if (attr == NULL)
+ return 0;
+
+ *flags = ntohl(nla_get_be32(attr));
+ if (*flags & ~NFT_SET_ELEM_INTERVAL_END)
+ return -EINVAL;
+ if (!(set->flags & NFT_SET_INTERVAL) &&
+ *flags & NFT_SET_ELEM_INTERVAL_END)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int nft_get_set_elem(struct nft_ctx *ctx, struct nft_set *set,
+ const struct nlattr *attr)
+{
+ struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
+ const struct nft_set_ext *ext;
+ struct nft_data_desc desc;
+ struct nft_set_elem elem;
+ struct sk_buff *skb;
+ uint32_t flags = 0;
+ void *priv;
+ int err;
+
+ err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr,
+ nft_set_elem_policy, NULL);
+ if (err < 0)
+ return err;
+
+ if (!nla[NFTA_SET_ELEM_KEY])
+ return -EINVAL;
+
+ err = nft_setelem_parse_flags(set, nla[NFTA_SET_ELEM_FLAGS], &flags);
+ if (err < 0)
+ return err;
+
+ err = nft_data_init(ctx, &elem.key.val, sizeof(elem.key), &desc,
+ nla[NFTA_SET_ELEM_KEY]);
+ if (err < 0)
+ return err;
+
+ err = -EINVAL;
+ if (desc.type != NFT_DATA_VALUE || desc.len != set->klen)
+ return err;
+
+ priv = set->ops->get(ctx->net, set, &elem, flags);
+ if (IS_ERR(priv))
+ return PTR_ERR(priv);
+
+ elem.priv = priv;
+ ext = nft_set_elem_ext(set, &elem);
+
+ err = -ENOMEM;
+ skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (skb == NULL)
+ goto err1;
+
+ err = nf_tables_fill_setelem_info(skb, ctx, ctx->seq, ctx->portid,
+ NFT_MSG_NEWSETELEM, 0, set, &elem);
+ if (err < 0)
+ goto err2;
+
+ err = nfnetlink_unicast(skb, ctx->net, ctx->portid, MSG_DONTWAIT);
+ /* This avoids a loop in nfnetlink. */
+ if (err < 0)
+ goto err1;
+
+ return 0;
+err2:
+ kfree_skb(skb);
+err1:
+ /* this avoids a loop in nfnetlink. */
+ return err == -EAGAIN ? -ENOBUFS : err;
+}
+
+static int nf_tables_getsetelem(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
+ const struct nlattr * const nla[],
+ struct netlink_ext_ack *extack)
+{
+ u8 genmask = nft_genmask_cur(net);
+ struct nft_set *set;
+ struct nlattr *attr;
+ struct nft_ctx ctx;
+ int rem, err = 0;
+
+ err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, genmask);
+ if (err < 0)
+ return err;
+
+ set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET],
+ genmask);
+ if (IS_ERR(set))
+ return PTR_ERR(set);
+
+ if (nlh->nlmsg_flags & NLM_F_DUMP) {
+ struct netlink_dump_control c = {
+ .dump = nf_tables_dump_set,
+ .done = nf_tables_dump_set_done,
+ };
+ struct nft_set_dump_ctx *dump_ctx;
+
+ dump_ctx = kmalloc(sizeof(*dump_ctx), GFP_KERNEL);
+ if (!dump_ctx)
+ return -ENOMEM;
+
+ dump_ctx->set = set;
+ dump_ctx->ctx = ctx;
+
+ c.data = dump_ctx;
+ return netlink_dump_start(nlsk, skb, nlh, &c);
+ }
+
+ if (!nla[NFTA_SET_ELEM_LIST_ELEMENTS])
+ return -EINVAL;
+
+ nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
+ err = nft_get_set_elem(&ctx, set, attr);
+ if (err < 0)
+ break;
+ }
+
+ return err;
+}
+
static void nf_tables_setelem_notify(const struct nft_ctx *ctx,
const struct nft_set *set,
const struct nft_set_elem *elem,
@@ -3681,22 +3862,6 @@ static void nf_tables_set_elem_destroy(const struct nft_set *set, void *elem)
kfree(elem);
}
-static int nft_setelem_parse_flags(const struct nft_set *set,
- const struct nlattr *attr, u32 *flags)
-{
- if (attr == NULL)
- return 0;
-
- *flags = ntohl(nla_get_be32(attr));
- if (*flags & ~NFT_SET_ELEM_INTERVAL_END)
- return -EINVAL;
- if (!(set->flags & NFT_SET_INTERVAL) &&
- *flags & NFT_SET_ELEM_INTERVAL_END)
- return -EINVAL;
-
- return 0;
-}
-
static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
const struct nlattr *attr, u32 nlmsg_flags)
{
@@ -4209,7 +4374,7 @@ struct nft_object *nf_tables_obj_lookup(const struct nft_table *table,
list_for_each_entry(obj, &table->objects, list) {
if (!nla_strcmp(nla, obj->name) &&
- objtype == obj->type->type &&
+ objtype == obj->ops->type->type &&
nft_active_genmask(obj, genmask))
return obj;
}
@@ -4231,6 +4396,7 @@ static struct nft_object *nft_obj_init(const struct nft_ctx *ctx,
const struct nlattr *attr)
{
struct nlattr *tb[type->maxattr + 1];
+ const struct nft_object_ops *ops;
struct nft_object *obj;
int err;
@@ -4243,16 +4409,27 @@ static struct nft_object *nft_obj_init(const struct nft_ctx *ctx,
memset(tb, 0, sizeof(tb[0]) * (type->maxattr + 1));
}
+ if (type->select_ops) {
+ ops = type->select_ops(ctx, (const struct nlattr * const *)tb);
+ if (IS_ERR(ops)) {
+ err = PTR_ERR(ops);
+ goto err1;
+ }
+ } else {
+ ops = type->ops;
+ }
+
err = -ENOMEM;
- obj = kzalloc(sizeof(struct nft_object) + type->size, GFP_KERNEL);
+ obj = kzalloc(sizeof(*obj) + ops->size, GFP_KERNEL);
if (obj == NULL)
goto err1;
- err = type->init(ctx, (const struct nlattr * const *)tb, obj);
+ err = ops->init(ctx, (const struct nlattr * const *)tb, obj);
if (err < 0)
goto err2;
- obj->type = type;
+ obj->ops = ops;
+
return obj;
err2:
kfree(obj);
@@ -4268,7 +4445,7 @@ static int nft_object_dump(struct sk_buff *skb, unsigned int attr,
nest = nla_nest_start(skb, attr);
if (!nest)
goto nla_put_failure;
- if (obj->type->dump(skb, obj, reset) < 0)
+ if (obj->ops->dump(skb, obj, reset) < 0)
goto nla_put_failure;
nla_nest_end(skb, nest);
return 0;
@@ -4363,18 +4540,24 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk,
goto err1;
}
obj->table = table;
- nla_strlcpy(obj->name, nla[NFTA_OBJ_NAME], NFT_OBJ_MAXNAMELEN);
+ obj->name = nla_strdup(nla[NFTA_OBJ_NAME], GFP_KERNEL);
+ if (!obj->name) {
+ err = -ENOMEM;
+ goto err2;
+ }
err = nft_trans_obj_add(&ctx, NFT_MSG_NEWOBJ, obj);
if (err < 0)
- goto err2;
+ goto err3;
list_add_tail_rcu(&obj->list, &table->objects);
table->use++;
return 0;
+err3:
+ kfree(obj->name);
err2:
- if (obj->type->destroy)
- obj->type->destroy(obj);
+ if (obj->ops->destroy)
+ obj->ops->destroy(obj);
kfree(obj);
err1:
module_put(type->owner);
@@ -4401,7 +4584,7 @@ static int nf_tables_fill_obj_info(struct sk_buff *skb, struct net *net,
if (nla_put_string(skb, NFTA_OBJ_TABLE, table->name) ||
nla_put_string(skb, NFTA_OBJ_NAME, obj->name) ||
- nla_put_be32(skb, NFTA_OBJ_TYPE, htonl(obj->type->type)) ||
+ nla_put_be32(skb, NFTA_OBJ_TYPE, htonl(obj->ops->type->type)) ||
nla_put_be32(skb, NFTA_OBJ_USE, htonl(obj->use)) ||
nft_object_dump(skb, NFTA_OBJ_DATA, obj, reset))
goto nla_put_failure;
@@ -4415,7 +4598,7 @@ nla_put_failure:
}
struct nft_obj_filter {
- char table[NFT_OBJ_MAXNAMELEN];
+ char *table;
u32 type;
};
@@ -4455,7 +4638,7 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
goto cont;
if (filter &&
filter->type != NFT_OBJECT_UNSPEC &&
- obj->type->type != filter->type)
+ obj->ops->type->type != filter->type)
goto cont;
if (nf_tables_fill_obj_info(skb, net, NETLINK_CB(cb->skb).portid,
@@ -4480,7 +4663,10 @@ done:
static int nf_tables_dump_obj_done(struct netlink_callback *cb)
{
- kfree(cb->data);
+ struct nft_obj_filter *filter = cb->data;
+
+ kfree(filter->table);
+ kfree(filter);
return 0;
}
@@ -4494,9 +4680,13 @@ nft_obj_filter_alloc(const struct nlattr * const nla[])
if (!filter)
return ERR_PTR(-ENOMEM);
- if (nla[NFTA_OBJ_TABLE])
- nla_strlcpy(filter->table, nla[NFTA_OBJ_TABLE],
- NFT_TABLE_MAXNAMELEN);
+ if (nla[NFTA_OBJ_TABLE]) {
+ filter->table = nla_strdup(nla[NFTA_OBJ_TABLE], GFP_KERNEL);
+ if (!filter->table) {
+ kfree(filter);
+ return ERR_PTR(-ENOMEM);
+ }
+ }
if (nla[NFTA_OBJ_TYPE])
filter->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
@@ -4576,10 +4766,11 @@ err:
static void nft_obj_destroy(struct nft_object *obj)
{
- if (obj->type->destroy)
- obj->type->destroy(obj);
+ if (obj->ops->destroy)
+ obj->ops->destroy(obj);
- module_put(obj->type->owner);
+ module_put(obj->ops->type->owner);
+ kfree(obj->name);
kfree(obj);
}
@@ -4662,6 +4853,7 @@ static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net,
{
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
+ char buf[TASK_COMM_LEN];
int event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_NEWGEN);
nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), 0);
@@ -4673,7 +4865,9 @@ static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net,
nfmsg->version = NFNETLINK_V0;
nfmsg->res_id = htons(net->nft.base_seq & 0xffff);
- if (nla_put_be32(skb, NFTA_GEN_ID, htonl(net->nft.base_seq)))
+ if (nla_put_be32(skb, NFTA_GEN_ID, htonl(net->nft.base_seq)) ||
+ nla_put_be32(skb, NFTA_GEN_PROC_PID, htonl(task_pid_nr(current))) ||
+ nla_put_string(skb, NFTA_GEN_PROC_NAME, get_task_comm(buf, current)))
goto nla_put_failure;
nlmsg_end(skb, nlh);
@@ -4842,7 +5036,7 @@ static void nft_chain_commit_update(struct nft_trans *trans)
{
struct nft_base_chain *basechain;
- if (nft_trans_chain_name(trans)[0])
+ if (nft_trans_chain_name(trans))
strcpy(trans->ctx.chain->name, nft_trans_chain_name(trans));
if (!nft_is_base_chain(trans->ctx.chain))
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 65dbeadcb118..dfd0bf3810d2 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -29,7 +29,7 @@ static const char *const comments[__NFT_TRACETYPE_MAX] = {
[NFT_TRACETYPE_RULE] = "rule",
};
-static struct nf_loginfo trace_loginfo = {
+static const struct nf_loginfo trace_loginfo = {
.type = NF_LOG_TYPE_LOG,
.u = {
.log = {
@@ -114,6 +114,22 @@ static bool nft_payload_fast_eval(const struct nft_expr *expr,
return true;
}
+DEFINE_STATIC_KEY_FALSE(nft_counters_enabled);
+
+static noinline void nft_update_chain_stats(const struct nft_chain *chain,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_stats *stats;
+
+ local_bh_disable();
+ stats = this_cpu_ptr(rcu_dereference(nft_base_chain(chain)->stats));
+ u64_stats_update_begin(&stats->syncp);
+ stats->pkts++;
+ stats->bytes += pkt->skb->len;
+ u64_stats_update_end(&stats->syncp);
+ local_bh_enable();
+}
+
struct nft_jumpstack {
const struct nft_chain *chain;
const struct nft_rule *rule;
@@ -130,7 +146,6 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv)
struct nft_regs regs;
unsigned int stackptr = 0;
struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE];
- struct nft_stats *stats;
int rulenum;
unsigned int gencursor = nft_genmask_cur(net);
struct nft_traceinfo info;
@@ -220,13 +235,8 @@ next_rule:
nft_trace_packet(&info, basechain, NULL, -1,
NFT_TRACETYPE_POLICY);
- rcu_read_lock_bh();
- stats = this_cpu_ptr(rcu_dereference(nft_base_chain(basechain)->stats));
- u64_stats_update_begin(&stats->syncp);
- stats->pkts++;
- stats->bytes += pkt->skb->len;
- u64_stats_update_end(&stats->syncp);
- rcu_read_unlock_bh();
+ if (static_branch_unlikely(&nft_counters_enabled))
+ nft_update_chain_stats(basechain, pkt);
return nft_base_chain(basechain)->policy;
}
diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c
index e1b15e7a5793..e1dc527a493b 100644
--- a/net/netfilter/nf_tables_trace.c
+++ b/net/netfilter/nf_tables_trace.c
@@ -162,6 +162,27 @@ static int nf_trace_fill_rule_info(struct sk_buff *nlskb,
NFTA_TRACE_PAD);
}
+static bool nft_trace_have_verdict_chain(struct nft_traceinfo *info)
+{
+ switch (info->type) {
+ case NFT_TRACETYPE_RETURN:
+ case NFT_TRACETYPE_RULE:
+ break;
+ default:
+ return false;
+ }
+
+ switch (info->verdict->code) {
+ case NFT_JUMP:
+ case NFT_GOTO:
+ break;
+ default:
+ return false;
+ }
+
+ return true;
+}
+
void nft_trace_notify(struct nft_traceinfo *info)
{
const struct nft_pktinfo *pkt = info->pkt;
@@ -175,13 +196,12 @@ void nft_trace_notify(struct nft_traceinfo *info)
return;
size = nlmsg_total_size(sizeof(struct nfgenmsg)) +
- nla_total_size(NFT_TABLE_MAXNAMELEN) +
- nla_total_size(NFT_CHAIN_MAXNAMELEN) +
+ nla_total_size(strlen(info->chain->table->name)) +
+ nla_total_size(strlen(info->chain->name)) +
nla_total_size_64bit(sizeof(__be64)) + /* rule handle */
nla_total_size(sizeof(__be32)) + /* trace type */
nla_total_size(0) + /* VERDICT, nested */
nla_total_size(sizeof(u32)) + /* verdict code */
- nla_total_size(NFT_CHAIN_MAXNAMELEN) + /* jump target */
nla_total_size(sizeof(u32)) + /* id */
nla_total_size(NFT_TRACETYPE_LL_HSIZE) +
nla_total_size(NFT_TRACETYPE_NETWORK_HSIZE) +
@@ -194,6 +214,9 @@ void nft_trace_notify(struct nft_traceinfo *info)
nla_total_size(sizeof(u32)) + /* nfproto */
nla_total_size(sizeof(u32)); /* policy */
+ if (nft_trace_have_verdict_chain(info))
+ size += nla_total_size(strlen(info->verdict->chain->name)); /* jump target */
+
skb = nlmsg_new(size, GFP_ATOMIC);
if (!skb)
return;
@@ -217,14 +240,11 @@ void nft_trace_notify(struct nft_traceinfo *info)
if (trace_fill_id(skb, pkt->skb))
goto nla_put_failure;
- if (info->chain) {
- if (nla_put_string(skb, NFTA_TRACE_CHAIN,
- info->chain->name))
- goto nla_put_failure;
- if (nla_put_string(skb, NFTA_TRACE_TABLE,
- info->chain->table->name))
- goto nla_put_failure;
- }
+ if (nla_put_string(skb, NFTA_TRACE_CHAIN, info->chain->name))
+ goto nla_put_failure;
+
+ if (nla_put_string(skb, NFTA_TRACE_TABLE, info->chain->table->name))
+ goto nla_put_failure;
if (nf_trace_fill_rule_info(skb, info))
goto nla_put_failure;
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 400e9ae97153..32b1c0b44e79 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -47,7 +47,8 @@ static const struct nla_policy cttimeout_nla_policy[CTA_TIMEOUT_MAX+1] = {
};
static int
-ctnl_timeout_parse_policy(void *timeouts, struct nf_conntrack_l4proto *l4proto,
+ctnl_timeout_parse_policy(void *timeouts,
+ const struct nf_conntrack_l4proto *l4proto,
struct net *net, const struct nlattr *attr)
{
int ret = 0;
@@ -74,7 +75,7 @@ static int cttimeout_new_timeout(struct net *net, struct sock *ctnl,
{
__u16 l3num;
__u8 l4num;
- struct nf_conntrack_l4proto *l4proto;
+ const struct nf_conntrack_l4proto *l4proto;
struct ctnl_timeout *timeout, *matching = NULL;
char *name;
int ret;
@@ -158,7 +159,7 @@ ctnl_timeout_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
unsigned int flags = portid ? NLM_F_MULTI : 0;
- struct nf_conntrack_l4proto *l4proto = timeout->l4proto;
+ const struct nf_conntrack_l4proto *l4proto = timeout->l4proto;
event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_TIMEOUT, event);
nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
@@ -363,10 +364,10 @@ static int cttimeout_default_set(struct net *net, struct sock *ctnl,
const struct nlattr * const cda[],
struct netlink_ext_ack *extack)
{
+ const struct nf_conntrack_l4proto *l4proto;
+ unsigned int *timeouts;
__u16 l3num;
__u8 l4num;
- struct nf_conntrack_l4proto *l4proto;
- unsigned int *timeouts;
int ret;
if (!cda[CTA_TIMEOUT_L3PROTO] ||
@@ -401,7 +402,7 @@ err:
static int
cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid,
u32 seq, u32 type, int event,
- struct nf_conntrack_l4proto *l4proto)
+ const struct nf_conntrack_l4proto *l4proto)
{
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
@@ -453,11 +454,11 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl,
const struct nlattr * const cda[],
struct netlink_ext_ack *extack)
{
- __u16 l3num;
- __u8 l4num;
- struct nf_conntrack_l4proto *l4proto;
+ const struct nf_conntrack_l4proto *l4proto;
struct sk_buff *skb2;
int ret, err;
+ __u16 l3num;
+ __u8 l4num;
if (!cda[CTA_TIMEOUT_L3PROTO] || !cda[CTA_TIMEOUT_L4PROTO])
return -EINVAL;
@@ -505,7 +506,6 @@ ctnl_timeout_find_get(struct net *net, const char *name)
{
struct ctnl_timeout *timeout, *matching = NULL;
- rcu_read_lock();
list_for_each_entry_rcu(timeout, &net->nfct_timeout_list, head) {
if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0)
continue;
@@ -521,7 +521,6 @@ ctnl_timeout_find_get(struct net *net, const char *name)
break;
}
err:
- rcu_read_unlock();
return matching;
}
@@ -572,6 +571,7 @@ static void __net_exit cttimeout_net_exit(struct net *net)
{
struct ctnl_timeout *cur, *tmp;
+ nf_ct_unconfirmed_destroy(net);
ctnl_untimeout(net, NULL);
list_for_each_entry_safe(cur, tmp, &net->nfct_timeout_list, head) {
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index c684ba95dbb4..cad6498f10b0 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -606,7 +606,7 @@ nla_put_failure:
return -1;
}
-static struct nf_loginfo default_loginfo = {
+static const struct nf_loginfo default_loginfo = {
.type = NF_LOG_TYPE_ULOG,
.u = {
.ulog = {
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 16fa04086880..a16356cacec3 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -41,6 +41,10 @@
#include "../bridge/br_private.h"
#endif
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+#include <net/netfilter/nf_conntrack.h>
+#endif
+
#define NFQNL_QMAX_DEFAULT 1024
/* We're using struct nlattr which has 16bit nla_len. Note that nla_len
@@ -397,7 +401,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
outdev = entry->state.out;
- switch ((enum nfqnl_config_mode)ACCESS_ONCE(queue->copy_mode)) {
+ switch ((enum nfqnl_config_mode)READ_ONCE(queue->copy_mode)) {
case NFQNL_COPY_META:
case NFQNL_COPY_NONE:
break;
@@ -408,7 +412,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
skb_checksum_help(entskb))
return NULL;
- data_len = ACCESS_ONCE(queue->copy_range);
+ data_len = READ_ONCE(queue->copy_range);
if (data_len > entskb->len)
data_len = entskb->len;
@@ -612,6 +616,18 @@ nlmsg_failure:
return NULL;
}
+static bool nf_ct_drop_unconfirmed(const struct nf_queue_entry *entry)
+{
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+ static const unsigned long flags = IPS_CONFIRMED | IPS_DYING;
+ const struct nf_conn *ct = (void *)skb_nfct(entry->skb);
+
+ if (ct && ((ct->status & flags) == IPS_DYING))
+ return true;
+#endif
+ return false;
+}
+
static int
__nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue,
struct nf_queue_entry *entry)
@@ -628,6 +644,9 @@ __nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue,
}
spin_lock_bh(&queue->lock);
+ if (nf_ct_drop_unconfirmed(entry))
+ goto err_out_free_nskb;
+
if (queue->queue_total >= queue->queue_maxlen) {
if (queue->flags & NFQA_CFG_F_FAIL_OPEN) {
failopen = 1;
@@ -928,7 +947,6 @@ static unsigned int nfqnl_nf_hook_drop(struct net *net)
unsigned int instances = 0;
int i;
- rcu_read_lock();
for (i = 0; i < INSTANCE_BUCKETS; i++) {
struct nfqnl_instance *inst;
struct hlist_head *head = &q->instance_table[i];
@@ -938,7 +956,6 @@ static unsigned int nfqnl_nf_hook_drop(struct net *net)
instances++;
}
}
- rcu_read_unlock();
return instances;
}
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index f5a7cb68694e..b89f4f65b2a0 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -305,7 +305,7 @@ static int nft_target_validate(const struct nft_ctx *ctx,
const struct nf_hook_ops *ops = &basechain->ops[0];
hook_mask = 1 << ops->hooknum;
- if (!(hook_mask & target->hooks))
+ if (target->hooks && !(hook_mask & target->hooks))
return -EINVAL;
ret = nft_compat_chain_validate_dependency(target->table,
@@ -484,7 +484,7 @@ static int nft_match_validate(const struct nft_ctx *ctx,
const struct nf_hook_ops *ops = &basechain->ops[0];
hook_mask = 1 << ops->hooknum;
- if (!(hook_mask & match->hooks))
+ if (match->hooks && !(hook_mask & match->hooks))
return -EINVAL;
ret = nft_compat_chain_validate_dependency(match->table,
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
index 67a710ebde09..eefe3b409925 100644
--- a/net/netfilter/nft_counter.c
+++ b/net/netfilter/nft_counter.c
@@ -175,15 +175,21 @@ static const struct nla_policy nft_counter_policy[NFTA_COUNTER_MAX + 1] = {
[NFTA_COUNTER_BYTES] = { .type = NLA_U64 },
};
-static struct nft_object_type nft_counter_obj __read_mostly = {
- .type = NFT_OBJECT_COUNTER,
+static struct nft_object_type nft_counter_obj_type;
+static const struct nft_object_ops nft_counter_obj_ops = {
+ .type = &nft_counter_obj_type,
.size = sizeof(struct nft_counter_percpu_priv),
- .maxattr = NFTA_COUNTER_MAX,
- .policy = nft_counter_policy,
.eval = nft_counter_obj_eval,
.init = nft_counter_obj_init,
.destroy = nft_counter_obj_destroy,
.dump = nft_counter_obj_dump,
+};
+
+static struct nft_object_type nft_counter_obj_type __read_mostly = {
+ .type = NFT_OBJECT_COUNTER,
+ .ops = &nft_counter_obj_ops,
+ .maxattr = NFTA_COUNTER_MAX,
+ .policy = nft_counter_policy,
.owner = THIS_MODULE,
};
@@ -271,7 +277,7 @@ static int __init nft_counter_module_init(void)
for_each_possible_cpu(cpu)
seqcount_init(per_cpu_ptr(&nft_counter_seq, cpu));
- err = nft_register_obj(&nft_counter_obj);
+ err = nft_register_obj(&nft_counter_obj_type);
if (err < 0)
return err;
@@ -281,14 +287,14 @@ static int __init nft_counter_module_init(void)
return 0;
err1:
- nft_unregister_obj(&nft_counter_obj);
+ nft_unregister_obj(&nft_counter_obj_type);
return err;
}
static void __exit nft_counter_module_exit(void)
{
nft_unregister_expr(&nft_counter_type);
- nft_unregister_obj(&nft_counter_obj);
+ nft_unregister_obj(&nft_counter_obj_type);
}
module_init(nft_counter_module_init);
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 1678e9e75e8e..2647b895f4b0 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -312,39 +312,6 @@ static const struct nla_policy nft_ct_policy[NFTA_CT_MAX + 1] = {
[NFTA_CT_SREG] = { .type = NLA_U32 },
};
-static int nft_ct_netns_get(struct net *net, uint8_t family)
-{
- int err;
-
- if (family == NFPROTO_INET) {
- err = nf_ct_netns_get(net, NFPROTO_IPV4);
- if (err < 0)
- goto err1;
- err = nf_ct_netns_get(net, NFPROTO_IPV6);
- if (err < 0)
- goto err2;
- } else {
- err = nf_ct_netns_get(net, family);
- if (err < 0)
- goto err1;
- }
- return 0;
-
-err2:
- nf_ct_netns_put(net, NFPROTO_IPV4);
-err1:
- return err;
-}
-
-static void nft_ct_netns_put(struct net *net, uint8_t family)
-{
- if (family == NFPROTO_INET) {
- nf_ct_netns_put(net, NFPROTO_IPV4);
- nf_ct_netns_put(net, NFPROTO_IPV6);
- } else
- nf_ct_netns_put(net, family);
-}
-
#ifdef CONFIG_NF_CONNTRACK_ZONES
static void nft_ct_tmpl_put_pcpu(void)
{
@@ -489,7 +456,7 @@ static int nft_ct_get_init(const struct nft_ctx *ctx,
if (err < 0)
return err;
- err = nft_ct_netns_get(ctx->net, ctx->afi->family);
+ err = nf_ct_netns_get(ctx->net, ctx->afi->family);
if (err < 0)
return err;
@@ -583,7 +550,7 @@ static int nft_ct_set_init(const struct nft_ctx *ctx,
if (err < 0)
goto err1;
- err = nft_ct_netns_get(ctx->net, ctx->afi->family);
+ err = nf_ct_netns_get(ctx->net, ctx->afi->family);
if (err < 0)
goto err1;
@@ -606,7 +573,7 @@ static void nft_ct_set_destroy(const struct nft_ctx *ctx,
struct nft_ct *priv = nft_expr_priv(expr);
__nft_ct_set_destroy(ctx, priv);
- nft_ct_netns_put(ctx->net, ctx->afi->family);
+ nf_ct_netns_put(ctx->net, ctx->afi->family);
}
static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr)
@@ -904,15 +871,21 @@ static const struct nla_policy nft_ct_helper_policy[NFTA_CT_HELPER_MAX + 1] = {
[NFTA_CT_HELPER_L4PROTO] = { .type = NLA_U8 },
};
-static struct nft_object_type nft_ct_helper_obj __read_mostly = {
- .type = NFT_OBJECT_CT_HELPER,
+static struct nft_object_type nft_ct_helper_obj_type;
+static const struct nft_object_ops nft_ct_helper_obj_ops = {
+ .type = &nft_ct_helper_obj_type,
.size = sizeof(struct nft_ct_helper_obj),
- .maxattr = NFTA_CT_HELPER_MAX,
- .policy = nft_ct_helper_policy,
.eval = nft_ct_helper_obj_eval,
.init = nft_ct_helper_obj_init,
.destroy = nft_ct_helper_obj_destroy,
.dump = nft_ct_helper_obj_dump,
+};
+
+static struct nft_object_type nft_ct_helper_obj_type __read_mostly = {
+ .type = NFT_OBJECT_CT_HELPER,
+ .ops = &nft_ct_helper_obj_ops,
+ .maxattr = NFTA_CT_HELPER_MAX,
+ .policy = nft_ct_helper_policy,
.owner = THIS_MODULE,
};
@@ -930,7 +903,7 @@ static int __init nft_ct_module_init(void)
if (err < 0)
goto err1;
- err = nft_register_obj(&nft_ct_helper_obj);
+ err = nft_register_obj(&nft_ct_helper_obj_type);
if (err < 0)
goto err2;
@@ -945,7 +918,7 @@ err1:
static void __exit nft_ct_module_exit(void)
{
- nft_unregister_obj(&nft_ct_helper_obj);
+ nft_unregister_obj(&nft_ct_helper_obj_type);
nft_unregister_expr(&nft_notrack_type);
nft_unregister_expr(&nft_ct_type);
}
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index 1ec49fe5845f..a0a93d987a3b 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -8,6 +8,7 @@
* Development of this code funded by Astaro AG (http://www.astaro.com/)
*/
+#include <asm/unaligned.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
@@ -23,6 +24,7 @@ struct nft_exthdr {
u8 len;
u8 op;
enum nft_registers dreg:8;
+ enum nft_registers sreg:8;
u8 flags;
};
@@ -61,6 +63,26 @@ err:
regs->verdict.code = NFT_BREAK;
}
+static void *
+nft_tcp_header_pointer(const struct nft_pktinfo *pkt,
+ unsigned int len, void *buffer, unsigned int *tcphdr_len)
+{
+ struct tcphdr *tcph;
+
+ if (!pkt->tprot_set || pkt->tprot != IPPROTO_TCP)
+ return NULL;
+
+ tcph = skb_header_pointer(pkt->skb, pkt->xt.thoff, sizeof(*tcph), buffer);
+ if (!tcph)
+ return NULL;
+
+ *tcphdr_len = __tcp_hdrlen(tcph);
+ if (*tcphdr_len < sizeof(*tcph) || *tcphdr_len > len)
+ return NULL;
+
+ return skb_header_pointer(pkt->skb, pkt->xt.thoff, *tcphdr_len, buffer);
+}
+
static void nft_exthdr_tcp_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -72,18 +94,7 @@ static void nft_exthdr_tcp_eval(const struct nft_expr *expr,
struct tcphdr *tcph;
u8 *opt;
- if (!pkt->tprot_set || pkt->tprot != IPPROTO_TCP)
- goto err;
-
- tcph = skb_header_pointer(pkt->skb, pkt->xt.thoff, sizeof(*tcph), buff);
- if (!tcph)
- goto err;
-
- tcphdr_len = __tcp_hdrlen(tcph);
- if (tcphdr_len < sizeof(*tcph))
- goto err;
-
- tcph = skb_header_pointer(pkt->skb, pkt->xt.thoff, tcphdr_len, buff);
+ tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len);
if (!tcph)
goto err;
@@ -115,6 +126,88 @@ err:
regs->verdict.code = NFT_BREAK;
}
+static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ u8 buff[sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE];
+ struct nft_exthdr *priv = nft_expr_priv(expr);
+ unsigned int i, optl, tcphdr_len, offset;
+ struct tcphdr *tcph;
+ u8 *opt;
+ u32 src;
+
+ tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len);
+ if (!tcph)
+ return;
+
+ opt = (u8 *)tcph;
+ for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) {
+ union {
+ u8 octet;
+ __be16 v16;
+ __be32 v32;
+ } old, new;
+
+ optl = optlen(opt, i);
+
+ if (priv->type != opt[i])
+ continue;
+
+ if (i + optl > tcphdr_len || priv->len + priv->offset > optl)
+ return;
+
+ if (!skb_make_writable(pkt->skb, pkt->xt.thoff + i + priv->len))
+ return;
+
+ tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff,
+ &tcphdr_len);
+ if (!tcph)
+ return;
+
+ src = regs->data[priv->sreg];
+ offset = i + priv->offset;
+
+ switch (priv->len) {
+ case 2:
+ old.v16 = get_unaligned((u16 *)(opt + offset));
+ new.v16 = src;
+
+ switch (priv->type) {
+ case TCPOPT_MSS:
+ /* increase can cause connection to stall */
+ if (ntohs(old.v16) <= ntohs(new.v16))
+ return;
+ break;
+ }
+
+ if (old.v16 == new.v16)
+ return;
+
+ put_unaligned(new.v16, (u16*)(opt + offset));
+ inet_proto_csum_replace2(&tcph->check, pkt->skb,
+ old.v16, new.v16, false);
+ break;
+ case 4:
+ new.v32 = src;
+ old.v32 = get_unaligned((u32 *)(opt + offset));
+
+ if (old.v32 == new.v32)
+ return;
+
+ put_unaligned(new.v32, (u32*)(opt + offset));
+ inet_proto_csum_replace4(&tcph->check, pkt->skb,
+ old.v32, new.v32, false);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ break;
+ }
+
+ return;
+ }
+}
+
static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = {
[NFTA_EXTHDR_DREG] = { .type = NLA_U32 },
[NFTA_EXTHDR_TYPE] = { .type = NLA_U8 },
@@ -171,12 +264,57 @@ static int nft_exthdr_init(const struct nft_ctx *ctx,
NFT_DATA_VALUE, priv->len);
}
-static int nft_exthdr_dump(struct sk_buff *skb, const struct nft_expr *expr)
+static int nft_exthdr_tcp_set_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
{
- const struct nft_exthdr *priv = nft_expr_priv(expr);
+ struct nft_exthdr *priv = nft_expr_priv(expr);
+ u32 offset, len, flags = 0, op = NFT_EXTHDR_OP_IPV6;
+ int err;
- if (nft_dump_register(skb, NFTA_EXTHDR_DREG, priv->dreg))
- goto nla_put_failure;
+ if (!tb[NFTA_EXTHDR_SREG] ||
+ !tb[NFTA_EXTHDR_TYPE] ||
+ !tb[NFTA_EXTHDR_OFFSET] ||
+ !tb[NFTA_EXTHDR_LEN])
+ return -EINVAL;
+
+ if (tb[NFTA_EXTHDR_DREG] || tb[NFTA_EXTHDR_FLAGS])
+ return -EINVAL;
+
+ err = nft_parse_u32_check(tb[NFTA_EXTHDR_OFFSET], U8_MAX, &offset);
+ if (err < 0)
+ return err;
+
+ err = nft_parse_u32_check(tb[NFTA_EXTHDR_LEN], U8_MAX, &len);
+ if (err < 0)
+ return err;
+
+ if (offset < 2)
+ return -EOPNOTSUPP;
+
+ switch (len) {
+ case 2: break;
+ case 4: break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ err = nft_parse_u32_check(tb[NFTA_EXTHDR_OP], U8_MAX, &op);
+ if (err < 0)
+ return err;
+
+ priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]);
+ priv->offset = offset;
+ priv->len = len;
+ priv->sreg = nft_parse_register(tb[NFTA_EXTHDR_SREG]);
+ priv->flags = flags;
+ priv->op = op;
+
+ return nft_validate_register_load(priv->sreg, priv->len);
+}
+
+static int nft_exthdr_dump_common(struct sk_buff *skb, const struct nft_exthdr *priv)
+{
if (nla_put_u8(skb, NFTA_EXTHDR_TYPE, priv->type))
goto nla_put_failure;
if (nla_put_be32(skb, NFTA_EXTHDR_OFFSET, htonl(priv->offset)))
@@ -193,6 +331,26 @@ nla_put_failure:
return -1;
}
+static int nft_exthdr_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_exthdr *priv = nft_expr_priv(expr);
+
+ if (nft_dump_register(skb, NFTA_EXTHDR_DREG, priv->dreg))
+ return -1;
+
+ return nft_exthdr_dump_common(skb, priv);
+}
+
+static int nft_exthdr_dump_set(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_exthdr *priv = nft_expr_priv(expr);
+
+ if (nft_dump_register(skb, NFTA_EXTHDR_SREG, priv->sreg))
+ return -1;
+
+ return nft_exthdr_dump_common(skb, priv);
+}
+
static struct nft_expr_type nft_exthdr_type;
static const struct nft_expr_ops nft_exthdr_ipv6_ops = {
.type = &nft_exthdr_type,
@@ -210,6 +368,14 @@ static const struct nft_expr_ops nft_exthdr_tcp_ops = {
.dump = nft_exthdr_dump,
};
+static const struct nft_expr_ops nft_exthdr_tcp_set_ops = {
+ .type = &nft_exthdr_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
+ .eval = nft_exthdr_tcp_set_eval,
+ .init = nft_exthdr_tcp_set_init,
+ .dump = nft_exthdr_dump_set,
+};
+
static const struct nft_expr_ops *
nft_exthdr_select_ops(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
@@ -219,12 +385,21 @@ nft_exthdr_select_ops(const struct nft_ctx *ctx,
if (!tb[NFTA_EXTHDR_OP])
return &nft_exthdr_ipv6_ops;
- op = ntohl(nla_get_u32(tb[NFTA_EXTHDR_OP]));
+ if (tb[NFTA_EXTHDR_SREG] && tb[NFTA_EXTHDR_DREG])
+ return ERR_PTR(-EOPNOTSUPP);
+
+ op = ntohl(nla_get_be32(tb[NFTA_EXTHDR_OP]));
switch (op) {
case NFT_EXTHDR_OP_TCPOPT:
- return &nft_exthdr_tcp_ops;
+ if (tb[NFTA_EXTHDR_SREG])
+ return &nft_exthdr_tcp_set_ops;
+ if (tb[NFTA_EXTHDR_DREG])
+ return &nft_exthdr_tcp_ops;
+ break;
case NFT_EXTHDR_OP_IPV6:
- return &nft_exthdr_ipv6_ops;
+ if (tb[NFTA_EXTHDR_DREG])
+ return &nft_exthdr_ipv6_ops;
+ break;
}
return ERR_PTR(-EOPNOTSUPP);
diff --git a/net/netfilter/nft_fib_netdev.c b/net/netfilter/nft_fib_netdev.c
new file mode 100644
index 000000000000..3997ee36cfbd
--- /dev/null
+++ b/net/netfilter/nft_fib_netdev.c
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2017 Pablo M. Bermudo Garay <pablombg@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This code is based on net/netfilter/nft_fib_inet.c, written by
+ * Florian Westphal <fw@strlen.de>.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+#include <net/netfilter/nft_fib.h>
+
+static void nft_fib_netdev_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_fib *priv = nft_expr_priv(expr);
+
+ switch (ntohs(pkt->skb->protocol)) {
+ case ETH_P_IP:
+ switch (priv->result) {
+ case NFT_FIB_RESULT_OIF:
+ case NFT_FIB_RESULT_OIFNAME:
+ return nft_fib4_eval(expr, regs, pkt);
+ case NFT_FIB_RESULT_ADDRTYPE:
+ return nft_fib4_eval_type(expr, regs, pkt);
+ }
+ break;
+ case ETH_P_IPV6:
+ switch (priv->result) {
+ case NFT_FIB_RESULT_OIF:
+ case NFT_FIB_RESULT_OIFNAME:
+ return nft_fib6_eval(expr, regs, pkt);
+ case NFT_FIB_RESULT_ADDRTYPE:
+ return nft_fib6_eval_type(expr, regs, pkt);
+ }
+ break;
+ }
+
+ regs->verdict.code = NFT_BREAK;
+}
+
+static struct nft_expr_type nft_fib_netdev_type;
+static const struct nft_expr_ops nft_fib_netdev_ops = {
+ .type = &nft_fib_netdev_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)),
+ .eval = nft_fib_netdev_eval,
+ .init = nft_fib_init,
+ .dump = nft_fib_dump,
+ .validate = nft_fib_validate,
+};
+
+static struct nft_expr_type nft_fib_netdev_type __read_mostly = {
+ .family = NFPROTO_NETDEV,
+ .name = "fib",
+ .ops = &nft_fib_netdev_ops,
+ .policy = nft_fib_policy,
+ .maxattr = NFTA_FIB_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_fib_netdev_module_init(void)
+{
+ return nft_register_expr(&nft_fib_netdev_type);
+}
+
+static void __exit nft_fib_netdev_module_exit(void)
+{
+ nft_unregister_expr(&nft_fib_netdev_type);
+}
+
+module_init(nft_fib_netdev_module_init);
+module_exit(nft_fib_netdev_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo M. Bermudo Garay <pablombg@gmail.com>");
+MODULE_ALIAS_NFT_AF_EXPR(5, "fib");
diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
index 18dd57a52651..a9fc298ef4c3 100644
--- a/net/netfilter/nft_limit.c
+++ b/net/netfilter/nft_limit.c
@@ -65,19 +65,23 @@ static int nft_limit_init(struct nft_limit *limit,
limit->nsecs = unit * NSEC_PER_SEC;
if (limit->rate == 0 || limit->nsecs < unit)
return -EOVERFLOW;
- limit->tokens = limit->tokens_max = limit->nsecs;
-
- if (tb[NFTA_LIMIT_BURST]) {
- u64 rate;
+ if (tb[NFTA_LIMIT_BURST])
limit->burst = ntohl(nla_get_be32(tb[NFTA_LIMIT_BURST]));
+ else
+ limit->burst = 0;
- rate = limit->rate + limit->burst;
- if (rate < limit->rate)
- return -EOVERFLOW;
+ if (limit->rate + limit->burst < limit->rate)
+ return -EOVERFLOW;
+
+ /* The token bucket size limits the number of tokens can be
+ * accumulated. tokens_max specifies the bucket size.
+ * tokens_max = unit * (rate + burst) / rate.
+ */
+ limit->tokens = div_u64(limit->nsecs * (limit->rate + limit->burst),
+ limit->rate);
+ limit->tokens_max = limit->tokens;
- limit->rate = rate;
- }
if (tb[NFTA_LIMIT_FLAGS]) {
u32 flags = ntohl(nla_get_be32(tb[NFTA_LIMIT_FLAGS]));
@@ -95,9 +99,8 @@ static int nft_limit_dump(struct sk_buff *skb, const struct nft_limit *limit,
{
u32 flags = limit->invert ? NFT_LIMIT_F_INV : 0;
u64 secs = div_u64(limit->nsecs, NSEC_PER_SEC);
- u64 rate = limit->rate - limit->burst;
- if (nla_put_be64(skb, NFTA_LIMIT_RATE, cpu_to_be64(rate),
+ if (nla_put_be64(skb, NFTA_LIMIT_RATE, cpu_to_be64(limit->rate),
NFTA_LIMIT_PAD) ||
nla_put_be64(skb, NFTA_LIMIT_UNIT, cpu_to_be64(secs),
NFTA_LIMIT_PAD) ||
@@ -165,9 +168,9 @@ static const struct nft_expr_ops nft_limit_pkts_ops = {
.dump = nft_limit_pkts_dump,
};
-static void nft_limit_pkt_bytes_eval(const struct nft_expr *expr,
- struct nft_regs *regs,
- const struct nft_pktinfo *pkt)
+static void nft_limit_bytes_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
{
struct nft_limit *priv = nft_expr_priv(expr);
u64 cost = div64_u64(priv->nsecs * pkt->skb->len, priv->rate);
@@ -176,29 +179,29 @@ static void nft_limit_pkt_bytes_eval(const struct nft_expr *expr,
regs->verdict.code = NFT_BREAK;
}
-static int nft_limit_pkt_bytes_init(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nlattr * const tb[])
+static int nft_limit_bytes_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
{
struct nft_limit *priv = nft_expr_priv(expr);
return nft_limit_init(priv, tb);
}
-static int nft_limit_pkt_bytes_dump(struct sk_buff *skb,
- const struct nft_expr *expr)
+static int nft_limit_bytes_dump(struct sk_buff *skb,
+ const struct nft_expr *expr)
{
const struct nft_limit *priv = nft_expr_priv(expr);
return nft_limit_dump(skb, priv, NFT_LIMIT_PKT_BYTES);
}
-static const struct nft_expr_ops nft_limit_pkt_bytes_ops = {
+static const struct nft_expr_ops nft_limit_bytes_ops = {
.type = &nft_limit_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_limit)),
- .eval = nft_limit_pkt_bytes_eval,
- .init = nft_limit_pkt_bytes_init,
- .dump = nft_limit_pkt_bytes_dump,
+ .eval = nft_limit_bytes_eval,
+ .init = nft_limit_bytes_init,
+ .dump = nft_limit_bytes_dump,
};
static const struct nft_expr_ops *
@@ -212,7 +215,7 @@ nft_limit_select_ops(const struct nft_ctx *ctx,
case NFT_LIMIT_PKTS:
return &nft_limit_pkts_ops;
case NFT_LIMIT_PKT_BYTES:
- return &nft_limit_pkt_bytes_ops;
+ return &nft_limit_bytes_ops;
}
return ERR_PTR(-EOPNOTSUPP);
}
@@ -226,14 +229,133 @@ static struct nft_expr_type nft_limit_type __read_mostly = {
.owner = THIS_MODULE,
};
+static void nft_limit_obj_pkts_eval(struct nft_object *obj,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_limit_pkts *priv = nft_obj_data(obj);
+
+ if (nft_limit_eval(&priv->limit, priv->cost))
+ regs->verdict.code = NFT_BREAK;
+}
+
+static int nft_limit_obj_pkts_init(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[],
+ struct nft_object *obj)
+{
+ struct nft_limit_pkts *priv = nft_obj_data(obj);
+ int err;
+
+ err = nft_limit_init(&priv->limit, tb);
+ if (err < 0)
+ return err;
+
+ priv->cost = div64_u64(priv->limit.nsecs, priv->limit.rate);
+ return 0;
+}
+
+static int nft_limit_obj_pkts_dump(struct sk_buff *skb,
+ struct nft_object *obj,
+ bool reset)
+{
+ const struct nft_limit_pkts *priv = nft_obj_data(obj);
+
+ return nft_limit_dump(skb, &priv->limit, NFT_LIMIT_PKTS);
+}
+
+static struct nft_object_type nft_limit_obj_type;
+static const struct nft_object_ops nft_limit_obj_pkts_ops = {
+ .type = &nft_limit_obj_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_limit_pkts)),
+ .init = nft_limit_obj_pkts_init,
+ .eval = nft_limit_obj_pkts_eval,
+ .dump = nft_limit_obj_pkts_dump,
+};
+
+static void nft_limit_obj_bytes_eval(struct nft_object *obj,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_limit *priv = nft_obj_data(obj);
+ u64 cost = div64_u64(priv->nsecs * pkt->skb->len, priv->rate);
+
+ if (nft_limit_eval(priv, cost))
+ regs->verdict.code = NFT_BREAK;
+}
+
+static int nft_limit_obj_bytes_init(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[],
+ struct nft_object *obj)
+{
+ struct nft_limit *priv = nft_obj_data(obj);
+
+ return nft_limit_init(priv, tb);
+}
+
+static int nft_limit_obj_bytes_dump(struct sk_buff *skb,
+ struct nft_object *obj,
+ bool reset)
+{
+ const struct nft_limit *priv = nft_obj_data(obj);
+
+ return nft_limit_dump(skb, priv, NFT_LIMIT_PKT_BYTES);
+}
+
+static struct nft_object_type nft_limit_obj_type;
+static const struct nft_object_ops nft_limit_obj_bytes_ops = {
+ .type = &nft_limit_obj_type,
+ .size = sizeof(struct nft_limit),
+ .init = nft_limit_obj_bytes_init,
+ .eval = nft_limit_obj_bytes_eval,
+ .dump = nft_limit_obj_bytes_dump,
+};
+
+static const struct nft_object_ops *
+nft_limit_obj_select_ops(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[])
+{
+ if (!tb[NFTA_LIMIT_TYPE])
+ return &nft_limit_obj_pkts_ops;
+
+ switch (ntohl(nla_get_be32(tb[NFTA_LIMIT_TYPE]))) {
+ case NFT_LIMIT_PKTS:
+ return &nft_limit_obj_pkts_ops;
+ case NFT_LIMIT_PKT_BYTES:
+ return &nft_limit_obj_bytes_ops;
+ }
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
+static struct nft_object_type nft_limit_obj_type __read_mostly = {
+ .select_ops = nft_limit_obj_select_ops,
+ .type = NFT_OBJECT_LIMIT,
+ .maxattr = NFTA_LIMIT_MAX,
+ .policy = nft_limit_policy,
+ .owner = THIS_MODULE,
+};
+
static int __init nft_limit_module_init(void)
{
- return nft_register_expr(&nft_limit_type);
+ int err;
+
+ err = nft_register_obj(&nft_limit_obj_type);
+ if (err < 0)
+ return err;
+
+ err = nft_register_expr(&nft_limit_type);
+ if (err < 0)
+ goto err1;
+
+ return 0;
+err1:
+ nft_unregister_obj(&nft_limit_obj_type);
+ return err;
}
static void __exit nft_limit_module_exit(void)
{
nft_unregister_expr(&nft_limit_type);
+ nft_unregister_obj(&nft_limit_obj_type);
}
module_init(nft_limit_module_init);
@@ -242,3 +364,4 @@ module_exit(nft_limit_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
MODULE_ALIAS_NFT_EXPR("limit");
+MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_LIMIT);
diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c
index 1dd428fbaaa3..7bcdc48f3d73 100644
--- a/net/netfilter/nft_objref.c
+++ b/net/netfilter/nft_objref.c
@@ -22,7 +22,7 @@ static void nft_objref_eval(const struct nft_expr *expr,
{
struct nft_object *obj = nft_objref_priv(expr);
- obj->type->eval(obj, regs, pkt);
+ obj->ops->eval(obj, regs, pkt);
}
static int nft_objref_init(const struct nft_ctx *ctx,
@@ -54,7 +54,8 @@ static int nft_objref_dump(struct sk_buff *skb, const struct nft_expr *expr)
const struct nft_object *obj = nft_objref_priv(expr);
if (nla_put_string(skb, NFTA_OBJREF_IMM_NAME, obj->name) ||
- nla_put_be32(skb, NFTA_OBJREF_IMM_TYPE, htonl(obj->type->type)))
+ nla_put_be32(skb, NFTA_OBJREF_IMM_TYPE,
+ htonl(obj->ops->type->type)))
goto nla_put_failure;
return 0;
@@ -104,7 +105,7 @@ static void nft_objref_map_eval(const struct nft_expr *expr,
return;
}
obj = *nft_set_ext_obj(ext);
- obj->type->eval(obj, regs, pkt);
+ obj->ops->eval(obj, regs, pkt);
}
static int nft_objref_map_init(const struct nft_ctx *ctx,
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index 7d699bbd45b0..e110b0ebbf58 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -184,7 +184,7 @@ static bool nft_payload_udp_checksum(struct sk_buff *skb, unsigned int thoff)
if (!uh)
return false;
- return uh->check;
+ return (__force bool)uh->check;
}
static int nft_payload_l4csum_offset(const struct nft_pktinfo *pkt,
diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c
index 25e33159be57..0ed124a93fcf 100644
--- a/net/netfilter/nft_quota.c
+++ b/net/netfilter/nft_quota.c
@@ -151,14 +151,20 @@ static int nft_quota_obj_dump(struct sk_buff *skb, struct nft_object *obj,
return nft_quota_do_dump(skb, priv, reset);
}
-static struct nft_object_type nft_quota_obj __read_mostly = {
- .type = NFT_OBJECT_QUOTA,
+static struct nft_object_type nft_quota_obj_type;
+static const struct nft_object_ops nft_quota_obj_ops = {
+ .type = &nft_quota_obj_type,
.size = sizeof(struct nft_quota),
- .maxattr = NFTA_QUOTA_MAX,
- .policy = nft_quota_policy,
.init = nft_quota_obj_init,
.eval = nft_quota_obj_eval,
.dump = nft_quota_obj_dump,
+};
+
+static struct nft_object_type nft_quota_obj_type __read_mostly = {
+ .type = NFT_OBJECT_QUOTA,
+ .ops = &nft_quota_obj_ops,
+ .maxattr = NFTA_QUOTA_MAX,
+ .policy = nft_quota_policy,
.owner = THIS_MODULE,
};
@@ -209,7 +215,7 @@ static int __init nft_quota_module_init(void)
{
int err;
- err = nft_register_obj(&nft_quota_obj);
+ err = nft_register_obj(&nft_quota_obj_type);
if (err < 0)
return err;
@@ -219,14 +225,14 @@ static int __init nft_quota_module_init(void)
return 0;
err1:
- nft_unregister_obj(&nft_quota_obj);
+ nft_unregister_obj(&nft_quota_obj_type);
return err;
}
static void __exit nft_quota_module_exit(void)
{
nft_unregister_expr(&nft_quota_type);
- nft_unregister_obj(&nft_quota_obj);
+ nft_unregister_obj(&nft_quota_obj_type);
}
module_init(nft_quota_module_init);
diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c
index c7383d8f88d0..a6b7d05aeacf 100644
--- a/net/netfilter/nft_rt.c
+++ b/net/netfilter/nft_rt.c
@@ -23,6 +23,43 @@ struct nft_rt {
enum nft_registers dreg:8;
};
+static u16 get_tcpmss(const struct nft_pktinfo *pkt, const struct dst_entry *skbdst)
+{
+ u32 minlen = sizeof(struct ipv6hdr), mtu = dst_mtu(skbdst);
+ const struct sk_buff *skb = pkt->skb;
+ const struct nf_afinfo *ai;
+ struct flowi fl;
+
+ memset(&fl, 0, sizeof(fl));
+
+ switch (nft_pf(pkt)) {
+ case NFPROTO_IPV4:
+ fl.u.ip4.daddr = ip_hdr(skb)->saddr;
+ minlen = sizeof(struct iphdr) + sizeof(struct tcphdr);
+ break;
+ case NFPROTO_IPV6:
+ fl.u.ip6.daddr = ipv6_hdr(skb)->saddr;
+ minlen = sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
+ break;
+ }
+
+ ai = nf_get_afinfo(nft_pf(pkt));
+ if (ai) {
+ struct dst_entry *dst = NULL;
+
+ ai->route(nft_net(pkt), &dst, &fl, false);
+ if (dst) {
+ mtu = min(mtu, dst_mtu(dst));
+ dst_release(dst);
+ }
+ }
+
+ if (mtu <= minlen || mtu > 0xffff)
+ return TCP_MSS_DEFAULT;
+
+ return mtu - minlen;
+}
+
static void nft_rt_get_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -46,8 +83,8 @@ static void nft_rt_get_eval(const struct nft_expr *expr,
if (nft_pf(pkt) != NFPROTO_IPV4)
goto err;
- *dest = rt_nexthop((const struct rtable *)dst,
- ip_hdr(skb)->daddr);
+ *dest = (__force u32)rt_nexthop((const struct rtable *)dst,
+ ip_hdr(skb)->daddr);
break;
case NFT_RT_NEXTHOP6:
if (nft_pf(pkt) != NFPROTO_IPV6)
@@ -57,6 +94,9 @@ static void nft_rt_get_eval(const struct nft_expr *expr,
&ipv6_hdr(skb)->daddr),
sizeof(struct in6_addr));
break;
+ case NFT_RT_TCPMSS:
+ nft_reg_store16(dest, get_tcpmss(pkt, dst));
+ break;
default:
WARN_ON(1);
goto err;
@@ -67,7 +107,7 @@ err:
regs->verdict.code = NFT_BREAK;
}
-const struct nla_policy nft_rt_policy[NFTA_RT_MAX + 1] = {
+static const struct nla_policy nft_rt_policy[NFTA_RT_MAX + 1] = {
[NFTA_RT_DREG] = { .type = NLA_U32 },
[NFTA_RT_KEY] = { .type = NLA_U32 },
};
@@ -94,6 +134,9 @@ static int nft_rt_get_init(const struct nft_ctx *ctx,
case NFT_RT_NEXTHOP6:
len = sizeof(struct in6_addr);
break;
+ case NFT_RT_TCPMSS:
+ len = sizeof(u16);
+ break;
default:
return -EOPNOTSUPP;
}
@@ -118,6 +161,29 @@ nla_put_failure:
return -1;
}
+static int nft_rt_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+ const struct nft_rt *priv = nft_expr_priv(expr);
+ unsigned int hooks;
+
+ switch (priv->key) {
+ case NFT_RT_NEXTHOP4:
+ case NFT_RT_NEXTHOP6:
+ case NFT_RT_CLASSID:
+ return 0;
+ case NFT_RT_TCPMSS:
+ hooks = (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_POST_ROUTING);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return nft_chain_validate_hooks(ctx->chain, hooks);
+}
+
static struct nft_expr_type nft_rt_type;
static const struct nft_expr_ops nft_rt_get_ops = {
.type = &nft_rt_type,
@@ -125,6 +191,7 @@ static const struct nft_expr_ops nft_rt_get_ops = {
.eval = nft_rt_get_eval,
.init = nft_rt_get_init,
.dump = nft_rt_get_dump,
+ .validate = nft_rt_validate,
};
static struct nft_expr_type nft_rt_type __read_mostly = {
diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c
index 734989c40579..45fb2752fb63 100644
--- a/net/netfilter/nft_set_bitmap.c
+++ b/net/netfilter/nft_set_bitmap.c
@@ -106,6 +106,23 @@ nft_bitmap_elem_find(const struct nft_set *set, struct nft_bitmap_elem *this,
return NULL;
}
+static void *nft_bitmap_get(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem, unsigned int flags)
+{
+ const struct nft_bitmap *priv = nft_set_priv(set);
+ u8 genmask = nft_genmask_cur(net);
+ struct nft_bitmap_elem *be;
+
+ list_for_each_entry_rcu(be, &priv->list, head) {
+ if (memcmp(nft_set_ext_key(&be->ext), elem->key.val.data, set->klen) ||
+ !nft_set_elem_active(&be->ext, genmask))
+ continue;
+
+ return be;
+ }
+ return ERR_PTR(-ENOENT);
+}
+
static int nft_bitmap_insert(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem,
struct nft_set_ext **ext)
@@ -294,6 +311,7 @@ static struct nft_set_ops nft_bitmap_ops __read_mostly = {
.activate = nft_bitmap_activate,
.lookup = nft_bitmap_lookup,
.walk = nft_bitmap_walk,
+ .get = nft_bitmap_get,
};
static struct nft_set_type nft_bitmap_type __read_mostly = {
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 0fa01d772c5e..f8166c1d5430 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -95,6 +95,24 @@ static bool nft_rhash_lookup(const struct net *net, const struct nft_set *set,
return !!he;
}
+static void *nft_rhash_get(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem, unsigned int flags)
+{
+ struct nft_rhash *priv = nft_set_priv(set);
+ struct nft_rhash_elem *he;
+ struct nft_rhash_cmp_arg arg = {
+ .genmask = nft_genmask_cur(net),
+ .set = set,
+ .key = elem->key.val.data,
+ };
+
+ he = rhashtable_lookup_fast(&priv->ht, &arg, nft_rhash_params);
+ if (he != NULL)
+ return he;
+
+ return ERR_PTR(-ENOENT);
+}
+
static bool nft_rhash_update(struct nft_set *set, const u32 *key,
void *(*new)(struct nft_set *,
const struct nft_expr *,
@@ -409,6 +427,24 @@ static bool nft_hash_lookup(const struct net *net, const struct nft_set *set,
return false;
}
+static void *nft_hash_get(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem, unsigned int flags)
+{
+ struct nft_hash *priv = nft_set_priv(set);
+ u8 genmask = nft_genmask_cur(net);
+ struct nft_hash_elem *he;
+ u32 hash;
+
+ hash = jhash(elem->key.val.data, set->klen, priv->seed);
+ hash = reciprocal_scale(hash, priv->buckets);
+ hlist_for_each_entry_rcu(he, &priv->table[hash], node) {
+ if (!memcmp(nft_set_ext_key(&he->ext), elem->key.val.data, set->klen) &&
+ nft_set_elem_active(&he->ext, genmask))
+ return he;
+ }
+ return ERR_PTR(-ENOENT);
+}
+
/* nft_hash_select_ops() makes sure key size can be either 2 or 4 bytes . */
static inline u32 nft_hash_key(const u32 *key, u32 klen)
{
@@ -494,7 +530,7 @@ static void *nft_hash_deactivate(const struct net *net,
hash = reciprocal_scale(hash, priv->buckets);
hlist_for_each_entry(he, &priv->table[hash], node) {
if (!memcmp(nft_set_ext_key(&this->ext), &elem->key.val,
- set->klen) ||
+ set->klen) &&
nft_set_elem_active(&he->ext, genmask)) {
nft_set_elem_change_active(net, set, &he->ext);
return he;
@@ -600,6 +636,7 @@ static struct nft_set_ops nft_rhash_ops __read_mostly = {
.lookup = nft_rhash_lookup,
.update = nft_rhash_update,
.walk = nft_rhash_walk,
+ .get = nft_rhash_get,
.features = NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT,
};
@@ -617,6 +654,7 @@ static struct nft_set_ops nft_hash_ops __read_mostly = {
.remove = nft_hash_remove,
.lookup = nft_hash_lookup,
.walk = nft_hash_walk,
+ .get = nft_hash_get,
.features = NFT_SET_MAP | NFT_SET_OBJECT,
};
@@ -634,6 +672,7 @@ static struct nft_set_ops nft_hash_fast_ops __read_mostly = {
.remove = nft_hash_remove,
.lookup = nft_hash_lookup_fast,
.walk = nft_hash_walk,
+ .get = nft_hash_get,
.features = NFT_SET_MAP | NFT_SET_OBJECT,
};
@@ -643,7 +682,6 @@ nft_hash_select_ops(const struct nft_ctx *ctx, const struct nft_set_desc *desc,
{
if (desc->size) {
switch (desc->klen) {
- case 2:
case 4:
return &nft_hash_fast_ops;
default:
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index bce5382f1d49..e6f08bc5f359 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -19,8 +19,9 @@
#include <net/netfilter/nf_tables.h>
struct nft_rbtree {
- rwlock_t lock;
struct rb_root root;
+ rwlock_t lock;
+ seqcount_t count;
};
struct nft_rbtree_elem {
@@ -40,8 +41,9 @@ static bool nft_rbtree_equal(const struct nft_set *set, const void *this,
return memcmp(this, nft_set_ext_key(&interval->ext), set->klen) == 0;
}
-static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
- const u32 *key, const struct nft_set_ext **ext)
+static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key, const struct nft_set_ext **ext,
+ unsigned int seq)
{
struct nft_rbtree *priv = nft_set_priv(set);
const struct nft_rbtree_elem *rbe, *interval = NULL;
@@ -50,15 +52,17 @@ static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
const void *this;
int d;
- read_lock_bh(&priv->lock);
- parent = priv->root.rb_node;
+ parent = rcu_dereference_raw(priv->root.rb_node);
while (parent != NULL) {
+ if (read_seqcount_retry(&priv->count, seq))
+ return false;
+
rbe = rb_entry(parent, struct nft_rbtree_elem, node);
this = nft_set_ext_key(&rbe->ext);
d = memcmp(this, key, set->klen);
if (d < 0) {
- parent = parent->rb_left;
+ parent = rcu_dereference_raw(parent->rb_left);
if (interval &&
nft_rbtree_equal(set, this, interval) &&
nft_rbtree_interval_end(this) &&
@@ -66,15 +70,14 @@ static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
continue;
interval = rbe;
} else if (d > 0)
- parent = parent->rb_right;
+ parent = rcu_dereference_raw(parent->rb_right);
else {
if (!nft_set_elem_active(&rbe->ext, genmask)) {
- parent = parent->rb_left;
+ parent = rcu_dereference_raw(parent->rb_left);
continue;
}
if (nft_rbtree_interval_end(rbe))
goto out;
- read_unlock_bh(&priv->lock);
*ext = &rbe->ext;
return true;
@@ -84,15 +87,104 @@ static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
nft_set_elem_active(&interval->ext, genmask) &&
!nft_rbtree_interval_end(interval)) {
- read_unlock_bh(&priv->lock);
*ext = &interval->ext;
return true;
}
out:
+ return false;
+}
+
+static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key, const struct nft_set_ext **ext)
+{
+ struct nft_rbtree *priv = nft_set_priv(set);
+ unsigned int seq = read_seqcount_begin(&priv->count);
+ bool ret;
+
+ ret = __nft_rbtree_lookup(net, set, key, ext, seq);
+ if (ret || !read_seqcount_retry(&priv->count, seq))
+ return ret;
+
+ read_lock_bh(&priv->lock);
+ seq = read_seqcount_begin(&priv->count);
+ ret = __nft_rbtree_lookup(net, set, key, ext, seq);
read_unlock_bh(&priv->lock);
+
+ return ret;
+}
+
+static bool __nft_rbtree_get(const struct net *net, const struct nft_set *set,
+ const u32 *key, struct nft_rbtree_elem **elem,
+ unsigned int seq, unsigned int flags, u8 genmask)
+{
+ struct nft_rbtree_elem *rbe, *interval = NULL;
+ struct nft_rbtree *priv = nft_set_priv(set);
+ const struct rb_node *parent;
+ const void *this;
+ int d;
+
+ parent = rcu_dereference_raw(priv->root.rb_node);
+ while (parent != NULL) {
+ if (read_seqcount_retry(&priv->count, seq))
+ return false;
+
+ rbe = rb_entry(parent, struct nft_rbtree_elem, node);
+
+ this = nft_set_ext_key(&rbe->ext);
+ d = memcmp(this, key, set->klen);
+ if (d < 0) {
+ parent = rcu_dereference_raw(parent->rb_left);
+ interval = rbe;
+ } else if (d > 0) {
+ parent = rcu_dereference_raw(parent->rb_right);
+ } else {
+ if (!nft_set_elem_active(&rbe->ext, genmask))
+ parent = rcu_dereference_raw(parent->rb_left);
+
+ if (!nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) ||
+ (*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END) ==
+ (flags & NFT_SET_ELEM_INTERVAL_END)) {
+ *elem = rbe;
+ return true;
+ }
+ return false;
+ }
+ }
+
+ if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
+ nft_set_elem_active(&interval->ext, genmask) &&
+ !nft_rbtree_interval_end(interval)) {
+ *elem = interval;
+ return true;
+ }
+
return false;
}
+static void *nft_rbtree_get(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem, unsigned int flags)
+{
+ struct nft_rbtree *priv = nft_set_priv(set);
+ unsigned int seq = read_seqcount_begin(&priv->count);
+ struct nft_rbtree_elem *rbe = ERR_PTR(-ENOENT);
+ const u32 *key = (const u32 *)&elem->key.val;
+ u8 genmask = nft_genmask_cur(net);
+ bool ret;
+
+ ret = __nft_rbtree_get(net, set, key, &rbe, seq, flags, genmask);
+ if (ret || !read_seqcount_retry(&priv->count, seq))
+ return rbe;
+
+ read_lock_bh(&priv->lock);
+ seq = read_seqcount_begin(&priv->count);
+ ret = __nft_rbtree_get(net, set, key, &rbe, seq, flags, genmask);
+ if (!ret)
+ rbe = ERR_PTR(-ENOENT);
+ read_unlock_bh(&priv->lock);
+
+ return rbe;
+}
+
static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
struct nft_rbtree_elem *new,
struct nft_set_ext **ext)
@@ -130,7 +222,7 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
}
}
}
- rb_link_node(&new->node, parent, p);
+ rb_link_node_rcu(&new->node, parent, p);
rb_insert_color(&new->node, &priv->root);
return 0;
}
@@ -144,7 +236,9 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
int err;
write_lock_bh(&priv->lock);
+ write_seqcount_begin(&priv->count);
err = __nft_rbtree_insert(net, set, rbe, ext);
+ write_seqcount_end(&priv->count);
write_unlock_bh(&priv->lock);
return err;
@@ -158,7 +252,9 @@ static void nft_rbtree_remove(const struct net *net,
struct nft_rbtree_elem *rbe = elem->priv;
write_lock_bh(&priv->lock);
+ write_seqcount_begin(&priv->count);
rb_erase(&rbe->node, &priv->root);
+ write_seqcount_end(&priv->count);
write_unlock_bh(&priv->lock);
}
@@ -264,6 +360,7 @@ static int nft_rbtree_init(const struct nft_set *set,
struct nft_rbtree *priv = nft_set_priv(set);
rwlock_init(&priv->lock);
+ seqcount_init(&priv->count);
priv->root = RB_ROOT;
return 0;
}
@@ -311,6 +408,7 @@ static struct nft_set_ops nft_rbtree_ops __read_mostly = {
.activate = nft_rbtree_activate,
.lookup = nft_rbtree_lookup,
.walk = nft_rbtree_walk,
+ .get = nft_rbtree_get,
.features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT,
};
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index e1648238a9c9..a77dd514297c 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -892,7 +892,7 @@ void *xt_copy_counters_from_user(const void __user *user, unsigned int len,
if (copy_from_user(&compat_tmp, user, sizeof(compat_tmp)) != 0)
return ERR_PTR(-EFAULT);
- strlcpy(info->name, compat_tmp.name, sizeof(info->name));
+ memcpy(info->name, compat_tmp.name, sizeof(info->name) - 1);
info->num_counters = compat_tmp.num_counters;
user += sizeof(compat_tmp);
} else
@@ -905,9 +905,9 @@ void *xt_copy_counters_from_user(const void __user *user, unsigned int len,
if (copy_from_user(info, user, sizeof(*info)) != 0)
return ERR_PTR(-EFAULT);
- info->name[sizeof(info->name) - 1] = '\0';
user += sizeof(*info);
}
+ info->name[sizeof(info->name) - 1] = '\0';
size = sizeof(struct xt_counters);
size *= info->num_counters;
@@ -1153,6 +1153,7 @@ xt_replace_table(struct xt_table *table,
int *error)
{
struct xt_table_info *private;
+ unsigned int cpu;
int ret;
ret = xt_jumpstack_alloc(newinfo);
@@ -1182,26 +1183,34 @@ xt_replace_table(struct xt_table *table,
smp_wmb();
table->private = newinfo;
+ /* make sure all cpus see new ->private value */
+ smp_wmb();
+
/*
* Even though table entries have now been swapped, other CPU's
- * may still be using the old entries. This is okay, because
- * resynchronization happens because of the locking done
- * during the get_counters() routine.
+ * may still be using the old entries...
*/
local_bh_enable();
+ /* ... so wait for even xt_recseq on all cpus */
+ for_each_possible_cpu(cpu) {
+ seqcount_t *s = &per_cpu(xt_recseq, cpu);
+ u32 seq = raw_read_seqcount(s);
+
+ if (seq & 1) {
+ do {
+ cond_resched();
+ cpu_relax();
+ } while (seq == raw_read_seqcount(s));
+ }
+ }
+
#ifdef CONFIG_AUDIT
if (audit_enabled) {
- struct audit_buffer *ab;
-
- ab = audit_log_start(current->audit_context, GFP_KERNEL,
- AUDIT_NETFILTER_CFG);
- if (ab) {
- audit_log_format(ab, "table=%s family=%u entries=%u",
- table->name, table->af,
- private->number);
- audit_log_end(ab);
- }
+ audit_log(current->audit_context, GFP_KERNEL,
+ AUDIT_NETFILTER_CFG,
+ "table=%s family=%u entries=%u",
+ table->name, table->af, private->number);
}
#endif
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 623ef37de886..5a152e2acfd5 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -121,9 +121,9 @@ xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par,
{
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
typeof(nf_ct_timeout_find_get_hook) timeout_find_get;
+ const struct nf_conntrack_l4proto *l4proto;
struct ctnl_timeout *timeout;
struct nf_conn_timeout *timeout_ext;
- struct nf_conntrack_l4proto *l4proto;
int ret = 0;
u8 proto;
diff --git a/net/netfilter/xt_NETMAP.c b/net/netfilter/xt_NETMAP.c
index e45a01255e70..58aa9dd3c5b7 100644
--- a/net/netfilter/xt_NETMAP.c
+++ b/net/netfilter/xt_NETMAP.c
@@ -77,10 +77,10 @@ netmap_tg4(struct sk_buff *skb, const struct xt_action_param *par)
const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
struct nf_nat_range newrange;
- NF_CT_ASSERT(xt_hooknum(par) == NF_INET_PRE_ROUTING ||
- xt_hooknum(par) == NF_INET_POST_ROUTING ||
- xt_hooknum(par) == NF_INET_LOCAL_OUT ||
- xt_hooknum(par) == NF_INET_LOCAL_IN);
+ WARN_ON(xt_hooknum(par) != NF_INET_PRE_ROUTING &&
+ xt_hooknum(par) != NF_INET_POST_ROUTING &&
+ xt_hooknum(par) != NF_INET_LOCAL_OUT &&
+ xt_hooknum(par) != NF_INET_LOCAL_IN);
ct = nf_ct_get(skb, &ctinfo);
netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip);
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index c64aca611ac5..9dae4d665965 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -62,11 +62,9 @@ static u_int32_t tcpmss_reverse_mtu(struct net *net,
memset(fl6, 0, sizeof(*fl6));
fl6->daddr = ipv6_hdr(skb)->saddr;
}
- rcu_read_lock();
ai = nf_get_afinfo(family);
if (ai != NULL)
ai->route(net, (struct dst_entry **)&rt, &fl, false);
- rcu_read_unlock();
if (rt != NULL) {
mtu = dst_mtu(&rt->dst);
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index d767e35fff6b..17d7705e3bd4 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -70,13 +70,11 @@ tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr)
return user_laddr;
laddr = 0;
- rcu_read_lock();
indev = __in_dev_get_rcu(skb->dev);
for_primary_ifa(indev) {
laddr = ifa->ifa_local;
break;
} endfor_ifa(indev);
- rcu_read_unlock();
return laddr ? laddr : daddr;
}
@@ -125,7 +123,7 @@ nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp,
__tcp_hdrlen(tcph),
saddr, sport,
daddr, dport,
- in->ifindex);
+ in->ifindex, 0);
if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
sk = NULL;
@@ -195,7 +193,7 @@ nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp,
thoff + __tcp_hdrlen(tcph),
saddr, sport,
daddr, ntohs(dport),
- in->ifindex);
+ in->ifindex, 0);
if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
sk = NULL;
@@ -208,7 +206,7 @@ nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp,
case NFT_LOOKUP_ESTABLISHED:
sk = __inet6_lookup_established(net, &tcp_hashinfo,
saddr, sport, daddr, ntohs(dport),
- in->ifindex);
+ in->ifindex, 0);
break;
default:
BUG();
@@ -391,7 +389,6 @@ tproxy_laddr6(struct sk_buff *skb, const struct in6_addr *user_laddr,
return user_laddr;
laddr = NULL;
- rcu_read_lock();
indev = __in6_dev_get(skb->dev);
if (indev) {
read_lock_bh(&indev->lock);
@@ -404,7 +401,6 @@ tproxy_laddr6(struct sk_buff *skb, const struct in6_addr *user_laddr,
}
read_unlock_bh(&indev->lock);
}
- rcu_read_unlock();
return laddr ? laddr : daddr;
}
diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c
index e329dabde35f..3b2be2ae6987 100644
--- a/net/netfilter/xt_addrtype.c
+++ b/net/netfilter/xt_addrtype.c
@@ -47,8 +47,6 @@ static u32 match_lookup_rt6(struct net *net, const struct net_device *dev,
if (dev)
flow.flowi6_oif = dev->ifindex;
- rcu_read_lock();
-
afinfo = nf_get_afinfo(NFPROTO_IPV6);
if (afinfo != NULL) {
const struct nf_ipv6_ops *v6ops;
@@ -63,7 +61,6 @@ static u32 match_lookup_rt6(struct net *net, const struct net_device *dev,
} else {
route_err = 1;
}
- rcu_read_unlock();
if (route_err)
return XT_ADDRTYPE_UNREACHABLE;
diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c
index 38986a95216c..041da0d9c06f 100644
--- a/net/netfilter/xt_bpf.c
+++ b/net/netfilter/xt_bpf.c
@@ -8,6 +8,7 @@
*/
#include <linux/module.h>
+#include <linux/syscalls.h>
#include <linux/skbuff.h>
#include <linux/filter.h>
#include <linux/bpf.h>
@@ -49,6 +50,22 @@ static int __bpf_mt_check_fd(int fd, struct bpf_prog **ret)
return 0;
}
+static int __bpf_mt_check_path(const char *path, struct bpf_prog **ret)
+{
+ mm_segment_t oldfs = get_fs();
+ int retval, fd;
+
+ set_fs(KERNEL_DS);
+ fd = bpf_obj_get_user(path, 0);
+ set_fs(oldfs);
+ if (fd < 0)
+ return fd;
+
+ retval = __bpf_mt_check_fd(fd, ret);
+ sys_close(fd);
+ return retval;
+}
+
static int bpf_mt_check(const struct xt_mtchk_param *par)
{
struct xt_bpf_info *info = par->matchinfo;
@@ -66,9 +83,10 @@ static int bpf_mt_check_v1(const struct xt_mtchk_param *par)
return __bpf_mt_check_bytecode(info->bpf_program,
info->bpf_program_num_elem,
&info->filter);
- else if (info->mode == XT_BPF_MODE_FD_PINNED ||
- info->mode == XT_BPF_MODE_FD_ELF)
+ else if (info->mode == XT_BPF_MODE_FD_ELF)
return __bpf_mt_check_fd(info->fd, &info->filter);
+ else if (info->mode == XT_BPF_MODE_PATH_PINNED)
+ return __bpf_mt_check_path(info->path, &info->filter);
else
return -EINVAL;
}
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index b8fd4ab762ed..a6214f235333 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -46,7 +46,6 @@
struct xt_connlimit_conn {
struct hlist_node node;
struct nf_conntrack_tuple tuple;
- union nf_inet_addr addr;
};
struct xt_connlimit_rb {
@@ -58,8 +57,7 @@ struct xt_connlimit_rb {
static spinlock_t xt_connlimit_locks[CONNLIMIT_LOCK_SLOTS] __cacheline_aligned_in_smp;
struct xt_connlimit_data {
- struct rb_root climit_root4[CONNLIMIT_SLOTS];
- struct rb_root climit_root6[CONNLIMIT_SLOTS];
+ struct rb_root climit_root[CONNLIMIT_SLOTS];
};
static u_int32_t connlimit_rnd __read_mostly;
@@ -73,16 +71,9 @@ static inline unsigned int connlimit_iphash(__be32 addr)
}
static inline unsigned int
-connlimit_iphash6(const union nf_inet_addr *addr,
- const union nf_inet_addr *mask)
+connlimit_iphash6(const union nf_inet_addr *addr)
{
- union nf_inet_addr res;
- unsigned int i;
-
- for (i = 0; i < ARRAY_SIZE(addr->ip6); ++i)
- res.ip6[i] = addr->ip6[i] & mask->ip6[i];
-
- return jhash2((u32 *)res.ip6, ARRAY_SIZE(res.ip6),
+ return jhash2((u32 *)addr->ip6, ARRAY_SIZE(addr->ip6),
connlimit_rnd) % CONNLIMIT_SLOTS;
}
@@ -96,24 +87,13 @@ static inline bool already_closed(const struct nf_conn *conn)
}
static int
-same_source_net(const union nf_inet_addr *addr,
- const union nf_inet_addr *mask,
- const union nf_inet_addr *u3, u_int8_t family)
+same_source(const union nf_inet_addr *addr,
+ const union nf_inet_addr *u3, u_int8_t family)
{
- if (family == NFPROTO_IPV4) {
- return ntohl(addr->ip & mask->ip) -
- ntohl(u3->ip & mask->ip);
- } else {
- union nf_inet_addr lh, rh;
- unsigned int i;
-
- for (i = 0; i < ARRAY_SIZE(addr->ip6); ++i) {
- lh.ip6[i] = addr->ip6[i] & mask->ip6[i];
- rh.ip6[i] = u3->ip6[i] & mask->ip6[i];
- }
+ if (family == NFPROTO_IPV4)
+ return ntohl(addr->ip) - ntohl(u3->ip);
- return memcmp(&lh.ip6, &rh.ip6, sizeof(lh.ip6));
- }
+ return memcmp(addr->ip6, u3->ip6, sizeof(addr->ip6));
}
static bool add_hlist(struct hlist_head *head,
@@ -126,7 +106,6 @@ static bool add_hlist(struct hlist_head *head,
if (conn == NULL)
return false;
conn->tuple = *tuple;
- conn->addr = *addr;
hlist_add_head(&conn->node, head);
return true;
}
@@ -144,7 +123,6 @@ static unsigned int check_hlist(struct net *net,
unsigned int length = 0;
*addit = true;
- rcu_read_lock();
/* check the saved connections */
hlist_for_each_entry_safe(conn, n, head, node) {
@@ -179,8 +157,6 @@ static unsigned int check_hlist(struct net *net,
length++;
}
- rcu_read_unlock();
-
return length;
}
@@ -200,7 +176,7 @@ static void tree_nodes_free(struct rb_root *root,
static unsigned int
count_tree(struct net *net, struct rb_root *root,
const struct nf_conntrack_tuple *tuple,
- const union nf_inet_addr *addr, const union nf_inet_addr *mask,
+ const union nf_inet_addr *addr,
u8 family, const struct nf_conntrack_zone *zone)
{
struct xt_connlimit_rb *gc_nodes[CONNLIMIT_GC_MAX_NODES];
@@ -221,7 +197,7 @@ count_tree(struct net *net, struct rb_root *root,
rbconn = rb_entry(*rbnode, struct xt_connlimit_rb, node);
parent = *rbnode;
- diff = same_source_net(addr, mask, &rbconn->addr, family);
+ diff = same_source(addr, &rbconn->addr, family);
if (diff < 0) {
rbnode = &((*rbnode)->rb_left);
} else if (diff > 0) {
@@ -274,7 +250,6 @@ count_tree(struct net *net, struct rb_root *root,
}
conn->tuple = *tuple;
- conn->addr = *addr;
rbconn->addr = *addr;
INIT_HLIST_HEAD(&rbconn->hhead);
@@ -289,7 +264,6 @@ static int count_them(struct net *net,
struct xt_connlimit_data *data,
const struct nf_conntrack_tuple *tuple,
const union nf_inet_addr *addr,
- const union nf_inet_addr *mask,
u_int8_t family,
const struct nf_conntrack_zone *zone)
{
@@ -297,17 +271,15 @@ static int count_them(struct net *net,
int count;
u32 hash;
- if (family == NFPROTO_IPV6) {
- hash = connlimit_iphash6(addr, mask);
- root = &data->climit_root6[hash];
- } else {
- hash = connlimit_iphash(addr->ip & mask->ip);
- root = &data->climit_root4[hash];
- }
+ if (family == NFPROTO_IPV6)
+ hash = connlimit_iphash6(addr);
+ else
+ hash = connlimit_iphash(addr->ip);
+ root = &data->climit_root[hash];
spin_lock_bh(&xt_connlimit_locks[hash % CONNLIMIT_LOCK_SLOTS]);
- count = count_tree(net, root, tuple, addr, mask, family, zone);
+ count = count_tree(net, root, tuple, addr, family, zone);
spin_unlock_bh(&xt_connlimit_locks[hash % CONNLIMIT_LOCK_SLOTS]);
@@ -338,16 +310,23 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
if (xt_family(par) == NFPROTO_IPV6) {
const struct ipv6hdr *iph = ipv6_hdr(skb);
+ unsigned int i;
+
memcpy(&addr.ip6, (info->flags & XT_CONNLIMIT_DADDR) ?
&iph->daddr : &iph->saddr, sizeof(addr.ip6));
+
+ for (i = 0; i < ARRAY_SIZE(addr.ip6); ++i)
+ addr.ip6[i] &= info->mask.ip6[i];
} else {
const struct iphdr *iph = ip_hdr(skb);
addr.ip = (info->flags & XT_CONNLIMIT_DADDR) ?
iph->daddr : iph->saddr;
+
+ addr.ip &= info->mask.ip;
}
connections = count_them(net, info->data, tuple_ptr, &addr,
- &info->mask, xt_family(par), zone);
+ xt_family(par), zone);
if (connections == 0)
/* kmalloc failed, drop it entirely */
goto hotdrop;
@@ -382,10 +361,8 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par)
return -ENOMEM;
}
- for (i = 0; i < ARRAY_SIZE(info->data->climit_root4); ++i)
- info->data->climit_root4[i] = RB_ROOT;
- for (i = 0; i < ARRAY_SIZE(info->data->climit_root6); ++i)
- info->data->climit_root6[i] = RB_ROOT;
+ for (i = 0; i < ARRAY_SIZE(info->data->climit_root); ++i)
+ info->data->climit_root[i] = RB_ROOT;
return 0;
}
@@ -416,10 +393,8 @@ static void connlimit_mt_destroy(const struct xt_mtdtor_param *par)
nf_ct_netns_put(par->net, par->family);
- for (i = 0; i < ARRAY_SIZE(info->data->climit_root4); ++i)
- destroy_tree(&info->data->climit_root4[i]);
- for (i = 0; i < ARRAY_SIZE(info->data->climit_root6); ++i)
- destroy_tree(&info->data->climit_root6[i]);
+ for (i = 0; i < ARRAY_SIZE(info->data->climit_root); ++i)
+ destroy_tree(&info->data->climit_root[i]);
kfree(info->data);
}
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 762e1874f28b..5da8746f7b88 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -35,6 +35,7 @@
#include <linux/netfilter_ipv6/ip6_tables.h>
#include <linux/netfilter/xt_hashlimit.h>
#include <linux/mutex.h>
+#include <linux/kernel.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
@@ -56,6 +57,7 @@ static inline struct hashlimit_net *hashlimit_pernet(struct net *net)
}
/* need to declare this at the top */
+static const struct file_operations dl_file_ops_v2;
static const struct file_operations dl_file_ops_v1;
static const struct file_operations dl_file_ops;
@@ -87,8 +89,19 @@ struct dsthash_ent {
unsigned long expires; /* precalculated expiry time */
struct {
unsigned long prev; /* last modification */
- u_int64_t credit;
- u_int64_t credit_cap, cost;
+ union {
+ struct {
+ u_int64_t credit;
+ u_int64_t credit_cap;
+ u_int64_t cost;
+ };
+ struct {
+ u_int32_t interval, prev_window;
+ u_int64_t current_rate;
+ u_int64_t rate;
+ int64_t burst;
+ };
+ };
} rateinfo;
struct rcu_head rcu;
};
@@ -99,7 +112,7 @@ struct xt_hashlimit_htable {
u_int8_t family;
bool rnd_initialized;
- struct hashlimit_cfg2 cfg; /* config */
+ struct hashlimit_cfg3 cfg; /* config */
/* used internally */
spinlock_t lock; /* lock for list_head */
@@ -116,10 +129,10 @@ struct xt_hashlimit_htable {
};
static int
-cfg_copy(struct hashlimit_cfg2 *to, void *from, int revision)
+cfg_copy(struct hashlimit_cfg3 *to, const void *from, int revision)
{
if (revision == 1) {
- struct hashlimit_cfg1 *cfg = from;
+ struct hashlimit_cfg1 *cfg = (struct hashlimit_cfg1 *)from;
to->mode = cfg->mode;
to->avg = cfg->avg;
@@ -131,7 +144,19 @@ cfg_copy(struct hashlimit_cfg2 *to, void *from, int revision)
to->srcmask = cfg->srcmask;
to->dstmask = cfg->dstmask;
} else if (revision == 2) {
- memcpy(to, from, sizeof(struct hashlimit_cfg2));
+ struct hashlimit_cfg2 *cfg = (struct hashlimit_cfg2 *)from;
+
+ to->mode = cfg->mode;
+ to->avg = cfg->avg;
+ to->burst = cfg->burst;
+ to->size = cfg->size;
+ to->max = cfg->max;
+ to->gc_interval = cfg->gc_interval;
+ to->expire = cfg->expire;
+ to->srcmask = cfg->srcmask;
+ to->dstmask = cfg->dstmask;
+ } else if (revision == 3) {
+ memcpy(to, from, sizeof(struct hashlimit_cfg3));
} else {
return -EINVAL;
}
@@ -240,13 +265,14 @@ dsthash_free(struct xt_hashlimit_htable *ht, struct dsthash_ent *ent)
}
static void htable_gc(struct work_struct *work);
-static int htable_create(struct net *net, struct hashlimit_cfg2 *cfg,
+static int htable_create(struct net *net, struct hashlimit_cfg3 *cfg,
const char *name, u_int8_t family,
struct xt_hashlimit_htable **out_hinfo,
int revision)
{
struct hashlimit_net *hashlimit_net = hashlimit_pernet(net);
struct xt_hashlimit_htable *hinfo;
+ const struct file_operations *fops;
unsigned int size, i;
int ret;
@@ -254,7 +280,7 @@ static int htable_create(struct net *net, struct hashlimit_cfg2 *cfg,
size = cfg->size;
} else {
size = (totalram_pages << PAGE_SHIFT) / 16384 /
- sizeof(struct list_head);
+ sizeof(struct hlist_head);
if (totalram_pages > 1024 * 1024 * 1024 / PAGE_SIZE)
size = 8192;
if (size < 16)
@@ -262,13 +288,13 @@ static int htable_create(struct net *net, struct hashlimit_cfg2 *cfg,
}
/* FIXME: don't use vmalloc() here or anywhere else -HW */
hinfo = vmalloc(sizeof(struct xt_hashlimit_htable) +
- sizeof(struct list_head) * size);
+ sizeof(struct hlist_head) * size);
if (hinfo == NULL)
return -ENOMEM;
*out_hinfo = hinfo;
/* copy match config into hashtable config */
- ret = cfg_copy(&hinfo->cfg, (void *)cfg, 2);
+ ret = cfg_copy(&hinfo->cfg, (void *)cfg, 3);
if (ret)
return ret;
@@ -293,11 +319,21 @@ static int htable_create(struct net *net, struct hashlimit_cfg2 *cfg,
}
spin_lock_init(&hinfo->lock);
+ switch (revision) {
+ case 1:
+ fops = &dl_file_ops_v1;
+ break;
+ case 2:
+ fops = &dl_file_ops_v2;
+ break;
+ default:
+ fops = &dl_file_ops;
+ }
+
hinfo->pde = proc_create_data(name, 0,
(family == NFPROTO_IPV4) ?
hashlimit_net->ipt_hashlimit : hashlimit_net->ip6t_hashlimit,
- (revision == 1) ? &dl_file_ops_v1 : &dl_file_ops,
- hinfo);
+ fops, hinfo);
if (hinfo->pde == NULL) {
kfree(hinfo->name);
vfree(hinfo);
@@ -482,6 +518,25 @@ static u32 user2credits_byte(u32 user)
return (u32) (us >> 32);
}
+static u64 user2rate(u64 user)
+{
+ if (user != 0) {
+ return div64_u64(XT_HASHLIMIT_SCALE_v2, user);
+ } else {
+ pr_warn("invalid rate from userspace: %llu\n", user);
+ return 0;
+ }
+}
+
+static u64 user2rate_bytes(u32 user)
+{
+ u64 r;
+
+ r = user ? U32_MAX / user : U32_MAX;
+ r = (r - 1) << XT_HASHLIMIT_BYTE_SHIFT;
+ return r;
+}
+
static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now,
u32 mode, int revision)
{
@@ -491,6 +546,21 @@ static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now,
if (delta == 0)
return;
+ if (revision >= 3 && mode & XT_HASHLIMIT_RATE_MATCH) {
+ u64 interval = dh->rateinfo.interval * HZ;
+
+ if (delta < interval)
+ return;
+
+ dh->rateinfo.prev = now;
+ dh->rateinfo.prev_window =
+ ((dh->rateinfo.current_rate * interval) >
+ (delta * dh->rateinfo.rate));
+ dh->rateinfo.current_rate = 0;
+
+ return;
+ }
+
dh->rateinfo.prev = now;
if (mode & XT_HASHLIMIT_BYTES) {
@@ -515,7 +585,24 @@ static void rateinfo_init(struct dsthash_ent *dh,
struct xt_hashlimit_htable *hinfo, int revision)
{
dh->rateinfo.prev = jiffies;
- if (hinfo->cfg.mode & XT_HASHLIMIT_BYTES) {
+ if (revision >= 3 && hinfo->cfg.mode & XT_HASHLIMIT_RATE_MATCH) {
+ dh->rateinfo.prev_window = 0;
+ dh->rateinfo.current_rate = 0;
+ if (hinfo->cfg.mode & XT_HASHLIMIT_BYTES) {
+ dh->rateinfo.rate =
+ user2rate_bytes((u32)hinfo->cfg.avg);
+ if (hinfo->cfg.burst)
+ dh->rateinfo.burst =
+ hinfo->cfg.burst * dh->rateinfo.rate;
+ else
+ dh->rateinfo.burst = dh->rateinfo.rate;
+ } else {
+ dh->rateinfo.rate = user2rate(hinfo->cfg.avg);
+ dh->rateinfo.burst =
+ hinfo->cfg.burst + dh->rateinfo.rate;
+ }
+ dh->rateinfo.interval = hinfo->cfg.interval;
+ } else if (hinfo->cfg.mode & XT_HASHLIMIT_BYTES) {
dh->rateinfo.credit = CREDITS_PER_JIFFY_BYTES * HZ;
dh->rateinfo.cost = user2credits_byte(hinfo->cfg.avg);
dh->rateinfo.credit_cap = hinfo->cfg.burst;
@@ -648,7 +735,7 @@ static u32 hashlimit_byte_cost(unsigned int len, struct dsthash_ent *dh)
static bool
hashlimit_mt_common(const struct sk_buff *skb, struct xt_action_param *par,
struct xt_hashlimit_htable *hinfo,
- const struct hashlimit_cfg2 *cfg, int revision)
+ const struct hashlimit_cfg3 *cfg, int revision)
{
unsigned long now = jiffies;
struct dsthash_ent *dh;
@@ -659,12 +746,12 @@ hashlimit_mt_common(const struct sk_buff *skb, struct xt_action_param *par,
if (hashlimit_init_dst(hinfo, &dst, skb, par->thoff) < 0)
goto hotdrop;
- rcu_read_lock_bh();
+ local_bh_disable();
dh = dsthash_find(hinfo, &dst);
if (dh == NULL) {
dh = dsthash_alloc_init(hinfo, &dst, &race);
if (dh == NULL) {
- rcu_read_unlock_bh();
+ local_bh_enable();
goto hotdrop;
} else if (race) {
/* Already got an entry, update expiration timeout */
@@ -680,6 +767,20 @@ hashlimit_mt_common(const struct sk_buff *skb, struct xt_action_param *par,
rateinfo_recalc(dh, now, hinfo->cfg.mode, revision);
}
+ if (cfg->mode & XT_HASHLIMIT_RATE_MATCH) {
+ cost = (cfg->mode & XT_HASHLIMIT_BYTES) ? skb->len : 1;
+ dh->rateinfo.current_rate += cost;
+
+ if (!dh->rateinfo.prev_window &&
+ (dh->rateinfo.current_rate <= dh->rateinfo.burst)) {
+ spin_unlock(&dh->lock);
+ rcu_read_unlock_bh();
+ return !(cfg->mode & XT_HASHLIMIT_INVERT);
+ } else {
+ goto overlimit;
+ }
+ }
+
if (cfg->mode & XT_HASHLIMIT_BYTES)
cost = hashlimit_byte_cost(skb->len, dh);
else
@@ -689,12 +790,13 @@ hashlimit_mt_common(const struct sk_buff *skb, struct xt_action_param *par,
/* below the limit */
dh->rateinfo.credit -= cost;
spin_unlock(&dh->lock);
- rcu_read_unlock_bh();
+ local_bh_enable();
return !(cfg->mode & XT_HASHLIMIT_INVERT);
}
+overlimit:
spin_unlock(&dh->lock);
- rcu_read_unlock_bh();
+ local_bh_enable();
/* default match is underlimit - so over the limit, we need to invert */
return cfg->mode & XT_HASHLIMIT_INVERT;
@@ -708,7 +810,7 @@ hashlimit_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
struct xt_hashlimit_htable *hinfo = info->hinfo;
- struct hashlimit_cfg2 cfg = {};
+ struct hashlimit_cfg3 cfg = {};
int ret;
ret = cfg_copy(&cfg, (void *)&info->cfg, 1);
@@ -720,17 +822,33 @@ hashlimit_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
}
static bool
-hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
+hashlimit_mt_v2(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_hashlimit_mtinfo2 *info = par->matchinfo;
struct xt_hashlimit_htable *hinfo = info->hinfo;
+ struct hashlimit_cfg3 cfg = {};
+ int ret;
+
+ ret = cfg_copy(&cfg, (void *)&info->cfg, 2);
+
+ if (ret)
+ return ret;
+
+ return hashlimit_mt_common(skb, par, hinfo, &cfg, 2);
+}
+
+static bool
+hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_hashlimit_mtinfo3 *info = par->matchinfo;
+ struct xt_hashlimit_htable *hinfo = info->hinfo;
- return hashlimit_mt_common(skb, par, hinfo, &info->cfg, 2);
+ return hashlimit_mt_common(skb, par, hinfo, &info->cfg, 3);
}
static int hashlimit_mt_check_common(const struct xt_mtchk_param *par,
struct xt_hashlimit_htable **hinfo,
- struct hashlimit_cfg2 *cfg,
+ struct hashlimit_cfg3 *cfg,
const char *name, int revision)
{
struct net *net = par->net;
@@ -753,7 +871,17 @@ static int hashlimit_mt_check_common(const struct xt_mtchk_param *par,
}
/* Check for overflow. */
- if (cfg->mode & XT_HASHLIMIT_BYTES) {
+ if (revision >= 3 && cfg->mode & XT_HASHLIMIT_RATE_MATCH) {
+ if (cfg->avg == 0 || cfg->avg > U32_MAX) {
+ pr_info("hashlimit invalid rate\n");
+ return -ERANGE;
+ }
+
+ if (cfg->interval == 0) {
+ pr_info("hashlimit invalid interval\n");
+ return -EINVAL;
+ }
+ } else if (cfg->mode & XT_HASHLIMIT_BYTES) {
if (user2credits_byte(cfg->avg) == 0) {
pr_info("overflow, rate too high: %llu\n", cfg->avg);
return -EINVAL;
@@ -784,7 +912,7 @@ static int hashlimit_mt_check_common(const struct xt_mtchk_param *par,
static int hashlimit_mt_check_v1(const struct xt_mtchk_param *par)
{
struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
- struct hashlimit_cfg2 cfg = {};
+ struct hashlimit_cfg3 cfg = {};
int ret;
if (info->name[sizeof(info->name) - 1] != '\0')
@@ -799,15 +927,40 @@ static int hashlimit_mt_check_v1(const struct xt_mtchk_param *par)
&cfg, info->name, 1);
}
-static int hashlimit_mt_check(const struct xt_mtchk_param *par)
+static int hashlimit_mt_check_v2(const struct xt_mtchk_param *par)
{
struct xt_hashlimit_mtinfo2 *info = par->matchinfo;
+ struct hashlimit_cfg3 cfg = {};
+ int ret;
+
+ if (info->name[sizeof(info->name) - 1] != '\0')
+ return -EINVAL;
+
+ ret = cfg_copy(&cfg, (void *)&info->cfg, 2);
+
+ if (ret)
+ return ret;
+
+ return hashlimit_mt_check_common(par, &info->hinfo,
+ &cfg, info->name, 2);
+}
+
+static int hashlimit_mt_check(const struct xt_mtchk_param *par)
+{
+ struct xt_hashlimit_mtinfo3 *info = par->matchinfo;
if (info->name[sizeof(info->name) - 1] != '\0')
return -EINVAL;
return hashlimit_mt_check_common(par, &info->hinfo, &info->cfg,
- info->name, 2);
+ info->name, 3);
+}
+
+static void hashlimit_mt_destroy_v2(const struct xt_mtdtor_param *par)
+{
+ const struct xt_hashlimit_mtinfo2 *info = par->matchinfo;
+
+ htable_put(info->hinfo);
}
static void hashlimit_mt_destroy_v1(const struct xt_mtdtor_param *par)
@@ -819,7 +972,7 @@ static void hashlimit_mt_destroy_v1(const struct xt_mtdtor_param *par)
static void hashlimit_mt_destroy(const struct xt_mtdtor_param *par)
{
- const struct xt_hashlimit_mtinfo2 *info = par->matchinfo;
+ const struct xt_hashlimit_mtinfo3 *info = par->matchinfo;
htable_put(info->hinfo);
}
@@ -840,9 +993,20 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = {
.name = "hashlimit",
.revision = 2,
.family = NFPROTO_IPV4,
- .match = hashlimit_mt,
+ .match = hashlimit_mt_v2,
.matchsize = sizeof(struct xt_hashlimit_mtinfo2),
.usersize = offsetof(struct xt_hashlimit_mtinfo2, hinfo),
+ .checkentry = hashlimit_mt_check_v2,
+ .destroy = hashlimit_mt_destroy_v2,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "hashlimit",
+ .revision = 3,
+ .family = NFPROTO_IPV4,
+ .match = hashlimit_mt,
+ .matchsize = sizeof(struct xt_hashlimit_mtinfo3),
+ .usersize = offsetof(struct xt_hashlimit_mtinfo3, hinfo),
.checkentry = hashlimit_mt_check,
.destroy = hashlimit_mt_destroy,
.me = THIS_MODULE,
@@ -863,9 +1027,20 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = {
.name = "hashlimit",
.revision = 2,
.family = NFPROTO_IPV6,
- .match = hashlimit_mt,
+ .match = hashlimit_mt_v2,
.matchsize = sizeof(struct xt_hashlimit_mtinfo2),
.usersize = offsetof(struct xt_hashlimit_mtinfo2, hinfo),
+ .checkentry = hashlimit_mt_check_v2,
+ .destroy = hashlimit_mt_destroy_v2,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "hashlimit",
+ .revision = 3,
+ .family = NFPROTO_IPV6,
+ .match = hashlimit_mt,
+ .matchsize = sizeof(struct xt_hashlimit_mtinfo3),
+ .usersize = offsetof(struct xt_hashlimit_mtinfo3, hinfo),
.checkentry = hashlimit_mt_check,
.destroy = hashlimit_mt_destroy,
.me = THIS_MODULE,
@@ -947,6 +1122,21 @@ static void dl_seq_print(struct dsthash_ent *ent, u_int8_t family,
}
}
+static int dl_seq_real_show_v2(struct dsthash_ent *ent, u_int8_t family,
+ struct seq_file *s)
+{
+ const struct xt_hashlimit_htable *ht = s->private;
+
+ spin_lock(&ent->lock);
+ /* recalculate to show accurate numbers */
+ rateinfo_recalc(ent, jiffies, ht->cfg.mode, 2);
+
+ dl_seq_print(ent, family, s);
+
+ spin_unlock(&ent->lock);
+ return seq_has_overflowed(s);
+}
+
static int dl_seq_real_show_v1(struct dsthash_ent *ent, u_int8_t family,
struct seq_file *s)
{
@@ -969,7 +1159,7 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family,
spin_lock(&ent->lock);
/* recalculate to show accurate numbers */
- rateinfo_recalc(ent, jiffies, ht->cfg.mode, 2);
+ rateinfo_recalc(ent, jiffies, ht->cfg.mode, 3);
dl_seq_print(ent, family, s);
@@ -977,6 +1167,20 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family,
return seq_has_overflowed(s);
}
+static int dl_seq_show_v2(struct seq_file *s, void *v)
+{
+ struct xt_hashlimit_htable *htable = s->private;
+ unsigned int *bucket = (unsigned int *)v;
+ struct dsthash_ent *ent;
+
+ if (!hlist_empty(&htable->hash[*bucket])) {
+ hlist_for_each_entry(ent, &htable->hash[*bucket], node)
+ if (dl_seq_real_show_v2(ent, htable->family, s))
+ return -1;
+ }
+ return 0;
+}
+
static int dl_seq_show_v1(struct seq_file *s, void *v)
{
struct xt_hashlimit_htable *htable = s->private;
@@ -1012,6 +1216,13 @@ static const struct seq_operations dl_seq_ops_v1 = {
.show = dl_seq_show_v1
};
+static const struct seq_operations dl_seq_ops_v2 = {
+ .start = dl_seq_start,
+ .next = dl_seq_next,
+ .stop = dl_seq_stop,
+ .show = dl_seq_show_v2
+};
+
static const struct seq_operations dl_seq_ops = {
.start = dl_seq_start,
.next = dl_seq_next,
@@ -1019,6 +1230,18 @@ static const struct seq_operations dl_seq_ops = {
.show = dl_seq_show
};
+static int dl_proc_open_v2(struct inode *inode, struct file *file)
+{
+ int ret = seq_open(file, &dl_seq_ops_v2);
+
+ if (!ret) {
+ struct seq_file *sf = file->private_data;
+
+ sf->private = PDE_DATA(inode);
+ }
+ return ret;
+}
+
static int dl_proc_open_v1(struct inode *inode, struct file *file)
{
int ret = seq_open(file, &dl_seq_ops_v1);
@@ -1042,6 +1265,14 @@ static int dl_proc_open(struct inode *inode, struct file *file)
return ret;
}
+static const struct file_operations dl_file_ops_v2 = {
+ .owner = THIS_MODULE,
+ .open = dl_proc_open_v2,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release
+};
+
static const struct file_operations dl_file_ops_v1 = {
.owner = THIS_MODULE,
.open = dl_proc_open_v1,
diff --git a/net/netfilter/xt_nat.c b/net/netfilter/xt_nat.c
index 8107b3eb865f..0fd14d1eb09d 100644
--- a/net/netfilter/xt_nat.c
+++ b/net/netfilter/xt_nat.c
@@ -58,9 +58,9 @@ xt_snat_target_v0(struct sk_buff *skb, const struct xt_action_param *par)
struct nf_conn *ct;
ct = nf_ct_get(skb, &ctinfo);
- NF_CT_ASSERT(ct != NULL &&
- (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
- ctinfo == IP_CT_RELATED_REPLY));
+ WARN_ON(!(ct != NULL &&
+ (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
+ ctinfo == IP_CT_RELATED_REPLY)));
xt_nat_convert_range(&range, &mr->range[0]);
return nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
@@ -75,8 +75,8 @@ xt_dnat_target_v0(struct sk_buff *skb, const struct xt_action_param *par)
struct nf_conn *ct;
ct = nf_ct_get(skb, &ctinfo);
- NF_CT_ASSERT(ct != NULL &&
- (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
+ WARN_ON(!(ct != NULL &&
+ (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)));
xt_nat_convert_range(&range, &mr->range[0]);
return nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST);
@@ -90,9 +90,9 @@ xt_snat_target_v1(struct sk_buff *skb, const struct xt_action_param *par)
struct nf_conn *ct;
ct = nf_ct_get(skb, &ctinfo);
- NF_CT_ASSERT(ct != NULL &&
- (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
- ctinfo == IP_CT_RELATED_REPLY));
+ WARN_ON(!(ct != NULL &&
+ (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
+ ctinfo == IP_CT_RELATED_REPLY)));
return nf_nat_setup_info(ct, range, NF_NAT_MANIP_SRC);
}
@@ -105,8 +105,8 @@ xt_dnat_target_v1(struct sk_buff *skb, const struct xt_action_param *par)
struct nf_conn *ct;
ct = nf_ct_get(skb, &ctinfo);
- NF_CT_ASSERT(ct != NULL &&
- (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
+ WARN_ON(!(ct != NULL &&
+ (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)));
return nf_nat_setup_info(ct, range, NF_NAT_MANIP_DST);
}
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index 71cfa9551d08..36e14b1f061d 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -226,7 +226,6 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
sizeof(struct tcphdr), optsize, opts);
}
- rcu_read_lock();
list_for_each_entry_rcu(kf, &xt_osf_fingers[df], finger_entry) {
int foptsize, optnum;
@@ -340,7 +339,6 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
info->loglevel == XT_OSF_LOGLEVEL_FIRST)
break;
}
- rcu_read_unlock();
if (!fcount && (info->flags & XT_OSF_LOG))
nf_log_packet(net, xt_family(p), xt_hooknum(p), skb, xt_in(p),
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 3f6c4fa78bdb..245fa350a7a8 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -106,7 +106,7 @@ static DEFINE_SPINLOCK(recent_lock);
static DEFINE_MUTEX(recent_mutex);
#ifdef CONFIG_PROC_FS
-static const struct file_operations recent_old_fops, recent_mt_fops;
+static const struct file_operations recent_mt_fops;
#endif
static u_int32_t hash_rnd __read_mostly;
diff --git a/net/netfilter/xt_repldata.h b/net/netfilter/xt_repldata.h
index 8fd324116e6f..68ccbe50bb1e 100644
--- a/net/netfilter/xt_repldata.h
+++ b/net/netfilter/xt_repldata.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Today's hack: quantum tunneling in structs
*
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index e75ef39669c5..575d2153e3b8 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -76,7 +76,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
transparent = nf_sk_is_transparent(sk);
if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard &&
- transparent)
+ transparent && sk_fullsock(sk))
pskb->mark = sk->sk_mark;
if (sk != skb->sk)
@@ -133,7 +133,7 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par)
transparent = nf_sk_is_transparent(sk);
if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard &&
- transparent)
+ transparent && sk_fullsock(sk))
pskb->mark = sk->sk_mark;
if (sk != skb->sk)
diff --git a/net/netlabel/Makefile b/net/netlabel/Makefile
index d341ede0dca5..5a46381a64e7 100644
--- a/net/netlabel/Makefile
+++ b/net/netlabel/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the NetLabel subsystem.
#
diff --git a/net/netlabel/netlabel_addrlist.h b/net/netlabel/netlabel_addrlist.h
index d0f38bc9af6d..ac709f0f197b 100644
--- a/net/netlabel/netlabel_addrlist.h
+++ b/net/netlabel/netlabel_addrlist.h
@@ -87,7 +87,7 @@ static inline struct netlbl_af4list *__af4list_valid_rcu(struct list_head *s,
struct list_head *i = s;
struct netlbl_af4list *n = __af4list_entry(s);
while (i != h && !n->valid) {
- i = rcu_dereference(i->next);
+ i = rcu_dereference(list_next_rcu(i));
n = __af4list_entry(i);
}
return n;
@@ -154,7 +154,7 @@ static inline struct netlbl_af6list *__af6list_valid_rcu(struct list_head *s,
struct list_head *i = s;
struct netlbl_af6list *n = __af6list_entry(s);
while (i != h && !n->valid) {
- i = rcu_dereference(i->next);
+ i = rcu_dereference(list_next_rcu(i));
n = __af6list_entry(i);
}
return n;
diff --git a/net/netlabel/netlabel_calipso.c b/net/netlabel/netlabel_calipso.c
index d177dd066504..4d748975117d 100644
--- a/net/netlabel/netlabel_calipso.c
+++ b/net/netlabel/netlabel_calipso.c
@@ -393,7 +393,7 @@ EXPORT_SYMBOL(netlbl_calipso_ops_register);
static const struct netlbl_calipso_ops *netlbl_calipso_ops_get(void)
{
- return ACCESS_ONCE(calipso_ops);
+ return READ_ONCE(calipso_ops);
}
/**
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 5acee49db90b..b9e0ee4e22f5 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -128,7 +128,6 @@ static const char *const nlk_cb_mutex_key_strings[MAX_LINKS + 1] = {
};
static int netlink_dump(struct sock *sk);
-static void netlink_skb_destructor(struct sk_buff *skb);
/* nl_table locking explained:
* Lookup and traversal are protected with an RCU read-side lock. Insertion
@@ -691,6 +690,9 @@ static void deferred_put_nlk_sk(struct rcu_head *head)
struct netlink_sock *nlk = container_of(head, struct netlink_sock, rcu);
struct sock *sk = &nlk->sk;
+ kfree(nlk->groups);
+ nlk->groups = NULL;
+
if (!refcount_dec_and_test(&sk->sk_refcnt))
return;
@@ -769,9 +771,6 @@ static int netlink_release(struct socket *sock)
netlink_table_ungrab();
}
- kfree(nlk->groups);
- nlk->groups = NULL;
-
local_bh_disable();
sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1);
local_bh_enable();
@@ -955,7 +954,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
struct net *net = sock_net(sk);
struct netlink_sock *nlk = nlk_sk(sk);
struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
- int err;
+ int err = 0;
long unsigned int groups = nladdr->nl_groups;
bool bound;
@@ -983,6 +982,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
return -EINVAL;
}
+ netlink_lock_table();
if (nlk->netlink_bind && groups) {
int group;
@@ -993,7 +993,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
if (!err)
continue;
netlink_undo_bind(group, groups, sk);
- return err;
+ goto unlock;
}
}
@@ -1006,12 +1006,13 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
netlink_autobind(sock);
if (err) {
netlink_undo_bind(nlk->ngroups, groups, sk);
- return err;
+ goto unlock;
}
}
if (!groups && (nlk->groups == NULL || !(u32)nlk->groups[0]))
- return 0;
+ goto unlock;
+ netlink_unlock_table();
netlink_table_grab();
netlink_update_subscriptions(sk, nlk->subscriptions +
@@ -1022,6 +1023,10 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
netlink_table_ungrab();
return 0;
+
+unlock:
+ netlink_unlock_table();
+ return err;
}
static int netlink_connect(struct socket *sock, struct sockaddr *addr,
@@ -1079,7 +1084,9 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr,
nladdr->nl_groups = netlink_group_mask(nlk->dst_group);
} else {
nladdr->nl_pid = nlk->portid;
+ netlink_lock_table();
nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0;
+ netlink_unlock_table();
}
return 0;
}
@@ -2128,7 +2135,7 @@ static int netlink_dump(struct sock *sk)
struct sk_buff *skb = NULL;
struct nlmsghdr *nlh;
struct module *module;
- int len, err = -ENOBUFS;
+ int err = -ENOBUFS;
int alloc_min_size;
int alloc_size;
@@ -2175,9 +2182,11 @@ static int netlink_dump(struct sock *sk)
skb_reserve(skb, skb_tailroom(skb) - alloc_size);
netlink_skb_set_owner_r(skb, sk);
- len = cb->dump(skb, cb);
+ if (nlk->dump_done_errno > 0)
+ nlk->dump_done_errno = cb->dump(skb, cb);
- if (len > 0) {
+ if (nlk->dump_done_errno > 0 ||
+ skb_tailroom(skb) < nlmsg_total_size(sizeof(nlk->dump_done_errno))) {
mutex_unlock(nlk->cb_mutex);
if (sk_filter(sk, skb))
@@ -2187,13 +2196,15 @@ static int netlink_dump(struct sock *sk)
return 0;
}
- nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI);
- if (!nlh)
+ nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE,
+ sizeof(nlk->dump_done_errno), NLM_F_MULTI);
+ if (WARN_ON(!nlh))
goto errout_skb;
nl_dump_check_consistent(cb, nlh);
- memcpy(nlmsg_data(nlh), &len, sizeof(len));
+ memcpy(nlmsg_data(nlh), &nlk->dump_done_errno,
+ sizeof(nlk->dump_done_errno));
if (sk_filter(sk, skb))
kfree_skb(skb);
@@ -2258,14 +2269,19 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
cb->min_dump_alloc = control->min_dump_alloc;
cb->skb = skb;
+ if (cb->start) {
+ ret = cb->start(cb);
+ if (ret)
+ goto error_unlock;
+ }
+
nlk->cb_running = true;
+ nlk->dump_done_errno = INT_MAX;
mutex_unlock(nlk->cb_mutex);
- if (cb->start)
- cb->start(cb);
-
ret = netlink_dump(sk);
+
sock_put(sk);
if (ret)
@@ -2295,27 +2311,26 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
size_t tlvlen = 0;
struct netlink_sock *nlk = nlk_sk(NETLINK_CB(in_skb).sk);
unsigned int flags = 0;
+ bool nlk_has_extack = nlk->flags & NETLINK_F_EXT_ACK;
/* Error messages get the original request appened, unless the user
* requests to cap the error message, and get extra error data if
* requested.
*/
+ if (nlk_has_extack && extack && extack->_msg)
+ tlvlen += nla_total_size(strlen(extack->_msg) + 1);
+
if (err) {
if (!(nlk->flags & NETLINK_F_CAP_ACK))
payload += nlmsg_len(nlh);
else
flags |= NLM_F_CAPPED;
- if (nlk->flags & NETLINK_F_EXT_ACK && extack) {
- if (extack->_msg)
- tlvlen += nla_total_size(strlen(extack->_msg) + 1);
- if (extack->bad_attr)
- tlvlen += nla_total_size(sizeof(u32));
- }
+ if (nlk_has_extack && extack && extack->bad_attr)
+ tlvlen += nla_total_size(sizeof(u32));
} else {
flags |= NLM_F_CAPPED;
- if (nlk->flags & NETLINK_F_EXT_ACK &&
- extack && extack->cookie_len)
+ if (nlk_has_extack && extack && extack->cookie_len)
tlvlen += nla_total_size(extack->cookie_len);
}
@@ -2324,16 +2339,8 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
skb = nlmsg_new(payload + tlvlen, GFP_KERNEL);
if (!skb) {
- struct sock *sk;
-
- sk = netlink_lookup(sock_net(in_skb->sk),
- in_skb->sk->sk_protocol,
- NETLINK_CB(in_skb).portid);
- if (sk) {
- sk->sk_err = ENOBUFS;
- sk->sk_error_report(sk);
- sock_put(sk);
- }
+ NETLINK_CB(in_skb).sk->sk_err = ENOBUFS;
+ NETLINK_CB(in_skb).sk->sk_error_report(NETLINK_CB(in_skb).sk);
return;
}
@@ -2343,11 +2350,12 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
errmsg->error = err;
memcpy(&errmsg->msg, nlh, payload > sizeof(*errmsg) ? nlh->nlmsg_len : sizeof(*nlh));
- if (nlk->flags & NETLINK_F_EXT_ACK && extack) {
+ if (nlk_has_extack && extack) {
+ if (extack->_msg) {
+ WARN_ON(nla_put_string(skb, NLMSGERR_ATTR_MSG,
+ extack->_msg));
+ }
if (err) {
- if (extack->_msg)
- WARN_ON(nla_put_string(skb, NLMSGERR_ATTR_MSG,
- extack->_msg));
if (extack->bad_attr &&
!WARN_ON((u8 *)extack->bad_attr < in_skb->data ||
(u8 *)extack->bad_attr >= in_skb->data +
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index 3490f2430532..962de7b3c023 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _AF_NETLINK_H
#define _AF_NETLINK_H
@@ -33,6 +34,7 @@ struct netlink_sock {
wait_queue_head_t wait;
bool bound;
bool cb_running;
+ int dump_done_errno;
struct netlink_callback cb;
struct mutex *cb_mutex;
struct mutex cb_def_mutex;
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 10f8b4cff40a..d444daf1ac04 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* NETLINK Generic Netlink Family
*
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index ebf16f7f9089..2dec3583c97d 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -241,9 +241,9 @@ void nr_destroy_socket(struct sock *);
/*
* Handler for deferred kills.
*/
-static void nr_destroy_timer(unsigned long data)
+static void nr_destroy_timer(struct timer_list *t)
{
- struct sock *sk=(struct sock *)data;
+ struct sock *sk = from_timer(sk, t, sk_timer);
bh_lock_sock(sk);
sock_hold(sk);
nr_destroy_socket(sk);
@@ -284,7 +284,7 @@ void nr_destroy_socket(struct sock *sk)
if (sk_has_allocations(sk)) {
/* Defer: outstanding buffers */
- sk->sk_timer.function = nr_destroy_timer;
+ sk->sk_timer.function = (TIMER_FUNC_TYPE)nr_destroy_timer;
sk->sk_timer.expires = jiffies + 2 * HZ;
add_timer(&sk->sk_timer);
} else
diff --git a/net/netrom/nr_in.c b/net/netrom/nr_in.c
index 80dbd0beb516..fbfdae452ff9 100644
--- a/net/netrom/nr_in.c
+++ b/net/netrom/nr_in.c
@@ -125,7 +125,7 @@ static int nr_state2_machine(struct sock *sk, struct sk_buff *skb,
case NR_DISCREQ:
nr_write_internal(sk, NR_DISCACK);
-
+ /* fall through */
case NR_DISCACK:
nr_disconnect(sk, 0);
break;
diff --git a/net/netrom/nr_loopback.c b/net/netrom/nr_loopback.c
index 94d4e922af53..989ae647825e 100644
--- a/net/netrom/nr_loopback.c
+++ b/net/netrom/nr_loopback.c
@@ -18,7 +18,7 @@
static void nr_loopback_timer(unsigned long);
static struct sk_buff_head loopback_queue;
-static DEFINE_TIMER(loopback_timer, nr_loopback_timer, 0, 0);
+static DEFINE_TIMER(loopback_timer, nr_loopback_timer);
void __init nr_loopback_init(void)
{
diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c
index 0c59354e280e..75e6ba970fde 100644
--- a/net/netrom/nr_route.c
+++ b/net/netrom/nr_route.c
@@ -80,6 +80,19 @@ static struct nr_neigh *nr_neigh_get_dev(ax25_address *callsign,
static void nr_remove_neigh(struct nr_neigh *);
+/* re-sort the routes in quality order. */
+static void re_sort_routes(struct nr_node *nr_node, int x, int y)
+{
+ if (nr_node->routes[y].quality > nr_node->routes[x].quality) {
+ if (nr_node->which == x)
+ nr_node->which = y;
+ else if (nr_node->which == y)
+ nr_node->which = x;
+
+ swap(nr_node->routes[x], nr_node->routes[y]);
+ }
+}
+
/*
* Add a new route to a node, and in the process add the node and the
* neighbour if it is new.
@@ -90,7 +103,6 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic,
{
struct nr_node *nr_node;
struct nr_neigh *nr_neigh;
- struct nr_route nr_route;
int i, found;
struct net_device *odev;
@@ -251,49 +263,11 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic,
/* Now re-sort the routes in quality order */
switch (nr_node->count) {
case 3:
- if (nr_node->routes[1].quality > nr_node->routes[0].quality) {
- switch (nr_node->which) {
- case 0:
- nr_node->which = 1;
- break;
- case 1:
- nr_node->which = 0;
- break;
- }
- nr_route = nr_node->routes[0];
- nr_node->routes[0] = nr_node->routes[1];
- nr_node->routes[1] = nr_route;
- }
- if (nr_node->routes[2].quality > nr_node->routes[1].quality) {
- switch (nr_node->which) {
- case 1: nr_node->which = 2;
- break;
-
- case 2: nr_node->which = 1;
- break;
-
- default:
- break;
- }
- nr_route = nr_node->routes[1];
- nr_node->routes[1] = nr_node->routes[2];
- nr_node->routes[2] = nr_route;
- }
+ re_sort_routes(nr_node, 0, 1);
+ re_sort_routes(nr_node, 1, 2);
+ /* fall through */
case 2:
- if (nr_node->routes[1].quality > nr_node->routes[0].quality) {
- switch (nr_node->which) {
- case 0: nr_node->which = 1;
- break;
-
- case 1: nr_node->which = 0;
- break;
-
- default: break;
- }
- nr_route = nr_node->routes[0];
- nr_node->routes[0] = nr_node->routes[1];
- nr_node->routes[1] = nr_route;
- }
+ re_sort_routes(nr_node, 0, 1);
case 1:
break;
}
@@ -384,6 +358,7 @@ static int nr_del_node(ax25_address *callsign, ax25_address *neighbour, struct n
switch (i) {
case 0:
nr_node->routes[0] = nr_node->routes[1];
+ /* fall through */
case 1:
nr_node->routes[1] = nr_node->routes[2];
case 2:
@@ -553,6 +528,7 @@ void nr_rt_device_down(struct net_device *dev)
switch (i) {
case 0:
t->routes[0] = t->routes[1];
+ /* fall through */
case 1:
t->routes[1] = t->routes[2];
case 2:
diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c
index 94d05806a9a2..43569aea0f5e 100644
--- a/net/netrom/nr_timer.c
+++ b/net/netrom/nr_timer.c
@@ -29,24 +29,23 @@
#include <linux/interrupt.h>
#include <net/netrom.h>
-static void nr_heartbeat_expiry(unsigned long);
-static void nr_t1timer_expiry(unsigned long);
-static void nr_t2timer_expiry(unsigned long);
-static void nr_t4timer_expiry(unsigned long);
-static void nr_idletimer_expiry(unsigned long);
+static void nr_heartbeat_expiry(struct timer_list *);
+static void nr_t1timer_expiry(struct timer_list *);
+static void nr_t2timer_expiry(struct timer_list *);
+static void nr_t4timer_expiry(struct timer_list *);
+static void nr_idletimer_expiry(struct timer_list *);
void nr_init_timers(struct sock *sk)
{
struct nr_sock *nr = nr_sk(sk);
- setup_timer(&nr->t1timer, nr_t1timer_expiry, (unsigned long)sk);
- setup_timer(&nr->t2timer, nr_t2timer_expiry, (unsigned long)sk);
- setup_timer(&nr->t4timer, nr_t4timer_expiry, (unsigned long)sk);
- setup_timer(&nr->idletimer, nr_idletimer_expiry, (unsigned long)sk);
+ timer_setup(&nr->t1timer, nr_t1timer_expiry, 0);
+ timer_setup(&nr->t2timer, nr_t2timer_expiry, 0);
+ timer_setup(&nr->t4timer, nr_t4timer_expiry, 0);
+ timer_setup(&nr->idletimer, nr_idletimer_expiry, 0);
/* initialized by sock_init_data */
- sk->sk_timer.data = (unsigned long)sk;
- sk->sk_timer.function = &nr_heartbeat_expiry;
+ sk->sk_timer.function = (TIMER_FUNC_TYPE)nr_heartbeat_expiry;
}
void nr_start_t1timer(struct sock *sk)
@@ -113,9 +112,9 @@ int nr_t1timer_running(struct sock *sk)
return timer_pending(&nr_sk(sk)->t1timer);
}
-static void nr_heartbeat_expiry(unsigned long param)
+static void nr_heartbeat_expiry(struct timer_list *t)
{
- struct sock *sk = (struct sock *)param;
+ struct sock *sk = from_timer(sk, t, sk_timer);
struct nr_sock *nr = nr_sk(sk);
bh_lock_sock(sk);
@@ -152,10 +151,10 @@ static void nr_heartbeat_expiry(unsigned long param)
bh_unlock_sock(sk);
}
-static void nr_t2timer_expiry(unsigned long param)
+static void nr_t2timer_expiry(struct timer_list *t)
{
- struct sock *sk = (struct sock *)param;
- struct nr_sock *nr = nr_sk(sk);
+ struct nr_sock *nr = from_timer(nr, t, t2timer);
+ struct sock *sk = &nr->sock;
bh_lock_sock(sk);
if (nr->condition & NR_COND_ACK_PENDING) {
@@ -165,19 +164,20 @@ static void nr_t2timer_expiry(unsigned long param)
bh_unlock_sock(sk);
}
-static void nr_t4timer_expiry(unsigned long param)
+static void nr_t4timer_expiry(struct timer_list *t)
{
- struct sock *sk = (struct sock *)param;
+ struct nr_sock *nr = from_timer(nr, t, t4timer);
+ struct sock *sk = &nr->sock;
bh_lock_sock(sk);
nr_sk(sk)->condition &= ~NR_COND_PEER_RX_BUSY;
bh_unlock_sock(sk);
}
-static void nr_idletimer_expiry(unsigned long param)
+static void nr_idletimer_expiry(struct timer_list *t)
{
- struct sock *sk = (struct sock *)param;
- struct nr_sock *nr = nr_sk(sk);
+ struct nr_sock *nr = from_timer(nr, t, idletimer);
+ struct sock *sk = &nr->sock;
bh_lock_sock(sk);
@@ -202,10 +202,10 @@ static void nr_idletimer_expiry(unsigned long param)
bh_unlock_sock(sk);
}
-static void nr_t1timer_expiry(unsigned long param)
+static void nr_t1timer_expiry(struct timer_list *t)
{
- struct sock *sk = (struct sock *)param;
- struct nr_sock *nr = nr_sk(sk);
+ struct nr_sock *nr = from_timer(nr, t, t1timer);
+ struct sock *sk = &nr->sock;
bh_lock_sock(sk);
switch (nr->state) {
diff --git a/net/nfc/Makefile b/net/nfc/Makefile
index 2555ff8e7219..2ffc69b473fc 100644
--- a/net/nfc/Makefile
+++ b/net/nfc/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the Linux NFC subsystem.
#
diff --git a/net/nfc/core.c b/net/nfc/core.c
index 5cf33df888c3..947a470f929d 100644
--- a/net/nfc/core.c
+++ b/net/nfc/core.c
@@ -1015,9 +1015,9 @@ exit:
device_unlock(&dev->dev);
}
-static void nfc_check_pres_timeout(unsigned long data)
+static void nfc_check_pres_timeout(struct timer_list *t)
{
- struct nfc_dev *dev = (struct nfc_dev *)data;
+ struct nfc_dev *dev = from_timer(dev, t, check_pres_timer);
schedule_work(&dev->check_pres_work);
}
@@ -1094,10 +1094,7 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops,
dev->targets_generation = 1;
if (ops->check_presence) {
- init_timer(&dev->check_pres_timer);
- dev->check_pres_timer.data = (unsigned long)dev;
- dev->check_pres_timer.function = nfc_check_pres_timeout;
-
+ timer_setup(&dev->check_pres_timer, nfc_check_pres_timeout, 0);
INIT_WORK(&dev->check_pres_work, nfc_check_pres_work);
}
@@ -1106,7 +1103,7 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops,
err_free_dev:
kfree(dev);
- return ERR_PTR(rc);
+ return NULL;
}
EXPORT_SYMBOL(nfc_allocate_device);
diff --git a/net/nfc/digital_core.c b/net/nfc/digital_core.c
index de6dd37d04c7..ec0a8998e52d 100644
--- a/net/nfc/digital_core.c
+++ b/net/nfc/digital_core.c
@@ -650,6 +650,7 @@ static void digital_deactivate_target(struct nfc_dev *nfc_dev,
return;
}
+ digital_abort_cmd(ddev);
ddev->curr_protocol = 0;
}
diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c
index b740fef0acc5..ac8030c4bcf8 100644
--- a/net/nfc/hci/core.c
+++ b/net/nfc/hci/core.c
@@ -428,9 +428,9 @@ exit_noskb:
nfc_hci_driver_failure(hdev, r);
}
-static void nfc_hci_cmd_timeout(unsigned long data)
+static void nfc_hci_cmd_timeout(struct timer_list *t)
{
- struct nfc_hci_dev *hdev = (struct nfc_hci_dev *)data;
+ struct nfc_hci_dev *hdev = from_timer(hdev, t, cmd_timer);
schedule_work(&hdev->msg_tx_work);
}
@@ -1004,9 +1004,7 @@ int nfc_hci_register_device(struct nfc_hci_dev *hdev)
INIT_WORK(&hdev->msg_tx_work, nfc_hci_msg_tx_work);
- init_timer(&hdev->cmd_timer);
- hdev->cmd_timer.data = (unsigned long)hdev;
- hdev->cmd_timer.function = nfc_hci_cmd_timeout;
+ timer_setup(&hdev->cmd_timer, nfc_hci_cmd_timeout, 0);
skb_queue_head_init(&hdev->rx_hcp_frags);
diff --git a/net/nfc/hci/llc_shdlc.c b/net/nfc/hci/llc_shdlc.c
index 17e59a009ce6..fe988936ad92 100644
--- a/net/nfc/hci/llc_shdlc.c
+++ b/net/nfc/hci/llc_shdlc.c
@@ -580,27 +580,27 @@ static void llc_shdlc_handle_send_queue(struct llc_shdlc *shdlc)
}
}
-static void llc_shdlc_connect_timeout(unsigned long data)
+static void llc_shdlc_connect_timeout(struct timer_list *t)
{
- struct llc_shdlc *shdlc = (struct llc_shdlc *)data;
+ struct llc_shdlc *shdlc = from_timer(shdlc, t, connect_timer);
pr_debug("\n");
schedule_work(&shdlc->sm_work);
}
-static void llc_shdlc_t1_timeout(unsigned long data)
+static void llc_shdlc_t1_timeout(struct timer_list *t)
{
- struct llc_shdlc *shdlc = (struct llc_shdlc *)data;
+ struct llc_shdlc *shdlc = from_timer(shdlc, t, t1_timer);
pr_debug("SoftIRQ: need to send ack\n");
schedule_work(&shdlc->sm_work);
}
-static void llc_shdlc_t2_timeout(unsigned long data)
+static void llc_shdlc_t2_timeout(struct timer_list *t)
{
- struct llc_shdlc *shdlc = (struct llc_shdlc *)data;
+ struct llc_shdlc *shdlc = from_timer(shdlc, t, t2_timer);
pr_debug("SoftIRQ: need to retransmit\n");
@@ -763,17 +763,9 @@ static void *llc_shdlc_init(struct nfc_hci_dev *hdev, xmit_to_drv_t xmit_to_drv,
mutex_init(&shdlc->state_mutex);
shdlc->state = SHDLC_DISCONNECTED;
- init_timer(&shdlc->connect_timer);
- shdlc->connect_timer.data = (unsigned long)shdlc;
- shdlc->connect_timer.function = llc_shdlc_connect_timeout;
-
- init_timer(&shdlc->t1_timer);
- shdlc->t1_timer.data = (unsigned long)shdlc;
- shdlc->t1_timer.function = llc_shdlc_t1_timeout;
-
- init_timer(&shdlc->t2_timer);
- shdlc->t2_timer.data = (unsigned long)shdlc;
- shdlc->t2_timer.function = llc_shdlc_t2_timeout;
+ timer_setup(&shdlc->connect_timer, llc_shdlc_connect_timeout, 0);
+ timer_setup(&shdlc->t1_timer, llc_shdlc_t1_timeout, 0);
+ timer_setup(&shdlc->t2_timer, llc_shdlc_t2_timeout, 0);
shdlc->w = SHDLC_MAX_WINDOW;
shdlc->srej_support = SHDLC_SREJ_SUPPORT;
diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c
index 02eef5cf3cce..ef4026a23e80 100644
--- a/net/nfc/llcp_core.c
+++ b/net/nfc/llcp_core.c
@@ -242,9 +242,9 @@ static void nfc_llcp_timeout_work(struct work_struct *work)
nfc_dep_link_down(local->dev);
}
-static void nfc_llcp_symm_timer(unsigned long data)
+static void nfc_llcp_symm_timer(struct timer_list *t)
{
- struct nfc_llcp_local *local = (struct nfc_llcp_local *) data;
+ struct nfc_llcp_local *local = from_timer(local, t, link_timer);
pr_err("SYMM timeout\n");
@@ -285,9 +285,9 @@ static void nfc_llcp_sdreq_timeout_work(struct work_struct *work)
nfc_genl_llc_send_sdres(local->dev, &nl_sdres_list);
}
-static void nfc_llcp_sdreq_timer(unsigned long data)
+static void nfc_llcp_sdreq_timer(struct timer_list *t)
{
- struct nfc_llcp_local *local = (struct nfc_llcp_local *) data;
+ struct nfc_llcp_local *local = from_timer(local, t, sdreq_timer);
schedule_work(&local->sdreq_timeout_work);
}
@@ -1573,9 +1573,7 @@ int nfc_llcp_register_device(struct nfc_dev *ndev)
INIT_LIST_HEAD(&local->list);
kref_init(&local->ref);
mutex_init(&local->sdp_lock);
- init_timer(&local->link_timer);
- local->link_timer.data = (unsigned long) local;
- local->link_timer.function = nfc_llcp_symm_timer;
+ timer_setup(&local->link_timer, nfc_llcp_symm_timer, 0);
skb_queue_head_init(&local->tx_queue);
INIT_WORK(&local->tx_work, nfc_llcp_tx_work);
@@ -1601,9 +1599,7 @@ int nfc_llcp_register_device(struct nfc_dev *ndev)
mutex_init(&local->sdreq_lock);
INIT_HLIST_HEAD(&local->pending_sdreqs);
- init_timer(&local->sdreq_timer);
- local->sdreq_timer.data = (unsigned long) local;
- local->sdreq_timer.function = nfc_llcp_sdreq_timer;
+ timer_setup(&local->sdreq_timer, nfc_llcp_sdreq_timer, 0);
INIT_WORK(&local->sdreq_timeout_work, nfc_llcp_sdreq_timeout_work);
list_add(&local->list, &llcp_devices);
diff --git a/net/nfc/nci/Makefile b/net/nfc/nci/Makefile
index 0ca31d9bf741..c3362c499281 100644
--- a/net/nfc/nci/Makefile
+++ b/net/nfc/nci/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the Linux NFC NCI layer.
#
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index b251fb936a27..c0b83dc9d993 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -75,7 +75,7 @@ static int nfc_genl_send_target(struct sk_buff *msg, struct nfc_target *target,
if (!hdr)
return -EMSGSIZE;
- genl_dump_check_consistent(cb, hdr, &nfc_genl_family);
+ genl_dump_check_consistent(cb, hdr);
if (nla_put_u32(msg, NFC_ATTR_TARGET_INDEX, target->idx) ||
nla_put_u32(msg, NFC_ATTR_PROTOCOLS, target->supported_protocols) ||
@@ -603,7 +603,7 @@ static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev,
return -EMSGSIZE;
if (cb)
- genl_dump_check_consistent(cb, hdr, &nfc_genl_family);
+ genl_dump_check_consistent(cb, hdr);
if (nfc_genl_setup_device_added(dev, msg))
goto nla_put_failure;
@@ -928,6 +928,30 @@ static int nfc_genl_activate_target(struct sk_buff *skb, struct genl_info *info)
return rc;
}
+static int nfc_genl_deactivate_target(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct nfc_dev *dev;
+ u32 device_idx, target_idx;
+ int rc;
+
+ if (!info->attrs[NFC_ATTR_DEVICE_INDEX])
+ return -EINVAL;
+
+ device_idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]);
+
+ dev = nfc_get_device(device_idx);
+ if (!dev)
+ return -ENODEV;
+
+ target_idx = nla_get_u32(info->attrs[NFC_ATTR_TARGET_INDEX]);
+
+ rc = nfc_deactivate_target(dev, target_idx, NFC_TARGET_MODE_SLEEP);
+
+ nfc_put_device(dev);
+ return rc;
+}
+
static int nfc_genl_dep_link_up(struct sk_buff *skb, struct genl_info *info)
{
struct nfc_dev *dev;
@@ -1332,7 +1356,7 @@ static int nfc_genl_send_se(struct sk_buff *msg, struct nfc_dev *dev,
goto nla_put_failure;
if (cb)
- genl_dump_check_consistent(cb, hdr, &nfc_genl_family);
+ genl_dump_check_consistent(cb, hdr);
if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) ||
nla_put_u32(msg, NFC_ATTR_SE_INDEX, se->idx) ||
@@ -1751,6 +1775,11 @@ static const struct genl_ops nfc_genl_ops[] = {
.doit = nfc_genl_vendor_cmd,
.policy = nfc_genl_policy,
},
+ {
+ .cmd = NFC_CMD_DEACTIVATE_TARGET,
+ .doit = nfc_genl_deactivate_target,
+ .policy = nfc_genl_policy,
+ },
};
static struct genl_family nfc_genl_family __ro_after_init = {
diff --git a/net/nsh/Kconfig b/net/nsh/Kconfig
new file mode 100644
index 000000000000..bafc3dd60c2c
--- /dev/null
+++ b/net/nsh/Kconfig
@@ -0,0 +1,9 @@
+menuconfig NET_NSH
+ tristate "Network Service Header (NSH) protocol"
+ default n
+ ---help---
+ Network Service Header is an implementation of Service Function
+ Chaining (RFC 7665). The current implementation in Linux supports
+ only MD type 1 and only with the openvswitch module.
+
+ If unsure, say N.
diff --git a/net/nsh/Makefile b/net/nsh/Makefile
new file mode 100644
index 000000000000..c93c787385ca
--- /dev/null
+++ b/net/nsh/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_NET_NSH) += nsh.o
diff --git a/net/nsh/nsh.c b/net/nsh/nsh.c
new file mode 100644
index 000000000000..d7da99a0b0b8
--- /dev/null
+++ b/net/nsh/nsh.c
@@ -0,0 +1,151 @@
+/*
+ * Network Service Header
+ *
+ * Copyright (c) 2017 Red Hat, Inc. -- Jiri Benc <jbenc@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <net/nsh.h>
+#include <net/tun_proto.h>
+
+int nsh_push(struct sk_buff *skb, const struct nshhdr *pushed_nh)
+{
+ struct nshhdr *nh;
+ size_t length = nsh_hdr_len(pushed_nh);
+ u8 next_proto;
+
+ if (skb->mac_len) {
+ next_proto = TUN_P_ETHERNET;
+ } else {
+ next_proto = tun_p_from_eth_p(skb->protocol);
+ if (!next_proto)
+ return -EAFNOSUPPORT;
+ }
+
+ /* Add the NSH header */
+ if (skb_cow_head(skb, length) < 0)
+ return -ENOMEM;
+
+ skb_push(skb, length);
+ nh = (struct nshhdr *)(skb->data);
+ memcpy(nh, pushed_nh, length);
+ nh->np = next_proto;
+ skb_postpush_rcsum(skb, nh, length);
+
+ skb->protocol = htons(ETH_P_NSH);
+ skb_reset_mac_header(skb);
+ skb_reset_network_header(skb);
+ skb_reset_mac_len(skb);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nsh_push);
+
+int nsh_pop(struct sk_buff *skb)
+{
+ struct nshhdr *nh;
+ size_t length;
+ __be16 inner_proto;
+
+ if (!pskb_may_pull(skb, NSH_BASE_HDR_LEN))
+ return -ENOMEM;
+ nh = (struct nshhdr *)(skb->data);
+ length = nsh_hdr_len(nh);
+ inner_proto = tun_p_to_eth_p(nh->np);
+ if (!pskb_may_pull(skb, length))
+ return -ENOMEM;
+
+ if (!inner_proto)
+ return -EAFNOSUPPORT;
+
+ skb_pull_rcsum(skb, length);
+ skb_reset_mac_header(skb);
+ skb_reset_network_header(skb);
+ skb_reset_mac_len(skb);
+ skb->protocol = inner_proto;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nsh_pop);
+
+static struct sk_buff *nsh_gso_segment(struct sk_buff *skb,
+ netdev_features_t features)
+{
+ struct sk_buff *segs = ERR_PTR(-EINVAL);
+ unsigned int nsh_len, mac_len;
+ __be16 proto;
+ int nhoff;
+
+ skb_reset_network_header(skb);
+
+ nhoff = skb->network_header - skb->mac_header;
+ mac_len = skb->mac_len;
+
+ if (unlikely(!pskb_may_pull(skb, NSH_BASE_HDR_LEN)))
+ goto out;
+ nsh_len = nsh_hdr_len(nsh_hdr(skb));
+ if (unlikely(!pskb_may_pull(skb, nsh_len)))
+ goto out;
+
+ proto = tun_p_to_eth_p(nsh_hdr(skb)->np);
+ if (!proto)
+ goto out;
+
+ __skb_pull(skb, nsh_len);
+
+ skb_reset_mac_header(skb);
+ skb_reset_mac_len(skb);
+ skb->protocol = proto;
+
+ features &= NETIF_F_SG;
+ segs = skb_mac_gso_segment(skb, features);
+ if (IS_ERR_OR_NULL(segs)) {
+ skb_gso_error_unwind(skb, htons(ETH_P_NSH), nsh_len,
+ skb->network_header - nhoff,
+ mac_len);
+ goto out;
+ }
+
+ for (skb = segs; skb; skb = skb->next) {
+ skb->protocol = htons(ETH_P_NSH);
+ __skb_push(skb, nsh_len);
+ skb_set_mac_header(skb, -nhoff);
+ skb->network_header = skb->mac_header + mac_len;
+ skb->mac_len = mac_len;
+ }
+
+out:
+ return segs;
+}
+
+static struct packet_offload nsh_packet_offload __read_mostly = {
+ .type = htons(ETH_P_NSH),
+ .priority = 15,
+ .callbacks = {
+ .gso_segment = nsh_gso_segment,
+ },
+};
+
+static int __init nsh_init_module(void)
+{
+ dev_add_offload(&nsh_packet_offload);
+ return 0;
+}
+
+static void __exit nsh_cleanup_module(void)
+{
+ dev_remove_offload(&nsh_packet_offload);
+}
+
+module_init(nsh_init_module);
+module_exit(nsh_cleanup_module);
+
+MODULE_AUTHOR("Jiri Benc <jbenc@redhat.com>");
+MODULE_DESCRIPTION("NSH protocol");
+MODULE_LICENSE("GPL v2");
diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig
index ce947292ae77..2650205cdaf9 100644
--- a/net/openvswitch/Kconfig
+++ b/net/openvswitch/Kconfig
@@ -14,6 +14,7 @@ config OPENVSWITCH
select MPLS
select NET_MPLS_GSO
select DST_CACHE
+ select NET_NSH
---help---
Open vSwitch is a multilayer Ethernet switch targeted at virtualized
environments. In addition to supporting a variety of features
diff --git a/net/openvswitch/Makefile b/net/openvswitch/Makefile
index 60f809085b92..41109c326f3a 100644
--- a/net/openvswitch/Makefile
+++ b/net/openvswitch/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for Open vSwitch.
#
@@ -11,6 +12,7 @@ openvswitch-y := \
flow.o \
flow_netlink.o \
flow_table.o \
+ meter.o \
vport.o \
vport-internal_dev.o \
vport-netdev.o
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index e4610676299b..30a5df27116e 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -43,6 +43,7 @@
#include "flow.h"
#include "conntrack.h"
#include "vport.h"
+#include "flow_netlink.h"
struct deferred_action {
struct sk_buff *skb;
@@ -380,6 +381,38 @@ static int push_eth(struct sk_buff *skb, struct sw_flow_key *key,
return 0;
}
+static int push_nsh(struct sk_buff *skb, struct sw_flow_key *key,
+ const struct nshhdr *nh)
+{
+ int err;
+
+ err = nsh_push(skb, nh);
+ if (err)
+ return err;
+
+ /* safe right before invalidate_flow_key */
+ key->mac_proto = MAC_PROTO_NONE;
+ invalidate_flow_key(key);
+ return 0;
+}
+
+static int pop_nsh(struct sk_buff *skb, struct sw_flow_key *key)
+{
+ int err;
+
+ err = nsh_pop(skb);
+ if (err)
+ return err;
+
+ /* safe right before invalidate_flow_key */
+ if (skb->protocol == htons(ETH_P_TEB))
+ key->mac_proto = MAC_PROTO_ETHERNET;
+ else
+ key->mac_proto = MAC_PROTO_NONE;
+ invalidate_flow_key(key);
+ return 0;
+}
+
static void update_ip_l4_checksum(struct sk_buff *skb, struct iphdr *nh,
__be32 addr, __be32 new_addr)
{
@@ -602,6 +635,69 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key,
return 0;
}
+static int set_nsh(struct sk_buff *skb, struct sw_flow_key *flow_key,
+ const struct nlattr *a)
+{
+ struct nshhdr *nh;
+ size_t length;
+ int err;
+ u8 flags;
+ u8 ttl;
+ int i;
+
+ struct ovs_key_nsh key;
+ struct ovs_key_nsh mask;
+
+ err = nsh_key_from_nlattr(a, &key, &mask);
+ if (err)
+ return err;
+
+ /* Make sure the NSH base header is there */
+ if (!pskb_may_pull(skb, skb_network_offset(skb) + NSH_BASE_HDR_LEN))
+ return -ENOMEM;
+
+ nh = nsh_hdr(skb);
+ length = nsh_hdr_len(nh);
+
+ /* Make sure the whole NSH header is there */
+ err = skb_ensure_writable(skb, skb_network_offset(skb) +
+ length);
+ if (unlikely(err))
+ return err;
+
+ nh = nsh_hdr(skb);
+ skb_postpull_rcsum(skb, nh, length);
+ flags = nsh_get_flags(nh);
+ flags = OVS_MASKED(flags, key.base.flags, mask.base.flags);
+ flow_key->nsh.base.flags = flags;
+ ttl = nsh_get_ttl(nh);
+ ttl = OVS_MASKED(ttl, key.base.ttl, mask.base.ttl);
+ flow_key->nsh.base.ttl = ttl;
+ nsh_set_flags_and_ttl(nh, flags, ttl);
+ nh->path_hdr = OVS_MASKED(nh->path_hdr, key.base.path_hdr,
+ mask.base.path_hdr);
+ flow_key->nsh.base.path_hdr = nh->path_hdr;
+ switch (nh->mdtype) {
+ case NSH_M_TYPE1:
+ for (i = 0; i < NSH_MD1_CONTEXT_SIZE; i++) {
+ nh->md1.context[i] =
+ OVS_MASKED(nh->md1.context[i], key.context[i],
+ mask.context[i]);
+ }
+ memcpy(flow_key->nsh.context, nh->md1.context,
+ sizeof(nh->md1.context));
+ break;
+ case NSH_M_TYPE2:
+ memset(flow_key->nsh.context, 0,
+ sizeof(flow_key->nsh.context));
+ break;
+ default:
+ return -EINVAL;
+ }
+ skb_postpush_rcsum(skb, nh, length);
+ return 0;
+}
+
/* Must follow skb_ensure_writable() since that can move the skb data. */
static void set_tp_port(struct sk_buff *skb, __be16 *port,
__be16 new_port, __sum16 *check)
@@ -1024,6 +1120,10 @@ static int execute_masked_set_action(struct sk_buff *skb,
get_mask(a, struct ovs_key_ethernet *));
break;
+ case OVS_KEY_ATTR_NSH:
+ err = set_nsh(skb, flow_key, a);
+ break;
+
case OVS_KEY_ATTR_IPV4:
err = set_ipv4(skb, flow_key, nla_data(a),
get_mask(a, struct ovs_key_ipv4 *));
@@ -1203,6 +1303,10 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
return err == -EINPROGRESS ? 0 : err;
break;
+ case OVS_ACTION_ATTR_CT_CLEAR:
+ err = ovs_ct_clear(skb, key);
+ break;
+
case OVS_ACTION_ATTR_PUSH_ETH:
err = push_eth(skb, key, nla_data(a));
break;
@@ -1210,6 +1314,28 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
case OVS_ACTION_ATTR_POP_ETH:
err = pop_eth(skb, key);
break;
+
+ case OVS_ACTION_ATTR_PUSH_NSH: {
+ u8 buffer[NSH_HDR_MAX_LEN];
+ struct nshhdr *nh = (struct nshhdr *)buffer;
+
+ err = nsh_hdr_from_nlattr(nla_data(a), nh,
+ NSH_HDR_MAX_LEN);
+ if (unlikely(err))
+ break;
+ err = push_nsh(skb, key, nh);
+ break;
+ }
+
+ case OVS_ACTION_ATTR_POP_NSH:
+ err = pop_nsh(skb, key);
+ break;
+
+ case OVS_ACTION_ATTR_METER:
+ if (ovs_meter_execute(dp, skb, key, nla_get_u32(a))) {
+ consume_skb(skb);
+ return 0;
+ }
}
if (unlikely(err)) {
@@ -1337,6 +1463,7 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
goto out;
}
+ OVS_CB(skb)->acts_origlen = acts->orig_len;
err = do_execute_actions(dp, skb, key,
acts->actions, acts->actions_len);
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 03859e386b47..b27c5c6d9cab 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -579,8 +579,8 @@ static struct nf_conn *
ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone,
u8 l3num, struct sk_buff *skb, bool natted)
{
- struct nf_conntrack_l3proto *l3proto;
- struct nf_conntrack_l4proto *l4proto;
+ const struct nf_conntrack_l3proto *l3proto;
+ const struct nf_conntrack_l4proto *l4proto;
struct nf_conntrack_tuple tuple;
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
@@ -752,6 +752,7 @@ static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct,
}
}
/* Non-ICMP, fall thru to initialize if needed. */
+ /* fall through */
case IP_CT_NEW:
/* Seen it before? This can happen for loopback, retrans,
* or local packets.
@@ -1129,6 +1130,17 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb,
return err;
}
+int ovs_ct_clear(struct sk_buff *skb, struct sw_flow_key *key)
+{
+ if (skb_nfct(skb)) {
+ nf_conntrack_put(skb_nfct(skb));
+ nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
+ ovs_ct_fill_key(skb, key);
+ }
+
+ return 0;
+}
+
static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name,
const struct sw_flow_key *key, bool log)
{
@@ -1180,15 +1192,13 @@ static int parse_nat(const struct nlattr *attr,
int type = nla_type(a);
if (type > OVS_NAT_ATTR_MAX) {
- OVS_NLERR(log,
- "Unknown NAT attribute (type=%d, max=%d).\n",
+ OVS_NLERR(log, "Unknown NAT attribute (type=%d, max=%d)",
type, OVS_NAT_ATTR_MAX);
return -EINVAL;
}
if (nla_len(a) != ovs_nat_attr_lens[type][ip_vers]) {
- OVS_NLERR(log,
- "NAT attribute type %d has unexpected length (%d != %d).\n",
+ OVS_NLERR(log, "NAT attribute type %d has unexpected length (%d != %d)",
type, nla_len(a),
ovs_nat_attr_lens[type][ip_vers]);
return -EINVAL;
@@ -1198,9 +1208,7 @@ static int parse_nat(const struct nlattr *attr,
case OVS_NAT_ATTR_SRC:
case OVS_NAT_ATTR_DST:
if (info->nat) {
- OVS_NLERR(log,
- "Only one type of NAT may be specified.\n"
- );
+ OVS_NLERR(log, "Only one type of NAT may be specified");
return -ERANGE;
}
info->nat |= OVS_CT_NAT;
@@ -1245,13 +1253,13 @@ static int parse_nat(const struct nlattr *attr,
break;
default:
- OVS_NLERR(log, "Unknown nat attribute (%d).\n", type);
+ OVS_NLERR(log, "Unknown nat attribute (%d)", type);
return -EINVAL;
}
}
if (rem > 0) {
- OVS_NLERR(log, "NAT attribute has %d unknown bytes.\n", rem);
+ OVS_NLERR(log, "NAT attribute has %d unknown bytes", rem);
return -EINVAL;
}
if (!info->nat) {
diff --git a/net/openvswitch/conntrack.h b/net/openvswitch/conntrack.h
index bc7efd1867ab..399dfdd2c4f9 100644
--- a/net/openvswitch/conntrack.h
+++ b/net/openvswitch/conntrack.h
@@ -30,6 +30,7 @@ int ovs_ct_action_to_attr(const struct ovs_conntrack_info *, struct sk_buff *);
int ovs_ct_execute(struct net *, struct sk_buff *, struct sw_flow_key *,
const struct ovs_conntrack_info *);
+int ovs_ct_clear(struct sk_buff *skb, struct sw_flow_key *key);
void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key);
int ovs_ct_put_key(const struct sw_flow_key *swkey,
@@ -73,6 +74,12 @@ static inline int ovs_ct_execute(struct net *net, struct sk_buff *skb,
return -ENOTSUPP;
}
+static inline int ovs_ct_clear(struct sk_buff *skb,
+ struct sw_flow_key *key)
+{
+ return -ENOTSUPP;
+}
+
static inline void ovs_ct_fill_key(const struct sk_buff *skb,
struct sw_flow_key *key)
{
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 45fe8c8a884d..0dab33fb9844 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -55,6 +55,7 @@
#include "flow.h"
#include "flow_table.h"
#include "flow_netlink.h"
+#include "meter.h"
#include "vport-internal_dev.h"
#include "vport-netdev.h"
@@ -142,35 +143,6 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *,
const struct dp_upcall_info *,
uint32_t cutlen);
-/* Must be called with rcu_read_lock. */
-static struct datapath *get_dp_rcu(struct net *net, int dp_ifindex)
-{
- struct net_device *dev = dev_get_by_index_rcu(net, dp_ifindex);
-
- if (dev) {
- struct vport *vport = ovs_internal_dev_get_vport(dev);
- if (vport)
- return vport->dp;
- }
-
- return NULL;
-}
-
-/* The caller must hold either ovs_mutex or rcu_read_lock to keep the
- * returned dp pointer valid.
- */
-static inline struct datapath *get_dp(struct net *net, int dp_ifindex)
-{
- struct datapath *dp;
-
- WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held());
- rcu_read_lock();
- dp = get_dp_rcu(net, dp_ifindex);
- rcu_read_unlock();
-
- return dp;
-}
-
/* Must be called with rcu_read_lock or ovs_mutex. */
const char *ovs_dp_name(const struct datapath *dp)
{
@@ -203,6 +175,7 @@ static void destroy_dp_rcu(struct rcu_head *rcu)
ovs_flow_tbl_destroy(&dp->table);
free_percpu(dp->stats_percpu);
kfree(dp->ports);
+ ovs_meters_exit(dp);
kfree(dp);
}
@@ -335,8 +308,6 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
const struct dp_upcall_info *upcall_info,
uint32_t cutlen)
{
- unsigned short gso_type = skb_shinfo(skb)->gso_type;
- struct sw_flow_key later_key;
struct sk_buff *segs, *nskb;
int err;
@@ -347,21 +318,9 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
if (segs == NULL)
return -EINVAL;
- if (gso_type & SKB_GSO_UDP) {
- /* The initial flow key extracted by ovs_flow_key_extract()
- * in this case is for a first fragment, so we need to
- * properly mark later fragments.
- */
- later_key = *key;
- later_key.ip.frag = OVS_FRAG_TYPE_LATER;
- }
-
/* Queue all of the segments. */
skb = segs;
do {
- if (gso_type & SKB_GSO_UDP && skb != segs)
- key = &later_key;
-
err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
if (err)
break;
@@ -381,7 +340,7 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
}
static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
- unsigned int hdrlen)
+ unsigned int hdrlen, int actions_attrlen)
{
size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
+ nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
@@ -398,7 +357,7 @@ static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
/* OVS_PACKET_ATTR_ACTIONS */
if (upcall_info->actions_len)
- size += nla_total_size(upcall_info->actions_len);
+ size += nla_total_size(actions_attrlen);
/* OVS_PACKET_ATTR_MRU */
if (upcall_info->mru)
@@ -465,7 +424,8 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
else
hlen = skb->len;
- len = upcall_msg_size(upcall_info, hlen - cutlen);
+ len = upcall_msg_size(upcall_info, hlen - cutlen,
+ OVS_CB(skb)->acts_origlen);
user_skb = genlmsg_new(len, GFP_ATOMIC);
if (!user_skb) {
err = -ENOMEM;
@@ -1125,7 +1085,8 @@ static int ovs_nla_init_match_and_action(struct net *net,
if (!a[OVS_FLOW_ATTR_KEY]) {
OVS_NLERR(log,
"Flow key attribute not present in set flow.");
- return -EINVAL;
+ error = -EINVAL;
+ goto error;
}
*acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], key,
@@ -1613,6 +1574,10 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
INIT_HLIST_HEAD(&dp->ports[i]);
+ err = ovs_meters_init(dp);
+ if (err)
+ goto err_destroy_ports_array;
+
/* Set up our datapath device. */
parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
parms.type = OVS_VPORT_TYPE_INTERNAL;
@@ -1641,7 +1606,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
ovs_dp_reset_user_features(skb, info);
}
- goto err_destroy_ports_array;
+ goto err_destroy_meters;
}
err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
@@ -1656,8 +1621,10 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
ovs_notify(&dp_datapath_genl_family, reply, info);
return 0;
-err_destroy_ports_array:
+err_destroy_meters:
ovs_unlock();
+ ovs_meters_exit(dp);
+err_destroy_ports_array:
kfree(dp->ports);
err_destroy_percpu:
free_percpu(dp->stats_percpu);
@@ -1860,7 +1827,8 @@ static struct genl_family dp_datapath_genl_family __ro_after_init = {
/* Called with ovs_mutex or RCU read lock. */
static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
- u32 portid, u32 seq, u32 flags, u8 cmd)
+ struct net *net, u32 portid, u32 seq,
+ u32 flags, u8 cmd)
{
struct ovs_header *ovs_header;
struct ovs_vport_stats vport_stats;
@@ -1876,9 +1844,17 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
nla_put_string(skb, OVS_VPORT_ATTR_NAME,
- ovs_vport_name(vport)))
+ ovs_vport_name(vport)) ||
+ nla_put_u32(skb, OVS_VPORT_ATTR_IFINDEX, vport->dev->ifindex))
goto nla_put_failure;
+ if (!net_eq(net, dev_net(vport->dev))) {
+ int id = peernet2id_alloc(net, dev_net(vport->dev));
+
+ if (nla_put_s32(skb, OVS_VPORT_ATTR_NETNSID, id))
+ goto nla_put_failure;
+ }
+
ovs_vport_get_stats(vport, &vport_stats);
if (nla_put_64bit(skb, OVS_VPORT_ATTR_STATS,
sizeof(struct ovs_vport_stats), &vport_stats,
@@ -1908,8 +1884,8 @@ static struct sk_buff *ovs_vport_cmd_alloc_info(void)
}
/* Called with ovs_mutex, only via ovs_dp_notify_wq(). */
-struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
- u32 seq, u8 cmd)
+struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net,
+ u32 portid, u32 seq, u8 cmd)
{
struct sk_buff *skb;
int retval;
@@ -1918,7 +1894,7 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
if (!skb)
return ERR_PTR(-ENOMEM);
- retval = ovs_vport_cmd_fill_info(vport, skb, portid, seq, 0, cmd);
+ retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd);
BUG_ON(retval < 0);
return skb;
@@ -1932,6 +1908,8 @@ static struct vport *lookup_vport(struct net *net,
struct datapath *dp;
struct vport *vport;
+ if (a[OVS_VPORT_ATTR_IFINDEX])
+ return ERR_PTR(-EOPNOTSUPP);
if (a[OVS_VPORT_ATTR_NAME]) {
vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME]));
if (!vport)
@@ -1956,6 +1934,7 @@ static struct vport *lookup_vport(struct net *net,
return vport;
} else
return ERR_PTR(-EINVAL);
+
}
/* Called with ovs_mutex */
@@ -1995,6 +1974,8 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
!a[OVS_VPORT_ATTR_UPCALL_PID])
return -EINVAL;
+ if (a[OVS_VPORT_ATTR_IFINDEX])
+ return -EOPNOTSUPP;
port_no = a[OVS_VPORT_ATTR_PORT_NO]
? nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0;
@@ -2044,8 +2025,9 @@ restart:
goto exit_unlock_free;
}
- err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
- info->snd_seq, 0, OVS_VPORT_CMD_NEW);
+ err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
+ info->snd_portid, info->snd_seq, 0,
+ OVS_VPORT_CMD_NEW);
if (netdev_get_fwd_headroom(vport->dev) > dp->max_headroom)
update_headroom(dp);
@@ -2102,8 +2084,9 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
goto exit_unlock_free;
}
- err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
- info->snd_seq, 0, OVS_VPORT_CMD_NEW);
+ err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
+ info->snd_portid, info->snd_seq, 0,
+ OVS_VPORT_CMD_NEW);
BUG_ON(err < 0);
ovs_unlock();
@@ -2140,8 +2123,9 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
goto exit_unlock_free;
}
- err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
- info->snd_seq, 0, OVS_VPORT_CMD_DEL);
+ err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
+ info->snd_portid, info->snd_seq, 0,
+ OVS_VPORT_CMD_DEL);
BUG_ON(err < 0);
/* the vport deletion may trigger dp headroom update */
@@ -2181,8 +2165,9 @@ static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
err = PTR_ERR(vport);
if (IS_ERR(vport))
goto exit_unlock_free;
- err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
- info->snd_seq, 0, OVS_VPORT_CMD_NEW);
+ err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
+ info->snd_portid, info->snd_seq, 0,
+ OVS_VPORT_CMD_NEW);
BUG_ON(err < 0);
rcu_read_unlock();
@@ -2214,6 +2199,7 @@ static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
if (j >= skip &&
ovs_vport_cmd_fill_info(vport, skb,
+ sock_net(skb->sk),
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NLM_F_MULTI,
@@ -2240,6 +2226,8 @@ static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
[OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
[OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 },
[OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
+ [OVS_VPORT_ATTR_IFINDEX] = { .type = NLA_U32 },
+ [OVS_VPORT_ATTR_NETNSID] = { .type = NLA_S32 },
};
static const struct genl_ops dp_vport_genl_ops[] = {
@@ -2285,6 +2273,7 @@ static struct genl_family * const dp_genl_families[] = {
&dp_vport_genl_family,
&dp_flow_genl_family,
&dp_packet_genl_family,
+ &dp_meter_genl_family,
};
static void dp_unregister_genl(int n_families)
@@ -2465,3 +2454,4 @@ MODULE_ALIAS_GENL_FAMILY(OVS_DATAPATH_FAMILY);
MODULE_ALIAS_GENL_FAMILY(OVS_VPORT_FAMILY);
MODULE_ALIAS_GENL_FAMILY(OVS_FLOW_FAMILY);
MODULE_ALIAS_GENL_FAMILY(OVS_PACKET_FAMILY);
+MODULE_ALIAS_GENL_FAMILY(OVS_METER_FAMILY);
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 5d8dcd88815f..523d65526766 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -30,6 +30,8 @@
#include "conntrack.h"
#include "flow.h"
#include "flow_table.h"
+#include "meter.h"
+#include "vport-internal_dev.h"
#define DP_MAX_PORTS USHRT_MAX
#define DP_VPORT_HASH_BUCKETS 1024
@@ -91,6 +93,9 @@ struct datapath {
u32 user_features;
u32 max_headroom;
+
+ /* Switch meters. */
+ struct hlist_head *meters;
};
/**
@@ -99,11 +104,13 @@ struct datapath {
* when a packet is received by OVS.
* @mru: The maximum received fragement size; 0 if the packet is not
* fragmented.
+ * @acts_origlen: The netlink size of the flow actions applied to this skb.
* @cutlen: The number of bytes from the packet end to be removed.
*/
struct ovs_skb_cb {
struct vport *input_vport;
u16 mru;
+ u16 acts_origlen;
u32 cutlen;
};
#define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb)
@@ -188,6 +195,36 @@ static inline struct vport *ovs_vport_ovsl(const struct datapath *dp, int port_n
return ovs_lookup_vport(dp, port_no);
}
+/* Must be called with rcu_read_lock. */
+static inline struct datapath *get_dp_rcu(struct net *net, int dp_ifindex)
+{
+ struct net_device *dev = dev_get_by_index_rcu(net, dp_ifindex);
+
+ if (dev) {
+ struct vport *vport = ovs_internal_dev_get_vport(dev);
+
+ if (vport)
+ return vport->dp;
+ }
+
+ return NULL;
+}
+
+/* The caller must hold either ovs_mutex or rcu_read_lock to keep the
+ * returned dp pointer valid.
+ */
+static inline struct datapath *get_dp(struct net *net, int dp_ifindex)
+{
+ struct datapath *dp;
+
+ WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held());
+ rcu_read_lock();
+ dp = get_dp_rcu(net, dp_ifindex);
+ rcu_read_unlock();
+
+ return dp;
+}
+
extern struct notifier_block ovs_dp_device_notifier;
extern struct genl_family dp_vport_genl_family;
@@ -198,8 +235,8 @@ int ovs_dp_upcall(struct datapath *, struct sk_buff *,
uint32_t cutlen);
const char *ovs_dp_name(const struct datapath *dp);
-struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq,
- u8 cmd);
+struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net,
+ u32 portid, u32 seq, u8 cmd);
int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
const struct sw_flow_actions *, struct sw_flow_key *);
diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c
index 653d073bae45..f3ee2f2825c0 100644
--- a/net/openvswitch/dp_notify.c
+++ b/net/openvswitch/dp_notify.c
@@ -30,8 +30,8 @@ static void dp_detach_port_notify(struct vport *vport)
struct datapath *dp;
dp = vport->dp;
- notify = ovs_vport_cmd_build_info(vport, 0, 0,
- OVS_VPORT_CMD_DEL);
+ notify = ovs_vport_cmd_build_info(vport, ovs_dp_get_net(dp),
+ 0, 0, OVS_VPORT_CMD_DEL);
ovs_dp_detach_port(vport);
if (IS_ERR(notify)) {
genl_set_err(&dp_vport_genl_family, ovs_dp_get_net(dp), 0,
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 3f76cb765e5b..864ddb1e3642 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -46,6 +46,7 @@
#include <net/ipv6.h>
#include <net/mpls.h>
#include <net/ndisc.h>
+#include <net/nsh.h>
#include "conntrack.h"
#include "datapath.h"
@@ -72,8 +73,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
const struct sk_buff *skb)
{
struct flow_stats *stats;
- int node = numa_node_id();
- int cpu = smp_processor_id();
+ unsigned int cpu = smp_processor_id();
int len = skb->len + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
stats = rcu_dereference(flow->stats[cpu]);
@@ -108,7 +108,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
__GFP_THISNODE |
__GFP_NOWARN |
__GFP_NOMEMALLOC,
- node);
+ numa_node_id());
if (likely(new_stats)) {
new_stats->used = jiffies;
new_stats->packet_count = 1;
@@ -118,6 +118,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
rcu_assign_pointer(flow->stats[cpu],
new_stats);
+ cpumask_set_cpu(cpu, &flow->cpu_used_mask);
goto unlock;
}
}
@@ -145,7 +146,7 @@ void ovs_flow_stats_get(const struct sw_flow *flow,
memset(ovs_stats, 0, sizeof(*ovs_stats));
/* We open code this to make sure cpu 0 is always considered */
- for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpu_possible_mask)) {
+ for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) {
struct flow_stats *stats = rcu_dereference_ovsl(flow->stats[cpu]);
if (stats) {
@@ -169,7 +170,7 @@ void ovs_flow_stats_clear(struct sw_flow *flow)
int cpu;
/* We open code this to make sure cpu 0 is always considered */
- for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpu_possible_mask)) {
+ for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) {
struct flow_stats *stats = ovsl_dereference(flow->stats[cpu]);
if (stats) {
@@ -490,6 +491,52 @@ invalid:
return 0;
}
+static int parse_nsh(struct sk_buff *skb, struct sw_flow_key *key)
+{
+ struct nshhdr *nh;
+ unsigned int nh_ofs = skb_network_offset(skb);
+ u8 version, length;
+ int err;
+
+ err = check_header(skb, nh_ofs + NSH_BASE_HDR_LEN);
+ if (unlikely(err))
+ return err;
+
+ nh = nsh_hdr(skb);
+ version = nsh_get_ver(nh);
+ length = nsh_hdr_len(nh);
+
+ if (version != 0)
+ return -EINVAL;
+
+ err = check_header(skb, nh_ofs + length);
+ if (unlikely(err))
+ return err;
+
+ nh = nsh_hdr(skb);
+ key->nsh.base.flags = nsh_get_flags(nh);
+ key->nsh.base.ttl = nsh_get_ttl(nh);
+ key->nsh.base.mdtype = nh->mdtype;
+ key->nsh.base.np = nh->np;
+ key->nsh.base.path_hdr = nh->path_hdr;
+ switch (key->nsh.base.mdtype) {
+ case NSH_M_TYPE1:
+ if (length != NSH_M_TYPE1_LEN)
+ return -EINVAL;
+ memcpy(key->nsh.context, nh->md1.context,
+ sizeof(nh->md1));
+ break;
+ case NSH_M_TYPE2:
+ memset(key->nsh.context, 0,
+ sizeof(nh->md1));
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
/**
* key_extract - extracts a flow key from an Ethernet frame.
* @skb: sk_buff that contains the frame, with skb->data pointing to the
@@ -584,8 +631,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
key->ip.frag = OVS_FRAG_TYPE_LATER;
return 0;
}
- if (nh->frag_off & htons(IP_MF) ||
- skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
+ if (nh->frag_off & htons(IP_MF))
key->ip.frag = OVS_FRAG_TYPE_FIRST;
else
key->ip.frag = OVS_FRAG_TYPE_NONE;
@@ -701,9 +747,6 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
if (key->ip.frag == OVS_FRAG_TYPE_LATER)
return 0;
- if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
- key->ip.frag = OVS_FRAG_TYPE_FIRST;
-
/* Transport layer. */
if (key->ip.proto == NEXTHDR_TCP) {
if (tcphdr_ok(skb)) {
@@ -739,6 +782,10 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
memset(&key->tp, 0, sizeof(key->tp));
}
}
+ } else if (key->eth.type == htons(ETH_P_NSH)) {
+ error = parse_nsh(skb, key);
+ if (error)
+ return error;
}
return 0;
}
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index a9bc1c875965..c670dd24b8b7 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -31,9 +31,11 @@
#include <linux/jiffies.h>
#include <linux/time.h>
#include <linux/flex_array.h>
+#include <linux/cpumask.h>
#include <net/inet_ecn.h>
#include <net/ip_tunnels.h>
#include <net/dst_metadata.h>
+#include <net/nsh.h>
struct sk_buff;
@@ -65,6 +67,11 @@ struct vlan_head {
(offsetof(struct sw_flow_key, recirc_id) + \
FIELD_SIZEOF(struct sw_flow_key, recirc_id))
+struct ovs_key_nsh {
+ struct ovs_nsh_key_base base;
+ __be32 context[NSH_MD1_CONTEXT_SIZE];
+};
+
struct sw_flow_key {
u8 tun_opts[IP_TUNNEL_OPTS_MAX];
u8 tun_opts_len;
@@ -142,6 +149,7 @@ struct sw_flow_key {
} nd;
};
} ipv6;
+ struct ovs_key_nsh nsh; /* network service header */
};
struct {
/* Connection tracking fields not packed above. */
@@ -219,6 +227,7 @@ struct sw_flow {
*/
struct sw_flow_key key;
struct sw_flow_id id;
+ struct cpumask cpu_used_mask;
struct sw_flow_mask *mask;
struct sw_flow_actions __rcu *sf_acts;
struct flow_stats __rcu *stats[]; /* One for each CPU. First one
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index f07d10ac35d8..dc424798ba6f 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -48,6 +48,8 @@
#include <net/ndisc.h>
#include <net/mpls.h>
#include <net/vxlan.h>
+#include <net/tun_proto.h>
+#include <net/erspan.h>
#include "flow_netlink.h"
@@ -75,16 +77,20 @@ static bool actions_may_change_flow(const struct nlattr *actions)
break;
case OVS_ACTION_ATTR_CT:
+ case OVS_ACTION_ATTR_CT_CLEAR:
case OVS_ACTION_ATTR_HASH:
case OVS_ACTION_ATTR_POP_ETH:
case OVS_ACTION_ATTR_POP_MPLS:
+ case OVS_ACTION_ATTR_POP_NSH:
case OVS_ACTION_ATTR_POP_VLAN:
case OVS_ACTION_ATTR_PUSH_ETH:
case OVS_ACTION_ATTR_PUSH_MPLS:
+ case OVS_ACTION_ATTR_PUSH_NSH:
case OVS_ACTION_ATTR_PUSH_VLAN:
case OVS_ACTION_ATTR_SAMPLE:
case OVS_ACTION_ATTR_SET:
case OVS_ACTION_ATTR_SET_MASKED:
+ case OVS_ACTION_ATTR_METER:
default:
return true;
}
@@ -173,7 +179,8 @@ static bool match_validate(const struct sw_flow_match *match,
| (1 << OVS_KEY_ATTR_ICMPV6)
| (1 << OVS_KEY_ATTR_ARP)
| (1 << OVS_KEY_ATTR_ND)
- | (1 << OVS_KEY_ATTR_MPLS));
+ | (1 << OVS_KEY_ATTR_MPLS)
+ | (1 << OVS_KEY_ATTR_NSH));
/* Always allowed mask fields. */
mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL)
@@ -282,6 +289,14 @@ static bool match_validate(const struct sw_flow_match *match,
}
}
+ if (match->key->eth.type == htons(ETH_P_NSH)) {
+ key_expected |= 1 << OVS_KEY_ATTR_NSH;
+ if (match->mask &&
+ match->mask->key.eth.type == htons(0xffff)) {
+ mask_allowed |= 1 << OVS_KEY_ATTR_NSH;
+ }
+ }
+
if ((key_attrs & key_expected) != key_expected) {
/* Key attributes check failed. */
OVS_NLERR(log, "Missing key (keys=%llx, expected=%llx)",
@@ -319,7 +334,21 @@ size_t ovs_tun_key_attr_size(void)
* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it.
*/
+ nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_SRC */
- + nla_total_size(2); /* OVS_TUNNEL_KEY_ATTR_TP_DST */
+ + nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_DST */
+ + nla_total_size(4); /* OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS */
+}
+
+static size_t ovs_nsh_key_attr_size(void)
+{
+ /* Whenever adding new OVS_NSH_KEY_ FIELDS, we should consider
+ * updating this function.
+ */
+ return nla_total_size(NSH_BASE_HDR_LEN) /* OVS_NSH_KEY_ATTR_BASE */
+ /* OVS_NSH_KEY_ATTR_MD1 and OVS_NSH_KEY_ATTR_MD2 are
+ * mutually exclusive, so the bigger one can cover
+ * the small one.
+ */
+ + nla_total_size(NSH_CTX_HDRS_MAX_LEN);
}
size_t ovs_key_attr_size(void)
@@ -327,7 +356,7 @@ size_t ovs_key_attr_size(void)
/* Whenever adding new OVS_KEY_ FIELDS, we should consider
* updating this function.
*/
- BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 28);
+ BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 29);
return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */
+ nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */
@@ -341,6 +370,8 @@ size_t ovs_key_attr_size(void)
+ nla_total_size(4) /* OVS_KEY_ATTR_CT_MARK */
+ nla_total_size(16) /* OVS_KEY_ATTR_CT_LABELS */
+ nla_total_size(40) /* OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6 */
+ + nla_total_size(0) /* OVS_KEY_ATTR_NSH */
+ + ovs_nsh_key_attr_size()
+ nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */
+ nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */
+ nla_total_size(4) /* OVS_KEY_ATTR_VLAN */
@@ -371,6 +402,14 @@ static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1]
.next = ovs_vxlan_ext_key_lens },
[OVS_TUNNEL_KEY_ATTR_IPV6_SRC] = { .len = sizeof(struct in6_addr) },
[OVS_TUNNEL_KEY_ATTR_IPV6_DST] = { .len = sizeof(struct in6_addr) },
+ [OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS] = { .len = sizeof(u32) },
+};
+
+static const struct ovs_len_tbl
+ovs_nsh_key_attr_lens[OVS_NSH_KEY_ATTR_MAX + 1] = {
+ [OVS_NSH_KEY_ATTR_BASE] = { .len = sizeof(struct ovs_nsh_key_base) },
+ [OVS_NSH_KEY_ATTR_MD1] = { .len = sizeof(struct ovs_nsh_key_md1) },
+ [OVS_NSH_KEY_ATTR_MD2] = { .len = OVS_ATTR_VARIABLE },
};
/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */
@@ -405,6 +444,8 @@ static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
.len = sizeof(struct ovs_key_ct_tuple_ipv4) },
[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6] = {
.len = sizeof(struct ovs_key_ct_tuple_ipv6) },
+ [OVS_KEY_ATTR_NSH] = { .len = OVS_ATTR_NESTED,
+ .next = ovs_nsh_key_attr_lens, },
};
static bool check_attr_len(unsigned int attr_len, unsigned int expected_len)
@@ -593,6 +634,33 @@ static int vxlan_tun_opt_from_nlattr(const struct nlattr *attr,
return 0;
}
+static int erspan_tun_opt_from_nlattr(const struct nlattr *attr,
+ struct sw_flow_match *match, bool is_mask,
+ bool log)
+{
+ unsigned long opt_key_offset;
+ struct erspan_metadata opts;
+
+ BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts));
+
+ memset(&opts, 0, sizeof(opts));
+ opts.index = nla_get_be32(attr);
+
+ /* Index has only 20-bit */
+ if (ntohl(opts.index) & ~INDEX_MASK) {
+ OVS_NLERR(log, "ERSPAN index number %x too large.",
+ ntohl(opts.index));
+ return -EINVAL;
+ }
+
+ SW_FLOW_KEY_PUT(match, tun_opts_len, sizeof(opts), is_mask);
+ opt_key_offset = TUN_METADATA_OFFSET(sizeof(opts));
+ SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, &opts, sizeof(opts),
+ is_mask);
+
+ return 0;
+}
+
static int ip_tun_from_nlattr(const struct nlattr *attr,
struct sw_flow_match *match, bool is_mask,
bool log)
@@ -700,6 +768,19 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
break;
case OVS_TUNNEL_KEY_ATTR_PAD:
break;
+ case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS:
+ if (opts_type) {
+ OVS_NLERR(log, "Multiple metadata blocks provided");
+ return -EINVAL;
+ }
+
+ err = erspan_tun_opt_from_nlattr(a, match, is_mask, log);
+ if (err)
+ return err;
+
+ tun_flags |= TUNNEL_ERSPAN_OPT;
+ opts_type = type;
+ break;
default:
OVS_NLERR(log, "Unknown IP tunnel attribute %d",
type);
@@ -824,6 +905,10 @@ static int __ip_tun_to_nlattr(struct sk_buff *skb,
else if (output->tun_flags & TUNNEL_VXLAN_OPT &&
vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len))
return -EMSGSIZE;
+ else if (output->tun_flags & TUNNEL_ERSPAN_OPT &&
+ nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS,
+ ((struct erspan_metadata *)tun_opts)->index))
+ return -EMSGSIZE;
}
return 0;
@@ -1179,6 +1264,221 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
return 0;
}
+int nsh_hdr_from_nlattr(const struct nlattr *attr,
+ struct nshhdr *nh, size_t size)
+{
+ struct nlattr *a;
+ int rem;
+ u8 flags = 0;
+ u8 ttl = 0;
+ int mdlen = 0;
+
+ /* validate_nsh has check this, so we needn't do duplicate check here
+ */
+ if (size < NSH_BASE_HDR_LEN)
+ return -ENOBUFS;
+
+ nla_for_each_nested(a, attr, rem) {
+ int type = nla_type(a);
+
+ switch (type) {
+ case OVS_NSH_KEY_ATTR_BASE: {
+ const struct ovs_nsh_key_base *base = nla_data(a);
+
+ flags = base->flags;
+ ttl = base->ttl;
+ nh->np = base->np;
+ nh->mdtype = base->mdtype;
+ nh->path_hdr = base->path_hdr;
+ break;
+ }
+ case OVS_NSH_KEY_ATTR_MD1:
+ mdlen = nla_len(a);
+ if (mdlen > size - NSH_BASE_HDR_LEN)
+ return -ENOBUFS;
+ memcpy(&nh->md1, nla_data(a), mdlen);
+ break;
+
+ case OVS_NSH_KEY_ATTR_MD2:
+ mdlen = nla_len(a);
+ if (mdlen > size - NSH_BASE_HDR_LEN)
+ return -ENOBUFS;
+ memcpy(&nh->md2, nla_data(a), mdlen);
+ break;
+
+ default:
+ return -EINVAL;
+ }
+ }
+
+ /* nsh header length = NSH_BASE_HDR_LEN + mdlen */
+ nh->ver_flags_ttl_len = 0;
+ nsh_set_flags_ttl_len(nh, flags, ttl, NSH_BASE_HDR_LEN + mdlen);
+
+ return 0;
+}
+
+int nsh_key_from_nlattr(const struct nlattr *attr,
+ struct ovs_key_nsh *nsh, struct ovs_key_nsh *nsh_mask)
+{
+ struct nlattr *a;
+ int rem;
+
+ /* validate_nsh has check this, so we needn't do duplicate check here
+ */
+ nla_for_each_nested(a, attr, rem) {
+ int type = nla_type(a);
+
+ switch (type) {
+ case OVS_NSH_KEY_ATTR_BASE: {
+ const struct ovs_nsh_key_base *base = nla_data(a);
+ const struct ovs_nsh_key_base *base_mask = base + 1;
+
+ nsh->base = *base;
+ nsh_mask->base = *base_mask;
+ break;
+ }
+ case OVS_NSH_KEY_ATTR_MD1: {
+ const struct ovs_nsh_key_md1 *md1 = nla_data(a);
+ const struct ovs_nsh_key_md1 *md1_mask = md1 + 1;
+
+ memcpy(nsh->context, md1->context, sizeof(*md1));
+ memcpy(nsh_mask->context, md1_mask->context,
+ sizeof(*md1_mask));
+ break;
+ }
+ case OVS_NSH_KEY_ATTR_MD2:
+ /* Not supported yet */
+ return -ENOTSUPP;
+ default:
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static int nsh_key_put_from_nlattr(const struct nlattr *attr,
+ struct sw_flow_match *match, bool is_mask,
+ bool is_push_nsh, bool log)
+{
+ struct nlattr *a;
+ int rem;
+ bool has_base = false;
+ bool has_md1 = false;
+ bool has_md2 = false;
+ u8 mdtype = 0;
+ int mdlen = 0;
+
+ if (WARN_ON(is_push_nsh && is_mask))
+ return -EINVAL;
+
+ nla_for_each_nested(a, attr, rem) {
+ int type = nla_type(a);
+ int i;
+
+ if (type > OVS_NSH_KEY_ATTR_MAX) {
+ OVS_NLERR(log, "nsh attr %d is out of range max %d",
+ type, OVS_NSH_KEY_ATTR_MAX);
+ return -EINVAL;
+ }
+
+ if (!check_attr_len(nla_len(a),
+ ovs_nsh_key_attr_lens[type].len)) {
+ OVS_NLERR(
+ log,
+ "nsh attr %d has unexpected len %d expected %d",
+ type,
+ nla_len(a),
+ ovs_nsh_key_attr_lens[type].len
+ );
+ return -EINVAL;
+ }
+
+ switch (type) {
+ case OVS_NSH_KEY_ATTR_BASE: {
+ const struct ovs_nsh_key_base *base = nla_data(a);
+
+ has_base = true;
+ mdtype = base->mdtype;
+ SW_FLOW_KEY_PUT(match, nsh.base.flags,
+ base->flags, is_mask);
+ SW_FLOW_KEY_PUT(match, nsh.base.ttl,
+ base->ttl, is_mask);
+ SW_FLOW_KEY_PUT(match, nsh.base.mdtype,
+ base->mdtype, is_mask);
+ SW_FLOW_KEY_PUT(match, nsh.base.np,
+ base->np, is_mask);
+ SW_FLOW_KEY_PUT(match, nsh.base.path_hdr,
+ base->path_hdr, is_mask);
+ break;
+ }
+ case OVS_NSH_KEY_ATTR_MD1: {
+ const struct ovs_nsh_key_md1 *md1 = nla_data(a);
+
+ has_md1 = true;
+ for (i = 0; i < NSH_MD1_CONTEXT_SIZE; i++)
+ SW_FLOW_KEY_PUT(match, nsh.context[i],
+ md1->context[i], is_mask);
+ break;
+ }
+ case OVS_NSH_KEY_ATTR_MD2:
+ if (!is_push_nsh) /* Not supported MD type 2 yet */
+ return -ENOTSUPP;
+
+ has_md2 = true;
+ mdlen = nla_len(a);
+ if (mdlen > NSH_CTX_HDRS_MAX_LEN || mdlen <= 0) {
+ OVS_NLERR(
+ log,
+ "Invalid MD length %d for MD type %d",
+ mdlen,
+ mdtype
+ );
+ return -EINVAL;
+ }
+ break;
+ default:
+ OVS_NLERR(log, "Unknown nsh attribute %d",
+ type);
+ return -EINVAL;
+ }
+ }
+
+ if (rem > 0) {
+ OVS_NLERR(log, "nsh attribute has %d unknown bytes.", rem);
+ return -EINVAL;
+ }
+
+ if (has_md1 && has_md2) {
+ OVS_NLERR(
+ 1,
+ "invalid nsh attribute: md1 and md2 are exclusive."
+ );
+ return -EINVAL;
+ }
+
+ if (!is_mask) {
+ if ((has_md1 && mdtype != NSH_M_TYPE1) ||
+ (has_md2 && mdtype != NSH_M_TYPE2)) {
+ OVS_NLERR(1, "nsh attribute has unmatched MD type %d.",
+ mdtype);
+ return -EINVAL;
+ }
+
+ if (is_push_nsh &&
+ (!has_base || (!has_md1 && !has_md2))) {
+ OVS_NLERR(
+ 1,
+ "push_nsh: missing base or metadata attributes"
+ );
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,
u64 attrs, const struct nlattr **a,
bool is_mask, bool log)
@@ -1255,7 +1555,7 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,
}
if (!is_mask && ipv6_key->ipv6_label & htonl(0xFFF00000)) {
- OVS_NLERR(log, "IPv6 flow label %x is out of range (max=%x).\n",
+ OVS_NLERR(log, "IPv6 flow label %x is out of range (max=%x)",
ntohl(ipv6_key->ipv6_label), (1 << 20) - 1);
return -EINVAL;
}
@@ -1306,6 +1606,13 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,
attrs &= ~(1 << OVS_KEY_ATTR_ARP);
}
+ if (attrs & (1 << OVS_KEY_ATTR_NSH)) {
+ if (nsh_key_put_from_nlattr(a[OVS_KEY_ATTR_NSH], match,
+ is_mask, false, log) < 0)
+ return -EINVAL;
+ attrs &= ~(1 << OVS_KEY_ATTR_NSH);
+ }
+
if (attrs & (1 << OVS_KEY_ATTR_MPLS)) {
const struct ovs_key_mpls *mpls_key;
@@ -1622,6 +1929,34 @@ static int ovs_nla_put_vlan(struct sk_buff *skb, const struct vlan_head *vh,
return 0;
}
+static int nsh_key_to_nlattr(const struct ovs_key_nsh *nsh, bool is_mask,
+ struct sk_buff *skb)
+{
+ struct nlattr *start;
+
+ start = nla_nest_start(skb, OVS_KEY_ATTR_NSH);
+ if (!start)
+ return -EMSGSIZE;
+
+ if (nla_put(skb, OVS_NSH_KEY_ATTR_BASE, sizeof(nsh->base), &nsh->base))
+ goto nla_put_failure;
+
+ if (is_mask || nsh->base.mdtype == NSH_M_TYPE1) {
+ if (nla_put(skb, OVS_NSH_KEY_ATTR_MD1,
+ sizeof(nsh->context), nsh->context))
+ goto nla_put_failure;
+ }
+
+ /* Don't support MD type 2 yet */
+
+ nla_nest_end(skb, start);
+
+ return 0;
+
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
const struct sw_flow_key *output, bool is_mask,
struct sk_buff *skb)
@@ -1750,6 +2085,9 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
ipv6_key->ipv6_tclass = output->ip.tos;
ipv6_key->ipv6_hlimit = output->ip.ttl;
ipv6_key->ipv6_frag = output->ip.frag;
+ } else if (swkey->eth.type == htons(ETH_P_NSH)) {
+ if (nsh_key_to_nlattr(&output->nsh, is_mask, skb))
+ goto nla_put_failure;
} else if (swkey->eth.type == htons(ETH_P_ARP) ||
swkey->eth.type == htons(ETH_P_RARP)) {
struct ovs_key_arp *arp_key;
@@ -2195,6 +2533,8 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
break;
case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
break;
+ case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS:
+ break;
}
};
@@ -2242,6 +2582,19 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
return err;
}
+static bool validate_nsh(const struct nlattr *attr, bool is_mask,
+ bool is_push_nsh, bool log)
+{
+ struct sw_flow_match match;
+ struct sw_flow_key key;
+ int ret = 0;
+
+ ovs_match_init(&match, &key, true, NULL);
+ ret = nsh_key_put_from_nlattr(attr, &match, is_mask,
+ is_push_nsh, log);
+ return !ret;
+}
+
/* Return false if there are any non-masked bits set.
* Mask follows data immediately, before any netlink padding.
*/
@@ -2384,6 +2737,13 @@ static int validate_set(const struct nlattr *a,
break;
+ case OVS_KEY_ATTR_NSH:
+ if (eth_type != htons(ETH_P_NSH))
+ return -EINVAL;
+ if (!validate_nsh(nla_data(a), masked, false, log))
+ return -EINVAL;
+ break;
+
default:
return -EINVAL;
}
@@ -2479,9 +2839,13 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
[OVS_ACTION_ATTR_SAMPLE] = (u32)-1,
[OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash),
[OVS_ACTION_ATTR_CT] = (u32)-1,
+ [OVS_ACTION_ATTR_CT_CLEAR] = 0,
[OVS_ACTION_ATTR_TRUNC] = sizeof(struct ovs_action_trunc),
[OVS_ACTION_ATTR_PUSH_ETH] = sizeof(struct ovs_action_push_eth),
[OVS_ACTION_ATTR_POP_ETH] = 0,
+ [OVS_ACTION_ATTR_PUSH_NSH] = (u32)-1,
+ [OVS_ACTION_ATTR_POP_NSH] = 0,
+ [OVS_ACTION_ATTR_METER] = sizeof(u32),
};
const struct ovs_action_push_vlan *vlan;
int type = nla_type(a);
@@ -2620,6 +2984,9 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
skip_copy = true;
break;
+ case OVS_ACTION_ATTR_CT_CLEAR:
+ break;
+
case OVS_ACTION_ATTR_PUSH_ETH:
/* Disallow pushing an Ethernet header if one
* is already present */
@@ -2636,6 +3003,38 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
mac_proto = MAC_PROTO_ETHERNET;
break;
+ case OVS_ACTION_ATTR_PUSH_NSH:
+ if (mac_proto != MAC_PROTO_ETHERNET) {
+ u8 next_proto;
+
+ next_proto = tun_p_from_eth_p(eth_type);
+ if (!next_proto)
+ return -EINVAL;
+ }
+ mac_proto = MAC_PROTO_NONE;
+ if (!validate_nsh(nla_data(a), false, true, true))
+ return -EINVAL;
+ break;
+
+ case OVS_ACTION_ATTR_POP_NSH: {
+ __be16 inner_proto;
+
+ if (eth_type != htons(ETH_P_NSH))
+ return -EINVAL;
+ inner_proto = tun_p_to_eth_p(key->nsh.base.np);
+ if (!inner_proto)
+ return -EINVAL;
+ if (key->nsh.base.np == TUN_P_ETHERNET)
+ mac_proto = MAC_PROTO_ETHERNET;
+ else
+ mac_proto = MAC_PROTO_NONE;
+ break;
+ }
+
+ case OVS_ACTION_ATTR_METER:
+ /* Non-existent meters are simply ignored. */
+ break;
+
default:
OVS_NLERR(log, "Unknown Action type %d", type);
return -EINVAL;
diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h
index 929c665ac3aa..6657606b2b47 100644
--- a/net/openvswitch/flow_netlink.h
+++ b/net/openvswitch/flow_netlink.h
@@ -79,4 +79,9 @@ int ovs_nla_put_actions(const struct nlattr *attr,
void ovs_nla_free_flow_actions(struct sw_flow_actions *);
void ovs_nla_free_flow_actions_rcu(struct sw_flow_actions *);
+int nsh_key_from_nlattr(const struct nlattr *attr, struct ovs_key_nsh *nsh,
+ struct ovs_key_nsh *nsh_mask);
+int nsh_hdr_from_nlattr(const struct nlattr *attr, struct nshhdr *nh,
+ size_t size);
+
#endif /* flow_netlink.h */
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index ea7a8073fa02..80ea2a71852e 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -98,6 +98,8 @@ struct sw_flow *ovs_flow_alloc(void)
RCU_INIT_POINTER(flow->stats[0], stats);
+ cpumask_set_cpu(0, &flow->cpu_used_mask);
+
return flow;
err:
kmem_cache_free(flow_cache, flow);
@@ -141,7 +143,7 @@ static void flow_free(struct sw_flow *flow)
if (flow->sf_acts)
ovs_nla_free_flow_actions((struct sw_flow_actions __force *)flow->sf_acts);
/* We open code this to make sure cpu 0 is always considered */
- for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpu_possible_mask))
+ for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask))
if (flow->stats[cpu])
kmem_cache_free(flow_stats_cache,
(struct flow_stats __force *)flow->stats[cpu]);
diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
new file mode 100644
index 000000000000..3fbfc78991ac
--- /dev/null
+++ b/net/openvswitch/meter.c
@@ -0,0 +1,597 @@
+/*
+ * Copyright (c) 2017 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/if.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/kernel.h>
+#include <linux/openvswitch.h>
+#include <linux/netlink.h>
+#include <linux/rculist.h>
+
+#include <net/netlink.h>
+#include <net/genetlink.h>
+
+#include "datapath.h"
+#include "meter.h"
+
+#define METER_HASH_BUCKETS 1024
+
+static const struct nla_policy meter_policy[OVS_METER_ATTR_MAX + 1] = {
+ [OVS_METER_ATTR_ID] = { .type = NLA_U32, },
+ [OVS_METER_ATTR_KBPS] = { .type = NLA_FLAG },
+ [OVS_METER_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) },
+ [OVS_METER_ATTR_BANDS] = { .type = NLA_NESTED },
+ [OVS_METER_ATTR_USED] = { .type = NLA_U64 },
+ [OVS_METER_ATTR_CLEAR] = { .type = NLA_FLAG },
+ [OVS_METER_ATTR_MAX_METERS] = { .type = NLA_U32 },
+ [OVS_METER_ATTR_MAX_BANDS] = { .type = NLA_U32 },
+};
+
+static const struct nla_policy band_policy[OVS_BAND_ATTR_MAX + 1] = {
+ [OVS_BAND_ATTR_TYPE] = { .type = NLA_U32, },
+ [OVS_BAND_ATTR_RATE] = { .type = NLA_U32, },
+ [OVS_BAND_ATTR_BURST] = { .type = NLA_U32, },
+ [OVS_BAND_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) },
+};
+
+static void ovs_meter_free(struct dp_meter *meter)
+{
+ if (!meter)
+ return;
+
+ kfree_rcu(meter, rcu);
+}
+
+static struct hlist_head *meter_hash_bucket(const struct datapath *dp,
+ u32 meter_id)
+{
+ return &dp->meters[meter_id & (METER_HASH_BUCKETS - 1)];
+}
+
+/* Call with ovs_mutex or RCU read lock. */
+static struct dp_meter *lookup_meter(const struct datapath *dp,
+ u32 meter_id)
+{
+ struct dp_meter *meter;
+ struct hlist_head *head;
+
+ head = meter_hash_bucket(dp, meter_id);
+ hlist_for_each_entry_rcu(meter, head, dp_hash_node) {
+ if (meter->id == meter_id)
+ return meter;
+ }
+ return NULL;
+}
+
+static void attach_meter(struct datapath *dp, struct dp_meter *meter)
+{
+ struct hlist_head *head = meter_hash_bucket(dp, meter->id);
+
+ hlist_add_head_rcu(&meter->dp_hash_node, head);
+}
+
+static void detach_meter(struct dp_meter *meter)
+{
+ ASSERT_OVSL();
+ if (meter)
+ hlist_del_rcu(&meter->dp_hash_node);
+}
+
+static struct sk_buff *
+ovs_meter_cmd_reply_start(struct genl_info *info, u8 cmd,
+ struct ovs_header **ovs_reply_header)
+{
+ struct sk_buff *skb;
+ struct ovs_header *ovs_header = info->userhdr;
+
+ skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+ if (!skb)
+ return ERR_PTR(-ENOMEM);
+
+ *ovs_reply_header = genlmsg_put(skb, info->snd_portid,
+ info->snd_seq,
+ &dp_meter_genl_family, 0, cmd);
+ if (!*ovs_reply_header) {
+ nlmsg_free(skb);
+ return ERR_PTR(-EMSGSIZE);
+ }
+ (*ovs_reply_header)->dp_ifindex = ovs_header->dp_ifindex;
+
+ return skb;
+}
+
+static int ovs_meter_cmd_reply_stats(struct sk_buff *reply, u32 meter_id,
+ struct dp_meter *meter)
+{
+ struct nlattr *nla;
+ struct dp_meter_band *band;
+ u16 i;
+
+ if (nla_put_u32(reply, OVS_METER_ATTR_ID, meter_id))
+ goto error;
+
+ if (!meter)
+ return 0;
+
+ if (nla_put(reply, OVS_METER_ATTR_STATS,
+ sizeof(struct ovs_flow_stats), &meter->stats) ||
+ nla_put_u64_64bit(reply, OVS_METER_ATTR_USED, meter->used,
+ OVS_METER_ATTR_PAD))
+ goto error;
+
+ nla = nla_nest_start(reply, OVS_METER_ATTR_BANDS);
+ if (!nla)
+ goto error;
+
+ band = meter->bands;
+
+ for (i = 0; i < meter->n_bands; ++i, ++band) {
+ struct nlattr *band_nla;
+
+ band_nla = nla_nest_start(reply, OVS_BAND_ATTR_UNSPEC);
+ if (!band_nla || nla_put(reply, OVS_BAND_ATTR_STATS,
+ sizeof(struct ovs_flow_stats),
+ &band->stats))
+ goto error;
+ nla_nest_end(reply, band_nla);
+ }
+ nla_nest_end(reply, nla);
+
+ return 0;
+error:
+ return -EMSGSIZE;
+}
+
+static int ovs_meter_cmd_features(struct sk_buff *skb, struct genl_info *info)
+{
+ struct sk_buff *reply;
+ struct ovs_header *ovs_reply_header;
+ struct nlattr *nla, *band_nla;
+ int err;
+
+ reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_FEATURES,
+ &ovs_reply_header);
+ if (IS_ERR(reply))
+ return PTR_ERR(reply);
+
+ if (nla_put_u32(reply, OVS_METER_ATTR_MAX_METERS, U32_MAX) ||
+ nla_put_u32(reply, OVS_METER_ATTR_MAX_BANDS, DP_MAX_BANDS))
+ goto nla_put_failure;
+
+ nla = nla_nest_start(reply, OVS_METER_ATTR_BANDS);
+ if (!nla)
+ goto nla_put_failure;
+
+ band_nla = nla_nest_start(reply, OVS_BAND_ATTR_UNSPEC);
+ if (!band_nla)
+ goto nla_put_failure;
+ /* Currently only DROP band type is supported. */
+ if (nla_put_u32(reply, OVS_BAND_ATTR_TYPE, OVS_METER_BAND_TYPE_DROP))
+ goto nla_put_failure;
+ nla_nest_end(reply, band_nla);
+ nla_nest_end(reply, nla);
+
+ genlmsg_end(reply, ovs_reply_header);
+ return genlmsg_reply(reply, info);
+
+nla_put_failure:
+ nlmsg_free(reply);
+ err = -EMSGSIZE;
+ return err;
+}
+
+static struct dp_meter *dp_meter_create(struct nlattr **a)
+{
+ struct nlattr *nla;
+ int rem;
+ u16 n_bands = 0;
+ struct dp_meter *meter;
+ struct dp_meter_band *band;
+ int err;
+
+ /* Validate attributes, count the bands. */
+ if (!a[OVS_METER_ATTR_BANDS])
+ return ERR_PTR(-EINVAL);
+
+ nla_for_each_nested(nla, a[OVS_METER_ATTR_BANDS], rem)
+ if (++n_bands > DP_MAX_BANDS)
+ return ERR_PTR(-EINVAL);
+
+ /* Allocate and set up the meter before locking anything. */
+ meter = kzalloc(n_bands * sizeof(struct dp_meter_band) +
+ sizeof(*meter), GFP_KERNEL);
+ if (!meter)
+ return ERR_PTR(-ENOMEM);
+
+ meter->used = div_u64(ktime_get_ns(), 1000 * 1000);
+ meter->kbps = a[OVS_METER_ATTR_KBPS] ? 1 : 0;
+ meter->keep_stats = !a[OVS_METER_ATTR_CLEAR];
+ spin_lock_init(&meter->lock);
+ if (meter->keep_stats && a[OVS_METER_ATTR_STATS]) {
+ meter->stats = *(struct ovs_flow_stats *)
+ nla_data(a[OVS_METER_ATTR_STATS]);
+ }
+ meter->n_bands = n_bands;
+
+ /* Set up meter bands. */
+ band = meter->bands;
+ nla_for_each_nested(nla, a[OVS_METER_ATTR_BANDS], rem) {
+ struct nlattr *attr[OVS_BAND_ATTR_MAX + 1];
+ u32 band_max_delta_t;
+
+ err = nla_parse((struct nlattr **)&attr, OVS_BAND_ATTR_MAX,
+ nla_data(nla), nla_len(nla), band_policy,
+ NULL);
+ if (err)
+ goto exit_free_meter;
+
+ if (!attr[OVS_BAND_ATTR_TYPE] ||
+ !attr[OVS_BAND_ATTR_RATE] ||
+ !attr[OVS_BAND_ATTR_BURST]) {
+ err = -EINVAL;
+ goto exit_free_meter;
+ }
+
+ band->type = nla_get_u32(attr[OVS_BAND_ATTR_TYPE]);
+ band->rate = nla_get_u32(attr[OVS_BAND_ATTR_RATE]);
+ band->burst_size = nla_get_u32(attr[OVS_BAND_ATTR_BURST]);
+ /* Figure out max delta_t that is enough to fill any bucket.
+ * Keep max_delta_t size to the bucket units:
+ * pkts => 1/1000 packets, kilobits => bits.
+ */
+ band_max_delta_t = (band->burst_size + band->rate) * 1000;
+ /* Start with a full bucket. */
+ band->bucket = band_max_delta_t;
+ if (band_max_delta_t > meter->max_delta_t)
+ meter->max_delta_t = band_max_delta_t;
+ band++;
+ }
+
+ return meter;
+
+exit_free_meter:
+ kfree(meter);
+ return ERR_PTR(err);
+}
+
+static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr **a = info->attrs;
+ struct dp_meter *meter, *old_meter;
+ struct sk_buff *reply;
+ struct ovs_header *ovs_reply_header;
+ struct ovs_header *ovs_header = info->userhdr;
+ struct datapath *dp;
+ int err;
+ u32 meter_id;
+ bool failed;
+
+ meter = dp_meter_create(a);
+ if (IS_ERR_OR_NULL(meter))
+ return PTR_ERR(meter);
+
+ reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_SET,
+ &ovs_reply_header);
+ if (IS_ERR(reply)) {
+ err = PTR_ERR(reply);
+ goto exit_free_meter;
+ }
+
+ ovs_lock();
+ dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
+ if (!dp) {
+ err = -ENODEV;
+ goto exit_unlock;
+ }
+
+ if (!a[OVS_METER_ATTR_ID]) {
+ err = -ENODEV;
+ goto exit_unlock;
+ }
+
+ meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
+
+ /* Cannot fail after this. */
+ old_meter = lookup_meter(dp, meter_id);
+ detach_meter(old_meter);
+ attach_meter(dp, meter);
+ ovs_unlock();
+
+ /* Build response with the meter_id and stats from
+ * the old meter, if any.
+ */
+ failed = nla_put_u32(reply, OVS_METER_ATTR_ID, meter_id);
+ WARN_ON(failed);
+ if (old_meter) {
+ spin_lock_bh(&old_meter->lock);
+ if (old_meter->keep_stats) {
+ err = ovs_meter_cmd_reply_stats(reply, meter_id,
+ old_meter);
+ WARN_ON(err);
+ }
+ spin_unlock_bh(&old_meter->lock);
+ ovs_meter_free(old_meter);
+ }
+
+ genlmsg_end(reply, ovs_reply_header);
+ return genlmsg_reply(reply, info);
+
+exit_unlock:
+ ovs_unlock();
+ nlmsg_free(reply);
+exit_free_meter:
+ kfree(meter);
+ return err;
+}
+
+static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr **a = info->attrs;
+ u32 meter_id;
+ struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_reply_header;
+ struct datapath *dp;
+ int err;
+ struct sk_buff *reply;
+ struct dp_meter *meter;
+
+ if (!a[OVS_METER_ATTR_ID])
+ return -EINVAL;
+
+ meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
+
+ reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_GET,
+ &ovs_reply_header);
+ if (IS_ERR(reply))
+ return PTR_ERR(reply);
+
+ ovs_lock();
+
+ dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
+ if (!dp) {
+ err = -ENODEV;
+ goto exit_unlock;
+ }
+
+ /* Locate meter, copy stats. */
+ meter = lookup_meter(dp, meter_id);
+ if (!meter) {
+ err = -ENOENT;
+ goto exit_unlock;
+ }
+
+ spin_lock_bh(&meter->lock);
+ err = ovs_meter_cmd_reply_stats(reply, meter_id, meter);
+ spin_unlock_bh(&meter->lock);
+ if (err)
+ goto exit_unlock;
+
+ ovs_unlock();
+
+ genlmsg_end(reply, ovs_reply_header);
+ return genlmsg_reply(reply, info);
+
+exit_unlock:
+ ovs_unlock();
+ nlmsg_free(reply);
+ return err;
+}
+
+static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr **a = info->attrs;
+ u32 meter_id;
+ struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_reply_header;
+ struct datapath *dp;
+ int err;
+ struct sk_buff *reply;
+ struct dp_meter *old_meter;
+
+ if (!a[OVS_METER_ATTR_ID])
+ return -EINVAL;
+ meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
+
+ reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_DEL,
+ &ovs_reply_header);
+ if (IS_ERR(reply))
+ return PTR_ERR(reply);
+
+ ovs_lock();
+
+ dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
+ if (!dp) {
+ err = -ENODEV;
+ goto exit_unlock;
+ }
+
+ old_meter = lookup_meter(dp, meter_id);
+ if (old_meter) {
+ spin_lock_bh(&old_meter->lock);
+ err = ovs_meter_cmd_reply_stats(reply, meter_id, old_meter);
+ WARN_ON(err);
+ spin_unlock_bh(&old_meter->lock);
+ detach_meter(old_meter);
+ }
+ ovs_unlock();
+ ovs_meter_free(old_meter);
+ genlmsg_end(reply, ovs_reply_header);
+ return genlmsg_reply(reply, info);
+
+exit_unlock:
+ ovs_unlock();
+ nlmsg_free(reply);
+ return err;
+}
+
+/* Meter action execution.
+ *
+ * Return true 'meter_id' drop band is triggered. The 'skb' should be
+ * dropped by the caller'.
+ */
+bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb,
+ struct sw_flow_key *key, u32 meter_id)
+{
+ struct dp_meter *meter;
+ struct dp_meter_band *band;
+ long long int now_ms = div_u64(ktime_get_ns(), 1000 * 1000);
+ long long int long_delta_ms;
+ u32 delta_ms;
+ u32 cost;
+ int i, band_exceeded_max = -1;
+ u32 band_exceeded_rate = 0;
+
+ meter = lookup_meter(dp, meter_id);
+ /* Do not drop the packet when there is no meter. */
+ if (!meter)
+ return false;
+
+ /* Lock the meter while using it. */
+ spin_lock(&meter->lock);
+
+ long_delta_ms = (now_ms - meter->used); /* ms */
+
+ /* Make sure delta_ms will not be too large, so that bucket will not
+ * wrap around below.
+ */
+ delta_ms = (long_delta_ms > (long long int)meter->max_delta_t)
+ ? meter->max_delta_t : (u32)long_delta_ms;
+
+ /* Update meter statistics.
+ */
+ meter->used = now_ms;
+ meter->stats.n_packets += 1;
+ meter->stats.n_bytes += skb->len;
+
+ /* Bucket rate is either in kilobits per second, or in packets per
+ * second. We maintain the bucket in the units of either bits or
+ * 1/1000th of a packet, correspondingly.
+ * Then, when rate is multiplied with milliseconds, we get the
+ * bucket units:
+ * msec * kbps = bits, and
+ * msec * packets/sec = 1/1000 packets.
+ *
+ * 'cost' is the number of bucket units in this packet.
+ */
+ cost = (meter->kbps) ? skb->len * 8 : 1000;
+
+ /* Update all bands and find the one hit with the highest rate. */
+ for (i = 0; i < meter->n_bands; ++i) {
+ long long int max_bucket_size;
+
+ band = &meter->bands[i];
+ max_bucket_size = (band->burst_size + band->rate) * 1000;
+
+ band->bucket += delta_ms * band->rate;
+ if (band->bucket > max_bucket_size)
+ band->bucket = max_bucket_size;
+
+ if (band->bucket >= cost) {
+ band->bucket -= cost;
+ } else if (band->rate > band_exceeded_rate) {
+ band_exceeded_rate = band->rate;
+ band_exceeded_max = i;
+ }
+ }
+
+ if (band_exceeded_max >= 0) {
+ /* Update band statistics. */
+ band = &meter->bands[band_exceeded_max];
+ band->stats.n_packets += 1;
+ band->stats.n_bytes += skb->len;
+
+ /* Drop band triggered, let the caller drop the 'skb'. */
+ if (band->type == OVS_METER_BAND_TYPE_DROP) {
+ spin_unlock(&meter->lock);
+ return true;
+ }
+ }
+
+ spin_unlock(&meter->lock);
+ return false;
+}
+
+static struct genl_ops dp_meter_genl_ops[] = {
+ { .cmd = OVS_METER_CMD_FEATURES,
+ .flags = 0, /* OK for unprivileged users. */
+ .policy = meter_policy,
+ .doit = ovs_meter_cmd_features
+ },
+ { .cmd = OVS_METER_CMD_SET,
+ .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN
+ * privilege.
+ */
+ .policy = meter_policy,
+ .doit = ovs_meter_cmd_set,
+ },
+ { .cmd = OVS_METER_CMD_GET,
+ .flags = 0, /* OK for unprivileged users. */
+ .policy = meter_policy,
+ .doit = ovs_meter_cmd_get,
+ },
+ { .cmd = OVS_METER_CMD_DEL,
+ .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN
+ * privilege.
+ */
+ .policy = meter_policy,
+ .doit = ovs_meter_cmd_del
+ },
+};
+
+static const struct genl_multicast_group ovs_meter_multicast_group = {
+ .name = OVS_METER_MCGROUP,
+};
+
+struct genl_family dp_meter_genl_family __ro_after_init = {
+ .hdrsize = sizeof(struct ovs_header),
+ .name = OVS_METER_FAMILY,
+ .version = OVS_METER_VERSION,
+ .maxattr = OVS_METER_ATTR_MAX,
+ .netnsok = true,
+ .parallel_ops = true,
+ .ops = dp_meter_genl_ops,
+ .n_ops = ARRAY_SIZE(dp_meter_genl_ops),
+ .mcgrps = &ovs_meter_multicast_group,
+ .n_mcgrps = 1,
+ .module = THIS_MODULE,
+};
+
+int ovs_meters_init(struct datapath *dp)
+{
+ int i;
+
+ dp->meters = kmalloc_array(METER_HASH_BUCKETS,
+ sizeof(struct hlist_head), GFP_KERNEL);
+
+ if (!dp->meters)
+ return -ENOMEM;
+
+ for (i = 0; i < METER_HASH_BUCKETS; i++)
+ INIT_HLIST_HEAD(&dp->meters[i]);
+
+ return 0;
+}
+
+void ovs_meters_exit(struct datapath *dp)
+{
+ int i;
+
+ for (i = 0; i < METER_HASH_BUCKETS; i++) {
+ struct hlist_head *head = &dp->meters[i];
+ struct dp_meter *meter;
+ struct hlist_node *n;
+
+ hlist_for_each_entry_safe(meter, n, head, dp_hash_node)
+ kfree(meter);
+ }
+
+ kfree(dp->meters);
+}
diff --git a/net/openvswitch/meter.h b/net/openvswitch/meter.h
new file mode 100644
index 000000000000..964ace2650f8
--- /dev/null
+++ b/net/openvswitch/meter.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2017 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#ifndef METER_H
+#define METER_H 1
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/netlink.h>
+#include <linux/openvswitch.h>
+#include <linux/genetlink.h>
+#include <linux/skbuff.h>
+
+#include "flow.h"
+struct datapath;
+
+#define DP_MAX_BANDS 1
+
+struct dp_meter_band {
+ u32 type;
+ u32 rate;
+ u32 burst_size;
+ u32 bucket; /* 1/1000 packets, or in bits */
+ struct ovs_flow_stats stats;
+};
+
+struct dp_meter {
+ spinlock_t lock; /* Per meter lock */
+ struct rcu_head rcu;
+ struct hlist_node dp_hash_node; /*Element in datapath->meters
+ * hash table.
+ */
+ u32 id;
+ u16 kbps:1, keep_stats:1;
+ u16 n_bands;
+ u32 max_delta_t;
+ u64 used;
+ struct ovs_flow_stats stats;
+ struct dp_meter_band bands[];
+};
+
+extern struct genl_family dp_meter_genl_family;
+int ovs_meters_init(struct datapath *dp);
+void ovs_meters_exit(struct datapath *dp);
+bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb,
+ struct sw_flow_key *key, u32 meter_id);
+
+#endif /* meter.h */
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index 0389398fa4ab..2e5e7a41d8ef 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -108,7 +108,8 @@ struct vport *ovs_netdev_link(struct vport *vport, const char *name)
rtnl_lock();
err = netdev_master_upper_dev_link(vport->dev,
- get_dpdev(vport->dp), NULL, NULL);
+ get_dpdev(vport->dp),
+ NULL, NULL, NULL);
if (err)
goto error_unlock;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 008a45ca3112..737092ca9b4e 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -177,8 +177,6 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
#define BLK_PLUS_PRIV(sz_of_priv) \
(BLK_HDR_LEN + ALIGN((sz_of_priv), V3_ALIGNMENT))
-#define PGV_FROM_VMALLOC 1
-
#define BLOCK_STATUS(x) ((x)->hdr.bh1.block_status)
#define BLOCK_NUM_PKTS(x) ((x)->hdr.bh1.num_pkts)
#define BLOCK_O2FP(x) ((x)->hdr.bh1.offset_to_first_pkt)
@@ -203,11 +201,8 @@ static void prb_retire_current_block(struct tpacket_kbdq_core *,
static int prb_queue_frozen(struct tpacket_kbdq_core *);
static void prb_open_block(struct tpacket_kbdq_core *,
struct tpacket_block_desc *);
-static void prb_retire_rx_blk_timer_expired(unsigned long);
+static void prb_retire_rx_blk_timer_expired(struct timer_list *);
static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *);
-static void prb_init_blk_timer(struct packet_sock *,
- struct tpacket_kbdq_core *,
- void (*func) (unsigned long));
static void prb_fill_rxhash(struct tpacket_kbdq_core *, struct tpacket3_hdr *);
static void prb_clear_rxhash(struct tpacket_kbdq_core *,
struct tpacket3_hdr *);
@@ -542,22 +537,14 @@ static void prb_shutdown_retire_blk_timer(struct packet_sock *po,
prb_del_retire_blk_timer(pkc);
}
-static void prb_init_blk_timer(struct packet_sock *po,
- struct tpacket_kbdq_core *pkc,
- void (*func) (unsigned long))
-{
- init_timer(&pkc->retire_blk_timer);
- pkc->retire_blk_timer.data = (long)po;
- pkc->retire_blk_timer.function = func;
- pkc->retire_blk_timer.expires = jiffies;
-}
-
static void prb_setup_retire_blk_timer(struct packet_sock *po)
{
struct tpacket_kbdq_core *pkc;
pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
- prb_init_blk_timer(po, pkc, prb_retire_rx_blk_timer_expired);
+ timer_setup(&pkc->retire_blk_timer, prb_retire_rx_blk_timer_expired,
+ 0);
+ pkc->retire_blk_timer.expires = jiffies;
}
static int prb_calc_retire_blk_tmo(struct packet_sock *po,
@@ -675,9 +662,10 @@ static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *pkc)
* prb_calc_retire_blk_tmo() calculates the tmo.
*
*/
-static void prb_retire_rx_blk_timer_expired(unsigned long data)
+static void prb_retire_rx_blk_timer_expired(struct timer_list *t)
{
- struct packet_sock *po = (struct packet_sock *)data;
+ struct packet_sock *po =
+ from_timer(po, t, rx_ring.prb_bdqc.retire_blk_timer);
struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
unsigned int frozen;
struct tpacket_block_desc *pbd;
@@ -1686,10 +1674,6 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
mutex_lock(&fanout_mutex);
- err = -EINVAL;
- if (!po->running)
- goto out;
-
err = -EALREADY;
if (po->fanout)
goto out;
@@ -1751,7 +1735,10 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
list_add(&match->list, &fanout_list);
}
err = -EINVAL;
- if (match->type == type &&
+
+ spin_lock(&po->bind_lock);
+ if (po->running &&
+ match->type == type &&
match->prot_hook.type == po->prot_hook.type &&
match->prot_hook.dev == po->prot_hook.dev) {
err = -ENOSPC;
@@ -1763,9 +1750,16 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
err = 0;
}
}
+ spin_unlock(&po->bind_lock);
+
+ if (err && !refcount_read(&match->sk_ref)) {
+ list_del(&match->list);
+ kfree(match);
+ }
+
out:
if (err && rollover) {
- kfree(rollover);
+ kfree_rcu(rollover, rcu);
po->rollover = NULL;
}
mutex_unlock(&fanout_mutex);
@@ -1792,8 +1786,10 @@ static struct packet_fanout *fanout_release(struct sock *sk)
else
f = NULL;
- if (po->rollover)
+ if (po->rollover) {
kfree_rcu(po->rollover, rcu);
+ po->rollover = NULL;
+ }
}
mutex_unlock(&fanout_mutex);
@@ -2191,6 +2187,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
struct timespec ts;
__u32 ts_status;
bool is_drop_n_account = false;
+ bool do_vnet = false;
/* struct tpacket{2,3}_hdr is aligned to a multiple of TPACKET_ALIGNMENT.
* We may add members to them until current aligned size without forcing
@@ -2241,8 +2238,10 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
netoff = TPACKET_ALIGN(po->tp_hdrlen +
(maclen < 16 ? 16 : maclen)) +
po->tp_reserve;
- if (po->has_vnet_hdr)
+ if (po->has_vnet_hdr) {
netoff += sizeof(struct virtio_net_hdr);
+ do_vnet = true;
+ }
macoff = netoff - maclen;
}
if (po->tp_version <= TPACKET_V2) {
@@ -2259,8 +2258,10 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
skb_set_owner_r(copy_skb, sk);
}
snaplen = po->rx_ring.frame_size - macoff;
- if ((int)snaplen < 0)
+ if ((int)snaplen < 0) {
snaplen = 0;
+ do_vnet = false;
+ }
}
} else if (unlikely(macoff + snaplen >
GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len)) {
@@ -2273,6 +2274,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
if (unlikely((int)snaplen < 0)) {
snaplen = 0;
macoff = GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len;
+ do_vnet = false;
}
}
spin_lock(&sk->sk_receive_queue.lock);
@@ -2298,7 +2300,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
}
spin_unlock(&sk->sk_receive_queue.lock);
- if (po->has_vnet_hdr) {
+ if (do_vnet) {
if (virtio_net_hdr_from_skb(skb, h.raw + macoff -
sizeof(struct virtio_net_hdr),
vio_le(), true)) {
@@ -2830,6 +2832,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
struct virtio_net_hdr vnet_hdr = { 0 };
int offset = 0;
struct packet_sock *po = pkt_sk(sk);
+ bool has_vnet_hdr = false;
int hlen, tlen, linear;
int extra_len = 0;
@@ -2873,6 +2876,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
err = packet_snd_vnet_parse(msg, &len, &vnet_hdr);
if (err)
goto out_unlock;
+ has_vnet_hdr = true;
}
if (unlikely(sock_flag(sk, SOCK_NOFCS))) {
@@ -2931,7 +2935,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
skb->priority = sk->sk_priority;
skb->mark = sockc.mark;
- if (po->has_vnet_hdr) {
+ if (has_vnet_hdr) {
err = virtio_net_hdr_to_skb(skb, &vnet_hdr, vio_le());
if (err)
goto out_free;
@@ -3059,13 +3063,15 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
int ret = 0;
bool unlisted = false;
- if (po->fanout)
- return -EINVAL;
-
lock_sock(sk);
spin_lock(&po->bind_lock);
rcu_read_lock();
+ if (po->fanout) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
if (name) {
dev = dev_get_by_name_rcu(sock_net(sk), name);
if (!dev) {
@@ -3837,6 +3843,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
void *data = &val;
union tpacket_stats_u st;
struct tpacket_rollover_stats rstats;
+ struct packet_rollover *rollover;
if (level != SOL_PACKET)
return -ENOPROTOOPT;
@@ -3915,13 +3922,18 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
0);
break;
case PACKET_ROLLOVER_STATS:
- if (!po->rollover)
+ rcu_read_lock();
+ rollover = rcu_dereference(po->rollover);
+ if (rollover) {
+ rstats.tp_all = atomic_long_read(&rollover->num);
+ rstats.tp_huge = atomic_long_read(&rollover->num_huge);
+ rstats.tp_failed = atomic_long_read(&rollover->num_failed);
+ data = &rstats;
+ lv = sizeof(rstats);
+ }
+ rcu_read_unlock();
+ if (!rollover)
return -EINVAL;
- rstats.tp_all = atomic_long_read(&po->rollover->num);
- rstats.tp_huge = atomic_long_read(&po->rollover->num_huge);
- rstats.tp_failed = atomic_long_read(&po->rollover->num_failed);
- data = &rstats;
- lv = sizeof(rstats);
break;
case PACKET_TX_HAS_OFF:
val = po->tp_tx_has_off;
@@ -4548,6 +4560,7 @@ static int __net_init packet_net_init(struct net *net)
static void __net_exit packet_net_exit(struct net *net)
{
remove_proc_entry("packet", net->proc_net);
+ WARN_ON_ONCE(!hlist_empty(&net->packet.sklist));
}
static struct pernet_operations packet_net_ops = {
diff --git a/net/packet/internal.h b/net/packet/internal.h
index 94d1d405a116..562fbc155006 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __PACKET_INTERNAL_H__
#define __PACKET_INTERNAL_H__
diff --git a/net/phonet/Makefile b/net/phonet/Makefile
index e10b1b182ce3..444f875932b9 100644
--- a/net/phonet/Makefile
+++ b/net/phonet/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_PHONET) += phonet.o pn_pep.o
phonet-y := \
diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c
index f925753668a7..3b0ef691f5b1 100644
--- a/net/phonet/af_phonet.c
+++ b/net/phonet/af_phonet.c
@@ -35,11 +35,11 @@
#include <net/phonet/pn_dev.h>
/* Transport protocol registration */
-static struct phonet_protocol *proto_tab[PHONET_NPROTO] __read_mostly;
+static const struct phonet_protocol *proto_tab[PHONET_NPROTO] __read_mostly;
-static struct phonet_protocol *phonet_proto_get(unsigned int protocol)
+static const struct phonet_protocol *phonet_proto_get(unsigned int protocol)
{
- struct phonet_protocol *pp;
+ const struct phonet_protocol *pp;
if (protocol >= PHONET_NPROTO)
return NULL;
@@ -53,7 +53,7 @@ static struct phonet_protocol *phonet_proto_get(unsigned int protocol)
return pp;
}
-static inline void phonet_proto_put(struct phonet_protocol *pp)
+static inline void phonet_proto_put(const struct phonet_protocol *pp)
{
module_put(pp->prot->owner);
}
@@ -65,7 +65,7 @@ static int pn_socket_create(struct net *net, struct socket *sock, int protocol,
{
struct sock *sk;
struct pn_sock *pn;
- struct phonet_protocol *pnp;
+ const struct phonet_protocol *pnp;
int err;
if (!capable(CAP_SYS_ADMIN))
@@ -149,7 +149,7 @@ static int pn_header_parse(const struct sk_buff *skb, unsigned char *haddr)
return 1;
}
-struct header_ops phonet_header_ops = {
+const struct header_ops phonet_header_ops = {
.create = pn_header_create,
.parse = pn_header_parse,
};
@@ -470,7 +470,7 @@ static struct packet_type phonet_packet_type __read_mostly = {
static DEFINE_MUTEX(proto_tab_lock);
int __init_or_module phonet_proto_register(unsigned int protocol,
- struct phonet_protocol *pp)
+ const struct phonet_protocol *pp)
{
int err = 0;
@@ -492,7 +492,8 @@ int __init_or_module phonet_proto_register(unsigned int protocol,
}
EXPORT_SYMBOL(phonet_proto_register);
-void phonet_proto_unregister(unsigned int protocol, struct phonet_protocol *pp)
+void phonet_proto_unregister(unsigned int protocol,
+ const struct phonet_protocol *pp)
{
mutex_lock(&proto_tab_lock);
BUG_ON(proto_tab[protocol] != pp);
diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c
index 5e710435ffa9..b44fb9018fb8 100644
--- a/net/phonet/datagram.c
+++ b/net/phonet/datagram.c
@@ -195,7 +195,7 @@ static struct proto pn_proto = {
.name = "PHONET",
};
-static struct phonet_protocol pn_dgram_proto = {
+static const struct phonet_protocol pn_dgram_proto = {
.ops = &phonet_dgram_ops,
.prot = &pn_proto,
.sock_type = SOCK_DGRAM,
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index e81537991ddf..9fc76b19cd3c 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -1351,7 +1351,7 @@ static struct proto pep_proto = {
.name = "PNPIPE",
};
-static struct phonet_protocol pep_pn_proto = {
+static const struct phonet_protocol pep_pn_proto = {
.ops = &phonet_stream_ops,
.prot = &pep_proto,
.sock_type = SOCK_SEQPACKET,
diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c
index 2cb4c5dfad6f..77787512fc32 100644
--- a/net/phonet/pn_dev.c
+++ b/net/phonet/pn_dev.c
@@ -331,7 +331,10 @@ static int __net_init phonet_init_net(struct net *net)
static void __net_exit phonet_exit_net(struct net *net)
{
+ struct phonet_net *pnn = phonet_pernet(net);
+
remove_proc_entry("phonet", net->proc_net);
+ WARN_ON_ONCE(!list_empty(&pnn->pndevs.list));
}
static struct pernet_operations phonet_net_ops = {
diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c
index 45b3af3080d8..da754fc926e7 100644
--- a/net/phonet/pn_netlink.c
+++ b/net/phonet/pn_netlink.c
@@ -300,15 +300,15 @@ out:
int __init phonet_netlink_register(void)
{
int err = __rtnl_register(PF_PHONET, RTM_NEWADDR, addr_doit,
- NULL, NULL);
+ NULL, 0);
if (err)
return err;
/* Further __rtnl_register() cannot fail */
- __rtnl_register(PF_PHONET, RTM_DELADDR, addr_doit, NULL, NULL);
- __rtnl_register(PF_PHONET, RTM_GETADDR, NULL, getaddr_dumpit, NULL);
- __rtnl_register(PF_PHONET, RTM_NEWROUTE, route_doit, NULL, NULL);
- __rtnl_register(PF_PHONET, RTM_DELROUTE, route_doit, NULL, NULL);
- __rtnl_register(PF_PHONET, RTM_GETROUTE, NULL, route_dumpit, NULL);
+ __rtnl_register(PF_PHONET, RTM_DELADDR, addr_doit, NULL, 0);
+ __rtnl_register(PF_PHONET, RTM_GETADDR, NULL, getaddr_dumpit, 0);
+ __rtnl_register(PF_PHONET, RTM_NEWROUTE, route_doit, NULL, 0);
+ __rtnl_register(PF_PHONET, RTM_DELROUTE, route_doit, NULL, 0);
+ __rtnl_register(PF_PHONET, RTM_GETROUTE, NULL, route_dumpit, 0);
return 0;
}
diff --git a/net/psample/psample.c b/net/psample/psample.c
index 3a6ad0f438dc..64f95624f219 100644
--- a/net/psample/psample.c
+++ b/net/psample/psample.c
@@ -296,6 +296,6 @@ static void __exit psample_module_exit(void)
module_init(psample_module_init);
module_exit(psample_module_exit);
-MODULE_AUTHOR("Yotam Gigi <yotamg@mellanox.com>");
+MODULE_AUTHOR("Yotam Gigi <yotam.gi@gmail.com>");
MODULE_DESCRIPTION("netlink channel for packet sampling");
MODULE_LICENSE("GPL v2");
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index 5586609afa27..77ab05e23001 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -20,26 +20,15 @@
#include "qrtr.h"
-#define QRTR_PROTO_VER 1
+#define QRTR_PROTO_VER_1 1
+#define QRTR_PROTO_VER_2 3
/* auto-bind range */
#define QRTR_MIN_EPH_SOCKET 0x4000
#define QRTR_MAX_EPH_SOCKET 0x7fff
-enum qrtr_pkt_type {
- QRTR_TYPE_DATA = 1,
- QRTR_TYPE_HELLO = 2,
- QRTR_TYPE_BYE = 3,
- QRTR_TYPE_NEW_SERVER = 4,
- QRTR_TYPE_DEL_SERVER = 5,
- QRTR_TYPE_DEL_CLIENT = 6,
- QRTR_TYPE_RESUME_TX = 7,
- QRTR_TYPE_EXIT = 8,
- QRTR_TYPE_PING = 9,
-};
-
/**
- * struct qrtr_hdr - (I|R)PCrouter packet header
+ * struct qrtr_hdr_v1 - (I|R)PCrouter packet header version 1
* @version: protocol version
* @type: packet type; one of QRTR_TYPE_*
* @src_node_id: source node
@@ -49,7 +38,7 @@ enum qrtr_pkt_type {
* @dst_node_id: destination node
* @dst_port_id: destination port
*/
-struct qrtr_hdr {
+struct qrtr_hdr_v1 {
__le32 version;
__le32 type;
__le32 src_node_id;
@@ -60,9 +49,44 @@ struct qrtr_hdr {
__le32 dst_port_id;
} __packed;
-#define QRTR_HDR_SIZE sizeof(struct qrtr_hdr)
-#define QRTR_NODE_BCAST ((unsigned int)-1)
-#define QRTR_PORT_CTRL ((unsigned int)-2)
+/**
+ * struct qrtr_hdr_v2 - (I|R)PCrouter packet header later versions
+ * @version: protocol version
+ * @type: packet type; one of QRTR_TYPE_*
+ * @flags: bitmask of QRTR_FLAGS_*
+ * @optlen: length of optional header data
+ * @size: length of packet, excluding this header and optlen
+ * @src_node_id: source node
+ * @src_port_id: source port
+ * @dst_node_id: destination node
+ * @dst_port_id: destination port
+ */
+struct qrtr_hdr_v2 {
+ u8 version;
+ u8 type;
+ u8 flags;
+ u8 optlen;
+ __le32 size;
+ __le16 src_node_id;
+ __le16 src_port_id;
+ __le16 dst_node_id;
+ __le16 dst_port_id;
+};
+
+#define QRTR_FLAGS_CONFIRM_RX BIT(0)
+
+struct qrtr_cb {
+ u32 src_node;
+ u32 src_port;
+ u32 dst_node;
+ u32 dst_port;
+
+ u8 type;
+ u8 confirm_rx;
+};
+
+#define QRTR_HDR_MAX_SIZE max_t(size_t, sizeof(struct qrtr_hdr_v1), \
+ sizeof(struct qrtr_hdr_v2))
struct qrtr_sock {
/* WARNING: sk must be the first member */
@@ -111,8 +135,12 @@ struct qrtr_node {
struct list_head item;
};
-static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb);
-static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb);
+static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb,
+ int type, struct sockaddr_qrtr *from,
+ struct sockaddr_qrtr *to);
+static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb,
+ int type, struct sockaddr_qrtr *from,
+ struct sockaddr_qrtr *to);
/* Release node resources and free the node.
*
@@ -150,10 +178,27 @@ static void qrtr_node_release(struct qrtr_node *node)
}
/* Pass an outgoing packet socket buffer to the endpoint driver. */
-static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb)
+static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb,
+ int type, struct sockaddr_qrtr *from,
+ struct sockaddr_qrtr *to)
{
+ struct qrtr_hdr_v1 *hdr;
+ size_t len = skb->len;
int rc = -ENODEV;
+ hdr = skb_push(skb, sizeof(*hdr));
+ hdr->version = cpu_to_le32(QRTR_PROTO_VER_1);
+ hdr->type = cpu_to_le32(type);
+ hdr->src_node_id = cpu_to_le32(from->sq_node);
+ hdr->src_port_id = cpu_to_le32(from->sq_port);
+ hdr->dst_node_id = cpu_to_le32(to->sq_node);
+ hdr->dst_port_id = cpu_to_le32(to->sq_port);
+
+ hdr->size = cpu_to_le32(len);
+ hdr->confirm_rx = 0;
+
+ skb_put_padto(skb, ALIGN(len, 4));
+
mutex_lock(&node->ep_lock);
if (node->ep)
rc = node->ep->xmit(node->ep, skb);
@@ -207,125 +252,103 @@ static void qrtr_node_assign(struct qrtr_node *node, unsigned int nid)
int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len)
{
struct qrtr_node *node = ep->node;
- const struct qrtr_hdr *phdr = data;
+ const struct qrtr_hdr_v1 *v1;
+ const struct qrtr_hdr_v2 *v2;
struct sk_buff *skb;
- unsigned int psize;
+ struct qrtr_cb *cb;
unsigned int size;
- unsigned int type;
unsigned int ver;
- unsigned int dst;
-
- if (len < QRTR_HDR_SIZE || len & 3)
- return -EINVAL;
-
- ver = le32_to_cpu(phdr->version);
- size = le32_to_cpu(phdr->size);
- type = le32_to_cpu(phdr->type);
- dst = le32_to_cpu(phdr->dst_port_id);
-
- psize = (size + 3) & ~3;
+ size_t hdrlen;
- if (ver != QRTR_PROTO_VER)
- return -EINVAL;
-
- if (len != psize + QRTR_HDR_SIZE)
- return -EINVAL;
-
- if (dst != QRTR_PORT_CTRL && type != QRTR_TYPE_DATA)
+ if (len & 3)
return -EINVAL;
skb = netdev_alloc_skb(NULL, len);
if (!skb)
return -ENOMEM;
- skb_reset_transport_header(skb);
- skb_put_data(skb, data, len);
-
- skb_queue_tail(&node->rx_queue, skb);
- schedule_work(&node->work);
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(qrtr_endpoint_post);
+ cb = (struct qrtr_cb *)skb->cb;
-static struct sk_buff *qrtr_alloc_ctrl_packet(u32 type, size_t pkt_len,
- u32 src_node, u32 dst_node)
-{
- struct qrtr_hdr *hdr;
- struct sk_buff *skb;
-
- skb = alloc_skb(QRTR_HDR_SIZE + pkt_len, GFP_KERNEL);
- if (!skb)
- return NULL;
- skb_reset_transport_header(skb);
+ /* Version field in v1 is little endian, so this works for both cases */
+ ver = *(u8*)data;
- hdr = skb_put(skb, QRTR_HDR_SIZE);
- hdr->version = cpu_to_le32(QRTR_PROTO_VER);
- hdr->type = cpu_to_le32(type);
- hdr->src_node_id = cpu_to_le32(src_node);
- hdr->src_port_id = cpu_to_le32(QRTR_PORT_CTRL);
- hdr->confirm_rx = cpu_to_le32(0);
- hdr->size = cpu_to_le32(pkt_len);
- hdr->dst_node_id = cpu_to_le32(dst_node);
- hdr->dst_port_id = cpu_to_le32(QRTR_PORT_CTRL);
+ switch (ver) {
+ case QRTR_PROTO_VER_1:
+ v1 = data;
+ hdrlen = sizeof(*v1);
- return skb;
-}
+ cb->type = le32_to_cpu(v1->type);
+ cb->src_node = le32_to_cpu(v1->src_node_id);
+ cb->src_port = le32_to_cpu(v1->src_port_id);
+ cb->confirm_rx = !!v1->confirm_rx;
+ cb->dst_node = le32_to_cpu(v1->dst_node_id);
+ cb->dst_port = le32_to_cpu(v1->dst_port_id);
-/* Allocate and construct a resume-tx packet. */
-static struct sk_buff *qrtr_alloc_resume_tx(u32 src_node,
- u32 dst_node, u32 port)
-{
- const int pkt_len = 20;
- struct sk_buff *skb;
- __le32 *buf;
+ size = le32_to_cpu(v1->size);
+ break;
+ case QRTR_PROTO_VER_2:
+ v2 = data;
+ hdrlen = sizeof(*v2) + v2->optlen;
+
+ cb->type = v2->type;
+ cb->confirm_rx = !!(v2->flags & QRTR_FLAGS_CONFIRM_RX);
+ cb->src_node = le16_to_cpu(v2->src_node_id);
+ cb->src_port = le16_to_cpu(v2->src_port_id);
+ cb->dst_node = le16_to_cpu(v2->dst_node_id);
+ cb->dst_port = le16_to_cpu(v2->dst_port_id);
+
+ if (cb->src_port == (u16)QRTR_PORT_CTRL)
+ cb->src_port = QRTR_PORT_CTRL;
+ if (cb->dst_port == (u16)QRTR_PORT_CTRL)
+ cb->dst_port = QRTR_PORT_CTRL;
+
+ size = le32_to_cpu(v2->size);
+ break;
+ default:
+ pr_err("qrtr: Invalid version %d\n", ver);
+ goto err;
+ }
- skb = qrtr_alloc_ctrl_packet(QRTR_TYPE_RESUME_TX, pkt_len,
- src_node, dst_node);
- if (!skb)
- return NULL;
+ if (len != ALIGN(size, 4) + hdrlen)
+ goto err;
- buf = skb_put_zero(skb, pkt_len);
- buf[0] = cpu_to_le32(QRTR_TYPE_RESUME_TX);
- buf[1] = cpu_to_le32(src_node);
- buf[2] = cpu_to_le32(port);
+ if (cb->dst_port != QRTR_PORT_CTRL && cb->type != QRTR_TYPE_DATA)
+ goto err;
- return skb;
-}
+ skb_put_data(skb, data + hdrlen, size);
-/* Allocate and construct a BYE message to signal remote termination */
-static struct sk_buff *qrtr_alloc_local_bye(u32 src_node)
-{
- const int pkt_len = 20;
- struct sk_buff *skb;
- __le32 *buf;
+ skb_queue_tail(&node->rx_queue, skb);
+ schedule_work(&node->work);
- skb = qrtr_alloc_ctrl_packet(QRTR_TYPE_BYE, pkt_len,
- src_node, qrtr_local_nid);
- if (!skb)
- return NULL;
+ return 0;
- buf = skb_put_zero(skb, pkt_len);
- buf[0] = cpu_to_le32(QRTR_TYPE_BYE);
+err:
+ kfree_skb(skb);
+ return -EINVAL;
- return skb;
}
+EXPORT_SYMBOL_GPL(qrtr_endpoint_post);
-static struct sk_buff *qrtr_alloc_del_client(struct sockaddr_qrtr *sq)
+/**
+ * qrtr_alloc_ctrl_packet() - allocate control packet skb
+ * @pkt: reference to qrtr_ctrl_pkt pointer
+ *
+ * Returns newly allocated sk_buff, or NULL on failure
+ *
+ * This function allocates a sk_buff large enough to carry a qrtr_ctrl_pkt and
+ * on success returns a reference to the control packet in @pkt.
+ */
+static struct sk_buff *qrtr_alloc_ctrl_packet(struct qrtr_ctrl_pkt **pkt)
{
- const int pkt_len = 20;
+ const int pkt_len = sizeof(struct qrtr_ctrl_pkt);
struct sk_buff *skb;
- __le32 *buf;
- skb = qrtr_alloc_ctrl_packet(QRTR_TYPE_DEL_CLIENT, pkt_len,
- sq->sq_node, QRTR_NODE_BCAST);
+ skb = alloc_skb(QRTR_HDR_MAX_SIZE + pkt_len, GFP_KERNEL);
if (!skb)
return NULL;
- buf = skb_put_zero(skb, pkt_len);
- buf[0] = cpu_to_le32(QRTR_TYPE_DEL_CLIENT);
- buf[1] = cpu_to_le32(sq->sq_node);
- buf[2] = cpu_to_le32(sq->sq_port);
+ skb_reserve(skb, QRTR_HDR_MAX_SIZE);
+ *pkt = skb_put_zero(skb, pkt_len);
return skb;
}
@@ -340,24 +363,26 @@ static void qrtr_port_put(struct qrtr_sock *ipc);
static void qrtr_node_rx_work(struct work_struct *work)
{
struct qrtr_node *node = container_of(work, struct qrtr_node, work);
+ struct qrtr_ctrl_pkt *pkt;
+ struct sockaddr_qrtr dst;
+ struct sockaddr_qrtr src;
struct sk_buff *skb;
while ((skb = skb_dequeue(&node->rx_queue)) != NULL) {
- const struct qrtr_hdr *phdr;
- u32 dst_node, dst_port;
struct qrtr_sock *ipc;
- u32 src_node;
+ struct qrtr_cb *cb;
int confirm;
- phdr = (const struct qrtr_hdr *)skb_transport_header(skb);
- src_node = le32_to_cpu(phdr->src_node_id);
- dst_node = le32_to_cpu(phdr->dst_node_id);
- dst_port = le32_to_cpu(phdr->dst_port_id);
- confirm = !!phdr->confirm_rx;
+ cb = (struct qrtr_cb *)skb->cb;
+ src.sq_node = cb->src_node;
+ src.sq_port = cb->src_port;
+ dst.sq_node = cb->dst_node;
+ dst.sq_port = cb->dst_port;
+ confirm = !!cb->confirm_rx;
- qrtr_node_assign(node, src_node);
+ qrtr_node_assign(node, cb->src_node);
- ipc = qrtr_port_lookup(dst_port);
+ ipc = qrtr_port_lookup(cb->dst_port);
if (!ipc) {
kfree_skb(skb);
} else {
@@ -368,10 +393,16 @@ static void qrtr_node_rx_work(struct work_struct *work)
}
if (confirm) {
- skb = qrtr_alloc_resume_tx(dst_node, node->nid, dst_port);
+ skb = qrtr_alloc_ctrl_packet(&pkt);
if (!skb)
break;
- if (qrtr_node_enqueue(node, skb))
+
+ pkt->cmd = cpu_to_le32(QRTR_TYPE_RESUME_TX);
+ pkt->client.node = cpu_to_le32(dst.sq_node);
+ pkt->client.port = cpu_to_le32(dst.sq_port);
+
+ if (qrtr_node_enqueue(node, skb, QRTR_TYPE_RESUME_TX,
+ &dst, &src))
break;
}
}
@@ -421,6 +452,9 @@ EXPORT_SYMBOL_GPL(qrtr_endpoint_register);
void qrtr_endpoint_unregister(struct qrtr_endpoint *ep)
{
struct qrtr_node *node = ep->node;
+ struct sockaddr_qrtr src = {AF_QIPCRTR, node->nid, QRTR_PORT_CTRL};
+ struct sockaddr_qrtr dst = {AF_QIPCRTR, qrtr_local_nid, QRTR_PORT_CTRL};
+ struct qrtr_ctrl_pkt *pkt;
struct sk_buff *skb;
mutex_lock(&node->ep_lock);
@@ -428,9 +462,11 @@ void qrtr_endpoint_unregister(struct qrtr_endpoint *ep)
mutex_unlock(&node->ep_lock);
/* Notify the local controller about the event */
- skb = qrtr_alloc_local_bye(node->nid);
- if (skb)
- qrtr_local_enqueue(NULL, skb);
+ skb = qrtr_alloc_ctrl_packet(&pkt);
+ if (skb) {
+ pkt->cmd = cpu_to_le32(QRTR_TYPE_BYE);
+ qrtr_local_enqueue(NULL, skb, QRTR_TYPE_BYE, &src, &dst);
+ }
qrtr_node_release(node);
ep->node = NULL;
@@ -466,13 +502,24 @@ static void qrtr_port_put(struct qrtr_sock *ipc)
/* Remove port assignment. */
static void qrtr_port_remove(struct qrtr_sock *ipc)
{
+ struct qrtr_ctrl_pkt *pkt;
struct sk_buff *skb;
int port = ipc->us.sq_port;
+ struct sockaddr_qrtr to;
- skb = qrtr_alloc_del_client(&ipc->us);
+ to.sq_family = AF_QIPCRTR;
+ to.sq_node = QRTR_NODE_BCAST;
+ to.sq_port = QRTR_PORT_CTRL;
+
+ skb = qrtr_alloc_ctrl_packet(&pkt);
if (skb) {
+ pkt->cmd = cpu_to_le32(QRTR_TYPE_DEL_CLIENT);
+ pkt->client.node = cpu_to_le32(ipc->us.sq_node);
+ pkt->client.port = cpu_to_le32(ipc->us.sq_port);
+
skb_set_owner_w(skb, &ipc->sk);
- qrtr_bcast_enqueue(NULL, skb);
+ qrtr_bcast_enqueue(NULL, skb, QRTR_TYPE_DEL_CLIENT, &ipc->us,
+ &to);
}
if (port == QRTR_PORT_CTRL)
@@ -541,7 +588,7 @@ static void qrtr_reset_ports(void)
sock_hold(&ipc->sk);
ipc->sk.sk_err = ENETRESET;
- wake_up_interruptible(sk_sleep(&ipc->sk));
+ ipc->sk.sk_error_report(&ipc->sk);
sock_put(&ipc->sk);
}
mutex_unlock(&qrtr_port_lock);
@@ -620,19 +667,23 @@ static int qrtr_bind(struct socket *sock, struct sockaddr *saddr, int len)
}
/* Queue packet to local peer socket. */
-static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb)
+static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb,
+ int type, struct sockaddr_qrtr *from,
+ struct sockaddr_qrtr *to)
{
- const struct qrtr_hdr *phdr;
struct qrtr_sock *ipc;
+ struct qrtr_cb *cb;
- phdr = (const struct qrtr_hdr *)skb_transport_header(skb);
-
- ipc = qrtr_port_lookup(le32_to_cpu(phdr->dst_port_id));
+ ipc = qrtr_port_lookup(to->sq_port);
if (!ipc || &ipc->sk == skb->sk) { /* do not send to self */
kfree_skb(skb);
return -ENODEV;
}
+ cb = (struct qrtr_cb *)skb->cb;
+ cb->src_node = from->sq_node;
+ cb->src_port = from->sq_port;
+
if (sock_queue_rcv_skb(&ipc->sk, skb)) {
qrtr_port_put(ipc);
kfree_skb(skb);
@@ -645,7 +696,9 @@ static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb)
}
/* Queue packet for broadcast. */
-static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb)
+static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb,
+ int type, struct sockaddr_qrtr *from,
+ struct sockaddr_qrtr *to)
{
struct sk_buff *skbn;
@@ -655,11 +708,11 @@ static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb)
if (!skbn)
break;
skb_set_owner_w(skbn, skb->sk);
- qrtr_node_enqueue(node, skbn);
+ qrtr_node_enqueue(node, skbn, type, from, to);
}
mutex_unlock(&qrtr_node_lock);
- qrtr_local_enqueue(node, skb);
+ qrtr_local_enqueue(node, skb, type, from, to);
return 0;
}
@@ -667,13 +720,14 @@ static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb)
static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
{
DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, msg->msg_name);
- int (*enqueue_fn)(struct qrtr_node *, struct sk_buff *);
+ int (*enqueue_fn)(struct qrtr_node *, struct sk_buff *, int,
+ struct sockaddr_qrtr *, struct sockaddr_qrtr *);
struct qrtr_sock *ipc = qrtr_sk(sock->sk);
struct sock *sk = sock->sk;
struct qrtr_node *node;
- struct qrtr_hdr *hdr;
struct sk_buff *skb;
size_t plen;
+ u32 type = QRTR_TYPE_DATA;
int rc;
if (msg->msg_flags & ~(MSG_DONTWAIT))
@@ -722,37 +776,19 @@ static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
}
plen = (len + 3) & ~3;
- skb = sock_alloc_send_skb(sk, plen + QRTR_HDR_SIZE,
+ skb = sock_alloc_send_skb(sk, plen + QRTR_HDR_MAX_SIZE,
msg->msg_flags & MSG_DONTWAIT, &rc);
if (!skb)
goto out_node;
- skb_reset_transport_header(skb);
- skb_put(skb, len + QRTR_HDR_SIZE);
-
- hdr = (struct qrtr_hdr *)skb_transport_header(skb);
- hdr->version = cpu_to_le32(QRTR_PROTO_VER);
- hdr->src_node_id = cpu_to_le32(ipc->us.sq_node);
- hdr->src_port_id = cpu_to_le32(ipc->us.sq_port);
- hdr->confirm_rx = cpu_to_le32(0);
- hdr->size = cpu_to_le32(len);
- hdr->dst_node_id = cpu_to_le32(addr->sq_node);
- hdr->dst_port_id = cpu_to_le32(addr->sq_port);
+ skb_reserve(skb, QRTR_HDR_MAX_SIZE);
- rc = skb_copy_datagram_from_iter(skb, QRTR_HDR_SIZE,
- &msg->msg_iter, len);
+ rc = memcpy_from_msg(skb_put(skb, len), msg, len);
if (rc) {
kfree_skb(skb);
goto out_node;
}
- if (plen != len) {
- rc = skb_pad(skb, plen - len);
- if (rc)
- goto out_node;
- skb_put(skb, plen - len);
- }
-
if (ipc->us.sq_port == QRTR_PORT_CTRL) {
if (len < 4) {
rc = -EINVAL;
@@ -761,12 +797,11 @@ static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
}
/* control messages already require the type as 'command' */
- skb_copy_bits(skb, QRTR_HDR_SIZE, &hdr->type, 4);
- } else {
- hdr->type = cpu_to_le32(QRTR_TYPE_DATA);
+ skb_copy_bits(skb, 0, &type, 4);
+ type = le32_to_cpu(type);
}
- rc = enqueue_fn(node, skb);
+ rc = enqueue_fn(node, skb, type, &ipc->us, addr);
if (rc >= 0)
rc = len;
@@ -781,9 +816,9 @@ static int qrtr_recvmsg(struct socket *sock, struct msghdr *msg,
size_t size, int flags)
{
DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, msg->msg_name);
- const struct qrtr_hdr *phdr;
struct sock *sk = sock->sk;
struct sk_buff *skb;
+ struct qrtr_cb *cb;
int copied, rc;
lock_sock(sk);
@@ -800,22 +835,22 @@ static int qrtr_recvmsg(struct socket *sock, struct msghdr *msg,
return rc;
}
- phdr = (const struct qrtr_hdr *)skb_transport_header(skb);
- copied = le32_to_cpu(phdr->size);
+ copied = skb->len;
if (copied > size) {
copied = size;
msg->msg_flags |= MSG_TRUNC;
}
- rc = skb_copy_datagram_msg(skb, QRTR_HDR_SIZE, msg, copied);
+ rc = skb_copy_datagram_msg(skb, 0, msg, copied);
if (rc < 0)
goto out;
rc = copied;
if (addr) {
+ cb = (struct qrtr_cb *)skb->cb;
addr->sq_family = AF_QIPCRTR;
- addr->sq_node = le32_to_cpu(phdr->src_node_id);
- addr->sq_port = le32_to_cpu(phdr->src_port_id);
+ addr->sq_node = cb->src_node;
+ addr->sq_port = cb->src_port;
msg->msg_namelen = sizeof(*addr);
}
@@ -908,7 +943,7 @@ static int qrtr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
case TIOCINQ:
skb = skb_peek(&sk->sk_receive_queue);
if (skb)
- len = skb->len - QRTR_HDR_SIZE;
+ len = skb->len;
rc = put_user(len, (int __user *)argp);
break;
case SIOCGIFADDR:
@@ -1081,11 +1116,11 @@ static int __init qrtr_proto_init(void)
return rc;
}
- rtnl_register(PF_QIPCRTR, RTM_NEWADDR, qrtr_addr_doit, NULL, NULL);
+ rtnl_register(PF_QIPCRTR, RTM_NEWADDR, qrtr_addr_doit, NULL, 0);
return 0;
}
-module_init(qrtr_proto_init);
+postcore_initcall(qrtr_proto_init);
static void __exit qrtr_proto_fini(void)
{
diff --git a/net/qrtr/qrtr.h b/net/qrtr/qrtr.h
index 2b848718f8fe..b81e6953c04b 100644
--- a/net/qrtr/qrtr.h
+++ b/net/qrtr/qrtr.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __QRTR_H_
#define __QRTR_H_
diff --git a/net/rds/Makefile b/net/rds/Makefile
index 56c7d27eefee..b5d568bd479c 100644
--- a/net/rds/Makefile
+++ b/net/rds/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_RDS) += rds.o
rds-y := af_rds.o bind.o cong.o connection.o info.o message.o \
recv.o send.o stats.o sysctl.o threads.o transport.o \
diff --git a/net/rds/bind.c b/net/rds/bind.c
index 3a915bedb76c..75d43dc8e96b 100644
--- a/net/rds/bind.c
+++ b/net/rds/bind.c
@@ -40,7 +40,7 @@
static struct rhashtable bind_hash_table;
-static struct rhashtable_params ht_parms = {
+static const struct rhashtable_params ht_parms = {
.nelem_hint = 768,
.key_len = sizeof(u64),
.key_offset = offsetof(struct rds_sock, rs_bound_key),
diff --git a/net/rds/connection.c b/net/rds/connection.c
index 50a3789ac23e..7ee2d5d68b78 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -151,6 +151,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
struct rds_transport *loop_trans;
unsigned long flags;
int ret, i;
+ int npaths = (trans->t_mp_capable ? RDS_MPATH_WORKERS : 1);
rcu_read_lock();
conn = rds_conn_lookup(net, head, laddr, faddr, trans);
@@ -172,6 +173,12 @@ static struct rds_connection *__rds_conn_create(struct net *net,
conn = ERR_PTR(-ENOMEM);
goto out;
}
+ conn->c_path = kcalloc(npaths, sizeof(struct rds_conn_path), gfp);
+ if (!conn->c_path) {
+ kmem_cache_free(rds_conn_slab, conn);
+ conn = ERR_PTR(-ENOMEM);
+ goto out;
+ }
INIT_HLIST_NODE(&conn->c_hash_node);
conn->c_laddr = laddr;
@@ -181,6 +188,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
ret = rds_cong_get_maps(conn);
if (ret) {
+ kfree(conn->c_path);
kmem_cache_free(rds_conn_slab, conn);
conn = ERR_PTR(ret);
goto out;
@@ -207,13 +215,14 @@ static struct rds_connection *__rds_conn_create(struct net *net,
conn->c_trans = trans;
init_waitqueue_head(&conn->c_hs_waitq);
- for (i = 0; i < RDS_MPATH_WORKERS; i++) {
+ for (i = 0; i < npaths; i++) {
__rds_conn_path_init(conn, &conn->c_path[i],
is_outgoing);
conn->c_path[i].cp_index = i;
}
ret = trans->conn_alloc(conn, gfp);
if (ret) {
+ kfree(conn->c_path);
kmem_cache_free(rds_conn_slab, conn);
conn = ERR_PTR(ret);
goto out;
@@ -236,6 +245,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
/* Creating passive conn */
if (parent->c_passive) {
trans->conn_free(conn->c_path[0].cp_transport_data);
+ kfree(conn->c_path);
kmem_cache_free(rds_conn_slab, conn);
conn = parent->c_passive;
} else {
@@ -252,7 +262,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
struct rds_conn_path *cp;
int i;
- for (i = 0; i < RDS_MPATH_WORKERS; i++) {
+ for (i = 0; i < npaths; i++) {
cp = &conn->c_path[i];
/* The ->conn_alloc invocation may have
* allocated resource for all paths, so all
@@ -261,6 +271,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
if (cp->cp_transport_data)
trans->conn_free(cp->cp_transport_data);
}
+ kfree(conn->c_path);
kmem_cache_free(rds_conn_slab, conn);
conn = found;
} else {
@@ -374,13 +385,13 @@ static void rds_conn_path_destroy(struct rds_conn_path *cp)
if (!cp->cp_transport_data)
return;
- rds_conn_path_drop(cp);
- flush_work(&cp->cp_down_w);
-
/* make sure lingering queued work won't try to ref the conn */
cancel_delayed_work_sync(&cp->cp_send_w);
cancel_delayed_work_sync(&cp->cp_recv_w);
+ rds_conn_path_drop(cp, true);
+ flush_work(&cp->cp_down_w);
+
/* tear down queued messages */
list_for_each_entry_safe(rm, rtmp,
&cp->cp_send_queue,
@@ -407,6 +418,7 @@ void rds_conn_destroy(struct rds_connection *conn)
unsigned long flags;
int i;
struct rds_conn_path *cp;
+ int npaths = (conn->c_trans->t_mp_capable ? RDS_MPATH_WORKERS : 1);
rdsdebug("freeing conn %p for %pI4 -> "
"%pI4\n", conn, &conn->c_laddr,
@@ -420,7 +432,7 @@ void rds_conn_destroy(struct rds_connection *conn)
synchronize_rcu();
/* shut the connection down */
- for (i = 0; i < RDS_MPATH_WORKERS; i++) {
+ for (i = 0; i < npaths; i++) {
cp = &conn->c_path[i];
rds_conn_path_destroy(cp);
BUG_ON(!list_empty(&cp->cp_retrans));
@@ -434,6 +446,7 @@ void rds_conn_destroy(struct rds_connection *conn)
rds_cong_remove_conn(conn);
put_net(conn->c_net);
+ kfree(conn->c_path);
kmem_cache_free(rds_conn_slab, conn);
spin_lock_irqsave(&rds_conn_lock, flags);
@@ -464,8 +477,12 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len,
i++, head++) {
hlist_for_each_entry_rcu(conn, head, c_hash_node) {
struct rds_conn_path *cp;
+ int npaths;
+
+ npaths = (conn->c_trans->t_mp_capable ?
+ RDS_MPATH_WORKERS : 1);
- for (j = 0; j < RDS_MPATH_WORKERS; j++) {
+ for (j = 0; j < npaths; j++) {
cp = &conn->c_path[j];
if (want_send)
list = &cp->cp_send_queue;
@@ -486,8 +503,6 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len,
}
spin_unlock_irqrestore(&cp->cp_lock, flags);
- if (!conn->c_trans->t_mp_capable)
- break;
}
}
}
@@ -571,15 +586,16 @@ static void rds_walk_conn_path_info(struct socket *sock, unsigned int len,
i++, head++) {
hlist_for_each_entry_rcu(conn, head, c_hash_node) {
struct rds_conn_path *cp;
+ int npaths;
- for (j = 0; j < RDS_MPATH_WORKERS; j++) {
+ npaths = (conn->c_trans->t_mp_capable ?
+ RDS_MPATH_WORKERS : 1);
+ for (j = 0; j < npaths; j++) {
cp = &conn->c_path[j];
/* XXX no cp_lock usage.. */
if (!visitor(cp, buffer))
continue;
- if (!conn->c_trans->t_mp_capable)
- break;
}
/* We copy as much as we can fit in the buffer,
@@ -664,9 +680,13 @@ void rds_conn_exit(void)
/*
* Force a disconnect
*/
-void rds_conn_path_drop(struct rds_conn_path *cp)
+void rds_conn_path_drop(struct rds_conn_path *cp, bool destroy)
{
atomic_set(&cp->cp_state, RDS_CONN_ERROR);
+
+ if (!destroy && cp->cp_conn->c_destroy_in_prog)
+ return;
+
queue_work(rds_wq, &cp->cp_down_w);
}
EXPORT_SYMBOL_GPL(rds_conn_path_drop);
@@ -674,7 +694,7 @@ EXPORT_SYMBOL_GPL(rds_conn_path_drop);
void rds_conn_drop(struct rds_connection *conn)
{
WARN_ON(conn->c_trans->t_mp_capable);
- rds_conn_path_drop(&conn->c_path[0]);
+ rds_conn_path_drop(&conn->c_path[0], false);
}
EXPORT_SYMBOL_GPL(rds_conn_drop);
@@ -706,5 +726,5 @@ __rds_conn_path_error(struct rds_conn_path *cp, const char *fmt, ...)
vprintk(fmt, ap);
va_end(ap);
- rds_conn_path_drop(cp);
+ rds_conn_path_drop(cp, false);
}
diff --git a/net/rds/ib.c b/net/rds/ib.c
index a0954ace3774..36dd2099048a 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -126,6 +126,7 @@ void rds_ib_dev_put(struct rds_ib_device *rds_ibdev)
static void rds_ib_add_one(struct ib_device *device)
{
struct rds_ib_device *rds_ibdev;
+ bool has_fr, has_fmr;
/* Only handle IB (no iWARP) devices */
if (device->node_type != RDMA_NODE_IB_CA)
@@ -143,11 +144,11 @@ static void rds_ib_add_one(struct ib_device *device)
rds_ibdev->max_wrs = device->attrs.max_qp_wr;
rds_ibdev->max_sge = min(device->attrs.max_sge, RDS_IB_MAX_SGE);
- rds_ibdev->has_fr = (device->attrs.device_cap_flags &
- IB_DEVICE_MEM_MGT_EXTENSIONS);
- rds_ibdev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
- device->map_phys_fmr && device->unmap_fmr);
- rds_ibdev->use_fastreg = (rds_ibdev->has_fr && !rds_ibdev->has_fmr);
+ has_fr = (device->attrs.device_cap_flags &
+ IB_DEVICE_MEM_MGT_EXTENSIONS);
+ has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
+ device->map_phys_fmr && device->unmap_fmr);
+ rds_ibdev->use_fastreg = (has_fr && !has_fmr);
rds_ibdev->fmr_max_remaps = device->attrs.max_map_per_fmr?: 32;
rds_ibdev->max_1m_mrs = device->attrs.max_mr ?
diff --git a/net/rds/ib.h b/net/rds/ib.h
index bf4822407567..a6f4d7d68e95 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _RDS_IB_H
#define _RDS_IB_H
@@ -215,8 +216,6 @@ struct rds_ib_device {
struct list_head conn_list;
struct ib_device *dev;
struct ib_pd *pd;
- bool has_fmr;
- bool has_fr;
bool use_fastreg;
unsigned int max_mrs;
diff --git a/net/rds/ib_fmr.c b/net/rds/ib_fmr.c
index 86ef907067bb..e0f70c4051b6 100644
--- a/net/rds/ib_fmr.c
+++ b/net/rds/ib_fmr.c
@@ -139,8 +139,8 @@ static int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev,
return -EINVAL;
}
- dma_pages = kmalloc_node(sizeof(u64) * page_cnt, GFP_ATOMIC,
- rdsibdev_to_node(rds_ibdev));
+ dma_pages = kmalloc_array_node(sizeof(u64), page_cnt, GFP_ATOMIC,
+ rdsibdev_to_node(rds_ibdev));
if (!dma_pages) {
ib_dma_unmap_sg(dev, sg, nents, DMA_BIDIRECTIONAL);
return -ENOMEM;
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index 9a3c54e659e9..e678699268a2 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -601,11 +601,11 @@ struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev,
if (pool_type == RDS_IB_MR_1M_POOL) {
/* +1 allows for unaligned MRs */
pool->fmr_attr.max_pages = RDS_MR_1M_MSG_SIZE + 1;
- pool->max_items = RDS_MR_1M_POOL_SIZE;
+ pool->max_items = rds_ibdev->max_1m_mrs;
} else {
/* pool_type == RDS_IB_MR_8K_POOL */
pool->fmr_attr.max_pages = RDS_MR_8K_MSG_SIZE + 1;
- pool->max_items = RDS_MR_8K_POOL_SIZE;
+ pool->max_items = rds_ibdev->max_8k_mrs;
}
pool->max_free_pinned = pool->max_items * pool->fmr_attr.max_pages / 4;
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index 9722bf839d9d..b4e421aa9727 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -410,14 +410,14 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp)
break;
}
- /* XXX when can this fail? */
- ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr);
- rdsdebug("recv %p ibinc %p page %p addr %lu ret %d\n", recv,
+ rdsdebug("recv %p ibinc %p page %p addr %lu\n", recv,
recv->r_ibinc, sg_page(&recv->r_frag->f_sg),
(long) ib_sg_dma_address(
ic->i_cm_id->device,
- &recv->r_frag->f_sg),
- ret);
+ &recv->r_frag->f_sg));
+
+ /* XXX when can this fail? */
+ ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr);
if (ret) {
rds_ib_conn_error(conn, "recv post on "
"%pI4 returned %d, disconnecting and "
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index 6ab39dbcca01..8557a1cae041 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -661,13 +661,15 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
}
}
- rds_ib_set_wr_signal_state(ic, send, 0);
+ rds_ib_set_wr_signal_state(ic, send, false);
/*
* Always signal the last one if we're stopping due to flow control.
*/
- if (ic->i_flowctl && flow_controlled && i == (work_alloc-1))
- send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
+ if (ic->i_flowctl && flow_controlled && i == (work_alloc - 1)) {
+ rds_ib_set_wr_signal_state(ic, send, true);
+ send->s_wr.send_flags |= IB_SEND_SOLICITED;
+ }
if (send->s_wr.send_flags & IB_SEND_SIGNALED)
nr_sig++;
@@ -705,11 +707,8 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
if (scat == &rm->data.op_sg[rm->data.op_count]) {
prev->s_op = ic->i_data_op;
prev->s_wr.send_flags |= IB_SEND_SOLICITED;
- if (!(prev->s_wr.send_flags & IB_SEND_SIGNALED)) {
- ic->i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs;
- prev->s_wr.send_flags |= IB_SEND_SIGNALED;
- nr_sig++;
- }
+ if (!(prev->s_wr.send_flags & IB_SEND_SIGNALED))
+ nr_sig += rds_ib_set_wr_signal_state(ic, prev, true);
ic->i_data_op = NULL;
}
@@ -792,6 +791,7 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op)
send->s_atomic_wr.compare_add_mask = op->op_m_fadd.nocarry_mask;
send->s_atomic_wr.swap_mask = 0;
}
+ send->s_wr.send_flags = 0;
nr_sig = rds_ib_set_wr_signal_state(ic, send, op->op_notify);
send->s_atomic_wr.wr.num_sge = 1;
send->s_atomic_wr.wr.next = NULL;
diff --git a/net/rds/info.h b/net/rds/info.h
index b6c052ca7d22..a069b51c4679 100644
--- a/net/rds/info.h
+++ b/net/rds/info.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _RDS_INFO_H
#define _RDS_INFO_H
diff --git a/net/rds/loop.h b/net/rds/loop.h
index f32b0939a04d..469fa4b2da4f 100644
--- a/net/rds/loop.h
+++ b/net/rds/loop.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _RDS_LOOP_H
#define _RDS_LOOP_H
diff --git a/net/rds/rdma_transport.h b/net/rds/rdma_transport.h
index ff2010e9d20c..d309c4430124 100644
--- a/net/rds/rdma_transport.h
+++ b/net/rds/rdma_transport.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _RDMA_TRANSPORT_H
#define _RDMA_TRANSPORT_H
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 516bcc89b46f..c349c71babff 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _RDS_RDS_H
#define _RDS_RDS_H
@@ -154,7 +155,7 @@ struct rds_connection {
struct list_head c_map_item;
unsigned long c_map_queued;
- struct rds_conn_path c_path[RDS_MPATH_WORKERS];
+ struct rds_conn_path *c_path;
wait_queue_head_t c_hs_waitq; /* handshake waitq */
u32 c_my_gen_num;
@@ -700,7 +701,7 @@ struct rds_connection *rds_conn_create_outgoing(struct net *net,
void rds_conn_shutdown(struct rds_conn_path *cpath);
void rds_conn_destroy(struct rds_connection *conn);
void rds_conn_drop(struct rds_connection *conn);
-void rds_conn_path_drop(struct rds_conn_path *cpath);
+void rds_conn_path_drop(struct rds_conn_path *cpath, bool destroy);
void rds_conn_connect_if_down(struct rds_connection *conn);
void rds_conn_path_connect_if_down(struct rds_conn_path *cp);
void rds_for_each_conn_info(struct socket *sock, unsigned int len,
diff --git a/net/rds/rds_single_path.h b/net/rds/rds_single_path.h
index e1241af7c1ad..9521f6e99bef 100644
--- a/net/rds/rds_single_path.h
+++ b/net/rds/rds_single_path.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _RDS_RDS_SINGLE_H
#define _RDS_RDS_SINGLE_H
diff --git a/net/rds/send.c b/net/rds/send.c
index 41b9f0f5bb9c..b52cdc8ae428 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -273,7 +273,7 @@ restart:
len = ntohl(rm->m_inc.i_hdr.h_len);
if (cp->cp_unacked_packets == 0 ||
cp->cp_unacked_bytes < len) {
- __set_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags);
+ set_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags);
cp->cp_unacked_packets =
rds_sysctl_max_unacked_packets;
@@ -428,14 +428,18 @@ over_batch:
* some work and we will skip our goto
*/
if (ret == 0) {
+ bool raced;
+
smp_mb();
+ raced = send_gen != READ_ONCE(cp->cp_send_gen);
+
if ((test_bit(0, &conn->c_map_queued) ||
- !list_empty(&cp->cp_send_queue)) &&
- send_gen == READ_ONCE(cp->cp_send_gen)) {
- rds_stats_inc(s_send_lock_queue_raced);
+ !list_empty(&cp->cp_send_queue)) && !raced) {
if (batch_count < send_batch_count)
goto restart;
queue_delayed_work(rds_wq, &cp->cp_send_w, 1);
+ } else if (raced) {
+ rds_stats_inc(s_send_lock_queue_raced);
}
}
out:
@@ -829,7 +833,7 @@ static int rds_send_queue_rm(struct rds_sock *rs, struct rds_connection *conn,
* throughput hits a certain threshold.
*/
if (rs->rs_snd_bytes >= rds_sk_sndbuf(rs) / 2)
- __set_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags);
+ set_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags);
list_add_tail(&rm->m_sock_item, &rs->rs_send_queue);
set_bit(RDS_MSG_ON_SOCK, &rm->m_flags);
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 431404dbdad1..6b7ee71f40c6 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -592,7 +592,7 @@ static void rds_tcp_sysctl_reset(struct net *net)
continue;
/* reconnect with new parameters */
- rds_conn_path_drop(tc->t_cpath);
+ rds_conn_path_drop(tc->t_cpath, false);
}
spin_unlock_irq(&rds_tcp_conn_lock);
}
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index f8800b7ce79c..1aafbf7c3011 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _RDS_TCP_H
#define _RDS_TCP_H
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c
index cbe08a1fa4c7..46f74dad0e16 100644
--- a/net/rds/tcp_connect.c
+++ b/net/rds/tcp_connect.c
@@ -69,14 +69,14 @@ void rds_tcp_state_change(struct sock *sk)
if (!IS_CANONICAL(cp->cp_conn->c_laddr, cp->cp_conn->c_faddr) &&
rds_conn_path_transition(cp, RDS_CONN_CONNECTING,
RDS_CONN_ERROR)) {
- rds_conn_path_drop(cp);
+ rds_conn_path_drop(cp, false);
} else {
rds_connect_path_complete(cp, RDS_CONN_CONNECTING);
}
break;
case TCP_CLOSE_WAIT:
case TCP_CLOSE:
- rds_conn_path_drop(cp);
+ rds_conn_path_drop(cp, false);
default:
break;
}
diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c
index 0d8616aa5bad..dc860d1bb608 100644
--- a/net/rds/tcp_send.c
+++ b/net/rds/tcp_send.c
@@ -157,7 +157,7 @@ out:
"returned %d, "
"disconnecting and reconnecting\n",
&conn->c_faddr, cp->cp_index, ret);
- rds_conn_path_drop(cp);
+ rds_conn_path_drop(cp, false);
}
}
}
diff --git a/net/rds/threads.c b/net/rds/threads.c
index 2852bc1d37d4..f121daa402c8 100644
--- a/net/rds/threads.c
+++ b/net/rds/threads.c
@@ -78,7 +78,7 @@ void rds_connect_path_complete(struct rds_conn_path *cp, int curr)
"current state is %d\n",
__func__,
atomic_read(&cp->cp_state));
- rds_conn_path_drop(cp);
+ rds_conn_path_drop(cp, false);
return;
}
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 4a9729257023..6a5c4992cf61 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -318,9 +318,11 @@ void rose_destroy_socket(struct sock *);
/*
* Handler for deferred kills.
*/
-static void rose_destroy_timer(unsigned long data)
+static void rose_destroy_timer(struct timer_list *t)
{
- rose_destroy_socket((struct sock *)data);
+ struct sock *sk = from_timer(sk, t, sk_timer);
+
+ rose_destroy_socket(sk);
}
/*
@@ -353,8 +355,7 @@ void rose_destroy_socket(struct sock *sk)
if (sk_has_allocations(sk)) {
/* Defer: outstanding buffers */
- setup_timer(&sk->sk_timer, rose_destroy_timer,
- (unsigned long)sk);
+ timer_setup(&sk->sk_timer, rose_destroy_timer, 0);
sk->sk_timer.expires = jiffies + 10 * HZ;
add_timer(&sk->sk_timer);
} else
@@ -538,8 +539,8 @@ static int rose_create(struct net *net, struct socket *sock, int protocol,
sock->ops = &rose_proto_ops;
sk->sk_protocol = protocol;
- init_timer(&rose->timer);
- init_timer(&rose->idletimer);
+ timer_setup(&rose->timer, NULL, 0);
+ timer_setup(&rose->idletimer, NULL, 0);
rose->t1 = msecs_to_jiffies(sysctl_rose_call_request_timeout);
rose->t2 = msecs_to_jiffies(sysctl_rose_reset_request_timeout);
@@ -582,8 +583,8 @@ static struct sock *rose_make_new(struct sock *osk)
sk->sk_state = TCP_ESTABLISHED;
sock_copy_flags(sk, osk);
- init_timer(&rose->timer);
- init_timer(&rose->idletimer);
+ timer_setup(&rose->timer, NULL, 0);
+ timer_setup(&rose->idletimer, NULL, 0);
orose = rose_sk(osk);
rose->t1 = orose->t1;
diff --git a/net/rose/rose_in.c b/net/rose/rose_in.c
index 0a6394754e81..9bbbfe325c5a 100644
--- a/net/rose/rose_in.c
+++ b/net/rose/rose_in.c
@@ -219,6 +219,7 @@ static int rose_state4_machine(struct sock *sk, struct sk_buff *skb, int framety
switch (frametype) {
case ROSE_RESET_REQUEST:
rose_write_internal(sk, ROSE_RESET_CONFIRMATION);
+ /* fall through */
case ROSE_RESET_CONFIRMATION:
rose_stop_timer(sk);
rose_start_idletimer(sk);
diff --git a/net/rose/rose_link.c b/net/rose/rose_link.c
index c76638cc2cd5..cda4c6678ef1 100644
--- a/net/rose/rose_link.c
+++ b/net/rose/rose_link.c
@@ -27,8 +27,8 @@
#include <linux/interrupt.h>
#include <net/rose.h>
-static void rose_ftimer_expiry(unsigned long);
-static void rose_t0timer_expiry(unsigned long);
+static void rose_ftimer_expiry(struct timer_list *);
+static void rose_t0timer_expiry(struct timer_list *);
static void rose_transmit_restart_confirmation(struct rose_neigh *neigh);
static void rose_transmit_restart_request(struct rose_neigh *neigh);
@@ -37,8 +37,7 @@ void rose_start_ftimer(struct rose_neigh *neigh)
{
del_timer(&neigh->ftimer);
- neigh->ftimer.data = (unsigned long)neigh;
- neigh->ftimer.function = &rose_ftimer_expiry;
+ neigh->ftimer.function = (TIMER_FUNC_TYPE)rose_ftimer_expiry;
neigh->ftimer.expires =
jiffies + msecs_to_jiffies(sysctl_rose_link_fail_timeout);
@@ -49,8 +48,7 @@ static void rose_start_t0timer(struct rose_neigh *neigh)
{
del_timer(&neigh->t0timer);
- neigh->t0timer.data = (unsigned long)neigh;
- neigh->t0timer.function = &rose_t0timer_expiry;
+ neigh->t0timer.function = (TIMER_FUNC_TYPE)rose_t0timer_expiry;
neigh->t0timer.expires =
jiffies + msecs_to_jiffies(sysctl_rose_restart_request_timeout);
@@ -77,13 +75,13 @@ static int rose_t0timer_running(struct rose_neigh *neigh)
return timer_pending(&neigh->t0timer);
}
-static void rose_ftimer_expiry(unsigned long param)
+static void rose_ftimer_expiry(struct timer_list *t)
{
}
-static void rose_t0timer_expiry(unsigned long param)
+static void rose_t0timer_expiry(struct timer_list *t)
{
- struct rose_neigh *neigh = (struct rose_neigh *)param;
+ struct rose_neigh *neigh = from_timer(neigh, t, t0timer);
rose_transmit_restart_request(neigh);
diff --git a/net/rose/rose_loopback.c b/net/rose/rose_loopback.c
index 344456206b70..7af4f99c4a93 100644
--- a/net/rose/rose_loopback.c
+++ b/net/rose/rose_loopback.c
@@ -19,12 +19,13 @@ static struct sk_buff_head loopback_queue;
static struct timer_list loopback_timer;
static void rose_set_loopback_timer(void);
+static void rose_loopback_timer(struct timer_list *unused);
void rose_loopback_init(void)
{
skb_queue_head_init(&loopback_queue);
- init_timer(&loopback_timer);
+ timer_setup(&loopback_timer, rose_loopback_timer, 0);
}
static int rose_loopback_running(void)
@@ -50,20 +51,16 @@ int rose_loopback_queue(struct sk_buff *skb, struct rose_neigh *neigh)
return 1;
}
-static void rose_loopback_timer(unsigned long);
static void rose_set_loopback_timer(void)
{
del_timer(&loopback_timer);
- loopback_timer.data = 0;
- loopback_timer.function = &rose_loopback_timer;
loopback_timer.expires = jiffies + 10;
-
add_timer(&loopback_timer);
}
-static void rose_loopback_timer(unsigned long param)
+static void rose_loopback_timer(struct timer_list *unused)
{
struct sk_buff *skb;
struct net_device *dev;
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
index 452bbb38d943..8ca3124df83f 100644
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -104,8 +104,8 @@ static int __must_check rose_add_node(struct rose_route_struct *rose_route,
skb_queue_head_init(&rose_neigh->queue);
- init_timer(&rose_neigh->ftimer);
- init_timer(&rose_neigh->t0timer);
+ timer_setup(&rose_neigh->ftimer, NULL, 0);
+ timer_setup(&rose_neigh->t0timer, NULL, 0);
if (rose_route->ndigis != 0) {
rose_neigh->digipeat =
@@ -346,6 +346,7 @@ static int rose_del_node(struct rose_route_struct *rose_route,
case 0:
rose_node->neighbour[0] =
rose_node->neighbour[1];
+ /* fall through */
case 1:
rose_node->neighbour[1] =
rose_node->neighbour[2];
@@ -390,8 +391,8 @@ void rose_add_loopback_neigh(void)
skb_queue_head_init(&sn->queue);
- init_timer(&sn->ftimer);
- init_timer(&sn->t0timer);
+ timer_setup(&sn->ftimer, NULL, 0);
+ timer_setup(&sn->t0timer, NULL, 0);
spin_lock_bh(&rose_neigh_list_lock);
sn->next = rose_neigh_list;
@@ -507,6 +508,7 @@ void rose_rt_device_down(struct net_device *dev)
switch (i) {
case 0:
t->neighbour[0] = t->neighbour[1];
+ /* fall through */
case 1:
t->neighbour[1] = t->neighbour[2];
case 2:
diff --git a/net/rose/rose_timer.c b/net/rose/rose_timer.c
index bc5469d6d9cb..ea613b2a9735 100644
--- a/net/rose/rose_timer.c
+++ b/net/rose/rose_timer.c
@@ -28,16 +28,15 @@
#include <linux/interrupt.h>
#include <net/rose.h>
-static void rose_heartbeat_expiry(unsigned long);
-static void rose_timer_expiry(unsigned long);
-static void rose_idletimer_expiry(unsigned long);
+static void rose_heartbeat_expiry(struct timer_list *t);
+static void rose_timer_expiry(struct timer_list *);
+static void rose_idletimer_expiry(struct timer_list *);
void rose_start_heartbeat(struct sock *sk)
{
del_timer(&sk->sk_timer);
- sk->sk_timer.data = (unsigned long)sk;
- sk->sk_timer.function = &rose_heartbeat_expiry;
+ sk->sk_timer.function = (TIMER_FUNC_TYPE)rose_heartbeat_expiry;
sk->sk_timer.expires = jiffies + 5 * HZ;
add_timer(&sk->sk_timer);
@@ -49,8 +48,7 @@ void rose_start_t1timer(struct sock *sk)
del_timer(&rose->timer);
- rose->timer.data = (unsigned long)sk;
- rose->timer.function = &rose_timer_expiry;
+ rose->timer.function = (TIMER_FUNC_TYPE)rose_timer_expiry;
rose->timer.expires = jiffies + rose->t1;
add_timer(&rose->timer);
@@ -62,8 +60,7 @@ void rose_start_t2timer(struct sock *sk)
del_timer(&rose->timer);
- rose->timer.data = (unsigned long)sk;
- rose->timer.function = &rose_timer_expiry;
+ rose->timer.function = (TIMER_FUNC_TYPE)rose_timer_expiry;
rose->timer.expires = jiffies + rose->t2;
add_timer(&rose->timer);
@@ -75,8 +72,7 @@ void rose_start_t3timer(struct sock *sk)
del_timer(&rose->timer);
- rose->timer.data = (unsigned long)sk;
- rose->timer.function = &rose_timer_expiry;
+ rose->timer.function = (TIMER_FUNC_TYPE)rose_timer_expiry;
rose->timer.expires = jiffies + rose->t3;
add_timer(&rose->timer);
@@ -88,8 +84,7 @@ void rose_start_hbtimer(struct sock *sk)
del_timer(&rose->timer);
- rose->timer.data = (unsigned long)sk;
- rose->timer.function = &rose_timer_expiry;
+ rose->timer.function = (TIMER_FUNC_TYPE)rose_timer_expiry;
rose->timer.expires = jiffies + rose->hb;
add_timer(&rose->timer);
@@ -102,8 +97,7 @@ void rose_start_idletimer(struct sock *sk)
del_timer(&rose->idletimer);
if (rose->idle > 0) {
- rose->idletimer.data = (unsigned long)sk;
- rose->idletimer.function = &rose_idletimer_expiry;
+ rose->idletimer.function = (TIMER_FUNC_TYPE)rose_idletimer_expiry;
rose->idletimer.expires = jiffies + rose->idle;
add_timer(&rose->idletimer);
@@ -125,9 +119,9 @@ void rose_stop_idletimer(struct sock *sk)
del_timer(&rose_sk(sk)->idletimer);
}
-static void rose_heartbeat_expiry(unsigned long param)
+static void rose_heartbeat_expiry(struct timer_list *t)
{
- struct sock *sk = (struct sock *)param;
+ struct sock *sk = from_timer(sk, t, sk_timer);
struct rose_sock *rose = rose_sk(sk);
bh_lock_sock(sk);
@@ -163,10 +157,10 @@ static void rose_heartbeat_expiry(unsigned long param)
bh_unlock_sock(sk);
}
-static void rose_timer_expiry(unsigned long param)
+static void rose_timer_expiry(struct timer_list *t)
{
- struct sock *sk = (struct sock *)param;
- struct rose_sock *rose = rose_sk(sk);
+ struct rose_sock *rose = from_timer(rose, t, timer);
+ struct sock *sk = &rose->sock;
bh_lock_sock(sk);
switch (rose->state) {
@@ -192,9 +186,10 @@ static void rose_timer_expiry(unsigned long param)
bh_unlock_sock(sk);
}
-static void rose_idletimer_expiry(unsigned long param)
+static void rose_idletimer_expiry(struct timer_list *t)
{
- struct sock *sk = (struct sock *)param;
+ struct rose_sock *rose = from_timer(rose, t, idletimer);
+ struct sock *sk = &rose->sock;
bh_lock_sock(sk);
rose_clear_queues(sk);
diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile
index 9c68d2f8ba39..6ffb7e9887ce 100644
--- a/net/rxrpc/Makefile
+++ b/net/rxrpc/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for Linux kernel RxRPC
#
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index a2ad4482376f..9b5c46b052fd 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -246,6 +246,7 @@ static int rxrpc_listen(struct socket *sock, int backlog)
ret = 0;
break;
}
+ /* Fall through */
default:
ret = -EBUSY;
break;
@@ -265,6 +266,7 @@ static int rxrpc_listen(struct socket *sock, int backlog)
* @tx_total_len: Total length of data to transmit during the call (or -1)
* @gfp: The allocation constraints
* @notify_rx: Where to send notifications instead of socket queue
+ * @upgrade: Request service upgrade for call
*
* Allow a kernel service to begin a call on the nominated socket. This just
* sets up all the internal tracking structures and allocates connection and
@@ -279,7 +281,8 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
unsigned long user_call_ID,
s64 tx_total_len,
gfp_t gfp,
- rxrpc_notify_rx_t notify_rx)
+ rxrpc_notify_rx_t notify_rx,
+ bool upgrade)
{
struct rxrpc_conn_parameters cp;
struct rxrpc_call *call;
@@ -304,19 +307,29 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
cp.key = key;
cp.security_level = 0;
cp.exclusive = false;
+ cp.upgrade = upgrade;
cp.service_id = srx->srx_service;
call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, tx_total_len,
gfp);
/* The socket has been unlocked. */
- if (!IS_ERR(call))
+ if (!IS_ERR(call)) {
call->notify_rx = notify_rx;
+ mutex_unlock(&call->user_mutex);
+ }
- mutex_unlock(&call->user_mutex);
_leave(" = %p", call);
return call;
}
EXPORT_SYMBOL(rxrpc_kernel_begin_call);
+/*
+ * Dummy function used to stop the notifier talking to recvmsg().
+ */
+static void rxrpc_dummy_notify_rx(struct sock *sk, struct rxrpc_call *rxcall,
+ unsigned long call_user_ID)
+{
+}
+
/**
* rxrpc_kernel_end_call - Allow a kernel service to end a call it was using
* @sock: The socket the call is on
@@ -331,12 +344,108 @@ void rxrpc_kernel_end_call(struct socket *sock, struct rxrpc_call *call)
mutex_lock(&call->user_mutex);
rxrpc_release_call(rxrpc_sk(sock->sk), call);
+
+ /* Make sure we're not going to call back into a kernel service */
+ if (call->notify_rx) {
+ spin_lock_bh(&call->notify_lock);
+ call->notify_rx = rxrpc_dummy_notify_rx;
+ spin_unlock_bh(&call->notify_lock);
+ }
+
mutex_unlock(&call->user_mutex);
rxrpc_put_call(call, rxrpc_call_put_kernel);
}
EXPORT_SYMBOL(rxrpc_kernel_end_call);
/**
+ * rxrpc_kernel_check_life - Check to see whether a call is still alive
+ * @sock: The socket the call is on
+ * @call: The call to check
+ *
+ * Allow a kernel service to find out whether a call is still alive - ie. we're
+ * getting ACKs from the server. Returns a number representing the life state
+ * which can be compared to that returned by a previous call.
+ *
+ * If this is a client call, ping ACKs will be sent to the server to find out
+ * whether it's still responsive and whether the call is still alive on the
+ * server.
+ */
+u32 rxrpc_kernel_check_life(struct socket *sock, struct rxrpc_call *call)
+{
+ return call->acks_latest;
+}
+EXPORT_SYMBOL(rxrpc_kernel_check_life);
+
+/**
+ * rxrpc_kernel_check_call - Check a call's state
+ * @sock: The socket the call is on
+ * @call: The call to check
+ * @_compl: Where to store the completion state
+ * @_abort_code: Where to store any abort code
+ *
+ * Allow a kernel service to query the state of a call and find out the manner
+ * of its termination if it has completed. Returns -EINPROGRESS if the call is
+ * still going, 0 if the call finished successfully, -ECONNABORTED if the call
+ * was aborted and an appropriate error if the call failed in some other way.
+ */
+int rxrpc_kernel_check_call(struct socket *sock, struct rxrpc_call *call,
+ enum rxrpc_call_completion *_compl, u32 *_abort_code)
+{
+ if (call->state != RXRPC_CALL_COMPLETE)
+ return -EINPROGRESS;
+ smp_rmb();
+ *_compl = call->completion;
+ *_abort_code = call->abort_code;
+ return call->error;
+}
+EXPORT_SYMBOL(rxrpc_kernel_check_call);
+
+/**
+ * rxrpc_kernel_retry_call - Allow a kernel service to retry a call
+ * @sock: The socket the call is on
+ * @call: The call to retry
+ * @srx: The address of the peer to contact
+ * @key: The security context to use (defaults to socket setting)
+ *
+ * Allow a kernel service to try resending a client call that failed due to a
+ * network error to a new address. The Tx queue is maintained intact, thereby
+ * relieving the need to re-encrypt any request data that has already been
+ * buffered.
+ */
+int rxrpc_kernel_retry_call(struct socket *sock, struct rxrpc_call *call,
+ struct sockaddr_rxrpc *srx, struct key *key)
+{
+ struct rxrpc_conn_parameters cp;
+ struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
+ int ret;
+
+ _enter("%d{%d}", call->debug_id, atomic_read(&call->usage));
+
+ if (!key)
+ key = rx->key;
+ if (key && !key->payload.data[0])
+ key = NULL; /* a no-security key */
+
+ memset(&cp, 0, sizeof(cp));
+ cp.local = rx->local;
+ cp.key = key;
+ cp.security_level = 0;
+ cp.exclusive = false;
+ cp.service_id = srx->srx_service;
+
+ mutex_lock(&call->user_mutex);
+
+ ret = rxrpc_prepare_call_for_retry(rx, call);
+ if (ret == 0)
+ ret = rxrpc_retry_client_call(rx, call, &cp, srx, GFP_KERNEL);
+
+ mutex_unlock(&call->user_mutex);
+ _leave(" = %d", ret);
+ return ret;
+}
+EXPORT_SYMBOL(rxrpc_kernel_retry_call);
+
+/**
* rxrpc_kernel_new_call_notification - Get notifications of new calls
* @sock: The socket to intercept received messages on
* @notify_new_call: Function to be called when new calls appear
@@ -468,6 +577,7 @@ static int rxrpc_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
m->msg_name = &rx->connect_srx;
m->msg_namelen = sizeof(rx->connect_srx);
}
+ /* Fall through */
case RXRPC_SERVER_BOUND:
case RXRPC_SERVER_LISTENING:
ret = rxrpc_do_sendmsg(rx, m, len);
@@ -591,13 +701,13 @@ static int rxrpc_getsockopt(struct socket *sock, int level, int optname,
char __user *optval, int __user *_optlen)
{
int optlen;
-
+
if (level != SOL_RXRPC)
return -EOPNOTSUPP;
if (get_user(optlen, _optlen))
return -EFAULT;
-
+
switch (optname) {
case RXRPC_SUPPORTED_CMSG:
if (optlen < sizeof(int))
@@ -606,7 +716,7 @@ static int rxrpc_getsockopt(struct socket *sock, int level, int optname,
put_user(sizeof(int), _optlen))
return -EFAULT;
return 0;
-
+
default:
return -EOPNOTSUPP;
}
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 69b97339ff9d..b2151993d384 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -15,7 +15,7 @@
#include <net/netns/generic.h>
#include <net/sock.h>
#include <net/af_rxrpc.h>
-#include <rxrpc/packet.h>
+#include "protocol.h"
#if 0
#define CHECK_SLAB_OKAY(X) \
@@ -445,6 +445,7 @@ enum rxrpc_call_flag {
RXRPC_CALL_EXPOSED, /* The call was exposed to the world */
RXRPC_CALL_RX_LAST, /* Received the last packet (at rxtx_top) */
RXRPC_CALL_TX_LAST, /* Last packet in Tx buffer (at rxtx_top) */
+ RXRPC_CALL_TX_LASTQ, /* Last packet has been queued */
RXRPC_CALL_SEND_PING, /* A ping will need to be sent */
RXRPC_CALL_PINGING, /* Ping in process */
RXRPC_CALL_RETRANS_TIMEOUT, /* Retransmission due to timeout occurred */
@@ -482,18 +483,6 @@ enum rxrpc_call_state {
};
/*
- * Call completion condition (state == RXRPC_CALL_COMPLETE).
- */
-enum rxrpc_call_completion {
- RXRPC_CALL_SUCCEEDED, /* - Normal termination */
- RXRPC_CALL_REMOTELY_ABORTED, /* - call aborted by peer */
- RXRPC_CALL_LOCALLY_ABORTED, /* - call aborted locally on error or close */
- RXRPC_CALL_LOCAL_ERROR, /* - call failed due to local error */
- RXRPC_CALL_NETWORK_ERROR, /* - call terminated by network error */
- NR__RXRPC_CALL_COMPLETIONS
-};
-
-/*
* Call Tx congestion management modes.
*/
enum rxrpc_congest_mode {
@@ -536,6 +525,7 @@ struct rxrpc_call {
unsigned long flags;
unsigned long events;
spinlock_t lock;
+ spinlock_t notify_lock; /* Kernel notification lock */
rwlock_t state_lock; /* lock for state transition */
u32 abort_code; /* Local/remote abort code */
int error; /* Local error incurred */
@@ -687,9 +677,15 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *,
struct rxrpc_conn_parameters *,
struct sockaddr_rxrpc *,
unsigned long, s64, gfp_t);
+int rxrpc_retry_client_call(struct rxrpc_sock *,
+ struct rxrpc_call *,
+ struct rxrpc_conn_parameters *,
+ struct sockaddr_rxrpc *,
+ gfp_t);
void rxrpc_incoming_call(struct rxrpc_sock *, struct rxrpc_call *,
struct sk_buff *);
void rxrpc_release_call(struct rxrpc_sock *, struct rxrpc_call *);
+int rxrpc_prepare_call_for_retry(struct rxrpc_sock *, struct rxrpc_call *);
void rxrpc_release_calls_on_socket(struct rxrpc_sock *);
bool __rxrpc_queue_call(struct rxrpc_call *);
bool rxrpc_queue_call(struct rxrpc_call *);
@@ -830,7 +826,6 @@ void rxrpc_process_connection(struct work_struct *);
*/
extern unsigned int rxrpc_connection_expiry;
-int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *, struct sk_buff *);
struct rxrpc_connection *rxrpc_alloc_connection(gfp_t);
struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *,
struct sk_buff *);
@@ -894,7 +889,7 @@ extern struct key_type key_type_rxrpc_s;
int rxrpc_request_key(struct rxrpc_sock *, char __user *, int);
int rxrpc_server_keyring(struct rxrpc_sock *, char __user *, int);
-int rxrpc_get_server_data_key(struct rxrpc_connection *, const void *, time_t,
+int rxrpc_get_server_data_key(struct rxrpc_connection *, const void *, time64_t,
u32);
/*
@@ -1060,7 +1055,8 @@ static inline void rxrpc_sysctl_exit(void) {}
/*
* utils.c
*/
-int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *, struct sk_buff *);
+int rxrpc_extract_addr_from_skb(struct rxrpc_local *, struct sockaddr_rxrpc *,
+ struct sk_buff *);
static inline bool before(u32 seq1, u32 seq2)
{
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index dd30d74824b0..cbd1701e813a 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -223,6 +223,7 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx)
tail = b->call_backlog_tail;
while (CIRC_CNT(head, tail, size) > 0) {
struct rxrpc_call *call = b->call_backlog[tail];
+ call->socket = rx;
if (rx->discard_new_call) {
_debug("discard %lx", call->user_call_ID);
rx->discard_new_call(call, call->user_call_ID);
@@ -276,7 +277,7 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx,
* anticipation - and to save on stack space.
*/
xpeer = b->peer_backlog[peer_tail];
- if (rxrpc_extract_addr_from_skb(&xpeer->srx, skb) < 0)
+ if (rxrpc_extract_addr_from_skb(local, &xpeer->srx, skb) < 0)
return NULL;
peer = rxrpc_lookup_incoming_peer(local, xpeer);
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index 7a77844aab16..3574508baf9a 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -386,7 +386,7 @@ recheck_state:
now = ktime_get_real();
if (ktime_before(call->expire_at, now)) {
- rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, -ETIME);
+ rxrpc_abort_call("EXP", call, 0, RX_USER_ABORT, -ETIME);
set_bit(RXRPC_CALL_EV_ABORT, &call->events);
goto recheck_state;
}
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index d7809a0620b4..4c7fbc6dcce7 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -124,6 +124,7 @@ struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp)
INIT_LIST_HEAD(&call->sock_link);
init_waitqueue_head(&call->waitq);
spin_lock_init(&call->lock);
+ spin_lock_init(&call->notify_lock);
rwlock_init(&call->state_lock);
atomic_set(&call->usage, 1);
call->debug_id = atomic_inc_return(&rxrpc_debug_id);
@@ -269,11 +270,6 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
trace_rxrpc_call(call, rxrpc_call_connected, atomic_read(&call->usage),
here, NULL);
- spin_lock_bh(&call->conn->params.peer->lock);
- hlist_add_head(&call->error_link,
- &call->conn->params.peer->error_targets);
- spin_unlock_bh(&call->conn->params.peer->lock);
-
rxrpc_start_call_timer(call);
_net("CALL new %d on CONN %d", call->debug_id, call->conn->debug_id);
@@ -304,6 +300,48 @@ error:
}
/*
+ * Retry a call to a new address. It is expected that the Tx queue of the call
+ * will contain data previously packaged for an old call.
+ */
+int rxrpc_retry_client_call(struct rxrpc_sock *rx,
+ struct rxrpc_call *call,
+ struct rxrpc_conn_parameters *cp,
+ struct sockaddr_rxrpc *srx,
+ gfp_t gfp)
+{
+ const void *here = __builtin_return_address(0);
+ int ret;
+
+ /* Set up or get a connection record and set the protocol parameters,
+ * including channel number and call ID.
+ */
+ ret = rxrpc_connect_call(call, cp, srx, gfp);
+ if (ret < 0)
+ goto error;
+
+ trace_rxrpc_call(call, rxrpc_call_connected, atomic_read(&call->usage),
+ here, NULL);
+
+ rxrpc_start_call_timer(call);
+
+ _net("CALL new %d on CONN %d", call->debug_id, call->conn->debug_id);
+
+ if (!test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events))
+ rxrpc_queue_call(call);
+
+ _leave(" = 0");
+ return 0;
+
+error:
+ rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
+ RX_CALL_DEAD, ret);
+ trace_rxrpc_call(call, rxrpc_call_error, atomic_read(&call->usage),
+ here, ERR_PTR(ret));
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
* Set up an incoming call. call->conn points to the connection.
* This is called in BH context and isn't allowed to fail.
*/
@@ -471,6 +509,61 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
}
/*
+ * Prepare a kernel service call for retry.
+ */
+int rxrpc_prepare_call_for_retry(struct rxrpc_sock *rx, struct rxrpc_call *call)
+{
+ const void *here = __builtin_return_address(0);
+ int i;
+ u8 last = 0;
+
+ _enter("{%d,%d}", call->debug_id, atomic_read(&call->usage));
+
+ trace_rxrpc_call(call, rxrpc_call_release, atomic_read(&call->usage),
+ here, (const void *)call->flags);
+
+ ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE);
+ ASSERTCMP(call->completion, !=, RXRPC_CALL_REMOTELY_ABORTED);
+ ASSERTCMP(call->completion, !=, RXRPC_CALL_LOCALLY_ABORTED);
+ ASSERT(list_empty(&call->recvmsg_link));
+
+ del_timer_sync(&call->timer);
+
+ _debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, call->conn);
+
+ if (call->conn)
+ rxrpc_disconnect_call(call);
+
+ if (rxrpc_is_service_call(call) ||
+ !call->tx_phase ||
+ call->tx_hard_ack != 0 ||
+ call->rx_hard_ack != 0 ||
+ call->rx_top != 0)
+ return -EINVAL;
+
+ call->state = RXRPC_CALL_UNINITIALISED;
+ call->completion = RXRPC_CALL_SUCCEEDED;
+ call->call_id = 0;
+ call->cid = 0;
+ call->cong_cwnd = 0;
+ call->cong_extra = 0;
+ call->cong_ssthresh = 0;
+ call->cong_mode = 0;
+ call->cong_dup_acks = 0;
+ call->cong_cumul_acks = 0;
+ call->acks_lowest_nak = 0;
+
+ for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++) {
+ last |= call->rxtx_annotations[i];
+ call->rxtx_annotations[i] &= RXRPC_TX_ANNO_LAST;
+ call->rxtx_annotations[i] |= RXRPC_TX_ANNO_RETRANS;
+ }
+
+ _leave(" = 0");
+ return 0;
+}
+
+/*
* release all the calls associated with a socket
*/
void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx)
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index eb2157680399..5f9624bd311c 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -555,7 +555,10 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn,
trace_rxrpc_client(conn, channel, rxrpc_client_chan_activate);
write_lock_bh(&call->state_lock);
- call->state = RXRPC_CALL_CLIENT_SEND_REQUEST;
+ if (!test_bit(RXRPC_CALL_TX_LASTQ, &call->flags))
+ call->state = RXRPC_CALL_CLIENT_SEND_REQUEST;
+ else
+ call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY;
write_unlock_bh(&call->state_lock);
rxrpc_see_call(call);
@@ -688,15 +691,23 @@ int rxrpc_connect_call(struct rxrpc_call *call,
ret = rxrpc_get_client_conn(call, cp, srx, gfp);
if (ret < 0)
- return ret;
+ goto out;
rxrpc_animate_client_conn(rxnet, call->conn);
rxrpc_activate_channels(call->conn);
ret = rxrpc_wait_for_channel(call, gfp);
- if (ret < 0)
+ if (ret < 0) {
rxrpc_disconnect_client_call(call);
+ goto out;
+ }
+
+ spin_lock_bh(&call->conn->params.peer->lock);
+ hlist_add_head(&call->error_link,
+ &call->conn->params.peer->error_targets);
+ spin_unlock_bh(&call->conn->params.peer->lock);
+out:
_leave(" = %d", ret);
return ret;
}
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index 929b50d5afe8..fe575798592f 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -72,7 +72,7 @@ struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local,
_enter(",%x", sp->hdr.cid & RXRPC_CIDMASK);
- if (rxrpc_extract_addr_from_skb(&srx, skb) < 0)
+ if (rxrpc_extract_addr_from_skb(local, &srx, skb) < 0)
goto not_found;
k.epoch = sp->hdr.epoch;
diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c
index e60fcd2a4a02..f6fcdb3130a1 100644
--- a/net/rxrpc/conn_service.c
+++ b/net/rxrpc/conn_service.c
@@ -50,12 +50,11 @@ struct rxrpc_connection *rxrpc_find_service_conn_rcu(struct rxrpc_peer *peer,
else if (conn->proto.index_key > k.index_key)
p = rcu_dereference_raw(p->rb_right);
else
- goto done;
+ break;
conn = NULL;
}
} while (need_seqretry(&peer->service_conn_lock, seq));
-done:
done_seqretry(&peer->service_conn_lock, seq);
_leave(" = %d", conn ? conn->debug_id : -1);
return conn;
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index e56e23ed2229..1b592073ec96 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -298,8 +298,6 @@ static bool rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun,
write_unlock(&call->state_lock);
if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) {
- rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, 0, false, true,
- rxrpc_propose_ack_client_tx_end);
trace_rxrpc_transmit(call, rxrpc_transmit_await_reply);
} else {
trace_rxrpc_transmit(call, rxrpc_transmit_end);
@@ -1125,6 +1123,7 @@ void rxrpc_data_ready(struct sock *udp_sk)
case RXRPC_PACKET_TYPE_BUSY:
if (sp->hdr.flags & RXRPC_CLIENT_INITIATED)
goto discard;
+ /* Fall through */
case RXRPC_PACKET_TYPE_DATA:
if (sp->hdr.callNumber == 0)
diff --git a/net/rxrpc/key.c b/net/rxrpc/key.c
index 54369225766e..e7f6b8823eb6 100644
--- a/net/rxrpc/key.c
+++ b/net/rxrpc/key.c
@@ -92,6 +92,7 @@ static int rxrpc_preparse_xdr_rxkad(struct key_preparsed_payload *prep,
const __be32 *xdr, unsigned int toklen)
{
struct rxrpc_key_token *token, **pptoken;
+ time64_t expiry;
size_t plen;
u32 tktlen;
@@ -158,8 +159,9 @@ static int rxrpc_preparse_xdr_rxkad(struct key_preparsed_payload *prep,
pptoken = &(*pptoken)->next)
continue;
*pptoken = token;
- if (token->kad->expiry < prep->expiry)
- prep->expiry = token->kad->expiry;
+ expiry = rxrpc_u32_to_time64(token->kad->expiry);
+ if (expiry < prep->expiry)
+ prep->expiry = expiry;
_leave(" = 0");
return 0;
@@ -433,6 +435,7 @@ static int rxrpc_preparse_xdr_rxk5(struct key_preparsed_payload *prep,
struct rxrpc_key_token *token, **pptoken;
struct rxk5_key *rxk5;
const __be32 *end_xdr = xdr + (toklen >> 2);
+ time64_t expiry;
int ret;
_enter(",{%x,%x,%x,%x},%u",
@@ -533,8 +536,9 @@ static int rxrpc_preparse_xdr_rxk5(struct key_preparsed_payload *prep,
pptoken = &(*pptoken)->next)
continue;
*pptoken = token;
- if (token->kad->expiry < prep->expiry)
- prep->expiry = token->kad->expiry;
+ expiry = rxrpc_u32_to_time64(token->k5->endtime);
+ if (expiry < prep->expiry)
+ prep->expiry = expiry;
_leave(" = 0");
return 0;
@@ -691,6 +695,7 @@ static int rxrpc_preparse(struct key_preparsed_payload *prep)
{
const struct rxrpc_key_data_v1 *v1;
struct rxrpc_key_token *token, **pp;
+ time64_t expiry;
size_t plen;
u32 kver;
int ret;
@@ -777,8 +782,9 @@ static int rxrpc_preparse(struct key_preparsed_payload *prep)
while (*pp)
pp = &(*pp)->next;
*pp = token;
- if (token->kad->expiry < prep->expiry)
- prep->expiry = token->kad->expiry;
+ expiry = rxrpc_u32_to_time64(token->kad->expiry);
+ if (expiry < prep->expiry)
+ prep->expiry = expiry;
token = NULL;
ret = 0;
@@ -955,7 +961,7 @@ int rxrpc_server_keyring(struct rxrpc_sock *rx, char __user *optval,
*/
int rxrpc_get_server_data_key(struct rxrpc_connection *conn,
const void *session_key,
- time_t expiry,
+ time64_t expiry,
u32 kvno)
{
const struct cred *cred = current_cred();
@@ -982,7 +988,7 @@ int rxrpc_get_server_data_key(struct rxrpc_connection *conn,
data.kver = 1;
data.v1.security_index = RXRPC_SECURITY_RXKAD;
data.v1.ticket_length = 0;
- data.v1.expiry = expiry;
+ data.v1.expiry = rxrpc_time64_to_u32(expiry);
data.v1.kvno = 0;
memcpy(&data.v1.session_key, session_key, sizeof(data.v1.session_key));
diff --git a/net/rxrpc/local_event.c b/net/rxrpc/local_event.c
index 540d3955c1bc..93b5d910b4a1 100644
--- a/net/rxrpc/local_event.c
+++ b/net/rxrpc/local_event.c
@@ -39,7 +39,7 @@ static void rxrpc_send_version_request(struct rxrpc_local *local,
_enter("");
- if (rxrpc_extract_addr_from_skb(&srx, skb) < 0)
+ if (rxrpc_extract_addr_from_skb(local, &srx, skb) < 0)
return;
msg.msg_name = &srx.transport;
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index 5bd2d0fa4a03..f47659c7b224 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -35,7 +35,8 @@ struct rxrpc_abort_buffer {
/*
* Fill out an ACK packet.
*/
-static size_t rxrpc_fill_out_ack(struct rxrpc_call *call,
+static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn,
+ struct rxrpc_call *call,
struct rxrpc_ack_buffer *pkt,
rxrpc_seq_t *_hard_ack,
rxrpc_seq_t *_top,
@@ -77,8 +78,8 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_call *call,
} while (before_eq(seq, top));
}
- mtu = call->conn->params.peer->if_mtu;
- mtu -= call->conn->params.peer->hdrsize;
+ mtu = conn->params.peer->if_mtu;
+ mtu -= conn->params.peer->hdrsize;
jmax = (call->nr_jumbo_bad > 3) ? 1 : rxrpc_rx_jumbo_max;
pkt->ackinfo.rxMTU = htonl(rxrpc_rx_mtu);
pkt->ackinfo.maxMTU = htonl(mtu);
@@ -148,7 +149,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping)
}
call->ackr_reason = 0;
}
- n = rxrpc_fill_out_ack(call, pkt, &hard_ack, &top, reason);
+ n = rxrpc_fill_out_ack(conn, call, pkt, &hard_ack, &top, reason);
spin_unlock_bh(&call->lock);
@@ -221,6 +222,16 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call)
rxrpc_serial_t serial;
int ret;
+ /* Don't bother sending aborts for a client call once the server has
+ * hard-ACK'd all of its request data. After that point, we're not
+ * going to stop the operation proceeding, and whilst we might limit
+ * the reply, it's not worth it if we can send a new call on the same
+ * channel instead, thereby closing off this call.
+ */
+ if (rxrpc_is_client_call(call) &&
+ test_bit(RXRPC_CALL_TX_LAST, &call->flags))
+ return 0;
+
spin_lock_bh(&call->lock);
if (call->conn)
conn = rxrpc_get_connection_maybe(call->conn);
@@ -444,7 +455,7 @@ void rxrpc_reject_packets(struct rxrpc_local *local)
rxrpc_see_skb(skb, rxrpc_skb_rx_seen);
sp = rxrpc_skb(skb);
- if (rxrpc_extract_addr_from_skb(&srx, skb) == 0) {
+ if (rxrpc_extract_addr_from_skb(local, &srx, skb) == 0) {
msg.msg_namelen = srx.transport_len;
code = htonl(skb->priority);
diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c
index 1ed9c0c2e94f..7f749505e699 100644
--- a/net/rxrpc/peer_event.c
+++ b/net/rxrpc/peer_event.c
@@ -37,6 +37,7 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local,
memset(&srx, 0, sizeof(srx));
srx.transport_type = local->srx.transport_type;
+ srx.transport_len = local->srx.transport_len;
srx.transport.family = local->srx.transport.family;
/* Can we see an ICMP4 packet on an ICMP6 listening socket? and vice
@@ -45,7 +46,6 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local,
switch (srx.transport.family) {
case AF_INET:
srx.transport.sin.sin_port = serr->port;
- srx.transport_len = sizeof(struct sockaddr_in);
switch (serr->ee.ee_origin) {
case SO_EE_ORIGIN_ICMP:
_net("Rx ICMP");
@@ -69,7 +69,6 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local,
#ifdef CONFIG_AF_RXRPC_IPV6
case AF_INET6:
srx.transport.sin6.sin6_port = serr->port;
- srx.transport_len = sizeof(struct sockaddr_in6);
switch (serr->ee.ee_origin) {
case SO_EE_ORIGIN_ICMP6:
_net("Rx ICMP6");
@@ -79,6 +78,9 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local,
break;
case SO_EE_ORIGIN_ICMP:
_net("Rx ICMP on v6 sock");
+ srx.transport.sin6.sin6_addr.s6_addr32[0] = 0;
+ srx.transport.sin6.sin6_addr.s6_addr32[1] = 0;
+ srx.transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff);
memcpy(srx.transport.sin6.sin6_addr.s6_addr + 12,
skb_network_header(skb) + serr->addr_offset,
sizeof(struct in_addr));
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
index 5787f97f5330..d02a99f37f5f 100644
--- a/net/rxrpc/peer_object.c
+++ b/net/rxrpc/peer_object.c
@@ -411,3 +411,16 @@ void rxrpc_kernel_get_peer(struct socket *sock, struct rxrpc_call *call,
*_srx = call->peer->srx;
}
EXPORT_SYMBOL(rxrpc_kernel_get_peer);
+
+/**
+ * rxrpc_kernel_get_rtt - Get a call's peer RTT
+ * @sock: The socket on which the call is in progress.
+ * @call: The call to query
+ *
+ * Get the call's peer RTT.
+ */
+u64 rxrpc_kernel_get_rtt(struct socket *sock, struct rxrpc_call *call)
+{
+ return call->peer->rtt;
+}
+EXPORT_SYMBOL(rxrpc_kernel_get_rtt);
diff --git a/net/rxrpc/protocol.h b/net/rxrpc/protocol.h
new file mode 100644
index 000000000000..4bddcf3face3
--- /dev/null
+++ b/net/rxrpc/protocol.h
@@ -0,0 +1,190 @@
+/* packet.h: Rx packet layout and definitions
+ *
+ * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _LINUX_RXRPC_PACKET_H
+#define _LINUX_RXRPC_PACKET_H
+
+typedef u32 rxrpc_seq_t; /* Rx message sequence number */
+typedef u32 rxrpc_serial_t; /* Rx message serial number */
+typedef __be32 rxrpc_seq_net_t; /* on-the-wire Rx message sequence number */
+typedef __be32 rxrpc_serial_net_t; /* on-the-wire Rx message serial number */
+
+/*****************************************************************************/
+/*
+ * on-the-wire Rx packet header
+ * - all multibyte fields should be in network byte order
+ */
+struct rxrpc_wire_header {
+ __be32 epoch; /* client boot timestamp */
+#define RXRPC_RANDOM_EPOCH 0x80000000 /* Random if set, date-based if not */
+
+ __be32 cid; /* connection and channel ID */
+#define RXRPC_MAXCALLS 4 /* max active calls per conn */
+#define RXRPC_CHANNELMASK (RXRPC_MAXCALLS-1) /* mask for channel ID */
+#define RXRPC_CIDMASK (~RXRPC_CHANNELMASK) /* mask for connection ID */
+#define RXRPC_CIDSHIFT ilog2(RXRPC_MAXCALLS) /* shift for connection ID */
+#define RXRPC_CID_INC (1 << RXRPC_CIDSHIFT) /* connection ID increment */
+
+ __be32 callNumber; /* call ID (0 for connection-level packets) */
+ __be32 seq; /* sequence number of pkt in call stream */
+ __be32 serial; /* serial number of pkt sent to network */
+
+ uint8_t type; /* packet type */
+#define RXRPC_PACKET_TYPE_DATA 1 /* data */
+#define RXRPC_PACKET_TYPE_ACK 2 /* ACK */
+#define RXRPC_PACKET_TYPE_BUSY 3 /* call reject */
+#define RXRPC_PACKET_TYPE_ABORT 4 /* call/connection abort */
+#define RXRPC_PACKET_TYPE_ACKALL 5 /* ACK all outstanding packets on call */
+#define RXRPC_PACKET_TYPE_CHALLENGE 6 /* connection security challenge (SRVR->CLNT) */
+#define RXRPC_PACKET_TYPE_RESPONSE 7 /* connection secutity response (CLNT->SRVR) */
+#define RXRPC_PACKET_TYPE_DEBUG 8 /* debug info request */
+#define RXRPC_PACKET_TYPE_VERSION 13 /* version string request */
+#define RXRPC_N_PACKET_TYPES 14 /* number of packet types (incl type 0) */
+
+ uint8_t flags; /* packet flags */
+#define RXRPC_CLIENT_INITIATED 0x01 /* signifies a packet generated by a client */
+#define RXRPC_REQUEST_ACK 0x02 /* request an unconditional ACK of this packet */
+#define RXRPC_LAST_PACKET 0x04 /* the last packet from this side for this call */
+#define RXRPC_MORE_PACKETS 0x08 /* more packets to come */
+#define RXRPC_JUMBO_PACKET 0x20 /* [DATA] this is a jumbo packet */
+#define RXRPC_SLOW_START_OK 0x20 /* [ACK] slow start supported */
+
+ uint8_t userStatus; /* app-layer defined status */
+#define RXRPC_USERSTATUS_SERVICE_UPGRADE 0x01 /* AuriStor service upgrade request */
+
+ uint8_t securityIndex; /* security protocol ID */
+ union {
+ __be16 _rsvd; /* reserved */
+ __be16 cksum; /* kerberos security checksum */
+ };
+ __be16 serviceId; /* service ID */
+
+} __packed;
+
+#define RXRPC_SUPPORTED_PACKET_TYPES ( \
+ (1 << RXRPC_PACKET_TYPE_DATA) | \
+ (1 << RXRPC_PACKET_TYPE_ACK) | \
+ (1 << RXRPC_PACKET_TYPE_BUSY) | \
+ (1 << RXRPC_PACKET_TYPE_ABORT) | \
+ (1 << RXRPC_PACKET_TYPE_ACKALL) | \
+ (1 << RXRPC_PACKET_TYPE_CHALLENGE) | \
+ (1 << RXRPC_PACKET_TYPE_RESPONSE) | \
+ /*(1 << RXRPC_PACKET_TYPE_DEBUG) | */ \
+ (1 << RXRPC_PACKET_TYPE_VERSION))
+
+/*****************************************************************************/
+/*
+ * jumbo packet secondary header
+ * - can be mapped to read header by:
+ * - new_serial = serial + 1
+ * - new_seq = seq + 1
+ * - new_flags = j_flags
+ * - new__rsvd = j__rsvd
+ * - duplicating all other fields
+ */
+struct rxrpc_jumbo_header {
+ uint8_t flags; /* packet flags (as per rxrpc_header) */
+ uint8_t pad;
+ union {
+ __be16 _rsvd; /* reserved */
+ __be16 cksum; /* kerberos security checksum */
+ };
+};
+
+#define RXRPC_JUMBO_DATALEN 1412 /* non-terminal jumbo packet data length */
+#define RXRPC_JUMBO_SUBPKTLEN (RXRPC_JUMBO_DATALEN + sizeof(struct rxrpc_jumbo_header))
+
+/*****************************************************************************/
+/*
+ * on-the-wire Rx ACK packet data payload
+ * - all multibyte fields should be in network byte order
+ */
+struct rxrpc_ackpacket {
+ __be16 bufferSpace; /* number of packet buffers available */
+ __be16 maxSkew; /* diff between serno being ACK'd and highest serial no
+ * received */
+ __be32 firstPacket; /* sequence no of first ACK'd packet in attached list */
+ __be32 previousPacket; /* sequence no of previous packet received */
+ __be32 serial; /* serial no of packet that prompted this ACK */
+
+ uint8_t reason; /* reason for ACK */
+#define RXRPC_ACK_REQUESTED 1 /* ACK was requested on packet */
+#define RXRPC_ACK_DUPLICATE 2 /* duplicate packet received */
+#define RXRPC_ACK_OUT_OF_SEQUENCE 3 /* out of sequence packet received */
+#define RXRPC_ACK_EXCEEDS_WINDOW 4 /* packet received beyond end of ACK window */
+#define RXRPC_ACK_NOSPACE 5 /* packet discarded due to lack of buffer space */
+#define RXRPC_ACK_PING 6 /* keep alive ACK */
+#define RXRPC_ACK_PING_RESPONSE 7 /* response to RXRPC_ACK_PING */
+#define RXRPC_ACK_DELAY 8 /* nothing happened since received packet */
+#define RXRPC_ACK_IDLE 9 /* ACK due to fully received ACK window */
+#define RXRPC_ACK__INVALID 10 /* Representation of invalid ACK reason */
+
+ uint8_t nAcks; /* number of ACKs */
+#define RXRPC_MAXACKS 255
+
+ uint8_t acks[0]; /* list of ACK/NAKs */
+#define RXRPC_ACK_TYPE_NACK 0
+#define RXRPC_ACK_TYPE_ACK 1
+
+} __packed;
+
+/* Some ACKs refer to specific packets and some are general and can be updated. */
+#define RXRPC_ACK_UPDATEABLE ((1 << RXRPC_ACK_REQUESTED) | \
+ (1 << RXRPC_ACK_PING_RESPONSE) | \
+ (1 << RXRPC_ACK_DELAY) | \
+ (1 << RXRPC_ACK_IDLE))
+
+
+/*
+ * ACK packets can have a further piece of information tagged on the end
+ */
+struct rxrpc_ackinfo {
+ __be32 rxMTU; /* maximum Rx MTU size (bytes) [AFS 3.3] */
+ __be32 maxMTU; /* maximum interface MTU size (bytes) [AFS 3.3] */
+ __be32 rwind; /* Rx window size (packets) [AFS 3.4] */
+ __be32 jumbo_max; /* max packets to stick into a jumbo packet [AFS 3.5] */
+};
+
+/*****************************************************************************/
+/*
+ * Kerberos security type-2 challenge packet
+ */
+struct rxkad_challenge {
+ __be32 version; /* version of this challenge type */
+ __be32 nonce; /* encrypted random number */
+ __be32 min_level; /* minimum security level */
+ __be32 __padding; /* padding to 8-byte boundary */
+} __packed;
+
+/*****************************************************************************/
+/*
+ * Kerberos security type-2 response packet
+ */
+struct rxkad_response {
+ __be32 version; /* version of this response type */
+ __be32 __pad;
+
+ /* encrypted bit of the response */
+ struct {
+ __be32 epoch; /* current epoch */
+ __be32 cid; /* parent connection ID */
+ __be32 checksum; /* checksum */
+ __be32 securityIndex; /* security type */
+ __be32 call_id[4]; /* encrypted call IDs */
+ __be32 inc_nonce; /* challenge nonce + 1 */
+ __be32 level; /* desired level */
+ } encrypted;
+
+ __be32 kvno; /* Kerberos key version number */
+ __be32 ticket_len; /* Kerberos ticket length */
+} __packed;
+
+#endif /* _LINUX_RXRPC_PACKET_H */
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index bdece21f313d..8510a98b87e1 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -40,7 +40,9 @@ void rxrpc_notify_socket(struct rxrpc_call *call)
sk = &rx->sk;
if (rx && sk->sk_state < RXRPC_CLOSE) {
if (call->notify_rx) {
+ spin_lock_bh(&call->notify_lock);
call->notify_rx(sk, call, call->user_call_ID);
+ spin_unlock_bh(&call->notify_lock);
} else {
write_lock_bh(&rx->recvmsg_lock);
if (list_empty(&call->recvmsg_link)) {
@@ -607,6 +609,7 @@ wait_error:
* @_offset: The running offset into the buffer.
* @want_more: True if more data is expected to be read
* @_abort: Where the abort code is stored if -ECONNABORTED is returned
+ * @_service: Where to store the actual service ID (may be upgraded)
*
* Allow a kernel service to receive data and pick up information about the
* state of a call. Returns 0 if got what was asked for and there's more
@@ -624,7 +627,7 @@ wait_error:
*/
int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call,
void *buf, size_t size, size_t *_offset,
- bool want_more, u32 *_abort)
+ bool want_more, u32 *_abort, u16 *_service)
{
struct iov_iter iter;
struct kvec iov;
@@ -680,6 +683,8 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call,
read_phase_complete:
ret = 1;
out:
+ if (_service)
+ *_service = call->service_id;
mutex_unlock(&call->user_mutex);
_leave(" = %d [%zu,%d]", ret, *_offset, *_abort);
return ret;
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index 46d1a1f0b55b..c38b3a1de56c 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -634,8 +634,8 @@ static int rxkad_issue_challenge(struct rxrpc_connection *conn)
challenge.min_level = htonl(0);
challenge.__padding = 0;
- msg.msg_name = &conn->params.peer->srx.transport.sin;
- msg.msg_namelen = sizeof(conn->params.peer->srx.transport.sin);
+ msg.msg_name = &conn->params.peer->srx.transport;
+ msg.msg_namelen = conn->params.peer->srx.transport_len;
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_flags = 0;
@@ -689,8 +689,8 @@ static int rxkad_send_response(struct rxrpc_connection *conn,
_enter("");
- msg.msg_name = &conn->params.peer->srx.transport.sin;
- msg.msg_namelen = sizeof(conn->params.peer->srx.transport.sin);
+ msg.msg_name = &conn->params.peer->srx.transport;
+ msg.msg_namelen = conn->params.peer->srx.transport_len;
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_flags = 0;
@@ -854,7 +854,7 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
struct sk_buff *skb,
void *ticket, size_t ticket_len,
struct rxrpc_crypt *_session_key,
- time_t *_expiry,
+ time64_t *_expiry,
u32 *_abort_code)
{
struct skcipher_request *req;
@@ -864,7 +864,7 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
struct in_addr addr;
unsigned int life;
const char *eproto;
- time_t issue, now;
+ time64_t issue, now;
bool little_endian;
int ret;
u32 abort_code;
@@ -960,15 +960,15 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
if (little_endian) {
__le32 stamp;
memcpy(&stamp, p, 4);
- issue = le32_to_cpu(stamp);
+ issue = rxrpc_u32_to_time64(le32_to_cpu(stamp));
} else {
__be32 stamp;
memcpy(&stamp, p, 4);
- issue = be32_to_cpu(stamp);
+ issue = rxrpc_u32_to_time64(be32_to_cpu(stamp));
}
p += 4;
- now = get_seconds();
- _debug("KIV ISSUE: %lx [%lx]", issue, now);
+ now = ktime_get_real_seconds();
+ _debug("KIV ISSUE: %llx [%llx]", issue, now);
/* check the ticket is in date */
if (issue > now) {
@@ -1053,7 +1053,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct rxrpc_crypt session_key;
const char *eproto;
- time_t expiry;
+ time64_t expiry;
void *ticket;
u32 abort_code, version, kvno, ticket_len, level;
__be32 csum;
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index b0d2cda6ec0a..7d2595582c09 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -38,12 +38,86 @@ struct rxrpc_send_params {
};
/*
+ * Wait for space to appear in the Tx queue or a signal to occur.
+ */
+static int rxrpc_wait_for_tx_window_intr(struct rxrpc_sock *rx,
+ struct rxrpc_call *call,
+ long *timeo)
+{
+ for (;;) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (call->tx_top - call->tx_hard_ack <
+ min_t(unsigned int, call->tx_winsize,
+ call->cong_cwnd + call->cong_extra))
+ return 0;
+
+ if (call->state >= RXRPC_CALL_COMPLETE)
+ return call->error;
+
+ if (signal_pending(current))
+ return sock_intr_errno(*timeo);
+
+ trace_rxrpc_transmit(call, rxrpc_transmit_wait);
+ mutex_unlock(&call->user_mutex);
+ *timeo = schedule_timeout(*timeo);
+ if (mutex_lock_interruptible(&call->user_mutex) < 0)
+ return sock_intr_errno(*timeo);
+ }
+}
+
+/*
+ * Wait for space to appear in the Tx queue uninterruptibly, but with
+ * a timeout of 2*RTT if no progress was made and a signal occurred.
+ */
+static int rxrpc_wait_for_tx_window_nonintr(struct rxrpc_sock *rx,
+ struct rxrpc_call *call)
+{
+ rxrpc_seq_t tx_start, tx_win;
+ signed long rtt2, timeout;
+ u64 rtt;
+
+ rtt = READ_ONCE(call->peer->rtt);
+ rtt2 = nsecs_to_jiffies64(rtt) * 2;
+ if (rtt2 < 1)
+ rtt2 = 1;
+
+ timeout = rtt2;
+ tx_start = READ_ONCE(call->tx_hard_ack);
+
+ for (;;) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+
+ tx_win = READ_ONCE(call->tx_hard_ack);
+ if (call->tx_top - tx_win <
+ min_t(unsigned int, call->tx_winsize,
+ call->cong_cwnd + call->cong_extra))
+ return 0;
+
+ if (call->state >= RXRPC_CALL_COMPLETE)
+ return call->error;
+
+ if (timeout == 0 &&
+ tx_win == tx_start && signal_pending(current))
+ return -EINTR;
+
+ if (tx_win != tx_start) {
+ timeout = rtt2;
+ tx_start = tx_win;
+ }
+
+ trace_rxrpc_transmit(call, rxrpc_transmit_wait);
+ timeout = schedule_timeout(timeout);
+ }
+}
+
+/*
* wait for space to appear in the transmit/ACK window
* - caller holds the socket locked
*/
static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx,
struct rxrpc_call *call,
- long *timeo)
+ long *timeo,
+ bool waitall)
{
DECLARE_WAITQUEUE(myself, current);
int ret;
@@ -53,30 +127,10 @@ static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx,
add_wait_queue(&call->waitq, &myself);
- for (;;) {
- set_current_state(TASK_INTERRUPTIBLE);
- ret = 0;
- if (call->tx_top - call->tx_hard_ack <
- min_t(unsigned int, call->tx_winsize,
- call->cong_cwnd + call->cong_extra))
- break;
- if (call->state >= RXRPC_CALL_COMPLETE) {
- ret = -call->error;
- break;
- }
- if (signal_pending(current)) {
- ret = sock_intr_errno(*timeo);
- break;
- }
-
- trace_rxrpc_transmit(call, rxrpc_transmit_wait);
- mutex_unlock(&call->user_mutex);
- *timeo = schedule_timeout(*timeo);
- if (mutex_lock_interruptible(&call->user_mutex) < 0) {
- ret = sock_intr_errno(*timeo);
- break;
- }
- }
+ if (waitall)
+ ret = rxrpc_wait_for_tx_window_nonintr(rx, call);
+ else
+ ret = rxrpc_wait_for_tx_window_intr(rx, call, timeo);
remove_wait_queue(&call->waitq, &myself);
set_current_state(TASK_RUNNING);
@@ -101,11 +155,23 @@ static inline void rxrpc_instant_resend(struct rxrpc_call *call, int ix)
}
/*
+ * Notify the owner of the call that the transmit phase is ended and the last
+ * packet has been queued.
+ */
+static void rxrpc_notify_end_tx(struct rxrpc_sock *rx, struct rxrpc_call *call,
+ rxrpc_notify_end_tx_t notify_end_tx)
+{
+ if (notify_end_tx)
+ notify_end_tx(&rx->sk, call, call->user_call_ID);
+}
+
+/*
* Queue a DATA packet for transmission, set the resend timeout and send the
* packet immediately
*/
-static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb,
- bool last)
+static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
+ struct sk_buff *skb, bool last,
+ rxrpc_notify_end_tx_t notify_end_tx)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
rxrpc_seq_t seq = sp->hdr.seq;
@@ -116,8 +182,10 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb,
ASSERTCMP(seq, ==, call->tx_top + 1);
- if (last)
+ if (last) {
annotation |= RXRPC_TX_ANNO_LAST;
+ set_bit(RXRPC_CALL_TX_LASTQ, &call->flags);
+ }
/* We have to set the timestamp before queueing as the retransmit
* algorithm can see the packet as soon as we queue it.
@@ -141,6 +209,7 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb,
switch (call->state) {
case RXRPC_CALL_CLIENT_SEND_REQUEST:
call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY;
+ rxrpc_notify_end_tx(rx, call, notify_end_tx);
break;
case RXRPC_CALL_SERVER_ACK_REQUEST:
call->state = RXRPC_CALL_SERVER_SEND_REPLY;
@@ -151,8 +220,10 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb,
ktime_get_real());
if (!last)
break;
+ /* Fall through */
case RXRPC_CALL_SERVER_SEND_REPLY:
call->state = RXRPC_CALL_SERVER_AWAIT_ACK;
+ rxrpc_notify_end_tx(rx, call, notify_end_tx);
break;
default:
break;
@@ -189,7 +260,8 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb,
*/
static int rxrpc_send_data(struct rxrpc_sock *rx,
struct rxrpc_call *call,
- struct msghdr *msg, size_t len)
+ struct msghdr *msg, size_t len,
+ rxrpc_notify_end_tx_t notify_end_tx)
{
struct rxrpc_skb_priv *sp;
struct sk_buff *skb;
@@ -237,7 +309,8 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
if (msg->msg_flags & MSG_DONTWAIT)
goto maybe_error;
ret = rxrpc_wait_for_tx_window(rx, call,
- &timeo);
+ &timeo,
+ msg->msg_flags & MSG_WAITALL);
if (ret < 0)
goto maybe_error;
}
@@ -311,11 +384,6 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
call->tx_total_len -= copy;
}
- /* check for the far side aborting the call or a network error
- * occurring */
- if (call->state == RXRPC_CALL_COMPLETE)
- goto call_terminated;
-
/* add the packet to the send queue if it's now full */
if (sp->remain <= 0 ||
(msg_data_left(msg) == 0 && !more)) {
@@ -350,9 +418,21 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
if (ret < 0)
goto out;
- rxrpc_queue_packet(call, skb, !msg_data_left(msg) && !more);
+ rxrpc_queue_packet(rx, call, skb,
+ !msg_data_left(msg) && !more,
+ notify_end_tx);
skb = NULL;
}
+
+ /* Check for the far side aborting the call or a network error
+ * occurring. If this happens, save any packet that was under
+ * construction so that in the case of a network error, the
+ * call can be retried or redirected.
+ */
+ if (call->state == RXRPC_CALL_COMPLETE) {
+ ret = call->error;
+ goto out;
+ }
} while (msg_data_left(msg) > 0);
success:
@@ -362,11 +442,6 @@ out:
_leave(" = %d", ret);
return ret;
-call_terminated:
- rxrpc_free_skb(skb, rxrpc_skb_tx_freed);
- _leave(" = %d", -call->error);
- return -call->error;
-
maybe_error:
if (copied)
goto success;
@@ -611,7 +686,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
/* Reply phase not begun or not complete for service call. */
ret = -EPROTO;
} else {
- ret = rxrpc_send_data(rx, call, msg, len);
+ ret = rxrpc_send_data(rx, call, msg, len, NULL);
}
mutex_unlock(&call->user_mutex);
@@ -631,6 +706,7 @@ error_release_sock:
* @call: The call to send data through
* @msg: The data to send
* @len: The amount of data to send
+ * @notify_end_tx: Notification that the last packet is queued.
*
* Allow a kernel service to send data on a call. The call must be in an state
* appropriate to sending data. No control data should be supplied in @msg,
@@ -638,7 +714,8 @@ error_release_sock:
* more data to come, otherwise this data will end the transmission phase.
*/
int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call,
- struct msghdr *msg, size_t len)
+ struct msghdr *msg, size_t len,
+ rxrpc_notify_end_tx_t notify_end_tx)
{
int ret;
@@ -656,11 +733,12 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call,
case RXRPC_CALL_CLIENT_SEND_REQUEST:
case RXRPC_CALL_SERVER_ACK_REQUEST:
case RXRPC_CALL_SERVER_SEND_REPLY:
- ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len);
+ ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len,
+ notify_end_tx);
break;
case RXRPC_CALL_COMPLETE:
read_lock_bh(&call->state_lock);
- ret = -call->error;
+ ret = call->error;
read_unlock_bh(&call->state_lock);
break;
default:
diff --git a/net/rxrpc/utils.c b/net/rxrpc/utils.c
index ff7af71c4b49..e801171fa351 100644
--- a/net/rxrpc/utils.c
+++ b/net/rxrpc/utils.c
@@ -17,17 +17,28 @@
/*
* Fill out a peer address from a socket buffer containing a packet.
*/
-int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *srx, struct sk_buff *skb)
+int rxrpc_extract_addr_from_skb(struct rxrpc_local *local,
+ struct sockaddr_rxrpc *srx,
+ struct sk_buff *skb)
{
memset(srx, 0, sizeof(*srx));
switch (ntohs(skb->protocol)) {
case ETH_P_IP:
- srx->transport_type = SOCK_DGRAM;
- srx->transport_len = sizeof(srx->transport.sin);
- srx->transport.sin.sin_family = AF_INET;
- srx->transport.sin.sin_port = udp_hdr(skb)->source;
- srx->transport.sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
+ if (local->srx.transport.family == AF_INET6) {
+ srx->transport_type = SOCK_DGRAM;
+ srx->transport_len = sizeof(srx->transport.sin6);
+ srx->transport.sin6.sin6_family = AF_INET6;
+ srx->transport.sin6.sin6_port = udp_hdr(skb)->source;
+ srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff);
+ srx->transport.sin6.sin6_addr.s6_addr32[3] = ip_hdr(skb)->saddr;
+ } else {
+ srx->transport_type = SOCK_DGRAM;
+ srx->transport_len = sizeof(srx->transport.sin);
+ srx->transport.sin.sin_family = AF_INET;
+ srx->transport.sin.sin_port = udp_hdr(skb)->source;
+ srx->transport.sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
+ }
return 0;
#ifdef CONFIG_AF_RXRPC_IPV6
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index e70ed26485a2..c03d86a7775e 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -172,6 +172,17 @@ config NET_SCH_TBF
To compile this code as a module, choose M here: the
module will be called sch_tbf.
+config NET_SCH_CBS
+ tristate "Credit Based Shaper (CBS)"
+ ---help---
+ Say Y here if you want to use the Credit Based Shaper (CBS) packet
+ scheduling algorithm.
+
+ See the top of <file:net/sched/sch_cbs.c> for more details.
+
+ To compile this code as a module, choose M here: the
+ module will be called sch_cbs.
+
config NET_SCH_GRED
tristate "Generic Random Early Detection (GRED)"
---help---
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 7b915d226de7..5b635447e3f8 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the Linux Traffic Control Unit.
#
@@ -52,6 +53,7 @@ obj-$(CONFIG_NET_SCH_FQ_CODEL) += sch_fq_codel.o
obj-$(CONFIG_NET_SCH_FQ) += sch_fq.o
obj-$(CONFIG_NET_SCH_HHF) += sch_hhf.o
obj-$(CONFIG_NET_SCH_PIE) += sch_pie.o
+obj-$(CONFIG_NET_SCH_CBS) += sch_cbs.o
obj-$(CONFIG_NET_CLS_U32) += cls_u32.o
obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index f2e9ed34a963..4d33a50a8a6d 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -21,6 +21,8 @@
#include <linux/kmod.h>
#include <linux/err.h>
#include <linux/module.h>
+#include <linux/rhashtable.h>
+#include <linux/list.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/sch_generic.h>
@@ -53,10 +55,13 @@ static void tcf_action_goto_chain_exec(const struct tc_action *a,
res->goto_tp = rcu_dereference_bh(chain->filter_chain);
}
-static void free_tcf(struct rcu_head *head)
+/* XXX: For standalone actions, we don't need a RCU grace period either, because
+ * actions are always connected to filters and filters are already destroyed in
+ * RCU callbacks, so after a RCU grace period actions are already disconnected
+ * from filters. Readers later can not find us.
+ */
+static void free_tcf(struct tc_action *p)
{
- struct tc_action *p = container_of(head, struct tc_action, tcfa_rcu);
-
free_percpu(p->cpu_bstats);
free_percpu(p->cpu_qstats);
@@ -70,23 +75,21 @@ static void free_tcf(struct rcu_head *head)
kfree(p);
}
-static void tcf_hash_destroy(struct tcf_hashinfo *hinfo, struct tc_action *p)
+static void tcf_idr_remove(struct tcf_idrinfo *idrinfo, struct tc_action *p)
{
- spin_lock_bh(&hinfo->lock);
- hlist_del(&p->tcfa_head);
- spin_unlock_bh(&hinfo->lock);
+ spin_lock_bh(&idrinfo->lock);
+ idr_remove_ext(&idrinfo->action_idr, p->tcfa_index);
+ spin_unlock_bh(&idrinfo->lock);
gen_kill_estimator(&p->tcfa_rate_est);
- /*
- * gen_estimator est_timer() might access p->tcfa_lock
- * or bstats, wait a RCU grace period before freeing p
- */
- call_rcu(&p->tcfa_rcu, free_tcf);
+ free_tcf(p);
}
-int __tcf_hash_release(struct tc_action *p, bool bind, bool strict)
+int __tcf_idr_release(struct tc_action *p, bool bind, bool strict)
{
int ret = 0;
+ ASSERT_RTNL();
+
if (p) {
if (bind)
p->tcfa_bindcnt--;
@@ -97,55 +100,64 @@ int __tcf_hash_release(struct tc_action *p, bool bind, bool strict)
if (p->tcfa_bindcnt <= 0 && p->tcfa_refcnt <= 0) {
if (p->ops->cleanup)
p->ops->cleanup(p, bind);
- tcf_hash_destroy(p->hinfo, p);
+ tcf_idr_remove(p->idrinfo, p);
ret = ACT_P_DELETED;
}
}
return ret;
}
-EXPORT_SYMBOL(__tcf_hash_release);
+EXPORT_SYMBOL(__tcf_idr_release);
-static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
+static int tcf_dump_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb,
struct netlink_callback *cb)
{
- int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
+ int err = 0, index = -1, s_i = 0, n_i = 0;
+ u32 act_flags = cb->args[2];
+ unsigned long jiffy_since = cb->args[3];
struct nlattr *nest;
+ struct idr *idr = &idrinfo->action_idr;
+ struct tc_action *p;
+ unsigned long id = 1;
- spin_lock_bh(&hinfo->lock);
+ spin_lock_bh(&idrinfo->lock);
s_i = cb->args[0];
- for (i = 0; i < (hinfo->hmask + 1); i++) {
- struct hlist_head *head;
- struct tc_action *p;
-
- head = &hinfo->htab[tcf_hash(i, hinfo->hmask)];
-
- hlist_for_each_entry_rcu(p, head, tcfa_head) {
- index++;
- if (index < s_i)
- continue;
-
- nest = nla_nest_start(skb, n_i);
- if (nest == NULL)
- goto nla_put_failure;
- err = tcf_action_dump_1(skb, p, 0, 0);
- if (err < 0) {
- index--;
- nlmsg_trim(skb, nest);
- goto done;
- }
- nla_nest_end(skb, nest);
- n_i++;
- if (n_i >= TCA_ACT_MAX_PRIO)
- goto done;
+ idr_for_each_entry_ext(idr, p, id) {
+ index++;
+ if (index < s_i)
+ continue;
+
+ if (jiffy_since &&
+ time_after(jiffy_since,
+ (unsigned long)p->tcfa_tm.lastuse))
+ continue;
+
+ nest = nla_nest_start(skb, n_i);
+ if (!nest)
+ goto nla_put_failure;
+ err = tcf_action_dump_1(skb, p, 0, 0);
+ if (err < 0) {
+ index--;
+ nlmsg_trim(skb, nest);
+ goto done;
}
+ nla_nest_end(skb, nest);
+ n_i++;
+ if (!(act_flags & TCA_FLAG_LARGE_DUMP_ON) &&
+ n_i >= TCA_ACT_MAX_PRIO)
+ goto done;
}
done:
- spin_unlock_bh(&hinfo->lock);
- if (n_i)
- cb->args[0] += n_i;
+ if (index >= 0)
+ cb->args[0] = index + 1;
+
+ spin_unlock_bh(&idrinfo->lock);
+ if (n_i) {
+ if (act_flags & TCA_FLAG_LARGE_DUMP_ON)
+ cb->args[1] = n_i;
+ }
return n_i;
nla_put_failure:
@@ -153,31 +165,29 @@ nla_put_failure:
goto done;
}
-static int tcf_del_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
+static int tcf_del_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb,
const struct tc_action_ops *ops)
{
struct nlattr *nest;
- int i = 0, n_i = 0;
+ int n_i = 0;
int ret = -EINVAL;
+ struct idr *idr = &idrinfo->action_idr;
+ struct tc_action *p;
+ unsigned long id = 1;
nest = nla_nest_start(skb, 0);
if (nest == NULL)
goto nla_put_failure;
if (nla_put_string(skb, TCA_KIND, ops->kind))
goto nla_put_failure;
- for (i = 0; i < (hinfo->hmask + 1); i++) {
- struct hlist_head *head;
- struct hlist_node *n;
- struct tc_action *p;
-
- head = &hinfo->htab[tcf_hash(i, hinfo->hmask)];
- hlist_for_each_entry_safe(p, n, head, tcfa_head) {
- ret = __tcf_hash_release(p, false, true);
- if (ret == ACT_P_DELETED) {
- module_put(p->ops->owner);
- n_i++;
- } else if (ret < 0)
- goto nla_put_failure;
+
+ idr_for_each_entry_ext(idr, p, id) {
+ ret = __tcf_idr_release(p, false, true);
+ if (ret == ACT_P_DELETED) {
+ module_put(ops->owner);
+ n_i++;
+ } else if (ret < 0) {
+ goto nla_put_failure;
}
}
if (nla_put_u32(skb, TCA_FCNT, n_i))
@@ -194,12 +204,12 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb,
struct netlink_callback *cb, int type,
const struct tc_action_ops *ops)
{
- struct tcf_hashinfo *hinfo = tn->hinfo;
+ struct tcf_idrinfo *idrinfo = tn->idrinfo;
if (type == RTM_DELACTION) {
- return tcf_del_walker(hinfo, skb, ops);
+ return tcf_del_walker(idrinfo, skb, ops);
} else if (type == RTM_GETACTION) {
- return tcf_dump_walker(hinfo, skb, cb);
+ return tcf_dump_walker(idrinfo, skb, cb);
} else {
WARN(1, "tcf_generic_walker: unknown action %d\n", type);
return -EINVAL;
@@ -207,40 +217,21 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb,
}
EXPORT_SYMBOL(tcf_generic_walker);
-static struct tc_action *tcf_hash_lookup(u32 index, struct tcf_hashinfo *hinfo)
+static struct tc_action *tcf_idr_lookup(u32 index, struct tcf_idrinfo *idrinfo)
{
struct tc_action *p = NULL;
- struct hlist_head *head;
- spin_lock_bh(&hinfo->lock);
- head = &hinfo->htab[tcf_hash(index, hinfo->hmask)];
- hlist_for_each_entry_rcu(p, head, tcfa_head)
- if (p->tcfa_index == index)
- break;
- spin_unlock_bh(&hinfo->lock);
+ spin_lock_bh(&idrinfo->lock);
+ p = idr_find_ext(&idrinfo->action_idr, index);
+ spin_unlock_bh(&idrinfo->lock);
return p;
}
-u32 tcf_hash_new_index(struct tc_action_net *tn)
-{
- struct tcf_hashinfo *hinfo = tn->hinfo;
- u32 val = hinfo->index;
-
- do {
- if (++val == 0)
- val = 1;
- } while (tcf_hash_lookup(val, hinfo));
-
- hinfo->index = val;
- return val;
-}
-EXPORT_SYMBOL(tcf_hash_new_index);
-
-int tcf_hash_search(struct tc_action_net *tn, struct tc_action **a, u32 index)
+int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index)
{
- struct tcf_hashinfo *hinfo = tn->hinfo;
- struct tc_action *p = tcf_hash_lookup(index, hinfo);
+ struct tcf_idrinfo *idrinfo = tn->idrinfo;
+ struct tc_action *p = tcf_idr_lookup(index, idrinfo);
if (p) {
*a = p;
@@ -248,15 +239,15 @@ int tcf_hash_search(struct tc_action_net *tn, struct tc_action **a, u32 index)
}
return 0;
}
-EXPORT_SYMBOL(tcf_hash_search);
+EXPORT_SYMBOL(tcf_idr_search);
-bool tcf_hash_check(struct tc_action_net *tn, u32 index, struct tc_action **a,
- int bind)
+bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a,
+ int bind)
{
- struct tcf_hashinfo *hinfo = tn->hinfo;
- struct tc_action *p = NULL;
+ struct tcf_idrinfo *idrinfo = tn->idrinfo;
+ struct tc_action *p = tcf_idr_lookup(index, idrinfo);
- if (index && (p = tcf_hash_lookup(index, hinfo)) != NULL) {
+ if (index && p) {
if (bind)
p->tcfa_bindcnt++;
p->tcfa_refcnt++;
@@ -265,23 +256,25 @@ bool tcf_hash_check(struct tc_action_net *tn, u32 index, struct tc_action **a,
}
return false;
}
-EXPORT_SYMBOL(tcf_hash_check);
+EXPORT_SYMBOL(tcf_idr_check);
-void tcf_hash_cleanup(struct tc_action *a, struct nlattr *est)
+void tcf_idr_cleanup(struct tc_action *a, struct nlattr *est)
{
if (est)
gen_kill_estimator(&a->tcfa_rate_est);
- call_rcu(&a->tcfa_rcu, free_tcf);
+ free_tcf(a);
}
-EXPORT_SYMBOL(tcf_hash_cleanup);
+EXPORT_SYMBOL(tcf_idr_cleanup);
-int tcf_hash_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
- struct tc_action **a, const struct tc_action_ops *ops,
- int bind, bool cpustats)
+int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
+ struct tc_action **a, const struct tc_action_ops *ops,
+ int bind, bool cpustats)
{
struct tc_action *p = kzalloc(ops->size, GFP_KERNEL);
- struct tcf_hashinfo *hinfo = tn->hinfo;
+ struct tcf_idrinfo *idrinfo = tn->idrinfo;
+ struct idr *idr = &idrinfo->action_idr;
int err = -ENOMEM;
+ unsigned long idr_index;
if (unlikely(!p))
return -ENOMEM;
@@ -304,8 +297,32 @@ err2:
}
}
spin_lock_init(&p->tcfa_lock);
- INIT_HLIST_NODE(&p->tcfa_head);
- p->tcfa_index = index ? index : tcf_hash_new_index(tn);
+ /* user doesn't specify an index */
+ if (!index) {
+ idr_preload(GFP_KERNEL);
+ spin_lock_bh(&idrinfo->lock);
+ err = idr_alloc_ext(idr, NULL, &idr_index, 1, 0,
+ GFP_ATOMIC);
+ spin_unlock_bh(&idrinfo->lock);
+ idr_preload_end();
+ if (err) {
+err3:
+ free_percpu(p->cpu_qstats);
+ goto err2;
+ }
+ p->tcfa_index = idr_index;
+ } else {
+ idr_preload(GFP_KERNEL);
+ spin_lock_bh(&idrinfo->lock);
+ err = idr_alloc_ext(idr, NULL, NULL, index, index + 1,
+ GFP_ATOMIC);
+ spin_unlock_bh(&idrinfo->lock);
+ idr_preload_end();
+ if (err)
+ goto err3;
+ p->tcfa_index = index;
+ }
+
p->tcfa_tm.install = jiffies;
p->tcfa_tm.lastuse = jiffies;
p->tcfa_tm.firstuse = 0;
@@ -314,52 +331,46 @@ err2:
&p->tcfa_rate_est,
&p->tcfa_lock, NULL, est);
if (err) {
- free_percpu(p->cpu_qstats);
- goto err2;
+ goto err3;
}
}
- p->hinfo = hinfo;
+ p->idrinfo = idrinfo;
p->ops = ops;
INIT_LIST_HEAD(&p->list);
*a = p;
return 0;
}
-EXPORT_SYMBOL(tcf_hash_create);
+EXPORT_SYMBOL(tcf_idr_create);
-void tcf_hash_insert(struct tc_action_net *tn, struct tc_action *a)
+void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a)
{
- struct tcf_hashinfo *hinfo = tn->hinfo;
- unsigned int h = tcf_hash(a->tcfa_index, hinfo->hmask);
+ struct tcf_idrinfo *idrinfo = tn->idrinfo;
- spin_lock_bh(&hinfo->lock);
- hlist_add_head(&a->tcfa_head, &hinfo->htab[h]);
- spin_unlock_bh(&hinfo->lock);
+ spin_lock_bh(&idrinfo->lock);
+ idr_replace_ext(&idrinfo->action_idr, a, a->tcfa_index);
+ spin_unlock_bh(&idrinfo->lock);
}
-EXPORT_SYMBOL(tcf_hash_insert);
+EXPORT_SYMBOL(tcf_idr_insert);
-void tcf_hashinfo_destroy(const struct tc_action_ops *ops,
- struct tcf_hashinfo *hinfo)
+void tcf_idrinfo_destroy(const struct tc_action_ops *ops,
+ struct tcf_idrinfo *idrinfo)
{
- int i;
-
- for (i = 0; i < hinfo->hmask + 1; i++) {
- struct tc_action *p;
- struct hlist_node *n;
-
- hlist_for_each_entry_safe(p, n, &hinfo->htab[i], tcfa_head) {
- int ret;
+ struct idr *idr = &idrinfo->action_idr;
+ struct tc_action *p;
+ int ret;
+ unsigned long id = 1;
- ret = __tcf_hash_release(p, false, true);
- if (ret == ACT_P_DELETED)
- module_put(ops->owner);
- else if (ret < 0)
- return;
- }
+ idr_for_each_entry_ext(idr, p, id) {
+ ret = __tcf_idr_release(p, false, true);
+ if (ret == ACT_P_DELETED)
+ module_put(ops->owner);
+ else if (ret < 0)
+ return;
}
- kfree(hinfo->htab);
+ idr_destroy(&idrinfo->action_idr);
}
-EXPORT_SYMBOL(tcf_hashinfo_destroy);
+EXPORT_SYMBOL(tcf_idrinfo_destroy);
static LIST_HEAD(act_base);
static DEFINE_RWLOCK(act_mod_lock);
@@ -460,9 +471,10 @@ static struct tc_action_ops *tc_lookup_action(struct nlattr *kind)
int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
int nr_actions, struct tcf_result *res)
{
- int ret = -1, i;
u32 jmp_prgcnt = 0;
u32 jmp_ttl = TCA_ACT_MAX_PRIO; /*matches actions per filter */
+ int i;
+ int ret = TC_ACT_OK;
if (skb_skip_tc_classify(skb))
return TC_ACT_OK;
@@ -506,13 +518,15 @@ EXPORT_SYMBOL(tcf_action_exec);
int tcf_action_destroy(struct list_head *actions, int bind)
{
+ const struct tc_action_ops *ops;
struct tc_action *a, *tmp;
int ret = 0;
list_for_each_entry_safe(a, tmp, actions, list) {
- ret = __tcf_hash_release(a, bind, true);
+ ops = a->ops;
+ ret = __tcf_idr_release(a, bind, true);
if (ret == ACT_P_DELETED)
- module_put(a->ops->owner);
+ module_put(ops->owner);
else if (ret < 0)
return ret;
}
@@ -1068,11 +1082,18 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
return tcf_add_notify(net, n, &actions, portid);
}
+static u32 tcaa_root_flags_allowed = TCA_FLAG_LARGE_DUMP_ON;
+static const struct nla_policy tcaa_policy[TCA_ROOT_MAX + 1] = {
+ [TCA_ROOT_FLAGS] = { .type = NLA_BITFIELD32,
+ .validation_data = &tcaa_root_flags_allowed },
+ [TCA_ROOT_TIME_DELTA] = { .type = NLA_U32 },
+};
+
static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
- struct nlattr *tca[TCA_ACT_MAX + 1];
+ struct nlattr *tca[TCA_ROOT_MAX + 1];
u32 portid = skb ? NETLINK_CB(skb).portid : 0;
int ret = 0, ovr = 0;
@@ -1080,7 +1101,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
!netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
- ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL,
+ ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ROOT_MAX, NULL,
extack);
if (ret < 0)
return ret;
@@ -1121,16 +1142,12 @@ replay:
return ret;
}
-static struct nlattr *find_dump_kind(const struct nlmsghdr *n)
+static struct nlattr *find_dump_kind(struct nlattr **nla)
{
struct nlattr *tb1, *tb2[TCA_ACT_MAX + 1];
struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
- struct nlattr *nla[TCAA_MAX + 1];
struct nlattr *kind;
- if (nlmsg_parse(n, sizeof(struct tcamsg), nla, TCAA_MAX,
- NULL, NULL) < 0)
- return NULL;
tb1 = nla[TCA_ACT_TAB];
if (tb1 == NULL)
return NULL;
@@ -1157,8 +1174,20 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
struct tc_action_ops *a_o;
int ret = 0;
struct tcamsg *t = (struct tcamsg *) nlmsg_data(cb->nlh);
- struct nlattr *kind = find_dump_kind(cb->nlh);
+ struct nlattr *tb[TCA_ROOT_MAX + 1];
+ struct nlattr *count_attr = NULL;
+ unsigned long jiffy_since = 0;
+ struct nlattr *kind = NULL;
+ struct nla_bitfield32 bf;
+ u32 msecs_since = 0;
+ u32 act_count = 0;
+
+ ret = nlmsg_parse(cb->nlh, sizeof(struct tcamsg), tb, TCA_ROOT_MAX,
+ tcaa_policy, NULL);
+ if (ret < 0)
+ return ret;
+ kind = find_dump_kind(tb);
if (kind == NULL) {
pr_info("tc_dump_action: action bad kind\n");
return 0;
@@ -1168,14 +1197,32 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
if (a_o == NULL)
return 0;
+ cb->args[2] = 0;
+ if (tb[TCA_ROOT_FLAGS]) {
+ bf = nla_get_bitfield32(tb[TCA_ROOT_FLAGS]);
+ cb->args[2] = bf.value;
+ }
+
+ if (tb[TCA_ROOT_TIME_DELTA]) {
+ msecs_since = nla_get_u32(tb[TCA_ROOT_TIME_DELTA]);
+ }
+
nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
cb->nlh->nlmsg_type, sizeof(*t), 0);
if (!nlh)
goto out_module_put;
+
+ if (msecs_since)
+ jiffy_since = jiffies - msecs_to_jiffies(msecs_since);
+
t = nlmsg_data(nlh);
t->tca_family = AF_UNSPEC;
t->tca__pad1 = 0;
t->tca__pad2 = 0;
+ cb->args[3] = jiffy_since;
+ count_attr = nla_reserve(skb, TCA_ROOT_COUNT, sizeof(u32));
+ if (!count_attr)
+ goto out_module_put;
nest = nla_nest_start(skb, TCA_ACT_TAB);
if (nest == NULL)
@@ -1188,6 +1235,9 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
if (ret > 0) {
nla_nest_end(skb, nest);
ret = skb->len;
+ act_count = cb->args[1];
+ memcpy(nla_data(count_attr), &act_count, sizeof(u32));
+ cb->args[1] = 0;
} else
nlmsg_trim(skb, b);
@@ -1203,12 +1253,231 @@ out_module_put:
return skb->len;
}
+struct tcf_action_net {
+ struct rhashtable egdev_ht;
+};
+
+static unsigned int tcf_action_net_id;
+
+struct tcf_action_egdev_cb {
+ struct list_head list;
+ tc_setup_cb_t *cb;
+ void *cb_priv;
+};
+
+struct tcf_action_egdev {
+ struct rhash_head ht_node;
+ const struct net_device *dev;
+ unsigned int refcnt;
+ struct list_head cb_list;
+};
+
+static const struct rhashtable_params tcf_action_egdev_ht_params = {
+ .key_offset = offsetof(struct tcf_action_egdev, dev),
+ .head_offset = offsetof(struct tcf_action_egdev, ht_node),
+ .key_len = sizeof(const struct net_device *),
+};
+
+static struct tcf_action_egdev *
+tcf_action_egdev_lookup(const struct net_device *dev)
+{
+ struct net *net = dev_net(dev);
+ struct tcf_action_net *tan = net_generic(net, tcf_action_net_id);
+
+ return rhashtable_lookup_fast(&tan->egdev_ht, &dev,
+ tcf_action_egdev_ht_params);
+}
+
+static struct tcf_action_egdev *
+tcf_action_egdev_get(const struct net_device *dev)
+{
+ struct tcf_action_egdev *egdev;
+ struct tcf_action_net *tan;
+
+ egdev = tcf_action_egdev_lookup(dev);
+ if (egdev)
+ goto inc_ref;
+
+ egdev = kzalloc(sizeof(*egdev), GFP_KERNEL);
+ if (!egdev)
+ return NULL;
+ INIT_LIST_HEAD(&egdev->cb_list);
+ egdev->dev = dev;
+ tan = net_generic(dev_net(dev), tcf_action_net_id);
+ rhashtable_insert_fast(&tan->egdev_ht, &egdev->ht_node,
+ tcf_action_egdev_ht_params);
+
+inc_ref:
+ egdev->refcnt++;
+ return egdev;
+}
+
+static void tcf_action_egdev_put(struct tcf_action_egdev *egdev)
+{
+ struct tcf_action_net *tan;
+
+ if (--egdev->refcnt)
+ return;
+ tan = net_generic(dev_net(egdev->dev), tcf_action_net_id);
+ rhashtable_remove_fast(&tan->egdev_ht, &egdev->ht_node,
+ tcf_action_egdev_ht_params);
+ kfree(egdev);
+}
+
+static struct tcf_action_egdev_cb *
+tcf_action_egdev_cb_lookup(struct tcf_action_egdev *egdev,
+ tc_setup_cb_t *cb, void *cb_priv)
+{
+ struct tcf_action_egdev_cb *egdev_cb;
+
+ list_for_each_entry(egdev_cb, &egdev->cb_list, list)
+ if (egdev_cb->cb == cb && egdev_cb->cb_priv == cb_priv)
+ return egdev_cb;
+ return NULL;
+}
+
+static int tcf_action_egdev_cb_call(struct tcf_action_egdev *egdev,
+ enum tc_setup_type type,
+ void *type_data, bool err_stop)
+{
+ struct tcf_action_egdev_cb *egdev_cb;
+ int ok_count = 0;
+ int err;
+
+ list_for_each_entry(egdev_cb, &egdev->cb_list, list) {
+ err = egdev_cb->cb(type, type_data, egdev_cb->cb_priv);
+ if (err) {
+ if (err_stop)
+ return err;
+ } else {
+ ok_count++;
+ }
+ }
+ return ok_count;
+}
+
+static int tcf_action_egdev_cb_add(struct tcf_action_egdev *egdev,
+ tc_setup_cb_t *cb, void *cb_priv)
+{
+ struct tcf_action_egdev_cb *egdev_cb;
+
+ egdev_cb = tcf_action_egdev_cb_lookup(egdev, cb, cb_priv);
+ if (WARN_ON(egdev_cb))
+ return -EEXIST;
+ egdev_cb = kzalloc(sizeof(*egdev_cb), GFP_KERNEL);
+ if (!egdev_cb)
+ return -ENOMEM;
+ egdev_cb->cb = cb;
+ egdev_cb->cb_priv = cb_priv;
+ list_add(&egdev_cb->list, &egdev->cb_list);
+ return 0;
+}
+
+static void tcf_action_egdev_cb_del(struct tcf_action_egdev *egdev,
+ tc_setup_cb_t *cb, void *cb_priv)
+{
+ struct tcf_action_egdev_cb *egdev_cb;
+
+ egdev_cb = tcf_action_egdev_cb_lookup(egdev, cb, cb_priv);
+ if (WARN_ON(!egdev_cb))
+ return;
+ list_del(&egdev_cb->list);
+ kfree(egdev_cb);
+}
+
+static int __tc_setup_cb_egdev_register(const struct net_device *dev,
+ tc_setup_cb_t *cb, void *cb_priv)
+{
+ struct tcf_action_egdev *egdev = tcf_action_egdev_get(dev);
+ int err;
+
+ if (!egdev)
+ return -ENOMEM;
+ err = tcf_action_egdev_cb_add(egdev, cb, cb_priv);
+ if (err)
+ goto err_cb_add;
+ return 0;
+
+err_cb_add:
+ tcf_action_egdev_put(egdev);
+ return err;
+}
+int tc_setup_cb_egdev_register(const struct net_device *dev,
+ tc_setup_cb_t *cb, void *cb_priv)
+{
+ int err;
+
+ rtnl_lock();
+ err = __tc_setup_cb_egdev_register(dev, cb, cb_priv);
+ rtnl_unlock();
+ return err;
+}
+EXPORT_SYMBOL_GPL(tc_setup_cb_egdev_register);
+
+static void __tc_setup_cb_egdev_unregister(const struct net_device *dev,
+ tc_setup_cb_t *cb, void *cb_priv)
+{
+ struct tcf_action_egdev *egdev = tcf_action_egdev_lookup(dev);
+
+ if (WARN_ON(!egdev))
+ return;
+ tcf_action_egdev_cb_del(egdev, cb, cb_priv);
+ tcf_action_egdev_put(egdev);
+}
+void tc_setup_cb_egdev_unregister(const struct net_device *dev,
+ tc_setup_cb_t *cb, void *cb_priv)
+{
+ rtnl_lock();
+ __tc_setup_cb_egdev_unregister(dev, cb, cb_priv);
+ rtnl_unlock();
+}
+EXPORT_SYMBOL_GPL(tc_setup_cb_egdev_unregister);
+
+int tc_setup_cb_egdev_call(const struct net_device *dev,
+ enum tc_setup_type type, void *type_data,
+ bool err_stop)
+{
+ struct tcf_action_egdev *egdev = tcf_action_egdev_lookup(dev);
+
+ if (!egdev)
+ return 0;
+ return tcf_action_egdev_cb_call(egdev, type, type_data, err_stop);
+}
+EXPORT_SYMBOL_GPL(tc_setup_cb_egdev_call);
+
+static __net_init int tcf_action_net_init(struct net *net)
+{
+ struct tcf_action_net *tan = net_generic(net, tcf_action_net_id);
+
+ return rhashtable_init(&tan->egdev_ht, &tcf_action_egdev_ht_params);
+}
+
+static void __net_exit tcf_action_net_exit(struct net *net)
+{
+ struct tcf_action_net *tan = net_generic(net, tcf_action_net_id);
+
+ rhashtable_destroy(&tan->egdev_ht);
+}
+
+static struct pernet_operations tcf_action_net_ops = {
+ .init = tcf_action_net_init,
+ .exit = tcf_action_net_exit,
+ .id = &tcf_action_net_id,
+ .size = sizeof(struct tcf_action_net),
+};
+
static int __init tc_action_init(void)
{
- rtnl_register(PF_UNSPEC, RTM_NEWACTION, tc_ctl_action, NULL, NULL);
- rtnl_register(PF_UNSPEC, RTM_DELACTION, tc_ctl_action, NULL, NULL);
+ int err;
+
+ err = register_pernet_subsys(&tcf_action_net_ops);
+ if (err)
+ return err;
+
+ rtnl_register(PF_UNSPEC, RTM_NEWACTION, tc_ctl_action, NULL, 0);
+ rtnl_register(PF_UNSPEC, RTM_DELACTION, tc_ctl_action, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_GETACTION, tc_ctl_action, tc_dump_action,
- NULL);
+ 0);
return 0;
}
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 9afe1337cfd1..5ef8ce8c83d4 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -21,7 +21,6 @@
#include <linux/tc_act/tc_bpf.h>
#include <net/tc_act/tc_bpf.h>
-#define BPF_TAB_MASK 15
#define ACT_BPF_NAME_LEN 256
struct tcf_bpf_cfg {
@@ -50,11 +49,11 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
filter = rcu_dereference(prog->filter);
if (at_ingress) {
__skb_push(skb, skb->mac_len);
- bpf_compute_data_end(skb);
+ bpf_compute_data_pointers(skb);
filter_res = BPF_PROG_RUN(filter, skb);
__skb_pull(skb, skb->mac_len);
} else {
- bpf_compute_data_end(skb);
+ bpf_compute_data_pointers(skb);
filter_res = BPF_PROG_RUN(filter, skb);
}
rcu_read_unlock();
@@ -295,9 +294,9 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
parm = nla_data(tb[TCA_ACT_BPF_PARMS]);
- if (!tcf_hash_check(tn, parm->index, act, bind)) {
- ret = tcf_hash_create(tn, parm->index, est, act,
- &act_bpf_ops, bind, true);
+ if (!tcf_idr_check(tn, parm->index, act, bind)) {
+ ret = tcf_idr_create(tn, parm->index, est, act,
+ &act_bpf_ops, bind, true);
if (ret < 0)
return ret;
@@ -307,7 +306,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
if (bind)
return 0;
- tcf_hash_release(*act, bind);
+ tcf_idr_release(*act, bind);
if (!replace)
return -EEXIST;
}
@@ -343,7 +342,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
rcu_assign_pointer(prog->filter, cfg.filter);
if (res == ACT_P_CREATED) {
- tcf_hash_insert(tn, *act);
+ tcf_idr_insert(tn, *act);
} else {
/* make sure the program being replaced is no longer executing */
synchronize_rcu();
@@ -353,7 +352,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
return res;
out:
if (res == ACT_P_CREATED)
- tcf_hash_cleanup(*act, est);
+ tcf_idr_cleanup(*act, est);
return ret;
}
@@ -379,7 +378,7 @@ static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index)
{
struct tc_action_net *tn = net_generic(net, bpf_net_id);
- return tcf_hash_search(tn, a, index);
+ return tcf_idr_search(tn, a, index);
}
static struct tc_action_ops act_bpf_ops __read_mostly = {
@@ -399,7 +398,7 @@ static __net_init int bpf_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, bpf_net_id);
- return tc_action_net_init(tn, &act_bpf_ops, BPF_TAB_MASK);
+ return tc_action_net_init(tn, &act_bpf_ops);
}
static void __net_exit bpf_exit_net(struct net *net)
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 2155bc6c6a1e..10b7a8855a6c 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -28,8 +28,6 @@
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_zones.h>
-#define CONNMARK_TAB_MASK 3
-
static unsigned int connmark_net_id;
static struct tc_action_ops act_connmark_ops;
@@ -119,9 +117,9 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
parm = nla_data(tb[TCA_CONNMARK_PARMS]);
- if (!tcf_hash_check(tn, parm->index, a, bind)) {
- ret = tcf_hash_create(tn, parm->index, est, a,
- &act_connmark_ops, bind, false);
+ if (!tcf_idr_check(tn, parm->index, a, bind)) {
+ ret = tcf_idr_create(tn, parm->index, est, a,
+ &act_connmark_ops, bind, false);
if (ret)
return ret;
@@ -130,13 +128,13 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
ci->net = net;
ci->zone = parm->zone;
- tcf_hash_insert(tn, *a);
+ tcf_idr_insert(tn, *a);
ret = ACT_P_CREATED;
} else {
ci = to_connmark(*a);
if (bind)
return 0;
- tcf_hash_release(*a, bind);
+ tcf_idr_release(*a, bind);
if (!ovr)
return -EEXIST;
/* replacing action and zone */
@@ -189,7 +187,7 @@ static int tcf_connmark_search(struct net *net, struct tc_action **a, u32 index)
{
struct tc_action_net *tn = net_generic(net, connmark_net_id);
- return tcf_hash_search(tn, a, index);
+ return tcf_idr_search(tn, a, index);
}
static struct tc_action_ops act_connmark_ops = {
@@ -208,7 +206,7 @@ static __net_init int connmark_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, connmark_net_id);
- return tc_action_net_init(tn, &act_connmark_ops, CONNMARK_TAB_MASK);
+ return tc_action_net_init(tn, &act_connmark_ops);
}
static void __net_exit connmark_exit_net(struct net *net)
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 3317a2f579da..1c40caadcff9 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -37,8 +37,6 @@
#include <linux/tc_act/tc_csum.h>
#include <net/tc_act/tc_csum.h>
-#define CSUM_TAB_MASK 15
-
static const struct nla_policy csum_policy[TCA_CSUM_MAX + 1] = {
[TCA_CSUM_PARMS] = { .len = sizeof(struct tc_csum), },
};
@@ -67,16 +65,16 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
return -EINVAL;
parm = nla_data(tb[TCA_CSUM_PARMS]);
- if (!tcf_hash_check(tn, parm->index, a, bind)) {
- ret = tcf_hash_create(tn, parm->index, est, a,
- &act_csum_ops, bind, false);
+ if (!tcf_idr_check(tn, parm->index, a, bind)) {
+ ret = tcf_idr_create(tn, parm->index, est, a,
+ &act_csum_ops, bind, false);
if (ret)
return ret;
ret = ACT_P_CREATED;
} else {
if (bind)/* dont override defaults */
return 0;
- tcf_hash_release(*a, bind);
+ tcf_idr_release(*a, bind);
if (!ovr)
return -EEXIST;
}
@@ -88,7 +86,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
spin_unlock_bh(&p->tcf_lock);
if (ret == ACT_P_CREATED)
- tcf_hash_insert(tn, *a);
+ tcf_idr_insert(tn, *a);
return ret;
}
@@ -231,9 +229,6 @@ static int tcf_csum_ipv4_udp(struct sk_buff *skb, unsigned int ihl,
const struct iphdr *iph;
u16 ul;
- if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
- return 1;
-
/*
* Support both UDP and UDPLITE checksum algorithms, Don't use
* udph->len to get the real length without any protocol check,
@@ -287,9 +282,6 @@ static int tcf_csum_ipv6_udp(struct sk_buff *skb, unsigned int ihl,
const struct ipv6hdr *ip6h;
u16 ul;
- if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
- return 1;
-
/*
* Support both UDP and UDPLITE checksum algorithms, Don't use
* udph->len to get the real length without any protocol check,
@@ -615,7 +607,7 @@ static int tcf_csum_search(struct net *net, struct tc_action **a, u32 index)
{
struct tc_action_net *tn = net_generic(net, csum_net_id);
- return tcf_hash_search(tn, a, index);
+ return tcf_idr_search(tn, a, index);
}
static struct tc_action_ops act_csum_ops = {
@@ -634,7 +626,7 @@ static __net_init int csum_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, csum_net_id);
- return tc_action_net_init(tn, &act_csum_ops, CSUM_TAB_MASK);
+ return tc_action_net_init(tn, &act_csum_ops);
}
static void __net_exit csum_exit_net(struct net *net)
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 99afe8b1f1fb..e29a48ef7fc3 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -23,8 +23,6 @@
#include <linux/tc_act/tc_gact.h>
#include <net/tc_act/tc_gact.h>
-#define GACT_TAB_MASK 15
-
static unsigned int gact_net_id;
static struct tc_action_ops act_gact_ops;
@@ -92,16 +90,16 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
}
#endif
- if (!tcf_hash_check(tn, parm->index, a, bind)) {
- ret = tcf_hash_create(tn, parm->index, est, a,
- &act_gact_ops, bind, true);
+ if (!tcf_idr_check(tn, parm->index, a, bind)) {
+ ret = tcf_idr_create(tn, parm->index, est, a,
+ &act_gact_ops, bind, true);
if (ret)
return ret;
ret = ACT_P_CREATED;
} else {
if (bind)/* dont override defaults */
return 0;
- tcf_hash_release(*a, bind);
+ tcf_idr_release(*a, bind);
if (!ovr)
return -EEXIST;
}
@@ -122,7 +120,7 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
}
#endif
if (ret == ACT_P_CREATED)
- tcf_hash_insert(tn, *a);
+ tcf_idr_insert(tn, *a);
return ret;
}
@@ -214,7 +212,7 @@ static int tcf_gact_search(struct net *net, struct tc_action **a, u32 index)
{
struct tc_action_net *tn = net_generic(net, gact_net_id);
- return tcf_hash_search(tn, a, index);
+ return tcf_idr_search(tn, a, index);
}
static struct tc_action_ops act_gact_ops = {
@@ -234,7 +232,7 @@ static __net_init int gact_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, gact_net_id);
- return tc_action_net_init(tn, &act_gact_ops, GACT_TAB_MASK);
+ return tc_action_net_init(tn, &act_gact_ops);
}
static void __net_exit gact_exit_net(struct net *net)
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index c5dec308b8b1..3007cb1310ea 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -34,8 +34,6 @@
#include <linux/etherdevice.h>
#include <net/ife.h>
-#define IFE_TAB_MASK 15
-
static unsigned int ife_net_id;
static int max_metacnt = IFE_META_MAX + 1;
static struct tc_action_ops act_ife_ops;
@@ -250,6 +248,22 @@ static int ife_validate_metatype(struct tcf_meta_ops *ops, void *val, int len)
return ret;
}
+#ifdef CONFIG_MODULES
+static const char *ife_meta_id2name(u32 metaid)
+{
+ switch (metaid) {
+ case IFE_META_SKBMARK:
+ return "skbmark";
+ case IFE_META_PRIO:
+ return "skbprio";
+ case IFE_META_TCINDEX:
+ return "tcindex";
+ default:
+ return "unknown";
+ }
+}
+#endif
+
/* called when adding new meta information
* under ife->tcf_lock for existing action
*/
@@ -265,7 +279,7 @@ static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid,
if (exists)
spin_unlock_bh(&ife->tcf_lock);
rtnl_unlock();
- request_module("ifemeta%u", metaid);
+ request_module("ife-meta-%s", ife_meta_id2name(metaid));
rtnl_lock();
if (exists)
spin_lock_bh(&ife->tcf_lock);
@@ -394,10 +408,14 @@ static void _tcf_ife_cleanup(struct tc_action *a, int bind)
static void tcf_ife_cleanup(struct tc_action *a, int bind)
{
struct tcf_ife_info *ife = to_ife(a);
+ struct tcf_ife_params *p;
spin_lock_bh(&ife->tcf_lock);
_tcf_ife_cleanup(a, bind);
spin_unlock_bh(&ife->tcf_lock);
+
+ p = rcu_dereference_protected(ife->params, 1);
+ kfree_rcu(p, rcu);
}
/* under ife->tcf_lock for existing action */
@@ -434,9 +452,10 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
struct tc_action_net *tn = net_generic(net, ife_net_id);
struct nlattr *tb[TCA_IFE_MAX + 1];
struct nlattr *tb2[IFE_META_MAX + 1];
+ struct tcf_ife_params *p, *p_old;
struct tcf_ife_info *ife;
+ u16 ife_type = ETH_P_IFE;
struct tc_ife *parm;
- u16 ife_type = 0;
u8 *daddr = NULL;
u8 *saddr = NULL;
bool exists = false;
@@ -452,63 +471,70 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
parm = nla_data(tb[TCA_IFE_PARMS]);
- exists = tcf_hash_check(tn, parm->index, a, bind);
- if (exists && bind)
- return 0;
+ /* IFE_DECODE is 0 and indicates the opposite of IFE_ENCODE because
+ * they cannot run as the same time. Check on all other values which
+ * are not supported right now.
+ */
+ if (parm->flags & ~IFE_ENCODE)
+ return -EINVAL;
- if (parm->flags & IFE_ENCODE) {
- /* Until we get issued the ethertype, we cant have
- * a default..
- **/
- if (!tb[TCA_IFE_TYPE]) {
- if (exists)
- tcf_hash_release(*a, bind);
- pr_info("You MUST pass etherype for encoding\n");
- return -EINVAL;
- }
+ p = kzalloc(sizeof(*p), GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+
+ exists = tcf_idr_check(tn, parm->index, a, bind);
+ if (exists && bind) {
+ kfree(p);
+ return 0;
}
if (!exists) {
- ret = tcf_hash_create(tn, parm->index, est, a, &act_ife_ops,
- bind, false);
- if (ret)
+ ret = tcf_idr_create(tn, parm->index, est, a, &act_ife_ops,
+ bind, true);
+ if (ret) {
+ kfree(p);
return ret;
+ }
ret = ACT_P_CREATED;
} else {
- tcf_hash_release(*a, bind);
- if (!ovr)
+ tcf_idr_release(*a, bind);
+ if (!ovr) {
+ kfree(p);
return -EEXIST;
+ }
}
ife = to_ife(*a);
- ife->flags = parm->flags;
+ p->flags = parm->flags;
if (parm->flags & IFE_ENCODE) {
- ife_type = nla_get_u16(tb[TCA_IFE_TYPE]);
+ if (tb[TCA_IFE_TYPE])
+ ife_type = nla_get_u16(tb[TCA_IFE_TYPE]);
if (tb[TCA_IFE_DMAC])
daddr = nla_data(tb[TCA_IFE_DMAC]);
if (tb[TCA_IFE_SMAC])
saddr = nla_data(tb[TCA_IFE_SMAC]);
}
- if (exists)
- spin_lock_bh(&ife->tcf_lock);
ife->tcf_action = parm->action;
if (parm->flags & IFE_ENCODE) {
if (daddr)
- ether_addr_copy(ife->eth_dst, daddr);
+ ether_addr_copy(p->eth_dst, daddr);
else
- eth_zero_addr(ife->eth_dst);
+ eth_zero_addr(p->eth_dst);
if (saddr)
- ether_addr_copy(ife->eth_src, saddr);
+ ether_addr_copy(p->eth_src, saddr);
else
- eth_zero_addr(ife->eth_src);
+ eth_zero_addr(p->eth_src);
- ife->eth_type = ife_type;
+ p->eth_type = ife_type;
}
+ if (exists)
+ spin_lock_bh(&ife->tcf_lock);
+
if (ret == ACT_P_CREATED)
INIT_LIST_HEAD(&ife->metalist);
@@ -518,12 +544,13 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
if (err) {
metadata_parse_err:
if (exists)
- tcf_hash_release(*a, bind);
+ tcf_idr_release(*a, bind);
if (ret == ACT_P_CREATED)
_tcf_ife_cleanup(*a, bind);
if (exists)
spin_unlock_bh(&ife->tcf_lock);
+ kfree(p);
return err;
}
@@ -544,6 +571,7 @@ metadata_parse_err:
if (exists)
spin_unlock_bh(&ife->tcf_lock);
+ kfree(p);
return err;
}
}
@@ -551,8 +579,13 @@ metadata_parse_err:
if (exists)
spin_unlock_bh(&ife->tcf_lock);
+ p_old = rtnl_dereference(ife->params);
+ rcu_assign_pointer(ife->params, p);
+ if (p_old)
+ kfree_rcu(p_old, rcu);
+
if (ret == ACT_P_CREATED)
- tcf_hash_insert(tn, *a);
+ tcf_idr_insert(tn, *a);
return ret;
}
@@ -562,12 +595,13 @@ static int tcf_ife_dump(struct sk_buff *skb, struct tc_action *a, int bind,
{
unsigned char *b = skb_tail_pointer(skb);
struct tcf_ife_info *ife = to_ife(a);
+ struct tcf_ife_params *p = rtnl_dereference(ife->params);
struct tc_ife opt = {
.index = ife->tcf_index,
.refcnt = ife->tcf_refcnt - ref,
.bindcnt = ife->tcf_bindcnt - bind,
.action = ife->tcf_action,
- .flags = ife->flags,
+ .flags = p->flags,
};
struct tcf_t t;
@@ -578,17 +612,17 @@ static int tcf_ife_dump(struct sk_buff *skb, struct tc_action *a, int bind,
if (nla_put_64bit(skb, TCA_IFE_TM, sizeof(t), &t, TCA_IFE_PAD))
goto nla_put_failure;
- if (!is_zero_ether_addr(ife->eth_dst)) {
- if (nla_put(skb, TCA_IFE_DMAC, ETH_ALEN, ife->eth_dst))
+ if (!is_zero_ether_addr(p->eth_dst)) {
+ if (nla_put(skb, TCA_IFE_DMAC, ETH_ALEN, p->eth_dst))
goto nla_put_failure;
}
- if (!is_zero_ether_addr(ife->eth_src)) {
- if (nla_put(skb, TCA_IFE_SMAC, ETH_ALEN, ife->eth_src))
+ if (!is_zero_ether_addr(p->eth_src)) {
+ if (nla_put(skb, TCA_IFE_SMAC, ETH_ALEN, p->eth_src))
goto nla_put_failure;
}
- if (nla_put(skb, TCA_IFE_TYPE, 2, &ife->eth_type))
+ if (nla_put(skb, TCA_IFE_TYPE, 2, &p->eth_type))
goto nla_put_failure;
if (dump_metalist(skb, ife)) {
@@ -630,19 +664,15 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a,
u8 *tlv_data;
u16 metalen;
- spin_lock(&ife->tcf_lock);
- bstats_update(&ife->tcf_bstats, skb);
+ bstats_cpu_update(this_cpu_ptr(ife->common.cpu_bstats), skb);
tcf_lastuse_update(&ife->tcf_tm);
- spin_unlock(&ife->tcf_lock);
if (skb_at_tc_ingress(skb))
skb_push(skb, skb->dev->hard_header_len);
tlv_data = ife_decode(skb, &metalen);
if (unlikely(!tlv_data)) {
- spin_lock(&ife->tcf_lock);
- ife->tcf_qstats.drops++;
- spin_unlock(&ife->tcf_lock);
+ qstats_drop_inc(this_cpu_ptr(ife->common.cpu_qstats));
return TC_ACT_SHOT;
}
@@ -660,14 +690,12 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a,
*/
pr_info_ratelimited("Unknown metaid %d dlen %d\n",
mtype, dlen);
- ife->tcf_qstats.overlimits++;
+ qstats_overlimit_inc(this_cpu_ptr(ife->common.cpu_qstats));
}
}
if (WARN_ON(tlv_data != ifehdr_end)) {
- spin_lock(&ife->tcf_lock);
- ife->tcf_qstats.drops++;
- spin_unlock(&ife->tcf_lock);
+ qstats_drop_inc(this_cpu_ptr(ife->common.cpu_qstats));
return TC_ACT_SHOT;
}
@@ -696,7 +724,7 @@ static int ife_get_sz(struct sk_buff *skb, struct tcf_ife_info *ife)
}
static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a,
- struct tcf_result *res)
+ struct tcf_result *res, struct tcf_ife_params *p)
{
struct tcf_ife_info *ife = to_ife(a);
int action = ife->tcf_action;
@@ -719,23 +747,20 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a,
exceed_mtu = true;
}
- spin_lock(&ife->tcf_lock);
- bstats_update(&ife->tcf_bstats, skb);
+ bstats_cpu_update(this_cpu_ptr(ife->common.cpu_bstats), skb);
tcf_lastuse_update(&ife->tcf_tm);
if (!metalen) { /* no metadata to send */
/* abuse overlimits to count when we allow packet
* with no metadata
*/
- ife->tcf_qstats.overlimits++;
- spin_unlock(&ife->tcf_lock);
+ qstats_overlimit_inc(this_cpu_ptr(ife->common.cpu_qstats));
return action;
}
/* could be stupid policy setup or mtu config
* so lets be conservative.. */
if ((action == TC_ACT_SHOT) || exceed_mtu) {
- ife->tcf_qstats.drops++;
- spin_unlock(&ife->tcf_lock);
+ qstats_drop_inc(this_cpu_ptr(ife->common.cpu_qstats));
return TC_ACT_SHOT;
}
@@ -744,6 +769,8 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a,
ife_meta = ife_encode(skb, metalen);
+ spin_lock(&ife->tcf_lock);
+
/* XXX: we dont have a clever way of telling encode to
* not repeat some of the computations that are done by
* ops->presence_check...
@@ -755,25 +782,24 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a,
}
if (err < 0) {
/* too corrupt to keep around if overwritten */
- ife->tcf_qstats.drops++;
spin_unlock(&ife->tcf_lock);
+ qstats_drop_inc(this_cpu_ptr(ife->common.cpu_qstats));
return TC_ACT_SHOT;
}
skboff += err;
}
+ spin_unlock(&ife->tcf_lock);
oethh = (struct ethhdr *)skb->data;
- if (!is_zero_ether_addr(ife->eth_src))
- ether_addr_copy(oethh->h_source, ife->eth_src);
- if (!is_zero_ether_addr(ife->eth_dst))
- ether_addr_copy(oethh->h_dest, ife->eth_dst);
- oethh->h_proto = htons(ife->eth_type);
+ if (!is_zero_ether_addr(p->eth_src))
+ ether_addr_copy(oethh->h_source, p->eth_src);
+ if (!is_zero_ether_addr(p->eth_dst))
+ ether_addr_copy(oethh->h_dest, p->eth_dst);
+ oethh->h_proto = htons(p->eth_type);
if (skb_at_tc_ingress(skb))
skb_pull(skb, skb->dev->hard_header_len);
- spin_unlock(&ife->tcf_lock);
-
return action;
}
@@ -781,21 +807,19 @@ static int tcf_ife_act(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
struct tcf_ife_info *ife = to_ife(a);
+ struct tcf_ife_params *p;
+ int ret;
+
+ rcu_read_lock();
+ p = rcu_dereference(ife->params);
+ if (p->flags & IFE_ENCODE) {
+ ret = tcf_ife_encode(skb, a, res, p);
+ rcu_read_unlock();
+ return ret;
+ }
+ rcu_read_unlock();
- if (ife->flags & IFE_ENCODE)
- return tcf_ife_encode(skb, a, res);
-
- if (!(ife->flags & IFE_ENCODE))
- return tcf_ife_decode(skb, a, res);
-
- pr_info_ratelimited("unknown failure(policy neither de/encode\n");
- spin_lock(&ife->tcf_lock);
- bstats_update(&ife->tcf_bstats, skb);
- tcf_lastuse_update(&ife->tcf_tm);
- ife->tcf_qstats.drops++;
- spin_unlock(&ife->tcf_lock);
-
- return TC_ACT_SHOT;
+ return tcf_ife_decode(skb, a, res);
}
static int tcf_ife_walker(struct net *net, struct sk_buff *skb,
@@ -811,7 +835,7 @@ static int tcf_ife_search(struct net *net, struct tc_action **a, u32 index)
{
struct tc_action_net *tn = net_generic(net, ife_net_id);
- return tcf_hash_search(tn, a, index);
+ return tcf_idr_search(tn, a, index);
}
static struct tc_action_ops act_ife_ops = {
@@ -831,7 +855,7 @@ static __net_init int ife_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, ife_net_id);
- return tc_action_net_init(tn, &act_ife_ops, IFE_TAB_MASK);
+ return tc_action_net_init(tn, &act_ife_ops);
}
static void __net_exit ife_exit_net(struct net *net)
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index d516ba8178b8..d9e399a7e3d5 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -28,8 +28,6 @@
#include <linux/netfilter_ipv4/ip_tables.h>
-#define IPT_TAB_MASK 15
-
static unsigned int ipt_net_id;
static struct tc_action_ops act_ipt_ops;
@@ -41,6 +39,7 @@ static int ipt_init_target(struct net *net, struct xt_entry_target *t,
{
struct xt_tgchk_param par;
struct xt_target *target;
+ struct ipt_entry e = {};
int ret = 0;
target = xt_request_find_target(AF_INET, t->u.user.name,
@@ -52,6 +51,7 @@ static int ipt_init_target(struct net *net, struct xt_entry_target *t,
memset(&par, 0, sizeof(par));
par.net = net;
par.table = table;
+ par.entryinfo = &e;
par.target = target;
par.targinfo = t->data;
par.hook_mask = hook;
@@ -116,33 +116,33 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
if (tb[TCA_IPT_INDEX] != NULL)
index = nla_get_u32(tb[TCA_IPT_INDEX]);
- exists = tcf_hash_check(tn, index, a, bind);
+ exists = tcf_idr_check(tn, index, a, bind);
if (exists && bind)
return 0;
if (tb[TCA_IPT_HOOK] == NULL || tb[TCA_IPT_TARG] == NULL) {
if (exists)
- tcf_hash_release(*a, bind);
+ tcf_idr_release(*a, bind);
return -EINVAL;
}
td = (struct xt_entry_target *)nla_data(tb[TCA_IPT_TARG]);
if (nla_len(tb[TCA_IPT_TARG]) < td->u.target_size) {
if (exists)
- tcf_hash_release(*a, bind);
+ tcf_idr_release(*a, bind);
return -EINVAL;
}
if (!exists) {
- ret = tcf_hash_create(tn, index, est, a, ops, bind,
- false);
+ ret = tcf_idr_create(tn, index, est, a, ops, bind,
+ false);
if (ret)
return ret;
ret = ACT_P_CREATED;
} else {
if (bind)/* dont override defaults */
return 0;
- tcf_hash_release(*a, bind);
+ tcf_idr_release(*a, bind);
if (!ovr)
return -EEXIST;
@@ -178,7 +178,7 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
ipt->tcfi_hook = hook;
spin_unlock_bh(&ipt->tcf_lock);
if (ret == ACT_P_CREATED)
- tcf_hash_insert(tn, *a);
+ tcf_idr_insert(tn, *a);
return ret;
err3:
@@ -187,7 +187,7 @@ err2:
kfree(tname);
err1:
if (ret == ACT_P_CREATED)
- tcf_hash_cleanup(*a, est);
+ tcf_idr_cleanup(*a, est);
return err;
}
@@ -314,7 +314,7 @@ static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index)
{
struct tc_action_net *tn = net_generic(net, ipt_net_id);
- return tcf_hash_search(tn, a, index);
+ return tcf_idr_search(tn, a, index);
}
static struct tc_action_ops act_ipt_ops = {
@@ -334,7 +334,7 @@ static __net_init int ipt_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, ipt_net_id);
- return tc_action_net_init(tn, &act_ipt_ops, IPT_TAB_MASK);
+ return tc_action_net_init(tn, &act_ipt_ops);
}
static void __net_exit ipt_exit_net(struct net *net)
@@ -364,7 +364,7 @@ static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index)
{
struct tc_action_net *tn = net_generic(net, xt_net_id);
- return tcf_hash_search(tn, a, index);
+ return tcf_idr_search(tn, a, index);
}
static struct tc_action_ops act_xt_ops = {
@@ -384,7 +384,7 @@ static __net_init int xt_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, xt_net_id);
- return tc_action_net_init(tn, &act_xt_ops, IPT_TAB_MASK);
+ return tc_action_net_init(tn, &act_xt_ops);
}
static void __net_exit xt_exit_net(struct net *net)
diff --git a/net/sched/act_meta_mark.c b/net/sched/act_meta_mark.c
index 82892170ce4f..1e3f10e5da99 100644
--- a/net/sched/act_meta_mark.c
+++ b/net/sched/act_meta_mark.c
@@ -76,4 +76,4 @@ module_exit(ifemark_cleanup_module);
MODULE_AUTHOR("Jamal Hadi Salim(2015)");
MODULE_DESCRIPTION("Inter-FE skb mark metadata module");
MODULE_LICENSE("GPL");
-MODULE_ALIAS_IFE_META(IFE_META_SKBMARK);
+MODULE_ALIAS_IFE_META("skbmark");
diff --git a/net/sched/act_meta_skbprio.c b/net/sched/act_meta_skbprio.c
index 26bf4d86030b..4033f9fc4d4a 100644
--- a/net/sched/act_meta_skbprio.c
+++ b/net/sched/act_meta_skbprio.c
@@ -73,4 +73,4 @@ module_exit(ifeprio_cleanup_module);
MODULE_AUTHOR("Jamal Hadi Salim(2015)");
MODULE_DESCRIPTION("Inter-FE skb prio metadata action");
MODULE_LICENSE("GPL");
-MODULE_ALIAS_IFE_META(IFE_META_PRIO);
+MODULE_ALIAS_IFE_META("skbprio");
diff --git a/net/sched/act_meta_skbtcindex.c b/net/sched/act_meta_skbtcindex.c
index 3b35774ce890..2ea1f26c9e96 100644
--- a/net/sched/act_meta_skbtcindex.c
+++ b/net/sched/act_meta_skbtcindex.c
@@ -76,4 +76,4 @@ module_exit(ifetc_index_cleanup_module);
MODULE_AUTHOR("Jamal Hadi Salim(2016)");
MODULE_DESCRIPTION("Inter-FE skb tc_index metadata module");
MODULE_LICENSE("GPL");
-MODULE_ALIAS_IFE_META(IFE_META_SKBTCINDEX);
+MODULE_ALIAS_IFE_META("tcindex");
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 1b5549ababd4..8b3e59388480 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -28,7 +28,6 @@
#include <linux/tc_act/tc_mirred.h>
#include <net/tc_act/tc_mirred.h>
-#define MIRRED_TAB_MASK 7
static LIST_HEAD(mirred_list);
static DEFINE_SPINLOCK(mirred_list_lock);
@@ -94,7 +93,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
return -EINVAL;
parm = nla_data(tb[TCA_MIRRED_PARMS]);
- exists = tcf_hash_check(tn, parm->index, a, bind);
+ exists = tcf_idr_check(tn, parm->index, a, bind);
if (exists && bind)
return 0;
@@ -106,14 +105,14 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
break;
default:
if (exists)
- tcf_hash_release(*a, bind);
+ tcf_idr_release(*a, bind);
return -EINVAL;
}
if (parm->ifindex) {
dev = __dev_get_by_index(net, parm->ifindex);
if (dev == NULL) {
if (exists)
- tcf_hash_release(*a, bind);
+ tcf_idr_release(*a, bind);
return -ENODEV;
}
mac_header_xmit = dev_is_mac_header_xmit(dev);
@@ -124,13 +123,13 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
if (!exists) {
if (dev == NULL)
return -EINVAL;
- ret = tcf_hash_create(tn, parm->index, est, a,
- &act_mirred_ops, bind, true);
+ ret = tcf_idr_create(tn, parm->index, est, a,
+ &act_mirred_ops, bind, true);
if (ret)
return ret;
ret = ACT_P_CREATED;
} else {
- tcf_hash_release(*a, bind);
+ tcf_idr_release(*a, bind);
if (!ovr)
return -EEXIST;
}
@@ -141,6 +140,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
m->tcfm_eaction = parm->eaction;
if (dev != NULL) {
m->tcfm_ifindex = parm->ifindex;
+ m->net = net;
if (ret != ACT_P_CREATED)
dev_put(rcu_dereference_protected(m->tcfm_dev, 1));
dev_hold(dev);
@@ -152,7 +152,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
spin_lock_bh(&mirred_list_lock);
list_add(&m->tcfm_list, &mirred_list);
spin_unlock_bh(&mirred_list_lock);
- tcf_hash_insert(tn, *a);
+ tcf_idr_insert(tn, *a);
}
return ret;
@@ -283,7 +283,7 @@ static int tcf_mirred_search(struct net *net, struct tc_action **a, u32 index)
{
struct tc_action_net *tn = net_generic(net, mirred_net_id);
- return tcf_hash_search(tn, a, index);
+ return tcf_idr_search(tn, a, index);
}
static int mirred_device_event(struct notifier_block *unused,
@@ -314,15 +314,11 @@ static struct notifier_block mirred_device_notifier = {
.notifier_call = mirred_device_event,
};
-static int tcf_mirred_device(const struct tc_action *a, struct net *net,
- struct net_device **mirred_dev)
+static struct net_device *tcf_mirred_get_dev(const struct tc_action *a)
{
- int ifindex = tcf_mirred_ifindex(a);
+ struct tcf_mirred *m = to_mirred(a);
- *mirred_dev = __dev_get_by_index(net, ifindex);
- if (!*mirred_dev)
- return -EINVAL;
- return 0;
+ return __dev_get_by_index(m->net, m->tcfm_ifindex);
}
static struct tc_action_ops act_mirred_ops = {
@@ -337,14 +333,14 @@ static struct tc_action_ops act_mirred_ops = {
.walk = tcf_mirred_walker,
.lookup = tcf_mirred_search,
.size = sizeof(struct tcf_mirred),
- .get_dev = tcf_mirred_device,
+ .get_dev = tcf_mirred_get_dev,
};
static __net_init int mirred_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, mirred_net_id);
- return tc_action_net_init(tn, &act_mirred_ops, MIRRED_TAB_MASK);
+ return tc_action_net_init(tn, &act_mirred_ops);
}
static void __net_exit mirred_exit_net(struct net *net)
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 9016ab8a0649..c365d01b99c8 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -29,8 +29,6 @@
#include <net/udp.h>
-#define NAT_TAB_MASK 15
-
static unsigned int nat_net_id;
static struct tc_action_ops act_nat_ops;
@@ -58,16 +56,16 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
return -EINVAL;
parm = nla_data(tb[TCA_NAT_PARMS]);
- if (!tcf_hash_check(tn, parm->index, a, bind)) {
- ret = tcf_hash_create(tn, parm->index, est, a,
- &act_nat_ops, bind, false);
+ if (!tcf_idr_check(tn, parm->index, a, bind)) {
+ ret = tcf_idr_create(tn, parm->index, est, a,
+ &act_nat_ops, bind, false);
if (ret)
return ret;
ret = ACT_P_CREATED;
} else {
if (bind)
return 0;
- tcf_hash_release(*a, bind);
+ tcf_idr_release(*a, bind);
if (!ovr)
return -EEXIST;
}
@@ -83,7 +81,7 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
spin_unlock_bh(&p->tcf_lock);
if (ret == ACT_P_CREATED)
- tcf_hash_insert(tn, *a);
+ tcf_idr_insert(tn, *a);
return ret;
}
@@ -290,7 +288,7 @@ static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index)
{
struct tc_action_net *tn = net_generic(net, nat_net_id);
- return tcf_hash_search(tn, a, index);
+ return tcf_idr_search(tn, a, index);
}
static struct tc_action_ops act_nat_ops = {
@@ -309,7 +307,7 @@ static __net_init int nat_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, nat_net_id);
- return tc_action_net_init(tn, &act_nat_ops, NAT_TAB_MASK);
+ return tc_action_net_init(tn, &act_nat_ops);
}
static void __net_exit nat_exit_net(struct net *net)
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 7dc5892671c8..491fe5deb09e 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -24,8 +24,6 @@
#include <net/tc_act/tc_pedit.h>
#include <uapi/linux/tc_act/tc_pedit.h>
-#define PEDIT_TAB_MASK 15
-
static unsigned int pedit_net_id;
static struct tc_action_ops act_pedit_ops;
@@ -168,17 +166,17 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
if (IS_ERR(keys_ex))
return PTR_ERR(keys_ex);
- if (!tcf_hash_check(tn, parm->index, a, bind)) {
+ if (!tcf_idr_check(tn, parm->index, a, bind)) {
if (!parm->nkeys)
return -EINVAL;
- ret = tcf_hash_create(tn, parm->index, est, a,
- &act_pedit_ops, bind, false);
+ ret = tcf_idr_create(tn, parm->index, est, a,
+ &act_pedit_ops, bind, false);
if (ret)
return ret;
p = to_pedit(*a);
keys = kmalloc(ksize, GFP_KERNEL);
if (keys == NULL) {
- tcf_hash_cleanup(*a, est);
+ tcf_idr_cleanup(*a, est);
kfree(keys_ex);
return -ENOMEM;
}
@@ -186,7 +184,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
} else {
if (bind)
return 0;
- tcf_hash_release(*a, bind);
+ tcf_idr_release(*a, bind);
if (!ovr)
return -EEXIST;
p = to_pedit(*a);
@@ -214,7 +212,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
spin_unlock_bh(&p->tcf_lock);
if (ret == ACT_P_CREATED)
- tcf_hash_insert(tn, *a);
+ tcf_idr_insert(tn, *a);
return ret;
}
@@ -432,7 +430,7 @@ static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index)
{
struct tc_action_net *tn = net_generic(net, pedit_net_id);
- return tcf_hash_search(tn, a, index);
+ return tcf_idr_search(tn, a, index);
}
static struct tc_action_ops act_pedit_ops = {
@@ -452,7 +450,7 @@ static __net_init int pedit_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, pedit_net_id);
- return tc_action_net_init(tn, &act_pedit_ops, PEDIT_TAB_MASK);
+ return tc_action_net_init(tn, &act_pedit_ops);
}
static void __net_exit pedit_exit_net(struct net *net)
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index b062bc80c7cb..3bb2ebf9e9ae 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -40,8 +40,6 @@ struct tcf_police {
#define to_police(pc) ((struct tcf_police *)pc)
-#define POL_TAB_MASK 15
-
/* old policer structure from before tc actions */
struct tc_police_compat {
u32 index;
@@ -101,18 +99,18 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla,
return -EINVAL;
parm = nla_data(tb[TCA_POLICE_TBF]);
- exists = tcf_hash_check(tn, parm->index, a, bind);
+ exists = tcf_idr_check(tn, parm->index, a, bind);
if (exists && bind)
return 0;
if (!exists) {
- ret = tcf_hash_create(tn, parm->index, NULL, a,
- &act_police_ops, bind, false);
+ ret = tcf_idr_create(tn, parm->index, NULL, a,
+ &act_police_ops, bind, false);
if (ret)
return ret;
ret = ACT_P_CREATED;
} else {
- tcf_hash_release(*a, bind);
+ tcf_idr_release(*a, bind);
if (!ovr)
return -EEXIST;
}
@@ -188,7 +186,7 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla,
return ret;
police->tcfp_t_c = ktime_get_ns();
- tcf_hash_insert(tn, *a);
+ tcf_idr_insert(tn, *a);
return ret;
@@ -196,7 +194,7 @@ failure:
qdisc_put_rtab(P_tab);
qdisc_put_rtab(R_tab);
if (ret == ACT_P_CREATED)
- tcf_hash_cleanup(*a, est);
+ tcf_idr_cleanup(*a, est);
return err;
}
@@ -310,7 +308,7 @@ static int tcf_police_search(struct net *net, struct tc_action **a, u32 index)
{
struct tc_action_net *tn = net_generic(net, police_net_id);
- return tcf_hash_search(tn, a, index);
+ return tcf_idr_search(tn, a, index);
}
MODULE_AUTHOR("Alexey Kuznetsov");
@@ -333,7 +331,7 @@ static __net_init int police_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, police_net_id);
- return tc_action_net_init(tn, &act_police_ops, POL_TAB_MASK);
+ return tc_action_net_init(tn, &act_police_ops);
}
static void __net_exit police_exit_net(struct net *net)
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 59d6645a4007..8b5abcd2f32f 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -25,7 +25,6 @@
#include <linux/if_arp.h>
-#define SAMPLE_TAB_MASK 7
static unsigned int sample_net_id;
static struct tc_action_ops act_sample_ops;
@@ -59,18 +58,18 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
parm = nla_data(tb[TCA_SAMPLE_PARMS]);
- exists = tcf_hash_check(tn, parm->index, a, bind);
+ exists = tcf_idr_check(tn, parm->index, a, bind);
if (exists && bind)
return 0;
if (!exists) {
- ret = tcf_hash_create(tn, parm->index, est, a,
- &act_sample_ops, bind, false);
+ ret = tcf_idr_create(tn, parm->index, est, a,
+ &act_sample_ops, bind, false);
if (ret)
return ret;
ret = ACT_P_CREATED;
} else {
- tcf_hash_release(*a, bind);
+ tcf_idr_release(*a, bind);
if (!ovr)
return -EEXIST;
}
@@ -82,7 +81,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
psample_group = psample_group_get(net, s->psample_group_num);
if (!psample_group) {
if (ret == ACT_P_CREATED)
- tcf_hash_release(*a, bind);
+ tcf_idr_release(*a, bind);
return -ENOMEM;
}
RCU_INIT_POINTER(s->psample_group, psample_group);
@@ -93,7 +92,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
}
if (ret == ACT_P_CREATED)
- tcf_hash_insert(tn, *a);
+ tcf_idr_insert(tn, *a);
return ret;
}
@@ -221,7 +220,7 @@ static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index)
{
struct tc_action_net *tn = net_generic(net, sample_net_id);
- return tcf_hash_search(tn, a, index);
+ return tcf_idr_search(tn, a, index);
}
static struct tc_action_ops act_sample_ops = {
@@ -241,7 +240,7 @@ static __net_init int sample_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, sample_net_id);
- return tc_action_net_init(tn, &act_sample_ops, SAMPLE_TAB_MASK);
+ return tc_action_net_init(tn, &act_sample_ops);
}
static void __net_exit sample_exit_net(struct net *net)
@@ -265,12 +264,13 @@ static int __init sample_init_module(void)
static void __exit sample_cleanup_module(void)
{
+ rcu_barrier();
tcf_unregister_action(&act_sample_ops, &sample_net_ops);
}
module_init(sample_init_module);
module_exit(sample_cleanup_module);
-MODULE_AUTHOR("Yotam Gigi <yotamg@mellanox.com>");
+MODULE_AUTHOR("Yotam Gigi <yotam.gi@gmail.com>");
MODULE_DESCRIPTION("Packet sampling action");
MODULE_LICENSE("GPL v2");
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 43605e7ce051..e7b57e5071a3 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -24,8 +24,6 @@
#include <linux/tc_act/tc_defact.h>
#include <net/tc_act/tc_defact.h>
-#define SIMP_TAB_MASK 7
-
static unsigned int simp_net_id;
static struct tc_action_ops act_simp_ops;
@@ -102,28 +100,28 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
return -EINVAL;
parm = nla_data(tb[TCA_DEF_PARMS]);
- exists = tcf_hash_check(tn, parm->index, a, bind);
+ exists = tcf_idr_check(tn, parm->index, a, bind);
if (exists && bind)
return 0;
if (tb[TCA_DEF_DATA] == NULL) {
if (exists)
- tcf_hash_release(*a, bind);
+ tcf_idr_release(*a, bind);
return -EINVAL;
}
defdata = nla_data(tb[TCA_DEF_DATA]);
if (!exists) {
- ret = tcf_hash_create(tn, parm->index, est, a,
- &act_simp_ops, bind, false);
+ ret = tcf_idr_create(tn, parm->index, est, a,
+ &act_simp_ops, bind, false);
if (ret)
return ret;
d = to_defact(*a);
ret = alloc_defdata(d, defdata);
if (ret < 0) {
- tcf_hash_cleanup(*a, est);
+ tcf_idr_cleanup(*a, est);
return ret;
}
d->tcf_action = parm->action;
@@ -131,7 +129,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
} else {
d = to_defact(*a);
- tcf_hash_release(*a, bind);
+ tcf_idr_release(*a, bind);
if (!ovr)
return -EEXIST;
@@ -139,7 +137,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
}
if (ret == ACT_P_CREATED)
- tcf_hash_insert(tn, *a);
+ tcf_idr_insert(tn, *a);
return ret;
}
@@ -183,7 +181,7 @@ static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index)
{
struct tc_action_net *tn = net_generic(net, simp_net_id);
- return tcf_hash_search(tn, a, index);
+ return tcf_idr_search(tn, a, index);
}
static struct tc_action_ops act_simp_ops = {
@@ -203,7 +201,7 @@ static __net_init int simp_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, simp_net_id);
- return tc_action_net_init(tn, &act_simp_ops, SIMP_TAB_MASK);
+ return tc_action_net_init(tn, &act_simp_ops);
}
static void __net_exit simp_exit_net(struct net *net)
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 6b3e65d7de0c..59949d61f20d 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -27,8 +27,6 @@
#include <linux/tc_act/tc_skbedit.h>
#include <net/tc_act/tc_skbedit.h>
-#define SKBEDIT_TAB_MASK 15
-
static unsigned int skbedit_net_id;
static struct tc_action_ops act_skbedit_ops;
@@ -118,18 +116,18 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
parm = nla_data(tb[TCA_SKBEDIT_PARMS]);
- exists = tcf_hash_check(tn, parm->index, a, bind);
+ exists = tcf_idr_check(tn, parm->index, a, bind);
if (exists && bind)
return 0;
if (!flags) {
- tcf_hash_release(*a, bind);
+ tcf_idr_release(*a, bind);
return -EINVAL;
}
if (!exists) {
- ret = tcf_hash_create(tn, parm->index, est, a,
- &act_skbedit_ops, bind, false);
+ ret = tcf_idr_create(tn, parm->index, est, a,
+ &act_skbedit_ops, bind, false);
if (ret)
return ret;
@@ -137,7 +135,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
ret = ACT_P_CREATED;
} else {
d = to_skbedit(*a);
- tcf_hash_release(*a, bind);
+ tcf_idr_release(*a, bind);
if (!ovr)
return -EEXIST;
}
@@ -163,7 +161,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
spin_unlock_bh(&d->tcf_lock);
if (ret == ACT_P_CREATED)
- tcf_hash_insert(tn, *a);
+ tcf_idr_insert(tn, *a);
return ret;
}
@@ -221,7 +219,7 @@ static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index)
{
struct tc_action_net *tn = net_generic(net, skbedit_net_id);
- return tcf_hash_search(tn, a, index);
+ return tcf_idr_search(tn, a, index);
}
static struct tc_action_ops act_skbedit_ops = {
@@ -240,7 +238,7 @@ static __net_init int skbedit_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, skbedit_net_id);
- return tc_action_net_init(tn, &act_skbedit_ops, SKBEDIT_TAB_MASK);
+ return tc_action_net_init(tn, &act_skbedit_ops);
}
static void __net_exit skbedit_exit_net(struct net *net)
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index a73c4bbcada2..b642ad3d39dd 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -20,8 +20,6 @@
#include <linux/tc_act/tc_skbmod.h>
#include <net/tc_act/tc_skbmod.h>
-#define SKBMOD_TAB_MASK 15
-
static unsigned int skbmod_net_id;
static struct tc_action_ops act_skbmod_ops;
@@ -129,7 +127,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
if (parm->flags & SKBMOD_F_SWAPMAC)
lflags = SKBMOD_F_SWAPMAC;
- exists = tcf_hash_check(tn, parm->index, a, bind);
+ exists = tcf_idr_check(tn, parm->index, a, bind);
if (exists && bind)
return 0;
@@ -137,14 +135,14 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
return -EINVAL;
if (!exists) {
- ret = tcf_hash_create(tn, parm->index, est, a,
- &act_skbmod_ops, bind, true);
+ ret = tcf_idr_create(tn, parm->index, est, a,
+ &act_skbmod_ops, bind, true);
if (ret)
return ret;
ret = ACT_P_CREATED;
} else {
- tcf_hash_release(*a, bind);
+ tcf_idr_release(*a, bind);
if (!ovr)
return -EEXIST;
}
@@ -155,7 +153,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
p = kzalloc(sizeof(struct tcf_skbmod_params), GFP_KERNEL);
if (unlikely(!p)) {
if (ovr)
- tcf_hash_release(*a, bind);
+ tcf_idr_release(*a, bind);
return -ENOMEM;
}
@@ -182,7 +180,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
kfree_rcu(p_old, rcu);
if (ret == ACT_P_CREATED)
- tcf_hash_insert(tn, *a);
+ tcf_idr_insert(tn, *a);
return ret;
}
@@ -245,7 +243,7 @@ static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index)
{
struct tc_action_net *tn = net_generic(net, skbmod_net_id);
- return tcf_hash_search(tn, a, index);
+ return tcf_idr_search(tn, a, index);
}
static struct tc_action_ops act_skbmod_ops = {
@@ -265,7 +263,7 @@ static __net_init int skbmod_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, skbmod_net_id);
- return tc_action_net_init(tn, &act_skbmod_ops, SKBMOD_TAB_MASK);
+ return tc_action_net_init(tn, &act_skbmod_ops);
}
static void __net_exit skbmod_exit_net(struct net *net)
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index fd7e75679c69..30c96274c638 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -20,8 +20,6 @@
#include <linux/tc_act/tc_tunnel_key.h>
#include <net/tc_act/tc_tunnel_key.h>
-#define TUNNEL_KEY_TAB_MASK 15
-
static unsigned int tunnel_key_net_id;
static struct tc_action_ops act_tunnel_key_ops;
@@ -100,7 +98,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
return -EINVAL;
parm = nla_data(tb[TCA_TUNNEL_KEY_PARMS]);
- exists = tcf_hash_check(tn, parm->index, a, bind);
+ exists = tcf_idr_check(tn, parm->index, a, bind);
if (exists && bind)
return 0;
@@ -159,14 +157,14 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
}
if (!exists) {
- ret = tcf_hash_create(tn, parm->index, est, a,
- &act_tunnel_key_ops, bind, true);
+ ret = tcf_idr_create(tn, parm->index, est, a,
+ &act_tunnel_key_ops, bind, true);
if (ret)
return ret;
ret = ACT_P_CREATED;
} else {
- tcf_hash_release(*a, bind);
+ tcf_idr_release(*a, bind);
if (!ovr)
return -EEXIST;
}
@@ -177,7 +175,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
params_new = kzalloc(sizeof(*params_new), GFP_KERNEL);
if (unlikely(!params_new)) {
if (ret == ACT_P_CREATED)
- tcf_hash_release(*a, bind);
+ tcf_idr_release(*a, bind);
return -ENOMEM;
}
@@ -193,13 +191,13 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
kfree_rcu(params_old, rcu);
if (ret == ACT_P_CREATED)
- tcf_hash_insert(tn, *a);
+ tcf_idr_insert(tn, *a);
return ret;
err_out:
if (exists)
- tcf_hash_release(*a, bind);
+ tcf_idr_release(*a, bind);
return ret;
}
@@ -304,7 +302,7 @@ static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index)
{
struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
- return tcf_hash_search(tn, a, index);
+ return tcf_idr_search(tn, a, index);
}
static struct tc_action_ops act_tunnel_key_ops = {
@@ -324,7 +322,7 @@ static __net_init int tunnel_key_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
- return tc_action_net_init(tn, &act_tunnel_key_ops, TUNNEL_KEY_TAB_MASK);
+ return tc_action_net_init(tn, &act_tunnel_key_ops);
}
static void __net_exit tunnel_key_exit_net(struct net *net)
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 13ba3a89f675..97f717a13ad5 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -19,8 +19,6 @@
#include <linux/tc_act/tc_vlan.h>
#include <net/tc_act/tc_vlan.h>
-#define VLAN_TAB_MASK 15
-
static unsigned int vlan_net_id;
static struct tc_action_ops act_vlan_ops;
@@ -28,14 +26,13 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
struct tcf_vlan *v = to_vlan(a);
+ struct tcf_vlan_params *p;
int action;
int err;
u16 tci;
- spin_lock(&v->tcf_lock);
tcf_lastuse_update(&v->tcf_tm);
- bstats_update(&v->tcf_bstats, skb);
- action = v->tcf_action;
+ bstats_cpu_update(this_cpu_ptr(v->common.cpu_bstats), skb);
/* Ensure 'data' points at mac_header prior calling vlan manipulating
* functions.
@@ -43,15 +40,21 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
if (skb_at_tc_ingress(skb))
skb_push_rcsum(skb, skb->mac_len);
- switch (v->tcfv_action) {
+ rcu_read_lock();
+
+ action = READ_ONCE(v->tcf_action);
+
+ p = rcu_dereference(v->vlan_p);
+
+ switch (p->tcfv_action) {
case TCA_VLAN_ACT_POP:
err = skb_vlan_pop(skb);
if (err)
goto drop;
break;
case TCA_VLAN_ACT_PUSH:
- err = skb_vlan_push(skb, v->tcfv_push_proto, v->tcfv_push_vid |
- (v->tcfv_push_prio << VLAN_PRIO_SHIFT));
+ err = skb_vlan_push(skb, p->tcfv_push_proto, p->tcfv_push_vid |
+ (p->tcfv_push_prio << VLAN_PRIO_SHIFT));
if (err)
goto drop;
break;
@@ -70,14 +73,14 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
goto drop;
}
/* replace the vid */
- tci = (tci & ~VLAN_VID_MASK) | v->tcfv_push_vid;
+ tci = (tci & ~VLAN_VID_MASK) | p->tcfv_push_vid;
/* replace prio bits, if tcfv_push_prio specified */
- if (v->tcfv_push_prio) {
+ if (p->tcfv_push_prio) {
tci &= ~VLAN_PRIO_MASK;
- tci |= v->tcfv_push_prio << VLAN_PRIO_SHIFT;
+ tci |= p->tcfv_push_prio << VLAN_PRIO_SHIFT;
}
/* put updated tci as hwaccel tag */
- __vlan_hwaccel_put_tag(skb, v->tcfv_push_proto, tci);
+ __vlan_hwaccel_put_tag(skb, p->tcfv_push_proto, tci);
break;
default:
BUG();
@@ -87,12 +90,13 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
drop:
action = TC_ACT_SHOT;
- v->tcf_qstats.drops++;
+ qstats_drop_inc(this_cpu_ptr(v->common.cpu_qstats));
+
unlock:
+ rcu_read_unlock();
if (skb_at_tc_ingress(skb))
skb_pull_rcsum(skb, skb->mac_len);
- spin_unlock(&v->tcf_lock);
return action;
}
@@ -109,6 +113,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
{
struct tc_action_net *tn = net_generic(net, vlan_net_id);
struct nlattr *tb[TCA_VLAN_MAX + 1];
+ struct tcf_vlan_params *p, *p_old;
struct tc_vlan *parm;
struct tcf_vlan *v;
int action;
@@ -128,7 +133,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
if (!tb[TCA_VLAN_PARMS])
return -EINVAL;
parm = nla_data(tb[TCA_VLAN_PARMS]);
- exists = tcf_hash_check(tn, parm->index, a, bind);
+ exists = tcf_idr_check(tn, parm->index, a, bind);
if (exists && bind)
return 0;
@@ -139,13 +144,13 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
case TCA_VLAN_ACT_MODIFY:
if (!tb[TCA_VLAN_PUSH_VLAN_ID]) {
if (exists)
- tcf_hash_release(*a, bind);
+ tcf_idr_release(*a, bind);
return -EINVAL;
}
push_vid = nla_get_u16(tb[TCA_VLAN_PUSH_VLAN_ID]);
if (push_vid >= VLAN_VID_MASK) {
if (exists)
- tcf_hash_release(*a, bind);
+ tcf_idr_release(*a, bind);
return -ERANGE;
}
@@ -167,66 +172,87 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
break;
default:
if (exists)
- tcf_hash_release(*a, bind);
+ tcf_idr_release(*a, bind);
return -EINVAL;
}
action = parm->v_action;
if (!exists) {
- ret = tcf_hash_create(tn, parm->index, est, a,
- &act_vlan_ops, bind, false);
+ ret = tcf_idr_create(tn, parm->index, est, a,
+ &act_vlan_ops, bind, true);
if (ret)
return ret;
ret = ACT_P_CREATED;
} else {
- tcf_hash_release(*a, bind);
+ tcf_idr_release(*a, bind);
if (!ovr)
return -EEXIST;
}
v = to_vlan(*a);
- spin_lock_bh(&v->tcf_lock);
-
- v->tcfv_action = action;
- v->tcfv_push_vid = push_vid;
- v->tcfv_push_prio = push_prio;
- v->tcfv_push_proto = push_proto;
+ ASSERT_RTNL();
+ p = kzalloc(sizeof(*p), GFP_KERNEL);
+ if (!p) {
+ if (ovr)
+ tcf_idr_release(*a, bind);
+ return -ENOMEM;
+ }
v->tcf_action = parm->action;
- spin_unlock_bh(&v->tcf_lock);
+ p_old = rtnl_dereference(v->vlan_p);
+
+ p->tcfv_action = action;
+ p->tcfv_push_vid = push_vid;
+ p->tcfv_push_prio = push_prio;
+ p->tcfv_push_proto = push_proto;
+
+ rcu_assign_pointer(v->vlan_p, p);
+
+ if (p_old)
+ kfree_rcu(p_old, rcu);
if (ret == ACT_P_CREATED)
- tcf_hash_insert(tn, *a);
+ tcf_idr_insert(tn, *a);
return ret;
}
+static void tcf_vlan_cleanup(struct tc_action *a, int bind)
+{
+ struct tcf_vlan *v = to_vlan(a);
+ struct tcf_vlan_params *p;
+
+ p = rcu_dereference_protected(v->vlan_p, 1);
+ kfree_rcu(p, rcu);
+}
+
static int tcf_vlan_dump(struct sk_buff *skb, struct tc_action *a,
int bind, int ref)
{
unsigned char *b = skb_tail_pointer(skb);
struct tcf_vlan *v = to_vlan(a);
+ struct tcf_vlan_params *p = rtnl_dereference(v->vlan_p);
struct tc_vlan opt = {
.index = v->tcf_index,
.refcnt = v->tcf_refcnt - ref,
.bindcnt = v->tcf_bindcnt - bind,
.action = v->tcf_action,
- .v_action = v->tcfv_action,
+ .v_action = p->tcfv_action,
};
struct tcf_t t;
if (nla_put(skb, TCA_VLAN_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
- if ((v->tcfv_action == TCA_VLAN_ACT_PUSH ||
- v->tcfv_action == TCA_VLAN_ACT_MODIFY) &&
- (nla_put_u16(skb, TCA_VLAN_PUSH_VLAN_ID, v->tcfv_push_vid) ||
+ if ((p->tcfv_action == TCA_VLAN_ACT_PUSH ||
+ p->tcfv_action == TCA_VLAN_ACT_MODIFY) &&
+ (nla_put_u16(skb, TCA_VLAN_PUSH_VLAN_ID, p->tcfv_push_vid) ||
nla_put_be16(skb, TCA_VLAN_PUSH_VLAN_PROTOCOL,
- v->tcfv_push_proto) ||
+ p->tcfv_push_proto) ||
(nla_put_u8(skb, TCA_VLAN_PUSH_VLAN_PRIORITY,
- v->tcfv_push_prio))))
+ p->tcfv_push_prio))))
goto nla_put_failure;
tcf_tm_dump(&t, &v->tcf_tm);
@@ -252,7 +278,7 @@ static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index)
{
struct tc_action_net *tn = net_generic(net, vlan_net_id);
- return tcf_hash_search(tn, a, index);
+ return tcf_idr_search(tn, a, index);
}
static struct tc_action_ops act_vlan_ops = {
@@ -262,6 +288,7 @@ static struct tc_action_ops act_vlan_ops = {
.act = tcf_vlan,
.dump = tcf_vlan_dump,
.init = tcf_vlan_init,
+ .cleanup = tcf_vlan_cleanup,
.walk = tcf_vlan_walker,
.lookup = tcf_vlan_search,
.size = sizeof(struct tcf_vlan),
@@ -271,7 +298,7 @@ static __net_init int vlan_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, vlan_net_id);
- return tc_action_net_init(tn, &act_vlan_ops, VLAN_TAB_MASK);
+ return tc_action_net_init(tn, &act_vlan_ops);
}
static void __net_exit vlan_exit_net(struct net *net)
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 39da0c5801c9..ab255b421781 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -77,6 +77,8 @@ out:
}
EXPORT_SYMBOL(register_tcf_proto_ops);
+static struct workqueue_struct *tc_filter_wq;
+
int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
{
struct tcf_proto_ops *t;
@@ -86,6 +88,7 @@ int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
* tcf_proto_ops's destroy() handler.
*/
rcu_barrier();
+ flush_workqueue(tc_filter_wq);
write_lock(&cls_mod_lock);
list_for_each_entry(t, &tcf_proto_base, head) {
@@ -100,20 +103,11 @@ int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
}
EXPORT_SYMBOL(unregister_tcf_proto_ops);
-static int tfilter_notify(struct net *net, struct sk_buff *oskb,
- struct nlmsghdr *n, struct tcf_proto *tp,
- unsigned long fh, int event, bool unicast);
-
-static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
- struct nlmsghdr *n,
- struct tcf_chain *chain, int event)
+bool tcf_queue_work(struct work_struct *work)
{
- struct tcf_proto *tp;
-
- for (tp = rtnl_dereference(chain->filter_chain);
- tp; tp = rtnl_dereference(tp->next))
- tfilter_notify(net, oskb, n, tp, 0, event, false);
+ return queue_work(tc_filter_wq, work);
}
+EXPORT_SYMBOL(tcf_queue_work);
/* Select new prio value from the range, managed by kernel. */
@@ -201,14 +195,22 @@ static struct tcf_chain *tcf_chain_create(struct tcf_block *block,
return chain;
}
+static void tcf_chain_head_change(struct tcf_chain *chain,
+ struct tcf_proto *tp_head)
+{
+ if (chain->chain_head_change)
+ chain->chain_head_change(tp_head,
+ chain->chain_head_change_priv);
+}
+
static void tcf_chain_flush(struct tcf_chain *chain)
{
struct tcf_proto *tp;
- if (*chain->p_filter_chain)
- RCU_INIT_POINTER(*chain->p_filter_chain, NULL);
+ tcf_chain_head_change(chain, NULL);
while ((tp = rtnl_dereference(chain->filter_chain)) != NULL) {
RCU_INIT_POINTER(chain->filter_chain, tp->next);
+ tcf_chain_put(chain);
tcf_proto_destroy(tp);
}
}
@@ -216,10 +218,14 @@ static void tcf_chain_flush(struct tcf_chain *chain)
static void tcf_chain_destroy(struct tcf_chain *chain)
{
list_del(&chain->list);
- tcf_chain_flush(chain);
kfree(chain);
}
+static void tcf_chain_hold(struct tcf_chain *chain)
+{
+ ++chain->refcnt;
+}
+
struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
bool create)
{
@@ -227,36 +233,51 @@ struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
list_for_each_entry(chain, &block->chain_list, list) {
if (chain->index == chain_index) {
- chain->refcnt++;
+ tcf_chain_hold(chain);
return chain;
}
}
- if (create)
- return tcf_chain_create(block, chain_index);
- else
- return NULL;
+
+ return create ? tcf_chain_create(block, chain_index) : NULL;
}
EXPORT_SYMBOL(tcf_chain_get);
void tcf_chain_put(struct tcf_chain *chain)
{
- /* Destroy unused chain, with exception of chain 0, which is the
- * default one and has to be always present.
- */
- if (--chain->refcnt == 0 && !chain->filter_chain && chain->index != 0)
+ if (--chain->refcnt == 0)
tcf_chain_destroy(chain);
}
EXPORT_SYMBOL(tcf_chain_put);
-static void
-tcf_chain_filter_chain_ptr_set(struct tcf_chain *chain,
- struct tcf_proto __rcu **p_filter_chain)
+static void tcf_block_offload_cmd(struct tcf_block *block, struct Qdisc *q,
+ struct tcf_block_ext_info *ei,
+ enum tc_block_command command)
{
- chain->p_filter_chain = p_filter_chain;
+ struct net_device *dev = q->dev_queue->dev;
+ struct tc_block_offload bo = {};
+
+ if (!dev->netdev_ops->ndo_setup_tc)
+ return;
+ bo.command = command;
+ bo.binder_type = ei->binder_type;
+ bo.block = block;
+ dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
}
-int tcf_block_get(struct tcf_block **p_block,
- struct tcf_proto __rcu **p_filter_chain)
+static void tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
+ struct tcf_block_ext_info *ei)
+{
+ tcf_block_offload_cmd(block, q, ei, TC_BLOCK_BIND);
+}
+
+static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q,
+ struct tcf_block_ext_info *ei)
+{
+ tcf_block_offload_cmd(block, q, ei, TC_BLOCK_UNBIND);
+}
+
+int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
+ struct tcf_block_ext_info *ei)
{
struct tcf_block *block = kzalloc(sizeof(*block), GFP_KERNEL);
struct tcf_chain *chain;
@@ -265,13 +286,20 @@ int tcf_block_get(struct tcf_block **p_block,
if (!block)
return -ENOMEM;
INIT_LIST_HEAD(&block->chain_list);
+ INIT_LIST_HEAD(&block->cb_list);
+
/* Create chain 0 by default, it has to be always present. */
chain = tcf_chain_create(block, 0);
if (!chain) {
err = -ENOMEM;
goto err_chain_create;
}
- tcf_chain_filter_chain_ptr_set(chain, p_filter_chain);
+ WARN_ON(!ei->chain_head_change);
+ chain->chain_head_change = ei->chain_head_change;
+ chain->chain_head_change_priv = ei->chain_head_change_priv;
+ block->net = qdisc_net(q);
+ block->q = q;
+ tcf_block_offload_bind(block, q, ei);
*p_block = block;
return 0;
@@ -279,21 +307,179 @@ err_chain_create:
kfree(block);
return err;
}
+EXPORT_SYMBOL(tcf_block_get_ext);
+
+static void tcf_chain_head_change_dflt(struct tcf_proto *tp_head, void *priv)
+{
+ struct tcf_proto __rcu **p_filter_chain = priv;
+
+ rcu_assign_pointer(*p_filter_chain, tp_head);
+}
+
+int tcf_block_get(struct tcf_block **p_block,
+ struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q)
+{
+ struct tcf_block_ext_info ei = {
+ .chain_head_change = tcf_chain_head_change_dflt,
+ .chain_head_change_priv = p_filter_chain,
+ };
+
+ WARN_ON(!p_filter_chain);
+ return tcf_block_get_ext(p_block, q, &ei);
+}
EXPORT_SYMBOL(tcf_block_get);
-void tcf_block_put(struct tcf_block *block)
+static void tcf_block_put_final(struct work_struct *work)
{
+ struct tcf_block *block = container_of(work, struct tcf_block, work);
struct tcf_chain *chain, *tmp;
- if (!block)
- return;
-
+ rtnl_lock();
+ /* Only chain 0 should be still here. */
list_for_each_entry_safe(chain, tmp, &block->chain_list, list)
- tcf_chain_destroy(chain);
+ tcf_chain_put(chain);
+ rtnl_unlock();
kfree(block);
}
+
+/* XXX: Standalone actions are not allowed to jump to any chain, and bound
+ * actions should be all removed after flushing. However, filters are now
+ * destroyed in tc filter workqueue with RTNL lock, they can not race here.
+ */
+void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
+ struct tcf_block_ext_info *ei)
+{
+ struct tcf_chain *chain, *tmp;
+
+ list_for_each_entry_safe(chain, tmp, &block->chain_list, list)
+ tcf_chain_flush(chain);
+
+ tcf_block_offload_unbind(block, q, ei);
+
+ INIT_WORK(&block->work, tcf_block_put_final);
+ /* Wait for existing RCU callbacks to cool down, make sure their works
+ * have been queued before this. We can not flush pending works here
+ * because we are holding the RTNL lock.
+ */
+ rcu_barrier();
+ tcf_queue_work(&block->work);
+}
+EXPORT_SYMBOL(tcf_block_put_ext);
+
+void tcf_block_put(struct tcf_block *block)
+{
+ struct tcf_block_ext_info ei = {0, };
+
+ if (!block)
+ return;
+ tcf_block_put_ext(block, block->q, &ei);
+}
+
EXPORT_SYMBOL(tcf_block_put);
+struct tcf_block_cb {
+ struct list_head list;
+ tc_setup_cb_t *cb;
+ void *cb_ident;
+ void *cb_priv;
+ unsigned int refcnt;
+};
+
+void *tcf_block_cb_priv(struct tcf_block_cb *block_cb)
+{
+ return block_cb->cb_priv;
+}
+EXPORT_SYMBOL(tcf_block_cb_priv);
+
+struct tcf_block_cb *tcf_block_cb_lookup(struct tcf_block *block,
+ tc_setup_cb_t *cb, void *cb_ident)
+{ struct tcf_block_cb *block_cb;
+
+ list_for_each_entry(block_cb, &block->cb_list, list)
+ if (block_cb->cb == cb && block_cb->cb_ident == cb_ident)
+ return block_cb;
+ return NULL;
+}
+EXPORT_SYMBOL(tcf_block_cb_lookup);
+
+void tcf_block_cb_incref(struct tcf_block_cb *block_cb)
+{
+ block_cb->refcnt++;
+}
+EXPORT_SYMBOL(tcf_block_cb_incref);
+
+unsigned int tcf_block_cb_decref(struct tcf_block_cb *block_cb)
+{
+ return --block_cb->refcnt;
+}
+EXPORT_SYMBOL(tcf_block_cb_decref);
+
+struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block,
+ tc_setup_cb_t *cb, void *cb_ident,
+ void *cb_priv)
+{
+ struct tcf_block_cb *block_cb;
+
+ block_cb = kzalloc(sizeof(*block_cb), GFP_KERNEL);
+ if (!block_cb)
+ return NULL;
+ block_cb->cb = cb;
+ block_cb->cb_ident = cb_ident;
+ block_cb->cb_priv = cb_priv;
+ list_add(&block_cb->list, &block->cb_list);
+ return block_cb;
+}
+EXPORT_SYMBOL(__tcf_block_cb_register);
+
+int tcf_block_cb_register(struct tcf_block *block,
+ tc_setup_cb_t *cb, void *cb_ident,
+ void *cb_priv)
+{
+ struct tcf_block_cb *block_cb;
+
+ block_cb = __tcf_block_cb_register(block, cb, cb_ident, cb_priv);
+ return block_cb ? 0 : -ENOMEM;
+}
+EXPORT_SYMBOL(tcf_block_cb_register);
+
+void __tcf_block_cb_unregister(struct tcf_block_cb *block_cb)
+{
+ list_del(&block_cb->list);
+ kfree(block_cb);
+}
+EXPORT_SYMBOL(__tcf_block_cb_unregister);
+
+void tcf_block_cb_unregister(struct tcf_block *block,
+ tc_setup_cb_t *cb, void *cb_ident)
+{
+ struct tcf_block_cb *block_cb;
+
+ block_cb = tcf_block_cb_lookup(block, cb, cb_ident);
+ if (!block_cb)
+ return;
+ __tcf_block_cb_unregister(block_cb);
+}
+EXPORT_SYMBOL(tcf_block_cb_unregister);
+
+static int tcf_block_cb_call(struct tcf_block *block, enum tc_setup_type type,
+ void *type_data, bool err_stop)
+{
+ struct tcf_block_cb *block_cb;
+ int ok_count = 0;
+ int err;
+
+ list_for_each_entry(block_cb, &block->cb_list, list) {
+ err = block_cb->cb(type, type_data, block_cb->cb_priv);
+ if (err) {
+ if (err_stop)
+ return err;
+ } else {
+ ok_count++;
+ }
+ }
+ return ok_count;
+}
+
/* Main classifier routine: scans classifier chain attached
* to this qdisc, (optionally) tests for protocol and asks
* specific classifiers.
@@ -362,11 +548,11 @@ static void tcf_chain_tp_insert(struct tcf_chain *chain,
struct tcf_chain_info *chain_info,
struct tcf_proto *tp)
{
- if (chain->p_filter_chain &&
- *chain_info->pprev == chain->filter_chain)
- rcu_assign_pointer(*chain->p_filter_chain, tp);
+ if (*chain_info->pprev == chain->filter_chain)
+ tcf_chain_head_change(chain, tp);
RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain_info));
rcu_assign_pointer(*chain_info->pprev, tp);
+ tcf_chain_hold(chain);
}
static void tcf_chain_tp_remove(struct tcf_chain *chain,
@@ -375,9 +561,10 @@ static void tcf_chain_tp_remove(struct tcf_chain *chain,
{
struct tcf_proto *next = rtnl_dereference(chain_info->next);
- if (chain->p_filter_chain && tp == chain->filter_chain)
- RCU_INIT_POINTER(*chain->p_filter_chain, next);
+ if (tp == chain->filter_chain)
+ tcf_chain_head_change(chain, next);
RCU_INIT_POINTER(*chain_info->pprev, next);
+ tcf_chain_put(chain);
}
static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
@@ -407,6 +594,112 @@ static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
return tp;
}
+static int tcf_fill_node(struct net *net, struct sk_buff *skb,
+ struct tcf_proto *tp, struct Qdisc *q, u32 parent,
+ void *fh, u32 portid, u32 seq, u16 flags, int event)
+{
+ struct tcmsg *tcm;
+ struct nlmsghdr *nlh;
+ unsigned char *b = skb_tail_pointer(skb);
+
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
+ if (!nlh)
+ goto out_nlmsg_trim;
+ tcm = nlmsg_data(nlh);
+ tcm->tcm_family = AF_UNSPEC;
+ tcm->tcm__pad1 = 0;
+ tcm->tcm__pad2 = 0;
+ tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
+ tcm->tcm_parent = parent;
+ tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol);
+ if (nla_put_string(skb, TCA_KIND, tp->ops->kind))
+ goto nla_put_failure;
+ if (nla_put_u32(skb, TCA_CHAIN, tp->chain->index))
+ goto nla_put_failure;
+ if (!fh) {
+ tcm->tcm_handle = 0;
+ } else {
+ if (tp->ops->dump && tp->ops->dump(net, tp, fh, skb, tcm) < 0)
+ goto nla_put_failure;
+ }
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
+ return skb->len;
+
+out_nlmsg_trim:
+nla_put_failure:
+ nlmsg_trim(skb, b);
+ return -1;
+}
+
+static int tfilter_notify(struct net *net, struct sk_buff *oskb,
+ struct nlmsghdr *n, struct tcf_proto *tp,
+ struct Qdisc *q, u32 parent,
+ void *fh, int event, bool unicast)
+{
+ struct sk_buff *skb;
+ u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+
+ skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!skb)
+ return -ENOBUFS;
+
+ if (tcf_fill_node(net, skb, tp, q, parent, fh, portid, n->nlmsg_seq,
+ n->nlmsg_flags, event) <= 0) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+
+ if (unicast)
+ return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
+
+ return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
+ n->nlmsg_flags & NLM_F_ECHO);
+}
+
+static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
+ struct nlmsghdr *n, struct tcf_proto *tp,
+ struct Qdisc *q, u32 parent,
+ void *fh, bool unicast, bool *last)
+{
+ struct sk_buff *skb;
+ u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+ int err;
+
+ skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!skb)
+ return -ENOBUFS;
+
+ if (tcf_fill_node(net, skb, tp, q, parent, fh, portid, n->nlmsg_seq,
+ n->nlmsg_flags, RTM_DELTFILTER) <= 0) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+
+ err = tp->ops->delete(tp, fh, last);
+ if (err) {
+ kfree_skb(skb);
+ return err;
+ }
+
+ if (unicast)
+ return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
+
+ return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
+ n->nlmsg_flags & NLM_F_ECHO);
+}
+
+static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
+ struct Qdisc *q, u32 parent,
+ struct nlmsghdr *n,
+ struct tcf_chain *chain, int event)
+{
+ struct tcf_proto *tp;
+
+ for (tp = rtnl_dereference(chain->filter_chain);
+ tp; tp = rtnl_dereference(tp->next))
+ tfilter_notify(net, oskb, n, tp, q, parent, 0, event, false);
+}
+
/* Add/change/delete/get a filter node */
static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
@@ -428,7 +721,7 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
struct tcf_proto *tp;
const struct Qdisc_class_ops *cops;
unsigned long cl;
- unsigned long fh;
+ void *fh;
int err;
int tp_created;
@@ -498,7 +791,7 @@ replay:
/* Do we search for filter, attached to class? */
if (TC_H_MIN(parent)) {
- cl = cops->get(q, parent);
+ cl = cops->find(q, parent);
if (cl == 0)
return -ENOENT;
}
@@ -523,7 +816,8 @@ replay:
}
if (n->nlmsg_type == RTM_DELTFILTER && prio == 0) {
- tfilter_notify_chain(net, skb, n, chain, RTM_DELTFILTER);
+ tfilter_notify_chain(net, skb, q, parent, n,
+ chain, RTM_DELTFILTER);
tcf_chain_flush(chain);
err = 0;
goto errout;
@@ -567,10 +861,10 @@ replay:
fh = tp->ops->get(tp, t->tcm_handle);
- if (fh == 0) {
+ if (!fh) {
if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) {
tcf_chain_tp_remove(chain, &chain_info, tp);
- tfilter_notify(net, skb, n, tp, fh,
+ tfilter_notify(net, skb, n, tp, q, parent, fh,
RTM_DELTFILTER, false);
tcf_proto_destroy(tp);
err = 0;
@@ -595,18 +889,17 @@ replay:
}
break;
case RTM_DELTFILTER:
- err = tp->ops->delete(tp, fh, &last);
+ err = tfilter_del_notify(net, skb, n, tp, q, parent,
+ fh, false, &last);
if (err)
goto errout;
- tfilter_notify(net, skb, n, tp, t->tcm_handle,
- RTM_DELTFILTER, false);
if (last) {
tcf_chain_tp_remove(chain, &chain_info, tp);
tcf_proto_destroy(tp);
}
goto errout;
case RTM_GETTFILTER:
- err = tfilter_notify(net, skb, n, tp, fh,
+ err = tfilter_notify(net, skb, n, tp, q, parent, fh,
RTM_NEWTFILTER, true);
goto errout;
default:
@@ -620,7 +913,8 @@ replay:
if (err == 0) {
if (tp_created)
tcf_chain_tp_insert(chain, &chain_info, tp);
- tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER, false);
+ tfilter_notify(net, skb, n, tp, q, parent, fh,
+ RTM_NEWTFILTER, false);
} else {
if (tp_created)
tcf_proto_destroy(tp);
@@ -629,94 +923,33 @@ replay:
errout:
if (chain)
tcf_chain_put(chain);
- if (cl)
- cops->put(q, cl);
if (err == -EAGAIN)
/* Replay the request. */
goto replay;
return err;
}
-static int tcf_fill_node(struct net *net, struct sk_buff *skb,
- struct tcf_proto *tp, unsigned long fh, u32 portid,
- u32 seq, u16 flags, int event)
-{
- struct tcmsg *tcm;
- struct nlmsghdr *nlh;
- unsigned char *b = skb_tail_pointer(skb);
-
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
- if (!nlh)
- goto out_nlmsg_trim;
- tcm = nlmsg_data(nlh);
- tcm->tcm_family = AF_UNSPEC;
- tcm->tcm__pad1 = 0;
- tcm->tcm__pad2 = 0;
- tcm->tcm_ifindex = qdisc_dev(tp->q)->ifindex;
- tcm->tcm_parent = tp->classid;
- tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol);
- if (nla_put_string(skb, TCA_KIND, tp->ops->kind))
- goto nla_put_failure;
- if (nla_put_u32(skb, TCA_CHAIN, tp->chain->index))
- goto nla_put_failure;
- tcm->tcm_handle = fh;
- if (RTM_DELTFILTER != event) {
- tcm->tcm_handle = 0;
- if (tp->ops->dump && tp->ops->dump(net, tp, fh, skb, tcm) < 0)
- goto nla_put_failure;
- }
- nlh->nlmsg_len = skb_tail_pointer(skb) - b;
- return skb->len;
-
-out_nlmsg_trim:
-nla_put_failure:
- nlmsg_trim(skb, b);
- return -1;
-}
-
-static int tfilter_notify(struct net *net, struct sk_buff *oskb,
- struct nlmsghdr *n, struct tcf_proto *tp,
- unsigned long fh, int event, bool unicast)
-{
- struct sk_buff *skb;
- u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
-
- skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
- if (!skb)
- return -ENOBUFS;
-
- if (tcf_fill_node(net, skb, tp, fh, portid, n->nlmsg_seq,
- n->nlmsg_flags, event) <= 0) {
- kfree_skb(skb);
- return -EINVAL;
- }
-
- if (unicast)
- return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
-
- return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
- n->nlmsg_flags & NLM_F_ECHO);
-}
-
struct tcf_dump_args {
struct tcf_walker w;
struct sk_buff *skb;
struct netlink_callback *cb;
+ struct Qdisc *q;
+ u32 parent;
};
-static int tcf_node_dump(struct tcf_proto *tp, unsigned long n,
- struct tcf_walker *arg)
+static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
{
struct tcf_dump_args *a = (void *)arg;
struct net *net = sock_net(a->skb->sk);
- return tcf_fill_node(net, a->skb, tp, n, NETLINK_CB(a->cb->skb).portid,
+ return tcf_fill_node(net, a->skb, tp, a->q, a->parent,
+ n, NETLINK_CB(a->cb->skb).portid,
a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
RTM_NEWTFILTER);
}
-static bool tcf_chain_dump(struct tcf_chain *chain, struct sk_buff *skb,
- struct netlink_callback *cb,
+static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
+ struct sk_buff *skb, struct netlink_callback *cb,
long index_start, long *p_index)
{
struct net *net = sock_net(skb->sk);
@@ -738,7 +971,7 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct sk_buff *skb,
memset(&cb->args[1], 0,
sizeof(cb->args) - sizeof(cb->args[0]));
if (cb->args[1] == 0) {
- if (tcf_fill_node(net, skb, tp, 0,
+ if (tcf_fill_node(net, skb, tp, q, parent, 0,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
RTM_NEWTFILTER) <= 0)
@@ -751,6 +984,8 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct sk_buff *skb,
arg.w.fn = tcf_node_dump;
arg.skb = skb;
arg.cb = cb;
+ arg.q = q;
+ arg.parent = parent;
arg.w.stop = 0;
arg.w.skip = cb->args[1] - 1;
arg.w.count = 0;
@@ -776,6 +1011,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
const struct Qdisc_class_ops *cops;
long index_start;
long index;
+ u32 parent;
int err;
if (nlmsg_len(cb->nlh) < sizeof(*tcm))
@@ -789,25 +1025,28 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
if (!dev)
return skb->len;
- if (!tcm->tcm_parent)
+ parent = tcm->tcm_parent;
+ if (!parent) {
q = dev->qdisc;
- else
+ parent = q->handle;
+ } else {
q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
+ }
if (!q)
goto out;
cops = q->ops->cl_ops;
if (!cops)
- goto errout;
+ goto out;
if (!cops->tcf_block)
- goto errout;
+ goto out;
if (TC_H_MIN(tcm->tcm_parent)) {
- cl = cops->get(q, tcm->tcm_parent);
+ cl = cops->find(q, tcm->tcm_parent);
if (cl == 0)
- goto errout;
+ goto out;
}
block = cops->tcf_block(q, cl);
if (!block)
- goto errout;
+ goto out;
index_start = cb->args[0];
index = 0;
@@ -816,15 +1055,13 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
if (tca[TCA_CHAIN] &&
nla_get_u32(tca[TCA_CHAIN]) != chain->index)
continue;
- if (!tcf_chain_dump(chain, skb, cb, index_start, &index))
+ if (!tcf_chain_dump(chain, q, parent, skb, cb,
+ index_start, &index))
break;
}
cb->args[0] = index;
-errout:
- if (cl)
- cops->put(q, cl);
out:
return skb->len;
}
@@ -834,6 +1071,7 @@ void tcf_exts_destroy(struct tcf_exts *exts)
#ifdef CONFIG_NET_CLS_ACT
LIST_HEAD(actions);
+ ASSERT_RTNL();
tcf_exts_to_list(exts, &actions);
tcf_action_destroy(&actions, TCA_ACT_UNBIND);
kfree(exts->actions);
@@ -872,6 +1110,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
exts->actions[i++] = act;
exts->nr_actions = i;
}
+ exts->net = net;
}
#else
if ((exts->action && tb[exts->action]) ||
@@ -883,18 +1122,12 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
}
EXPORT_SYMBOL(tcf_exts_validate);
-void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst,
- struct tcf_exts *src)
+void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src)
{
#ifdef CONFIG_NET_CLS_ACT
struct tcf_exts old = *dst;
- tcf_tree_lock(tp);
- dst->nr_actions = src->nr_actions;
- dst->actions = src->actions;
- dst->type = src->type;
- tcf_tree_unlock(tp);
-
+ *dst = *src;
tcf_exts_destroy(&old);
#endif
}
@@ -915,7 +1148,7 @@ int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts)
#ifdef CONFIG_NET_CLS_ACT
struct nlattr *nest;
- if (exts->action && exts->nr_actions) {
+ if (exts->action && tcf_exts_has_actions(exts)) {
/*
* again for backward compatible mode - we want
* to work with both old and new modes of entering
@@ -965,36 +1198,67 @@ int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts)
}
EXPORT_SYMBOL(tcf_exts_dump_stats);
-int tcf_exts_get_dev(struct net_device *dev, struct tcf_exts *exts,
- struct net_device **hw_dev)
+static int tc_exts_setup_cb_egdev_call(struct tcf_exts *exts,
+ enum tc_setup_type type,
+ void *type_data, bool err_stop)
{
+ int ok_count = 0;
#ifdef CONFIG_NET_CLS_ACT
const struct tc_action *a;
- LIST_HEAD(actions);
+ struct net_device *dev;
+ int i, ret;
- if (tc_no_actions(exts))
- return -EINVAL;
+ if (!tcf_exts_has_actions(exts))
+ return 0;
- tcf_exts_to_list(exts, &actions);
- list_for_each_entry(a, &actions, list) {
- if (a->ops->get_dev) {
- a->ops->get_dev(a, dev_net(dev), hw_dev);
- break;
- }
+ for (i = 0; i < exts->nr_actions; i++) {
+ a = exts->actions[i];
+ if (!a->ops->get_dev)
+ continue;
+ dev = a->ops->get_dev(a);
+ if (!dev)
+ continue;
+ ret = tc_setup_cb_egdev_call(dev, type, type_data, err_stop);
+ if (ret < 0)
+ return ret;
+ ok_count += ret;
}
- if (*hw_dev)
- return 0;
#endif
- return -EOPNOTSUPP;
+ return ok_count;
+}
+
+int tc_setup_cb_call(struct tcf_block *block, struct tcf_exts *exts,
+ enum tc_setup_type type, void *type_data, bool err_stop)
+{
+ int ok_count;
+ int ret;
+
+ ret = tcf_block_cb_call(block, type, type_data, err_stop);
+ if (ret < 0)
+ return ret;
+ ok_count = ret;
+
+ if (!exts)
+ return ok_count;
+ ret = tc_exts_setup_cb_egdev_call(exts, type, type_data, err_stop);
+ if (ret < 0)
+ return ret;
+ ok_count += ret;
+
+ return ok_count;
}
-EXPORT_SYMBOL(tcf_exts_get_dev);
+EXPORT_SYMBOL(tc_setup_cb_call);
static int __init tc_filter_init(void)
{
- rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, NULL);
- rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL, NULL);
+ tc_filter_wq = alloc_ordered_workqueue("tc_filter_workqueue", 0);
+ if (!tc_filter_wq)
+ return -ENOMEM;
+
+ rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, 0);
+ rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_ctl_tfilter,
- tc_dump_tfilter, NULL);
+ tc_dump_tfilter, 0);
return 0;
}
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index c4fd63a068f9..5f169ded347e 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -17,13 +17,14 @@
#include <linux/errno.h>
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>
+#include <linux/idr.h>
#include <net/netlink.h>
#include <net/act_api.h>
#include <net/pkt_cls.h>
struct basic_head {
- u32 hgenerator;
struct list_head flist;
+ struct idr handle_idr;
struct rcu_head rcu;
};
@@ -34,7 +35,10 @@ struct basic_filter {
struct tcf_result res;
struct tcf_proto *tp;
struct list_head link;
- struct rcu_head rcu;
+ union {
+ struct work_struct work;
+ struct rcu_head rcu;
+ };
};
static int basic_classify(struct sk_buff *skb, const struct tcf_proto *tp,
@@ -56,20 +60,18 @@ static int basic_classify(struct sk_buff *skb, const struct tcf_proto *tp,
return -1;
}
-static unsigned long basic_get(struct tcf_proto *tp, u32 handle)
+static void *basic_get(struct tcf_proto *tp, u32 handle)
{
- unsigned long l = 0UL;
struct basic_head *head = rtnl_dereference(tp->root);
struct basic_filter *f;
list_for_each_entry(f, &head->flist, link) {
if (f->handle == handle) {
- l = (unsigned long) f;
- break;
+ return f;
}
}
- return l;
+ return NULL;
}
static int basic_init(struct tcf_proto *tp)
@@ -80,19 +82,36 @@ static int basic_init(struct tcf_proto *tp)
if (head == NULL)
return -ENOBUFS;
INIT_LIST_HEAD(&head->flist);
+ idr_init(&head->handle_idr);
rcu_assign_pointer(tp->root, head);
return 0;
}
-static void basic_delete_filter(struct rcu_head *head)
+static void __basic_delete_filter(struct basic_filter *f)
{
- struct basic_filter *f = container_of(head, struct basic_filter, rcu);
-
tcf_exts_destroy(&f->exts);
tcf_em_tree_destroy(&f->ematches);
+ tcf_exts_put_net(&f->exts);
kfree(f);
}
+static void basic_delete_filter_work(struct work_struct *work)
+{
+ struct basic_filter *f = container_of(work, struct basic_filter, work);
+
+ rtnl_lock();
+ __basic_delete_filter(f);
+ rtnl_unlock();
+}
+
+static void basic_delete_filter(struct rcu_head *head)
+{
+ struct basic_filter *f = container_of(head, struct basic_filter, rcu);
+
+ INIT_WORK(&f->work, basic_delete_filter_work);
+ tcf_queue_work(&f->work);
+}
+
static void basic_destroy(struct tcf_proto *tp)
{
struct basic_head *head = rtnl_dereference(tp->root);
@@ -101,18 +120,25 @@ static void basic_destroy(struct tcf_proto *tp)
list_for_each_entry_safe(f, n, &head->flist, link) {
list_del_rcu(&f->link);
tcf_unbind_filter(tp, &f->res);
- call_rcu(&f->rcu, basic_delete_filter);
+ idr_remove_ext(&head->handle_idr, f->handle);
+ if (tcf_exts_get_net(&f->exts))
+ call_rcu(&f->rcu, basic_delete_filter);
+ else
+ __basic_delete_filter(f);
}
+ idr_destroy(&head->handle_idr);
kfree_rcu(head, rcu);
}
-static int basic_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
+static int basic_delete(struct tcf_proto *tp, void *arg, bool *last)
{
struct basic_head *head = rtnl_dereference(tp->root);
- struct basic_filter *f = (struct basic_filter *) arg;
+ struct basic_filter *f = arg;
list_del_rcu(&f->link);
tcf_unbind_filter(tp, &f->res);
+ idr_remove_ext(&head->handle_idr, f->handle);
+ tcf_exts_get_net(&f->exts);
call_rcu(&f->rcu, basic_delete_filter);
*last = list_empty(&head->flist);
return 0;
@@ -129,44 +155,34 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp,
struct nlattr *est, bool ovr)
{
int err;
- struct tcf_exts e;
- struct tcf_ematch_tree t;
- err = tcf_exts_init(&e, TCA_BASIC_ACT, TCA_BASIC_POLICE);
+ err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr);
if (err < 0)
return err;
- err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
- if (err < 0)
- goto errout;
- err = tcf_em_tree_validate(tp, tb[TCA_BASIC_EMATCHES], &t);
+ err = tcf_em_tree_validate(tp, tb[TCA_BASIC_EMATCHES], &f->ematches);
if (err < 0)
- goto errout;
+ return err;
if (tb[TCA_BASIC_CLASSID]) {
f->res.classid = nla_get_u32(tb[TCA_BASIC_CLASSID]);
tcf_bind_filter(tp, &f->res, base);
}
- tcf_exts_change(tp, &f->exts, &e);
- tcf_em_tree_change(tp, &f->ematches, &t);
f->tp = tp;
-
return 0;
-errout:
- tcf_exts_destroy(&e);
- return err;
}
static int basic_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base, u32 handle,
- struct nlattr **tca, unsigned long *arg, bool ovr)
+ struct nlattr **tca, void **arg, bool ovr)
{
int err;
struct basic_head *head = rtnl_dereference(tp->root);
struct nlattr *tb[TCA_BASIC_MAX + 1];
struct basic_filter *fold = (struct basic_filter *) *arg;
struct basic_filter *fnew;
+ unsigned long idr_index;
if (tca[TCA_OPTIONS] == NULL)
return -EINVAL;
@@ -189,35 +205,36 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
if (err < 0)
goto errout;
- err = -EINVAL;
if (handle) {
fnew->handle = handle;
- } else if (fold) {
- fnew->handle = fold->handle;
+ if (!fold) {
+ err = idr_alloc_ext(&head->handle_idr, fnew, &idr_index,
+ handle, handle + 1, GFP_KERNEL);
+ if (err)
+ goto errout;
+ }
} else {
- unsigned int i = 0x80000000;
- do {
- if (++head->hgenerator == 0x7FFFFFFF)
- head->hgenerator = 1;
- } while (--i > 0 && basic_get(tp, head->hgenerator));
-
- if (i <= 0) {
- pr_err("Insufficient number of handles\n");
+ err = idr_alloc_ext(&head->handle_idr, fnew, &idr_index,
+ 1, 0x7FFFFFFF, GFP_KERNEL);
+ if (err)
goto errout;
- }
-
- fnew->handle = head->hgenerator;
+ fnew->handle = idr_index;
}
err = basic_set_parms(net, tp, fnew, base, tb, tca[TCA_RATE], ovr);
- if (err < 0)
+ if (err < 0) {
+ if (!fold)
+ idr_remove_ext(&head->handle_idr, fnew->handle);
goto errout;
+ }
- *arg = (unsigned long)fnew;
+ *arg = fnew;
if (fold) {
+ idr_replace_ext(&head->handle_idr, fnew, fnew->handle);
list_replace_rcu(&fold->link, &fnew->link);
tcf_unbind_filter(tp, &fold->res);
+ tcf_exts_get_net(&fold->exts);
call_rcu(&fold->rcu, basic_delete_filter);
} else {
list_add_rcu(&fnew->link, &head->flist);
@@ -239,7 +256,7 @@ static void basic_walk(struct tcf_proto *tp, struct tcf_walker *arg)
if (arg->count < arg->skip)
goto skip;
- if (arg->fn(tp, (unsigned long) f, arg) < 0) {
+ if (arg->fn(tp, f, arg) < 0) {
arg->stop = 1;
break;
}
@@ -248,10 +265,18 @@ skip:
}
}
-static int basic_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
+static void basic_bind_class(void *fh, u32 classid, unsigned long cl)
+{
+ struct basic_filter *f = fh;
+
+ if (f && f->res.classid == classid)
+ f->res.class = cl;
+}
+
+static int basic_dump(struct net *net, struct tcf_proto *tp, void *fh,
struct sk_buff *skb, struct tcmsg *t)
{
- struct basic_filter *f = (struct basic_filter *) fh;
+ struct basic_filter *f = fh;
struct nlattr *nest;
if (f == NULL)
@@ -293,6 +318,7 @@ static struct tcf_proto_ops cls_basic_ops __read_mostly = {
.delete = basic_delete,
.walk = basic_walk,
.dump = basic_dump,
+ .bind_class = basic_bind_class,
.owner = THIS_MODULE,
};
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index f57bd531ba98..fb680dafac5a 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -17,6 +17,7 @@
#include <linux/skbuff.h>
#include <linux/filter.h>
#include <linux/bpf.h>
+#include <linux/idr.h>
#include <net/rtnetlink.h>
#include <net/pkt_cls.h>
@@ -32,7 +33,7 @@ MODULE_DESCRIPTION("TC BPF based classifier");
struct cls_bpf_head {
struct list_head plist;
- u32 hgen;
+ struct idr handle_idr;
struct rcu_head rcu;
};
@@ -49,7 +50,10 @@ struct cls_bpf_prog {
struct sock_filter *bpf_ops;
const char *bpf_name;
struct tcf_proto *tp;
- struct rcu_head rcu;
+ union {
+ struct work_struct work;
+ struct rcu_head rcu;
+ };
};
static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = {
@@ -99,11 +103,11 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
} else if (at_ingress) {
/* It is safe to push/pull even if skb_shared() */
__skb_push(skb, skb->mac_len);
- bpf_compute_data_end(skb);
+ bpf_compute_data_pointers(skb);
filter_res = BPF_PROG_RUN(prog->filter, skb);
__skb_pull(skb, skb->mac_len);
} else {
- bpf_compute_data_end(skb);
+ bpf_compute_data_pointers(skb);
filter_res = BPF_PROG_RUN(prog->filter, skb);
}
@@ -146,35 +150,39 @@ static bool cls_bpf_is_ebpf(const struct cls_bpf_prog *prog)
static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
enum tc_clsbpf_command cmd)
{
- struct net_device *dev = tp->q->dev_queue->dev;
- struct tc_cls_bpf_offload bpf_offload = {};
- struct tc_to_netdev offload;
+ bool addorrep = cmd == TC_CLSBPF_ADD || cmd == TC_CLSBPF_REPLACE;
+ struct tcf_block *block = tp->chain->block;
+ bool skip_sw = tc_skip_sw(prog->gen_flags);
+ struct tc_cls_bpf_offload cls_bpf = {};
int err;
- offload.type = TC_SETUP_CLSBPF;
- offload.cls_bpf = &bpf_offload;
-
- bpf_offload.command = cmd;
- bpf_offload.exts = &prog->exts;
- bpf_offload.prog = prog->filter;
- bpf_offload.name = prog->bpf_name;
- bpf_offload.exts_integrated = prog->exts_integrated;
- bpf_offload.gen_flags = prog->gen_flags;
-
- err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
- tp->chain->index,
- tp->protocol, &offload);
+ tc_cls_common_offload_init(&cls_bpf.common, tp);
+ cls_bpf.command = cmd;
+ cls_bpf.exts = &prog->exts;
+ cls_bpf.prog = prog->filter;
+ cls_bpf.name = prog->bpf_name;
+ cls_bpf.exts_integrated = prog->exts_integrated;
+ cls_bpf.gen_flags = prog->gen_flags;
+
+ err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, skip_sw);
+ if (addorrep) {
+ if (err < 0) {
+ cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY);
+ return err;
+ } else if (err > 0) {
+ prog->gen_flags |= TCA_CLS_FLAGS_IN_HW;
+ }
+ }
- if (!err && (cmd == TC_CLSBPF_ADD || cmd == TC_CLSBPF_REPLACE))
- prog->gen_flags |= TCA_CLS_FLAGS_IN_HW;
+ if (addorrep && skip_sw && !(prog->gen_flags & TCA_CLS_FLAGS_IN_HW))
+ return -EINVAL;
- return err;
+ return 0;
}
static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
struct cls_bpf_prog *oldprog)
{
- struct net_device *dev = tp->q->dev_queue->dev;
struct cls_bpf_prog *obj = prog;
enum tc_clsbpf_command cmd;
bool skip_sw;
@@ -184,7 +192,7 @@ static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
(oldprog && tc_skip_sw(oldprog->gen_flags));
if (oldprog && oldprog->offloaded) {
- if (tc_should_offload(dev, tp, prog->gen_flags)) {
+ if (!tc_skip_hw(prog->gen_flags)) {
cmd = TC_CLSBPF_REPLACE;
} else if (!tc_skip_sw(prog->gen_flags)) {
obj = oldprog;
@@ -193,14 +201,14 @@ static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
return -EINVAL;
}
} else {
- if (!tc_should_offload(dev, tp, prog->gen_flags))
+ if (tc_skip_hw(prog->gen_flags))
return skip_sw ? -EINVAL : 0;
cmd = TC_CLSBPF_ADD;
}
ret = cls_bpf_offload_cmd(tp, obj, cmd);
if (ret)
- return skip_sw ? ret : 0;
+ return ret;
obj->offloaded = true;
if (oldprog)
@@ -244,6 +252,7 @@ static int cls_bpf_init(struct tcf_proto *tp)
return -ENOBUFS;
INIT_LIST_HEAD_RCU(&head->plist);
+ idr_init(&head->handle_idr);
rcu_assign_pointer(tp->root, head);
return 0;
@@ -252,6 +261,7 @@ static int cls_bpf_init(struct tcf_proto *tp)
static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog)
{
tcf_exts_destroy(&prog->exts);
+ tcf_exts_put_net(&prog->exts);
if (cls_bpf_is_ebpf(prog))
bpf_prog_put(prog->filter);
@@ -263,24 +273,42 @@ static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog)
kfree(prog);
}
+static void cls_bpf_delete_prog_work(struct work_struct *work)
+{
+ struct cls_bpf_prog *prog = container_of(work, struct cls_bpf_prog, work);
+
+ rtnl_lock();
+ __cls_bpf_delete_prog(prog);
+ rtnl_unlock();
+}
+
static void cls_bpf_delete_prog_rcu(struct rcu_head *rcu)
{
- __cls_bpf_delete_prog(container_of(rcu, struct cls_bpf_prog, rcu));
+ struct cls_bpf_prog *prog = container_of(rcu, struct cls_bpf_prog, rcu);
+
+ INIT_WORK(&prog->work, cls_bpf_delete_prog_work);
+ tcf_queue_work(&prog->work);
}
static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog)
{
+ struct cls_bpf_head *head = rtnl_dereference(tp->root);
+
+ idr_remove_ext(&head->handle_idr, prog->handle);
cls_bpf_stop_offload(tp, prog);
list_del_rcu(&prog->link);
tcf_unbind_filter(tp, &prog->res);
- call_rcu(&prog->rcu, cls_bpf_delete_prog_rcu);
+ if (tcf_exts_get_net(&prog->exts))
+ call_rcu(&prog->rcu, cls_bpf_delete_prog_rcu);
+ else
+ __cls_bpf_delete_prog(prog);
}
-static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
+static int cls_bpf_delete(struct tcf_proto *tp, void *arg, bool *last)
{
struct cls_bpf_head *head = rtnl_dereference(tp->root);
- __cls_bpf_delete(tp, (struct cls_bpf_prog *) arg);
+ __cls_bpf_delete(tp, arg);
*last = list_empty(&head->plist);
return 0;
}
@@ -293,23 +321,21 @@ static void cls_bpf_destroy(struct tcf_proto *tp)
list_for_each_entry_safe(prog, tmp, &head->plist, link)
__cls_bpf_delete(tp, prog);
+ idr_destroy(&head->handle_idr);
kfree_rcu(head, rcu);
}
-static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle)
+static void *cls_bpf_get(struct tcf_proto *tp, u32 handle)
{
struct cls_bpf_head *head = rtnl_dereference(tp->root);
struct cls_bpf_prog *prog;
- unsigned long ret = 0UL;
list_for_each_entry(prog, &head->plist, link) {
- if (prog->handle == handle) {
- ret = (unsigned long) prog;
- break;
- }
+ if (prog->handle == handle)
+ return prog;
}
- return ret;
+ return NULL;
}
static int cls_bpf_prog_from_ops(struct nlattr **tb, struct cls_bpf_prog *prog)
@@ -352,7 +378,7 @@ static int cls_bpf_prog_from_ops(struct nlattr **tb, struct cls_bpf_prog *prog)
}
static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog,
- const struct tcf_proto *tp)
+ u32 gen_flags, const struct tcf_proto *tp)
{
struct bpf_prog *fp;
char *name = NULL;
@@ -360,7 +386,11 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog,
bpf_fd = nla_get_u32(tb[TCA_BPF_FD]);
- fp = bpf_prog_get_type(bpf_fd, BPF_PROG_TYPE_SCHED_CLS);
+ if (gen_flags & TCA_CLS_FLAGS_SKIP_SW)
+ fp = bpf_prog_get_type_dev(bpf_fd, BPF_PROG_TYPE_SCHED_CLS,
+ qdisc_dev(tp->q));
+ else
+ fp = bpf_prog_get_type(bpf_fd, BPF_PROG_TYPE_SCHED_CLS);
if (IS_ERR(fp))
return PTR_ERR(fp);
@@ -382,13 +412,11 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog,
return 0;
}
-static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
- struct cls_bpf_prog *prog,
- unsigned long base, struct nlattr **tb,
- struct nlattr *est, bool ovr)
+static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp,
+ struct cls_bpf_prog *prog, unsigned long base,
+ struct nlattr **tb, struct nlattr *est, bool ovr)
{
bool is_bpf, is_ebpf, have_exts = false;
- struct tcf_exts exts;
u32 gen_flags = 0;
int ret;
@@ -397,83 +425,51 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf))
return -EINVAL;
- ret = tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE);
+ ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, ovr);
if (ret < 0)
return ret;
- ret = tcf_exts_validate(net, tp, tb, est, &exts, ovr);
- if (ret < 0)
- goto errout;
if (tb[TCA_BPF_FLAGS]) {
u32 bpf_flags = nla_get_u32(tb[TCA_BPF_FLAGS]);
- if (bpf_flags & ~TCA_BPF_FLAG_ACT_DIRECT) {
- ret = -EINVAL;
- goto errout;
- }
+ if (bpf_flags & ~TCA_BPF_FLAG_ACT_DIRECT)
+ return -EINVAL;
have_exts = bpf_flags & TCA_BPF_FLAG_ACT_DIRECT;
}
if (tb[TCA_BPF_FLAGS_GEN]) {
gen_flags = nla_get_u32(tb[TCA_BPF_FLAGS_GEN]);
if (gen_flags & ~CLS_BPF_SUPPORTED_GEN_FLAGS ||
- !tc_flags_valid(gen_flags)) {
- ret = -EINVAL;
- goto errout;
- }
+ !tc_flags_valid(gen_flags))
+ return -EINVAL;
}
prog->exts_integrated = have_exts;
prog->gen_flags = gen_flags;
ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog) :
- cls_bpf_prog_from_efd(tb, prog, tp);
+ cls_bpf_prog_from_efd(tb, prog, gen_flags, tp);
if (ret < 0)
- goto errout;
+ return ret;
if (tb[TCA_BPF_CLASSID]) {
prog->res.classid = nla_get_u32(tb[TCA_BPF_CLASSID]);
tcf_bind_filter(tp, &prog->res, base);
}
- tcf_exts_change(tp, &prog->exts, &exts);
return 0;
-
-errout:
- tcf_exts_destroy(&exts);
- return ret;
-}
-
-static u32 cls_bpf_grab_new_handle(struct tcf_proto *tp,
- struct cls_bpf_head *head)
-{
- unsigned int i = 0x80000000;
- u32 handle;
-
- do {
- if (++head->hgen == 0x7FFFFFFF)
- head->hgen = 1;
- } while (--i > 0 && cls_bpf_get(tp, head->hgen));
-
- if (unlikely(i == 0)) {
- pr_err("Insufficient number of handles\n");
- handle = 0;
- } else {
- handle = head->hgen;
- }
-
- return handle;
}
static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
- unsigned long *arg, bool ovr)
+ void **arg, bool ovr)
{
struct cls_bpf_head *head = rtnl_dereference(tp->root);
- struct cls_bpf_prog *oldprog = (struct cls_bpf_prog *) *arg;
+ struct cls_bpf_prog *oldprog = *arg;
struct nlattr *tb[TCA_BPF_MAX + 1];
struct cls_bpf_prog *prog;
+ unsigned long idr_index;
int ret;
if (tca[TCA_OPTIONS] == NULL)
@@ -499,22 +495,30 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
}
}
- if (handle == 0)
- prog->handle = cls_bpf_grab_new_handle(tp, head);
- else
+ if (handle == 0) {
+ ret = idr_alloc_ext(&head->handle_idr, prog, &idr_index,
+ 1, 0x7FFFFFFF, GFP_KERNEL);
+ if (ret)
+ goto errout;
+ prog->handle = idr_index;
+ } else {
+ if (!oldprog) {
+ ret = idr_alloc_ext(&head->handle_idr, prog, &idr_index,
+ handle, handle + 1, GFP_KERNEL);
+ if (ret)
+ goto errout;
+ }
prog->handle = handle;
- if (prog->handle == 0) {
- ret = -EINVAL;
- goto errout;
}
- ret = cls_bpf_modify_existing(net, tp, prog, base, tb, tca[TCA_RATE],
- ovr);
+ ret = cls_bpf_set_parms(net, tp, prog, base, tb, tca[TCA_RATE], ovr);
if (ret < 0)
- goto errout;
+ goto errout_idr;
ret = cls_bpf_offload(tp, prog, oldprog);
if (ret) {
+ if (!oldprog)
+ idr_remove_ext(&head->handle_idr, prog->handle);
__cls_bpf_delete_prog(prog);
return ret;
}
@@ -523,16 +527,21 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
prog->gen_flags |= TCA_CLS_FLAGS_NOT_IN_HW;
if (oldprog) {
+ idr_replace_ext(&head->handle_idr, prog, handle);
list_replace_rcu(&oldprog->link, &prog->link);
tcf_unbind_filter(tp, &oldprog->res);
+ tcf_exts_get_net(&oldprog->exts);
call_rcu(&oldprog->rcu, cls_bpf_delete_prog_rcu);
} else {
list_add_rcu(&prog->link, &head->plist);
}
- *arg = (unsigned long) prog;
+ *arg = prog;
return 0;
+errout_idr:
+ if (!oldprog)
+ idr_remove_ext(&head->handle_idr, prog->handle);
errout:
tcf_exts_destroy(&prog->exts);
kfree(prog);
@@ -578,10 +587,10 @@ static int cls_bpf_dump_ebpf_info(const struct cls_bpf_prog *prog,
return 0;
}
-static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
+static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, void *fh,
struct sk_buff *skb, struct tcmsg *tm)
{
- struct cls_bpf_prog *prog = (struct cls_bpf_prog *) fh;
+ struct cls_bpf_prog *prog = fh;
struct nlattr *nest;
u32 bpf_flags = 0;
int ret;
@@ -631,6 +640,14 @@ nla_put_failure:
return -1;
}
+static void cls_bpf_bind_class(void *fh, u32 classid, unsigned long cl)
+{
+ struct cls_bpf_prog *prog = fh;
+
+ if (prog && prog->res.classid == classid)
+ prog->res.class = cl;
+}
+
static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
struct cls_bpf_head *head = rtnl_dereference(tp->root);
@@ -639,7 +656,7 @@ static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg)
list_for_each_entry(prog, &head->plist, link) {
if (arg->count < arg->skip)
goto skip;
- if (arg->fn(tp, (unsigned long) prog, arg) < 0) {
+ if (arg->fn(tp, prog, arg) < 0) {
arg->stop = 1;
break;
}
@@ -659,6 +676,7 @@ static struct tcf_proto_ops cls_bpf_ops __read_mostly = {
.delete = cls_bpf_delete,
.walk = cls_bpf_walk,
.dump = cls_bpf_dump,
+ .bind_class = cls_bpf_bind_class,
};
static int __init cls_bpf_init_mod(void)
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 12ce547eea04..309d5899265f 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -23,7 +23,10 @@ struct cls_cgroup_head {
struct tcf_exts exts;
struct tcf_ematch_tree ematches;
struct tcf_proto *tp;
- struct rcu_head rcu;
+ union {
+ struct work_struct work;
+ struct rcu_head rcu;
+ };
};
static int cls_cgroup_classify(struct sk_buff *skb, const struct tcf_proto *tp,
@@ -43,9 +46,9 @@ static int cls_cgroup_classify(struct sk_buff *skb, const struct tcf_proto *tp,
return tcf_exts_exec(skb, &head->exts, res);
}
-static unsigned long cls_cgroup_get(struct tcf_proto *tp, u32 handle)
+static void *cls_cgroup_get(struct tcf_proto *tp, u32 handle)
{
- return 0UL;
+ return NULL;
}
static int cls_cgroup_init(struct tcf_proto *tp)
@@ -57,27 +60,42 @@ static const struct nla_policy cgroup_policy[TCA_CGROUP_MAX + 1] = {
[TCA_CGROUP_EMATCHES] = { .type = NLA_NESTED },
};
+static void __cls_cgroup_destroy(struct cls_cgroup_head *head)
+{
+ tcf_exts_destroy(&head->exts);
+ tcf_em_tree_destroy(&head->ematches);
+ tcf_exts_put_net(&head->exts);
+ kfree(head);
+}
+
+static void cls_cgroup_destroy_work(struct work_struct *work)
+{
+ struct cls_cgroup_head *head = container_of(work,
+ struct cls_cgroup_head,
+ work);
+ rtnl_lock();
+ __cls_cgroup_destroy(head);
+ rtnl_unlock();
+}
+
static void cls_cgroup_destroy_rcu(struct rcu_head *root)
{
struct cls_cgroup_head *head = container_of(root,
struct cls_cgroup_head,
rcu);
- tcf_exts_destroy(&head->exts);
- tcf_em_tree_destroy(&head->ematches);
- kfree(head);
+ INIT_WORK(&head->work, cls_cgroup_destroy_work);
+ tcf_queue_work(&head->work);
}
static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
- unsigned long *arg, bool ovr)
+ void **arg, bool ovr)
{
struct nlattr *tb[TCA_CGROUP_MAX + 1];
struct cls_cgroup_head *head = rtnl_dereference(tp->root);
struct cls_cgroup_head *new;
- struct tcf_ematch_tree t;
- struct tcf_exts e;
int err;
if (!tca[TCA_OPTIONS])
@@ -103,27 +121,19 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
if (err < 0)
goto errout;
- err = tcf_exts_init(&e, TCA_CGROUP_ACT, TCA_CGROUP_POLICE);
+ err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &new->exts, ovr);
if (err < 0)
goto errout;
- err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
- if (err < 0) {
- tcf_exts_destroy(&e);
- goto errout;
- }
- err = tcf_em_tree_validate(tp, tb[TCA_CGROUP_EMATCHES], &t);
- if (err < 0) {
- tcf_exts_destroy(&e);
+ err = tcf_em_tree_validate(tp, tb[TCA_CGROUP_EMATCHES], &new->ematches);
+ if (err < 0)
goto errout;
- }
-
- tcf_exts_change(tp, &new->exts, &e);
- tcf_em_tree_change(tp, &new->ematches, &t);
rcu_assign_pointer(tp->root, new);
- if (head)
+ if (head) {
+ tcf_exts_get_net(&head->exts);
call_rcu(&head->rcu, cls_cgroup_destroy_rcu);
+ }
return 0;
errout:
tcf_exts_destroy(&new->exts);
@@ -136,11 +146,15 @@ static void cls_cgroup_destroy(struct tcf_proto *tp)
struct cls_cgroup_head *head = rtnl_dereference(tp->root);
/* Head can still be NULL due to cls_cgroup_init(). */
- if (head)
- call_rcu(&head->rcu, cls_cgroup_destroy_rcu);
+ if (head) {
+ if (tcf_exts_get_net(&head->exts))
+ call_rcu(&head->rcu, cls_cgroup_destroy_rcu);
+ else
+ __cls_cgroup_destroy(head);
+ }
}
-static int cls_cgroup_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
+static int cls_cgroup_delete(struct tcf_proto *tp, void *arg, bool *last)
{
return -EOPNOTSUPP;
}
@@ -152,7 +166,7 @@ static void cls_cgroup_walk(struct tcf_proto *tp, struct tcf_walker *arg)
if (arg->count < arg->skip)
goto skip;
- if (arg->fn(tp, (unsigned long) head, arg) < 0) {
+ if (arg->fn(tp, head, arg) < 0) {
arg->stop = 1;
return;
}
@@ -160,7 +174,7 @@ skip:
arg->count++;
}
-static int cls_cgroup_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
+static int cls_cgroup_dump(struct net *net, struct tcf_proto *tp, void *fh,
struct sk_buff *skb, struct tcmsg *t)
{
struct cls_cgroup_head *head = rtnl_dereference(tp->root);
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 3065752b9cda..25c2a888e1f0 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -57,7 +57,10 @@ struct flow_filter {
u32 divisor;
u32 baseclass;
u32 hashrnd;
- struct rcu_head rcu;
+ union {
+ struct work_struct work;
+ struct rcu_head rcu;
+ };
};
static inline u32 addr_fold(void *addr)
@@ -345,9 +348,9 @@ static int flow_classify(struct sk_buff *skb, const struct tcf_proto *tp,
return -1;
}
-static void flow_perturbation(unsigned long arg)
+static void flow_perturbation(struct timer_list *t)
{
- struct flow_filter *f = (struct flow_filter *)arg;
+ struct flow_filter *f = from_timer(f, t, perturb_timer);
get_random_bytes(&f->hashrnd, 4);
if (f->perturb_period)
@@ -369,27 +372,41 @@ static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = {
[TCA_FLOW_PERTURB] = { .type = NLA_U32 },
};
-static void flow_destroy_filter(struct rcu_head *head)
+static void __flow_destroy_filter(struct flow_filter *f)
{
- struct flow_filter *f = container_of(head, struct flow_filter, rcu);
-
del_timer_sync(&f->perturb_timer);
tcf_exts_destroy(&f->exts);
tcf_em_tree_destroy(&f->ematches);
+ tcf_exts_put_net(&f->exts);
kfree(f);
}
+static void flow_destroy_filter_work(struct work_struct *work)
+{
+ struct flow_filter *f = container_of(work, struct flow_filter, work);
+
+ rtnl_lock();
+ __flow_destroy_filter(f);
+ rtnl_unlock();
+}
+
+static void flow_destroy_filter(struct rcu_head *head)
+{
+ struct flow_filter *f = container_of(head, struct flow_filter, rcu);
+
+ INIT_WORK(&f->work, flow_destroy_filter_work);
+ tcf_queue_work(&f->work);
+}
+
static int flow_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
- unsigned long *arg, bool ovr)
+ void **arg, bool ovr)
{
struct flow_head *head = rtnl_dereference(tp->root);
struct flow_filter *fold, *fnew;
struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_FLOW_MAX + 1];
- struct tcf_exts e;
- struct tcf_ematch_tree t;
unsigned int nkeys = 0;
unsigned int perturb_period = 0;
u32 baseclass = 0;
@@ -425,31 +442,27 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
return -EOPNOTSUPP;
}
- err = tcf_exts_init(&e, TCA_FLOW_ACT, TCA_FLOW_POLICE);
- if (err < 0)
- goto err1;
- err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
- if (err < 0)
- goto err1;
+ fnew = kzalloc(sizeof(*fnew), GFP_KERNEL);
+ if (!fnew)
+ return -ENOBUFS;
- err = tcf_em_tree_validate(tp, tb[TCA_FLOW_EMATCHES], &t);
+ err = tcf_em_tree_validate(tp, tb[TCA_FLOW_EMATCHES], &fnew->ematches);
if (err < 0)
goto err1;
- err = -ENOBUFS;
- fnew = kzalloc(sizeof(*fnew), GFP_KERNEL);
- if (!fnew)
+ err = tcf_exts_init(&fnew->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE);
+ if (err < 0)
goto err2;
- err = tcf_exts_init(&fnew->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE);
+ err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &fnew->exts, ovr);
if (err < 0)
- goto err3;
+ goto err2;
- fold = (struct flow_filter *)*arg;
+ fold = *arg;
if (fold) {
err = -EINVAL;
if (fold->handle != handle && handle)
- goto err3;
+ goto err2;
/* Copy fold into fnew */
fnew->tp = fold->tp;
@@ -469,36 +482,39 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
if (tb[TCA_FLOW_MODE])
mode = nla_get_u32(tb[TCA_FLOW_MODE]);
if (mode != FLOW_MODE_HASH && nkeys > 1)
- goto err3;
+ goto err2;
if (mode == FLOW_MODE_HASH)
perturb_period = fold->perturb_period;
if (tb[TCA_FLOW_PERTURB]) {
if (mode != FLOW_MODE_HASH)
- goto err3;
+ goto err2;
perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ;
}
} else {
err = -EINVAL;
if (!handle)
- goto err3;
+ goto err2;
if (!tb[TCA_FLOW_KEYS])
- goto err3;
+ goto err2;
mode = FLOW_MODE_MAP;
if (tb[TCA_FLOW_MODE])
mode = nla_get_u32(tb[TCA_FLOW_MODE]);
if (mode != FLOW_MODE_HASH && nkeys > 1)
- goto err3;
+ goto err2;
if (tb[TCA_FLOW_PERTURB]) {
if (mode != FLOW_MODE_HASH)
- goto err3;
+ goto err2;
perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ;
}
- if (TC_H_MAJ(baseclass) == 0)
- baseclass = TC_H_MAKE(tp->q->handle, baseclass);
+ if (TC_H_MAJ(baseclass) == 0) {
+ struct Qdisc *q = tcf_block_q(tp->chain->block);
+
+ baseclass = TC_H_MAKE(q->handle, baseclass);
+ }
if (TC_H_MIN(baseclass) == 0)
baseclass = TC_H_MAKE(baseclass, 1);
@@ -508,11 +524,7 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
get_random_bytes(&fnew->hashrnd, 4);
}
- setup_deferrable_timer(&fnew->perturb_timer, flow_perturbation,
- (unsigned long)fnew);
-
- tcf_exts_change(tp, &fnew->exts, &e);
- tcf_em_tree_change(tp, &fnew->ematches, &t);
+ timer_setup(&fnew->perturb_timer, flow_perturbation, TIMER_DEFERRABLE);
netif_keep_dst(qdisc_dev(tp->q));
@@ -541,33 +553,34 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
if (perturb_period)
mod_timer(&fnew->perturb_timer, jiffies + perturb_period);
- if (*arg == 0)
+ if (!*arg)
list_add_tail_rcu(&fnew->list, &head->filters);
else
list_replace_rcu(&fold->list, &fnew->list);
- *arg = (unsigned long)fnew;
+ *arg = fnew;
- if (fold)
+ if (fold) {
+ tcf_exts_get_net(&fold->exts);
call_rcu(&fold->rcu, flow_destroy_filter);
+ }
return 0;
-err3:
- tcf_exts_destroy(&fnew->exts);
err2:
- tcf_em_tree_destroy(&t);
- kfree(fnew);
+ tcf_exts_destroy(&fnew->exts);
+ tcf_em_tree_destroy(&fnew->ematches);
err1:
- tcf_exts_destroy(&e);
+ kfree(fnew);
return err;
}
-static int flow_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
+static int flow_delete(struct tcf_proto *tp, void *arg, bool *last)
{
struct flow_head *head = rtnl_dereference(tp->root);
- struct flow_filter *f = (struct flow_filter *)arg;
+ struct flow_filter *f = arg;
list_del_rcu(&f->list);
+ tcf_exts_get_net(&f->exts);
call_rcu(&f->rcu, flow_destroy_filter);
*last = list_empty(&head->filters);
return 0;
@@ -592,26 +605,29 @@ static void flow_destroy(struct tcf_proto *tp)
list_for_each_entry_safe(f, next, &head->filters, list) {
list_del_rcu(&f->list);
- call_rcu(&f->rcu, flow_destroy_filter);
+ if (tcf_exts_get_net(&f->exts))
+ call_rcu(&f->rcu, flow_destroy_filter);
+ else
+ __flow_destroy_filter(f);
}
kfree_rcu(head, rcu);
}
-static unsigned long flow_get(struct tcf_proto *tp, u32 handle)
+static void *flow_get(struct tcf_proto *tp, u32 handle)
{
struct flow_head *head = rtnl_dereference(tp->root);
struct flow_filter *f;
list_for_each_entry(f, &head->filters, list)
if (f->handle == handle)
- return (unsigned long)f;
- return 0;
+ return f;
+ return NULL;
}
-static int flow_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
+static int flow_dump(struct net *net, struct tcf_proto *tp, void *fh,
struct sk_buff *skb, struct tcmsg *t)
{
- struct flow_filter *f = (struct flow_filter *)fh;
+ struct flow_filter *f = fh;
struct nlattr *nest;
if (f == NULL)
@@ -677,7 +693,7 @@ static void flow_walk(struct tcf_proto *tp, struct tcf_walker *arg)
list_for_each_entry(f, &head->filters, list) {
if (arg->count < arg->skip)
goto skip;
- if (arg->fn(tp, (unsigned long)f, arg) < 0) {
+ if (arg->fn(tp, f, arg) < 0) {
arg->stop = 1;
break;
}
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 7832eb93379b..543a3e875d05 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -68,7 +68,6 @@ struct cls_fl_head {
struct rhashtable ht;
struct fl_flow_mask mask;
struct flow_dissector dissector;
- u32 hgen;
bool mask_assigned;
struct list_head filters;
struct rhashtable_params ht_params;
@@ -76,6 +75,7 @@ struct cls_fl_head {
struct work_struct work;
struct rcu_head rcu;
};
+ struct idr handle_idr;
};
struct cls_fl_filter {
@@ -87,8 +87,10 @@ struct cls_fl_filter {
struct list_head list;
u32 handle;
u32 flags;
- struct rcu_head rcu;
- struct tc_to_netdev tc;
+ union {
+ struct work_struct work;
+ struct rcu_head rcu;
+ };
struct net_device *hw_dev;
};
@@ -153,37 +155,12 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct cls_fl_filter *f;
struct fl_flow_key skb_key;
struct fl_flow_key skb_mkey;
- struct ip_tunnel_info *info;
if (!atomic_read(&head->ht.nelems))
return -1;
fl_clear_masked_range(&skb_key, &head->mask);
- info = skb_tunnel_info(skb);
- if (info) {
- struct ip_tunnel_key *key = &info->key;
-
- switch (ip_tunnel_info_af(info)) {
- case AF_INET:
- skb_key.enc_control.addr_type =
- FLOW_DISSECTOR_KEY_IPV4_ADDRS;
- skb_key.enc_ipv4.src = key->u.ipv4.src;
- skb_key.enc_ipv4.dst = key->u.ipv4.dst;
- break;
- case AF_INET6:
- skb_key.enc_control.addr_type =
- FLOW_DISSECTOR_KEY_IPV6_ADDRS;
- skb_key.enc_ipv6.src = key->u.ipv6.src;
- skb_key.enc_ipv6.dst = key->u.ipv6.dst;
- break;
- }
-
- skb_key.enc_key_id.keyid = tunnel_id_to_key32(key->tun_id);
- skb_key.enc_tp.src = key->tp_src;
- skb_key.enc_tp.dst = key->tp_dst;
- }
-
skb_key.indev_ifindex = skb->skb_iif;
/* skb_flow_dissect() does not set n_proto in case an unknown protocol,
* so do it rather here.
@@ -211,36 +188,46 @@ static int fl_init(struct tcf_proto *tp)
INIT_LIST_HEAD_RCU(&head->filters);
rcu_assign_pointer(tp->root, head);
+ idr_init(&head->handle_idr);
return 0;
}
-static void fl_destroy_filter(struct rcu_head *head)
+static void __fl_destroy_filter(struct cls_fl_filter *f)
{
- struct cls_fl_filter *f = container_of(head, struct cls_fl_filter, rcu);
-
tcf_exts_destroy(&f->exts);
+ tcf_exts_put_net(&f->exts);
kfree(f);
}
-static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f)
+static void fl_destroy_filter_work(struct work_struct *work)
{
- struct tc_cls_flower_offload offload = {0};
- struct net_device *dev = f->hw_dev;
- struct tc_to_netdev *tc = &f->tc;
+ struct cls_fl_filter *f = container_of(work, struct cls_fl_filter, work);
- if (!tc_can_offload(dev, tp))
- return;
+ rtnl_lock();
+ __fl_destroy_filter(f);
+ rtnl_unlock();
+}
- offload.command = TC_CLSFLOWER_DESTROY;
- offload.prio = tp->prio;
- offload.cookie = (unsigned long)f;
+static void fl_destroy_filter(struct rcu_head *head)
+{
+ struct cls_fl_filter *f = container_of(head, struct cls_fl_filter, rcu);
+
+ INIT_WORK(&f->work, fl_destroy_filter_work);
+ tcf_queue_work(&f->work);
+}
+
+static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f)
+{
+ struct tc_cls_flower_offload cls_flower = {};
+ struct tcf_block *block = tp->chain->block;
- tc->type = TC_SETUP_CLSFLOWER;
- tc->cls_flower = &offload;
+ tc_cls_common_offload_init(&cls_flower.common, tp);
+ cls_flower.command = TC_CLSFLOWER_DESTROY;
+ cls_flower.cookie = (unsigned long) f;
- dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->chain->index,
- tp->protocol, tc);
+ tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER,
+ &cls_flower, false);
}
static int fl_hw_replace_filter(struct tcf_proto *tp,
@@ -248,72 +235,63 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
struct fl_flow_key *mask,
struct cls_fl_filter *f)
{
- struct net_device *dev = tp->q->dev_queue->dev;
- struct tc_cls_flower_offload offload = {0};
- struct tc_to_netdev *tc = &f->tc;
+ struct tc_cls_flower_offload cls_flower = {};
+ struct tcf_block *block = tp->chain->block;
+ bool skip_sw = tc_skip_sw(f->flags);
int err;
- if (!tc_can_offload(dev, tp)) {
- if (tcf_exts_get_dev(dev, &f->exts, &f->hw_dev) ||
- (f->hw_dev && !tc_can_offload(f->hw_dev, tp))) {
- f->hw_dev = dev;
- return tc_skip_sw(f->flags) ? -EINVAL : 0;
- }
- dev = f->hw_dev;
- tc->egress_dev = true;
- } else {
- f->hw_dev = dev;
+ tc_cls_common_offload_init(&cls_flower.common, tp);
+ cls_flower.command = TC_CLSFLOWER_REPLACE;
+ cls_flower.cookie = (unsigned long) f;
+ cls_flower.dissector = dissector;
+ cls_flower.mask = mask;
+ cls_flower.key = &f->mkey;
+ cls_flower.exts = &f->exts;
+ cls_flower.classid = f->res.classid;
+
+ err = tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER,
+ &cls_flower, skip_sw);
+ if (err < 0) {
+ fl_hw_destroy_filter(tp, f);
+ return err;
+ } else if (err > 0) {
+ f->flags |= TCA_CLS_FLAGS_IN_HW;
}
- offload.command = TC_CLSFLOWER_REPLACE;
- offload.prio = tp->prio;
- offload.cookie = (unsigned long)f;
- offload.dissector = dissector;
- offload.mask = mask;
- offload.key = &f->mkey;
- offload.exts = &f->exts;
-
- tc->type = TC_SETUP_CLSFLOWER;
- tc->cls_flower = &offload;
-
- err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
- tp->chain->index, tp->protocol, tc);
- if (!err)
- f->flags |= TCA_CLS_FLAGS_IN_HW;
+ if (skip_sw && !(f->flags & TCA_CLS_FLAGS_IN_HW))
+ return -EINVAL;
- if (tc_skip_sw(f->flags))
- return err;
return 0;
}
static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f)
{
- struct tc_cls_flower_offload offload = {0};
- struct net_device *dev = f->hw_dev;
- struct tc_to_netdev *tc = &f->tc;
-
- if (!tc_can_offload(dev, tp))
- return;
-
- offload.command = TC_CLSFLOWER_STATS;
- offload.prio = tp->prio;
- offload.cookie = (unsigned long)f;
- offload.exts = &f->exts;
+ struct tc_cls_flower_offload cls_flower = {};
+ struct tcf_block *block = tp->chain->block;
- tc->type = TC_SETUP_CLSFLOWER;
- tc->cls_flower = &offload;
+ tc_cls_common_offload_init(&cls_flower.common, tp);
+ cls_flower.command = TC_CLSFLOWER_STATS;
+ cls_flower.cookie = (unsigned long) f;
+ cls_flower.exts = &f->exts;
+ cls_flower.classid = f->res.classid;
- dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
- tp->chain->index, tp->protocol, tc);
+ tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER,
+ &cls_flower, false);
}
static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f)
{
+ struct cls_fl_head *head = rtnl_dereference(tp->root);
+
+ idr_remove_ext(&head->handle_idr, f->handle);
list_del_rcu(&f->list);
if (!tc_skip_hw(f->flags))
fl_hw_destroy_filter(tp, f);
tcf_unbind_filter(tp, &f->res);
- call_rcu(&f->rcu, fl_destroy_filter);
+ if (tcf_exts_get_net(&f->exts))
+ call_rcu(&f->rcu, fl_destroy_filter);
+ else
+ __fl_destroy_filter(f);
}
static void fl_destroy_sleepable(struct work_struct *work)
@@ -341,20 +319,17 @@ static void fl_destroy(struct tcf_proto *tp)
list_for_each_entry_safe(f, next, &head->filters, list)
__fl_delete(tp, f);
+ idr_destroy(&head->handle_idr);
__module_get(THIS_MODULE);
call_rcu(&head->rcu, fl_destroy_rcu);
}
-static unsigned long fl_get(struct tcf_proto *tp, u32 handle)
+static void *fl_get(struct tcf_proto *tp, u32 handle)
{
struct cls_fl_head *head = rtnl_dereference(tp->root);
- struct cls_fl_filter *f;
- list_for_each_entry(f, &head->filters, list)
- if (f->handle == handle)
- return (unsigned long) f;
- return 0;
+ return idr_find_ext(&head->handle_idr, handle);
}
static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
@@ -852,15 +827,11 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp,
unsigned long base, struct nlattr **tb,
struct nlattr *est, bool ovr)
{
- struct tcf_exts e;
int err;
- err = tcf_exts_init(&e, TCA_FLOWER_ACT, 0);
+ err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr);
if (err < 0)
return err;
- err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
- if (err < 0)
- goto errout;
if (tb[TCA_FLOWER_CLASSID]) {
f->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]);
@@ -869,50 +840,25 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp,
err = fl_set_key(net, tb, &f->key, &mask->key);
if (err)
- goto errout;
+ return err;
fl_mask_update_range(mask);
fl_set_masked_key(&f->mkey, &f->key, mask);
- tcf_exts_change(tp, &f->exts, &e);
-
return 0;
-errout:
- tcf_exts_destroy(&e);
- return err;
-}
-
-static u32 fl_grab_new_handle(struct tcf_proto *tp,
- struct cls_fl_head *head)
-{
- unsigned int i = 0x80000000;
- u32 handle;
-
- do {
- if (++head->hgen == 0x7FFFFFFF)
- head->hgen = 1;
- } while (--i > 0 && fl_get(tp, head->hgen));
-
- if (unlikely(i == 0)) {
- pr_err("Insufficient number of handles\n");
- handle = 0;
- } else {
- handle = head->hgen;
- }
-
- return handle;
}
static int fl_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
- unsigned long *arg, bool ovr)
+ void **arg, bool ovr)
{
struct cls_fl_head *head = rtnl_dereference(tp->root);
- struct cls_fl_filter *fold = (struct cls_fl_filter *) *arg;
+ struct cls_fl_filter *fold = *arg;
struct cls_fl_filter *fnew;
struct nlattr **tb;
struct fl_flow_mask mask = {};
+ unsigned long idr_index;
int err;
if (!tca[TCA_OPTIONS])
@@ -943,41 +889,49 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
goto errout;
if (!handle) {
- handle = fl_grab_new_handle(tp, head);
- if (!handle) {
- err = -EINVAL;
+ err = idr_alloc_ext(&head->handle_idr, fnew, &idr_index,
+ 1, 0x80000000, GFP_KERNEL);
+ if (err)
goto errout;
- }
+ fnew->handle = idr_index;
+ }
+
+ /* user specifies a handle and it doesn't exist */
+ if (handle && !fold) {
+ err = idr_alloc_ext(&head->handle_idr, fnew, &idr_index,
+ handle, handle + 1, GFP_KERNEL);
+ if (err)
+ goto errout;
+ fnew->handle = idr_index;
}
- fnew->handle = handle;
if (tb[TCA_FLOWER_FLAGS]) {
fnew->flags = nla_get_u32(tb[TCA_FLOWER_FLAGS]);
if (!tc_flags_valid(fnew->flags)) {
err = -EINVAL;
- goto errout;
+ goto errout_idr;
}
}
err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr);
if (err)
- goto errout;
+ goto errout_idr;
err = fl_check_assign_mask(head, &mask);
if (err)
- goto errout;
+ goto errout_idr;
if (!tc_skip_sw(fnew->flags)) {
if (!fold && fl_lookup(head, &fnew->mkey)) {
err = -EEXIST;
- goto errout;
+ goto errout_idr;
}
err = rhashtable_insert_fast(&head->ht, &fnew->ht_node,
head->ht_params);
if (err)
- goto errout;
+ goto errout_idr;
}
if (!tc_skip_hw(fnew->flags)) {
@@ -986,7 +940,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
&mask.key,
fnew);
if (err)
- goto errout;
+ goto errout_idr;
}
if (!tc_in_hw(fnew->flags))
@@ -1000,11 +954,14 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
fl_hw_destroy_filter(tp, fold);
}
- *arg = (unsigned long) fnew;
+ *arg = fnew;
if (fold) {
+ fnew->handle = handle;
+ idr_replace_ext(&head->handle_idr, fnew, fnew->handle);
list_replace_rcu(&fold->list, &fnew->list);
tcf_unbind_filter(tp, &fold->res);
+ tcf_exts_get_net(&fold->exts);
call_rcu(&fold->rcu, fl_destroy_filter);
} else {
list_add_tail_rcu(&fnew->list, &head->filters);
@@ -1013,6 +970,9 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
kfree(tb);
return 0;
+errout_idr:
+ if (fnew->handle)
+ idr_remove_ext(&head->handle_idr, fnew->handle);
errout:
tcf_exts_destroy(&fnew->exts);
kfree(fnew);
@@ -1021,10 +981,10 @@ errout_tb:
return err;
}
-static int fl_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
+static int fl_delete(struct tcf_proto *tp, void *arg, bool *last)
{
struct cls_fl_head *head = rtnl_dereference(tp->root);
- struct cls_fl_filter *f = (struct cls_fl_filter *) arg;
+ struct cls_fl_filter *f = arg;
if (!tc_skip_sw(f->flags))
rhashtable_remove_fast(&head->ht, &f->ht_node,
@@ -1042,7 +1002,7 @@ static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg)
list_for_each_entry_rcu(f, &head->filters, list) {
if (arg->count < arg->skip)
goto skip;
- if (arg->fn(tp, (unsigned long) f, arg) < 0) {
+ if (arg->fn(tp, f, arg) < 0) {
arg->stop = 1;
break;
}
@@ -1177,11 +1137,11 @@ static int fl_dump_key_flags(struct sk_buff *skb, u32 flags_key, u32 flags_mask)
return nla_put(skb, TCA_FLOWER_KEY_FLAGS_MASK, 4, &_mask);
}
-static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
+static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh,
struct sk_buff *skb, struct tcmsg *t)
{
struct cls_fl_head *head = rtnl_dereference(tp->root);
- struct cls_fl_filter *f = (struct cls_fl_filter *) fh;
+ struct cls_fl_filter *f = fh;
struct nlattr *nest;
struct fl_flow_key *key, *mask;
@@ -1383,6 +1343,14 @@ nla_put_failure:
return -1;
}
+static void fl_bind_class(void *fh, u32 classid, unsigned long cl)
+{
+ struct cls_fl_filter *f = fh;
+
+ if (f && f->res.classid == classid)
+ f->res.class = cl;
+}
+
static struct tcf_proto_ops cls_fl_ops __read_mostly = {
.kind = "flower",
.classify = fl_classify,
@@ -1393,6 +1361,7 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = {
.delete = fl_delete,
.walk = fl_walk,
.dump = fl_dump,
+ .bind_class = fl_bind_class,
.owner = THIS_MODULE,
};
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index d3885362e017..20f0de1a960a 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -28,6 +28,7 @@
#include <net/netlink.h>
#include <net/act_api.h>
#include <net/pkt_cls.h>
+#include <net/sch_generic.h>
#define HTSIZE 256
@@ -46,7 +47,10 @@ struct fw_filter {
#endif /* CONFIG_NET_CLS_IND */
struct tcf_exts exts;
struct tcf_proto *tp;
- struct rcu_head rcu;
+ union {
+ struct work_struct work;
+ struct rcu_head rcu;
+ };
};
static u32 fw_hash(u32 handle)
@@ -83,9 +87,11 @@ static int fw_classify(struct sk_buff *skb, const struct tcf_proto *tp,
}
}
} else {
+ struct Qdisc *q = tcf_block_q(tp->chain->block);
+
/* Old method: classify the packet using its skb mark. */
if (id && (TC_H_MAJ(id) == 0 ||
- !(TC_H_MAJ(id ^ tp->q->handle)))) {
+ !(TC_H_MAJ(id ^ q->handle)))) {
res->classid = id;
res->class = 0;
return 0;
@@ -95,20 +101,20 @@ static int fw_classify(struct sk_buff *skb, const struct tcf_proto *tp,
return -1;
}
-static unsigned long fw_get(struct tcf_proto *tp, u32 handle)
+static void *fw_get(struct tcf_proto *tp, u32 handle)
{
struct fw_head *head = rtnl_dereference(tp->root);
struct fw_filter *f;
if (head == NULL)
- return 0;
+ return NULL;
f = rtnl_dereference(head->ht[fw_hash(handle)]);
for (; f; f = rtnl_dereference(f->next)) {
if (f->id == handle)
- return (unsigned long)f;
+ return f;
}
- return 0;
+ return NULL;
}
static int fw_init(struct tcf_proto *tp)
@@ -119,12 +125,28 @@ static int fw_init(struct tcf_proto *tp)
return 0;
}
+static void __fw_delete_filter(struct fw_filter *f)
+{
+ tcf_exts_destroy(&f->exts);
+ tcf_exts_put_net(&f->exts);
+ kfree(f);
+}
+
+static void fw_delete_filter_work(struct work_struct *work)
+{
+ struct fw_filter *f = container_of(work, struct fw_filter, work);
+
+ rtnl_lock();
+ __fw_delete_filter(f);
+ rtnl_unlock();
+}
+
static void fw_delete_filter(struct rcu_head *head)
{
struct fw_filter *f = container_of(head, struct fw_filter, rcu);
- tcf_exts_destroy(&f->exts);
- kfree(f);
+ INIT_WORK(&f->work, fw_delete_filter_work);
+ tcf_queue_work(&f->work);
}
static void fw_destroy(struct tcf_proto *tp)
@@ -141,16 +163,19 @@ static void fw_destroy(struct tcf_proto *tp)
RCU_INIT_POINTER(head->ht[h],
rtnl_dereference(f->next));
tcf_unbind_filter(tp, &f->res);
- call_rcu(&f->rcu, fw_delete_filter);
+ if (tcf_exts_get_net(&f->exts))
+ call_rcu(&f->rcu, fw_delete_filter);
+ else
+ __fw_delete_filter(f);
}
}
kfree_rcu(head, rcu);
}
-static int fw_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
+static int fw_delete(struct tcf_proto *tp, void *arg, bool *last)
{
struct fw_head *head = rtnl_dereference(tp->root);
- struct fw_filter *f = (struct fw_filter *)arg;
+ struct fw_filter *f = arg;
struct fw_filter __rcu **fp;
struct fw_filter *pfp;
int ret = -EINVAL;
@@ -166,6 +191,7 @@ static int fw_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
if (pfp == f) {
RCU_INIT_POINTER(*fp, rtnl_dereference(f->next));
tcf_unbind_filter(tp, &f->res);
+ tcf_exts_get_net(&f->exts);
call_rcu(&f->rcu, fw_delete_filter);
ret = 0;
break;
@@ -190,22 +216,17 @@ static const struct nla_policy fw_policy[TCA_FW_MAX + 1] = {
[TCA_FW_MASK] = { .type = NLA_U32 },
};
-static int
-fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f,
- struct nlattr **tb, struct nlattr **tca, unsigned long base,
- bool ovr)
+static int fw_set_parms(struct net *net, struct tcf_proto *tp,
+ struct fw_filter *f, struct nlattr **tb,
+ struct nlattr **tca, unsigned long base, bool ovr)
{
struct fw_head *head = rtnl_dereference(tp->root);
- struct tcf_exts e;
u32 mask;
int err;
- err = tcf_exts_init(&e, TCA_FW_ACT, TCA_FW_POLICE);
+ err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &f->exts, ovr);
if (err < 0)
return err;
- err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
- if (err < 0)
- goto errout;
if (tb[TCA_FW_CLASSID]) {
f->res.classid = nla_get_u32(tb[TCA_FW_CLASSID]);
@@ -216,10 +237,8 @@ fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f,
if (tb[TCA_FW_INDEV]) {
int ret;
ret = tcf_change_indev(net, tb[TCA_FW_INDEV]);
- if (ret < 0) {
- err = ret;
- goto errout;
- }
+ if (ret < 0)
+ return ret;
f->ifindex = ret;
}
#endif /* CONFIG_NET_CLS_IND */
@@ -228,25 +247,20 @@ fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f,
if (tb[TCA_FW_MASK]) {
mask = nla_get_u32(tb[TCA_FW_MASK]);
if (mask != head->mask)
- goto errout;
+ return err;
} else if (head->mask != 0xFFFFFFFF)
- goto errout;
-
- tcf_exts_change(tp, &f->exts, &e);
+ return err;
return 0;
-errout:
- tcf_exts_destroy(&e);
- return err;
}
static int fw_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
- u32 handle, struct nlattr **tca, unsigned long *arg,
+ u32 handle, struct nlattr **tca, void **arg,
bool ovr)
{
struct fw_head *head = rtnl_dereference(tp->root);
- struct fw_filter *f = (struct fw_filter *) *arg;
+ struct fw_filter *f = *arg;
struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_FW_MAX + 1];
int err;
@@ -282,7 +296,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
return err;
}
- err = fw_change_attrs(net, tp, fnew, tb, tca, base, ovr);
+ err = fw_set_parms(net, tp, fnew, tb, tca, base, ovr);
if (err < 0) {
tcf_exts_destroy(&fnew->exts);
kfree(fnew);
@@ -298,9 +312,10 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
RCU_INIT_POINTER(fnew->next, rtnl_dereference(pfp->next));
rcu_assign_pointer(*fp, fnew);
tcf_unbind_filter(tp, &f->res);
+ tcf_exts_get_net(&f->exts);
call_rcu(&f->rcu, fw_delete_filter);
- *arg = (unsigned long)fnew;
+ *arg = fnew;
return err;
}
@@ -330,14 +345,14 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
f->id = handle;
f->tp = tp;
- err = fw_change_attrs(net, tp, f, tb, tca, base, ovr);
+ err = fw_set_parms(net, tp, f, tb, tca, base, ovr);
if (err < 0)
goto errout;
RCU_INIT_POINTER(f->next, head->ht[fw_hash(handle)]);
rcu_assign_pointer(head->ht[fw_hash(handle)], f);
- *arg = (unsigned long)f;
+ *arg = f;
return 0;
errout:
@@ -366,7 +381,7 @@ static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg)
arg->count++;
continue;
}
- if (arg->fn(tp, (unsigned long)f, arg) < 0) {
+ if (arg->fn(tp, f, arg) < 0) {
arg->stop = 1;
return;
}
@@ -375,11 +390,11 @@ static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg)
}
}
-static int fw_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
+static int fw_dump(struct net *net, struct tcf_proto *tp, void *fh,
struct sk_buff *skb, struct tcmsg *t)
{
struct fw_head *head = rtnl_dereference(tp->root);
- struct fw_filter *f = (struct fw_filter *)fh;
+ struct fw_filter *f = fh;
struct nlattr *nest;
if (f == NULL)
@@ -387,7 +402,7 @@ static int fw_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
t->tcm_handle = f->id;
- if (!f->res.classid && !tcf_exts_is_available(&f->exts))
+ if (!f->res.classid && !tcf_exts_has_actions(&f->exts))
return skb->len;
nest = nla_nest_start(skb, TCA_OPTIONS);
@@ -424,6 +439,14 @@ nla_put_failure:
return -1;
}
+static void fw_bind_class(void *fh, u32 classid, unsigned long cl)
+{
+ struct fw_filter *f = fh;
+
+ if (f && f->res.classid == classid)
+ f->res.class = cl;
+}
+
static struct tcf_proto_ops cls_fw_ops __read_mostly = {
.kind = "fw",
.classify = fw_classify,
@@ -434,6 +457,7 @@ static struct tcf_proto_ops cls_fw_ops __read_mostly = {
.delete = fw_delete,
.walk = fw_walk,
.dump = fw_dump,
+ .bind_class = fw_bind_class,
.owner = THIS_MODULE,
};
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index 9dc26c32cf32..66d4e0099158 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -21,7 +21,10 @@ struct cls_mall_head {
struct tcf_result res;
u32 handle;
u32 flags;
- struct rcu_head rcu;
+ union {
+ struct work_struct work;
+ struct rcu_head rcu;
+ };
};
static int mall_classify(struct sk_buff *skb, const struct tcf_proto *tp,
@@ -32,6 +35,7 @@ static int mall_classify(struct sk_buff *skb, const struct tcf_proto *tp,
if (tc_skip_sw(head->flags))
return -1;
+ *res = head->res;
return tcf_exts_exec(skb, &head->exts, res);
}
@@ -40,74 +44,93 @@ static int mall_init(struct tcf_proto *tp)
return 0;
}
+static void __mall_destroy(struct cls_mall_head *head)
+{
+ tcf_exts_destroy(&head->exts);
+ tcf_exts_put_net(&head->exts);
+ kfree(head);
+}
+
+static void mall_destroy_work(struct work_struct *work)
+{
+ struct cls_mall_head *head = container_of(work, struct cls_mall_head,
+ work);
+ rtnl_lock();
+ __mall_destroy(head);
+ rtnl_unlock();
+}
+
static void mall_destroy_rcu(struct rcu_head *rcu)
{
struct cls_mall_head *head = container_of(rcu, struct cls_mall_head,
rcu);
- tcf_exts_destroy(&head->exts);
- kfree(head);
+ INIT_WORK(&head->work, mall_destroy_work);
+ tcf_queue_work(&head->work);
+}
+
+static void mall_destroy_hw_filter(struct tcf_proto *tp,
+ struct cls_mall_head *head,
+ unsigned long cookie)
+{
+ struct tc_cls_matchall_offload cls_mall = {};
+ struct tcf_block *block = tp->chain->block;
+
+ tc_cls_common_offload_init(&cls_mall.common, tp);
+ cls_mall.command = TC_CLSMATCHALL_DESTROY;
+ cls_mall.cookie = cookie;
+
+ tc_setup_cb_call(block, NULL, TC_SETUP_CLSMATCHALL, &cls_mall, false);
}
static int mall_replace_hw_filter(struct tcf_proto *tp,
struct cls_mall_head *head,
unsigned long cookie)
{
- struct net_device *dev = tp->q->dev_queue->dev;
- struct tc_to_netdev offload;
- struct tc_cls_matchall_offload mall_offload = {0};
+ struct tc_cls_matchall_offload cls_mall = {};
+ struct tcf_block *block = tp->chain->block;
+ bool skip_sw = tc_skip_sw(head->flags);
int err;
- offload.type = TC_SETUP_MATCHALL;
- offload.cls_mall = &mall_offload;
- offload.cls_mall->command = TC_CLSMATCHALL_REPLACE;
- offload.cls_mall->exts = &head->exts;
- offload.cls_mall->cookie = cookie;
+ tc_cls_common_offload_init(&cls_mall.common, tp);
+ cls_mall.command = TC_CLSMATCHALL_REPLACE;
+ cls_mall.exts = &head->exts;
+ cls_mall.cookie = cookie;
- err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
- tp->chain->index,
- tp->protocol, &offload);
- if (!err)
+ err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSMATCHALL,
+ &cls_mall, skip_sw);
+ if (err < 0) {
+ mall_destroy_hw_filter(tp, head, cookie);
+ return err;
+ } else if (err > 0) {
head->flags |= TCA_CLS_FLAGS_IN_HW;
+ }
- return err;
-}
+ if (skip_sw && !(head->flags & TCA_CLS_FLAGS_IN_HW))
+ return -EINVAL;
-static void mall_destroy_hw_filter(struct tcf_proto *tp,
- struct cls_mall_head *head,
- unsigned long cookie)
-{
- struct net_device *dev = tp->q->dev_queue->dev;
- struct tc_to_netdev offload;
- struct tc_cls_matchall_offload mall_offload = {0};
-
- offload.type = TC_SETUP_MATCHALL;
- offload.cls_mall = &mall_offload;
- offload.cls_mall->command = TC_CLSMATCHALL_DESTROY;
- offload.cls_mall->exts = NULL;
- offload.cls_mall->cookie = cookie;
-
- dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->chain->index,
- tp->protocol, &offload);
+ return 0;
}
static void mall_destroy(struct tcf_proto *tp)
{
struct cls_mall_head *head = rtnl_dereference(tp->root);
- struct net_device *dev = tp->q->dev_queue->dev;
if (!head)
return;
- if (tc_should_offload(dev, tp, head->flags))
+ if (!tc_skip_hw(head->flags))
mall_destroy_hw_filter(tp, head, (unsigned long) head);
- call_rcu(&head->rcu, mall_destroy_rcu);
+ if (tcf_exts_get_net(&head->exts))
+ call_rcu(&head->rcu, mall_destroy_rcu);
+ else
+ __mall_destroy(head);
}
-static unsigned long mall_get(struct tcf_proto *tp, u32 handle)
+static void *mall_get(struct tcf_proto *tp, u32 handle)
{
- return 0UL;
+ return NULL;
}
static const struct nla_policy mall_policy[TCA_MATCHALL_MAX + 1] = {
@@ -120,36 +143,25 @@ static int mall_set_parms(struct net *net, struct tcf_proto *tp,
unsigned long base, struct nlattr **tb,
struct nlattr *est, bool ovr)
{
- struct tcf_exts e;
int err;
- err = tcf_exts_init(&e, TCA_MATCHALL_ACT, 0);
- if (err)
- return err;
- err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
+ err = tcf_exts_validate(net, tp, tb, est, &head->exts, ovr);
if (err < 0)
- goto errout;
+ return err;
if (tb[TCA_MATCHALL_CLASSID]) {
head->res.classid = nla_get_u32(tb[TCA_MATCHALL_CLASSID]);
tcf_bind_filter(tp, &head->res, base);
}
-
- tcf_exts_change(tp, &head->exts, &e);
-
return 0;
-errout:
- tcf_exts_destroy(&e);
- return err;
}
static int mall_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
- unsigned long *arg, bool ovr)
+ void **arg, bool ovr)
{
struct cls_mall_head *head = rtnl_dereference(tp->root);
- struct net_device *dev = tp->q->dev_queue->dev;
struct nlattr *tb[TCA_MATCHALL_MAX + 1];
struct cls_mall_head *new;
u32 flags = 0;
@@ -189,20 +201,16 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
if (err)
goto err_set_parms;
- if (tc_should_offload(dev, tp, flags)) {
+ if (!tc_skip_hw(new->flags)) {
err = mall_replace_hw_filter(tp, new, (unsigned long) new);
- if (err) {
- if (tc_skip_sw(flags))
- goto err_replace_hw_filter;
- else
- err = 0;
- }
+ if (err)
+ goto err_replace_hw_filter;
}
if (!tc_in_hw(new->flags))
new->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
- *arg = (unsigned long) head;
+ *arg = head;
rcu_assign_pointer(tp->root, new);
return 0;
@@ -214,7 +222,7 @@ err_exts_init:
return err;
}
-static int mall_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
+static int mall_delete(struct tcf_proto *tp, void *arg, bool *last)
{
return -EOPNOTSUPP;
}
@@ -225,16 +233,16 @@ static void mall_walk(struct tcf_proto *tp, struct tcf_walker *arg)
if (arg->count < arg->skip)
goto skip;
- if (arg->fn(tp, (unsigned long) head, arg) < 0)
+ if (arg->fn(tp, head, arg) < 0)
arg->stop = 1;
skip:
arg->count++;
}
-static int mall_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
+static int mall_dump(struct net *net, struct tcf_proto *tp, void *fh,
struct sk_buff *skb, struct tcmsg *t)
{
- struct cls_mall_head *head = (struct cls_mall_head *) fh;
+ struct cls_mall_head *head = fh;
struct nlattr *nest;
if (!head)
@@ -268,6 +276,14 @@ nla_put_failure:
return -1;
}
+static void mall_bind_class(void *fh, u32 classid, unsigned long cl)
+{
+ struct cls_mall_head *head = fh;
+
+ if (head && head->res.classid == classid)
+ head->res.class = cl;
+}
+
static struct tcf_proto_ops cls_mall_ops __read_mostly = {
.kind = "matchall",
.classify = mall_classify,
@@ -278,6 +294,7 @@ static struct tcf_proto_ops cls_mall_ops __read_mostly = {
.delete = mall_delete,
.walk = mall_walk,
.dump = mall_dump,
+ .bind_class = mall_bind_class,
.owner = THIS_MODULE,
};
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index d63d5502ee02..ac9a5b8825b9 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -57,7 +57,10 @@ struct route4_filter {
u32 handle;
struct route4_bucket *bkt;
struct tcf_proto *tp;
- struct rcu_head rcu;
+ union {
+ struct work_struct work;
+ struct rcu_head rcu;
+ };
};
#define ROUTE4_FAILURE ((struct route4_filter *)(-1L))
@@ -113,7 +116,7 @@ static inline int route4_hash_wild(void)
#define ROUTE4_APPLY_RESULT() \
{ \
*res = f->res; \
- if (tcf_exts_is_available(&f->exts)) { \
+ if (tcf_exts_has_actions(&f->exts)) { \
int r = tcf_exts_exec(skb, &f->exts, res); \
if (r < 0) { \
dont_cache = 1; \
@@ -216,7 +219,7 @@ static inline u32 from_hash(u32 id)
return 16 + (id & 0xF);
}
-static unsigned long route4_get(struct tcf_proto *tp, u32 handle)
+static void *route4_get(struct tcf_proto *tp, u32 handle)
{
struct route4_head *head = rtnl_dereference(tp->root);
struct route4_bucket *b;
@@ -225,11 +228,11 @@ static unsigned long route4_get(struct tcf_proto *tp, u32 handle)
h1 = to_hash(handle);
if (h1 > 256)
- return 0;
+ return NULL;
h2 = from_hash(handle >> 16);
if (h2 > 32)
- return 0;
+ return NULL;
b = rtnl_dereference(head->table[h1]);
if (b) {
@@ -237,9 +240,9 @@ static unsigned long route4_get(struct tcf_proto *tp, u32 handle)
f;
f = rtnl_dereference(f->next))
if (f->handle == handle)
- return (unsigned long)f;
+ return f;
}
- return 0;
+ return NULL;
}
static int route4_init(struct tcf_proto *tp)
@@ -254,12 +257,28 @@ static int route4_init(struct tcf_proto *tp)
return 0;
}
+static void __route4_delete_filter(struct route4_filter *f)
+{
+ tcf_exts_destroy(&f->exts);
+ tcf_exts_put_net(&f->exts);
+ kfree(f);
+}
+
+static void route4_delete_filter_work(struct work_struct *work)
+{
+ struct route4_filter *f = container_of(work, struct route4_filter, work);
+
+ rtnl_lock();
+ __route4_delete_filter(f);
+ rtnl_unlock();
+}
+
static void route4_delete_filter(struct rcu_head *head)
{
struct route4_filter *f = container_of(head, struct route4_filter, rcu);
- tcf_exts_destroy(&f->exts);
- kfree(f);
+ INIT_WORK(&f->work, route4_delete_filter_work);
+ tcf_queue_work(&f->work);
}
static void route4_destroy(struct tcf_proto *tp)
@@ -284,7 +303,10 @@ static void route4_destroy(struct tcf_proto *tp)
next = rtnl_dereference(f->next);
RCU_INIT_POINTER(b->ht[h2], next);
tcf_unbind_filter(tp, &f->res);
- call_rcu(&f->rcu, route4_delete_filter);
+ if (tcf_exts_get_net(&f->exts))
+ call_rcu(&f->rcu, route4_delete_filter);
+ else
+ __route4_delete_filter(f);
}
}
RCU_INIT_POINTER(head->table[h1], NULL);
@@ -294,10 +316,10 @@ static void route4_destroy(struct tcf_proto *tp)
kfree_rcu(head, rcu);
}
-static int route4_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
+static int route4_delete(struct tcf_proto *tp, void *arg, bool *last)
{
struct route4_head *head = rtnl_dereference(tp->root);
- struct route4_filter *f = (struct route4_filter *)arg;
+ struct route4_filter *f = arg;
struct route4_filter __rcu **fp;
struct route4_filter *nf;
struct route4_bucket *b;
@@ -325,6 +347,7 @@ static int route4_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
/* Delete it */
tcf_unbind_filter(tp, &f->res);
+ tcf_exts_get_net(&f->exts);
call_rcu(&f->rcu, route4_delete_filter);
/* Strip RTNL protected tree */
@@ -372,37 +395,32 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
struct route4_filter *fp;
unsigned int h1;
struct route4_bucket *b;
- struct tcf_exts e;
int err;
- err = tcf_exts_init(&e, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE);
+ err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr);
if (err < 0)
return err;
- err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
- if (err < 0)
- goto errout;
- err = -EINVAL;
if (tb[TCA_ROUTE4_TO]) {
if (new && handle & 0x8000)
- goto errout;
+ return -EINVAL;
to = nla_get_u32(tb[TCA_ROUTE4_TO]);
if (to > 0xFF)
- goto errout;
+ return -EINVAL;
nhandle = to;
}
if (tb[TCA_ROUTE4_FROM]) {
if (tb[TCA_ROUTE4_IIF])
- goto errout;
+ return -EINVAL;
id = nla_get_u32(tb[TCA_ROUTE4_FROM]);
if (id > 0xFF)
- goto errout;
+ return -EINVAL;
nhandle |= id << 16;
} else if (tb[TCA_ROUTE4_IIF]) {
id = nla_get_u32(tb[TCA_ROUTE4_IIF]);
if (id > 0x7FFF)
- goto errout;
+ return -EINVAL;
nhandle |= (id | 0x8000) << 16;
} else
nhandle |= 0xFFFF << 16;
@@ -410,27 +428,25 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
if (handle && new) {
nhandle |= handle & 0x7F00;
if (nhandle != handle)
- goto errout;
+ return -EINVAL;
}
h1 = to_hash(nhandle);
b = rtnl_dereference(head->table[h1]);
if (!b) {
- err = -ENOBUFS;
b = kzalloc(sizeof(struct route4_bucket), GFP_KERNEL);
if (b == NULL)
- goto errout;
+ return -ENOBUFS;
rcu_assign_pointer(head->table[h1], b);
} else {
unsigned int h2 = from_hash(nhandle >> 16);
- err = -EEXIST;
for (fp = rtnl_dereference(b->ht[h2]);
fp;
fp = rtnl_dereference(fp->next))
if (fp->handle == f->handle)
- goto errout;
+ return -EEXIST;
}
if (tb[TCA_ROUTE4_TO])
@@ -450,17 +466,12 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
tcf_bind_filter(tp, &f->res, base);
}
- tcf_exts_change(tp, &f->exts, &e);
-
return 0;
-errout:
- tcf_exts_destroy(&e);
- return err;
}
static int route4_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base, u32 handle,
- struct nlattr **tca, unsigned long *arg, bool ovr)
+ struct nlattr **tca, void **arg, bool ovr)
{
struct route4_head *head = rtnl_dereference(tp->root);
struct route4_filter __rcu **fp;
@@ -479,7 +490,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
if (err < 0)
return err;
- fold = (struct route4_filter *)*arg;
+ fold = *arg;
if (fold && handle && fold->handle != handle)
return -EINVAL;
@@ -537,9 +548,10 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
}
route4_reset_fastmap(head);
- *arg = (unsigned long)f;
+ *arg = f;
if (fold) {
tcf_unbind_filter(tp, &fold->res);
+ tcf_exts_get_net(&fold->exts);
call_rcu(&fold->rcu, route4_delete_filter);
}
return 0;
@@ -576,7 +588,7 @@ static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg)
arg->count++;
continue;
}
- if (arg->fn(tp, (unsigned long)f, arg) < 0) {
+ if (arg->fn(tp, f, arg) < 0) {
arg->stop = 1;
return;
}
@@ -587,10 +599,10 @@ static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg)
}
}
-static int route4_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
+static int route4_dump(struct net *net, struct tcf_proto *tp, void *fh,
struct sk_buff *skb, struct tcmsg *t)
{
- struct route4_filter *f = (struct route4_filter *)fh;
+ struct route4_filter *f = fh;
struct nlattr *nest;
u32 id;
@@ -636,6 +648,14 @@ nla_put_failure:
return -1;
}
+static void route4_bind_class(void *fh, u32 classid, unsigned long cl)
+{
+ struct route4_filter *f = fh;
+
+ if (f && f->res.classid == classid)
+ f->res.class = cl;
+}
+
static struct tcf_proto_ops cls_route4_ops __read_mostly = {
.kind = "route",
.classify = route4_classify,
@@ -646,6 +666,7 @@ static struct tcf_proto_ops cls_route4_ops __read_mostly = {
.delete = route4_delete,
.walk = route4_walk,
.dump = route4_dump,
+ .bind_class = route4_bind_class,
.owner = THIS_MODULE,
};
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 0d9d07798699..cf325625c99d 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -97,7 +97,10 @@ struct rsvp_filter {
u32 handle;
struct rsvp_session *sess;
- struct rcu_head rcu;
+ union {
+ struct work_struct work;
+ struct rcu_head rcu;
+ };
};
static inline unsigned int hash_dst(__be32 *dst, u8 protocol, u8 tunnelid)
@@ -248,7 +251,7 @@ static void rsvp_replace(struct tcf_proto *tp, struct rsvp_filter *n, u32 h)
BUG_ON(1);
}
-static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle)
+static void *rsvp_get(struct tcf_proto *tp, u32 handle)
{
struct rsvp_head *head = rtnl_dereference(tp->root);
struct rsvp_session *s;
@@ -257,17 +260,17 @@ static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle)
unsigned int h2 = (handle >> 8) & 0xFF;
if (h2 > 16)
- return 0;
+ return NULL;
for (s = rtnl_dereference(head->ht[h1]); s;
s = rtnl_dereference(s->next)) {
for (f = rtnl_dereference(s->ht[h2]); f;
f = rtnl_dereference(f->next)) {
if (f->handle == handle)
- return (unsigned long)f;
+ return f;
}
}
- return 0;
+ return NULL;
}
static int rsvp_init(struct tcf_proto *tp)
@@ -282,12 +285,28 @@ static int rsvp_init(struct tcf_proto *tp)
return -ENOBUFS;
}
+static void __rsvp_delete_filter(struct rsvp_filter *f)
+{
+ tcf_exts_destroy(&f->exts);
+ tcf_exts_put_net(&f->exts);
+ kfree(f);
+}
+
+static void rsvp_delete_filter_work(struct work_struct *work)
+{
+ struct rsvp_filter *f = container_of(work, struct rsvp_filter, work);
+
+ rtnl_lock();
+ __rsvp_delete_filter(f);
+ rtnl_unlock();
+}
+
static void rsvp_delete_filter_rcu(struct rcu_head *head)
{
struct rsvp_filter *f = container_of(head, struct rsvp_filter, rcu);
- tcf_exts_destroy(&f->exts);
- kfree(f);
+ INIT_WORK(&f->work, rsvp_delete_filter_work);
+ tcf_queue_work(&f->work);
}
static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
@@ -297,7 +316,10 @@ static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
* grace period, since converted-to-rcu actions are relying on that
* in cleanup() callback
*/
- call_rcu(&f->rcu, rsvp_delete_filter_rcu);
+ if (tcf_exts_get_net(&f->exts))
+ call_rcu(&f->rcu, rsvp_delete_filter_rcu);
+ else
+ __rsvp_delete_filter(f);
}
static void rsvp_destroy(struct tcf_proto *tp)
@@ -328,10 +350,10 @@ static void rsvp_destroy(struct tcf_proto *tp)
kfree_rcu(data, rcu);
}
-static int rsvp_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
+static int rsvp_delete(struct tcf_proto *tp, void *arg, bool *last)
{
struct rsvp_head *head = rtnl_dereference(tp->root);
- struct rsvp_filter *nfp, *f = (struct rsvp_filter *)arg;
+ struct rsvp_filter *nfp, *f = arg;
struct rsvp_filter __rcu **fp;
unsigned int h = f->handle;
struct rsvp_session __rcu **sp;
@@ -389,7 +411,7 @@ static unsigned int gen_handle(struct tcf_proto *tp, unsigned salt)
if ((data->hgenerator += 0x10000) == 0)
data->hgenerator = 0x10000;
h = data->hgenerator|salt;
- if (rsvp_get(tp, h) == 0)
+ if (!rsvp_get(tp, h))
return h;
}
return 0;
@@ -464,7 +486,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle,
struct nlattr **tca,
- unsigned long *arg, bool ovr)
+ void **arg, bool ovr)
{
struct rsvp_head *data = rtnl_dereference(tp->root);
struct rsvp_filter *f, *nfp;
@@ -493,7 +515,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
if (err < 0)
goto errout2;
- f = (struct rsvp_filter *)*arg;
+ f = *arg;
if (f) {
/* Node exists: adjust only classid */
struct rsvp_filter *n;
@@ -518,7 +540,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
tcf_bind_filter(tp, &n->res, base);
}
- tcf_exts_change(tp, &n->exts, &e);
+ tcf_exts_change(&n->exts, &e);
rsvp_replace(tp, n, handle);
return 0;
}
@@ -591,7 +613,7 @@ insert:
if (f->tunnelhdr == 0)
tcf_bind_filter(tp, &f->res, base);
- tcf_exts_change(tp, &f->exts, &e);
+ tcf_exts_change(&f->exts, &e);
fp = &s->ht[h2];
for (nfp = rtnl_dereference(*fp); nfp;
@@ -604,7 +626,7 @@ insert:
RCU_INIT_POINTER(f->next, nfp);
rcu_assign_pointer(*fp, f);
- *arg = (unsigned long)f;
+ *arg = f;
return 0;
}
}
@@ -663,7 +685,7 @@ static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
arg->count++;
continue;
}
- if (arg->fn(tp, (unsigned long)f, arg) < 0) {
+ if (arg->fn(tp, f, arg) < 0) {
arg->stop = 1;
return;
}
@@ -674,10 +696,10 @@ static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
}
}
-static int rsvp_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
+static int rsvp_dump(struct net *net, struct tcf_proto *tp, void *fh,
struct sk_buff *skb, struct tcmsg *t)
{
- struct rsvp_filter *f = (struct rsvp_filter *)fh;
+ struct rsvp_filter *f = fh;
struct rsvp_session *s;
struct nlattr *nest;
struct tc_rsvp_pinfo pinfo;
@@ -723,6 +745,14 @@ nla_put_failure:
return -1;
}
+static void rsvp_bind_class(void *fh, u32 classid, unsigned long cl)
+{
+ struct rsvp_filter *f = fh;
+
+ if (f && f->res.classid == classid)
+ f->res.class = cl;
+}
+
static struct tcf_proto_ops RSVP_OPS __read_mostly = {
.kind = RSVP_ID,
.classify = rsvp_classify,
@@ -733,6 +763,7 @@ static struct tcf_proto_ops RSVP_OPS __read_mostly = {
.delete = rsvp_delete,
.walk = rsvp_walk,
.dump = rsvp_dump,
+ .bind_class = rsvp_bind_class,
.owner = THIS_MODULE,
};
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 8a8a58357c39..67467ae24c97 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -13,6 +13,7 @@
#include <net/act_api.h>
#include <net/netlink.h>
#include <net/pkt_cls.h>
+#include <net/sch_generic.h>
/*
* Passing parameters to the root seems to be done more awkwardly than really
@@ -27,14 +28,20 @@
struct tcindex_filter_result {
struct tcf_exts exts;
struct tcf_result res;
- struct rcu_head rcu;
+ union {
+ struct work_struct work;
+ struct rcu_head rcu;
+ };
};
struct tcindex_filter {
u16 key;
struct tcindex_filter_result result;
struct tcindex_filter __rcu *next;
- struct rcu_head rcu;
+ union {
+ struct work_struct work;
+ struct rcu_head rcu;
+ };
};
@@ -52,7 +59,7 @@ struct tcindex_data {
static inline int tcindex_filter_is_set(struct tcindex_filter_result *r)
{
- return tcf_exts_is_predicative(&r->exts) || r->res.classid;
+ return tcf_exts_has_actions(&r->exts) || r->res.classid;
}
static struct tcindex_filter_result *tcindex_lookup(struct tcindex_data *p,
@@ -90,9 +97,11 @@ static int tcindex_classify(struct sk_buff *skb, const struct tcf_proto *tp,
f = tcindex_lookup(p, key);
if (!f) {
+ struct Qdisc *q = tcf_block_q(tp->chain->block);
+
if (!p->fall_through)
return -1;
- res->classid = TC_H_MAKE(TC_H_MAJ(tp->q->handle), key);
+ res->classid = TC_H_MAKE(TC_H_MAJ(q->handle), key);
res->class = 0;
pr_debug("alg 0x%x\n", res->classid);
return 0;
@@ -104,16 +113,16 @@ static int tcindex_classify(struct sk_buff *skb, const struct tcf_proto *tp,
}
-static unsigned long tcindex_get(struct tcf_proto *tp, u32 handle)
+static void *tcindex_get(struct tcf_proto *tp, u32 handle)
{
struct tcindex_data *p = rtnl_dereference(tp->root);
struct tcindex_filter_result *r;
pr_debug("tcindex_get(tp %p,handle 0x%08x)\n", tp, handle);
if (p->perfect && handle >= p->alloc_hash)
- return 0;
+ return NULL;
r = tcindex_lookup(p, handle);
- return r && tcindex_filter_is_set(r) ? (unsigned long) r : 0UL;
+ return r && tcindex_filter_is_set(r) ? r : NULL;
}
static int tcindex_init(struct tcf_proto *tp)
@@ -133,12 +142,46 @@ static int tcindex_init(struct tcf_proto *tp)
return 0;
}
+static void __tcindex_destroy_rexts(struct tcindex_filter_result *r)
+{
+ tcf_exts_destroy(&r->exts);
+ tcf_exts_put_net(&r->exts);
+}
+
+static void tcindex_destroy_rexts_work(struct work_struct *work)
+{
+ struct tcindex_filter_result *r;
+
+ r = container_of(work, struct tcindex_filter_result, work);
+ rtnl_lock();
+ __tcindex_destroy_rexts(r);
+ rtnl_unlock();
+}
+
static void tcindex_destroy_rexts(struct rcu_head *head)
{
struct tcindex_filter_result *r;
r = container_of(head, struct tcindex_filter_result, rcu);
- tcf_exts_destroy(&r->exts);
+ INIT_WORK(&r->work, tcindex_destroy_rexts_work);
+ tcf_queue_work(&r->work);
+}
+
+static void __tcindex_destroy_fexts(struct tcindex_filter *f)
+{
+ tcf_exts_destroy(&f->result.exts);
+ tcf_exts_put_net(&f->result.exts);
+ kfree(f);
+}
+
+static void tcindex_destroy_fexts_work(struct work_struct *work)
+{
+ struct tcindex_filter *f = container_of(work, struct tcindex_filter,
+ work);
+
+ rtnl_lock();
+ __tcindex_destroy_fexts(f);
+ rtnl_unlock();
}
static void tcindex_destroy_fexts(struct rcu_head *head)
@@ -146,18 +189,18 @@ static void tcindex_destroy_fexts(struct rcu_head *head)
struct tcindex_filter *f = container_of(head, struct tcindex_filter,
rcu);
- tcf_exts_destroy(&f->result.exts);
- kfree(f);
+ INIT_WORK(&f->work, tcindex_destroy_fexts_work);
+ tcf_queue_work(&f->work);
}
-static int tcindex_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
+static int tcindex_delete(struct tcf_proto *tp, void *arg, bool *last)
{
struct tcindex_data *p = rtnl_dereference(tp->root);
- struct tcindex_filter_result *r = (struct tcindex_filter_result *) arg;
+ struct tcindex_filter_result *r = arg;
struct tcindex_filter __rcu **walk;
struct tcindex_filter *f = NULL;
- pr_debug("tcindex_delete(tp %p,arg 0x%lx),p %p\n", tp, arg, p);
+ pr_debug("tcindex_delete(tp %p,arg %p),p %p\n", tp, arg, p);
if (p->perfect) {
if (!r->res.class)
return -ENOENT;
@@ -182,18 +225,24 @@ found:
* grace period, since converted-to-rcu actions are relying on that
* in cleanup() callback
*/
- if (f)
- call_rcu(&f->rcu, tcindex_destroy_fexts);
- else
- call_rcu(&r->rcu, tcindex_destroy_rexts);
+ if (f) {
+ if (tcf_exts_get_net(&f->result.exts))
+ call_rcu(&f->rcu, tcindex_destroy_fexts);
+ else
+ __tcindex_destroy_fexts(f);
+ } else {
+ if (tcf_exts_get_net(&r->exts))
+ call_rcu(&r->rcu, tcindex_destroy_rexts);
+ else
+ __tcindex_destroy_rexts(r);
+ }
*last = false;
return 0;
}
static int tcindex_destroy_element(struct tcf_proto *tp,
- unsigned long arg,
- struct tcf_walker *walker)
+ void *arg, struct tcf_walker *walker)
{
bool last;
@@ -419,9 +468,9 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
}
if (old_r)
- tcf_exts_change(tp, &r->exts, &e);
+ tcf_exts_change(&r->exts, &e);
else
- tcf_exts_change(tp, &cr.exts, &e);
+ tcf_exts_change(&cr.exts, &e);
if (old_r && old_r != r) {
err = tcindex_filter_result_init(old_r);
@@ -439,7 +488,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
struct tcindex_filter *nfp;
struct tcindex_filter __rcu **fp;
- tcf_exts_change(tp, &f->result.exts, &r->exts);
+ tcf_exts_change(&f->result.exts, &r->exts);
fp = cp->h + (handle % cp->hash);
for (nfp = rtnl_dereference(*fp);
@@ -471,17 +520,17 @@ errout:
static int
tcindex_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base, u32 handle,
- struct nlattr **tca, unsigned long *arg, bool ovr)
+ struct nlattr **tca, void **arg, bool ovr)
{
struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_TCINDEX_MAX + 1];
struct tcindex_data *p = rtnl_dereference(tp->root);
- struct tcindex_filter_result *r = (struct tcindex_filter_result *) *arg;
+ struct tcindex_filter_result *r = *arg;
int err;
pr_debug("tcindex_change(tp %p,handle 0x%08x,tca %p,arg %p),opt %p,"
- "p %p,r %p,*arg 0x%lx\n",
- tp, handle, tca, arg, opt, p, r, arg ? *arg : 0L);
+ "p %p,r %p,*arg %p\n",
+ tp, handle, tca, arg, opt, p, r, arg ? *arg : NULL);
if (!opt)
return 0;
@@ -506,9 +555,7 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
if (!p->perfect[i].res.class)
continue;
if (walker->count >= walker->skip) {
- if (walker->fn(tp,
- (unsigned long) (p->perfect+i), walker)
- < 0) {
+ if (walker->fn(tp, p->perfect + i, walker) < 0) {
walker->stop = 1;
return;
}
@@ -522,8 +569,7 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
for (f = rtnl_dereference(p->h[i]); f; f = next) {
next = rtnl_dereference(f->next);
if (walker->count >= walker->skip) {
- if (walker->fn(tp, (unsigned long) &f->result,
- walker) < 0) {
+ if (walker->fn(tp, &f->result, walker) < 0) {
walker->stop = 1;
return;
}
@@ -548,14 +594,14 @@ static void tcindex_destroy(struct tcf_proto *tp)
}
-static int tcindex_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
+static int tcindex_dump(struct net *net, struct tcf_proto *tp, void *fh,
struct sk_buff *skb, struct tcmsg *t)
{
struct tcindex_data *p = rtnl_dereference(tp->root);
- struct tcindex_filter_result *r = (struct tcindex_filter_result *) fh;
+ struct tcindex_filter_result *r = fh;
struct nlattr *nest;
- pr_debug("tcindex_dump(tp %p,fh 0x%lx,skb %p,t %p),p %p,r %p\n",
+ pr_debug("tcindex_dump(tp %p,fh %p,skb %p,t %p),p %p,r %p\n",
tp, fh, skb, t, p, r);
pr_debug("p->perfect %p p->h %p\n", p->perfect, p->h);
@@ -610,6 +656,14 @@ nla_put_failure:
return -1;
}
+static void tcindex_bind_class(void *fh, u32 classid, unsigned long cl)
+{
+ struct tcindex_filter_result *r = fh;
+
+ if (r && r->res.classid == classid)
+ r->res.class = cl;
+}
+
static struct tcf_proto_ops cls_tcindex_ops __read_mostly = {
.kind = "tcindex",
.classify = tcindex_classify,
@@ -620,6 +674,7 @@ static struct tcf_proto_ops cls_tcindex_ops __read_mostly = {
.delete = tcindex_delete,
.walk = tcindex_walk,
.dump = tcindex_dump,
+ .bind_class = tcindex_bind_class,
.owner = THIS_MODULE,
};
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 2d01195153e6..ac152b4f4247 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -40,10 +40,13 @@
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>
#include <linux/bitmap.h>
+#include <linux/netdevice.h>
+#include <linux/hash.h>
#include <net/netlink.h>
#include <net/act_api.h>
#include <net/pkt_cls.h>
#include <linux/netdevice.h>
+#include <linux/idr.h>
struct tc_u_knode {
struct tc_u_knode __rcu *next;
@@ -66,7 +69,10 @@ struct tc_u_knode {
u32 __percpu *pcpu_success;
#endif
struct tcf_proto *tp;
- struct rcu_head rcu;
+ union {
+ struct work_struct work;
+ struct rcu_head rcu;
+ };
/* The 'sel' field MUST be the last field in structure to allow for
* tc_u32_keys allocated at end of structure.
*/
@@ -80,6 +86,7 @@ struct tc_u_hnode {
struct tc_u_common *tp_c;
int refcnt;
unsigned int divisor;
+ struct idr handle_idr;
struct rcu_head rcu;
/* The 'ht' field MUST be the last field in structure to allow for
* more entries allocated at end of structure.
@@ -89,9 +96,10 @@ struct tc_u_hnode {
struct tc_u_common {
struct tc_u_hnode __rcu *hlist;
- struct Qdisc *q;
+ struct tcf_block *block;
int refcnt;
- u32 hgenerator;
+ struct idr handle_idr;
+ struct hlist_node hnode;
struct rcu_head rcu;
};
@@ -289,7 +297,7 @@ out:
}
-static unsigned long u32_get(struct tcf_proto *tp, u32 handle)
+static void *u32_get(struct tcf_proto *tp, u32 handle)
{
struct tc_u_hnode *ht;
struct tc_u_common *tp_c = tp->data;
@@ -300,43 +308,68 @@ static unsigned long u32_get(struct tcf_proto *tp, u32 handle)
ht = u32_lookup_ht(tp_c, TC_U32_HTID(handle));
if (!ht)
- return 0;
+ return NULL;
if (TC_U32_KEY(handle) == 0)
- return (unsigned long)ht;
+ return ht;
- return (unsigned long)u32_lookup_key(ht, handle);
+ return u32_lookup_key(ht, handle);
}
-static u32 gen_new_htid(struct tc_u_common *tp_c)
+static u32 gen_new_htid(struct tc_u_common *tp_c, struct tc_u_hnode *ptr)
{
- int i = 0x800;
+ unsigned long idr_index;
+ int err;
- /* hgenerator only used inside rtnl lock it is safe to increment
+ /* This is only used inside rtnl lock it is safe to increment
* without read _copy_ update semantics
*/
- do {
- if (++tp_c->hgenerator == 0x7FF)
- tp_c->hgenerator = 1;
- } while (--i > 0 && u32_lookup_ht(tp_c, (tp_c->hgenerator|0x800)<<20));
+ err = idr_alloc_ext(&tp_c->handle_idr, ptr, &idr_index,
+ 1, 0x7FF, GFP_KERNEL);
+ if (err)
+ return 0;
+ return (u32)(idr_index | 0x800) << 20;
+}
+
+static struct hlist_head *tc_u_common_hash;
+
+#define U32_HASH_SHIFT 10
+#define U32_HASH_SIZE (1 << U32_HASH_SHIFT)
- return i > 0 ? (tp_c->hgenerator|0x800)<<20 : 0;
+static unsigned int tc_u_hash(const struct tcf_proto *tp)
+{
+ return hash_ptr(tp->chain->block, U32_HASH_SHIFT);
+}
+
+static struct tc_u_common *tc_u_common_find(const struct tcf_proto *tp)
+{
+ struct tc_u_common *tc;
+ unsigned int h;
+
+ h = tc_u_hash(tp);
+ hlist_for_each_entry(tc, &tc_u_common_hash[h], hnode) {
+ if (tc->block == tp->chain->block)
+ return tc;
+ }
+ return NULL;
}
static int u32_init(struct tcf_proto *tp)
{
struct tc_u_hnode *root_ht;
struct tc_u_common *tp_c;
+ unsigned int h;
- tp_c = tp->q->u32_node;
+ tp_c = tc_u_common_find(tp);
root_ht = kzalloc(sizeof(*root_ht), GFP_KERNEL);
if (root_ht == NULL)
return -ENOBUFS;
root_ht->refcnt++;
- root_ht->handle = tp_c ? gen_new_htid(tp_c) : 0x80000000;
+ root_ht->handle = tp_c ? gen_new_htid(tp_c, root_ht) : 0x80000000;
root_ht->prio = tp->prio;
+ idr_init(&root_ht->handle_idr);
if (tp_c == NULL) {
tp_c = kzalloc(sizeof(*tp_c), GFP_KERNEL);
@@ -344,8 +377,12 @@ static int u32_init(struct tcf_proto *tp)
kfree(root_ht);
return -ENOBUFS;
}
- tp_c->q = tp->q;
- tp->q->u32_node = tp_c;
+ tp_c->block = tp->chain->block;
+ INIT_HLIST_NODE(&tp_c->hnode);
+ idr_init(&tp_c->handle_idr);
+
+ h = tc_u_hash(tp);
+ hlist_add_head(&tp_c->hnode, &tc_u_common_hash[h]);
}
tp_c->refcnt++;
@@ -362,6 +399,7 @@ static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n,
bool free_pf)
{
tcf_exts_destroy(&n->exts);
+ tcf_exts_put_net(&n->exts);
if (n->ht_down)
n->ht_down->refcnt--;
#ifdef CONFIG_CLS_U32_PERF
@@ -384,11 +422,21 @@ static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n,
* this the u32_delete_key_rcu variant does not free the percpu
* statistics.
*/
+static void u32_delete_key_work(struct work_struct *work)
+{
+ struct tc_u_knode *key = container_of(work, struct tc_u_knode, work);
+
+ rtnl_lock();
+ u32_destroy_key(key->tp, key, false);
+ rtnl_unlock();
+}
+
static void u32_delete_key_rcu(struct rcu_head *rcu)
{
struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu);
- u32_destroy_key(key->tp, key, false);
+ INIT_WORK(&key->work, u32_delete_key_work);
+ tcf_queue_work(&key->work);
}
/* u32_delete_key_freepf_rcu is the rcu callback variant
@@ -398,11 +446,21 @@ static void u32_delete_key_rcu(struct rcu_head *rcu)
* for the variant that should be used with keys return from
* u32_init_knode()
*/
+static void u32_delete_key_freepf_work(struct work_struct *work)
+{
+ struct tc_u_knode *key = container_of(work, struct tc_u_knode, work);
+
+ rtnl_lock();
+ u32_destroy_key(key->tp, key, true);
+ rtnl_unlock();
+}
+
static void u32_delete_key_freepf_rcu(struct rcu_head *rcu)
{
struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu);
- u32_destroy_key(key->tp, key, true);
+ INIT_WORK(&key->work, u32_delete_key_freepf_work);
+ tcf_queue_work(&key->work);
}
static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
@@ -419,6 +477,7 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
RCU_INIT_POINTER(*kp, key->next);
tcf_unbind_filter(tp, &key->res);
+ tcf_exts_get_net(&key->exts);
call_rcu(&key->rcu, u32_delete_key_freepf_rcu);
return 0;
}
@@ -428,111 +487,95 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
return 0;
}
-static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle)
+static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h)
{
- struct net_device *dev = tp->q->dev_queue->dev;
- struct tc_cls_u32_offload u32_offload = {0};
- struct tc_to_netdev offload;
-
- offload.type = TC_SETUP_CLSU32;
- offload.cls_u32 = &u32_offload;
-
- if (tc_should_offload(dev, tp, 0)) {
- offload.cls_u32->command = TC_CLSU32_DELETE_KNODE;
- offload.cls_u32->knode.handle = handle;
- dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
- tp->chain->index, tp->protocol,
- &offload);
- }
+ struct tcf_block *block = tp->chain->block;
+ struct tc_cls_u32_offload cls_u32 = {};
+
+ tc_cls_common_offload_init(&cls_u32.common, tp);
+ cls_u32.command = TC_CLSU32_DELETE_HNODE;
+ cls_u32.hnode.divisor = h->divisor;
+ cls_u32.hnode.handle = h->handle;
+ cls_u32.hnode.prio = h->prio;
+
+ tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, false);
}
static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
u32 flags)
{
- struct net_device *dev = tp->q->dev_queue->dev;
- struct tc_cls_u32_offload u32_offload = {0};
- struct tc_to_netdev offload;
+ struct tcf_block *block = tp->chain->block;
+ struct tc_cls_u32_offload cls_u32 = {};
+ bool skip_sw = tc_skip_sw(flags);
+ bool offloaded = false;
int err;
- if (!tc_should_offload(dev, tp, flags))
- return tc_skip_sw(flags) ? -EINVAL : 0;
-
- offload.type = TC_SETUP_CLSU32;
- offload.cls_u32 = &u32_offload;
-
- offload.cls_u32->command = TC_CLSU32_NEW_HNODE;
- offload.cls_u32->hnode.divisor = h->divisor;
- offload.cls_u32->hnode.handle = h->handle;
- offload.cls_u32->hnode.prio = h->prio;
+ tc_cls_common_offload_init(&cls_u32.common, tp);
+ cls_u32.command = TC_CLSU32_NEW_HNODE;
+ cls_u32.hnode.divisor = h->divisor;
+ cls_u32.hnode.handle = h->handle;
+ cls_u32.hnode.prio = h->prio;
- err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
- tp->chain->index, tp->protocol,
- &offload);
- if (tc_skip_sw(flags))
+ err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw);
+ if (err < 0) {
+ u32_clear_hw_hnode(tp, h);
return err;
+ } else if (err > 0) {
+ offloaded = true;
+ }
+
+ if (skip_sw && !offloaded)
+ return -EINVAL;
return 0;
}
-static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h)
+static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle)
{
- struct net_device *dev = tp->q->dev_queue->dev;
- struct tc_cls_u32_offload u32_offload = {0};
- struct tc_to_netdev offload;
-
- offload.type = TC_SETUP_CLSU32;
- offload.cls_u32 = &u32_offload;
-
- if (tc_should_offload(dev, tp, 0)) {
- offload.cls_u32->command = TC_CLSU32_DELETE_HNODE;
- offload.cls_u32->hnode.divisor = h->divisor;
- offload.cls_u32->hnode.handle = h->handle;
- offload.cls_u32->hnode.prio = h->prio;
-
- dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
- tp->chain->index, tp->protocol,
- &offload);
- }
+ struct tcf_block *block = tp->chain->block;
+ struct tc_cls_u32_offload cls_u32 = {};
+
+ tc_cls_common_offload_init(&cls_u32.common, tp);
+ cls_u32.command = TC_CLSU32_DELETE_KNODE;
+ cls_u32.knode.handle = handle;
+
+ tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, false);
}
static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
u32 flags)
{
- struct net_device *dev = tp->q->dev_queue->dev;
- struct tc_cls_u32_offload u32_offload = {0};
- struct tc_to_netdev offload;
+ struct tcf_block *block = tp->chain->block;
+ struct tc_cls_u32_offload cls_u32 = {};
+ bool skip_sw = tc_skip_sw(flags);
int err;
- offload.type = TC_SETUP_CLSU32;
- offload.cls_u32 = &u32_offload;
-
- if (!tc_should_offload(dev, tp, flags))
- return tc_skip_sw(flags) ? -EINVAL : 0;
-
- offload.cls_u32->command = TC_CLSU32_REPLACE_KNODE;
- offload.cls_u32->knode.handle = n->handle;
- offload.cls_u32->knode.fshift = n->fshift;
+ tc_cls_common_offload_init(&cls_u32.common, tp);
+ cls_u32.command = TC_CLSU32_REPLACE_KNODE;
+ cls_u32.knode.handle = n->handle;
+ cls_u32.knode.fshift = n->fshift;
#ifdef CONFIG_CLS_U32_MARK
- offload.cls_u32->knode.val = n->val;
- offload.cls_u32->knode.mask = n->mask;
+ cls_u32.knode.val = n->val;
+ cls_u32.knode.mask = n->mask;
#else
- offload.cls_u32->knode.val = 0;
- offload.cls_u32->knode.mask = 0;
+ cls_u32.knode.val = 0;
+ cls_u32.knode.mask = 0;
#endif
- offload.cls_u32->knode.sel = &n->sel;
- offload.cls_u32->knode.exts = &n->exts;
+ cls_u32.knode.sel = &n->sel;
+ cls_u32.knode.exts = &n->exts;
if (n->ht_down)
- offload.cls_u32->knode.link_handle = n->ht_down->handle;
-
- err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
- tp->chain->index, tp->protocol,
- &offload);
+ cls_u32.knode.link_handle = n->ht_down->handle;
- if (!err)
+ err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw);
+ if (err < 0) {
+ u32_remove_hw_knode(tp, n->handle);
+ return err;
+ } else if (err > 0) {
n->flags |= TCA_CLS_FLAGS_IN_HW;
+ }
- if (tc_skip_sw(flags))
- return err;
+ if (skip_sw && !(n->flags & TCA_CLS_FLAGS_IN_HW))
+ return -EINVAL;
return 0;
}
@@ -548,7 +591,11 @@ static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
rtnl_dereference(n->next));
tcf_unbind_filter(tp, &n->res);
u32_remove_hw_knode(tp, n->handle);
- call_rcu(&n->rcu, u32_delete_key_freepf_rcu);
+ idr_remove_ext(&ht->handle_idr, n->handle);
+ if (tcf_exts_get_net(&n->exts))
+ call_rcu(&n->rcu, u32_delete_key_freepf_rcu);
+ else
+ u32_destroy_key(n->tp, n, true);
}
}
}
@@ -569,6 +616,8 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
hn = &phn->next, phn = rtnl_dereference(*hn)) {
if (phn == ht) {
u32_clear_hw_hnode(tp, ht);
+ idr_destroy(&ht->handle_idr);
+ idr_remove_ext(&tp_c->handle_idr, ht->handle);
RCU_INIT_POINTER(*hn, ht->next);
kfree_rcu(ht, rcu);
return 0;
@@ -602,7 +651,7 @@ static void u32_destroy(struct tcf_proto *tp)
if (--tp_c->refcnt == 0) {
struct tc_u_hnode *ht;
- tp->q->u32_node = NULL;
+ hlist_del(&tp_c->hnode);
for (ht = rtnl_dereference(tp_c->hlist);
ht;
@@ -616,15 +665,16 @@ static void u32_destroy(struct tcf_proto *tp)
kfree_rcu(ht, rcu);
}
+ idr_destroy(&tp_c->handle_idr);
kfree(tp_c);
}
tp->data = NULL;
}
-static int u32_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
+static int u32_delete(struct tcf_proto *tp, void *arg, bool *last)
{
- struct tc_u_hnode *ht = (struct tc_u_hnode *)arg;
+ struct tc_u_hnode *ht = arg;
struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
struct tc_u_common *tp_c = tp->data;
int ret = 0;
@@ -684,27 +734,21 @@ ret:
return ret;
}
-#define NR_U32_NODE (1<<12)
-static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle)
+static u32 gen_new_kid(struct tc_u_hnode *ht, u32 htid)
{
- struct tc_u_knode *n;
- unsigned long i;
- unsigned long *bitmap = kzalloc(BITS_TO_LONGS(NR_U32_NODE) * sizeof(unsigned long),
- GFP_KERNEL);
- if (!bitmap)
- return handle | 0xFFF;
-
- for (n = rtnl_dereference(ht->ht[TC_U32_HASH(handle)]);
- n;
- n = rtnl_dereference(n->next))
- set_bit(TC_U32_NODE(n->handle), bitmap);
-
- i = find_next_zero_bit(bitmap, NR_U32_NODE, 0x800);
- if (i >= NR_U32_NODE)
- i = find_next_zero_bit(bitmap, NR_U32_NODE, 1);
+ unsigned long idr_index;
+ u32 start = htid | 0x800;
+ u32 max = htid | 0xFFF;
+ u32 min = htid;
+
+ if (idr_alloc_ext(&ht->handle_idr, NULL, &idr_index,
+ start, max + 1, GFP_KERNEL)) {
+ if (idr_alloc_ext(&ht->handle_idr, NULL, &idr_index,
+ min + 1, max + 1, GFP_KERNEL))
+ return max;
+ }
- kfree(bitmap);
- return handle | (i >= NR_U32_NODE ? 0xFFF : i);
+ return (u32)idr_index;
}
static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
@@ -723,29 +767,24 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
struct tc_u_knode *n, struct nlattr **tb,
struct nlattr *est, bool ovr)
{
- struct tcf_exts e;
int err;
- err = tcf_exts_init(&e, TCA_U32_ACT, TCA_U32_POLICE);
+ err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr);
if (err < 0)
return err;
- err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
- if (err < 0)
- goto errout;
- err = -EINVAL;
if (tb[TCA_U32_LINK]) {
u32 handle = nla_get_u32(tb[TCA_U32_LINK]);
struct tc_u_hnode *ht_down = NULL, *ht_old;
if (TC_U32_KEY(handle))
- goto errout;
+ return -EINVAL;
if (handle) {
ht_down = u32_lookup_ht(ht->tp_c, handle);
if (ht_down == NULL)
- goto errout;
+ return -EINVAL;
ht_down->refcnt++;
}
@@ -765,16 +804,11 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
int ret;
ret = tcf_change_indev(net, tb[TCA_U32_INDEV]);
if (ret < 0)
- goto errout;
+ return -EINVAL;
n->ifindex = ret;
}
#endif
- tcf_exts_change(tp, &n->exts, &e);
-
return 0;
-errout:
- tcf_exts_destroy(&e);
- return err;
}
static void u32_replace_knode(struct tcf_proto *tp, struct tc_u_common *tp_c,
@@ -799,6 +833,7 @@ static void u32_replace_knode(struct tcf_proto *tp, struct tc_u_common *tp_c,
if (pins->handle == n->handle)
break;
+ idr_replace_ext(&ht->handle_idr, n, n->handle);
RCU_INIT_POINTER(n->next, pins->next);
rcu_assign_pointer(*ins, n);
}
@@ -858,7 +893,7 @@ static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp,
static int u32_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base, u32 handle,
- struct nlattr **tca, unsigned long *arg, bool ovr)
+ struct nlattr **tca, void **arg, bool ovr)
{
struct tc_u_common *tp_c = tp->data;
struct tc_u_hnode *ht;
@@ -885,7 +920,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
return -EINVAL;
}
- n = (struct tc_u_knode *)*arg;
+ n = *arg;
if (n) {
struct tc_u_knode *new;
@@ -919,6 +954,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
u32_replace_knode(tp, tp_c, new);
tcf_unbind_filter(tp, &n->res);
+ tcf_exts_get_net(&n->exts);
call_rcu(&n->rcu, u32_delete_key_rcu);
return 0;
}
@@ -930,29 +966,40 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
return -EINVAL;
if (TC_U32_KEY(handle))
return -EINVAL;
- if (handle == 0) {
- handle = gen_new_htid(tp->data);
- if (handle == 0)
- return -ENOMEM;
- }
ht = kzalloc(sizeof(*ht) + divisor*sizeof(void *), GFP_KERNEL);
if (ht == NULL)
return -ENOBUFS;
+ if (handle == 0) {
+ handle = gen_new_htid(tp->data, ht);
+ if (handle == 0) {
+ kfree(ht);
+ return -ENOMEM;
+ }
+ } else {
+ err = idr_alloc_ext(&tp_c->handle_idr, ht, NULL,
+ handle, handle + 1, GFP_KERNEL);
+ if (err) {
+ kfree(ht);
+ return err;
+ }
+ }
ht->tp_c = tp_c;
ht->refcnt = 1;
ht->divisor = divisor;
ht->handle = handle;
ht->prio = tp->prio;
+ idr_init(&ht->handle_idr);
err = u32_replace_hw_hnode(tp, ht, flags);
if (err) {
+ idr_remove_ext(&tp_c->handle_idr, handle);
kfree(ht);
return err;
}
RCU_INIT_POINTER(ht->next, tp_c->hlist);
rcu_assign_pointer(tp_c->hlist, ht);
- *arg = (unsigned long)ht;
+ *arg = ht;
return 0;
}
@@ -979,24 +1026,33 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
if (TC_U32_HTID(handle) && TC_U32_HTID(handle^htid))
return -EINVAL;
handle = htid | TC_U32_NODE(handle);
+ err = idr_alloc_ext(&ht->handle_idr, NULL, NULL,
+ handle, handle + 1,
+ GFP_KERNEL);
+ if (err)
+ return err;
} else
handle = gen_new_kid(ht, htid);
- if (tb[TCA_U32_SEL] == NULL)
- return -EINVAL;
+ if (tb[TCA_U32_SEL] == NULL) {
+ err = -EINVAL;
+ goto erridr;
+ }
s = nla_data(tb[TCA_U32_SEL]);
n = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), GFP_KERNEL);
- if (n == NULL)
- return -ENOBUFS;
+ if (n == NULL) {
+ err = -ENOBUFS;
+ goto erridr;
+ }
#ifdef CONFIG_CLS_U32_PERF
size = sizeof(struct tc_u32_pcnt) + s->nkeys * sizeof(u64);
n->pf = __alloc_percpu(size, __alignof__(struct tc_u32_pcnt));
if (!n->pf) {
- kfree(n);
- return -ENOBUFS;
+ err = -ENOBUFS;
+ goto errfree;
}
#endif
@@ -1047,7 +1103,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
RCU_INIT_POINTER(n->next, pins);
rcu_assign_pointer(*ins, n);
- *arg = (unsigned long)n;
+ *arg = n;
return 0;
}
@@ -1059,9 +1115,12 @@ errhw:
errout:
tcf_exts_destroy(&n->exts);
#ifdef CONFIG_CLS_U32_PERF
+errfree:
free_percpu(n->pf);
#endif
kfree(n);
+erridr:
+ idr_remove_ext(&ht->handle_idr, handle);
return err;
}
@@ -1081,7 +1140,7 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
if (ht->prio != tp->prio)
continue;
if (arg->count >= arg->skip) {
- if (arg->fn(tp, (unsigned long)ht, arg) < 0) {
+ if (arg->fn(tp, ht, arg) < 0) {
arg->stop = 1;
return;
}
@@ -1095,7 +1154,7 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
arg->count++;
continue;
}
- if (arg->fn(tp, (unsigned long)n, arg) < 0) {
+ if (arg->fn(tp, n, arg) < 0) {
arg->stop = 1;
return;
}
@@ -1105,10 +1164,18 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
}
}
-static int u32_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
+static void u32_bind_class(void *fh, u32 classid, unsigned long cl)
+{
+ struct tc_u_knode *n = fh;
+
+ if (n && n->res.classid == classid)
+ n->res.class = cl;
+}
+
+static int u32_dump(struct net *net, struct tcf_proto *tp, void *fh,
struct sk_buff *skb, struct tcmsg *t)
{
- struct tc_u_knode *n = (struct tc_u_knode *)fh;
+ struct tc_u_knode *n = fh;
struct tc_u_hnode *ht_up, *ht_down;
struct nlattr *nest;
@@ -1122,7 +1189,7 @@ static int u32_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
goto nla_put_failure;
if (TC_U32_KEY(n->handle) == 0) {
- struct tc_u_hnode *ht = (struct tc_u_hnode *)fh;
+ struct tc_u_hnode *ht = fh;
u32 divisor = ht->divisor + 1;
if (nla_put_u32(skb, TCA_U32_DIVISOR, divisor))
@@ -1235,11 +1302,14 @@ static struct tcf_proto_ops cls_u32_ops __read_mostly = {
.delete = u32_delete,
.walk = u32_walk,
.dump = u32_dump,
+ .bind_class = u32_bind_class,
.owner = THIS_MODULE,
};
static int __init init_u32(void)
{
+ int i, ret;
+
pr_info("u32 classifier\n");
#ifdef CONFIG_CLS_U32_PERF
pr_info(" Performance counters on\n");
@@ -1250,12 +1320,25 @@ static int __init init_u32(void)
#ifdef CONFIG_NET_CLS_ACT
pr_info(" Actions configured\n");
#endif
- return register_tcf_proto_ops(&cls_u32_ops);
+ tc_u_common_hash = kvmalloc_array(U32_HASH_SIZE,
+ sizeof(struct hlist_head),
+ GFP_KERNEL);
+ if (!tc_u_common_hash)
+ return -ENOMEM;
+
+ for (i = 0; i < U32_HASH_SIZE; i++)
+ INIT_HLIST_HEAD(&tc_u_common_hash[i]);
+
+ ret = register_tcf_proto_ops(&cls_u32_ops);
+ if (ret)
+ kvfree(tc_u_common_hash);
+ return ret;
}
static void __exit exit_u32(void)
{
unregister_tcf_proto_ops(&cls_u32_ops);
+ kvfree(tc_u_common_hash);
}
module_init(init_u32)
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index 03b677bc0700..1331a4c2d8ff 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -178,7 +178,7 @@ static int tcf_em_validate(struct tcf_proto *tp,
struct tcf_ematch_hdr *em_hdr = nla_data(nla);
int data_len = nla_len(nla) - sizeof(*em_hdr);
void *data = (void *) em_hdr + sizeof(*em_hdr);
- struct net *net = dev_net(qdisc_dev(tp->q));
+ struct net *net = tp->chain->block->net;
if (!TCF_EM_REL_VALID(em_hdr->flags))
goto errout;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index a3fa144b8648..b6c4f536876b 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -35,13 +35,7 @@
#include <net/sock.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
-
-static int qdisc_notify(struct net *net, struct sk_buff *oskb,
- struct nlmsghdr *n, u32 clid,
- struct Qdisc *old, struct Qdisc *new);
-static int tclass_notify(struct net *net, struct sk_buff *oskb,
- struct nlmsghdr *n, struct Qdisc *q,
- unsigned long cl, int event);
+#include <net/pkt_cls.h>
/*
@@ -160,7 +154,7 @@ int register_qdisc(struct Qdisc_ops *qops)
if (qops->cl_ops) {
const struct Qdisc_class_ops *cops = qops->cl_ops;
- if (!(cops->get && cops->put && cops->walk && cops->leaf))
+ if (!(cops->find && cops->walk && cops->leaf))
goto out_einval;
if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf))
@@ -307,6 +301,8 @@ struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
{
struct Qdisc *q;
+ if (!handle)
+ return NULL;
q = qdisc_match_from_root(dev->qdisc, handle);
if (q)
goto out;
@@ -327,12 +323,11 @@ static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
if (cops == NULL)
return NULL;
- cl = cops->get(p, classid);
+ cl = cops->find(p, classid);
if (cl == 0)
return NULL;
leaf = cops->leaf(p, cl);
- cops->put(p, cl);
return leaf;
}
@@ -621,14 +616,10 @@ EXPORT_SYMBOL(qdisc_watchdog_cancel);
static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
{
- unsigned int size = n * sizeof(struct hlist_head), i;
struct hlist_head *h;
+ unsigned int i;
- if (size <= PAGE_SIZE)
- h = kmalloc(size, GFP_KERNEL);
- else
- h = (struct hlist_head *)
- __get_free_pages(GFP_KERNEL, get_order(size));
+ h = kvmalloc_array(n, sizeof(struct hlist_head), GFP_KERNEL);
if (h != NULL) {
for (i = 0; i < n; i++)
@@ -637,16 +628,6 @@ static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
return h;
}
-static void qdisc_class_hash_free(struct hlist_head *h, unsigned int n)
-{
- unsigned int size = n * sizeof(struct hlist_head);
-
- if (size <= PAGE_SIZE)
- kfree(h);
- else
- free_pages((unsigned long)h, get_order(size));
-}
-
void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
{
struct Qdisc_class_common *cl;
@@ -679,7 +660,7 @@ void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
clhash->hashmask = nmask;
sch_tree_unlock(sch);
- qdisc_class_hash_free(ohash, osize);
+ kvfree(ohash);
}
EXPORT_SYMBOL(qdisc_class_hash_grow);
@@ -699,7 +680,7 @@ EXPORT_SYMBOL(qdisc_class_hash_init);
void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
{
- qdisc_class_hash_free(clhash->hash, clhash->hashsize);
+ kvfree(clhash->hash);
}
EXPORT_SYMBOL(qdisc_class_hash_destroy);
@@ -749,6 +730,7 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
const struct Qdisc_class_ops *cops;
unsigned long cl;
u32 parentid;
+ bool notify;
int drops;
if (n == 0 && len == 0)
@@ -761,6 +743,13 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
if (sch->flags & TCQ_F_NOPARENT)
break;
+ /* Notify parent qdisc only if child qdisc becomes empty.
+ *
+ * If child was empty even before update then backlog
+ * counter is screwed and we skip notification because
+ * parent class is already passive.
+ */
+ notify = !sch->q.qlen && !WARN_ON_ONCE(!n);
/* TODO: perform the search on a per txq basis */
sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
if (sch == NULL) {
@@ -768,10 +757,9 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
break;
}
cops = sch->ops->cl_ops;
- if (cops->qlen_notify) {
- cl = cops->get(sch, parentid);
+ if (notify && cops->qlen_notify) {
+ cl = cops->find(sch, parentid);
cops->qlen_notify(sch, cl);
- cops->put(sch, cl);
}
sch->q.qlen -= n;
sch->qstats.backlog -= len;
@@ -781,6 +769,111 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
}
EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
+static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
+ u32 portid, u32 seq, u16 flags, int event)
+{
+ struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
+ struct gnet_stats_queue __percpu *cpu_qstats = NULL;
+ struct tcmsg *tcm;
+ struct nlmsghdr *nlh;
+ unsigned char *b = skb_tail_pointer(skb);
+ struct gnet_dump d;
+ struct qdisc_size_table *stab;
+ __u32 qlen;
+
+ cond_resched();
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
+ if (!nlh)
+ goto out_nlmsg_trim;
+ tcm = nlmsg_data(nlh);
+ tcm->tcm_family = AF_UNSPEC;
+ tcm->tcm__pad1 = 0;
+ tcm->tcm__pad2 = 0;
+ tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
+ tcm->tcm_parent = clid;
+ tcm->tcm_handle = q->handle;
+ tcm->tcm_info = refcount_read(&q->refcnt);
+ if (nla_put_string(skb, TCA_KIND, q->ops->id))
+ goto nla_put_failure;
+ if (q->ops->dump && q->ops->dump(q, skb) < 0)
+ goto nla_put_failure;
+ qlen = q->q.qlen;
+
+ stab = rtnl_dereference(q->stab);
+ if (stab && qdisc_dump_stab(skb, stab) < 0)
+ goto nla_put_failure;
+
+ if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
+ NULL, &d, TCA_PAD) < 0)
+ goto nla_put_failure;
+
+ if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
+ goto nla_put_failure;
+
+ if (qdisc_is_percpu_stats(q)) {
+ cpu_bstats = q->cpu_bstats;
+ cpu_qstats = q->cpu_qstats;
+ }
+
+ if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
+ &d, cpu_bstats, &q->bstats) < 0 ||
+ gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
+ gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
+ goto nla_put_failure;
+
+ if (gnet_stats_finish_copy(&d) < 0)
+ goto nla_put_failure;
+
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
+ return skb->len;
+
+out_nlmsg_trim:
+nla_put_failure:
+ nlmsg_trim(skb, b);
+ return -1;
+}
+
+static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
+{
+ if (q->flags & TCQ_F_BUILTIN)
+ return true;
+ if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible)
+ return true;
+
+ return false;
+}
+
+static int qdisc_notify(struct net *net, struct sk_buff *oskb,
+ struct nlmsghdr *n, u32 clid,
+ struct Qdisc *old, struct Qdisc *new)
+{
+ struct sk_buff *skb;
+ u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+
+ skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!skb)
+ return -ENOBUFS;
+
+ if (old && !tc_qdisc_dump_ignore(old, false)) {
+ if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
+ 0, RTM_DELQDISC) < 0)
+ goto err_out;
+ }
+ if (new && !tc_qdisc_dump_ignore(new, false)) {
+ if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
+ old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
+ goto err_out;
+ }
+
+ if (skb->len)
+ return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
+ n->nlmsg_flags & NLM_F_ECHO);
+
+err_out:
+ kfree_skb(skb);
+ return -EINVAL;
+}
+
static void notify_and_destroy(struct net *net, struct sk_buff *skb,
struct nlmsghdr *n, u32 clid,
struct Qdisc *old, struct Qdisc *new)
@@ -836,7 +929,7 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
old = dev_graft_qdisc(dev_queue, new);
if (new && i > 0)
- refcount_inc(&new->refcnt);
+ qdisc_refcount_inc(new);
if (!ingress)
qdisc_destroy(old);
@@ -847,7 +940,7 @@ skip:
notify_and_destroy(net, skb, n, classid,
dev->qdisc, new);
if (new && !new->ops->attach)
- refcount_inc(&new->refcnt);
+ qdisc_refcount_inc(new);
dev->qdisc = new ? : &noop_qdisc;
if (new && new->ops->attach)
@@ -863,11 +956,11 @@ skip:
err = -EOPNOTSUPP;
if (cops && cops->graft) {
- unsigned long cl = cops->get(parent, classid);
- if (cl) {
+ unsigned long cl = cops->find(parent, classid);
+
+ if (cl)
err = cops->graft(parent, cl, new, &old);
- cops->put(parent, cl);
- } else
+ else
err = -ENOENT;
}
if (!err)
@@ -1256,7 +1349,7 @@ replay:
if (q == p ||
(p && check_loop(q, p, 0)))
return -ELOOP;
- refcount_inc(&q->refcnt);
+ qdisc_refcount_inc(q);
goto graft;
} else {
if (!q)
@@ -1348,111 +1441,6 @@ graft:
return 0;
}
-static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
- u32 portid, u32 seq, u16 flags, int event)
-{
- struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
- struct gnet_stats_queue __percpu *cpu_qstats = NULL;
- struct tcmsg *tcm;
- struct nlmsghdr *nlh;
- unsigned char *b = skb_tail_pointer(skb);
- struct gnet_dump d;
- struct qdisc_size_table *stab;
- __u32 qlen;
-
- cond_resched();
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
- if (!nlh)
- goto out_nlmsg_trim;
- tcm = nlmsg_data(nlh);
- tcm->tcm_family = AF_UNSPEC;
- tcm->tcm__pad1 = 0;
- tcm->tcm__pad2 = 0;
- tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
- tcm->tcm_parent = clid;
- tcm->tcm_handle = q->handle;
- tcm->tcm_info = refcount_read(&q->refcnt);
- if (nla_put_string(skb, TCA_KIND, q->ops->id))
- goto nla_put_failure;
- if (q->ops->dump && q->ops->dump(q, skb) < 0)
- goto nla_put_failure;
- qlen = q->q.qlen;
-
- stab = rtnl_dereference(q->stab);
- if (stab && qdisc_dump_stab(skb, stab) < 0)
- goto nla_put_failure;
-
- if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
- NULL, &d, TCA_PAD) < 0)
- goto nla_put_failure;
-
- if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
- goto nla_put_failure;
-
- if (qdisc_is_percpu_stats(q)) {
- cpu_bstats = q->cpu_bstats;
- cpu_qstats = q->cpu_qstats;
- }
-
- if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
- &d, cpu_bstats, &q->bstats) < 0 ||
- gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
- gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
- goto nla_put_failure;
-
- if (gnet_stats_finish_copy(&d) < 0)
- goto nla_put_failure;
-
- nlh->nlmsg_len = skb_tail_pointer(skb) - b;
- return skb->len;
-
-out_nlmsg_trim:
-nla_put_failure:
- nlmsg_trim(skb, b);
- return -1;
-}
-
-static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
-{
- if (q->flags & TCQ_F_BUILTIN)
- return true;
- if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible)
- return true;
-
- return false;
-}
-
-static int qdisc_notify(struct net *net, struct sk_buff *oskb,
- struct nlmsghdr *n, u32 clid,
- struct Qdisc *old, struct Qdisc *new)
-{
- struct sk_buff *skb;
- u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
-
- skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
- if (!skb)
- return -ENOBUFS;
-
- if (old && !tc_qdisc_dump_ignore(old, false)) {
- if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
- 0, RTM_DELQDISC) < 0)
- goto err_out;
- }
- if (new && !tc_qdisc_dump_ignore(new, false)) {
- if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
- old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
- goto err_out;
- }
-
- if (skb->len)
- return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
- n->nlmsg_flags & NLM_F_ECHO);
-
-err_out:
- kfree_skb(skb);
- return -EINVAL;
-}
-
static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
struct netlink_callback *cb,
int *q_idx_p, int s_q_idx, bool recur,
@@ -1514,7 +1502,6 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
int s_idx, s_q_idx;
struct net_device *dev;
const struct nlmsghdr *nlh = cb->nlh;
- struct tcmsg *tcm = nlmsg_data(nlh);
struct nlattr *tca[TCA_MAX + 1];
int err;
@@ -1524,7 +1511,7 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
idx = 0;
ASSERT_RTNL();
- err = nlmsg_parse(nlh, sizeof(*tcm), tca, TCA_MAX, NULL, NULL);
+ err = nlmsg_parse(nlh, sizeof(struct tcmsg), tca, TCA_MAX, NULL, NULL);
if (err < 0)
return err;
@@ -1565,7 +1552,163 @@ done:
* Traffic classes manipulation. *
************************************************/
+static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
+ unsigned long cl,
+ u32 portid, u32 seq, u16 flags, int event)
+{
+ struct tcmsg *tcm;
+ struct nlmsghdr *nlh;
+ unsigned char *b = skb_tail_pointer(skb);
+ struct gnet_dump d;
+ const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
+
+ cond_resched();
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
+ if (!nlh)
+ goto out_nlmsg_trim;
+ tcm = nlmsg_data(nlh);
+ tcm->tcm_family = AF_UNSPEC;
+ tcm->tcm__pad1 = 0;
+ tcm->tcm__pad2 = 0;
+ tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
+ tcm->tcm_parent = q->handle;
+ tcm->tcm_handle = q->handle;
+ tcm->tcm_info = 0;
+ if (nla_put_string(skb, TCA_KIND, q->ops->id))
+ goto nla_put_failure;
+ if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
+ goto nla_put_failure;
+
+ if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
+ NULL, &d, TCA_PAD) < 0)
+ goto nla_put_failure;
+
+ if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
+ goto nla_put_failure;
+
+ if (gnet_stats_finish_copy(&d) < 0)
+ goto nla_put_failure;
+
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
+ return skb->len;
+
+out_nlmsg_trim:
+nla_put_failure:
+ nlmsg_trim(skb, b);
+ return -1;
+}
+
+static int tclass_notify(struct net *net, struct sk_buff *oskb,
+ struct nlmsghdr *n, struct Qdisc *q,
+ unsigned long cl, int event)
+{
+ struct sk_buff *skb;
+ u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+
+ skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!skb)
+ return -ENOBUFS;
+
+ if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+
+ return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
+ n->nlmsg_flags & NLM_F_ECHO);
+}
+
+static int tclass_del_notify(struct net *net,
+ const struct Qdisc_class_ops *cops,
+ struct sk_buff *oskb, struct nlmsghdr *n,
+ struct Qdisc *q, unsigned long cl)
+{
+ u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+ struct sk_buff *skb;
+ int err = 0;
+
+ if (!cops->delete)
+ return -EOPNOTSUPP;
+
+ skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!skb)
+ return -ENOBUFS;
+ if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
+ RTM_DELTCLASS) < 0) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+
+ err = cops->delete(q, cl);
+ if (err) {
+ kfree_skb(skb);
+ return err;
+ }
+
+ return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
+ n->nlmsg_flags & NLM_F_ECHO);
+}
+
+#ifdef CONFIG_NET_CLS
+
+struct tcf_bind_args {
+ struct tcf_walker w;
+ u32 classid;
+ unsigned long cl;
+};
+
+static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
+{
+ struct tcf_bind_args *a = (void *)arg;
+
+ if (tp->ops->bind_class) {
+ struct Qdisc *q = tcf_block_q(tp->chain->block);
+
+ sch_tree_lock(q);
+ tp->ops->bind_class(n, a->classid, a->cl);
+ sch_tree_unlock(q);
+ }
+ return 0;
+}
+
+static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
+ unsigned long new_cl)
+{
+ const struct Qdisc_class_ops *cops = q->ops->cl_ops;
+ struct tcf_block *block;
+ struct tcf_chain *chain;
+ unsigned long cl;
+
+ cl = cops->find(q, portid);
+ if (!cl)
+ return;
+ block = cops->tcf_block(q, cl);
+ if (!block)
+ return;
+ list_for_each_entry(chain, &block->chain_list, list) {
+ struct tcf_proto *tp;
+
+ for (tp = rtnl_dereference(chain->filter_chain);
+ tp; tp = rtnl_dereference(tp->next)) {
+ struct tcf_bind_args arg = {};
+
+ arg.w.fn = tcf_node_bind;
+ arg.classid = clid;
+ arg.cl = new_cl;
+ tp->ops->walk(tp, &arg.w);
+ }
+ }
+}
+
+#else
+
+static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
+ unsigned long new_cl)
+{
+}
+
+#endif
static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
struct netlink_ext_ack *extack)
@@ -1656,7 +1799,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
clid = TC_H_MAKE(qid, clid);
if (clid)
- cl = cops->get(q, clid);
+ cl = cops->find(q, clid);
if (cl == 0) {
err = -ENOENT;
@@ -1671,12 +1814,9 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
goto out;
break;
case RTM_DELTCLASS:
- err = -EOPNOTSUPP;
- if (cops->delete)
- err = cops->delete(q, cl);
- if (err == 0)
- tclass_notify(net, skb, n, q, cl,
- RTM_DELTCLASS);
+ err = tclass_del_notify(net, cops, skb, n, q, cl);
+ /* Unbind the class with flilters with 0 */
+ tc_bind_tclass(q, portid, clid, 0);
goto out;
case RTM_GETTCLASS:
err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
@@ -1691,83 +1831,16 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
err = -EOPNOTSUPP;
if (cops->change)
err = cops->change(q, clid, portid, tca, &new_cl);
- if (err == 0)
+ if (err == 0) {
tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);
-
+ /* We just create a new class, need to do reverse binding. */
+ if (cl != new_cl)
+ tc_bind_tclass(q, portid, clid, new_cl);
+ }
out:
- if (cl)
- cops->put(q, cl);
-
return err;
}
-
-static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
- unsigned long cl,
- u32 portid, u32 seq, u16 flags, int event)
-{
- struct tcmsg *tcm;
- struct nlmsghdr *nlh;
- unsigned char *b = skb_tail_pointer(skb);
- struct gnet_dump d;
- const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
-
- cond_resched();
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
- if (!nlh)
- goto out_nlmsg_trim;
- tcm = nlmsg_data(nlh);
- tcm->tcm_family = AF_UNSPEC;
- tcm->tcm__pad1 = 0;
- tcm->tcm__pad2 = 0;
- tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
- tcm->tcm_parent = q->handle;
- tcm->tcm_handle = q->handle;
- tcm->tcm_info = 0;
- if (nla_put_string(skb, TCA_KIND, q->ops->id))
- goto nla_put_failure;
- if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
- goto nla_put_failure;
-
- if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
- NULL, &d, TCA_PAD) < 0)
- goto nla_put_failure;
-
- if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
- goto nla_put_failure;
-
- if (gnet_stats_finish_copy(&d) < 0)
- goto nla_put_failure;
-
- nlh->nlmsg_len = skb_tail_pointer(skb) - b;
- return skb->len;
-
-out_nlmsg_trim:
-nla_put_failure:
- nlmsg_trim(skb, b);
- return -1;
-}
-
-static int tclass_notify(struct net *net, struct sk_buff *oskb,
- struct nlmsghdr *n, struct Qdisc *q,
- unsigned long cl, int event)
-{
- struct sk_buff *skb;
- u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
-
- skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
- if (!skb)
- return -ENOBUFS;
-
- if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
- kfree_skb(skb);
- return -EINVAL;
- }
-
- return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
- n->nlmsg_flags & NLM_F_ECHO);
-}
-
struct qdisc_dump_args {
struct qdisc_walker w;
struct sk_buff *skb;
@@ -1949,14 +2022,14 @@ static int __init pktsched_init(void)
register_qdisc(&mq_qdisc_ops);
register_qdisc(&noqueue_qdisc_ops);
- rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, NULL);
- rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, NULL);
+ rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, 0);
+ rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc,
- NULL);
- rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, NULL);
- rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, NULL);
+ 0);
+ rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, 0);
+ rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass,
- NULL);
+ 0);
return 0;
}
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index c403c87aff7a..2dbd249c0b2f 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -41,6 +41,7 @@
#define VCC2FLOW(vcc) ((struct atm_flow_data *) ((vcc)->user_back))
struct atm_flow_data {
+ struct Qdisc_class_common common;
struct Qdisc *q; /* FIFO, TBF, etc. */
struct tcf_proto __rcu *filter_list;
struct tcf_block *block;
@@ -49,7 +50,6 @@ struct atm_flow_data {
struct sk_buff *skb); /* chaining */
struct atm_qdisc_data *parent; /* parent qdisc */
struct socket *sock; /* for closing */
- u32 classid; /* x:y type ID */
int ref; /* reference count */
struct gnet_stats_basic_packed bstats;
struct gnet_stats_queue qstats;
@@ -75,7 +75,7 @@ static inline struct atm_flow_data *lookup_flow(struct Qdisc *sch, u32 classid)
struct atm_flow_data *flow;
list_for_each_entry(flow, &p->flows, list) {
- if (flow->classid == classid)
+ if (flow->common.classid == classid)
return flow;
}
return NULL;
@@ -108,23 +108,29 @@ static struct Qdisc *atm_tc_leaf(struct Qdisc *sch, unsigned long cl)
return flow ? flow->q : NULL;
}
-static unsigned long atm_tc_get(struct Qdisc *sch, u32 classid)
+static unsigned long atm_tc_find(struct Qdisc *sch, u32 classid)
{
struct atm_qdisc_data *p __maybe_unused = qdisc_priv(sch);
struct atm_flow_data *flow;
- pr_debug("atm_tc_get(sch %p,[qdisc %p],classid %x)\n", sch, p, classid);
+ pr_debug("%s(sch %p,[qdisc %p],classid %x)\n", __func__, sch, p, classid);
flow = lookup_flow(sch, classid);
- if (flow)
- flow->ref++;
- pr_debug("atm_tc_get: flow %p\n", flow);
+ pr_debug("%s: flow %p\n", __func__, flow);
return (unsigned long)flow;
}
static unsigned long atm_tc_bind_filter(struct Qdisc *sch,
unsigned long parent, u32 classid)
{
- return atm_tc_get(sch, classid);
+ struct atm_qdisc_data *p __maybe_unused = qdisc_priv(sch);
+ struct atm_flow_data *flow;
+
+ pr_debug("%s(sch %p,[qdisc %p],classid %x)\n", __func__, sch, p, classid);
+ flow = lookup_flow(sch, classid);
+ if (flow)
+ flow->ref++;
+ pr_debug("%s: flow %p\n", __func__, flow);
+ return (unsigned long)flow;
}
/*
@@ -234,7 +240,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
excess = NULL;
else {
excess = (struct atm_flow_data *)
- atm_tc_get(sch, nla_get_u32(tb[TCA_ATM_EXCESS]));
+ atm_tc_find(sch, nla_get_u32(tb[TCA_ATM_EXCESS]));
if (!excess)
return -ENOENT;
}
@@ -262,10 +268,9 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
for (i = 1; i < 0x8000; i++) {
classid = TC_H_MAKE(sch->handle, 0x8000 | i);
- cl = atm_tc_get(sch, classid);
+ cl = atm_tc_find(sch, classid);
if (!cl)
break;
- atm_tc_put(sch, cl);
}
}
pr_debug("atm_tc_change: new id %x\n", classid);
@@ -276,7 +281,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
goto err_out;
}
- error = tcf_block_get(&flow->block, &flow->filter_list);
+ error = tcf_block_get(&flow->block, &flow->filter_list, sch);
if (error) {
kfree(flow);
goto err_out;
@@ -293,7 +298,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
flow->old_pop = flow->vcc->pop;
flow->parent = p;
flow->vcc->pop = sch_atm_pop;
- flow->classid = classid;
+ flow->common.classid = classid;
flow->ref = 1;
flow->excess = excess;
list_add(&flow->list, &p->link.list);
@@ -305,8 +310,6 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
*arg = (unsigned long)flow;
return 0;
err_out:
- if (excess)
- atm_tc_put(sch, (unsigned long)excess);
sockfd_put(sock);
return error;
}
@@ -377,7 +380,7 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
result = TC_ACT_OK; /* be nice to gcc */
flow = NULL;
if (TC_H_MAJ(skb->priority) != sch->handle ||
- !(flow = (struct atm_flow_data *)atm_tc_get(sch, skb->priority))) {
+ !(flow = (struct atm_flow_data *)atm_tc_find(sch, skb->priority))) {
struct tcf_proto *fl;
list_for_each_entry(flow, &p->flows, list) {
@@ -543,13 +546,13 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt)
p->link.q = &noop_qdisc;
pr_debug("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q);
- err = tcf_block_get(&p->link.block, &p->link.filter_list);
+ err = tcf_block_get(&p->link.block, &p->link.filter_list, sch);
if (err)
return err;
p->link.vcc = NULL;
p->link.sock = NULL;
- p->link.classid = sch->handle;
+ p->link.common.classid = sch->handle;
p->link.ref = 1;
tasklet_init(&p->task, sch_atm_dequeue, (unsigned long)sch);
return 0;
@@ -596,7 +599,7 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl,
sch, p, flow, skb, tcm);
if (list_empty(&flow->list))
return -EINVAL;
- tcm->tcm_handle = flow->classid;
+ tcm->tcm_handle = flow->common.classid;
tcm->tcm_info = flow->q->handle;
nest = nla_nest_start(skb, TCA_OPTIONS);
@@ -621,7 +624,7 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl,
goto nla_put_failure;
}
if (flow->excess) {
- if (nla_put_u32(skb, TCA_ATM_EXCESS, flow->classid))
+ if (nla_put_u32(skb, TCA_ATM_EXCESS, flow->common.classid))
goto nla_put_failure;
} else {
if (nla_put_u32(skb, TCA_ATM_EXCESS, 0))
@@ -655,8 +658,7 @@ static int atm_tc_dump(struct Qdisc *sch, struct sk_buff *skb)
static const struct Qdisc_class_ops atm_class_ops = {
.graft = atm_tc_graft,
.leaf = atm_tc_leaf,
- .get = atm_tc_get,
- .put = atm_tc_put,
+ .find = atm_tc_find,
.change = atm_tc_change,
.delete = atm_tc_delete,
.walk = atm_tc_walk,
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 780db43300b1..6361be7881f1 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -129,7 +129,6 @@ struct cbq_class {
struct tcf_proto __rcu *filter_list;
struct tcf_block *block;
- int refcnt;
int filters;
struct cbq_class *defaults[TC_PRIO_MAX + 1];
@@ -256,6 +255,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
case TC_ACT_STOLEN:
case TC_ACT_TRAP:
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+ /* fall through */
case TC_ACT_SHOT:
return NULL;
case TC_ACT_RECLASSIFY:
@@ -1139,6 +1139,13 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
struct tc_ratespec *r;
int err;
+ qdisc_watchdog_init(&q->watchdog, sch);
+ hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
+ q->delay_timer.function = cbq_undelay;
+
+ if (!opt)
+ return -EINVAL;
+
err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy, NULL);
if (err < 0)
return err;
@@ -1155,7 +1162,6 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
if (err < 0)
goto put_rtab;
- q->link.refcnt = 1;
q->link.sibling = &q->link;
q->link.common.classid = sch->handle;
q->link.qdisc = sch;
@@ -1177,9 +1183,6 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
q->link.avpkt = q->link.allot/2;
q->link.minidle = -0x7FFFFFFF;
- qdisc_watchdog_init(&q->watchdog, sch);
- hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
- q->delay_timer.function = cbq_undelay;
q->toplevel = TC_CBQ_MAXLEVEL;
q->now = psched_get_time();
@@ -1385,20 +1388,14 @@ static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
{
struct cbq_class *cl = (struct cbq_class *)arg;
- if (cl->q->q.qlen == 0)
- cbq_deactivate_class(cl);
+ cbq_deactivate_class(cl);
}
-static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
+static unsigned long cbq_find(struct Qdisc *sch, u32 classid)
{
struct cbq_sched_data *q = qdisc_priv(sch);
- struct cbq_class *cl = cbq_class_lookup(q, classid);
- if (cl) {
- cl->refcnt++;
- return (unsigned long)cl;
- }
- return 0;
+ return (unsigned long)cbq_class_lookup(q, classid);
}
static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
@@ -1444,25 +1441,6 @@ static void cbq_destroy(struct Qdisc *sch)
qdisc_class_hash_destroy(&q->clhash);
}
-static void cbq_put(struct Qdisc *sch, unsigned long arg)
-{
- struct cbq_class *cl = (struct cbq_class *)arg;
-
- if (--cl->refcnt == 0) {
-#ifdef CONFIG_NET_CLS_ACT
- spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
- struct cbq_sched_data *q = qdisc_priv(sch);
-
- spin_lock_bh(root_lock);
- if (q->rx_class == cl)
- q->rx_class = NULL;
- spin_unlock_bh(root_lock);
-#endif
-
- cbq_destroy_class(sch, cl);
- }
-}
-
static int
cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
unsigned long *arg)
@@ -1589,7 +1567,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
if (cl == NULL)
goto failure;
- err = tcf_block_get(&cl->block, &cl->filter_list);
+ err = tcf_block_get(&cl->block, &cl->filter_list, sch);
if (err) {
kfree(cl);
return err;
@@ -1609,7 +1587,6 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
cl->R_tab = rtab;
rtab = NULL;
- cl->refcnt = 1;
cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid);
if (!cl->q)
cl->q = &noop_qdisc;
@@ -1690,12 +1667,7 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg)
cbq_rmprio(q, cl);
sch_tree_unlock(sch);
- BUG_ON(--cl->refcnt == 0);
- /*
- * This shouldn't happen: we "hold" one cops->get() when called
- * from tc_ctl_tclass; the destroy method is done from cops->put().
- */
-
+ cbq_destroy_class(sch, cl);
return 0;
}
@@ -1761,8 +1733,7 @@ static const struct Qdisc_class_ops cbq_class_ops = {
.graft = cbq_graft,
.leaf = cbq_leaf,
.qlen_notify = cbq_qlen_notify,
- .get = cbq_get,
- .put = cbq_put,
+ .find = cbq_find,
.change = cbq_change_class,
.delete = cbq_delete,
.walk = cbq_walk,
diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c
new file mode 100644
index 000000000000..7a72980c1509
--- /dev/null
+++ b/net/sched/sch_cbs.c
@@ -0,0 +1,373 @@
+/*
+ * net/sched/sch_cbs.c Credit Based Shaper
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Vinicius Costa Gomes <vinicius.gomes@intel.com>
+ *
+ */
+
+/* Credit Based Shaper (CBS)
+ * =========================
+ *
+ * This is a simple rate-limiting shaper aimed at TSN applications on
+ * systems with known traffic workloads.
+ *
+ * Its algorithm is defined by the IEEE 802.1Q-2014 Specification,
+ * Section 8.6.8.2, and explained in more detail in the Annex L of the
+ * same specification.
+ *
+ * There are four tunables to be considered:
+ *
+ * 'idleslope': Idleslope is the rate of credits that is
+ * accumulated (in kilobits per second) when there is at least
+ * one packet waiting for transmission. Packets are transmitted
+ * when the current value of credits is equal or greater than
+ * zero. When there is no packet to be transmitted the amount of
+ * credits is set to zero. This is the main tunable of the CBS
+ * algorithm.
+ *
+ * 'sendslope':
+ * Sendslope is the rate of credits that is depleted (it should be a
+ * negative number of kilobits per second) when a transmission is
+ * ocurring. It can be calculated as follows, (IEEE 802.1Q-2014 Section
+ * 8.6.8.2 item g):
+ *
+ * sendslope = idleslope - port_transmit_rate
+ *
+ * 'hicredit': Hicredit defines the maximum amount of credits (in
+ * bytes) that can be accumulated. Hicredit depends on the
+ * characteristics of interfering traffic,
+ * 'max_interference_size' is the maximum size of any burst of
+ * traffic that can delay the transmission of a frame that is
+ * available for transmission for this traffic class, (IEEE
+ * 802.1Q-2014 Annex L, Equation L-3):
+ *
+ * hicredit = max_interference_size * (idleslope / port_transmit_rate)
+ *
+ * 'locredit': Locredit is the minimum amount of credits that can
+ * be reached. It is a function of the traffic flowing through
+ * this qdisc (IEEE 802.1Q-2014 Annex L, Equation L-2):
+ *
+ * locredit = max_frame_size * (sendslope / port_transmit_rate)
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <net/netlink.h>
+#include <net/sch_generic.h>
+#include <net/pkt_sched.h>
+
+#define BYTES_PER_KBIT (1000LL / 8)
+
+struct cbs_sched_data {
+ bool offload;
+ int queue;
+ s64 port_rate; /* in bytes/s */
+ s64 last; /* timestamp in ns */
+ s64 credits; /* in bytes */
+ s32 locredit; /* in bytes */
+ s32 hicredit; /* in bytes */
+ s64 sendslope; /* in bytes/s */
+ s64 idleslope; /* in bytes/s */
+ struct qdisc_watchdog watchdog;
+ int (*enqueue)(struct sk_buff *skb, struct Qdisc *sch);
+ struct sk_buff *(*dequeue)(struct Qdisc *sch);
+};
+
+static int cbs_enqueue_offload(struct sk_buff *skb, struct Qdisc *sch)
+{
+ return qdisc_enqueue_tail(skb, sch);
+}
+
+static int cbs_enqueue_soft(struct sk_buff *skb, struct Qdisc *sch)
+{
+ struct cbs_sched_data *q = qdisc_priv(sch);
+
+ if (sch->q.qlen == 0 && q->credits > 0) {
+ /* We need to stop accumulating credits when there's
+ * no enqueued packets and q->credits is positive.
+ */
+ q->credits = 0;
+ q->last = ktime_get_ns();
+ }
+
+ return qdisc_enqueue_tail(skb, sch);
+}
+
+static int cbs_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+ struct sk_buff **to_free)
+{
+ struct cbs_sched_data *q = qdisc_priv(sch);
+
+ return q->enqueue(skb, sch);
+}
+
+/* timediff is in ns, slope is in bytes/s */
+static s64 timediff_to_credits(s64 timediff, s64 slope)
+{
+ return div64_s64(timediff * slope, NSEC_PER_SEC);
+}
+
+static s64 delay_from_credits(s64 credits, s64 slope)
+{
+ if (unlikely(slope == 0))
+ return S64_MAX;
+
+ return div64_s64(-credits * NSEC_PER_SEC, slope);
+}
+
+static s64 credits_from_len(unsigned int len, s64 slope, s64 port_rate)
+{
+ if (unlikely(port_rate == 0))
+ return S64_MAX;
+
+ return div64_s64(len * slope, port_rate);
+}
+
+static struct sk_buff *cbs_dequeue_soft(struct Qdisc *sch)
+{
+ struct cbs_sched_data *q = qdisc_priv(sch);
+ s64 now = ktime_get_ns();
+ struct sk_buff *skb;
+ s64 credits;
+ int len;
+
+ if (q->credits < 0) {
+ credits = timediff_to_credits(now - q->last, q->idleslope);
+
+ credits = q->credits + credits;
+ q->credits = min_t(s64, credits, q->hicredit);
+
+ if (q->credits < 0) {
+ s64 delay;
+
+ delay = delay_from_credits(q->credits, q->idleslope);
+ qdisc_watchdog_schedule_ns(&q->watchdog, now + delay);
+
+ q->last = now;
+
+ return NULL;
+ }
+ }
+
+ skb = qdisc_dequeue_head(sch);
+ if (!skb)
+ return NULL;
+
+ len = qdisc_pkt_len(skb);
+
+ /* As sendslope is a negative number, this will decrease the
+ * amount of q->credits.
+ */
+ credits = credits_from_len(len, q->sendslope, q->port_rate);
+ credits += q->credits;
+
+ q->credits = max_t(s64, credits, q->locredit);
+ q->last = now;
+
+ return skb;
+}
+
+static struct sk_buff *cbs_dequeue_offload(struct Qdisc *sch)
+{
+ return qdisc_dequeue_head(sch);
+}
+
+static struct sk_buff *cbs_dequeue(struct Qdisc *sch)
+{
+ struct cbs_sched_data *q = qdisc_priv(sch);
+
+ return q->dequeue(sch);
+}
+
+static const struct nla_policy cbs_policy[TCA_CBS_MAX + 1] = {
+ [TCA_CBS_PARMS] = { .len = sizeof(struct tc_cbs_qopt) },
+};
+
+static void cbs_disable_offload(struct net_device *dev,
+ struct cbs_sched_data *q)
+{
+ struct tc_cbs_qopt_offload cbs = { };
+ const struct net_device_ops *ops;
+ int err;
+
+ if (!q->offload)
+ return;
+
+ q->enqueue = cbs_enqueue_soft;
+ q->dequeue = cbs_dequeue_soft;
+
+ ops = dev->netdev_ops;
+ if (!ops->ndo_setup_tc)
+ return;
+
+ cbs.queue = q->queue;
+ cbs.enable = 0;
+
+ err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_CBS, &cbs);
+ if (err < 0)
+ pr_warn("Couldn't disable CBS offload for queue %d\n",
+ cbs.queue);
+}
+
+static int cbs_enable_offload(struct net_device *dev, struct cbs_sched_data *q,
+ const struct tc_cbs_qopt *opt)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+ struct tc_cbs_qopt_offload cbs = { };
+ int err;
+
+ if (!ops->ndo_setup_tc)
+ return -EOPNOTSUPP;
+
+ cbs.queue = q->queue;
+
+ cbs.enable = 1;
+ cbs.hicredit = opt->hicredit;
+ cbs.locredit = opt->locredit;
+ cbs.idleslope = opt->idleslope;
+ cbs.sendslope = opt->sendslope;
+
+ err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_CBS, &cbs);
+ if (err < 0)
+ return err;
+
+ q->enqueue = cbs_enqueue_offload;
+ q->dequeue = cbs_dequeue_offload;
+
+ return 0;
+}
+
+static int cbs_change(struct Qdisc *sch, struct nlattr *opt)
+{
+ struct cbs_sched_data *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+ struct nlattr *tb[TCA_CBS_MAX + 1];
+ struct tc_cbs_qopt *qopt;
+ int err;
+
+ err = nla_parse_nested(tb, TCA_CBS_MAX, opt, cbs_policy, NULL);
+ if (err < 0)
+ return err;
+
+ if (!tb[TCA_CBS_PARMS])
+ return -EINVAL;
+
+ qopt = nla_data(tb[TCA_CBS_PARMS]);
+
+ if (!qopt->offload) {
+ struct ethtool_link_ksettings ecmd;
+ s64 link_speed;
+
+ if (!__ethtool_get_link_ksettings(dev, &ecmd))
+ link_speed = ecmd.base.speed;
+ else
+ link_speed = SPEED_1000;
+
+ q->port_rate = link_speed * 1000 * BYTES_PER_KBIT;
+
+ cbs_disable_offload(dev, q);
+ } else {
+ err = cbs_enable_offload(dev, q, qopt);
+ if (err < 0)
+ return err;
+ }
+
+ /* Everything went OK, save the parameters used. */
+ q->hicredit = qopt->hicredit;
+ q->locredit = qopt->locredit;
+ q->idleslope = qopt->idleslope * BYTES_PER_KBIT;
+ q->sendslope = qopt->sendslope * BYTES_PER_KBIT;
+ q->offload = qopt->offload;
+
+ return 0;
+}
+
+static int cbs_init(struct Qdisc *sch, struct nlattr *opt)
+{
+ struct cbs_sched_data *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+
+ if (!opt)
+ return -EINVAL;
+
+ q->queue = sch->dev_queue - netdev_get_tx_queue(dev, 0);
+
+ q->enqueue = cbs_enqueue_soft;
+ q->dequeue = cbs_dequeue_soft;
+
+ qdisc_watchdog_init(&q->watchdog, sch);
+
+ return cbs_change(sch, opt);
+}
+
+static void cbs_destroy(struct Qdisc *sch)
+{
+ struct cbs_sched_data *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+
+ qdisc_watchdog_cancel(&q->watchdog);
+
+ cbs_disable_offload(dev, q);
+}
+
+static int cbs_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+ struct cbs_sched_data *q = qdisc_priv(sch);
+ struct tc_cbs_qopt opt = { };
+ struct nlattr *nest;
+
+ nest = nla_nest_start(skb, TCA_OPTIONS);
+ if (!nest)
+ goto nla_put_failure;
+
+ opt.hicredit = q->hicredit;
+ opt.locredit = q->locredit;
+ opt.sendslope = div64_s64(q->sendslope, BYTES_PER_KBIT);
+ opt.idleslope = div64_s64(q->idleslope, BYTES_PER_KBIT);
+ opt.offload = q->offload;
+
+ if (nla_put(skb, TCA_CBS_PARMS, sizeof(opt), &opt))
+ goto nla_put_failure;
+
+ return nla_nest_end(skb, nest);
+
+nla_put_failure:
+ nla_nest_cancel(skb, nest);
+ return -1;
+}
+
+static struct Qdisc_ops cbs_qdisc_ops __read_mostly = {
+ .id = "cbs",
+ .priv_size = sizeof(struct cbs_sched_data),
+ .enqueue = cbs_enqueue,
+ .dequeue = cbs_dequeue,
+ .peek = qdisc_peek_dequeued,
+ .init = cbs_init,
+ .reset = qdisc_reset_queue,
+ .destroy = cbs_destroy,
+ .change = cbs_change,
+ .dump = cbs_dump,
+ .owner = THIS_MODULE,
+};
+
+static int __init cbs_module_init(void)
+{
+ return register_qdisc(&cbs_qdisc_ops);
+}
+
+static void __exit cbs_module_exit(void)
+{
+ unregister_qdisc(&cbs_qdisc_ops);
+}
+module_init(cbs_module_init)
+module_exit(cbs_module_exit)
+MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index a413dc1c2098..5bbcef3dcd8c 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -20,7 +20,6 @@
struct drr_class {
struct Qdisc_class_common common;
- unsigned int refcnt;
unsigned int filter_cnt;
struct gnet_stats_basic_packed bstats;
@@ -111,7 +110,6 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (cl == NULL)
return -ENOBUFS;
- cl->refcnt = 1;
cl->common.classid = classid;
cl->quantum = quantum;
cl->qdisc = qdisc_create_dflt(sch->dev_queue,
@@ -163,32 +161,15 @@ static int drr_delete_class(struct Qdisc *sch, unsigned long arg)
drr_purge_queue(cl);
qdisc_class_hash_remove(&q->clhash, &cl->common);
- BUG_ON(--cl->refcnt == 0);
- /*
- * This shouldn't happen: we "hold" one cops->get() when called
- * from tc_ctl_tclass; the destroy method is done from cops->put().
- */
-
sch_tree_unlock(sch);
- return 0;
-}
-
-static unsigned long drr_get_class(struct Qdisc *sch, u32 classid)
-{
- struct drr_class *cl = drr_find_class(sch, classid);
- if (cl != NULL)
- cl->refcnt++;
-
- return (unsigned long)cl;
+ drr_destroy_class(sch, cl);
+ return 0;
}
-static void drr_put_class(struct Qdisc *sch, unsigned long arg)
+static unsigned long drr_search_class(struct Qdisc *sch, u32 classid)
{
- struct drr_class *cl = (struct drr_class *)arg;
-
- if (--cl->refcnt == 0)
- drr_destroy_class(sch, cl);
+ return (unsigned long)drr_find_class(sch, classid);
}
static struct tcf_block *drr_tcf_block(struct Qdisc *sch, unsigned long cl)
@@ -246,8 +227,7 @@ static void drr_qlen_notify(struct Qdisc *csh, unsigned long arg)
{
struct drr_class *cl = (struct drr_class *)arg;
- if (cl->qdisc->q.qlen == 0)
- list_del(&cl->alist);
+ list_del(&cl->alist);
}
static int drr_dump_class(struct Qdisc *sch, unsigned long arg,
@@ -341,6 +321,7 @@ static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch,
case TC_ACT_STOLEN:
case TC_ACT_TRAP:
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+ /* fall through */
case TC_ACT_SHOT:
return NULL;
}
@@ -432,7 +413,7 @@ static int drr_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
struct drr_sched *q = qdisc_priv(sch);
int err;
- err = tcf_block_get(&q->block, &q->filter_list);
+ err = tcf_block_get(&q->block, &q->filter_list, sch);
if (err)
return err;
err = qdisc_class_hash_init(&q->clhash);
@@ -479,8 +460,7 @@ static void drr_destroy_qdisc(struct Qdisc *sch)
static const struct Qdisc_class_ops drr_class_ops = {
.change = drr_change_class,
.delete = drr_delete_class,
- .get = drr_get_class,
- .put = drr_put_class,
+ .find = drr_search_class,
.tcf_block = drr_tcf_block,
.bind_tcf = drr_bind_tcf,
.unbind_tcf = drr_unbind_tcf,
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 6d94fcc3592a..fb4fb71c68cf 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -85,21 +85,21 @@ static struct Qdisc *dsmark_leaf(struct Qdisc *sch, unsigned long arg)
return p->q;
}
-static unsigned long dsmark_get(struct Qdisc *sch, u32 classid)
+static unsigned long dsmark_find(struct Qdisc *sch, u32 classid)
{
- pr_debug("%s(sch %p,[qdisc %p],classid %x)\n",
- __func__, sch, qdisc_priv(sch), classid);
-
return TC_H_MIN(classid) + 1;
}
static unsigned long dsmark_bind_filter(struct Qdisc *sch,
unsigned long parent, u32 classid)
{
- return dsmark_get(sch, classid);
+ pr_debug("%s(sch %p,[qdisc %p],classid %x)\n",
+ __func__, sch, qdisc_priv(sch), classid);
+
+ return dsmark_find(sch, classid);
}
-static void dsmark_put(struct Qdisc *sch, unsigned long cl)
+static void dsmark_unbind_filter(struct Qdisc *sch, unsigned long cl)
{
}
@@ -344,7 +344,7 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt)
if (!opt)
goto errout;
- err = tcf_block_get(&p->block, &p->filter_list);
+ err = tcf_block_get(&p->block, &p->filter_list, sch);
if (err)
return err;
@@ -469,14 +469,13 @@ nla_put_failure:
static const struct Qdisc_class_ops dsmark_class_ops = {
.graft = dsmark_graft,
.leaf = dsmark_leaf,
- .get = dsmark_get,
- .put = dsmark_put,
+ .find = dsmark_find,
.change = dsmark_change,
.delete = dsmark_delete,
.walk = dsmark_walk,
.tcf_block = dsmark_tcf_block,
.bind_tcf = dsmark_bind_filter,
- .unbind_tcf = dsmark_put,
+ .unbind_tcf = dsmark_unbind_filter,
.dump = dsmark_dump_class,
};
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 337f2d6d81e4..0305d791ea94 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -105,6 +105,7 @@ static unsigned int fq_codel_classify(struct sk_buff *skb, struct Qdisc *sch,
case TC_ACT_QUEUED:
case TC_ACT_TRAP:
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+ /* fall through */
case TC_ACT_SHOT:
return 0;
}
@@ -481,7 +482,7 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt)
return err;
}
- err = tcf_block_get(&q->block, &q->filter_list);
+ err = tcf_block_get(&q->block, &q->filter_list, sch);
if (err)
return err;
@@ -491,10 +492,8 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt)
if (!q->flows)
return -ENOMEM;
q->backlogs = kvzalloc(q->flows_cnt * sizeof(u32), GFP_KERNEL);
- if (!q->backlogs) {
- kvfree(q->flows);
+ if (!q->backlogs)
return -ENOMEM;
- }
for (i = 0; i < q->flows_cnt; i++) {
struct fq_codel_flow *flow = q->flows + i;
@@ -579,7 +578,7 @@ static struct Qdisc *fq_codel_leaf(struct Qdisc *sch, unsigned long arg)
return NULL;
}
-static unsigned long fq_codel_get(struct Qdisc *sch, u32 classid)
+static unsigned long fq_codel_find(struct Qdisc *sch, u32 classid)
{
return 0;
}
@@ -592,7 +591,7 @@ static unsigned long fq_codel_bind(struct Qdisc *sch, unsigned long parent,
return 0;
}
-static void fq_codel_put(struct Qdisc *q, unsigned long cl)
+static void fq_codel_unbind(struct Qdisc *q, unsigned long cl)
{
}
@@ -683,11 +682,10 @@ static void fq_codel_walk(struct Qdisc *sch, struct qdisc_walker *arg)
static const struct Qdisc_class_ops fq_codel_class_ops = {
.leaf = fq_codel_leaf,
- .get = fq_codel_get,
- .put = fq_codel_put,
+ .find = fq_codel_find,
.tcf_block = fq_codel_tcf_block,
.bind_tcf = fq_codel_bind,
- .unbind_tcf = fq_codel_put,
+ .unbind_tcf = fq_codel_unbind,
.dump = fq_codel_dump_class,
.dump_stats = fq_codel_dump_class_stats,
.walk = fq_codel_walk,
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 57ba406f1437..3839cbbdc32b 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -29,6 +29,7 @@
#include <net/sch_generic.h>
#include <net/pkt_sched.h>
#include <net/dst.h>
+#include <trace/events/qdisc.h>
/* Qdisc to use by default */
const struct Qdisc_ops *default_qdisc_ops = &pfifo_fast_ops;
@@ -126,7 +127,7 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
q->q.qlen--;
} else
skb = NULL;
- return skb;
+ goto trace;
}
*validate = true;
skb = q->skb_bad_txq;
@@ -139,7 +140,8 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
q->q.qlen--;
goto bulk;
}
- return NULL;
+ skb = NULL;
+ goto trace;
}
if (!(q->flags & TCQ_F_ONETXQUEUE) ||
!netif_xmit_frozen_or_stopped(txq))
@@ -151,6 +153,8 @@ bulk:
else
try_bulk_dequeue_skb_slow(q, skb, packets);
}
+trace:
+ trace_qdisc_dequeue(q, txq, *packets, skb);
return skb;
}
@@ -284,9 +288,9 @@ unsigned long dev_trans_start(struct net_device *dev)
}
EXPORT_SYMBOL(dev_trans_start);
-static void dev_watchdog(unsigned long arg)
+static void dev_watchdog(struct timer_list *t)
{
- struct net_device *dev = (struct net_device *)arg;
+ struct net_device *dev = from_timer(dev, t, watchdog_timer);
netif_tx_lock(dev);
if (!qdisc_tx_is_noop(dev)) {
@@ -599,8 +603,14 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
struct Qdisc *sch;
unsigned int size = QDISC_ALIGN(sizeof(*sch)) + ops->priv_size;
int err = -ENOBUFS;
- struct net_device *dev = dev_queue->dev;
+ struct net_device *dev;
+ if (!dev_queue) {
+ err = -EINVAL;
+ goto errout;
+ }
+
+ dev = dev_queue->dev;
p = kzalloc_node(size, GFP_KERNEL,
netdev_queue_numa_node_read(dev_queue));
@@ -681,13 +691,12 @@ void qdisc_reset(struct Qdisc *qdisc)
qdisc->gso_skb = NULL;
}
qdisc->q.qlen = 0;
+ qdisc->qstats.backlog = 0;
}
EXPORT_SYMBOL(qdisc_reset);
-static void qdisc_rcu_free(struct rcu_head *head)
+static void qdisc_free(struct Qdisc *qdisc)
{
- struct Qdisc *qdisc = container_of(head, struct Qdisc, rcu_head);
-
if (qdisc_is_percpu_stats(qdisc)) {
free_percpu(qdisc->cpu_bstats);
free_percpu(qdisc->cpu_qstats);
@@ -720,11 +729,7 @@ void qdisc_destroy(struct Qdisc *qdisc)
kfree_skb_list(qdisc->gso_skb);
kfree_skb(qdisc->skb_bad_txq);
- /*
- * gen_estimator est_timer() might access qdisc->q.lock,
- * wait a RCU grace period before freeing qdisc.
- */
- call_rcu(&qdisc->rcu_head, qdisc_rcu_free);
+ qdisc_free(qdisc);
}
EXPORT_SYMBOL(qdisc_destroy);
@@ -785,7 +790,7 @@ static void attach_default_qdiscs(struct net_device *dev)
dev->priv_flags & IFF_NO_QUEUE) {
netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
dev->qdisc = txq->qdisc_sleeping;
- refcount_inc(&dev->qdisc->refcnt);
+ qdisc_refcount_inc(dev->qdisc);
} else {
qdisc = qdisc_create_dflt(txq, &mq_qdisc_ops, TC_H_ROOT);
if (qdisc) {
@@ -955,7 +960,7 @@ void dev_init_scheduler(struct net_device *dev)
if (dev_ingress_queue(dev))
dev_init_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
- setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev);
+ timer_setup(&dev->watchdog_timer, dev_watchdog, 0);
}
static void shutdown_scheduler_queue(struct net_device *dev,
@@ -1019,3 +1024,49 @@ void psched_ratecfg_precompute(struct psched_ratecfg *r,
}
}
EXPORT_SYMBOL(psched_ratecfg_precompute);
+
+static void mini_qdisc_rcu_func(struct rcu_head *head)
+{
+}
+
+void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
+ struct tcf_proto *tp_head)
+{
+ struct mini_Qdisc *miniq_old = rtnl_dereference(*miniqp->p_miniq);
+ struct mini_Qdisc *miniq;
+
+ if (!tp_head) {
+ RCU_INIT_POINTER(*miniqp->p_miniq, NULL);
+ return;
+ }
+
+ miniq = !miniq_old || miniq_old == &miniqp->miniq2 ?
+ &miniqp->miniq1 : &miniqp->miniq2;
+
+ /* We need to make sure that readers won't see the miniq
+ * we are about to modify. So wait until previous call_rcu_bh callback
+ * is done.
+ */
+ rcu_barrier_bh();
+ miniq->filter_list = tp_head;
+ rcu_assign_pointer(*miniqp->p_miniq, miniq);
+
+ if (miniq_old)
+ /* This is counterpart of the rcu barrier above. We need to
+ * block potential new user of miniq_old until all readers
+ * are not seeing it.
+ */
+ call_rcu_bh(&miniq_old->rcu, mini_qdisc_rcu_func);
+}
+EXPORT_SYMBOL(mini_qdisc_pair_swap);
+
+void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc,
+ struct mini_Qdisc __rcu **p_miniq)
+{
+ miniqp->miniq1.cpu_bstats = qdisc->cpu_bstats;
+ miniqp->miniq1.cpu_qstats = qdisc->cpu_qstats;
+ miniqp->miniq2.cpu_bstats = qdisc->cpu_bstats;
+ miniqp->miniq2.cpu_qstats = qdisc->cpu_qstats;
+ miniqp->p_miniq = p_miniq;
+}
+EXPORT_SYMBOL(mini_qdisc_pair_init);
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index fd15200f8627..d04068a97d81 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -110,7 +110,6 @@ enum hfsc_class_flags {
struct hfsc_class {
struct Qdisc_class_common cl_common;
- unsigned int refcnt; /* usage count */
struct gnet_stats_basic_packed bstats;
struct gnet_stats_queue qstats;
@@ -829,28 +828,6 @@ update_vf(struct hfsc_class *cl, unsigned int len, u64 cur_time)
}
}
-static void
-set_active(struct hfsc_class *cl, unsigned int len)
-{
- if (cl->cl_flags & HFSC_RSC)
- init_ed(cl, len);
- if (cl->cl_flags & HFSC_FSC)
- init_vf(cl, len);
-
-}
-
-static void
-set_passive(struct hfsc_class *cl)
-{
- if (cl->cl_flags & HFSC_RSC)
- eltree_remove(cl);
-
- /*
- * vttree is now handled in update_vf() so that update_vf(cl, 0, 0)
- * needs to be called explicitly to remove a class from vttree.
- */
-}
-
static unsigned int
qdisc_peek_len(struct Qdisc *sch)
{
@@ -981,6 +958,8 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
}
if (cl != NULL) {
+ int old_flags;
+
if (parentid) {
if (cl->cl_parent &&
cl->cl_parent->cl_common.classid != parentid)
@@ -1001,6 +980,8 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
}
sch_tree_lock(sch);
+ old_flags = cl->cl_flags;
+
if (rsc != NULL)
hfsc_change_rsc(cl, rsc, cur_time);
if (fsc != NULL)
@@ -1009,10 +990,21 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
hfsc_change_usc(cl, usc, cur_time);
if (cl->qdisc->q.qlen != 0) {
- if (cl->cl_flags & HFSC_RSC)
- update_ed(cl, qdisc_peek_len(cl->qdisc));
- if (cl->cl_flags & HFSC_FSC)
- update_vf(cl, 0, cur_time);
+ int len = qdisc_peek_len(cl->qdisc);
+
+ if (cl->cl_flags & HFSC_RSC) {
+ if (old_flags & HFSC_RSC)
+ update_ed(cl, len);
+ else
+ init_ed(cl, len);
+ }
+
+ if (cl->cl_flags & HFSC_FSC) {
+ if (old_flags & HFSC_FSC)
+ update_vf(cl, 0, cur_time);
+ else
+ init_vf(cl, len);
+ }
}
sch_tree_unlock(sch);
@@ -1041,7 +1033,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (cl == NULL)
return -ENOBUFS;
- err = tcf_block_get(&cl->block, &cl->filter_list);
+ err = tcf_block_get(&cl->block, &cl->filter_list, sch);
if (err) {
kfree(cl);
return err;
@@ -1067,7 +1059,6 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
hfsc_change_usc(cl, usc, 0);
cl->cl_common.classid = classid;
- cl->refcnt = 1;
cl->sched = q;
cl->cl_parent = parent;
cl->qdisc = qdisc_create_dflt(sch->dev_queue,
@@ -1123,13 +1114,9 @@ hfsc_delete_class(struct Qdisc *sch, unsigned long arg)
hfsc_purge_queue(sch, cl);
qdisc_class_hash_remove(&q->clhash, &cl->cl_common);
- BUG_ON(--cl->refcnt == 0);
- /*
- * This shouldn't happen: we "hold" one cops->get() when called
- * from tc_ctl_tclass; the destroy method is done from cops->put().
- */
-
sch_tree_unlock(sch);
+
+ hfsc_destroy_class(sch, cl);
return 0;
}
@@ -1157,6 +1144,7 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
case TC_ACT_STOLEN:
case TC_ACT_TRAP:
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+ /* fall through */
case TC_ACT_SHOT:
return NULL;
}
@@ -1221,30 +1209,18 @@ hfsc_qlen_notify(struct Qdisc *sch, unsigned long arg)
{
struct hfsc_class *cl = (struct hfsc_class *)arg;
- if (cl->qdisc->q.qlen == 0) {
- update_vf(cl, 0, 0);
- set_passive(cl);
- }
+ /* vttree is now handled in update_vf() so that update_vf(cl, 0, 0)
+ * needs to be called explicitly to remove a class from vttree.
+ */
+ update_vf(cl, 0, 0);
+ if (cl->cl_flags & HFSC_RSC)
+ eltree_remove(cl);
}
static unsigned long
-hfsc_get_class(struct Qdisc *sch, u32 classid)
-{
- struct hfsc_class *cl = hfsc_find_class(classid, sch);
-
- if (cl != NULL)
- cl->refcnt++;
-
- return (unsigned long)cl;
-}
-
-static void
-hfsc_put_class(struct Qdisc *sch, unsigned long arg)
+hfsc_search_class(struct Qdisc *sch, u32 classid)
{
- struct hfsc_class *cl = (struct hfsc_class *)arg;
-
- if (--cl->refcnt == 0)
- hfsc_destroy_class(sch, cl);
+ return (unsigned long)hfsc_find_class(classid, sch);
}
static unsigned long
@@ -1418,6 +1394,8 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
struct tc_hfsc_qopt *qopt;
int err;
+ qdisc_watchdog_init(&q->watchdog, sch);
+
if (opt == NULL || nla_len(opt) < sizeof(*qopt))
return -EINVAL;
qopt = nla_data(opt);
@@ -1428,12 +1406,11 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
return err;
q->eligible = RB_ROOT;
- err = tcf_block_get(&q->root.block, &q->root.filter_list);
+ err = tcf_block_get(&q->root.block, &q->root.filter_list, sch);
if (err)
- goto err_tcf;
+ return err;
q->root.cl_common.classid = sch->handle;
- q->root.refcnt = 1;
q->root.sched = q;
q->root.qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
sch->handle);
@@ -1448,13 +1425,7 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
qdisc_class_hash_insert(&q->clhash, &q->root.cl_common);
qdisc_class_hash_grow(sch, &q->clhash);
- qdisc_watchdog_init(&q->watchdog, sch);
-
return 0;
-
-err_tcf:
- qdisc_class_hash_destroy(&q->clhash);
- return err;
}
static int
@@ -1585,7 +1556,12 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
}
if (cl->qdisc->q.qlen == 1) {
- set_active(cl, qdisc_pkt_len(skb));
+ unsigned int len = qdisc_pkt_len(skb);
+
+ if (cl->cl_flags & HFSC_RSC)
+ init_ed(cl, len);
+ if (cl->cl_flags & HFSC_FSC)
+ init_vf(cl, len);
/*
* If this is the first packet, isolate the head so an eventual
* head drop before the first dequeue operation has no chance
@@ -1649,18 +1625,18 @@ hfsc_dequeue(struct Qdisc *sch)
if (realtime)
cl->cl_cumul += qdisc_pkt_len(skb);
- if (cl->qdisc->q.qlen != 0) {
- if (cl->cl_flags & HFSC_RSC) {
+ if (cl->cl_flags & HFSC_RSC) {
+ if (cl->qdisc->q.qlen != 0) {
/* update ed */
next_len = qdisc_peek_len(cl->qdisc);
if (realtime)
update_ed(cl, next_len);
else
update_d(cl, next_len);
+ } else {
+ /* the class becomes passive */
+ eltree_remove(cl);
}
- } else {
- /* the class becomes passive */
- set_passive(cl);
}
qdisc_bstats_update(sch, skb);
@@ -1676,8 +1652,7 @@ static const struct Qdisc_class_ops hfsc_class_ops = {
.graft = hfsc_graft_class,
.leaf = hfsc_class_leaf,
.qlen_notify = hfsc_qlen_notify,
- .get = hfsc_get_class,
- .put = hfsc_put_class,
+ .find = hfsc_search_class,
.bind_tcf = hfsc_bind_tcf,
.unbind_tcf = hfsc_unbind_tcf,
.tcf_block = hfsc_tcf_block,
diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index 51d3ba682af9..73a53c08091b 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -477,6 +477,9 @@ static void hhf_destroy(struct Qdisc *sch)
kvfree(q->hhf_valid_bits[i]);
}
+ if (!q->hh_flows)
+ return;
+
for (i = 0; i < HH_FLOWS_CNT; i++) {
struct hh_flow_state *flow, *next;
struct list_head *head = &q->hh_flows[i];
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 5d65ec5207e9..fa0380730ff0 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -107,7 +107,6 @@ struct htb_class {
struct tcf_proto __rcu *filter_list; /* class attached filters */
struct tcf_block *block;
int filter_cnt;
- int refcnt; /* usage count of this class */
int level; /* our level (see above) */
unsigned int children;
@@ -143,6 +142,7 @@ struct htb_class {
struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */
unsigned int drops ____cacheline_aligned_in_smp;
+ unsigned int overlimits;
};
struct htb_level {
@@ -193,6 +193,10 @@ static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
return container_of(clc, struct htb_class, common);
}
+static unsigned long htb_search(struct Qdisc *sch, u32 handle)
+{
+ return (unsigned long)htb_find(handle, sch);
+}
/**
* htb_classify - classify a packet into class
*
@@ -240,6 +244,7 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
case TC_ACT_STOLEN:
case TC_ACT_TRAP:
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+ /* fall through */
case TC_ACT_SHOT:
return NULL;
}
@@ -530,6 +535,9 @@ htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff)
if (new_mode == cl->cmode)
return;
+ if (new_mode == HTB_CANT_SEND)
+ cl->overlimits++;
+
if (cl->prio_activity) { /* not necessary: speed optimization */
if (cl->cmode != HTB_CANT_SEND)
htb_deactivate_prios(q, cl);
@@ -1017,10 +1025,13 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
int err;
int i;
+ qdisc_watchdog_init(&q->watchdog, sch);
+ INIT_WORK(&q->work, htb_work_func);
+
if (!opt)
return -EINVAL;
- err = tcf_block_get(&q->block, &q->filter_list);
+ err = tcf_block_get(&q->block, &q->filter_list, sch);
if (err)
return err;
@@ -1041,8 +1052,6 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
for (i = 0; i < TC_HTB_NUMPRIO; i++)
INIT_LIST_HEAD(q->drops + i);
- qdisc_watchdog_init(&q->watchdog, sch);
- INIT_WORK(&q->work, htb_work_func);
qdisc_skb_head_init(&q->direct_queue);
if (tb[TCA_HTB_DIRECT_QLEN])
@@ -1139,6 +1148,7 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
struct htb_class *cl = (struct htb_class *)arg;
struct gnet_stats_queue qs = {
.drops = cl->drops,
+ .overlimits = cl->overlimits,
};
__u32 qlen = 0;
@@ -1186,16 +1196,7 @@ static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
{
struct htb_class *cl = (struct htb_class *)arg;
- if (cl->un.leaf.q->q.qlen == 0)
- htb_deactivate(qdisc_priv(sch), cl);
-}
-
-static unsigned long htb_get(struct Qdisc *sch, u32 classid)
-{
- struct htb_class *cl = htb_find(classid, sch);
- if (cl)
- cl->refcnt++;
- return (unsigned long)cl;
+ htb_deactivate(qdisc_priv(sch), cl);
}
static inline int htb_parent_last_child(struct htb_class *cl)
@@ -1317,22 +1318,10 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
if (last_child)
htb_parent_to_leaf(q, cl, new_q);
- BUG_ON(--cl->refcnt == 0);
- /*
- * This shouldn't happen: we "hold" one cops->get() when called
- * from tc_ctl_tclass; the destroy method is done from cops->put().
- */
-
sch_tree_unlock(sch);
- return 0;
-}
-static void htb_put(struct Qdisc *sch, unsigned long arg)
-{
- struct htb_class *cl = (struct htb_class *)arg;
-
- if (--cl->refcnt == 0)
- htb_destroy_class(sch, cl);
+ htb_destroy_class(sch, cl);
+ return 0;
}
static int htb_change_class(struct Qdisc *sch, u32 classid,
@@ -1405,7 +1394,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
if (!cl)
goto failure;
- err = tcf_block_get(&cl->block, &cl->filter_list);
+ err = tcf_block_get(&cl->block, &cl->filter_list, sch);
if (err) {
kfree(cl);
goto failure;
@@ -1423,7 +1412,6 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
}
}
- cl->refcnt = 1;
cl->children = 0;
INIT_LIST_HEAD(&cl->un.leaf.drop_list);
RB_CLEAR_NODE(&cl->pq_node);
@@ -1599,8 +1587,7 @@ static const struct Qdisc_class_ops htb_class_ops = {
.graft = htb_graft,
.leaf = htb_leaf,
.qlen_notify = htb_qlen_notify,
- .get = htb_get,
- .put = htb_put,
+ .find = htb_search,
.change = htb_change_class,
.delete = htb_delete,
.walk = htb_walk,
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index d8a9bebcab90..5ecc38f35d47 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -20,6 +20,8 @@
struct ingress_sched_data {
struct tcf_block *block;
+ struct tcf_block_ext_info block_info;
+ struct mini_Qdisc_pair miniqp;
};
static struct Qdisc *ingress_leaf(struct Qdisc *sch, unsigned long arg)
@@ -27,23 +29,18 @@ static struct Qdisc *ingress_leaf(struct Qdisc *sch, unsigned long arg)
return NULL;
}
-static unsigned long ingress_get(struct Qdisc *sch, u32 classid)
+static unsigned long ingress_find(struct Qdisc *sch, u32 classid)
{
return TC_H_MIN(classid) + 1;
}
-static bool ingress_cl_offload(u32 classid)
-{
- return true;
-}
-
static unsigned long ingress_bind_filter(struct Qdisc *sch,
unsigned long parent, u32 classid)
{
- return ingress_get(sch, classid);
+ return ingress_find(sch, classid);
}
-static void ingress_put(struct Qdisc *sch, unsigned long cl)
+static void ingress_unbind_filter(struct Qdisc *sch, unsigned long cl)
{
}
@@ -58,13 +55,26 @@ static struct tcf_block *ingress_tcf_block(struct Qdisc *sch, unsigned long cl)
return q->block;
}
+static void clsact_chain_head_change(struct tcf_proto *tp_head, void *priv)
+{
+ struct mini_Qdisc_pair *miniqp = priv;
+
+ mini_qdisc_pair_swap(miniqp, tp_head);
+}
+
static int ingress_init(struct Qdisc *sch, struct nlattr *opt)
{
struct ingress_sched_data *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
int err;
- err = tcf_block_get(&q->block, &dev->ingress_cl_list);
+ mini_qdisc_pair_init(&q->miniqp, sch, &dev->miniq_ingress);
+
+ q->block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
+ q->block_info.chain_head_change = clsact_chain_head_change;
+ q->block_info.chain_head_change_priv = &q->miniqp;
+
+ err = tcf_block_get_ext(&q->block, sch, &q->block_info);
if (err)
return err;
@@ -78,7 +88,7 @@ static void ingress_destroy(struct Qdisc *sch)
{
struct ingress_sched_data *q = qdisc_priv(sch);
- tcf_block_put(q->block);
+ tcf_block_put_ext(q->block, sch, &q->block_info);
net_dec_ingress_queue();
}
@@ -99,13 +109,11 @@ nla_put_failure:
static const struct Qdisc_class_ops ingress_class_ops = {
.leaf = ingress_leaf,
- .get = ingress_get,
- .put = ingress_put,
+ .find = ingress_find,
.walk = ingress_walk,
.tcf_block = ingress_tcf_block,
- .tcf_cl_offload = ingress_cl_offload,
.bind_tcf = ingress_bind_filter,
- .unbind_tcf = ingress_put,
+ .unbind_tcf = ingress_unbind_filter,
};
static struct Qdisc_ops ingress_qdisc_ops __read_mostly = {
@@ -121,9 +129,13 @@ static struct Qdisc_ops ingress_qdisc_ops __read_mostly = {
struct clsact_sched_data {
struct tcf_block *ingress_block;
struct tcf_block *egress_block;
+ struct tcf_block_ext_info ingress_block_info;
+ struct tcf_block_ext_info egress_block_info;
+ struct mini_Qdisc_pair miniqp_ingress;
+ struct mini_Qdisc_pair miniqp_egress;
};
-static unsigned long clsact_get(struct Qdisc *sch, u32 classid)
+static unsigned long clsact_find(struct Qdisc *sch, u32 classid)
{
switch (TC_H_MIN(classid)) {
case TC_H_MIN(TC_H_MIN_INGRESS):
@@ -134,15 +146,10 @@ static unsigned long clsact_get(struct Qdisc *sch, u32 classid)
}
}
-static bool clsact_cl_offload(u32 classid)
-{
- return TC_H_MIN(classid) == TC_H_MIN(TC_H_MIN_INGRESS);
-}
-
static unsigned long clsact_bind_filter(struct Qdisc *sch,
unsigned long parent, u32 classid)
{
- return clsact_get(sch, classid);
+ return clsact_find(sch, classid);
}
static struct tcf_block *clsact_tcf_block(struct Qdisc *sch, unsigned long cl)
@@ -165,13 +172,25 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt)
struct net_device *dev = qdisc_dev(sch);
int err;
- err = tcf_block_get(&q->ingress_block, &dev->ingress_cl_list);
+ mini_qdisc_pair_init(&q->miniqp_ingress, sch, &dev->miniq_ingress);
+
+ q->ingress_block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
+ q->ingress_block_info.chain_head_change = clsact_chain_head_change;
+ q->ingress_block_info.chain_head_change_priv = &q->miniqp_ingress;
+
+ err = tcf_block_get_ext(&q->ingress_block, sch, &q->ingress_block_info);
if (err)
return err;
- err = tcf_block_get(&q->egress_block, &dev->egress_cl_list);
+ mini_qdisc_pair_init(&q->miniqp_egress, sch, &dev->miniq_egress);
+
+ q->egress_block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS;
+ q->egress_block_info.chain_head_change = clsact_chain_head_change;
+ q->egress_block_info.chain_head_change_priv = &q->miniqp_egress;
+
+ err = tcf_block_get_ext(&q->egress_block, sch, &q->egress_block_info);
if (err)
- return err;
+ goto err_egress_block_get;
net_inc_ingress_queue();
net_inc_egress_queue();
@@ -179,14 +198,18 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt)
sch->flags |= TCQ_F_CPUSTATS;
return 0;
+
+err_egress_block_get:
+ tcf_block_put_ext(q->ingress_block, sch, &q->ingress_block_info);
+ return err;
}
static void clsact_destroy(struct Qdisc *sch)
{
struct clsact_sched_data *q = qdisc_priv(sch);
- tcf_block_put(q->egress_block);
- tcf_block_put(q->ingress_block);
+ tcf_block_put_ext(q->egress_block, sch, &q->egress_block_info);
+ tcf_block_put_ext(q->ingress_block, sch, &q->ingress_block_info);
net_dec_ingress_queue();
net_dec_egress_queue();
@@ -194,13 +217,11 @@ static void clsact_destroy(struct Qdisc *sch)
static const struct Qdisc_class_ops clsact_class_ops = {
.leaf = ingress_leaf,
- .get = clsact_get,
- .put = ingress_put,
+ .find = clsact_find,
.walk = ingress_walk,
.tcf_block = clsact_tcf_block,
- .tcf_cl_offload = clsact_cl_offload,
.bind_tcf = clsact_bind_filter,
- .unbind_tcf = ingress_put,
+ .unbind_tcf = ingress_unbind_filter,
};
static struct Qdisc_ops clsact_qdisc_ops __read_mostly = {
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index cadfdd4f1e52..213b586a06a0 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -130,15 +130,7 @@ static struct netdev_queue *mq_queue_get(struct Qdisc *sch, unsigned long cl)
static struct netdev_queue *mq_select_queue(struct Qdisc *sch,
struct tcmsg *tcm)
{
- unsigned int ntx = TC_H_MIN(tcm->tcm_parent);
- struct netdev_queue *dev_queue = mq_queue_get(sch, ntx);
-
- if (!dev_queue) {
- struct net_device *dev = qdisc_dev(sch);
-
- return netdev_get_tx_queue(dev, 0);
- }
- return dev_queue;
+ return mq_queue_get(sch, TC_H_MIN(tcm->tcm_parent));
}
static int mq_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
@@ -165,7 +157,7 @@ static struct Qdisc *mq_leaf(struct Qdisc *sch, unsigned long cl)
return dev_queue->qdisc_sleeping;
}
-static unsigned long mq_get(struct Qdisc *sch, u32 classid)
+static unsigned long mq_find(struct Qdisc *sch, u32 classid)
{
unsigned int ntx = TC_H_MIN(classid);
@@ -174,10 +166,6 @@ static unsigned long mq_get(struct Qdisc *sch, u32 classid)
return ntx;
}
-static void mq_put(struct Qdisc *sch, unsigned long cl)
-{
-}
-
static int mq_dump_class(struct Qdisc *sch, unsigned long cl,
struct sk_buff *skb, struct tcmsg *tcm)
{
@@ -223,8 +211,7 @@ static const struct Qdisc_class_ops mq_class_ops = {
.select_queue = mq_select_queue,
.graft = mq_graft,
.leaf = mq_leaf,
- .get = mq_get,
- .put = mq_put,
+ .find = mq_find,
.walk = mq_walk,
.dump = mq_dump_class,
.dump_stats = mq_dump_class_stats,
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index e0c02725cd48..b85885a9d8a1 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -18,10 +18,16 @@
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/sch_generic.h>
+#include <net/pkt_cls.h>
struct mqprio_sched {
struct Qdisc **qdiscs;
+ u16 mode;
+ u16 shaper;
int hw_offload;
+ u32 flags;
+ u64 min_rate[TC_QOPT_MAX_QUEUE];
+ u64 max_rate[TC_QOPT_MAX_QUEUE];
};
static void mqprio_destroy(struct Qdisc *sch)
@@ -39,11 +45,18 @@ static void mqprio_destroy(struct Qdisc *sch)
}
if (priv->hw_offload && dev->netdev_ops->ndo_setup_tc) {
- struct tc_mqprio_qopt offload = { 0 };
- struct tc_to_netdev tc = { .type = TC_SETUP_MQPRIO,
- { .mqprio = &offload } };
-
- dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0, 0, &tc);
+ struct tc_mqprio_qopt_offload mqprio = { { 0 } };
+
+ switch (priv->mode) {
+ case TC_MQPRIO_MODE_DCB:
+ case TC_MQPRIO_MODE_CHANNEL:
+ dev->netdev_ops->ndo_setup_tc(dev,
+ TC_SETUP_QDISC_MQPRIO,
+ &mqprio);
+ break;
+ default:
+ return;
+ }
} else {
netdev_set_num_tc(dev, 0);
}
@@ -99,6 +112,26 @@ static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt)
return 0;
}
+static const struct nla_policy mqprio_policy[TCA_MQPRIO_MAX + 1] = {
+ [TCA_MQPRIO_MODE] = { .len = sizeof(u16) },
+ [TCA_MQPRIO_SHAPER] = { .len = sizeof(u16) },
+ [TCA_MQPRIO_MIN_RATE64] = { .type = NLA_NESTED },
+ [TCA_MQPRIO_MAX_RATE64] = { .type = NLA_NESTED },
+};
+
+static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
+ const struct nla_policy *policy, int len)
+{
+ int nested_len = nla_len(nla) - NLA_ALIGN(len);
+
+ if (nested_len >= nla_attr_size(0))
+ return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len),
+ nested_len, policy, NULL);
+
+ memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
+ return 0;
+}
+
static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
{
struct net_device *dev = qdisc_dev(sch);
@@ -107,6 +140,10 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
struct Qdisc *qdisc;
int i, err = -EOPNOTSUPP;
struct tc_mqprio_qopt *qopt = NULL;
+ struct nlattr *tb[TCA_MQPRIO_MAX + 1];
+ struct nlattr *attr;
+ int rem;
+ int len;
BUILD_BUG_ON(TC_MAX_QUEUE != TC_QOPT_MAX_QUEUE);
BUILD_BUG_ON(TC_BITMASK != TC_QOPT_BITMASK);
@@ -117,6 +154,10 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
if (!netif_is_multiqueue(dev))
return -EOPNOTSUPP;
+ /* make certain can allocate enough classids to handle queues */
+ if (dev->num_tx_queues >= TC_H_MIN_PRIORITY)
+ return -ENOMEM;
+
if (!opt || nla_len(opt) < sizeof(*qopt))
return -EINVAL;
@@ -124,6 +165,59 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
if (mqprio_parse_opt(dev, qopt))
return -EINVAL;
+ len = nla_len(opt) - NLA_ALIGN(sizeof(*qopt));
+ if (len > 0) {
+ err = parse_attr(tb, TCA_MQPRIO_MAX, opt, mqprio_policy,
+ sizeof(*qopt));
+ if (err < 0)
+ return err;
+
+ if (!qopt->hw)
+ return -EINVAL;
+
+ if (tb[TCA_MQPRIO_MODE]) {
+ priv->flags |= TC_MQPRIO_F_MODE;
+ priv->mode = *(u16 *)nla_data(tb[TCA_MQPRIO_MODE]);
+ }
+
+ if (tb[TCA_MQPRIO_SHAPER]) {
+ priv->flags |= TC_MQPRIO_F_SHAPER;
+ priv->shaper = *(u16 *)nla_data(tb[TCA_MQPRIO_SHAPER]);
+ }
+
+ if (tb[TCA_MQPRIO_MIN_RATE64]) {
+ if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE)
+ return -EINVAL;
+ i = 0;
+ nla_for_each_nested(attr, tb[TCA_MQPRIO_MIN_RATE64],
+ rem) {
+ if (nla_type(attr) != TCA_MQPRIO_MIN_RATE64)
+ return -EINVAL;
+ if (i >= qopt->num_tc)
+ break;
+ priv->min_rate[i] = *(u64 *)nla_data(attr);
+ i++;
+ }
+ priv->flags |= TC_MQPRIO_F_MIN_RATE;
+ }
+
+ if (tb[TCA_MQPRIO_MAX_RATE64]) {
+ if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE)
+ return -EINVAL;
+ i = 0;
+ nla_for_each_nested(attr, tb[TCA_MQPRIO_MAX_RATE64],
+ rem) {
+ if (nla_type(attr) != TCA_MQPRIO_MAX_RATE64)
+ return -EINVAL;
+ if (i >= qopt->num_tc)
+ break;
+ priv->max_rate[i] = *(u64 *)nla_data(attr);
+ i++;
+ }
+ priv->flags |= TC_MQPRIO_F_MAX_RATE;
+ }
+ }
+
/* pre-allocate qdisc, attachment can't fail */
priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]),
GFP_KERNEL);
@@ -148,16 +242,36 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
* supplied and verified mapping
*/
if (qopt->hw) {
- struct tc_mqprio_qopt offload = *qopt;
- struct tc_to_netdev tc = { .type = TC_SETUP_MQPRIO,
- { .mqprio = &offload } };
+ struct tc_mqprio_qopt_offload mqprio = {.qopt = *qopt};
- err = dev->netdev_ops->ndo_setup_tc(dev, sch->handle,
- 0, 0, &tc);
+ switch (priv->mode) {
+ case TC_MQPRIO_MODE_DCB:
+ if (priv->shaper != TC_MQPRIO_SHAPER_DCB)
+ return -EINVAL;
+ break;
+ case TC_MQPRIO_MODE_CHANNEL:
+ mqprio.flags = priv->flags;
+ if (priv->flags & TC_MQPRIO_F_MODE)
+ mqprio.mode = priv->mode;
+ if (priv->flags & TC_MQPRIO_F_SHAPER)
+ mqprio.shaper = priv->shaper;
+ if (priv->flags & TC_MQPRIO_F_MIN_RATE)
+ for (i = 0; i < mqprio.qopt.num_tc; i++)
+ mqprio.min_rate[i] = priv->min_rate[i];
+ if (priv->flags & TC_MQPRIO_F_MAX_RATE)
+ for (i = 0; i < mqprio.qopt.num_tc; i++)
+ mqprio.max_rate[i] = priv->max_rate[i];
+ break;
+ default:
+ return -EINVAL;
+ }
+ err = dev->netdev_ops->ndo_setup_tc(dev,
+ TC_SETUP_QDISC_MQPRIO,
+ &mqprio);
if (err)
return err;
- priv->hw_offload = offload.hw;
+ priv->hw_offload = mqprio.qopt.hw;
} else {
netdev_set_num_tc(dev, qopt->num_tc);
for (i = 0; i < qopt->num_tc; i++)
@@ -197,7 +311,7 @@ static struct netdev_queue *mqprio_queue_get(struct Qdisc *sch,
unsigned long cl)
{
struct net_device *dev = qdisc_dev(sch);
- unsigned long ntx = cl - 1 - netdev_get_num_tc(dev);
+ unsigned long ntx = cl - 1;
if (ntx >= dev->num_tx_queues)
return NULL;
@@ -227,11 +341,51 @@ static int mqprio_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
return 0;
}
+static int dump_rates(struct mqprio_sched *priv,
+ struct tc_mqprio_qopt *opt, struct sk_buff *skb)
+{
+ struct nlattr *nest;
+ int i;
+
+ if (priv->flags & TC_MQPRIO_F_MIN_RATE) {
+ nest = nla_nest_start(skb, TCA_MQPRIO_MIN_RATE64);
+ if (!nest)
+ goto nla_put_failure;
+
+ for (i = 0; i < opt->num_tc; i++) {
+ if (nla_put(skb, TCA_MQPRIO_MIN_RATE64,
+ sizeof(priv->min_rate[i]),
+ &priv->min_rate[i]))
+ goto nla_put_failure;
+ }
+ nla_nest_end(skb, nest);
+ }
+
+ if (priv->flags & TC_MQPRIO_F_MAX_RATE) {
+ nest = nla_nest_start(skb, TCA_MQPRIO_MAX_RATE64);
+ if (!nest)
+ goto nla_put_failure;
+
+ for (i = 0; i < opt->num_tc; i++) {
+ if (nla_put(skb, TCA_MQPRIO_MAX_RATE64,
+ sizeof(priv->max_rate[i]),
+ &priv->max_rate[i]))
+ goto nla_put_failure;
+ }
+ nla_nest_end(skb, nest);
+ }
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nest);
+ return -1;
+}
+
static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct net_device *dev = qdisc_dev(sch);
struct mqprio_sched *priv = qdisc_priv(sch);
- unsigned char *b = skb_tail_pointer(skb);
+ struct nlattr *nla = (struct nlattr *)skb_tail_pointer(skb);
struct tc_mqprio_qopt opt = { 0 };
struct Qdisc *qdisc;
unsigned int i;
@@ -262,12 +416,25 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
opt.offset[i] = dev->tc_to_txq[i].offset;
}
- if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
+ if (nla_put(skb, TCA_OPTIONS, NLA_ALIGN(sizeof(opt)), &opt))
+ goto nla_put_failure;
+
+ if ((priv->flags & TC_MQPRIO_F_MODE) &&
+ nla_put_u16(skb, TCA_MQPRIO_MODE, priv->mode))
goto nla_put_failure;
- return skb->len;
+ if ((priv->flags & TC_MQPRIO_F_SHAPER) &&
+ nla_put_u16(skb, TCA_MQPRIO_SHAPER, priv->shaper))
+ goto nla_put_failure;
+
+ if ((priv->flags & TC_MQPRIO_F_MIN_RATE ||
+ priv->flags & TC_MQPRIO_F_MAX_RATE) &&
+ (dump_rates(priv, &opt, skb) != 0))
+ goto nla_put_failure;
+
+ return nla_nest_end(skb, nla);
nla_put_failure:
- nlmsg_trim(skb, b);
+ nlmsg_trim(skb, nla);
return -1;
}
@@ -281,47 +448,40 @@ static struct Qdisc *mqprio_leaf(struct Qdisc *sch, unsigned long cl)
return dev_queue->qdisc_sleeping;
}
-static unsigned long mqprio_get(struct Qdisc *sch, u32 classid)
+static unsigned long mqprio_find(struct Qdisc *sch, u32 classid)
{
struct net_device *dev = qdisc_dev(sch);
unsigned int ntx = TC_H_MIN(classid);
- if (ntx > dev->num_tx_queues + netdev_get_num_tc(dev))
- return 0;
- return ntx;
-}
+ /* There are essentially two regions here that have valid classid
+ * values. The first region will have a classid value of 1 through
+ * num_tx_queues. All of these are backed by actual Qdiscs.
+ */
+ if (ntx < TC_H_MIN_PRIORITY)
+ return (ntx <= dev->num_tx_queues) ? ntx : 0;
-static void mqprio_put(struct Qdisc *sch, unsigned long cl)
-{
+ /* The second region represents the hardware traffic classes. These
+ * are represented by classid values of TC_H_MIN_PRIORITY through
+ * TC_H_MIN_PRIORITY + netdev_get_num_tc - 1
+ */
+ return ((ntx - TC_H_MIN_PRIORITY) < netdev_get_num_tc(dev)) ? ntx : 0;
}
static int mqprio_dump_class(struct Qdisc *sch, unsigned long cl,
struct sk_buff *skb, struct tcmsg *tcm)
{
- struct net_device *dev = qdisc_dev(sch);
+ if (cl < TC_H_MIN_PRIORITY) {
+ struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
+ struct net_device *dev = qdisc_dev(sch);
+ int tc = netdev_txq_to_tc(dev, cl - 1);
- if (cl <= netdev_get_num_tc(dev)) {
+ tcm->tcm_parent = (tc < 0) ? 0 :
+ TC_H_MAKE(TC_H_MAJ(sch->handle),
+ TC_H_MIN(tc + TC_H_MIN_PRIORITY));
+ tcm->tcm_info = dev_queue->qdisc_sleeping->handle;
+ } else {
tcm->tcm_parent = TC_H_ROOT;
tcm->tcm_info = 0;
- } else {
- int i;
- struct netdev_queue *dev_queue;
-
- dev_queue = mqprio_queue_get(sch, cl);
- tcm->tcm_parent = 0;
- for (i = 0; i < netdev_get_num_tc(dev); i++) {
- struct netdev_tc_txq tc = dev->tc_to_txq[i];
- int q_idx = cl - netdev_get_num_tc(dev);
-
- if (q_idx > tc.offset &&
- q_idx <= tc.offset + tc.count) {
- tcm->tcm_parent =
- TC_H_MAKE(TC_H_MAJ(sch->handle),
- TC_H_MIN(i + 1));
- break;
- }
- }
- tcm->tcm_info = dev_queue->qdisc_sleeping->handle;
}
tcm->tcm_handle |= TC_H_MIN(cl);
return 0;
@@ -332,15 +492,14 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
__releases(d->lock)
__acquires(d->lock)
{
- struct net_device *dev = qdisc_dev(sch);
-
- if (cl <= netdev_get_num_tc(dev)) {
+ if (cl >= TC_H_MIN_PRIORITY) {
int i;
__u32 qlen = 0;
struct Qdisc *qdisc;
struct gnet_stats_queue qstats = {0};
struct gnet_stats_basic_packed bstats = {0};
- struct netdev_tc_txq tc = dev->tc_to_txq[cl - 1];
+ struct net_device *dev = qdisc_dev(sch);
+ struct netdev_tc_txq tc = dev->tc_to_txq[cl & TC_BITMASK];
/* Drop lock here it will be reclaimed before touching
* statistics this is required because the d->lock we
@@ -393,25 +552,44 @@ static void mqprio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
/* Walk hierarchy with a virtual class per tc */
arg->count = arg->skip;
- for (ntx = arg->skip;
- ntx < dev->num_tx_queues + netdev_get_num_tc(dev);
- ntx++) {
+ for (ntx = arg->skip; ntx < netdev_get_num_tc(dev); ntx++) {
+ if (arg->fn(sch, ntx + TC_H_MIN_PRIORITY, arg) < 0) {
+ arg->stop = 1;
+ return;
+ }
+ arg->count++;
+ }
+
+ /* Pad the values and skip over unused traffic classes */
+ if (ntx < TC_MAX_QUEUE) {
+ arg->count = TC_MAX_QUEUE;
+ ntx = TC_MAX_QUEUE;
+ }
+
+ /* Reset offset, sort out remaining per-queue qdiscs */
+ for (ntx -= TC_MAX_QUEUE; ntx < dev->num_tx_queues; ntx++) {
if (arg->fn(sch, ntx + 1, arg) < 0) {
arg->stop = 1;
- break;
+ return;
}
arg->count++;
}
}
+static struct netdev_queue *mqprio_select_queue(struct Qdisc *sch,
+ struct tcmsg *tcm)
+{
+ return mqprio_queue_get(sch, TC_H_MIN(tcm->tcm_parent));
+}
+
static const struct Qdisc_class_ops mqprio_class_ops = {
.graft = mqprio_graft,
.leaf = mqprio_leaf,
- .get = mqprio_get,
- .put = mqprio_put,
+ .find = mqprio_find,
.walk = mqprio_walk,
.dump = mqprio_dump_class,
.dump_stats = mqprio_dump_class_stats,
+ .select_queue = mqprio_select_queue,
};
static struct Qdisc_ops mqprio_qdisc_ops __read_mostly = {
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index f143b7bbaa0d..012216386c0b 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -54,6 +54,7 @@ multiq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
case TC_ACT_QUEUED:
case TC_ACT_TRAP:
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+ /* fall through */
case TC_ACT_SHOT:
return NULL;
}
@@ -245,7 +246,7 @@ static int multiq_init(struct Qdisc *sch, struct nlattr *opt)
if (opt == NULL)
return -EINVAL;
- err = tcf_block_get(&q->block, &q->filter_list);
+ err = tcf_block_get(&q->block, &q->filter_list, sch);
if (err)
return err;
@@ -257,12 +258,7 @@ static int multiq_init(struct Qdisc *sch, struct nlattr *opt)
for (i = 0; i < q->max_bands; i++)
q->queues[i] = &noop_qdisc;
- err = multiq_tune(sch, opt);
-
- if (err)
- kfree(q->queues);
-
- return err;
+ return multiq_tune(sch, opt);
}
static int multiq_dump(struct Qdisc *sch, struct sk_buff *skb)
@@ -306,7 +302,7 @@ multiq_leaf(struct Qdisc *sch, unsigned long arg)
return q->queues[band];
}
-static unsigned long multiq_get(struct Qdisc *sch, u32 classid)
+static unsigned long multiq_find(struct Qdisc *sch, u32 classid)
{
struct multiq_sched_data *q = qdisc_priv(sch);
unsigned long band = TC_H_MIN(classid);
@@ -319,11 +315,11 @@ static unsigned long multiq_get(struct Qdisc *sch, u32 classid)
static unsigned long multiq_bind(struct Qdisc *sch, unsigned long parent,
u32 classid)
{
- return multiq_get(sch, classid);
+ return multiq_find(sch, classid);
}
-static void multiq_put(struct Qdisc *q, unsigned long cl)
+static void multiq_unbind(struct Qdisc *q, unsigned long cl)
{
}
@@ -385,12 +381,11 @@ static struct tcf_block *multiq_tcf_block(struct Qdisc *sch, unsigned long cl)
static const struct Qdisc_class_ops multiq_class_ops = {
.graft = multiq_graft,
.leaf = multiq_leaf,
- .get = multiq_get,
- .put = multiq_put,
+ .find = multiq_find,
.walk = multiq_walk,
.tcf_block = multiq_tcf_block,
.bind_tcf = multiq_bind,
- .unbind_tcf = multiq_put,
+ .unbind_tcf = multiq_unbind,
.dump = multiq_dump_class,
.dump_stats = multiq_dump_class_stats,
};
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 1b3dd6190e93..dd70924cbcdf 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -77,8 +77,8 @@ struct netem_sched_data {
struct qdisc_watchdog watchdog;
- psched_tdiff_t latency;
- psched_tdiff_t jitter;
+ s64 latency;
+ s64 jitter;
u32 loss;
u32 ecn;
@@ -135,6 +135,13 @@ struct netem_sched_data {
u32 a5; /* p23 used only in 4-states */
} clg;
+ struct tc_netem_slot slot_config;
+ struct slotstate {
+ u64 slot_next;
+ s32 packets_left;
+ s32 bytes_left;
+ } slot;
+
};
/* Time stamp put into socket buffer control block
@@ -145,16 +152,9 @@ struct netem_sched_data {
* we save skb->tstamp value in skb->cb[] before destroying it.
*/
struct netem_skb_cb {
- psched_time_t time_to_send;
- ktime_t tstamp_save;
+ u64 time_to_send;
};
-
-static struct sk_buff *netem_rb_to_skb(struct rb_node *rb)
-{
- return rb_entry(rb, struct sk_buff, rbnode);
-}
-
static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
{
/* we assume we can use skb next/prev/tstamp as storage for rb_node */
@@ -312,11 +312,11 @@ static bool loss_event(struct netem_sched_data *q)
* std deviation sigma. Uses table lookup to approximate the desired
* distribution, and a uniformly-distributed pseudo-random source.
*/
-static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
- struct crndstate *state,
- const struct disttable *dist)
+static s64 tabledist(s64 mu, s32 sigma,
+ struct crndstate *state,
+ const struct disttable *dist)
{
- psched_tdiff_t x;
+ s64 x;
long t;
u32 rnd;
@@ -327,7 +327,7 @@ static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
/* default uniform distribution */
if (dist == NULL)
- return (rnd % (2*sigma)) - sigma + mu;
+ return (rnd % (2 * sigma)) - sigma + mu;
t = dist->table[rnd % dist->size];
x = (sigma % NETEM_DIST_SCALE) * t;
@@ -339,10 +339,8 @@ static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
}
-static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sched_data *q)
+static u64 packet_time_ns(u64 len, const struct netem_sched_data *q)
{
- u64 ticks;
-
len += q->packet_overhead;
if (q->cell_size) {
@@ -353,21 +351,19 @@ static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sche
len = cells * (q->cell_size + q->cell_overhead);
}
- ticks = (u64)len * NSEC_PER_SEC;
-
- do_div(ticks, q->rate);
- return PSCHED_NS2TICKS(ticks);
+ return div64_u64(len * NSEC_PER_SEC, q->rate);
}
static void tfifo_reset(struct Qdisc *sch)
{
struct netem_sched_data *q = qdisc_priv(sch);
- struct rb_node *p;
+ struct rb_node *p = rb_first(&q->t_root);
- while ((p = rb_first(&q->t_root))) {
- struct sk_buff *skb = netem_rb_to_skb(p);
+ while (p) {
+ struct sk_buff *skb = rb_to_skb(p);
- rb_erase(p, &q->t_root);
+ p = rb_next(p);
+ rb_erase(&skb->rbnode, &q->t_root);
rtnl_kfree_skbs(skb, skb);
}
}
@@ -375,14 +371,14 @@ static void tfifo_reset(struct Qdisc *sch)
static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
{
struct netem_sched_data *q = qdisc_priv(sch);
- psched_time_t tnext = netem_skb_cb(nskb)->time_to_send;
+ u64 tnext = netem_skb_cb(nskb)->time_to_send;
struct rb_node **p = &q->t_root.rb_node, *parent = NULL;
while (*p) {
struct sk_buff *skb;
parent = *p;
- skb = netem_rb_to_skb(parent);
+ skb = rb_to_skb(parent);
if (tnext >= netem_skb_cb(skb)->time_to_send)
p = &parent->rb_right;
else
@@ -521,13 +517,13 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
if (q->gap == 0 || /* not doing reordering */
q->counter < q->gap - 1 || /* inside last reordering gap */
q->reorder < get_crandom(&q->reorder_cor)) {
- psched_time_t now;
- psched_tdiff_t delay;
+ u64 now;
+ s64 delay;
delay = tabledist(q->latency, q->jitter,
&q->delay_cor, q->delay_dist);
- now = psched_get_time();
+ now = ktime_get_ns();
if (q->rate) {
struct netem_skb_cb *last = NULL;
@@ -538,7 +534,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff *t_skb;
struct netem_skb_cb *t_last;
- t_skb = netem_rb_to_skb(rb_last(&q->t_root));
+ t_skb = skb_rb_last(&q->t_root);
t_last = netem_skb_cb(t_skb);
if (!last ||
t_last->time_to_send > last->time_to_send) {
@@ -553,15 +549,14 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
* from delay.
*/
delay -= last->time_to_send - now;
- delay = max_t(psched_tdiff_t, 0, delay);
+ delay = max_t(s64, 0, delay);
now = last->time_to_send;
}
- delay += packet_len_2_sched_time(qdisc_pkt_len(skb), q);
+ delay += packet_time_ns(qdisc_pkt_len(skb), q);
}
cb->time_to_send = now + delay;
- cb->tstamp_save = skb->tstamp;
++q->counter;
tfifo_enqueue(skb, sch);
} else {
@@ -569,7 +564,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
* Do re-ordering by putting one out of N packets at the front
* of the queue.
*/
- cb->time_to_send = psched_get_time();
+ cb->time_to_send = ktime_get_ns();
q->counter = 0;
netem_enqueue_skb_head(&sch->q, skb);
@@ -600,6 +595,20 @@ finish_segs:
return NET_XMIT_SUCCESS;
}
+/* Delay the next round with a new future slot with a
+ * correct number of bytes and packets.
+ */
+
+static void get_slot_next(struct netem_sched_data *q, u64 now)
+{
+ q->slot.slot_next = now + q->slot_config.min_delay +
+ (prandom_u32() *
+ (q->slot_config.max_delay -
+ q->slot_config.min_delay) >> 32);
+ q->slot.packets_left = q->slot_config.max_packets;
+ q->slot.bytes_left = q->slot_config.max_bytes;
+}
+
static struct sk_buff *netem_dequeue(struct Qdisc *sch)
{
struct netem_sched_data *q = qdisc_priv(sch);
@@ -616,20 +625,26 @@ deliver:
}
p = rb_first(&q->t_root);
if (p) {
- psched_time_t time_to_send;
+ u64 time_to_send;
+ u64 now = ktime_get_ns();
- skb = netem_rb_to_skb(p);
+ skb = rb_to_skb(p);
/* if more time remaining? */
time_to_send = netem_skb_cb(skb)->time_to_send;
- if (time_to_send <= psched_get_time()) {
- rb_erase(p, &q->t_root);
+ if (q->slot.slot_next && q->slot.slot_next < time_to_send)
+ get_slot_next(q, now);
+ if (time_to_send <= now && q->slot.slot_next <= now) {
+ rb_erase(p, &q->t_root);
sch->q.qlen--;
qdisc_qstats_backlog_dec(sch, skb);
skb->next = NULL;
skb->prev = NULL;
- skb->tstamp = netem_skb_cb(skb)->tstamp_save;
+ /* skb->dev shares skb->rbnode area,
+ * we need to restore its value.
+ */
+ skb->dev = qdisc_dev(sch);
#ifdef CONFIG_NET_CLS_ACT
/*
@@ -640,6 +655,14 @@ deliver:
skb->tstamp = 0;
#endif
+ if (q->slot.slot_next) {
+ q->slot.packets_left--;
+ q->slot.bytes_left -= qdisc_pkt_len(skb);
+ if (q->slot.packets_left <= 0 ||
+ q->slot.bytes_left <= 0)
+ get_slot_next(q, now);
+ }
+
if (q->qdisc) {
unsigned int pkt_len = qdisc_pkt_len(skb);
struct sk_buff *to_free = NULL;
@@ -663,7 +686,10 @@ deliver:
if (skb)
goto deliver;
}
- qdisc_watchdog_schedule(&q->watchdog, time_to_send);
+
+ qdisc_watchdog_schedule_ns(&q->watchdog,
+ max(time_to_send,
+ q->slot.slot_next));
}
if (q->qdisc) {
@@ -694,6 +720,7 @@ static void dist_free(struct disttable *d)
* Distribution data is a variable size payload containing
* signed 16 bit values.
*/
+
static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
{
struct netem_sched_data *q = qdisc_priv(sch);
@@ -724,6 +751,23 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
return 0;
}
+static void get_slot(struct netem_sched_data *q, const struct nlattr *attr)
+{
+ const struct tc_netem_slot *c = nla_data(attr);
+
+ q->slot_config = *c;
+ if (q->slot_config.max_packets == 0)
+ q->slot_config.max_packets = INT_MAX;
+ if (q->slot_config.max_bytes == 0)
+ q->slot_config.max_bytes = INT_MAX;
+ q->slot.packets_left = q->slot_config.max_packets;
+ q->slot.bytes_left = q->slot_config.max_bytes;
+ if (q->slot_config.min_delay | q->slot_config.max_delay)
+ q->slot.slot_next = ktime_get_ns();
+ else
+ q->slot.slot_next = 0;
+}
+
static void get_correlation(struct netem_sched_data *q, const struct nlattr *attr)
{
const struct tc_netem_corr *c = nla_data(attr);
@@ -825,6 +869,9 @@ static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
[TCA_NETEM_LOSS] = { .type = NLA_NESTED },
[TCA_NETEM_ECN] = { .type = NLA_U32 },
[TCA_NETEM_RATE64] = { .type = NLA_U64 },
+ [TCA_NETEM_LATENCY64] = { .type = NLA_S64 },
+ [TCA_NETEM_JITTER64] = { .type = NLA_S64 },
+ [TCA_NETEM_SLOT] = { .len = sizeof(struct tc_netem_slot) },
};
static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
@@ -892,8 +939,8 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
sch->limit = qopt->limit;
- q->latency = qopt->latency;
- q->jitter = qopt->jitter;
+ q->latency = PSCHED_TICKS2NS(qopt->latency);
+ q->jitter = PSCHED_TICKS2NS(qopt->jitter);
q->limit = qopt->limit;
q->gap = qopt->gap;
q->counter = 0;
@@ -922,9 +969,18 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
q->rate = max_t(u64, q->rate,
nla_get_u64(tb[TCA_NETEM_RATE64]));
+ if (tb[TCA_NETEM_LATENCY64])
+ q->latency = nla_get_s64(tb[TCA_NETEM_LATENCY64]);
+
+ if (tb[TCA_NETEM_JITTER64])
+ q->jitter = nla_get_s64(tb[TCA_NETEM_JITTER64]);
+
if (tb[TCA_NETEM_ECN])
q->ecn = nla_get_u32(tb[TCA_NETEM_ECN]);
+ if (tb[TCA_NETEM_SLOT])
+ get_slot(q, tb[TCA_NETEM_SLOT]);
+
return ret;
}
@@ -933,11 +989,11 @@ static int netem_init(struct Qdisc *sch, struct nlattr *opt)
struct netem_sched_data *q = qdisc_priv(sch);
int ret;
+ qdisc_watchdog_init(&q->watchdog, sch);
+
if (!opt)
return -EINVAL;
- qdisc_watchdog_init(&q->watchdog, sch);
-
q->loss_model = CLG_RANDOM;
ret = netem_change(sch, opt);
if (ret)
@@ -1014,9 +1070,12 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
struct tc_netem_reorder reorder;
struct tc_netem_corrupt corrupt;
struct tc_netem_rate rate;
+ struct tc_netem_slot slot;
- qopt.latency = q->latency;
- qopt.jitter = q->jitter;
+ qopt.latency = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->latency),
+ UINT_MAX);
+ qopt.jitter = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->jitter),
+ UINT_MAX);
qopt.limit = q->limit;
qopt.loss = q->loss;
qopt.gap = q->gap;
@@ -1024,6 +1083,12 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
if (nla_put(skb, TCA_OPTIONS, sizeof(qopt), &qopt))
goto nla_put_failure;
+ if (nla_put(skb, TCA_NETEM_LATENCY64, sizeof(q->latency), &q->latency))
+ goto nla_put_failure;
+
+ if (nla_put(skb, TCA_NETEM_JITTER64, sizeof(q->jitter), &q->jitter))
+ goto nla_put_failure;
+
cor.delay_corr = q->delay_cor.rho;
cor.loss_corr = q->loss_cor.rho;
cor.dup_corr = q->dup_cor.rho;
@@ -1060,6 +1125,16 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
if (dump_loss_model(q, skb) != 0)
goto nla_put_failure;
+ if (q->slot_config.min_delay | q->slot_config.max_delay) {
+ slot = q->slot_config;
+ if (slot.max_packets == INT_MAX)
+ slot.max_packets = 0;
+ if (slot.max_bytes == INT_MAX)
+ slot.max_bytes = 0;
+ if (nla_put(skb, TCA_NETEM_SLOT, sizeof(slot), &slot))
+ goto nla_put_failure;
+ }
+
return nla_nest_end(skb, nla);
nla_put_failure:
@@ -1096,15 +1171,11 @@ static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
return q->qdisc;
}
-static unsigned long netem_get(struct Qdisc *sch, u32 classid)
+static unsigned long netem_find(struct Qdisc *sch, u32 classid)
{
return 1;
}
-static void netem_put(struct Qdisc *sch, unsigned long arg)
-{
-}
-
static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
if (!walker->stop) {
@@ -1120,8 +1191,7 @@ static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
static const struct Qdisc_class_ops netem_class_ops = {
.graft = netem_graft,
.leaf = netem_leaf,
- .get = netem_get,
- .put = netem_put,
+ .find = netem_find,
.walk = netem_walk,
.dump = netem_dump_class,
};
diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index 6c2791d6102d..776c694c77c7 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -74,6 +74,7 @@ struct pie_sched_data {
struct pie_vars vars;
struct pie_stats stats;
struct timer_list adapt_timer;
+ struct Qdisc *sch;
};
static void pie_params_init(struct pie_params *params)
@@ -422,10 +423,10 @@ static void calculate_probability(struct Qdisc *sch)
pie_vars_init(&q->vars);
}
-static void pie_timer(unsigned long arg)
+static void pie_timer(struct timer_list *t)
{
- struct Qdisc *sch = (struct Qdisc *)arg;
- struct pie_sched_data *q = qdisc_priv(sch);
+ struct pie_sched_data *q = from_timer(q, t, adapt_timer);
+ struct Qdisc *sch = q->sch;
spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
spin_lock(root_lock);
@@ -446,7 +447,8 @@ static int pie_init(struct Qdisc *sch, struct nlattr *opt)
pie_vars_init(&q->vars);
sch->limit = q->params.limit;
- setup_timer(&q->adapt_timer, pie_timer, (unsigned long)sch);
+ q->sch = sch;
+ timer_setup(&q->adapt_timer, pie_timer, 0);
if (opt) {
int err = pie_change(sch, opt);
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index e3e364cc9a70..2c79559a0d31 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -50,6 +50,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
case TC_ACT_QUEUED:
case TC_ACT_TRAP:
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+ /* fall through */
case TC_ACT_SHOT:
return NULL;
}
@@ -80,7 +81,7 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
if (ret & __NET_XMIT_BYPASS)
qdisc_qstats_drop(sch);
- kfree_skb(skb);
+ __qdisc_drop(skb, to_free);
return ret;
}
#endif
@@ -212,7 +213,7 @@ static int prio_init(struct Qdisc *sch, struct nlattr *opt)
if (!opt)
return -EINVAL;
- err = tcf_block_get(&q->block, &q->filter_list);
+ err = tcf_block_get(&q->block, &q->filter_list, sch);
if (err)
return err;
@@ -260,7 +261,7 @@ prio_leaf(struct Qdisc *sch, unsigned long arg)
return q->queues[band];
}
-static unsigned long prio_get(struct Qdisc *sch, u32 classid)
+static unsigned long prio_find(struct Qdisc *sch, u32 classid)
{
struct prio_sched_data *q = qdisc_priv(sch);
unsigned long band = TC_H_MIN(classid);
@@ -272,11 +273,11 @@ static unsigned long prio_get(struct Qdisc *sch, u32 classid)
static unsigned long prio_bind(struct Qdisc *sch, unsigned long parent, u32 classid)
{
- return prio_get(sch, classid);
+ return prio_find(sch, classid);
}
-static void prio_put(struct Qdisc *q, unsigned long cl)
+static void prio_unbind(struct Qdisc *q, unsigned long cl)
{
}
@@ -338,12 +339,11 @@ static struct tcf_block *prio_tcf_block(struct Qdisc *sch, unsigned long cl)
static const struct Qdisc_class_ops prio_class_ops = {
.graft = prio_graft,
.leaf = prio_leaf,
- .get = prio_get,
- .put = prio_put,
+ .find = prio_find,
.walk = prio_walk,
.tcf_block = prio_tcf_block,
.bind_tcf = prio_bind,
- .unbind_tcf = prio_put,
+ .unbind_tcf = prio_unbind,
.dump = prio_dump_class,
.dump_stats = prio_dump_class_stats,
};
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 0e16dfda0bd7..6962b37a3ad3 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -132,7 +132,6 @@ struct qfq_aggregate;
struct qfq_class {
struct Qdisc_class_common common;
- unsigned int refcnt;
unsigned int filter_cnt;
struct gnet_stats_basic_packed bstats;
@@ -477,7 +476,6 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (cl == NULL)
return -ENOBUFS;
- cl->refcnt = 1;
cl->common.classid = classid;
cl->deficit = lmax;
@@ -555,32 +553,15 @@ static int qfq_delete_class(struct Qdisc *sch, unsigned long arg)
qfq_purge_queue(cl);
qdisc_class_hash_remove(&q->clhash, &cl->common);
- BUG_ON(--cl->refcnt == 0);
- /*
- * This shouldn't happen: we "hold" one cops->get() when called
- * from tc_ctl_tclass; the destroy method is done from cops->put().
- */
-
sch_tree_unlock(sch);
- return 0;
-}
-
-static unsigned long qfq_get_class(struct Qdisc *sch, u32 classid)
-{
- struct qfq_class *cl = qfq_find_class(sch, classid);
- if (cl != NULL)
- cl->refcnt++;
-
- return (unsigned long)cl;
+ qfq_destroy_class(sch, cl);
+ return 0;
}
-static void qfq_put_class(struct Qdisc *sch, unsigned long arg)
+static unsigned long qfq_search_class(struct Qdisc *sch, u32 classid)
{
- struct qfq_class *cl = (struct qfq_class *)arg;
-
- if (--cl->refcnt == 0)
- qfq_destroy_class(sch, cl);
+ return (unsigned long)qfq_find_class(sch, classid);
}
static struct tcf_block *qfq_tcf_block(struct Qdisc *sch, unsigned long cl)
@@ -728,6 +709,7 @@ static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch,
case TC_ACT_STOLEN:
case TC_ACT_TRAP:
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+ /* fall through */
case TC_ACT_SHOT:
return NULL;
}
@@ -1234,7 +1216,7 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
if (cl == NULL) {
if (err & __NET_XMIT_BYPASS)
qdisc_qstats_drop(sch);
- kfree_skb(skb);
+ __qdisc_drop(skb, to_free);
return err;
}
pr_debug("qfq_enqueue: cl = %x\n", cl->common.classid);
@@ -1428,8 +1410,7 @@ static void qfq_qlen_notify(struct Qdisc *sch, unsigned long arg)
struct qfq_sched *q = qdisc_priv(sch);
struct qfq_class *cl = (struct qfq_class *)arg;
- if (cl->qdisc->q.qlen == 0)
- qfq_deactivate_class(q, cl);
+ qfq_deactivate_class(q, cl);
}
static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
@@ -1439,7 +1420,7 @@ static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
int i, j, err;
u32 max_cl_shift, maxbudg_shift, max_classes;
- err = tcf_block_get(&q->block, &q->filter_list);
+ err = tcf_block_get(&q->block, &q->filter_list, sch);
if (err)
return err;
@@ -1511,8 +1492,7 @@ static void qfq_destroy_qdisc(struct Qdisc *sch)
static const struct Qdisc_class_ops qfq_class_ops = {
.change = qfq_change_class,
.delete = qfq_delete_class,
- .get = qfq_get_class,
- .put = qfq_put_class,
+ .find = qfq_search_class,
.tcf_block = qfq_tcf_block,
.bind_tcf = qfq_bind_tcf,
.unbind_tcf = qfq_unbind_tcf,
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 11292adce412..7f8ea9e297c3 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -19,6 +19,7 @@
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
#include <net/inet_ecn.h>
#include <net/red.h>
@@ -40,6 +41,7 @@ struct red_sched_data {
u32 limit; /* HARD maximal queue length */
unsigned char flags;
struct timer_list adapt_timer;
+ struct Qdisc *sch;
struct red_parms parms;
struct red_vars vars;
struct red_stats stats;
@@ -147,11 +149,37 @@ static void red_reset(struct Qdisc *sch)
red_restart(&q->vars);
}
+static int red_offload(struct Qdisc *sch, bool enable)
+{
+ struct red_sched_data *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+ struct tc_red_qopt_offload opt = {
+ .handle = sch->handle,
+ .parent = sch->parent,
+ };
+
+ if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
+ return -EOPNOTSUPP;
+
+ if (enable) {
+ opt.command = TC_RED_REPLACE;
+ opt.set.min = q->parms.qth_min >> q->parms.Wlog;
+ opt.set.max = q->parms.qth_max >> q->parms.Wlog;
+ opt.set.probability = q->parms.max_P;
+ opt.set.is_ecn = red_use_ecn(q);
+ } else {
+ opt.command = TC_RED_DESTROY;
+ }
+
+ return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
+}
+
static void red_destroy(struct Qdisc *sch)
{
struct red_sched_data *q = qdisc_priv(sch);
del_timer_sync(&q->adapt_timer);
+ red_offload(sch, false);
qdisc_destroy(q->qdisc);
}
@@ -218,13 +246,14 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
red_start_of_idle_period(&q->vars);
sch_tree_unlock(sch);
+ red_offload(sch, true);
return 0;
}
-static inline void red_adaptative_timer(unsigned long arg)
+static inline void red_adaptative_timer(struct timer_list *t)
{
- struct Qdisc *sch = (struct Qdisc *)arg;
- struct red_sched_data *q = qdisc_priv(sch);
+ struct red_sched_data *q = from_timer(q, t, adapt_timer);
+ struct Qdisc *sch = q->sch;
spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
spin_lock(root_lock);
@@ -238,10 +267,40 @@ static int red_init(struct Qdisc *sch, struct nlattr *opt)
struct red_sched_data *q = qdisc_priv(sch);
q->qdisc = &noop_qdisc;
- setup_timer(&q->adapt_timer, red_adaptative_timer, (unsigned long)sch);
+ q->sch = sch;
+ timer_setup(&q->adapt_timer, red_adaptative_timer, 0);
return red_change(sch, opt);
}
+static int red_dump_offload(struct Qdisc *sch, struct tc_red_qopt *opt)
+{
+ struct net_device *dev = qdisc_dev(sch);
+ struct tc_red_qopt_offload hw_stats = {
+ .command = TC_RED_STATS,
+ .handle = sch->handle,
+ .parent = sch->parent,
+ {
+ .stats.bstats = &sch->bstats,
+ .stats.qstats = &sch->qstats,
+ },
+ };
+ int err;
+
+ opt->flags &= ~TC_RED_OFFLOADED;
+ if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
+ return 0;
+
+ err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
+ &hw_stats);
+ if (err == -EOPNOTSUPP)
+ return 0;
+
+ if (!err)
+ opt->flags |= TC_RED_OFFLOADED;
+
+ return err;
+}
+
static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct red_sched_data *q = qdisc_priv(sch);
@@ -255,8 +314,13 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
.Plog = q->parms.Plog,
.Scell_log = q->parms.Scell_log,
};
+ int err;
sch->qstats.backlog = q->qdisc->qstats.backlog;
+ err = red_dump_offload(sch, &opt);
+ if (err)
+ goto nla_put_failure;
+
opts = nla_nest_start(skb, TCA_OPTIONS);
if (opts == NULL)
goto nla_put_failure;
@@ -273,6 +337,7 @@ nla_put_failure:
static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
struct red_sched_data *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
struct tc_red_xstats st = {
.early = q->stats.prob_drop + q->stats.forced_drop,
.pdrop = q->stats.pdrop,
@@ -280,6 +345,26 @@ static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
.marked = q->stats.prob_mark + q->stats.forced_mark,
};
+ if (tc_can_offload(dev) && dev->netdev_ops->ndo_setup_tc) {
+ struct red_stats hw_stats = {0};
+ struct tc_red_qopt_offload hw_stats_request = {
+ .command = TC_RED_XSTATS,
+ .handle = sch->handle,
+ .parent = sch->parent,
+ {
+ .xstats = &hw_stats,
+ },
+ };
+ if (!dev->netdev_ops->ndo_setup_tc(dev,
+ TC_SETUP_QDISC_RED,
+ &hw_stats_request)) {
+ st.early += hw_stats.prob_drop + hw_stats.forced_drop;
+ st.pdrop += hw_stats.pdrop;
+ st.other += hw_stats.other;
+ st.marked += hw_stats.prob_mark + hw_stats.forced_mark;
+ }
+ }
+
return gnet_stats_copy_app(d, &st, sizeof(st));
}
@@ -311,15 +396,11 @@ static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
return q->qdisc;
}
-static unsigned long red_get(struct Qdisc *sch, u32 classid)
+static unsigned long red_find(struct Qdisc *sch, u32 classid)
{
return 1;
}
-static void red_put(struct Qdisc *sch, unsigned long arg)
-{
-}
-
static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
if (!walker->stop) {
@@ -335,8 +416,7 @@ static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
static const struct Qdisc_class_ops red_class_ops = {
.graft = red_graft,
.leaf = red_leaf,
- .get = red_get,
- .put = red_put,
+ .find = red_find,
.walk = red_walk,
.dump = red_dump_class,
};
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index 11fb6ec878d6..0678debdd856 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -268,6 +268,7 @@ static bool sfb_classify(struct sk_buff *skb, struct tcf_proto *fl,
case TC_ACT_QUEUED:
case TC_ACT_TRAP:
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+ /* fall through */
case TC_ACT_SHOT:
return false;
}
@@ -553,7 +554,7 @@ static int sfb_init(struct Qdisc *sch, struct nlattr *opt)
struct sfb_sched_data *q = qdisc_priv(sch);
int err;
- err = tcf_block_get(&q->block, &q->filter_list);
+ err = tcf_block_get(&q->block, &q->filter_list, sch);
if (err)
return err;
@@ -632,12 +633,12 @@ static struct Qdisc *sfb_leaf(struct Qdisc *sch, unsigned long arg)
return q->qdisc;
}
-static unsigned long sfb_get(struct Qdisc *sch, u32 classid)
+static unsigned long sfb_find(struct Qdisc *sch, u32 classid)
{
return 1;
}
-static void sfb_put(struct Qdisc *sch, unsigned long arg)
+static void sfb_unbind(struct Qdisc *sch, unsigned long arg)
{
}
@@ -683,14 +684,13 @@ static unsigned long sfb_bind(struct Qdisc *sch, unsigned long parent,
static const struct Qdisc_class_ops sfb_class_ops = {
.graft = sfb_graft,
.leaf = sfb_leaf,
- .get = sfb_get,
- .put = sfb_put,
+ .find = sfb_find,
.change = sfb_change_class,
.delete = sfb_delete,
.walk = sfb_walk,
.tcf_block = sfb_tcf_block,
.bind_tcf = sfb_bind,
- .unbind_tcf = sfb_put,
+ .unbind_tcf = sfb_unbind,
.dump = sfb_dump_class,
};
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 82469ef9655e..890f4a4564e7 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -145,6 +145,7 @@ struct sfq_sched_data {
int perturb_period;
unsigned int quantum; /* Allotment per round: MUST BE >= MTU */
struct timer_list perturb_timer;
+ struct Qdisc *sch;
};
/*
@@ -189,6 +190,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
case TC_ACT_QUEUED:
case TC_ACT_TRAP:
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+ /* fall through */
case TC_ACT_SHOT:
return 0;
}
@@ -292,7 +294,7 @@ static inline void slot_queue_add(struct sfq_slot *slot, struct sk_buff *skb)
slot->skblist_prev = skb;
}
-static unsigned int sfq_drop(struct Qdisc *sch)
+static unsigned int sfq_drop(struct Qdisc *sch, struct sk_buff **to_free)
{
struct sfq_sched_data *q = qdisc_priv(sch);
sfq_index x, d = q->cur_depth;
@@ -310,9 +312,8 @@ drop:
slot->backlog -= len;
sfq_dec(q, x);
sch->q.qlen--;
- qdisc_qstats_drop(sch);
qdisc_qstats_backlog_dec(sch, skb);
- kfree_skb(skb);
+ qdisc_drop(skb, sch, to_free);
return len;
}
@@ -360,7 +361,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
if (hash == 0) {
if (ret & __NET_XMIT_BYPASS)
qdisc_qstats_drop(sch);
- kfree_skb(skb);
+ __qdisc_drop(skb, to_free);
return ret;
}
hash--;
@@ -465,7 +466,7 @@ enqueue:
return NET_XMIT_SUCCESS;
qlen = slot->qlen;
- dropped = sfq_drop(sch);
+ dropped = sfq_drop(sch, to_free);
/* Return Congestion Notification only if we dropped a packet
* from this flow.
*/
@@ -605,10 +606,10 @@ drop:
qdisc_tree_reduce_backlog(sch, dropped, drop_len);
}
-static void sfq_perturbation(unsigned long arg)
+static void sfq_perturbation(struct timer_list *t)
{
- struct Qdisc *sch = (struct Qdisc *)arg;
- struct sfq_sched_data *q = qdisc_priv(sch);
+ struct sfq_sched_data *q = from_timer(q, t, perturb_timer);
+ struct Qdisc *sch = q->sch;
spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
spin_lock(root_lock);
@@ -628,6 +629,8 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
struct tc_sfq_qopt_v1 *ctl_v1 = NULL;
unsigned int qlen, dropped = 0;
struct red_parms *p = NULL;
+ struct sk_buff *to_free = NULL;
+ struct sk_buff *tail = NULL;
if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
return -EINVAL;
@@ -674,8 +677,13 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
}
qlen = sch->q.qlen;
- while (sch->q.qlen > q->limit)
- dropped += sfq_drop(sch);
+ while (sch->q.qlen > q->limit) {
+ dropped += sfq_drop(sch, &to_free);
+ if (!tail)
+ tail = to_free;
+ }
+
+ rtnl_kfree_skbs(to_free, tail);
qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped);
del_timer(&q->perturb_timer);
@@ -716,13 +724,12 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
int i;
int err;
- err = tcf_block_get(&q->block, &q->filter_list);
+ timer_setup(&q->perturb_timer, sfq_perturbation, TIMER_DEFERRABLE);
+
+ err = tcf_block_get(&q->block, &q->filter_list, sch);
if (err)
return err;
- setup_deferrable_timer(&q->perturb_timer, sfq_perturbation,
- (unsigned long)sch);
-
for (i = 0; i < SFQ_MAX_DEPTH + 1; i++) {
q->dep[i].next = i + SFQ_MAX_FLOWS;
q->dep[i].prev = i + SFQ_MAX_FLOWS;
@@ -808,7 +815,7 @@ static struct Qdisc *sfq_leaf(struct Qdisc *sch, unsigned long arg)
return NULL;
}
-static unsigned long sfq_get(struct Qdisc *sch, u32 classid)
+static unsigned long sfq_find(struct Qdisc *sch, u32 classid)
{
return 0;
}
@@ -821,7 +828,7 @@ static unsigned long sfq_bind(struct Qdisc *sch, unsigned long parent,
return 0;
}
-static void sfq_put(struct Qdisc *q, unsigned long cl)
+static void sfq_unbind(struct Qdisc *q, unsigned long cl)
{
}
@@ -885,11 +892,10 @@ static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
static const struct Qdisc_class_ops sfq_class_ops = {
.leaf = sfq_leaf,
- .get = sfq_get,
- .put = sfq_put,
+ .find = sfq_find,
.tcf_block = sfq_tcf_block,
.bind_tcf = sfq_bind,
- .unbind_tcf = sfq_put,
+ .unbind_tcf = sfq_unbind,
.dump = sfq_dump_class,
.dump_stats = sfq_dump_class_stats,
.walk = sfq_walk,
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index b2e4b6ad241a..120f4f365967 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -425,12 +425,13 @@ static int tbf_init(struct Qdisc *sch, struct nlattr *opt)
{
struct tbf_sched_data *q = qdisc_priv(sch);
+ qdisc_watchdog_init(&q->watchdog, sch);
+ q->qdisc = &noop_qdisc;
+
if (opt == NULL)
return -EINVAL;
q->t_c = ktime_get_ns();
- qdisc_watchdog_init(&q->watchdog, sch);
- q->qdisc = &noop_qdisc;
return tbf_change(sch, opt);
}
@@ -510,15 +511,11 @@ static struct Qdisc *tbf_leaf(struct Qdisc *sch, unsigned long arg)
return q->qdisc;
}
-static unsigned long tbf_get(struct Qdisc *sch, u32 classid)
+static unsigned long tbf_find(struct Qdisc *sch, u32 classid)
{
return 1;
}
-static void tbf_put(struct Qdisc *sch, unsigned long arg)
-{
-}
-
static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
if (!walker->stop) {
@@ -534,8 +531,7 @@ static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker)
static const struct Qdisc_class_ops tbf_class_ops = {
.graft = tbf_graft,
.leaf = tbf_leaf,
- .get = tbf_get,
- .put = tbf_put,
+ .find = tbf_find,
.walk = tbf_walk,
.dump = tbf_dump_class,
};
diff --git a/net/sctp/Makefile b/net/sctp/Makefile
index 70f1b570bab9..1ca84a288443 100644
--- a/net/sctp/Makefile
+++ b/net/sctp/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for SCTP support code.
#
@@ -12,7 +13,8 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \
inqueue.o outqueue.o ulpqueue.o \
tsnmap.o bind_addr.o socket.o primitive.o \
output.o input.o debug.o stream.o auth.o \
- offload.o
+ offload.o stream_sched.o stream_sched_prio.o \
+ stream_sched_rr.o
sctp_probe-y := probe.o
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 40ec83679d6e..69394f4d6091 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -63,11 +63,11 @@ static void sctp_assoc_free_asconf_queue(struct sctp_association *asoc);
/* 1st Level Abstractions. */
/* Initialize a new association from provided memory. */
-static struct sctp_association *sctp_association_init(struct sctp_association *asoc,
- const struct sctp_endpoint *ep,
- const struct sock *sk,
- sctp_scope_t scope,
- gfp_t gfp)
+static struct sctp_association *sctp_association_init(
+ struct sctp_association *asoc,
+ const struct sctp_endpoint *ep,
+ const struct sock *sk,
+ enum sctp_scope scope, gfp_t gfp)
{
struct net *net = sock_net(sk);
struct sctp_sock *sp;
@@ -149,8 +149,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
/* Initializes the timers */
for (i = SCTP_EVENT_TIMEOUT_NONE; i < SCTP_NUM_TIMEOUT_TYPES; ++i)
- setup_timer(&asoc->timers[i], sctp_timer_events[i],
- (unsigned long)asoc);
+ timer_setup(&asoc->timers[i], sctp_timer_events[i], 0);
/* Pull default initialization values from the sock options.
* Note: This assumes that the values have already been
@@ -301,9 +300,8 @@ fail_init:
/* Allocate and initialize a new association */
struct sctp_association *sctp_association_new(const struct sctp_endpoint *ep,
- const struct sock *sk,
- sctp_scope_t scope,
- gfp_t gfp)
+ const struct sock *sk,
+ enum sctp_scope scope, gfp_t gfp)
{
struct sctp_association *asoc;
@@ -797,7 +795,7 @@ void sctp_assoc_del_nonprimary_peers(struct sctp_association *asoc,
*/
void sctp_assoc_control_transport(struct sctp_association *asoc,
struct sctp_transport *transport,
- sctp_transport_cmd_t command,
+ enum sctp_transport_cmd command,
sctp_sn_error_t error)
{
struct sctp_ulpevent *event;
@@ -1022,11 +1020,11 @@ static void sctp_assoc_bh_rcv(struct work_struct *work)
container_of(work, struct sctp_association,
base.inqueue.immediate);
struct net *net = sock_net(asoc->base.sk);
+ union sctp_subtype subtype;
struct sctp_endpoint *ep;
struct sctp_chunk *chunk;
struct sctp_inq *inqueue;
int state;
- sctp_subtype_t subtype;
int error = 0;
/* The association should be held so we should be safe. */
@@ -1564,7 +1562,7 @@ void sctp_assoc_rwnd_decrease(struct sctp_association *asoc, unsigned int len)
* local endpoint and the remote peer.
*/
int sctp_assoc_set_bind_addr_from_ep(struct sctp_association *asoc,
- sctp_scope_t scope, gfp_t gfp)
+ enum sctp_scope scope, gfp_t gfp)
{
int flags;
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index e001b01b0e68..00667c50efa7 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -185,9 +185,9 @@ static int sctp_auth_compare_vectors(struct sctp_auth_bytes *vector1,
* are called the two key vectors.
*/
static struct sctp_auth_bytes *sctp_auth_make_key_vector(
- sctp_random_param_t *random,
- sctp_chunks_param_t *chunks,
- sctp_hmac_algo_param_t *hmacs,
+ struct sctp_random_param *random,
+ struct sctp_chunks_param *chunks,
+ struct sctp_hmac_algo_param *hmacs,
gfp_t gfp)
{
struct sctp_auth_bytes *new;
@@ -226,10 +226,9 @@ static struct sctp_auth_bytes *sctp_auth_make_local_vector(
gfp_t gfp)
{
return sctp_auth_make_key_vector(
- (sctp_random_param_t *)asoc->c.auth_random,
- (sctp_chunks_param_t *)asoc->c.auth_chunks,
- (sctp_hmac_algo_param_t *)asoc->c.auth_hmacs,
- gfp);
+ (struct sctp_random_param *)asoc->c.auth_random,
+ (struct sctp_chunks_param *)asoc->c.auth_chunks,
+ (struct sctp_hmac_algo_param *)asoc->c.auth_hmacs, gfp);
}
/* Make a key vector based on peer's parameters */
diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
index 1ebc184a0e23..7df3704982f5 100644
--- a/net/sctp/bind_addr.c
+++ b/net/sctp/bind_addr.c
@@ -45,9 +45,9 @@
#include <net/sctp/sm.h>
/* Forward declarations for internal helpers. */
-static int sctp_copy_one_addr(struct net *, struct sctp_bind_addr *,
- union sctp_addr *, sctp_scope_t scope, gfp_t gfp,
- int flags);
+static int sctp_copy_one_addr(struct net *net, struct sctp_bind_addr *dest,
+ union sctp_addr *addr, enum sctp_scope scope,
+ gfp_t gfp, int flags);
static void sctp_bind_addr_clean(struct sctp_bind_addr *);
/* First Level Abstractions. */
@@ -57,7 +57,7 @@ static void sctp_bind_addr_clean(struct sctp_bind_addr *);
*/
int sctp_bind_addr_copy(struct net *net, struct sctp_bind_addr *dest,
const struct sctp_bind_addr *src,
- sctp_scope_t scope, gfp_t gfp,
+ enum sctp_scope scope, gfp_t gfp,
int flags)
{
struct sctp_sockaddr_entry *addr;
@@ -440,9 +440,8 @@ union sctp_addr *sctp_find_unmatch_addr(struct sctp_bind_addr *bp,
/* Copy out addresses from the global local address list. */
static int sctp_copy_one_addr(struct net *net, struct sctp_bind_addr *dest,
- union sctp_addr *addr,
- sctp_scope_t scope, gfp_t gfp,
- int flags)
+ union sctp_addr *addr, enum sctp_scope scope,
+ gfp_t gfp, int flags)
{
int error = 0;
@@ -485,9 +484,10 @@ int sctp_is_any(struct sock *sk, const union sctp_addr *addr)
}
/* Is 'addr' valid for 'scope'? */
-int sctp_in_scope(struct net *net, const union sctp_addr *addr, sctp_scope_t scope)
+int sctp_in_scope(struct net *net, const union sctp_addr *addr,
+ enum sctp_scope scope)
{
- sctp_scope_t addr_scope = sctp_scope(addr);
+ enum sctp_scope addr_scope = sctp_scope(addr);
/* The unusable SCTP addresses will not be considered with
* any defined scopes.
@@ -545,7 +545,7 @@ int sctp_is_ep_boundall(struct sock *sk)
********************************************************************/
/* What is the scope of 'addr'? */
-sctp_scope_t sctp_scope(const union sctp_addr *addr)
+enum sctp_scope sctp_scope(const union sctp_addr *addr)
{
struct sctp_af *af;
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 1323d41e68b8..7b261afc47b9 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -201,7 +201,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
struct sctp_hmac *hmac_desc = sctp_auth_asoc_get_hmac(asoc);
if (hmac_desc)
- max_data -= SCTP_PAD4(sizeof(sctp_auth_chunk_t) +
+ max_data -= SCTP_PAD4(sizeof(struct sctp_auth_chunk) +
hmac_desc->hmac_len);
}
@@ -221,7 +221,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
asoc->outqueue.out_qlen == 0 &&
list_empty(&asoc->outqueue.retransmit) &&
msg_len > max_data)
- first_len -= SCTP_PAD4(sizeof(sctp_sack_chunk_t));
+ first_len -= SCTP_PAD4(sizeof(struct sctp_sack_chunk));
/* Encourage Cookie-ECHO bundling. */
if (asoc->state < SCTP_STATE_COOKIE_ECHOED)
@@ -311,10 +311,10 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk)
if (chunk->sent_count) {
chunk->asoc->abandoned_sent[SCTP_PR_INDEX(TTL)]++;
- streamout->abandoned_sent[SCTP_PR_INDEX(TTL)]++;
+ streamout->ext->abandoned_sent[SCTP_PR_INDEX(TTL)]++;
} else {
chunk->asoc->abandoned_unsent[SCTP_PR_INDEX(TTL)]++;
- streamout->abandoned_unsent[SCTP_PR_INDEX(TTL)]++;
+ streamout->ext->abandoned_unsent[SCTP_PR_INDEX(TTL)]++;
}
return 1;
} else if (SCTP_PR_RTX_ENABLED(chunk->sinfo.sinfo_flags) &&
@@ -323,7 +323,7 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk)
&chunk->asoc->stream.out[chunk->sinfo.sinfo_stream];
chunk->asoc->abandoned_sent[SCTP_PR_INDEX(RTX)]++;
- streamout->abandoned_sent[SCTP_PR_INDEX(RTX)]++;
+ streamout->ext->abandoned_sent[SCTP_PR_INDEX(RTX)]++;
return 1;
} else if (!SCTP_PR_POLICY(chunk->sinfo.sinfo_flags) &&
chunk->msg->expires_at &&
diff --git a/net/sctp/debug.c b/net/sctp/debug.c
index 2e47eb2f05cb..3f619fdcbf0a 100644
--- a/net/sctp/debug.c
+++ b/net/sctp/debug.c
@@ -60,7 +60,7 @@ static const char *const sctp_cid_tbl[SCTP_NUM_BASE_CHUNK_TYPES] = {
};
/* Lookup "chunk type" debug name. */
-const char *sctp_cname(const sctp_subtype_t cid)
+const char *sctp_cname(const union sctp_subtype cid)
{
if (cid.chunk <= SCTP_CID_BASE_MAX)
return sctp_cid_tbl[cid.chunk];
@@ -130,7 +130,7 @@ static const char *const sctp_primitive_tbl[SCTP_NUM_PRIMITIVE_TYPES] = {
};
/* Lookup primitive debug name. */
-const char *sctp_pname(const sctp_subtype_t id)
+const char *sctp_pname(const union sctp_subtype id)
{
if (id.primitive <= SCTP_EVENT_PRIMITIVE_MAX)
return sctp_primitive_tbl[id.primitive];
@@ -143,7 +143,7 @@ static const char *const sctp_other_tbl[] = {
};
/* Lookup "other" debug name. */
-const char *sctp_oname(const sctp_subtype_t id)
+const char *sctp_oname(const union sctp_subtype id)
{
if (id.other <= SCTP_EVENT_OTHER_MAX)
return sctp_other_tbl[id.other];
@@ -165,7 +165,7 @@ static const char *const sctp_timer_tbl[] = {
};
/* Lookup timer debug name. */
-const char *sctp_tname(const sctp_subtype_t id)
+const char *sctp_tname(const union sctp_subtype id)
{
BUILD_BUG_ON(SCTP_EVENT_TIMEOUT_MAX + 1 != ARRAY_SIZE(sctp_timer_tbl));
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 0e86f988f836..ee1e601a0b11 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -73,13 +73,13 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
* variables. There are arrays that we encode directly
* into parameters to make the rest of the operations easier.
*/
- auth_hmacs = kzalloc(sizeof(sctp_hmac_algo_param_t) +
- sizeof(__u16) * SCTP_AUTH_NUM_HMACS, gfp);
+ auth_hmacs = kzalloc(sizeof(*auth_hmacs) +
+ sizeof(__u16) * SCTP_AUTH_NUM_HMACS, gfp);
if (!auth_hmacs)
goto nomem;
- auth_chunks = kzalloc(sizeof(sctp_chunks_param_t) +
- SCTP_NUM_CHUNK_TYPES, gfp);
+ auth_chunks = kzalloc(sizeof(*auth_chunks) +
+ SCTP_NUM_CHUNK_TYPES, gfp);
if (!auth_chunks)
goto nomem;
@@ -382,8 +382,8 @@ static void sctp_endpoint_bh_rcv(struct work_struct *work)
struct sctp_transport *transport;
struct sctp_chunk *chunk;
struct sctp_inq *inqueue;
- sctp_subtype_t subtype;
- sctp_state_t state;
+ union sctp_subtype subtype;
+ enum sctp_state state;
int error = 0;
int first_time = 1; /* is this the first time through the loop */
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 41eb2ec10460..621b5ca3fd1c 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -421,7 +421,7 @@ void sctp_icmp_redirect(struct sock *sk, struct sctp_transport *t,
{
struct dst_entry *dst;
- if (!t)
+ if (sock_owned_by_user(sk) || !t)
return;
dst = sctp_transport_dst_check(t);
if (dst)
@@ -794,7 +794,7 @@ hit:
struct sctp_hash_cmp_arg {
const union sctp_addr *paddr;
const struct net *net;
- u16 lport;
+ __be16 lport;
};
static inline int sctp_hash_cmp(struct rhashtable_compare_arg *arg,
@@ -820,37 +820,37 @@ out:
return err;
}
-static inline u32 sctp_hash_obj(const void *data, u32 len, u32 seed)
+static inline __u32 sctp_hash_obj(const void *data, u32 len, u32 seed)
{
const struct sctp_transport *t = data;
const union sctp_addr *paddr = &t->ipaddr;
const struct net *net = sock_net(t->asoc->base.sk);
- u16 lport = htons(t->asoc->base.bind_addr.port);
- u32 addr;
+ __be16 lport = htons(t->asoc->base.bind_addr.port);
+ __u32 addr;
if (paddr->sa.sa_family == AF_INET6)
addr = jhash(&paddr->v6.sin6_addr, 16, seed);
else
- addr = paddr->v4.sin_addr.s_addr;
+ addr = (__force __u32)paddr->v4.sin_addr.s_addr;
- return jhash_3words(addr, ((__u32)paddr->v4.sin_port) << 16 |
+ return jhash_3words(addr, ((__force __u32)paddr->v4.sin_port) << 16 |
(__force __u32)lport, net_hash_mix(net), seed);
}
-static inline u32 sctp_hash_key(const void *data, u32 len, u32 seed)
+static inline __u32 sctp_hash_key(const void *data, u32 len, u32 seed)
{
const struct sctp_hash_cmp_arg *x = data;
const union sctp_addr *paddr = x->paddr;
const struct net *net = x->net;
- u16 lport = x->lport;
- u32 addr;
+ __be16 lport = x->lport;
+ __u32 addr;
if (paddr->sa.sa_family == AF_INET6)
addr = jhash(&paddr->v6.sin6_addr, 16, seed);
else
- addr = paddr->v4.sin_addr.s_addr;
+ addr = (__force __u32)paddr->v4.sin_addr.s_addr;
- return jhash_3words(addr, ((__u32)paddr->v4.sin_port) << 16 |
+ return jhash_3words(addr, ((__force __u32)paddr->v4.sin_port) << 16 |
(__force __u32)lport, net_hash_mix(net), seed);
}
@@ -1111,7 +1111,7 @@ static struct sctp_association *__sctp_rcv_asconf_lookup(
__be16 peer_port,
struct sctp_transport **transportp)
{
- sctp_addip_chunk_t *asconf = (struct sctp_addip_chunk *)ch;
+ struct sctp_addip_chunk *asconf = (struct sctp_addip_chunk *)ch;
struct sctp_af *af;
union sctp_addr_param *param;
union sctp_addr paddr;
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 2a186b201ad2..3b18085e3b10 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -243,8 +243,8 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
union sctp_addr *daddr = &t->ipaddr;
union sctp_addr dst_saddr;
struct in6_addr *final_p, final;
+ enum sctp_scope scope;
__u8 matchlen = 0;
- sctp_scope_t scope;
memset(fl6, 0, sizeof(struct flowi6));
fl6->daddr = daddr->v6.sin6_addr;
@@ -497,7 +497,7 @@ static void sctp_v6_from_addr_param(union sctp_addr *addr,
static int sctp_v6_to_addr_param(const union sctp_addr *addr,
union sctp_addr_param *param)
{
- int length = sizeof(sctp_ipv6addr_param_t);
+ int length = sizeof(struct sctp_ipv6addr_param);
param->v6.param_hdr.type = SCTP_PARAM_IPV6_ADDRESS;
param->v6.param_hdr.length = htons(length);
@@ -512,7 +512,9 @@ static void sctp_v6_to_addr(union sctp_addr *addr, struct in6_addr *saddr,
{
addr->sa.sa_family = AF_INET6;
addr->v6.sin6_port = port;
+ addr->v6.sin6_flowinfo = 0;
addr->v6.sin6_addr = *saddr;
+ addr->v6.sin6_scope_id = 0;
}
/* Compare addresses exactly.
@@ -624,10 +626,10 @@ static int sctp_v6_addr_valid(union sctp_addr *addr,
}
/* What is the scope of 'addr'? */
-static sctp_scope_t sctp_v6_scope(union sctp_addr *addr)
+static enum sctp_scope sctp_v6_scope(union sctp_addr *addr)
{
+ enum sctp_scope retval;
int v6scope;
- sctp_scope_t retval;
/* The IPv6 scope is really a set of bit fields.
* See IFA_* in <net/if_inet6.h>. Map to a generic SCTP scope.
@@ -736,7 +738,7 @@ static int sctp_v6_skb_iif(const struct sk_buff *skb)
/* Was this packet marked by Explicit Congestion Notification? */
static int sctp_v6_is_ce(const struct sk_buff *skb)
{
- return *((__u32 *)(ipv6_hdr(skb))) & htonl(1 << 20);
+ return *((__u32 *)(ipv6_hdr(skb))) & (__force __u32)htonl(1 << 20);
}
/* Dump the v6 addr to the seq file. */
@@ -805,9 +807,10 @@ static void sctp_inet6_skb_msgname(struct sk_buff *skb, char *msgname,
addr->v6.sin6_flowinfo = 0;
addr->v6.sin6_port = sh->source;
addr->v6.sin6_addr = ipv6_hdr(skb)->saddr;
- if (ipv6_addr_type(&addr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL) {
+ if (ipv6_addr_type(&addr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL)
addr->v6.sin6_scope_id = sctp_v6_skb_iif(skb);
- }
+ else
+ addr->v6.sin6_scope_id = 0;
}
*addr_len = sctp_v6_addr_to_user(sctp_sk(skb->sk), addr);
@@ -880,8 +883,10 @@ static int sctp_inet6_bind_verify(struct sctp_sock *opt, union sctp_addr *addr)
net = sock_net(&opt->inet.sk);
rcu_read_lock();
dev = dev_get_by_index_rcu(net, addr->v6.sin6_scope_id);
- if (!dev ||
- !ipv6_chk_addr(net, &addr->v6.sin6_addr, dev, 0)) {
+ if (!dev || !(opt->inet.freebind ||
+ net->ipv6.sysctl.ip_nonlocal_bind ||
+ ipv6_chk_addr(net, &addr->v6.sin6_addr,
+ dev, 0))) {
rcu_read_unlock();
return 0;
}
diff --git a/net/sctp/objcnt.c b/net/sctp/objcnt.c
index 105ac3327b28..aeea6da81441 100644
--- a/net/sctp/objcnt.c
+++ b/net/sctp/objcnt.c
@@ -57,7 +57,7 @@ SCTP_DBG_OBJCNT(keys);
/* An array to make it easy to pretty print the debug information
* to the proc fs.
*/
-static sctp_dbg_objcnt_entry_t sctp_dbg_objcnt[] = {
+static struct sctp_dbg_objcnt_entry sctp_dbg_objcnt[] = {
SCTP_DBG_OBJCNT_ENTRY(sock),
SCTP_DBG_OBJCNT_ENTRY(ep),
SCTP_DBG_OBJCNT_ENTRY(assoc),
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 9d8504985744..4a865cd06d76 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -57,15 +57,15 @@
#include <net/sctp/checksum.h>
/* Forward declarations for private helpers. */
-static sctp_xmit_t __sctp_packet_append_chunk(struct sctp_packet *packet,
- struct sctp_chunk *chunk);
-static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet,
- struct sctp_chunk *chunk);
+static enum sctp_xmit __sctp_packet_append_chunk(struct sctp_packet *packet,
+ struct sctp_chunk *chunk);
+static enum sctp_xmit sctp_packet_can_append_data(struct sctp_packet *packet,
+ struct sctp_chunk *chunk);
static void sctp_packet_append_data(struct sctp_packet *packet,
- struct sctp_chunk *chunk);
-static sctp_xmit_t sctp_packet_will_fit(struct sctp_packet *packet,
- struct sctp_chunk *chunk,
- u16 chunk_len);
+ struct sctp_chunk *chunk);
+static enum sctp_xmit sctp_packet_will_fit(struct sctp_packet *packet,
+ struct sctp_chunk *chunk,
+ u16 chunk_len);
static void sctp_packet_reset(struct sctp_packet *packet)
{
@@ -181,11 +181,11 @@ void sctp_packet_free(struct sctp_packet *packet)
* as it can fit in the packet, but any more data that does not fit in this
* packet can be sent only after receiving the COOKIE_ACK.
*/
-sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet,
- struct sctp_chunk *chunk,
- int one_packet, gfp_t gfp)
+enum sctp_xmit sctp_packet_transmit_chunk(struct sctp_packet *packet,
+ struct sctp_chunk *chunk,
+ int one_packet, gfp_t gfp)
{
- sctp_xmit_t retval;
+ enum sctp_xmit retval;
pr_debug("%s: packet:%p size:%zu chunk:%p size:%d\n", __func__,
packet, packet->size, chunk, chunk->skb ? chunk->skb->len : -1);
@@ -218,12 +218,12 @@ sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet,
}
/* Try to bundle an auth chunk into the packet. */
-static sctp_xmit_t sctp_packet_bundle_auth(struct sctp_packet *pkt,
- struct sctp_chunk *chunk)
+static enum sctp_xmit sctp_packet_bundle_auth(struct sctp_packet *pkt,
+ struct sctp_chunk *chunk)
{
struct sctp_association *asoc = pkt->transport->asoc;
+ enum sctp_xmit retval = SCTP_XMIT_OK;
struct sctp_chunk *auth;
- sctp_xmit_t retval = SCTP_XMIT_OK;
/* if we don't have an association, we can't do authentication */
if (!asoc)
@@ -254,10 +254,10 @@ static sctp_xmit_t sctp_packet_bundle_auth(struct sctp_packet *pkt,
}
/* Try to bundle a SACK with the packet. */
-static sctp_xmit_t sctp_packet_bundle_sack(struct sctp_packet *pkt,
- struct sctp_chunk *chunk)
+static enum sctp_xmit sctp_packet_bundle_sack(struct sctp_packet *pkt,
+ struct sctp_chunk *chunk)
{
- sctp_xmit_t retval = SCTP_XMIT_OK;
+ enum sctp_xmit retval = SCTP_XMIT_OK;
/* If sending DATA and haven't aleady bundled a SACK, try to
* bundle one in to the packet.
@@ -299,11 +299,11 @@ out:
/* Append a chunk to the offered packet reporting back any inability to do
* so.
*/
-static sctp_xmit_t __sctp_packet_append_chunk(struct sctp_packet *packet,
- struct sctp_chunk *chunk)
+static enum sctp_xmit __sctp_packet_append_chunk(struct sctp_packet *packet,
+ struct sctp_chunk *chunk)
{
- sctp_xmit_t retval = SCTP_XMIT_OK;
__u16 chunk_len = SCTP_PAD4(ntohs(chunk->chunk_hdr->length));
+ enum sctp_xmit retval = SCTP_XMIT_OK;
/* Check to see if this chunk will fit into the packet */
retval = sctp_packet_will_fit(packet, chunk, chunk_len);
@@ -353,10 +353,10 @@ finish:
/* Append a chunk to the offered packet reporting back any inability to do
* so.
*/
-sctp_xmit_t sctp_packet_append_chunk(struct sctp_packet *packet,
- struct sctp_chunk *chunk)
+enum sctp_xmit sctp_packet_append_chunk(struct sctp_packet *packet,
+ struct sctp_chunk *chunk)
{
- sctp_xmit_t retval = SCTP_XMIT_OK;
+ enum sctp_xmit retval = SCTP_XMIT_OK;
pr_debug("%s: packet:%p chunk:%p\n", __func__, packet, chunk);
@@ -653,8 +653,8 @@ out:
********************************************************************/
/* This private function check to see if a chunk can be added */
-static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet,
- struct sctp_chunk *chunk)
+static enum sctp_xmit sctp_packet_can_append_data(struct sctp_packet *packet,
+ struct sctp_chunk *chunk)
{
size_t datasize, rwnd, inflight, flight_size;
struct sctp_transport *transport = packet->transport;
@@ -762,12 +762,12 @@ static void sctp_packet_append_data(struct sctp_packet *packet,
sctp_chunk_assign_ssn(chunk);
}
-static sctp_xmit_t sctp_packet_will_fit(struct sctp_packet *packet,
- struct sctp_chunk *chunk,
- u16 chunk_len)
+static enum sctp_xmit sctp_packet_will_fit(struct sctp_packet *packet,
+ struct sctp_chunk *chunk,
+ u16 chunk_len)
{
+ enum sctp_xmit retval = SCTP_XMIT_OK;
size_t psize, pmtu, maxsize;
- sctp_xmit_t retval = SCTP_XMIT_OK;
psize = packet->size;
if (packet->transport->asoc)
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index e8762702a313..4db012aa25f7 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -50,6 +50,7 @@
#include <net/sctp/sctp.h>
#include <net/sctp/sm.h>
+#include <net/sctp/stream_sched.h>
/* Declare internal functions here. */
static int sctp_acked(struct sctp_sackhdr *sack, __u32 tsn);
@@ -72,32 +73,38 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp);
/* Add data to the front of the queue. */
static inline void sctp_outq_head_data(struct sctp_outq *q,
- struct sctp_chunk *ch)
+ struct sctp_chunk *ch)
{
+ struct sctp_stream_out_ext *oute;
+ __u16 stream;
+
list_add(&ch->list, &q->out_chunk_list);
q->out_qlen += ch->skb->len;
+
+ stream = sctp_chunk_stream_no(ch);
+ oute = q->asoc->stream.out[stream].ext;
+ list_add(&ch->stream_list, &oute->outq);
}
/* Take data from the front of the queue. */
static inline struct sctp_chunk *sctp_outq_dequeue_data(struct sctp_outq *q)
{
- struct sctp_chunk *ch = NULL;
-
- if (!list_empty(&q->out_chunk_list)) {
- struct list_head *entry = q->out_chunk_list.next;
-
- ch = list_entry(entry, struct sctp_chunk, list);
- list_del_init(entry);
- q->out_qlen -= ch->skb->len;
- }
- return ch;
+ return q->sched->dequeue(q);
}
+
/* Add data chunk to the end of the queue. */
static inline void sctp_outq_tail_data(struct sctp_outq *q,
struct sctp_chunk *ch)
{
+ struct sctp_stream_out_ext *oute;
+ __u16 stream;
+
list_add_tail(&ch->list, &q->out_chunk_list);
q->out_qlen += ch->skb->len;
+
+ stream = sctp_chunk_stream_no(ch);
+ oute = q->asoc->stream.out[stream].ext;
+ list_add_tail(&ch->stream_list, &oute->outq);
}
/*
@@ -207,6 +214,7 @@ void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q)
INIT_LIST_HEAD(&q->retransmit);
INIT_LIST_HEAD(&q->sacked);
INIT_LIST_HEAD(&q->abandoned);
+ sctp_sched_set_sched(asoc, SCTP_SS_FCFS);
}
/* Free the outqueue structure and any related pending chunks.
@@ -258,6 +266,7 @@ static void __sctp_outq_teardown(struct sctp_outq *q)
/* Throw away any leftover data chunks. */
while ((chunk = sctp_outq_dequeue_data(q)) != NULL) {
+ sctp_sched_dequeue_done(q, chunk);
/* Mark as send failure. */
sctp_chunk_fail(chunk, q->error);
@@ -366,7 +375,7 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc,
streamout = &asoc->stream.out[chk->sinfo.sinfo_stream];
asoc->sent_cnt_removable--;
asoc->abandoned_sent[SCTP_PR_INDEX(PRIO)]++;
- streamout->abandoned_sent[SCTP_PR_INDEX(PRIO)]++;
+ streamout->ext->abandoned_sent[SCTP_PR_INDEX(PRIO)]++;
if (!chk->tsn_gap_acked) {
if (chk->transport)
@@ -391,20 +400,21 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc,
struct sctp_outq *q = &asoc->outqueue;
struct sctp_chunk *chk, *temp;
+ q->sched->unsched_all(&asoc->stream);
+
list_for_each_entry_safe(chk, temp, &q->out_chunk_list, list) {
if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) ||
chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive)
continue;
- list_del_init(&chk->list);
- q->out_qlen -= chk->skb->len;
+ sctp_sched_dequeue_common(q, chk);
asoc->sent_cnt_removable--;
asoc->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++;
if (chk->sinfo.sinfo_stream < asoc->stream.outcnt) {
struct sctp_stream_out *streamout =
&asoc->stream.out[chk->sinfo.sinfo_stream];
- streamout->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++;
+ streamout->ext->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++;
}
msg_len -= SCTP_DATA_SNDSIZE(chk) +
@@ -415,6 +425,8 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc,
break;
}
+ q->sched->sched_all(&asoc->stream);
+
return msg_len;
}
@@ -534,7 +546,7 @@ void sctp_retransmit_mark(struct sctp_outq *q,
* one packet out.
*/
void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport,
- sctp_retransmit_reason_t reason)
+ enum sctp_retransmit_reason reason)
{
struct net *net = sock_net(q->asoc->base.sk);
@@ -594,14 +606,14 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport,
static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
int rtx_timeout, int *start_timer)
{
- struct list_head *lqueue;
struct sctp_transport *transport = pkt->transport;
- sctp_xmit_t status;
struct sctp_chunk *chunk, *chunk1;
- int fast_rtx;
+ struct list_head *lqueue;
+ enum sctp_xmit status;
int error = 0;
int timer = 0;
int done = 0;
+ int fast_rtx;
lqueue = &q->retransmit;
fast_rtx = q->fast_rtx;
@@ -781,7 +793,7 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
struct sctp_transport *transport = NULL;
struct sctp_transport *new_transport;
struct sctp_chunk *chunk, *tmp;
- sctp_xmit_t status;
+ enum sctp_xmit status;
int error = 0;
int start_timer = 0;
int one_packet = 0;
@@ -1033,22 +1045,9 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
while ((chunk = sctp_outq_dequeue_data(q)) != NULL) {
__u32 sid = ntohs(chunk->subh.data_hdr->stream);
- /* RFC 2960 6.5 Every DATA chunk MUST carry a valid
- * stream identifier.
- */
- if (chunk->sinfo.sinfo_stream >= asoc->stream.outcnt) {
-
- /* Mark as failed send. */
- sctp_chunk_fail(chunk, SCTP_ERROR_INV_STRM);
- if (asoc->peer.prsctp_capable &&
- SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags))
- asoc->sent_cnt_removable--;
- sctp_chunk_free(chunk);
- continue;
- }
-
/* Has this chunk expired? */
if (sctp_chunk_abandoned(chunk)) {
+ sctp_sched_dequeue_done(q, chunk);
sctp_chunk_fail(chunk, 0);
sctp_chunk_free(chunk);
continue;
@@ -1070,6 +1069,7 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
new_transport = asoc->peer.active_path;
if (new_transport->state == SCTP_UNCONFIRMED) {
WARN_ONCE(1, "Attempt to send packet on unconfirmed path.");
+ sctp_sched_dequeue_done(q, chunk);
sctp_chunk_fail(chunk, 0);
sctp_chunk_free(chunk);
continue;
@@ -1133,6 +1133,11 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
else
asoc->stats.oodchunks++;
+ /* Only now it's safe to consider this
+ * chunk as sent, sched-wise.
+ */
+ sctp_sched_dequeue_done(q, chunk);
+
break;
default:
@@ -1197,7 +1202,7 @@ sctp_flush_out:
static void sctp_sack_update_unack_data(struct sctp_association *assoc,
struct sctp_sackhdr *sack)
{
- sctp_sack_variable_t *frags;
+ union sctp_sack_variable *frags;
__u16 unack_data;
int i;
@@ -1224,7 +1229,7 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_chunk *chunk)
struct sctp_transport *transport;
struct sctp_chunk *tchunk = NULL;
struct list_head *lchunk, *transport_list, *temp;
- sctp_sack_variable_t *frags = sack->variable;
+ union sctp_sack_variable *frags = sack->variable;
__u32 sack_ctsn, ctsn, tsn;
__u32 highest_tsn, highest_new_tsn;
__u32 sack_a_rwnd;
@@ -1736,10 +1741,10 @@ static void sctp_mark_missing(struct sctp_outq *q,
/* Is the given TSN acked by this packet? */
static int sctp_acked(struct sctp_sackhdr *sack, __u32 tsn)
{
- int i;
- sctp_sack_variable_t *frags;
- __u16 tsn_offset, blocks;
__u32 ctsn = ntohl(sack->cum_tsn_ack);
+ union sctp_sack_variable *frags;
+ __u16 tsn_offset, blocks;
+ int i;
if (TSN_lte(tsn, ctsn))
goto pass;
diff --git a/net/sctp/primitive.c b/net/sctp/primitive.c
index f0553a022859..c0817f7a8964 100644
--- a/net/sctp/primitive.c
+++ b/net/sctp/primitive.c
@@ -53,8 +53,8 @@
int sctp_primitive_ ## name(struct net *net, struct sctp_association *asoc, \
void *arg) { \
int error = 0; \
- sctp_event_t event_type; sctp_subtype_t subtype; \
- sctp_state_t state; \
+ enum sctp_event event_type; union sctp_subtype subtype; \
+ enum sctp_state state; \
struct sctp_endpoint *ep; \
\
event_type = SCTP_EVENT_T_PRIMITIVE; \
diff --git a/net/sctp/probe.c b/net/sctp/probe.c
index 6cc2152e0740..1280f85a598d 100644
--- a/net/sctp/probe.c
+++ b/net/sctp/probe.c
@@ -127,12 +127,13 @@ static const struct file_operations sctpprobe_fops = {
.llseek = noop_llseek,
};
-static sctp_disposition_t jsctp_sf_eat_sack(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+static enum sctp_disposition jsctp_sf_eat_sack(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
struct sctp_chunk *chunk = arg;
struct sk_buff *skb = chunk->skb;
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 989a900383b5..f5172c21349b 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -196,7 +196,7 @@ static void sctp_free_local_addr_list(struct net *net)
/* Copy the local addresses which are valid for 'scope' into 'bp'. */
int sctp_copy_local_addr_list(struct net *net, struct sctp_bind_addr *bp,
- sctp_scope_t scope, gfp_t gfp, int copy_flags)
+ enum sctp_scope scope, gfp_t gfp, int copy_flags)
{
struct sctp_sockaddr_entry *addr;
union sctp_addr laddr;
@@ -292,7 +292,7 @@ static void sctp_v4_from_addr_param(union sctp_addr *addr,
static int sctp_v4_to_addr_param(const union sctp_addr *addr,
union sctp_addr_param *param)
{
- int length = sizeof(sctp_ipv4addr_param_t);
+ int length = sizeof(struct sctp_ipv4addr_param);
param->v4.param_hdr.type = SCTP_PARAM_IPV4_ADDRESS;
param->v4.param_hdr.length = htons(length);
@@ -400,9 +400,9 @@ static int sctp_v4_available(union sctp_addr *addr, struct sctp_sock *sp)
* IPv4 scoping can be controlled through sysctl option
* net.sctp.addr_scope_policy
*/
-static sctp_scope_t sctp_v4_scope(union sctp_addr *addr)
+static enum sctp_scope sctp_v4_scope(union sctp_addr *addr)
{
- sctp_scope_t retval;
+ enum sctp_scope retval;
/* Check for unusable SCTP addresses. */
if (IS_IPV4_UNUSABLE_ADDRESS(addr->v4.sin_addr.s_addr)) {
@@ -622,9 +622,9 @@ static void sctp_v4_ecn_capable(struct sock *sk)
INET_ECN_xmit(sk);
}
-static void sctp_addr_wq_timeout_handler(unsigned long arg)
+static void sctp_addr_wq_timeout_handler(struct timer_list *t)
{
- struct net *net = (struct net *)arg;
+ struct net *net = from_timer(net, t, sctp.addr_wq_timer);
struct sctp_sockaddr_entry *addrw, *temp;
struct sctp_sock *sp;
@@ -1304,8 +1304,7 @@ static int __net_init sctp_defaults_init(struct net *net)
INIT_LIST_HEAD(&net->sctp.auto_asconf_splist);
spin_lock_init(&net->sctp.addr_wq_lock);
net->sctp.addr_wq_timer.expires = 0;
- setup_timer(&net->sctp.addr_wq_timer, sctp_addr_wq_timeout_handler,
- (unsigned long)net);
+ timer_setup(&net->sctp.addr_wq_timer, sctp_addr_wq_timeout_handler, 0);
return 0;
diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c
index 9a647214a91e..a72a7d925d46 100644
--- a/net/sctp/sctp_diag.c
+++ b/net/sctp/sctp_diag.c
@@ -70,7 +70,8 @@ static int inet_diag_msg_sctpladdrs_fill(struct sk_buff *skb,
info = nla_data(attr);
list_for_each_entry_rcu(laddr, address_list, list) {
- memcpy(info, &laddr->a, addrlen);
+ memcpy(info, &laddr->a, sizeof(laddr->a));
+ memset(info + sizeof(laddr->a), 0, addrlen - sizeof(laddr->a));
info += addrlen;
}
@@ -93,7 +94,9 @@ static int inet_diag_msg_sctpaddrs_fill(struct sk_buff *skb,
info = nla_data(attr);
list_for_each_entry(from, &asoc->peer.transport_addr_list,
transports) {
- memcpy(info, &from->ipaddr, addrlen);
+ memcpy(info, &from->ipaddr, sizeof(from->ipaddr));
+ memset(info + sizeof(from->ipaddr), 0,
+ addrlen - sizeof(from->ipaddr));
info += addrlen;
}
@@ -276,9 +279,11 @@ out:
return err;
}
-static int sctp_sock_dump(struct sock *sk, void *p)
+static int sctp_sock_dump(struct sctp_transport *tsp, void *p)
{
+ struct sctp_endpoint *ep = tsp->asoc->ep;
struct sctp_comm_param *commp = p;
+ struct sock *sk = ep->base.sk;
struct sk_buff *skb = commp->skb;
struct netlink_callback *cb = commp->cb;
const struct inet_diag_req_v2 *r = commp->r;
@@ -286,9 +291,7 @@ static int sctp_sock_dump(struct sock *sk, void *p)
int err = 0;
lock_sock(sk);
- if (!sctp_sk(sk)->ep)
- goto release;
- list_for_each_entry(assoc, &sctp_sk(sk)->ep->asocs, asocs) {
+ list_for_each_entry(assoc, &ep->asocs, asocs) {
if (cb->args[4] < cb->args[1])
goto next;
@@ -306,7 +309,6 @@ static int sctp_sock_dump(struct sock *sk, void *p)
cb->nlh->nlmsg_seq,
NLM_F_MULTI, cb->nlh,
commp->net_admin) < 0) {
- cb->args[3] = 1;
err = 1;
goto release;
}
@@ -324,40 +326,30 @@ next:
cb->args[4]++;
}
cb->args[1] = 0;
- cb->args[2]++;
cb->args[3] = 0;
cb->args[4] = 0;
release:
release_sock(sk);
- sock_put(sk);
return err;
}
-static int sctp_get_sock(struct sctp_transport *tsp, void *p)
+static int sctp_sock_filter(struct sctp_transport *tsp, void *p)
{
struct sctp_endpoint *ep = tsp->asoc->ep;
struct sctp_comm_param *commp = p;
struct sock *sk = ep->base.sk;
- struct netlink_callback *cb = commp->cb;
const struct inet_diag_req_v2 *r = commp->r;
struct sctp_association *assoc =
list_entry(ep->asocs.next, struct sctp_association, asocs);
/* find the ep only once through the transports by this condition */
if (tsp->asoc != assoc)
- goto out;
+ return 0;
if (r->sdiag_family != AF_UNSPEC && sk->sk_family != r->sdiag_family)
- goto out;
-
- sock_hold(sk);
- cb->args[5] = (long)sk;
+ return 0;
return 1;
-
-out:
- cb->args[2]++;
- return 0;
}
static int sctp_ep_dump(struct sctp_endpoint *ep, void *p)
@@ -471,6 +463,7 @@ static void sctp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
.r = r,
.net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN),
};
+ int pos = cb->args[2];
/* eps hashtable dumps
* args:
@@ -500,12 +493,9 @@ skip:
if (!(idiag_states & ~(TCPF_LISTEN | TCPF_CLOSE)))
goto done;
-next:
- cb->args[5] = 0;
- sctp_for_each_transport(sctp_get_sock, net, cb->args[2], &commp);
-
- if (cb->args[5] && !sctp_sock_dump((struct sock *)cb->args[5], &commp))
- goto next;
+ sctp_for_each_transport(sctp_sock_filter, sctp_sock_dump,
+ net, &pos, &commp);
+ cb->args[2] = pos;
done:
cb->args[1] = cb->args[4];
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 6110447fe51d..9bf575f2e8ed 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -69,7 +69,8 @@ static struct sctp_chunk *sctp_make_data(const struct sctp_association *asoc,
static struct sctp_chunk *_sctp_make_chunk(const struct sctp_association *asoc,
__u8 type, __u8 flags, int paylen,
gfp_t gfp);
-static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep,
+static struct sctp_cookie_param *sctp_pack_cookie(
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const struct sctp_chunk *init_chunk,
int *cookie_len,
@@ -131,17 +132,17 @@ static const struct sctp_paramhdr prsctp_param = {
* provided chunk, as most cause codes will be embedded inside an
* abort chunk.
*/
-void sctp_init_cause(struct sctp_chunk *chunk, __be16 cause_code,
- size_t paylen)
+void sctp_init_cause(struct sctp_chunk *chunk, __be16 cause_code,
+ size_t paylen)
{
- sctp_errhdr_t err;
+ struct sctp_errhdr err;
__u16 len;
/* Cause code constants are now defined in network order. */
err.cause = cause_code;
- len = sizeof(sctp_errhdr_t) + paylen;
+ len = sizeof(err) + paylen;
err.length = htons(len);
- chunk->subh.err_hdr = sctp_addto_chunk(chunk, sizeof(sctp_errhdr_t), &err);
+ chunk->subh.err_hdr = sctp_addto_chunk(chunk, sizeof(err), &err);
}
/* A helper to initialize an op error inside a
@@ -150,21 +151,21 @@ void sctp_init_cause(struct sctp_chunk *chunk, __be16 cause_code,
* if there isn't enough space in the op error chunk
*/
static int sctp_init_cause_fixed(struct sctp_chunk *chunk, __be16 cause_code,
- size_t paylen)
+ size_t paylen)
{
- sctp_errhdr_t err;
+ struct sctp_errhdr err;
__u16 len;
/* Cause code constants are now defined in network order. */
err.cause = cause_code;
- len = sizeof(sctp_errhdr_t) + paylen;
+ len = sizeof(err) + paylen;
err.length = htons(len);
if (skb_tailroom(chunk->skb) < len)
return -ENOSPC;
- chunk->subh.err_hdr = sctp_addto_chunk_fixed(chunk,
- sizeof(sctp_errhdr_t),
- &err);
+
+ chunk->subh.err_hdr = sctp_addto_chunk_fixed(chunk, sizeof(err), &err);
+
return 0;
}
/* 3.3.2 Initiation (INIT) (1)
@@ -212,32 +213,31 @@ static int sctp_init_cause_fixed(struct sctp_chunk *chunk, __be16 cause_code,
* Supported Address Types (Note 4) Optional 12
*/
struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
- const struct sctp_bind_addr *bp,
- gfp_t gfp, int vparam_len)
+ const struct sctp_bind_addr *bp,
+ gfp_t gfp, int vparam_len)
{
struct net *net = sock_net(asoc->base.sk);
+ struct sctp_supported_ext_param ext_param;
+ struct sctp_adaptation_ind_param aiparam;
+ struct sctp_paramhdr *auth_chunks = NULL;
+ struct sctp_paramhdr *auth_hmacs = NULL;
+ struct sctp_supported_addrs_param sat;
struct sctp_endpoint *ep = asoc->ep;
- struct sctp_inithdr init;
- union sctp_params addrs;
- size_t chunksize;
struct sctp_chunk *retval = NULL;
int num_types, addrs_len = 0;
+ struct sctp_inithdr init;
+ union sctp_params addrs;
struct sctp_sock *sp;
- sctp_supported_addrs_param_t sat;
+ __u8 extensions[4];
+ size_t chunksize;
__be16 types[2];
- sctp_adaptation_ind_param_t aiparam;
- sctp_supported_ext_param_t ext_param;
int num_ext = 0;
- __u8 extensions[4];
- struct sctp_paramhdr *auth_chunks = NULL,
- *auth_hmacs = NULL;
/* RFC 2960 3.3.2 Initiation (INIT) (1)
*
* Note 1: The INIT chunks can contain multiple addresses that
* can be IPv4 and/or IPv6 in any combination.
*/
- retval = NULL;
/* Convert the provided bind address list to raw format. */
addrs = sctp_bind_addrs_to_raw(bp, &addrs_len, gfp);
@@ -305,8 +305,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
/* If we have any extensions to report, account for that */
if (num_ext)
- chunksize += SCTP_PAD4(sizeof(sctp_supported_ext_param_t) +
- num_ext);
+ chunksize += SCTP_PAD4(sizeof(ext_param) + num_ext);
/* RFC 2960 3.3.2 Initiation (INIT) (1)
*
@@ -348,10 +347,8 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
*/
if (num_ext) {
ext_param.param_hdr.type = SCTP_PARAM_SUPPORTED_EXT;
- ext_param.param_hdr.length =
- htons(sizeof(sctp_supported_ext_param_t) + num_ext);
- sctp_addto_chunk(retval, sizeof(sctp_supported_ext_param_t),
- &ext_param);
+ ext_param.param_hdr.length = htons(sizeof(ext_param) + num_ext);
+ sctp_addto_chunk(retval, sizeof(ext_param), &ext_param);
sctp_addto_param(retval, num_ext, extensions);
}
@@ -382,26 +379,24 @@ nodata:
}
struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
- const struct sctp_chunk *chunk,
- gfp_t gfp, int unkparam_len)
+ const struct sctp_chunk *chunk,
+ gfp_t gfp, int unkparam_len)
{
+ struct sctp_supported_ext_param ext_param;
+ struct sctp_adaptation_ind_param aiparam;
+ struct sctp_paramhdr *auth_chunks = NULL;
+ struct sctp_paramhdr *auth_random = NULL;
+ struct sctp_paramhdr *auth_hmacs = NULL;
+ struct sctp_chunk *retval = NULL;
+ struct sctp_cookie_param *cookie;
struct sctp_inithdr initack;
- struct sctp_chunk *retval;
union sctp_params addrs;
struct sctp_sock *sp;
- int addrs_len;
- sctp_cookie_param_t *cookie;
- int cookie_len;
+ __u8 extensions[4];
size_t chunksize;
- sctp_adaptation_ind_param_t aiparam;
- sctp_supported_ext_param_t ext_param;
int num_ext = 0;
- __u8 extensions[4];
- struct sctp_paramhdr *auth_chunks = NULL,
- *auth_hmacs = NULL,
- *auth_random = NULL;
-
- retval = NULL;
+ int cookie_len;
+ int addrs_len;
/* Note: there may be no addresses to embed. */
addrs = sctp_bind_addrs_to_raw(&asoc->base.bind_addr, &addrs_len, gfp);
@@ -468,8 +463,7 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
}
if (num_ext)
- chunksize += SCTP_PAD4(sizeof(sctp_supported_ext_param_t) +
- num_ext);
+ chunksize += SCTP_PAD4(sizeof(ext_param) + num_ext);
/* Now allocate and fill out the chunk. */
retval = sctp_make_control(asoc, SCTP_CID_INIT_ACK, 0, chunksize, gfp);
@@ -495,10 +489,8 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
sctp_addto_chunk(retval, sizeof(ecap_param), &ecap_param);
if (num_ext) {
ext_param.param_hdr.type = SCTP_PARAM_SUPPORTED_EXT;
- ext_param.param_hdr.length =
- htons(sizeof(sctp_supported_ext_param_t) + num_ext);
- sctp_addto_chunk(retval, sizeof(sctp_supported_ext_param_t),
- &ext_param);
+ ext_param.param_hdr.length = htons(sizeof(ext_param) + num_ext);
+ sctp_addto_chunk(retval, sizeof(ext_param), &ext_param);
sctp_addto_param(retval, num_ext, extensions);
}
if (asoc->peer.prsctp_capable)
@@ -567,11 +559,11 @@ nomem_cookie:
* to insure interoperability.
*/
struct sctp_chunk *sctp_make_cookie_echo(const struct sctp_association *asoc,
- const struct sctp_chunk *chunk)
+ const struct sctp_chunk *chunk)
{
struct sctp_chunk *retval;
- void *cookie;
int cookie_len;
+ void *cookie;
cookie = asoc->peer.cookie;
cookie_len = asoc->peer.cookie_len;
@@ -619,7 +611,7 @@ nodata:
* Set to zero on transmit and ignored on receipt.
*/
struct sctp_chunk *sctp_make_cookie_ack(const struct sctp_association *asoc,
- const struct sctp_chunk *chunk)
+ const struct sctp_chunk *chunk)
{
struct sctp_chunk *retval;
@@ -664,15 +656,15 @@ struct sctp_chunk *sctp_make_cookie_ack(const struct sctp_association *asoc,
* Note: The CWR is considered a Control chunk.
*/
struct sctp_chunk *sctp_make_cwr(const struct sctp_association *asoc,
- const __u32 lowest_tsn,
- const struct sctp_chunk *chunk)
+ const __u32 lowest_tsn,
+ const struct sctp_chunk *chunk)
{
struct sctp_chunk *retval;
- sctp_cwrhdr_t cwr;
+ struct sctp_cwrhdr cwr;
cwr.lowest_tsn = htonl(lowest_tsn);
retval = sctp_make_control(asoc, SCTP_CID_ECN_CWR, 0,
- sizeof(sctp_cwrhdr_t), GFP_ATOMIC);
+ sizeof(cwr), GFP_ATOMIC);
if (!retval)
goto nodata;
@@ -699,14 +691,14 @@ nodata:
/* Make an ECNE chunk. This is a congestion experienced report. */
struct sctp_chunk *sctp_make_ecne(const struct sctp_association *asoc,
- const __u32 lowest_tsn)
+ const __u32 lowest_tsn)
{
struct sctp_chunk *retval;
- sctp_ecnehdr_t ecne;
+ struct sctp_ecnehdr ecne;
ecne.lowest_tsn = htonl(lowest_tsn);
retval = sctp_make_control(asoc, SCTP_CID_ECN_ECNE, 0,
- sizeof(sctp_ecnehdr_t), GFP_ATOMIC);
+ sizeof(ecne), GFP_ATOMIC);
if (!retval)
goto nodata;
retval->subh.ecne_hdr =
@@ -720,9 +712,9 @@ nodata:
* parameters. However, do not populate the data payload.
*/
struct sctp_chunk *sctp_make_datafrag_empty(struct sctp_association *asoc,
- const struct sctp_sndrcvinfo *sinfo,
- int data_len, __u8 flags, __u16 ssn,
- gfp_t gfp)
+ const struct sctp_sndrcvinfo *sinfo,
+ int data_len, __u8 flags, __u16 ssn,
+ gfp_t gfp)
{
struct sctp_chunk *retval;
struct sctp_datahdr dp;
@@ -760,15 +752,15 @@ nodata:
*/
struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc)
{
- struct sctp_chunk *retval;
- struct sctp_sackhdr sack;
- int len;
- __u32 ctsn;
- __u16 num_gabs, num_dup_tsns;
- struct sctp_association *aptr = (struct sctp_association *)asoc;
struct sctp_tsnmap *map = (struct sctp_tsnmap *)&asoc->peer.tsn_map;
+ struct sctp_association *aptr = (struct sctp_association *)asoc;
struct sctp_gap_ack_block gabs[SCTP_MAX_GABS];
+ __u16 num_gabs, num_dup_tsns;
struct sctp_transport *trans;
+ struct sctp_chunk *retval;
+ struct sctp_sackhdr sack;
+ __u32 ctsn;
+ int len;
memset(gabs, 0, sizeof(gabs));
ctsn = sctp_tsnmap_get_ctsn(map);
@@ -862,15 +854,15 @@ nodata:
struct sctp_chunk *sctp_make_shutdown(const struct sctp_association *asoc,
const struct sctp_chunk *chunk)
{
+ struct sctp_shutdownhdr shut;
struct sctp_chunk *retval;
- sctp_shutdownhdr_t shut;
__u32 ctsn;
ctsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map);
shut.cum_tsn_ack = htonl(ctsn);
retval = sctp_make_control(asoc, SCTP_CID_SHUTDOWN, 0,
- sizeof(sctp_shutdownhdr_t), GFP_ATOMIC);
+ sizeof(shut), GFP_ATOMIC);
if (!retval)
goto nodata;
@@ -884,7 +876,7 @@ nodata:
}
struct sctp_chunk *sctp_make_shutdown_ack(const struct sctp_association *asoc,
- const struct sctp_chunk *chunk)
+ const struct sctp_chunk *chunk)
{
struct sctp_chunk *retval;
@@ -907,8 +899,8 @@ struct sctp_chunk *sctp_make_shutdown_ack(const struct sctp_association *asoc,
}
struct sctp_chunk *sctp_make_shutdown_complete(
- const struct sctp_association *asoc,
- const struct sctp_chunk *chunk)
+ const struct sctp_association *asoc,
+ const struct sctp_chunk *chunk)
{
struct sctp_chunk *retval;
__u8 flags = 0;
@@ -941,8 +933,8 @@ struct sctp_chunk *sctp_make_shutdown_complete(
* association, except when responding to an INIT (sctpimpguide 2.41).
*/
struct sctp_chunk *sctp_make_abort(const struct sctp_association *asoc,
- const struct sctp_chunk *chunk,
- const size_t hint)
+ const struct sctp_chunk *chunk,
+ const size_t hint)
{
struct sctp_chunk *retval;
__u8 flags = 0;
@@ -978,14 +970,15 @@ struct sctp_chunk *sctp_make_abort(const struct sctp_association *asoc,
/* Helper to create ABORT with a NO_USER_DATA error. */
struct sctp_chunk *sctp_make_abort_no_data(
- const struct sctp_association *asoc,
- const struct sctp_chunk *chunk, __u32 tsn)
+ const struct sctp_association *asoc,
+ const struct sctp_chunk *chunk,
+ __u32 tsn)
{
struct sctp_chunk *retval;
__be32 payload;
- retval = sctp_make_abort(asoc, chunk, sizeof(sctp_errhdr_t)
- + sizeof(tsn));
+ retval = sctp_make_abort(asoc, chunk,
+ sizeof(struct sctp_errhdr) + sizeof(tsn));
if (!retval)
goto no_mem;
@@ -1020,7 +1013,8 @@ struct sctp_chunk *sctp_make_abort_user(const struct sctp_association *asoc,
void *payload = NULL;
int err;
- retval = sctp_make_abort(asoc, NULL, sizeof(sctp_errhdr_t) + paylen);
+ retval = sctp_make_abort(asoc, NULL,
+ sizeof(struct sctp_errhdr) + paylen);
if (!retval)
goto err_chunk;
@@ -1058,8 +1052,8 @@ err_chunk:
static void *sctp_addto_param(struct sctp_chunk *chunk, int len,
const void *data)
{
- void *target;
int chunklen = ntohs(chunk->chunk_hdr->length);
+ void *target;
target = skb_put(chunk->skb, len);
@@ -1077,16 +1071,16 @@ static void *sctp_addto_param(struct sctp_chunk *chunk, int len,
/* Make an ABORT chunk with a PROTOCOL VIOLATION cause code. */
struct sctp_chunk *sctp_make_abort_violation(
- const struct sctp_association *asoc,
- const struct sctp_chunk *chunk,
- const __u8 *payload,
- const size_t paylen)
+ const struct sctp_association *asoc,
+ const struct sctp_chunk *chunk,
+ const __u8 *payload,
+ const size_t paylen)
{
struct sctp_chunk *retval;
struct sctp_paramhdr phdr;
- retval = sctp_make_abort(asoc, chunk, sizeof(sctp_errhdr_t) + paylen +
- sizeof(phdr));
+ retval = sctp_make_abort(asoc, chunk, sizeof(struct sctp_errhdr) +
+ paylen + sizeof(phdr));
if (!retval)
goto end;
@@ -1103,14 +1097,14 @@ end:
}
struct sctp_chunk *sctp_make_violation_paramlen(
- const struct sctp_association *asoc,
- const struct sctp_chunk *chunk,
- struct sctp_paramhdr *param)
+ const struct sctp_association *asoc,
+ const struct sctp_chunk *chunk,
+ struct sctp_paramhdr *param)
{
- struct sctp_chunk *retval;
static const char error[] = "The following parameter had invalid length:";
- size_t payload_len = sizeof(error) + sizeof(sctp_errhdr_t) +
+ size_t payload_len = sizeof(error) + sizeof(struct sctp_errhdr) +
sizeof(*param);
+ struct sctp_chunk *retval;
retval = sctp_make_abort(asoc, chunk, payload_len);
if (!retval)
@@ -1126,12 +1120,12 @@ nodata:
}
struct sctp_chunk *sctp_make_violation_max_retrans(
- const struct sctp_association *asoc,
- const struct sctp_chunk *chunk)
+ const struct sctp_association *asoc,
+ const struct sctp_chunk *chunk)
{
- struct sctp_chunk *retval;
static const char error[] = "Association exceeded its max_retans count";
- size_t payload_len = sizeof(error) + sizeof(sctp_errhdr_t);
+ size_t payload_len = sizeof(error) + sizeof(struct sctp_errhdr);
+ struct sctp_chunk *retval;
retval = sctp_make_abort(asoc, chunk, payload_len);
if (!retval)
@@ -1146,10 +1140,10 @@ nodata:
/* Make a HEARTBEAT chunk. */
struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc,
- const struct sctp_transport *transport)
+ const struct sctp_transport *transport)
{
+ struct sctp_sender_hb_info hbinfo;
struct sctp_chunk *retval;
- sctp_sender_hb_info_t hbinfo;
retval = sctp_make_control(asoc, SCTP_CID_HEARTBEAT, 0,
sizeof(hbinfo), GFP_ATOMIC);
@@ -1158,7 +1152,7 @@ struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc,
goto nodata;
hbinfo.param_hdr.type = SCTP_PARAM_HEARTBEAT_INFO;
- hbinfo.param_hdr.length = htons(sizeof(sctp_sender_hb_info_t));
+ hbinfo.param_hdr.length = htons(sizeof(hbinfo));
hbinfo.daddr = transport->ipaddr;
hbinfo.sent_at = jiffies;
hbinfo.hb_nonce = transport->hb_nonce;
@@ -1175,8 +1169,9 @@ nodata:
}
struct sctp_chunk *sctp_make_heartbeat_ack(const struct sctp_association *asoc,
- const struct sctp_chunk *chunk,
- const void *payload, const size_t paylen)
+ const struct sctp_chunk *chunk,
+ const void *payload,
+ const size_t paylen)
{
struct sctp_chunk *retval;
@@ -1207,14 +1202,15 @@ nodata:
* This routine can be used for containing multiple causes in the chunk.
*/
static struct sctp_chunk *sctp_make_op_error_space(
- const struct sctp_association *asoc,
- const struct sctp_chunk *chunk,
- size_t size)
+ const struct sctp_association *asoc,
+ const struct sctp_chunk *chunk,
+ size_t size)
{
struct sctp_chunk *retval;
retval = sctp_make_control(asoc, SCTP_CID_ERROR, 0,
- sizeof(sctp_errhdr_t) + size, GFP_ATOMIC);
+ sizeof(struct sctp_errhdr) + size,
+ GFP_ATOMIC);
if (!retval)
goto nodata;
@@ -1240,8 +1236,8 @@ nodata:
* to report all the errors, if the incoming chunk is large
*/
static inline struct sctp_chunk *sctp_make_op_error_fixed(
- const struct sctp_association *asoc,
- const struct sctp_chunk *chunk)
+ const struct sctp_association *asoc,
+ const struct sctp_chunk *chunk)
{
size_t size = asoc ? asoc->pathmtu : 0;
@@ -1253,9 +1249,9 @@ static inline struct sctp_chunk *sctp_make_op_error_fixed(
/* Create an Operation Error chunk. */
struct sctp_chunk *sctp_make_op_error(const struct sctp_association *asoc,
- const struct sctp_chunk *chunk,
- __be16 cause_code, const void *payload,
- size_t paylen, size_t reserve_tail)
+ const struct sctp_chunk *chunk,
+ __be16 cause_code, const void *payload,
+ size_t paylen, size_t reserve_tail)
{
struct sctp_chunk *retval;
@@ -1274,9 +1270,9 @@ nodata:
struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc)
{
- struct sctp_chunk *retval;
- struct sctp_hmac *hmac_desc;
struct sctp_authhdr auth_hdr;
+ struct sctp_hmac *hmac_desc;
+ struct sctp_chunk *retval;
__u8 *hmac;
/* Get the first hmac that the peer told us to use */
@@ -1285,16 +1281,16 @@ struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc)
return NULL;
retval = sctp_make_control(asoc, SCTP_CID_AUTH, 0,
- hmac_desc->hmac_len + sizeof(sctp_authhdr_t),
- GFP_ATOMIC);
+ hmac_desc->hmac_len + sizeof(auth_hdr),
+ GFP_ATOMIC);
if (!retval)
return NULL;
auth_hdr.hmac_id = htons(hmac_desc->hmac_id);
auth_hdr.shkey_id = htons(asoc->active_key_id);
- retval->subh.auth_hdr = sctp_addto_chunk(retval, sizeof(sctp_authhdr_t),
- &auth_hdr);
+ retval->subh.auth_hdr = sctp_addto_chunk(retval, sizeof(auth_hdr),
+ &auth_hdr);
hmac = skb_put_zero(retval->skb, hmac_desc->hmac_len);
@@ -1322,8 +1318,8 @@ struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc)
*
*/
struct sctp_chunk *sctp_chunkify(struct sk_buff *skb,
- const struct sctp_association *asoc,
- struct sock *sk, gfp_t gfp)
+ const struct sctp_association *asoc,
+ struct sock *sk, gfp_t gfp)
{
struct sctp_chunk *retval;
@@ -1375,11 +1371,11 @@ const union sctp_addr *sctp_source(const struct sctp_chunk *chunk)
* arguments, reserving enough space for a 'paylen' byte payload.
*/
static struct sctp_chunk *_sctp_make_chunk(const struct sctp_association *asoc,
- __u8 type, __u8 flags, int paylen,
- gfp_t gfp)
+ __u8 type, __u8 flags, int paylen,
+ gfp_t gfp)
{
- struct sctp_chunk *retval;
struct sctp_chunkhdr *chunk_hdr;
+ struct sctp_chunk *retval;
struct sk_buff *skb;
struct sock *sk;
@@ -1473,9 +1469,9 @@ void sctp_chunk_put(struct sctp_chunk *ch)
*/
void *sctp_addto_chunk(struct sctp_chunk *chunk, int len, const void *data)
{
- void *target;
int chunklen = ntohs(chunk->chunk_hdr->length);
int padlen = SCTP_PAD4(chunklen) - chunklen;
+ void *target;
skb_put_zero(chunk->skb, padlen);
target = skb_put_data(chunk->skb, data, len);
@@ -1528,11 +1524,10 @@ int sctp_user_addto_chunk(struct sctp_chunk *chunk, int len,
*/
void sctp_chunk_assign_ssn(struct sctp_chunk *chunk)
{
- struct sctp_datamsg *msg;
- struct sctp_chunk *lchunk;
struct sctp_stream *stream;
- __u16 ssn;
- __u16 sid;
+ struct sctp_chunk *lchunk;
+ struct sctp_datamsg *msg;
+ __u16 ssn, sid;
if (chunk->has_ssn)
return;
@@ -1577,12 +1572,12 @@ void sctp_chunk_assign_tsn(struct sctp_chunk *chunk)
/* Create a CLOSED association to use with an incoming packet. */
struct sctp_association *sctp_make_temp_asoc(const struct sctp_endpoint *ep,
- struct sctp_chunk *chunk,
- gfp_t gfp)
+ struct sctp_chunk *chunk,
+ gfp_t gfp)
{
struct sctp_association *asoc;
+ enum sctp_scope scope;
struct sk_buff *skb;
- sctp_scope_t scope;
/* Create the bare association. */
scope = sctp_scope(sctp_source(chunk));
@@ -1601,14 +1596,15 @@ nodata:
/* Build a cookie representing asoc.
* This INCLUDES the param header needed to put the cookie in the INIT ACK.
*/
-static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const struct sctp_chunk *init_chunk,
- int *cookie_len,
- const __u8 *raw_addrs, int addrs_len)
+static struct sctp_cookie_param *sctp_pack_cookie(
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const struct sctp_chunk *init_chunk,
+ int *cookie_len, const __u8 *raw_addrs,
+ int addrs_len)
{
- sctp_cookie_param_t *retval;
struct sctp_signed_cookie *cookie;
+ struct sctp_cookie_param *retval;
int headersize, bodysize;
/* Header size is static data prior to the actual cookie, including
@@ -1692,19 +1688,19 @@ nodata:
/* Unpack the cookie from COOKIE ECHO chunk, recreating the association. */
struct sctp_association *sctp_unpack_cookie(
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- struct sctp_chunk *chunk, gfp_t gfp,
- int *error, struct sctp_chunk **errp)
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ struct sctp_chunk *chunk, gfp_t gfp,
+ int *error, struct sctp_chunk **errp)
{
struct sctp_association *retval = NULL;
+ int headersize, bodysize, fixed_size;
struct sctp_signed_cookie *cookie;
+ struct sk_buff *skb = chunk->skb;
struct sctp_cookie *bear_cookie;
- int headersize, bodysize, fixed_size;
__u8 *digest = ep->digest;
+ enum sctp_scope scope;
unsigned int len;
- sctp_scope_t scope;
- struct sk_buff *skb = chunk->skb;
ktime_t kt;
/* Header size is static data prior to the actual cookie, including
@@ -1976,8 +1972,8 @@ static int sctp_process_hn_param(const struct sctp_association *asoc,
static int sctp_verify_ext_param(struct net *net, union sctp_params param)
{
__u16 num_ext = ntohs(param.p->length) - sizeof(struct sctp_paramhdr);
- int have_auth = 0;
int have_asconf = 0;
+ int have_auth = 0;
int i;
for (i = 0; i < num_ext; i++) {
@@ -2007,10 +2003,10 @@ static int sctp_verify_ext_param(struct net *net, union sctp_params param)
}
static void sctp_process_ext_param(struct sctp_association *asoc,
- union sctp_params param)
+ union sctp_params param)
{
- struct net *net = sock_net(asoc->base.sk);
__u16 num_ext = ntohs(param.p->length) - sizeof(struct sctp_paramhdr);
+ struct net *net = sock_net(asoc->base.sk);
int i;
for (i = 0; i < num_ext; i++) {
@@ -2067,10 +2063,11 @@ static void sctp_process_ext_param(struct sctp_association *asoc,
* SCTP_IERROR_ERROR - stop and report an error.
* SCTP_IERROR_NOMEME - out of memory.
*/
-static sctp_ierror_t sctp_process_unk_param(const struct sctp_association *asoc,
- union sctp_params param,
- struct sctp_chunk *chunk,
- struct sctp_chunk **errp)
+static enum sctp_ierror sctp_process_unk_param(
+ const struct sctp_association *asoc,
+ union sctp_params param,
+ struct sctp_chunk *chunk,
+ struct sctp_chunk **errp)
{
int retval = SCTP_IERROR_NO_ERROR;
@@ -2119,13 +2116,13 @@ static sctp_ierror_t sctp_process_unk_param(const struct sctp_association *asoc,
* SCTP_IERROR_ERROR - stop processing, trigger an ERROR
* SCTP_IERROR_NO_ERROR - continue with the chunk
*/
-static sctp_ierror_t sctp_verify_param(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- union sctp_params param,
- enum sctp_cid cid,
- struct sctp_chunk *chunk,
- struct sctp_chunk **err_chunk)
+static enum sctp_ierror sctp_verify_param(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ union sctp_params param,
+ enum sctp_cid cid,
+ struct sctp_chunk *chunk,
+ struct sctp_chunk **err_chunk)
{
struct sctp_hmac_algo_param *hmacs;
int retval = SCTP_IERROR_NO_ERROR;
@@ -2310,13 +2307,13 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk,
struct sctp_init_chunk *peer_init, gfp_t gfp)
{
struct net *net = sock_net(asoc->base.sk);
- union sctp_params param;
struct sctp_transport *transport;
struct list_head *pos, *temp;
- struct sctp_af *af;
+ union sctp_params param;
union sctp_addr addr;
- char *cookie;
+ struct sctp_af *af;
int src_match = 0;
+ char *cookie;
/* We must include the address that the INIT packet came from.
* This is the only address that matters for an INIT packet.
@@ -2500,16 +2497,15 @@ static int sctp_process_param(struct sctp_association *asoc,
gfp_t gfp)
{
struct net *net = sock_net(asoc->base.sk);
- union sctp_addr addr;
- int i;
- __u16 sat;
- int retval = 1;
- sctp_scope_t scope;
- u32 stale;
- struct sctp_af *af;
+ struct sctp_endpoint *ep = asoc->ep;
union sctp_addr_param *addr_param;
struct sctp_transport *t;
- struct sctp_endpoint *ep = asoc->ep;
+ enum sctp_scope scope;
+ union sctp_addr addr;
+ struct sctp_af *af;
+ int retval = 1, i;
+ u32 stale;
+ __u16 sat;
/* We maintain all INIT parameters in network byte order all the
* time. This allows us to not worry about whether the parameters
@@ -2617,7 +2613,7 @@ do_addr_param:
if (!net->sctp.addip_enable)
goto fall_through;
- addr_param = param.v + sizeof(sctp_addip_param_t);
+ addr_param = param.v + sizeof(struct sctp_addip_param);
af = sctp_get_af_specific(param_type2af(addr_param->p.type));
if (af == NULL)
@@ -2754,7 +2750,7 @@ static struct sctp_chunk *sctp_make_asconf(struct sctp_association *asoc,
union sctp_addr *addr,
int vparam_len)
{
- sctp_addiphdr_t asconf;
+ struct sctp_addiphdr asconf;
struct sctp_chunk *retval;
int length = sizeof(asconf) + vparam_len;
union sctp_addr_param addrparam;
@@ -2807,22 +2803,20 @@ static struct sctp_chunk *sctp_make_asconf(struct sctp_association *asoc,
*
*/
struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc,
- union sctp_addr *laddr,
- struct sockaddr *addrs,
- int addrcnt,
- __be16 flags)
-{
- sctp_addip_param_t param;
- struct sctp_chunk *retval;
- union sctp_addr_param addr_param;
- union sctp_addr *addr;
- void *addr_buf;
- struct sctp_af *af;
- int paramlen = sizeof(param);
- int addr_param_len = 0;
- int totallen = 0;
- int i;
- int del_pickup = 0;
+ union sctp_addr *laddr,
+ struct sockaddr *addrs,
+ int addrcnt, __be16 flags)
+{
+ union sctp_addr_param addr_param;
+ struct sctp_addip_param param;
+ int paramlen = sizeof(param);
+ struct sctp_chunk *retval;
+ int addr_param_len = 0;
+ union sctp_addr *addr;
+ int totallen = 0, i;
+ int del_pickup = 0;
+ struct sctp_af *af;
+ void *addr_buf;
/* Get total length of all the address parameters. */
addr_buf = addrs;
@@ -2860,7 +2854,7 @@ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc,
addr_param_len = af->to_addr_param(addr, &addr_param);
param.param_hdr.type = flags;
param.param_hdr.length = htons(paramlen + addr_param_len);
- param.crr_id = i;
+ param.crr_id = htonl(i);
sctp_addto_chunk(retval, paramlen, &param);
sctp_addto_chunk(retval, addr_param_len, &addr_param);
@@ -2873,7 +2867,7 @@ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc,
addr_param_len = af->to_addr_param(addr, &addr_param);
param.param_hdr.type = SCTP_PARAM_DEL_IP;
param.param_hdr.length = htons(paramlen + addr_param_len);
- param.crr_id = i;
+ param.crr_id = htonl(i);
sctp_addto_chunk(retval, paramlen, &param);
sctp_addto_chunk(retval, addr_param_len, &addr_param);
@@ -2898,12 +2892,12 @@ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc,
struct sctp_chunk *sctp_make_asconf_set_prim(struct sctp_association *asoc,
union sctp_addr *addr)
{
- sctp_addip_param_t param;
- struct sctp_chunk *retval;
- int len = sizeof(param);
- union sctp_addr_param addrparam;
- int addrlen;
- struct sctp_af *af = sctp_get_af_specific(addr->v4.sin_family);
+ struct sctp_af *af = sctp_get_af_specific(addr->v4.sin_family);
+ union sctp_addr_param addrparam;
+ struct sctp_addip_param param;
+ struct sctp_chunk *retval;
+ int len = sizeof(param);
+ int addrlen;
addrlen = af->to_addr_param(addr, &addrparam);
if (!addrlen)
@@ -2947,9 +2941,9 @@ struct sctp_chunk *sctp_make_asconf_set_prim(struct sctp_association *asoc,
static struct sctp_chunk *sctp_make_asconf_ack(const struct sctp_association *asoc,
__u32 serial, int vparam_len)
{
- sctp_addiphdr_t asconf;
- struct sctp_chunk *retval;
- int length = sizeof(asconf) + vparam_len;
+ struct sctp_addiphdr asconf;
+ struct sctp_chunk *retval;
+ int length = sizeof(asconf) + vparam_len;
/* Create the chunk. */
retval = sctp_make_control(asoc, SCTP_CID_ASCONF_ACK, 0, length,
@@ -2967,13 +2961,14 @@ static struct sctp_chunk *sctp_make_asconf_ack(const struct sctp_association *as
/* Add response parameters to an ASCONF_ACK chunk. */
static void sctp_add_asconf_response(struct sctp_chunk *chunk, __be32 crr_id,
- __be16 err_code, sctp_addip_param_t *asconf_param)
+ __be16 err_code,
+ struct sctp_addip_param *asconf_param)
{
- sctp_addip_param_t ack_param;
- sctp_errhdr_t err_param;
- int asconf_param_len = 0;
- int err_param_len = 0;
- __be16 response_type;
+ struct sctp_addip_param ack_param;
+ struct sctp_errhdr err_param;
+ int asconf_param_len = 0;
+ int err_param_len = 0;
+ __be16 response_type;
if (SCTP_ERROR_NO_ERROR == err_code) {
response_type = SCTP_PARAM_SUCCESS_REPORT;
@@ -3008,15 +3003,15 @@ static void sctp_add_asconf_response(struct sctp_chunk *chunk, __be32 crr_id,
/* Process a asconf parameter. */
static __be16 sctp_process_asconf_param(struct sctp_association *asoc,
- struct sctp_chunk *asconf,
- sctp_addip_param_t *asconf_param)
+ struct sctp_chunk *asconf,
+ struct sctp_addip_param *asconf_param)
{
+ union sctp_addr_param *addr_param;
struct sctp_transport *peer;
- struct sctp_af *af;
union sctp_addr addr;
- union sctp_addr_param *addr_param;
+ struct sctp_af *af;
- addr_param = (void *)asconf_param + sizeof(sctp_addip_param_t);
+ addr_param = (void *)asconf_param + sizeof(*asconf_param);
if (asconf_param->param_hdr.type != SCTP_PARAM_ADD_IP &&
asconf_param->param_hdr.type != SCTP_PARAM_DEL_IP &&
@@ -3141,10 +3136,11 @@ bool sctp_verify_asconf(const struct sctp_association *asoc,
struct sctp_chunk *chunk, bool addr_param_needed,
struct sctp_paramhdr **errp)
{
- sctp_addip_chunk_t *addip = (sctp_addip_chunk_t *) chunk->chunk_hdr;
- union sctp_params param;
+ struct sctp_addip_chunk *addip;
bool addr_param_seen = false;
+ union sctp_params param;
+ addip = (struct sctp_addip_chunk *)chunk->chunk_hdr;
sctp_walk_params(param, addip, addip_hdr.params) {
size_t length = ntohs(param.p->length);
@@ -3153,7 +3149,7 @@ bool sctp_verify_asconf(const struct sctp_association *asoc,
case SCTP_PARAM_ERR_CAUSE:
break;
case SCTP_PARAM_IPV4_ADDRESS:
- if (length != sizeof(sctp_ipv4addr_param_t))
+ if (length != sizeof(struct sctp_ipv4addr_param))
return false;
/* ensure there is only one addr param and it's in the
* beginning of addip_hdr params, or we reject it.
@@ -3163,7 +3159,7 @@ bool sctp_verify_asconf(const struct sctp_association *asoc,
addr_param_seen = true;
break;
case SCTP_PARAM_IPV6_ADDRESS:
- if (length != sizeof(sctp_ipv6addr_param_t))
+ if (length != sizeof(struct sctp_ipv6addr_param))
return false;
if (param.v != addip->addip_hdr.params)
return false;
@@ -3176,13 +3172,13 @@ bool sctp_verify_asconf(const struct sctp_association *asoc,
if (addr_param_needed && !addr_param_seen)
return false;
length = ntohs(param.addip->param_hdr.length);
- if (length < sizeof(sctp_addip_param_t) +
+ if (length < sizeof(struct sctp_addip_param) +
sizeof(**errp))
return false;
break;
case SCTP_PARAM_SUCCESS_REPORT:
case SCTP_PARAM_ADAPTATION_LAYER_IND:
- if (length != sizeof(sctp_addip_param_t))
+ if (length != sizeof(struct sctp_addip_param))
return false;
break;
default:
@@ -3208,24 +3204,24 @@ bool sctp_verify_asconf(const struct sctp_association *asoc,
struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc,
struct sctp_chunk *asconf)
{
- sctp_addip_chunk_t *addip = (sctp_addip_chunk_t *) asconf->chunk_hdr;
+ union sctp_addr_param *addr_param;
+ struct sctp_addip_chunk *addip;
+ struct sctp_chunk *asconf_ack;
bool all_param_pass = true;
+ struct sctp_addiphdr *hdr;
+ int length = 0, chunk_len;
union sctp_params param;
- sctp_addiphdr_t *hdr;
- union sctp_addr_param *addr_param;
- struct sctp_chunk *asconf_ack;
- __be16 err_code;
- int length = 0;
- int chunk_len;
- __u32 serial;
+ __be16 err_code;
+ __u32 serial;
+ addip = (struct sctp_addip_chunk *)asconf->chunk_hdr;
chunk_len = ntohs(asconf->chunk_hdr->length) -
sizeof(struct sctp_chunkhdr);
- hdr = (sctp_addiphdr_t *)asconf->skb->data;
+ hdr = (struct sctp_addiphdr *)asconf->skb->data;
serial = ntohl(hdr->serial);
/* Skip the addiphdr and store a pointer to address parameter. */
- length = sizeof(sctp_addiphdr_t);
+ length = sizeof(*hdr);
addr_param = (union sctp_addr_param *)(asconf->skb->data + length);
chunk_len -= length;
@@ -3291,16 +3287,16 @@ done:
/* Process a asconf parameter that is successfully acked. */
static void sctp_asconf_param_success(struct sctp_association *asoc,
- sctp_addip_param_t *asconf_param)
+ struct sctp_addip_param *asconf_param)
{
- struct sctp_af *af;
- union sctp_addr addr;
struct sctp_bind_addr *bp = &asoc->base.bind_addr;
union sctp_addr_param *addr_param;
- struct sctp_transport *transport;
struct sctp_sockaddr_entry *saddr;
+ struct sctp_transport *transport;
+ union sctp_addr addr;
+ struct sctp_af *af;
- addr_param = (void *)asconf_param + sizeof(sctp_addip_param_t);
+ addr_param = (void *)asconf_param + sizeof(*asconf_param);
/* We have checked the packet before, so we do not check again. */
af = sctp_get_af_specific(param_type2af(addr_param->p.type));
@@ -3351,14 +3347,14 @@ static void sctp_asconf_param_success(struct sctp_association *asoc,
* specific success indication is present for the parameter.
*/
static __be16 sctp_get_asconf_response(struct sctp_chunk *asconf_ack,
- sctp_addip_param_t *asconf_param,
- int no_err)
+ struct sctp_addip_param *asconf_param,
+ int no_err)
{
- sctp_addip_param_t *asconf_ack_param;
- sctp_errhdr_t *err_param;
- int length;
- int asconf_ack_len;
- __be16 err_code;
+ struct sctp_addip_param *asconf_ack_param;
+ struct sctp_errhdr *err_param;
+ int asconf_ack_len;
+ __be16 err_code;
+ int length;
if (no_err)
err_code = SCTP_ERROR_NO_ERROR;
@@ -3371,9 +3367,9 @@ static __be16 sctp_get_asconf_response(struct sctp_chunk *asconf_ack,
/* Skip the addiphdr from the asconf_ack chunk and store a pointer to
* the first asconf_ack parameter.
*/
- length = sizeof(sctp_addiphdr_t);
- asconf_ack_param = (sctp_addip_param_t *)(asconf_ack->skb->data +
- length);
+ length = sizeof(struct sctp_addiphdr);
+ asconf_ack_param = (struct sctp_addip_param *)(asconf_ack->skb->data +
+ length);
asconf_ack_len -= length;
while (asconf_ack_len > 0) {
@@ -3382,7 +3378,7 @@ static __be16 sctp_get_asconf_response(struct sctp_chunk *asconf_ack,
case SCTP_PARAM_SUCCESS_REPORT:
return SCTP_ERROR_NO_ERROR;
case SCTP_PARAM_ERR_CAUSE:
- length = sizeof(sctp_addip_param_t);
+ length = sizeof(*asconf_ack_param);
err_param = (void *)asconf_ack_param + length;
asconf_ack_len -= length;
if (asconf_ack_len > 0)
@@ -3407,20 +3403,20 @@ static __be16 sctp_get_asconf_response(struct sctp_chunk *asconf_ack,
int sctp_process_asconf_ack(struct sctp_association *asoc,
struct sctp_chunk *asconf_ack)
{
- struct sctp_chunk *asconf = asoc->addip_last_asconf;
- union sctp_addr_param *addr_param;
- sctp_addip_param_t *asconf_param;
- int length = 0;
- int asconf_len = asconf->skb->len;
- int all_param_pass = 0;
- int no_err = 1;
- int retval = 0;
- __be16 err_code = SCTP_ERROR_NO_ERROR;
+ struct sctp_chunk *asconf = asoc->addip_last_asconf;
+ struct sctp_addip_param *asconf_param;
+ __be16 err_code = SCTP_ERROR_NO_ERROR;
+ union sctp_addr_param *addr_param;
+ int asconf_len = asconf->skb->len;
+ int all_param_pass = 0;
+ int length = 0;
+ int no_err = 1;
+ int retval = 0;
/* Skip the chunkhdr and addiphdr from the last asconf sent and store
* a pointer to address parameter.
*/
- length = sizeof(sctp_addip_chunk_t);
+ length = sizeof(struct sctp_addip_chunk);
addr_param = (union sctp_addr_param *)(asconf->skb->data + length);
asconf_len -= length;
@@ -3436,7 +3432,7 @@ int sctp_process_asconf_ack(struct sctp_association *asoc,
* failures are indicated, then all request(s) are considered
* successful.
*/
- if (asconf_ack->skb->len == sizeof(sctp_addiphdr_t))
+ if (asconf_ack->skb->len == sizeof(struct sctp_addiphdr))
all_param_pass = 1;
/* Process the TLVs contained in the last sent ASCONF chunk. */
@@ -3542,9 +3538,8 @@ struct sctp_chunk *sctp_make_fwdtsn(const struct sctp_association *asoc,
* \ \
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*/
-static struct sctp_chunk *sctp_make_reconf(
- const struct sctp_association *asoc,
- int length)
+static struct sctp_chunk *sctp_make_reconf(const struct sctp_association *asoc,
+ int length)
{
struct sctp_reconf_chunk *reconf;
struct sctp_chunk *retval;
@@ -3595,12 +3590,12 @@ static struct sctp_chunk *sctp_make_reconf(
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*/
struct sctp_chunk *sctp_make_strreset_req(
- const struct sctp_association *asoc,
- __u16 stream_num, __u16 *stream_list,
- bool out, bool in)
+ const struct sctp_association *asoc,
+ __u16 stream_num, __be16 *stream_list,
+ bool out, bool in)
{
+ __u16 stream_len = stream_num * sizeof(__u16);
struct sctp_strreset_outreq outreq;
- __u16 stream_len = stream_num * 2;
struct sctp_strreset_inreq inreq;
struct sctp_chunk *retval;
__u16 outlen, inlen;
@@ -3649,7 +3644,7 @@ struct sctp_chunk *sctp_make_strreset_req(
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*/
struct sctp_chunk *sctp_make_strreset_tsnreq(
- const struct sctp_association *asoc)
+ const struct sctp_association *asoc)
{
struct sctp_strreset_tsnreq tsnreq;
__u16 length = sizeof(tsnreq);
@@ -3680,8 +3675,8 @@ struct sctp_chunk *sctp_make_strreset_tsnreq(
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*/
struct sctp_chunk *sctp_make_strreset_addstrm(
- const struct sctp_association *asoc,
- __u16 out, __u16 in)
+ const struct sctp_association *asoc,
+ __u16 out, __u16 in)
{
struct sctp_strreset_addstrm addstrm;
__u16 size = sizeof(addstrm);
@@ -3725,9 +3720,8 @@ struct sctp_chunk *sctp_make_strreset_addstrm(
* | Result |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*/
-struct sctp_chunk *sctp_make_strreset_resp(
- const struct sctp_association *asoc,
- __u32 result, __u32 sn)
+struct sctp_chunk *sctp_make_strreset_resp(const struct sctp_association *asoc,
+ __u32 result, __u32 sn)
{
struct sctp_strreset_resp resp;
__u16 length = sizeof(resp);
@@ -3762,10 +3756,10 @@ struct sctp_chunk *sctp_make_strreset_resp(
* | Receiver's Next TSN (optional) |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*/
-struct sctp_chunk *sctp_make_strreset_tsnresp(
- struct sctp_association *asoc,
- __u32 result, __u32 sn,
- __u32 sender_tsn, __u32 receiver_tsn)
+struct sctp_chunk *sctp_make_strreset_tsnresp(struct sctp_association *asoc,
+ __u32 result, __u32 sn,
+ __u32 sender_tsn,
+ __u32 receiver_tsn)
{
struct sctp_strreset_resptsn tsnresp;
__u16 length = sizeof(tsnresp);
@@ -3794,7 +3788,8 @@ bool sctp_verify_reconf(const struct sctp_association *asoc,
{
struct sctp_reconf_chunk *hdr;
union sctp_params param;
- __u16 last = 0, cnt = 0;
+ __be16 last = 0;
+ __u16 cnt = 0;
hdr = (struct sctp_reconf_chunk *)chunk->chunk_hdr;
sctp_walk_params(param, hdr, params) {
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index d6e5e9e0fd6d..df94d77401e7 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -50,23 +50,25 @@
#include <net/sock.h>
#include <net/sctp/sctp.h>
#include <net/sctp/sm.h>
+#include <net/sctp/stream_sched.h>
-static int sctp_cmd_interpreter(sctp_event_t event_type,
- sctp_subtype_t subtype,
- sctp_state_t state,
+static int sctp_cmd_interpreter(enum sctp_event event_type,
+ union sctp_subtype subtype,
+ enum sctp_state state,
struct sctp_endpoint *ep,
struct sctp_association *asoc,
void *event_arg,
- sctp_disposition_t status,
- sctp_cmd_seq_t *commands,
+ enum sctp_disposition status,
+ struct sctp_cmd_seq *commands,
gfp_t gfp);
-static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype,
- sctp_state_t state,
+static int sctp_side_effects(enum sctp_event event_type,
+ union sctp_subtype subtype,
+ enum sctp_state state,
struct sctp_endpoint *ep,
struct sctp_association **asoc,
void *event_arg,
- sctp_disposition_t status,
- sctp_cmd_seq_t *commands,
+ enum sctp_disposition status,
+ struct sctp_cmd_seq *commands,
gfp_t gfp);
/********************************************************************
@@ -96,8 +98,8 @@ static void sctp_do_ecn_ce_work(struct sctp_association *asoc,
* that was originally marked with the CE bit.
*/
static struct sctp_chunk *sctp_do_ecn_ecne_work(struct sctp_association *asoc,
- __u32 lowest_tsn,
- struct sctp_chunk *chunk)
+ __u32 lowest_tsn,
+ struct sctp_chunk *chunk)
{
struct sctp_chunk *repl;
@@ -149,11 +151,11 @@ static void sctp_do_ecn_cwr_work(struct sctp_association *asoc,
/* Generate SACK if necessary. We call this at the end of a packet. */
static int sctp_gen_sack(struct sctp_association *asoc, int force,
- sctp_cmd_seq_t *commands)
+ struct sctp_cmd_seq *commands)
{
+ struct sctp_transport *trans = asoc->peer.last_data_from;
__u32 ctsn, max_tsn_seen;
struct sctp_chunk *sack;
- struct sctp_transport *trans = asoc->peer.last_data_from;
int error = 0;
if (force ||
@@ -241,13 +243,14 @@ nomem:
/* When the T3-RTX timer expires, it calls this function to create the
* relevant state machine event.
*/
-void sctp_generate_t3_rtx_event(unsigned long peer)
+void sctp_generate_t3_rtx_event(struct timer_list *t)
{
- int error;
- struct sctp_transport *transport = (struct sctp_transport *) peer;
+ struct sctp_transport *transport =
+ from_timer(transport, t, T3_rtx_timer);
struct sctp_association *asoc = transport->asoc;
struct sock *sk = asoc->base.sk;
struct net *net = sock_net(sk);
+ int error;
/* Check whether a task is in the sock. */
@@ -280,7 +283,7 @@ out_unlock:
* for timeouts which use the association as their parameter.
*/
static void sctp_generate_timeout_event(struct sctp_association *asoc,
- sctp_event_timeout_t timeout_type)
+ enum sctp_event_timeout timeout_type)
{
struct sock *sk = asoc->base.sk;
struct net *net = sock_net(sk);
@@ -317,55 +320,68 @@ out_unlock:
sctp_association_put(asoc);
}
-static void sctp_generate_t1_cookie_event(unsigned long data)
+static void sctp_generate_t1_cookie_event(struct timer_list *t)
{
- struct sctp_association *asoc = (struct sctp_association *) data;
+ struct sctp_association *asoc =
+ from_timer(asoc, t, timers[SCTP_EVENT_TIMEOUT_T1_COOKIE]);
+
sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_T1_COOKIE);
}
-static void sctp_generate_t1_init_event(unsigned long data)
+static void sctp_generate_t1_init_event(struct timer_list *t)
{
- struct sctp_association *asoc = (struct sctp_association *) data;
+ struct sctp_association *asoc =
+ from_timer(asoc, t, timers[SCTP_EVENT_TIMEOUT_T1_INIT]);
+
sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_T1_INIT);
}
-static void sctp_generate_t2_shutdown_event(unsigned long data)
+static void sctp_generate_t2_shutdown_event(struct timer_list *t)
{
- struct sctp_association *asoc = (struct sctp_association *) data;
+ struct sctp_association *asoc =
+ from_timer(asoc, t, timers[SCTP_EVENT_TIMEOUT_T2_SHUTDOWN]);
+
sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_T2_SHUTDOWN);
}
-static void sctp_generate_t4_rto_event(unsigned long data)
+static void sctp_generate_t4_rto_event(struct timer_list *t)
{
- struct sctp_association *asoc = (struct sctp_association *) data;
+ struct sctp_association *asoc =
+ from_timer(asoc, t, timers[SCTP_EVENT_TIMEOUT_T4_RTO]);
+
sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_T4_RTO);
}
-static void sctp_generate_t5_shutdown_guard_event(unsigned long data)
+static void sctp_generate_t5_shutdown_guard_event(struct timer_list *t)
{
- struct sctp_association *asoc = (struct sctp_association *)data;
+ struct sctp_association *asoc =
+ from_timer(asoc, t,
+ timers[SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD]);
+
sctp_generate_timeout_event(asoc,
SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD);
} /* sctp_generate_t5_shutdown_guard_event() */
-static void sctp_generate_autoclose_event(unsigned long data)
+static void sctp_generate_autoclose_event(struct timer_list *t)
{
- struct sctp_association *asoc = (struct sctp_association *) data;
+ struct sctp_association *asoc =
+ from_timer(asoc, t, timers[SCTP_EVENT_TIMEOUT_AUTOCLOSE]);
+
sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_AUTOCLOSE);
}
/* Generate a heart beat event. If the sock is busy, reschedule. Make
* sure that the transport is still valid.
*/
-void sctp_generate_heartbeat_event(unsigned long data)
+void sctp_generate_heartbeat_event(struct timer_list *t)
{
- int error = 0;
- struct sctp_transport *transport = (struct sctp_transport *) data;
+ struct sctp_transport *transport = from_timer(transport, t, hb_timer);
struct sctp_association *asoc = transport->asoc;
struct sock *sk = asoc->base.sk;
struct net *net = sock_net(sk);
u32 elapsed, timeout;
+ int error = 0;
bh_lock_sock(sk);
if (sock_owned_by_user(sk)) {
@@ -403,9 +419,10 @@ out_unlock:
/* Handle the timeout of the ICMP protocol unreachable timer. Trigger
* the correct state machine transition that will close the association.
*/
-void sctp_generate_proto_unreach_event(unsigned long data)
+void sctp_generate_proto_unreach_event(struct timer_list *t)
{
- struct sctp_transport *transport = (struct sctp_transport *) data;
+ struct sctp_transport *transport =
+ from_timer(transport, t, proto_unreach_timer);
struct sctp_association *asoc = transport->asoc;
struct sock *sk = asoc->base.sk;
struct net *net = sock_net(sk);
@@ -437,9 +454,10 @@ out_unlock:
}
/* Handle the timeout of the RE-CONFIG timer. */
-void sctp_generate_reconf_event(unsigned long data)
+void sctp_generate_reconf_event(struct timer_list *t)
{
- struct sctp_transport *transport = (struct sctp_transport *)data;
+ struct sctp_transport *transport =
+ from_timer(transport, t, reconf_timer);
struct sctp_association *asoc = transport->asoc;
struct sock *sk = asoc->base.sk;
struct net *net = sock_net(sk);
@@ -469,24 +487,27 @@ out_unlock:
}
/* Inject a SACK Timeout event into the state machine. */
-static void sctp_generate_sack_event(unsigned long data)
+static void sctp_generate_sack_event(struct timer_list *t)
{
- struct sctp_association *asoc = (struct sctp_association *) data;
+ struct sctp_association *asoc =
+ from_timer(asoc, t, timers[SCTP_EVENT_TIMEOUT_SACK]);
+
sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_SACK);
}
sctp_timer_event_t *sctp_timer_events[SCTP_NUM_TIMEOUT_TYPES] = {
- NULL,
- sctp_generate_t1_cookie_event,
- sctp_generate_t1_init_event,
- sctp_generate_t2_shutdown_event,
- NULL,
- sctp_generate_t4_rto_event,
- sctp_generate_t5_shutdown_guard_event,
- NULL,
- NULL,
- sctp_generate_sack_event,
- sctp_generate_autoclose_event,
+ [SCTP_EVENT_TIMEOUT_NONE] = NULL,
+ [SCTP_EVENT_TIMEOUT_T1_COOKIE] = sctp_generate_t1_cookie_event,
+ [SCTP_EVENT_TIMEOUT_T1_INIT] = sctp_generate_t1_init_event,
+ [SCTP_EVENT_TIMEOUT_T2_SHUTDOWN] = sctp_generate_t2_shutdown_event,
+ [SCTP_EVENT_TIMEOUT_T3_RTX] = NULL,
+ [SCTP_EVENT_TIMEOUT_T4_RTO] = sctp_generate_t4_rto_event,
+ [SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD] =
+ sctp_generate_t5_shutdown_guard_event,
+ [SCTP_EVENT_TIMEOUT_HEARTBEAT] = NULL,
+ [SCTP_EVENT_TIMEOUT_RECONF] = NULL,
+ [SCTP_EVENT_TIMEOUT_SACK] = sctp_generate_sack_event,
+ [SCTP_EVENT_TIMEOUT_AUTOCLOSE] = sctp_generate_autoclose_event,
};
@@ -505,7 +526,7 @@ sctp_timer_event_t *sctp_timer_events[SCTP_NUM_TIMEOUT_TYPES] = {
* notification SHOULD be sent to the upper layer.
*
*/
-static void sctp_do_8_2_transport_strike(sctp_cmd_seq_t *commands,
+static void sctp_do_8_2_transport_strike(struct sctp_cmd_seq *commands,
struct sctp_association *asoc,
struct sctp_transport *transport,
int is_hb)
@@ -577,7 +598,7 @@ static void sctp_do_8_2_transport_strike(sctp_cmd_seq_t *commands,
}
/* Worker routine to handle INIT command failure. */
-static void sctp_cmd_init_failed(sctp_cmd_seq_t *commands,
+static void sctp_cmd_init_failed(struct sctp_cmd_seq *commands,
struct sctp_association *asoc,
unsigned int error)
{
@@ -600,15 +621,16 @@ static void sctp_cmd_init_failed(sctp_cmd_seq_t *commands,
}
/* Worker routine to handle SCTP_CMD_ASSOC_FAILED. */
-static void sctp_cmd_assoc_failed(sctp_cmd_seq_t *commands,
+static void sctp_cmd_assoc_failed(struct sctp_cmd_seq *commands,
struct sctp_association *asoc,
- sctp_event_t event_type,
- sctp_subtype_t subtype,
+ enum sctp_event event_type,
+ union sctp_subtype subtype,
struct sctp_chunk *chunk,
unsigned int error)
{
struct sctp_ulpevent *event;
struct sctp_chunk *abort;
+
/* Cancel any partial delivery in progress. */
sctp_ulpq_abort_pd(&asoc->ulpq, GFP_ATOMIC);
@@ -644,7 +666,7 @@ static void sctp_cmd_assoc_failed(sctp_cmd_seq_t *commands,
* since all other cases use "temporary" associations and can do all
* their work in statefuns directly.
*/
-static int sctp_cmd_process_init(sctp_cmd_seq_t *commands,
+static int sctp_cmd_process_init(struct sctp_cmd_seq *commands,
struct sctp_association *asoc,
struct sctp_chunk *chunk,
struct sctp_init_chunk *peer_init,
@@ -666,7 +688,7 @@ static int sctp_cmd_process_init(sctp_cmd_seq_t *commands,
}
/* Helper function to break out starting up of heartbeat timers. */
-static void sctp_cmd_hb_timers_start(sctp_cmd_seq_t *cmds,
+static void sctp_cmd_hb_timers_start(struct sctp_cmd_seq *cmds,
struct sctp_association *asoc)
{
struct sctp_transport *t;
@@ -679,7 +701,7 @@ static void sctp_cmd_hb_timers_start(sctp_cmd_seq_t *cmds,
sctp_transport_reset_hb_timer(t);
}
-static void sctp_cmd_hb_timers_stop(sctp_cmd_seq_t *cmds,
+static void sctp_cmd_hb_timers_stop(struct sctp_cmd_seq *cmds,
struct sctp_association *asoc)
{
struct sctp_transport *t;
@@ -694,7 +716,7 @@ static void sctp_cmd_hb_timers_stop(sctp_cmd_seq_t *cmds,
}
/* Helper function to stop any pending T3-RTX timers */
-static void sctp_cmd_t3_rtx_timers_stop(sctp_cmd_seq_t *cmds,
+static void sctp_cmd_t3_rtx_timers_stop(struct sctp_cmd_seq *cmds,
struct sctp_association *asoc)
{
struct sctp_transport *t;
@@ -708,12 +730,12 @@ static void sctp_cmd_t3_rtx_timers_stop(sctp_cmd_seq_t *cmds,
/* Helper function to handle the reception of an HEARTBEAT ACK. */
-static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds,
+static void sctp_cmd_transport_on(struct sctp_cmd_seq *cmds,
struct sctp_association *asoc,
struct sctp_transport *t,
struct sctp_chunk *chunk)
{
- sctp_sender_hb_info_t *hbinfo;
+ struct sctp_sender_hb_info *hbinfo;
int was_unconfirmed = 0;
/* 8.3 Upon the receipt of the HEARTBEAT ACK, the sender of the
@@ -767,7 +789,7 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds,
if (t->rto_pending == 0)
t->rto_pending = 1;
- hbinfo = (sctp_sender_hb_info_t *) chunk->skb->data;
+ hbinfo = (struct sctp_sender_hb_info *)chunk->skb->data;
sctp_transport_update_rto(t, (jiffies - hbinfo->sent_at));
/* Update the heartbeat timer. */
@@ -779,7 +801,7 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds,
/* Helper function to process the process SACK command. */
-static int sctp_cmd_process_sack(sctp_cmd_seq_t *cmds,
+static int sctp_cmd_process_sack(struct sctp_cmd_seq *cmds,
struct sctp_association *asoc,
struct sctp_chunk *chunk)
{
@@ -801,7 +823,7 @@ static int sctp_cmd_process_sack(sctp_cmd_seq_t *cmds,
/* Helper function to set the timeout value for T2-SHUTDOWN timer and to set
* the transport for a shutdown chunk.
*/
-static void sctp_cmd_setup_t2(sctp_cmd_seq_t *cmds,
+static void sctp_cmd_setup_t2(struct sctp_cmd_seq *cmds,
struct sctp_association *asoc,
struct sctp_chunk *chunk)
{
@@ -818,7 +840,7 @@ static void sctp_cmd_setup_t2(sctp_cmd_seq_t *cmds,
asoc->timeouts[SCTP_EVENT_TIMEOUT_T2_SHUTDOWN] = t->rto;
}
-static void sctp_cmd_assoc_update(sctp_cmd_seq_t *cmds,
+static void sctp_cmd_assoc_update(struct sctp_cmd_seq *cmds,
struct sctp_association *asoc,
struct sctp_association *new)
{
@@ -828,7 +850,7 @@ static void sctp_cmd_assoc_update(sctp_cmd_seq_t *cmds,
if (!sctp_assoc_update(asoc, new))
return;
- abort = sctp_make_abort(asoc, NULL, sizeof(sctp_errhdr_t));
+ abort = sctp_make_abort(asoc, NULL, sizeof(struct sctp_errhdr));
if (abort) {
sctp_init_cause(abort, SCTP_ERROR_RSRC_LOW, 0);
sctp_add_cmd_sf(cmds, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
@@ -841,9 +863,9 @@ static void sctp_cmd_assoc_update(sctp_cmd_seq_t *cmds,
}
/* Helper function to change the state of an association. */
-static void sctp_cmd_new_state(sctp_cmd_seq_t *cmds,
+static void sctp_cmd_new_state(struct sctp_cmd_seq *cmds,
struct sctp_association *asoc,
- sctp_state_t state)
+ enum sctp_state state)
{
struct sock *sk = asoc->base.sk;
@@ -901,7 +923,7 @@ static void sctp_cmd_new_state(sctp_cmd_seq_t *cmds,
}
/* Helper function to delete an association. */
-static void sctp_cmd_delete_tcb(sctp_cmd_seq_t *cmds,
+static void sctp_cmd_delete_tcb(struct sctp_cmd_seq *cmds,
struct sctp_association *asoc)
{
struct sock *sk = asoc->base.sk;
@@ -923,9 +945,9 @@ static void sctp_cmd_delete_tcb(sctp_cmd_seq_t *cmds,
* destination address (we use active path instead of primary path just
* because primary path may be inactive.
*/
-static void sctp_cmd_setup_t4(sctp_cmd_seq_t *cmds,
- struct sctp_association *asoc,
- struct sctp_chunk *chunk)
+static void sctp_cmd_setup_t4(struct sctp_cmd_seq *cmds,
+ struct sctp_association *asoc,
+ struct sctp_chunk *chunk)
{
struct sctp_transport *t;
@@ -935,7 +957,7 @@ static void sctp_cmd_setup_t4(sctp_cmd_seq_t *cmds,
}
/* Process an incoming Operation Error Chunk. */
-static void sctp_cmd_process_operr(sctp_cmd_seq_t *cmds,
+static void sctp_cmd_process_operr(struct sctp_cmd_seq *cmds,
struct sctp_association *asoc,
struct sctp_chunk *chunk)
{
@@ -990,6 +1012,7 @@ static void sctp_cmd_process_fwdtsn(struct sctp_ulpq *ulpq,
struct sctp_chunk *chunk)
{
struct sctp_fwdtsn_skip *skip;
+
/* Walk through all the skipped SSNs */
sctp_walk_fwdtsn(skip, chunk) {
sctp_ulpq_skip(ulpq, ntohs(skip->stream), ntohs(skip->ssn));
@@ -1002,8 +1025,8 @@ static void sctp_cmd_process_fwdtsn(struct sctp_ulpq *ulpq,
static void sctp_cmd_del_non_primary(struct sctp_association *asoc)
{
struct sctp_transport *t;
- struct list_head *pos;
struct list_head *temp;
+ struct list_head *pos;
list_for_each_safe(pos, temp, &asoc->peer.transport_addr_list) {
t = list_entry(pos, struct sctp_transport, transports);
@@ -1024,9 +1047,9 @@ static void sctp_cmd_set_sk_err(struct sctp_association *asoc, int error)
}
/* Helper function to generate an association change event */
-static void sctp_cmd_assoc_change(sctp_cmd_seq_t *commands,
- struct sctp_association *asoc,
- u8 state)
+static void sctp_cmd_assoc_change(struct sctp_cmd_seq *commands,
+ struct sctp_association *asoc,
+ u8 state)
{
struct sctp_ulpevent *ev;
@@ -1039,7 +1062,7 @@ static void sctp_cmd_assoc_change(sctp_cmd_seq_t *commands,
}
/* Helper function to generate an adaptation indication event */
-static void sctp_cmd_adaptation_ind(sctp_cmd_seq_t *commands,
+static void sctp_cmd_adaptation_ind(struct sctp_cmd_seq *commands,
struct sctp_association *asoc)
{
struct sctp_ulpevent *ev;
@@ -1052,8 +1075,8 @@ static void sctp_cmd_adaptation_ind(sctp_cmd_seq_t *commands,
static void sctp_cmd_t1_timer_update(struct sctp_association *asoc,
- sctp_event_timeout_t timer,
- char *name)
+ enum sctp_event_timeout timer,
+ char *name)
{
struct sctp_transport *t;
@@ -1086,6 +1109,8 @@ static void sctp_cmd_send_msg(struct sctp_association *asoc,
list_for_each_entry(chunk, &msg->chunks, frag_list)
sctp_outq_tail(&asoc->outqueue, chunk, gfp);
+
+ asoc->outqueue.sched->enqueue(&asoc->outqueue, msg);
}
@@ -1139,22 +1164,20 @@ static void sctp_cmd_send_asconf(struct sctp_association *asoc)
* If you want to understand all of lksctp, this is a
* good place to start.
*/
-int sctp_do_sm(struct net *net, sctp_event_t event_type, sctp_subtype_t subtype,
- sctp_state_t state,
- struct sctp_endpoint *ep,
- struct sctp_association *asoc,
- void *event_arg,
- gfp_t gfp)
+int sctp_do_sm(struct net *net, enum sctp_event event_type,
+ union sctp_subtype subtype, enum sctp_state state,
+ struct sctp_endpoint *ep, struct sctp_association *asoc,
+ void *event_arg, gfp_t gfp)
{
- sctp_cmd_seq_t commands;
- const sctp_sm_table_entry_t *state_fn;
- sctp_disposition_t status;
- int error = 0;
- typedef const char *(printfn_t)(sctp_subtype_t);
+ typedef const char *(printfn_t)(union sctp_subtype);
static printfn_t *table[] = {
NULL, sctp_cname, sctp_tname, sctp_oname, sctp_pname,
};
printfn_t *debug_fn __attribute__ ((unused)) = table[event_type];
+ const struct sctp_sm_table_entry *state_fn;
+ struct sctp_cmd_seq commands;
+ enum sctp_disposition status;
+ int error = 0;
/* Look up the state function, run it, and then process the
* side effects. These three steps are the heart of lksctp.
@@ -1178,13 +1201,14 @@ int sctp_do_sm(struct net *net, sctp_event_t event_type, sctp_subtype_t subtype,
/*****************************************************************
* This the master state function side effect processing function.
*****************************************************************/
-static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype,
- sctp_state_t state,
+static int sctp_side_effects(enum sctp_event event_type,
+ union sctp_subtype subtype,
+ enum sctp_state state,
struct sctp_endpoint *ep,
struct sctp_association **asoc,
void *event_arg,
- sctp_disposition_t status,
- sctp_cmd_seq_t *commands,
+ enum sctp_disposition status,
+ struct sctp_cmd_seq *commands,
gfp_t gfp)
{
int error;
@@ -1263,29 +1287,27 @@ bail:
********************************************************************/
/* This is the side-effect interpreter. */
-static int sctp_cmd_interpreter(sctp_event_t event_type,
- sctp_subtype_t subtype,
- sctp_state_t state,
+static int sctp_cmd_interpreter(enum sctp_event event_type,
+ union sctp_subtype subtype,
+ enum sctp_state state,
struct sctp_endpoint *ep,
struct sctp_association *asoc,
void *event_arg,
- sctp_disposition_t status,
- sctp_cmd_seq_t *commands,
+ enum sctp_disposition status,
+ struct sctp_cmd_seq *commands,
gfp_t gfp)
{
- struct sock *sk = ep->base.sk;
- struct sctp_sock *sp = sctp_sk(sk);
- int error = 0;
- int force;
- sctp_cmd_t *cmd;
- struct sctp_chunk *new_obj;
- struct sctp_chunk *chunk = NULL;
+ struct sctp_sock *sp = sctp_sk(ep->base.sk);
+ struct sctp_chunk *chunk = NULL, *new_obj;
struct sctp_packet *packet;
+ struct sctp_sackhdr sackh;
struct timer_list *timer;
- unsigned long timeout;
struct sctp_transport *t;
- struct sctp_sackhdr sackh;
+ unsigned long timeout;
+ struct sctp_cmd *cmd;
int local_cork = 0;
+ int error = 0;
+ int force;
if (SCTP_EVENT_T_TIMEOUT != event_type)
chunk = event_arg;
@@ -1607,12 +1629,12 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
break;
case SCTP_CMD_INIT_FAILED:
- sctp_cmd_init_failed(commands, asoc, cmd->obj.err);
+ sctp_cmd_init_failed(commands, asoc, cmd->obj.u32);
break;
case SCTP_CMD_ASSOC_FAILED:
sctp_cmd_assoc_failed(commands, asoc, event_type,
- subtype, chunk, cmd->obj.err);
+ subtype, chunk, cmd->obj.u32);
break;
case SCTP_CMD_INIT_COUNTER_INC:
@@ -1680,8 +1702,8 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
case SCTP_CMD_PROCESS_CTSN:
/* Dummy up a SACK for processing. */
sackh.cum_tsn_ack = cmd->obj.be32;
- sackh.a_rwnd = asoc->peer.rwnd +
- asoc->outqueue.outstanding_bytes;
+ sackh.a_rwnd = htonl(asoc->peer.rwnd +
+ asoc->outqueue.outstanding_bytes);
sackh.num_gap_ack_blocks = 0;
sackh.num_dup_tsns = 0;
chunk->subh.sack_hdr = &sackh;
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index b2a74c3823ee..8f8ccded13e4 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -59,103 +59,110 @@
#include <net/sctp/sm.h>
#include <net/sctp/structs.h>
-static struct sctp_packet *sctp_abort_pkt_new(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- struct sctp_chunk *chunk,
- const void *payload,
- size_t paylen);
+static struct sctp_packet *sctp_abort_pkt_new(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ struct sctp_chunk *chunk,
+ const void *payload, size_t paylen);
static int sctp_eat_data(const struct sctp_association *asoc,
struct sctp_chunk *chunk,
- sctp_cmd_seq_t *commands);
-static struct sctp_packet *sctp_ootb_pkt_new(struct net *net,
- const struct sctp_association *asoc,
- const struct sctp_chunk *chunk);
+ struct sctp_cmd_seq *commands);
+static struct sctp_packet *sctp_ootb_pkt_new(
+ struct net *net,
+ const struct sctp_association *asoc,
+ const struct sctp_chunk *chunk);
static void sctp_send_stale_cookie_err(struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const struct sctp_chunk *chunk,
- sctp_cmd_seq_t *commands,
+ struct sctp_cmd_seq *commands,
struct sctp_chunk *err_chunk);
-static sctp_disposition_t sctp_sf_do_5_2_6_stale(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands);
-static sctp_disposition_t sctp_sf_shut_8_4_5(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands);
-static sctp_disposition_t sctp_sf_tabort_8_4_8(struct net *net,
+static enum sctp_disposition sctp_sf_do_5_2_6_stale(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands);
+static enum sctp_disposition sctp_sf_shut_8_4_5(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands);
+static enum sctp_disposition sctp_sf_tabort_8_4_8(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
- const sctp_subtype_t type,
+ const union sctp_subtype type,
void *arg,
- sctp_cmd_seq_t *commands);
+ struct sctp_cmd_seq *commands);
static struct sctp_sackhdr *sctp_sm_pull_sack(struct sctp_chunk *chunk);
-static sctp_disposition_t sctp_stop_t1_and_abort(struct net *net,
- sctp_cmd_seq_t *commands,
- __be16 error, int sk_err,
- const struct sctp_association *asoc,
- struct sctp_transport *transport);
+static enum sctp_disposition sctp_stop_t1_and_abort(
+ struct net *net,
+ struct sctp_cmd_seq *commands,
+ __be16 error, int sk_err,
+ const struct sctp_association *asoc,
+ struct sctp_transport *transport);
-static sctp_disposition_t sctp_sf_abort_violation(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- void *arg,
- sctp_cmd_seq_t *commands,
- const __u8 *payload,
- const size_t paylen);
+static enum sctp_disposition sctp_sf_abort_violation(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ void *arg,
+ struct sctp_cmd_seq *commands,
+ const __u8 *payload,
+ const size_t paylen);
-static sctp_disposition_t sctp_sf_violation_chunklen(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands);
+static enum sctp_disposition sctp_sf_violation_chunklen(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands);
-static sctp_disposition_t sctp_sf_violation_paramlen(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg, void *ext,
- sctp_cmd_seq_t *commands);
+static enum sctp_disposition sctp_sf_violation_paramlen(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg, void *ext,
+ struct sctp_cmd_seq *commands);
-static sctp_disposition_t sctp_sf_violation_ctsn(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands);
+static enum sctp_disposition sctp_sf_violation_ctsn(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands);
-static sctp_disposition_t sctp_sf_violation_chunk(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands);
+static enum sctp_disposition sctp_sf_violation_chunk(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands);
-static sctp_ierror_t sctp_sf_authenticate(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- struct sctp_chunk *chunk);
+static enum sctp_ierror sctp_sf_authenticate(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ struct sctp_chunk *chunk);
-static sctp_disposition_t __sctp_sf_do_9_1_abort(struct net *net,
+static enum sctp_disposition __sctp_sf_do_9_1_abort(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
- const sctp_subtype_t type,
+ const union sctp_subtype type,
void *arg,
- sctp_cmd_seq_t *commands);
+ struct sctp_cmd_seq *commands);
/* Small helper function that checks if the chunk length
* is of the appropriate length. The 'required_length' argument
@@ -164,8 +171,8 @@ static sctp_disposition_t __sctp_sf_do_9_1_abort(struct net *net,
* false = Invalid length
*
*/
-static inline bool
-sctp_chunk_length_valid(struct sctp_chunk *chunk, __u16 required_length)
+static inline bool sctp_chunk_length_valid(struct sctp_chunk *chunk,
+ __u16 required_length)
{
__u16 chunk_length = ntohs(chunk->chunk_hdr->length);
@@ -213,12 +220,11 @@ sctp_chunk_length_valid(struct sctp_chunk *chunk, __u16 required_length)
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_do_4_C(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_4_C(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg, struct sctp_cmd_seq *commands)
{
struct sctp_chunk *chunk = arg;
struct sctp_ulpevent *ev;
@@ -299,19 +305,17 @@ sctp_disposition_t sctp_sf_do_4_C(struct net *net,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_5_1B_init(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
- struct sctp_chunk *chunk = arg;
- struct sctp_chunk *repl;
+ struct sctp_chunk *chunk = arg, *repl, *err_chunk;
+ struct sctp_unrecognized_param *unk_param;
struct sctp_association *new_asoc;
- struct sctp_chunk *err_chunk;
struct sctp_packet *packet;
- sctp_unrecognized_param_t *unk_param;
int len;
/* 6.10 Bundling
@@ -435,7 +439,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net,
* construct the parameters in INIT ACK by copying the
* ERROR causes over.
*/
- unk_param = (sctp_unrecognized_param_t *)
+ unk_param = (struct sctp_unrecognized_param *)
((__u8 *)(err_chunk->chunk_hdr) +
sizeof(struct sctp_chunkhdr));
/* Replace the cause code with the "Unrecognized parameter"
@@ -495,15 +499,15 @@ nomem:
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_5_1C_ack(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
- struct sctp_chunk *chunk = arg;
struct sctp_init_chunk *initchunk;
+ struct sctp_chunk *chunk = arg;
struct sctp_chunk *err_chunk;
struct sctp_packet *packet;
@@ -518,7 +522,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net,
return sctp_sf_violation_chunk(net, ep, asoc, type, arg, commands);
/* Make sure that the INIT-ACK chunk has a valid length */
- if (!sctp_chunk_length_valid(chunk, sizeof(sctp_initack_chunk_t)))
+ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_initack_chunk)))
return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
/* Grab the INIT header. */
@@ -530,7 +534,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net,
(struct sctp_init_chunk *)chunk->chunk_hdr, chunk,
&err_chunk)) {
- sctp_error_t error = SCTP_ERROR_NO_RESOURCE;
+ enum sctp_error error = SCTP_ERROR_NO_RESOURCE;
/* This chunk contains fatal error. It is to be discarded.
* Send an ABORT, with causes. If there are no causes,
@@ -645,20 +649,21 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type, void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
- struct sctp_chunk *chunk = arg;
+ struct sctp_ulpevent *ev, *ai_ev = NULL;
struct sctp_association *new_asoc;
struct sctp_init_chunk *peer_init;
- struct sctp_chunk *repl;
- struct sctp_ulpevent *ev, *ai_ev = NULL;
- int error = 0;
+ struct sctp_chunk *chunk = arg;
struct sctp_chunk *err_chk_p;
+ struct sctp_chunk *repl;
struct sock *sk;
+ int error = 0;
/* If the packet is an OOTB packet which is temporarily on the
* control endpoint, respond with an ABORT.
@@ -758,7 +763,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net,
*/
if (chunk->auth_chunk) {
struct sctp_chunk auth;
- sctp_ierror_t ret;
+ enum sctp_ierror ret;
/* Make sure that we and the peer are AUTH capable */
if (!net->sctp.auth_enable || !new_asoc->peer.auth_capable) {
@@ -872,11 +877,12 @@ nomem:
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_do_5_1E_ca(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type, void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_5_1E_ca(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
struct sctp_chunk *chunk = arg;
struct sctp_ulpevent *ev;
@@ -950,11 +956,12 @@ nomem:
}
/* Generate and sendout a heartbeat packet. */
-static sctp_disposition_t sctp_sf_heartbeat(const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+static enum sctp_disposition sctp_sf_heartbeat(
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
struct sctp_transport *transport = (struct sctp_transport *) arg;
struct sctp_chunk *reply;
@@ -975,12 +982,12 @@ static sctp_disposition_t sctp_sf_heartbeat(const struct sctp_endpoint *ep,
}
/* Generate a HEARTBEAT packet on the given transport. */
-sctp_disposition_t sctp_sf_sendbeat_8_3(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_sendbeat_8_3(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
struct sctp_transport *transport = (struct sctp_transport *) arg;
@@ -1023,11 +1030,12 @@ sctp_disposition_t sctp_sf_sendbeat_8_3(struct net *net,
}
/* resend asoc strreset_chunk. */
-sctp_disposition_t sctp_sf_send_reconf(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type, void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_send_reconf(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
struct sctp_transport *transport = arg;
@@ -1074,12 +1082,11 @@ sctp_disposition_t sctp_sf_send_reconf(struct net *net,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_beat_8_3(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_beat_8_3(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg, struct sctp_cmd_seq *commands)
{
struct sctp_paramhdr *param_hdr;
struct sctp_chunk *chunk = arg;
@@ -1090,7 +1097,8 @@ sctp_disposition_t sctp_sf_beat_8_3(struct net *net,
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* Make sure that the HEARTBEAT chunk has a valid length. */
- if (!sctp_chunk_length_valid(chunk, sizeof(sctp_heartbeat_chunk_t)))
+ if (!sctp_chunk_length_valid(chunk,
+ sizeof(struct sctp_heartbeat_chunk)))
return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
@@ -1098,7 +1106,7 @@ sctp_disposition_t sctp_sf_beat_8_3(struct net *net,
* respond with a HEARTBEAT ACK that contains the Heartbeat
* Information field copied from the received HEARTBEAT chunk.
*/
- chunk->subh.hb_hdr = (sctp_heartbeathdr_t *)chunk->skb->data;
+ chunk->subh.hb_hdr = (struct sctp_heartbeathdr *)chunk->skb->data;
param_hdr = (struct sctp_paramhdr *)chunk->subh.hb_hdr;
paylen = ntohs(chunk->chunk_hdr->length) - sizeof(struct sctp_chunkhdr);
@@ -1148,34 +1156,32 @@ nomem:
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_backbeat_8_3(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_backbeat_8_3(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
+ struct sctp_sender_hb_info *hbinfo;
struct sctp_chunk *chunk = arg;
- union sctp_addr from_addr;
struct sctp_transport *link;
- sctp_sender_hb_info_t *hbinfo;
unsigned long max_interval;
+ union sctp_addr from_addr;
if (!sctp_vtag_verify(chunk, asoc))
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* Make sure that the HEARTBEAT-ACK chunk has a valid length. */
if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr) +
- sizeof(sctp_sender_hb_info_t)))
+ sizeof(*hbinfo)))
return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
- hbinfo = (sctp_sender_hb_info_t *) chunk->skb->data;
+ hbinfo = (struct sctp_sender_hb_info *)chunk->skb->data;
/* Make sure that the length of the parameter is what we expect */
- if (ntohs(hbinfo->param_hdr.length) !=
- sizeof(sctp_sender_hb_info_t)) {
+ if (ntohs(hbinfo->param_hdr.length) != sizeof(*hbinfo))
return SCTP_DISPOSITION_DISCARD;
- }
from_addr = hbinfo->daddr;
link = sctp_assoc_lookup_paddr(asoc, &from_addr);
@@ -1227,15 +1233,15 @@ sctp_disposition_t sctp_sf_backbeat_8_3(struct net *net,
*/
static int sctp_sf_send_restart_abort(struct net *net, union sctp_addr *ssa,
struct sctp_chunk *init,
- sctp_cmd_seq_t *commands)
+ struct sctp_cmd_seq *commands)
{
- int len;
- struct sctp_packet *pkt;
+ struct sctp_af *af = sctp_get_af_specific(ssa->v4.sin_family);
union sctp_addr_param *addrparm;
struct sctp_errhdr *errhdr;
+ char buffer[sizeof(*errhdr) + sizeof(*addrparm)];
struct sctp_endpoint *ep;
- char buffer[sizeof(struct sctp_errhdr)+sizeof(union sctp_addr_param)];
- struct sctp_af *af = sctp_get_af_specific(ssa->v4.sin_family);
+ struct sctp_packet *pkt;
+ int len;
/* Build the error on the stack. We are way to malloc crazy
* throughout the code today.
@@ -1245,7 +1251,7 @@ static int sctp_sf_send_restart_abort(struct net *net, union sctp_addr *ssa,
/* Copy into a parm format. */
len = af->to_addr_param(ssa, addrparm);
- len += sizeof(sctp_errhdr_t);
+ len += sizeof(*errhdr);
errhdr->cause = SCTP_ERROR_RESTART;
errhdr->length = htons(len);
@@ -1292,7 +1298,7 @@ static bool list_has_sctp_addr(const struct list_head *list,
static int sctp_sf_check_restart_addrs(const struct sctp_association *new_asoc,
const struct sctp_association *asoc,
struct sctp_chunk *init,
- sctp_cmd_seq_t *commands)
+ struct sctp_cmd_seq *commands)
{
struct net *net = sock_net(new_asoc->base.sk);
struct sctp_transport *new_addr;
@@ -1412,20 +1418,19 @@ static char sctp_tietags_compare(struct sctp_association *new_asoc,
/* Common helper routine for both duplicate and simulataneous INIT
* chunk handling.
*/
-static sctp_disposition_t sctp_sf_do_unexpected_init(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg, sctp_cmd_seq_t *commands)
+static enum sctp_disposition sctp_sf_do_unexpected_init(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
- sctp_disposition_t retval;
- struct sctp_chunk *chunk = arg;
- struct sctp_chunk *repl;
+ struct sctp_chunk *chunk = arg, *repl, *err_chunk;
+ struct sctp_unrecognized_param *unk_param;
struct sctp_association *new_asoc;
- struct sctp_chunk *err_chunk;
+ enum sctp_disposition retval;
struct sctp_packet *packet;
- sctp_unrecognized_param_t *unk_param;
int len;
/* 6.10 Bundling
@@ -1555,7 +1560,7 @@ static sctp_disposition_t sctp_sf_do_unexpected_init(
* construct the parameters in INIT ACK by copying the
* ERROR causes over.
*/
- unk_param = (sctp_unrecognized_param_t *)
+ unk_param = (struct sctp_unrecognized_param *)
((__u8 *)(err_chunk->chunk_hdr) +
sizeof(struct sctp_chunkhdr));
/* Replace the cause code with the "Unrecognized parameter"
@@ -1626,12 +1631,13 @@ cleanup:
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_do_5_2_1_siminit(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_5_2_1_siminit(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
/* Call helper to do the real work for both simulataneous and
* duplicate INIT chunk handling.
@@ -1680,12 +1686,13 @@ sctp_disposition_t sctp_sf_do_5_2_1_siminit(struct net *net,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_do_5_2_2_dupinit(struct net *net,
+enum sctp_disposition sctp_sf_do_5_2_2_dupinit(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
- const sctp_subtype_t type,
+ const union sctp_subtype type,
void *arg,
- sctp_cmd_seq_t *commands)
+ struct sctp_cmd_seq *commands)
{
/* Call helper to do the real work for both simulataneous and
* duplicate INIT chunk handling.
@@ -1703,11 +1710,13 @@ sctp_disposition_t sctp_sf_do_5_2_2_dupinit(struct net *net,
* An unexpected INIT ACK usually indicates the processing of an old or
* duplicated INIT chunk.
*/
-sctp_disposition_t sctp_sf_do_5_2_3_initack(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg, sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_5_2_3_initack(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
/* Per the above section, we'll discard the chunk if we have an
* endpoint. If this is an OOTB INIT-ACK, treat it as such.
@@ -1723,18 +1732,19 @@ sctp_disposition_t sctp_sf_do_5_2_3_initack(struct net *net,
* Section 5.2.4
* A) In this case, the peer may have restarted.
*/
-static sctp_disposition_t sctp_sf_do_dupcook_a(struct net *net,
+static enum sctp_disposition sctp_sf_do_dupcook_a(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
struct sctp_chunk *chunk,
- sctp_cmd_seq_t *commands,
+ struct sctp_cmd_seq *commands,
struct sctp_association *new_asoc)
{
struct sctp_init_chunk *peer_init;
+ enum sctp_disposition disposition;
struct sctp_ulpevent *ev;
struct sctp_chunk *repl;
struct sctp_chunk *err;
- sctp_disposition_t disposition;
/* new_asoc is a brand-new association, so these are not yet
* side effects--it is safe to run them here.
@@ -1838,11 +1848,12 @@ nomem:
* after responding to the local endpoint's INIT
*/
/* This case represents an initialization collision. */
-static sctp_disposition_t sctp_sf_do_dupcook_b(struct net *net,
+static enum sctp_disposition sctp_sf_do_dupcook_b(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
struct sctp_chunk *chunk,
- sctp_cmd_seq_t *commands,
+ struct sctp_cmd_seq *commands,
struct sctp_association *new_asoc)
{
struct sctp_init_chunk *peer_init;
@@ -1909,11 +1920,12 @@ nomem:
* but a new tag of its own.
*/
/* This case represents an initialization collision. */
-static sctp_disposition_t sctp_sf_do_dupcook_c(struct net *net,
+static enum sctp_disposition sctp_sf_do_dupcook_c(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
struct sctp_chunk *chunk,
- sctp_cmd_seq_t *commands,
+ struct sctp_cmd_seq *commands,
struct sctp_association *new_asoc)
{
/* The cookie should be silently discarded.
@@ -1931,11 +1943,12 @@ static sctp_disposition_t sctp_sf_do_dupcook_c(struct net *net,
* enter the ESTABLISHED state, if it has not already done so.
*/
/* This case represents an initialization collision. */
-static sctp_disposition_t sctp_sf_do_dupcook_d(struct net *net,
+static enum sctp_disposition sctp_sf_do_dupcook_d(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
struct sctp_chunk *chunk,
- sctp_cmd_seq_t *commands,
+ struct sctp_cmd_seq *commands,
struct sctp_association *new_asoc)
{
struct sctp_ulpevent *ev = NULL, *ai_ev = NULL;
@@ -2026,19 +2039,20 @@ nomem:
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_do_5_2_4_dupcook(struct net *net,
+enum sctp_disposition sctp_sf_do_5_2_4_dupcook(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
- const sctp_subtype_t type,
+ const union sctp_subtype type,
void *arg,
- sctp_cmd_seq_t *commands)
+ struct sctp_cmd_seq *commands)
{
- sctp_disposition_t retval;
- struct sctp_chunk *chunk = arg;
struct sctp_association *new_asoc;
+ struct sctp_chunk *chunk = arg;
+ enum sctp_disposition retval;
+ struct sctp_chunk *err_chk_p;
int error = 0;
char action;
- struct sctp_chunk *err_chk_p;
/* Make sure that the chunk has a valid length from the protocol
* perspective. In this case check to make sure we have at least
@@ -2144,13 +2158,13 @@ nomem:
*
* See sctp_sf_do_9_1_abort().
*/
-sctp_disposition_t sctp_sf_shutdown_pending_abort(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_shutdown_pending_abort(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
struct sctp_chunk *chunk = arg;
@@ -2167,7 +2181,7 @@ sctp_disposition_t sctp_sf_shutdown_pending_abort(
* as we do not know its true length. So, to be safe, discard the
* packet.
*/
- if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t)))
+ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_abort_chunk)))
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* ADD-IP: Special case for ABORT chunks
@@ -2187,12 +2201,13 @@ sctp_disposition_t sctp_sf_shutdown_pending_abort(
*
* See sctp_sf_do_9_1_abort().
*/
-sctp_disposition_t sctp_sf_shutdown_sent_abort(struct net *net,
+enum sctp_disposition sctp_sf_shutdown_sent_abort(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
- const sctp_subtype_t type,
+ const union sctp_subtype type,
void *arg,
- sctp_cmd_seq_t *commands)
+ struct sctp_cmd_seq *commands)
{
struct sctp_chunk *chunk = arg;
@@ -2209,7 +2224,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_abort(struct net *net,
* as we do not know its true length. So, to be safe, discard the
* packet.
*/
- if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t)))
+ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_abort_chunk)))
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* ADD-IP: Special case for ABORT chunks
@@ -2237,13 +2252,13 @@ sctp_disposition_t sctp_sf_shutdown_sent_abort(struct net *net,
*
* See sctp_sf_do_9_1_abort().
*/
-sctp_disposition_t sctp_sf_shutdown_ack_sent_abort(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_shutdown_ack_sent_abort(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
/* The same T2 timer, so we should be able to use
* common function with the SHUTDOWN-SENT state.
@@ -2265,15 +2280,16 @@ sctp_disposition_t sctp_sf_shutdown_ack_sent_abort(
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_cookie_echoed_err(struct net *net,
+enum sctp_disposition sctp_sf_cookie_echoed_err(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
- const sctp_subtype_t type,
+ const union sctp_subtype type,
void *arg,
- sctp_cmd_seq_t *commands)
+ struct sctp_cmd_seq *commands)
{
struct sctp_chunk *chunk = arg;
- sctp_errhdr_t *err;
+ struct sctp_errhdr *err;
if (!sctp_vtag_verify(chunk, asoc))
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
@@ -2281,7 +2297,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_err(struct net *net,
/* Make sure that the ERROR chunk has a valid length.
* The parameter walking depends on this as well.
*/
- if (!sctp_chunk_length_valid(chunk, sizeof(sctp_operr_chunk_t)))
+ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_operr_chunk)))
return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
@@ -2329,20 +2345,20 @@ sctp_disposition_t sctp_sf_cookie_echoed_err(struct net *net,
*
* The return value is the disposition of the chunk.
*/
-static sctp_disposition_t sctp_sf_do_5_2_6_stale(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+static enum sctp_disposition sctp_sf_do_5_2_6_stale(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
- struct sctp_chunk *chunk = arg;
- u32 stale;
- sctp_cookie_preserve_param_t bht;
- sctp_errhdr_t *err;
- struct sctp_chunk *reply;
- struct sctp_bind_addr *bp;
int attempts = asoc->init_err_counter + 1;
+ struct sctp_chunk *chunk = arg, *reply;
+ struct sctp_cookie_preserve_param bht;
+ struct sctp_bind_addr *bp;
+ struct sctp_errhdr *err;
+ u32 stale;
if (attempts > asoc->max_init_attempts) {
sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR,
@@ -2352,7 +2368,7 @@ static sctp_disposition_t sctp_sf_do_5_2_6_stale(struct net *net,
return SCTP_DISPOSITION_DELETE_TCB;
}
- err = (sctp_errhdr_t *)(chunk->skb->data);
+ err = (struct sctp_errhdr *)(chunk->skb->data);
/* When calculating the time extension, an implementation
* SHOULD use the RTT information measured based on the
@@ -2368,7 +2384,7 @@ static sctp_disposition_t sctp_sf_do_5_2_6_stale(struct net *net,
* to give ample time to retransmit the new cookie and thus
* yield a higher probability of success on the reattempt.
*/
- stale = ntohl(*(__be32 *)((u8 *)err + sizeof(sctp_errhdr_t)));
+ stale = ntohl(*(__be32 *)((u8 *)err + sizeof(*err)));
stale = (stale * 2) / 1000;
bht.param_hdr.type = SCTP_PARAM_COOKIE_PRESERVATIVE;
@@ -2452,12 +2468,13 @@ nomem:
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_do_9_1_abort(struct net *net,
+enum sctp_disposition sctp_sf_do_9_1_abort(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
- const sctp_subtype_t type,
+ const union sctp_subtype type,
void *arg,
- sctp_cmd_seq_t *commands)
+ struct sctp_cmd_seq *commands)
{
struct sctp_chunk *chunk = arg;
@@ -2474,7 +2491,7 @@ sctp_disposition_t sctp_sf_do_9_1_abort(struct net *net,
* as we do not know its true length. So, to be safe, discard the
* packet.
*/
- if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t)))
+ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_abort_chunk)))
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* ADD-IP: Special case for ABORT chunks
@@ -2489,27 +2506,29 @@ sctp_disposition_t sctp_sf_do_9_1_abort(struct net *net,
return __sctp_sf_do_9_1_abort(net, ep, asoc, type, arg, commands);
}
-static sctp_disposition_t __sctp_sf_do_9_1_abort(struct net *net,
+static enum sctp_disposition __sctp_sf_do_9_1_abort(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
- const sctp_subtype_t type,
+ const union sctp_subtype type,
void *arg,
- sctp_cmd_seq_t *commands)
+ struct sctp_cmd_seq *commands)
{
+ __be16 error = SCTP_ERROR_NO_ERROR;
struct sctp_chunk *chunk = arg;
unsigned int len;
- __be16 error = SCTP_ERROR_NO_ERROR;
/* See if we have an error cause code in the chunk. */
len = ntohs(chunk->chunk_hdr->length);
if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr)) {
+ struct sctp_errhdr *err;
- sctp_errhdr_t *err;
sctp_walk_errors(err, chunk->chunk_hdr);
if ((void *)err != (void *)chunk->chunk_end)
- return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg,
+ commands);
- error = ((sctp_errhdr_t *)chunk->skb->data)->cause;
+ error = ((struct sctp_errhdr *)chunk->skb->data)->cause;
}
sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNRESET));
@@ -2526,16 +2545,17 @@ static sctp_disposition_t __sctp_sf_do_9_1_abort(struct net *net,
*
* See sctp_sf_do_9_1_abort() above.
*/
-sctp_disposition_t sctp_sf_cookie_wait_abort(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_cookie_wait_abort(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
+ __be16 error = SCTP_ERROR_NO_ERROR;
struct sctp_chunk *chunk = arg;
unsigned int len;
- __be16 error = SCTP_ERROR_NO_ERROR;
if (!sctp_vtag_verify_either(chunk, asoc))
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
@@ -2550,13 +2570,13 @@ sctp_disposition_t sctp_sf_cookie_wait_abort(struct net *net,
* as we do not know its true length. So, to be safe, discard the
* packet.
*/
- if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t)))
+ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_abort_chunk)))
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* See if we have an error cause code in the chunk. */
len = ntohs(chunk->chunk_hdr->length);
if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr))
- error = ((sctp_errhdr_t *)chunk->skb->data)->cause;
+ error = ((struct sctp_errhdr *)chunk->skb->data)->cause;
return sctp_stop_t1_and_abort(net, commands, error, ECONNREFUSED, asoc,
chunk->transport);
@@ -2565,12 +2585,13 @@ sctp_disposition_t sctp_sf_cookie_wait_abort(struct net *net,
/*
* Process an incoming ICMP as an ABORT. (COOKIE-WAIT state)
*/
-sctp_disposition_t sctp_sf_cookie_wait_icmp_abort(struct net *net,
+enum sctp_disposition sctp_sf_cookie_wait_icmp_abort(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
- const sctp_subtype_t type,
+ const union sctp_subtype type,
void *arg,
- sctp_cmd_seq_t *commands)
+ struct sctp_cmd_seq *commands)
{
return sctp_stop_t1_and_abort(net, commands, SCTP_ERROR_NO_ERROR,
ENOPROTOOPT, asoc,
@@ -2580,12 +2601,13 @@ sctp_disposition_t sctp_sf_cookie_wait_icmp_abort(struct net *net,
/*
* Process an ABORT. (COOKIE-ECHOED state)
*/
-sctp_disposition_t sctp_sf_cookie_echoed_abort(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_cookie_echoed_abort(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
/* There is a single T1 timer, so we should be able to use
* common function with the COOKIE-WAIT state.
@@ -2598,11 +2620,12 @@ sctp_disposition_t sctp_sf_cookie_echoed_abort(struct net *net,
*
* This is common code called by several sctp_sf_*_abort() functions above.
*/
-static sctp_disposition_t sctp_stop_t1_and_abort(struct net *net,
- sctp_cmd_seq_t *commands,
- __be16 error, int sk_err,
- const struct sctp_association *asoc,
- struct sctp_transport *transport)
+static enum sctp_disposition sctp_stop_t1_and_abort(
+ struct net *net,
+ struct sctp_cmd_seq *commands,
+ __be16 error, int sk_err,
+ const struct sctp_association *asoc,
+ struct sctp_transport *transport)
{
pr_debug("%s: ABORT received (INIT)\n", __func__);
@@ -2652,16 +2675,17 @@ static sctp_disposition_t sctp_stop_t1_and_abort(struct net *net,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_do_9_2_shutdown(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_9_2_shutdown(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
+ enum sctp_disposition disposition;
struct sctp_chunk *chunk = arg;
- sctp_shutdownhdr_t *sdh;
- sctp_disposition_t disposition;
+ struct sctp_shutdownhdr *sdh;
struct sctp_ulpevent *ev;
__u32 ctsn;
@@ -2669,14 +2693,13 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown(struct net *net,
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* Make sure that the SHUTDOWN chunk has a valid length. */
- if (!sctp_chunk_length_valid(chunk,
- sizeof(struct sctp_shutdown_chunk_t)))
+ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_shutdown_chunk)))
return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
/* Convert the elaborate header. */
- sdh = (sctp_shutdownhdr_t *)chunk->skb->data;
- skb_pull(chunk->skb, sizeof(sctp_shutdownhdr_t));
+ sdh = (struct sctp_shutdownhdr *)chunk->skb->data;
+ skb_pull(chunk->skb, sizeof(*sdh));
chunk->subh.shutdown_hdr = sdh;
ctsn = ntohl(sdh->cum_tsn_ack);
@@ -2742,27 +2765,27 @@ out:
* The Cumulative TSN Ack of the received SHUTDOWN chunk
* MUST be processed.
*/
-sctp_disposition_t sctp_sf_do_9_2_shut_ctsn(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_9_2_shut_ctsn(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
struct sctp_chunk *chunk = arg;
- sctp_shutdownhdr_t *sdh;
+ struct sctp_shutdownhdr *sdh;
__u32 ctsn;
if (!sctp_vtag_verify(chunk, asoc))
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* Make sure that the SHUTDOWN chunk has a valid length. */
- if (!sctp_chunk_length_valid(chunk,
- sizeof(struct sctp_shutdown_chunk_t)))
+ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_shutdown_chunk)))
return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
- sdh = (sctp_shutdownhdr_t *)chunk->skb->data;
+ sdh = (struct sctp_shutdownhdr *)chunk->skb->data;
ctsn = ntohl(sdh->cum_tsn_ack);
if (TSN_lt(ctsn, asoc->ctsn_ack_point)) {
@@ -2796,14 +2819,15 @@ sctp_disposition_t sctp_sf_do_9_2_shut_ctsn(struct net *net,
* that belong to this association, it should discard the INIT chunk and
* retransmit the SHUTDOWN ACK chunk.
*/
-sctp_disposition_t sctp_sf_do_9_2_reshutack(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_9_2_reshutack(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
- struct sctp_chunk *chunk = (struct sctp_chunk *) arg;
+ struct sctp_chunk *chunk = arg;
struct sctp_chunk *reply;
/* Make sure that the chunk has a valid length */
@@ -2860,26 +2884,26 @@ nomem:
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_do_ecn_cwr(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_ecn_cwr(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
- sctp_cwrhdr_t *cwr;
struct sctp_chunk *chunk = arg;
+ struct sctp_cwrhdr *cwr;
u32 lowest_tsn;
if (!sctp_vtag_verify(chunk, asoc))
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
- if (!sctp_chunk_length_valid(chunk, sizeof(sctp_ecne_chunk_t)))
+ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_ecne_chunk)))
return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
- cwr = (sctp_cwrhdr_t *) chunk->skb->data;
- skb_pull(chunk->skb, sizeof(sctp_cwrhdr_t));
+ cwr = (struct sctp_cwrhdr *)chunk->skb->data;
+ skb_pull(chunk->skb, sizeof(*cwr));
lowest_tsn = ntohl(cwr->lowest_tsn);
@@ -2916,25 +2940,24 @@ sctp_disposition_t sctp_sf_do_ecn_cwr(struct net *net,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_do_ecne(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_ecne(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg, struct sctp_cmd_seq *commands)
{
- sctp_ecnehdr_t *ecne;
struct sctp_chunk *chunk = arg;
+ struct sctp_ecnehdr *ecne;
if (!sctp_vtag_verify(chunk, asoc))
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
- if (!sctp_chunk_length_valid(chunk, sizeof(sctp_ecne_chunk_t)))
+ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_ecne_chunk)))
return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
- ecne = (sctp_ecnehdr_t *) chunk->skb->data;
- skb_pull(chunk->skb, sizeof(sctp_ecnehdr_t));
+ ecne = (struct sctp_ecnehdr *)chunk->skb->data;
+ skb_pull(chunk->skb, sizeof(*ecne));
/* If this is a newer ECNE than the last CWR packet we sent out */
sctp_add_cmd_sf(commands, SCTP_CMD_ECN_ECNE,
@@ -2973,15 +2996,15 @@ sctp_disposition_t sctp_sf_do_ecne(struct net *net,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_eat_data_6_2(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_eat_data_6_2(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
+ union sctp_arg force = SCTP_NOFORCE();
struct sctp_chunk *chunk = arg;
- sctp_arg_t force = SCTP_NOFORCE();
int error;
if (!sctp_vtag_verify(chunk, asoc)) {
@@ -3093,12 +3116,13 @@ discard_noforce:
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_eat_data_fast_4_4(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_eat_data_fast_4_4(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
struct sctp_chunk *chunk = arg;
int error;
@@ -3184,22 +3208,22 @@ sctp_disposition_t sctp_sf_eat_data_fast_4_4(struct net *net,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_eat_sack_6_2(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_eat_sack_6_2(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
struct sctp_chunk *chunk = arg;
- sctp_sackhdr_t *sackh;
+ struct sctp_sackhdr *sackh;
__u32 ctsn;
if (!sctp_vtag_verify(chunk, asoc))
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* Make sure that the SACK chunk has a valid length. */
- if (!sctp_chunk_length_valid(chunk, sizeof(sctp_sack_chunk_t)))
+ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_sack_chunk)))
return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
@@ -3258,12 +3282,13 @@ sctp_disposition_t sctp_sf_eat_sack_6_2(struct net *net,
*
* The return value is the disposition of the chunk.
*/
-static sctp_disposition_t sctp_sf_tabort_8_4_8(struct net *net,
+static enum sctp_disposition sctp_sf_tabort_8_4_8(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
- const sctp_subtype_t type,
+ const union sctp_subtype type,
void *arg,
- sctp_cmd_seq_t *commands)
+ struct sctp_cmd_seq *commands)
{
struct sctp_packet *packet = NULL;
struct sctp_chunk *chunk = arg;
@@ -3308,21 +3333,21 @@ static sctp_disposition_t sctp_sf_tabort_8_4_8(struct net *net,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_operr_notify(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_operr_notify(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
struct sctp_chunk *chunk = arg;
- sctp_errhdr_t *err;
+ struct sctp_errhdr *err;
if (!sctp_vtag_verify(chunk, asoc))
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* Make sure that the ERROR chunk has a valid length. */
- if (!sctp_chunk_length_valid(chunk, sizeof(sctp_operr_chunk_t)))
+ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_operr_chunk)))
return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
sctp_walk_errors(err, chunk->chunk_hdr);
@@ -3346,12 +3371,12 @@ sctp_disposition_t sctp_sf_operr_notify(struct net *net,
*
* The return value is the disposition.
*/
-sctp_disposition_t sctp_sf_do_9_2_final(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_9_2_final(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
struct sctp_chunk *chunk = arg;
struct sctp_chunk *reply;
@@ -3429,20 +3454,19 @@ nomem:
* receiver of the OOTB packet shall discard the OOTB packet and take
* no further action.
*/
-sctp_disposition_t sctp_sf_ootb(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_ootb(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg, struct sctp_cmd_seq *commands)
{
struct sctp_chunk *chunk = arg;
struct sk_buff *skb = chunk->skb;
struct sctp_chunkhdr *ch;
- sctp_errhdr_t *err;
- __u8 *ch_end;
- int ootb_shut_ack = 0;
+ struct sctp_errhdr *err;
int ootb_cookie_ack = 0;
+ int ootb_shut_ack = 0;
+ __u8 *ch_end;
SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES);
@@ -3518,16 +3542,17 @@ sctp_disposition_t sctp_sf_ootb(struct net *net,
* (endpoint, asoc, type, arg, commands)
*
* Outputs
- * (sctp_disposition_t)
+ * (enum sctp_disposition)
*
* The return value is the disposition of the chunk.
*/
-static sctp_disposition_t sctp_sf_shut_8_4_5(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+static enum sctp_disposition sctp_sf_shut_8_4_5(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
struct sctp_packet *packet = NULL;
struct sctp_chunk *chunk = arg;
@@ -3584,12 +3609,12 @@ static sctp_disposition_t sctp_sf_shut_8_4_5(struct net *net,
* chunks. --piggy ]
*
*/
-sctp_disposition_t sctp_sf_do_8_5_1_E_sa(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_8_5_1_E_sa(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
struct sctp_chunk *chunk = arg;
@@ -3609,17 +3634,18 @@ sctp_disposition_t sctp_sf_do_8_5_1_E_sa(struct net *net,
}
/* ADDIP Section 4.2 Upon reception of an ASCONF Chunk. */
-sctp_disposition_t sctp_sf_do_asconf(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type, void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_asconf(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
- struct sctp_chunk *chunk = arg;
- struct sctp_chunk *asconf_ack = NULL;
- struct sctp_paramhdr *err_param = NULL;
- sctp_addiphdr_t *hdr;
- __u32 serial;
+ struct sctp_paramhdr *err_param = NULL;
+ struct sctp_chunk *asconf_ack = NULL;
+ struct sctp_chunk *chunk = arg;
+ struct sctp_addiphdr *hdr;
+ __u32 serial;
if (!sctp_vtag_verify(chunk, asoc)) {
sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG,
@@ -3634,14 +3660,15 @@ sctp_disposition_t sctp_sf_do_asconf(struct net *net,
* described in [I-D.ietf-tsvwg-sctp-auth].
*/
if (!net->sctp.addip_noauth && !chunk->auth)
- return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands);
+ return sctp_sf_discard_chunk(net, ep, asoc, type, arg,
+ commands);
/* Make sure that the ASCONF ADDIP chunk has a valid length. */
- if (!sctp_chunk_length_valid(chunk, sizeof(sctp_addip_chunk_t)))
+ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_addip_chunk)))
return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
- hdr = (sctp_addiphdr_t *)chunk->skb->data;
+ hdr = (struct sctp_addiphdr *)chunk->skb->data;
serial = ntohl(hdr->serial);
/* Verify the ASCONF chunk before processing it. */
@@ -3725,18 +3752,19 @@ sctp_disposition_t sctp_sf_do_asconf(struct net *net,
* When building TLV parameters for the ASCONF Chunk that will add or
* delete IP addresses the D0 to D13 rules should be applied:
*/
-sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type, void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_asconf_ack(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
- struct sctp_chunk *asconf_ack = arg;
- struct sctp_chunk *last_asconf = asoc->addip_last_asconf;
- struct sctp_chunk *abort;
- struct sctp_paramhdr *err_param = NULL;
- sctp_addiphdr_t *addip_hdr;
- __u32 sent_serial, rcvd_serial;
+ struct sctp_chunk *last_asconf = asoc->addip_last_asconf;
+ struct sctp_paramhdr *err_param = NULL;
+ struct sctp_chunk *asconf_ack = arg;
+ struct sctp_addiphdr *addip_hdr;
+ __u32 sent_serial, rcvd_serial;
+ struct sctp_chunk *abort;
if (!sctp_vtag_verify(asconf_ack, asoc)) {
sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG,
@@ -3751,14 +3779,16 @@ sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net,
* described in [I-D.ietf-tsvwg-sctp-auth].
*/
if (!net->sctp.addip_noauth && !asconf_ack->auth)
- return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands);
+ return sctp_sf_discard_chunk(net, ep, asoc, type, arg,
+ commands);
/* Make sure that the ADDIP chunk has a valid length. */
- if (!sctp_chunk_length_valid(asconf_ack, sizeof(sctp_addip_chunk_t)))
+ if (!sctp_chunk_length_valid(asconf_ack,
+ sizeof(struct sctp_addip_chunk)))
return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
- addip_hdr = (sctp_addiphdr_t *)asconf_ack->skb->data;
+ addip_hdr = (struct sctp_addiphdr *)asconf_ack->skb->data;
rcvd_serial = ntohl(addip_hdr->serial);
/* Verify the ASCONF-ACK chunk before processing it. */
@@ -3767,7 +3797,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net,
(void *)err_param, commands);
if (last_asconf) {
- addip_hdr = (sctp_addiphdr_t *)last_asconf->subh.addip_hdr;
+ addip_hdr = (struct sctp_addiphdr *)last_asconf->subh.addip_hdr;
sent_serial = ntohl(addip_hdr->serial);
} else {
sent_serial = asoc->addip_serial - 1;
@@ -3782,7 +3812,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net,
if (ADDIP_SERIAL_gte(rcvd_serial, sent_serial + 1) &&
!(asoc->addip_last_asconf)) {
abort = sctp_make_abort(asoc, asconf_ack,
- sizeof(sctp_errhdr_t));
+ sizeof(struct sctp_errhdr));
if (abort) {
sctp_init_cause(abort, SCTP_ERROR_ASCONF_ACK, 0);
sctp_add_cmd_sf(commands, SCTP_CMD_REPLY,
@@ -3818,7 +3848,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net,
}
abort = sctp_make_abort(asoc, asconf_ack,
- sizeof(sctp_errhdr_t));
+ sizeof(struct sctp_errhdr));
if (abort) {
sctp_init_cause(abort, SCTP_ERROR_RSRC_LOW, 0);
sctp_add_cmd_sf(commands, SCTP_CMD_REPLY,
@@ -3841,11 +3871,12 @@ sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net,
}
/* RE-CONFIG Section 5.2 Upon reception of an RECONF Chunk. */
-sctp_disposition_t sctp_sf_do_reconf(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type, void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_reconf(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
struct sctp_paramhdr *err_param = NULL;
struct sctp_chunk *chunk = arg;
@@ -3917,15 +3948,15 @@ sctp_disposition_t sctp_sf_do_reconf(struct net *net,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_eat_fwd_tsn(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_eat_fwd_tsn(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
- struct sctp_chunk *chunk = arg;
struct sctp_fwdtsn_hdr *fwdtsn_hdr;
+ struct sctp_chunk *chunk = arg;
struct sctp_fwdtsn_skip *skip;
__u16 len;
__u32 tsn;
@@ -3987,16 +4018,16 @@ discard_noforce:
return SCTP_DISPOSITION_DISCARD;
}
-sctp_disposition_t sctp_sf_eat_fwd_tsn_fast(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_eat_fwd_tsn_fast(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
- struct sctp_chunk *chunk = arg;
struct sctp_fwdtsn_hdr *fwdtsn_hdr;
+ struct sctp_chunk *chunk = arg;
struct sctp_fwdtsn_skip *skip;
__u16 len;
__u32 tsn;
@@ -4079,23 +4110,23 @@ gen_shutdown:
*
* The return value is the disposition of the chunk.
*/
-static sctp_ierror_t sctp_sf_authenticate(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- struct sctp_chunk *chunk)
+static enum sctp_ierror sctp_sf_authenticate(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ struct sctp_chunk *chunk)
{
struct sctp_authhdr *auth_hdr;
+ __u8 *save_digest, *digest;
struct sctp_hmac *hmac;
unsigned int sig_len;
__u16 key_id;
- __u8 *save_digest;
- __u8 *digest;
/* Pull in the auth header, so we can do some more verification */
auth_hdr = (struct sctp_authhdr *)chunk->skb->data;
chunk->subh.auth_hdr = auth_hdr;
- skb_pull(chunk->skb, sizeof(struct sctp_authhdr));
+ skb_pull(chunk->skb, sizeof(*auth_hdr));
/* Make sure that we support the HMAC algorithm from the auth
* chunk.
@@ -4114,7 +4145,8 @@ static sctp_ierror_t sctp_sf_authenticate(struct net *net,
/* Make sure that the length of the signature matches what
* we expect.
*/
- sig_len = ntohs(chunk->chunk_hdr->length) - sizeof(sctp_auth_chunk_t);
+ sig_len = ntohs(chunk->chunk_hdr->length) -
+ sizeof(struct sctp_auth_chunk);
hmac = sctp_auth_get_hmac(ntohs(auth_hdr->hmac_id));
if (sig_len != hmac->hmac_len)
return SCTP_IERROR_PROTO_VIOLATION;
@@ -4136,8 +4168,8 @@ static sctp_ierror_t sctp_sf_authenticate(struct net *net,
memset(digest, 0, sig_len);
sctp_auth_calculate_hmac(asoc, chunk->skb,
- (struct sctp_auth_chunk *)chunk->chunk_hdr,
- GFP_ATOMIC);
+ (struct sctp_auth_chunk *)chunk->chunk_hdr,
+ GFP_ATOMIC);
/* Discard the packet if the digests do not match */
if (memcmp(save_digest, digest, sig_len)) {
@@ -4153,17 +4185,16 @@ nomem:
return SCTP_IERROR_NOMEM;
}
-sctp_disposition_t sctp_sf_eat_auth(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_eat_auth(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg, struct sctp_cmd_seq *commands)
{
- struct sctp_authhdr *auth_hdr;
struct sctp_chunk *chunk = arg;
+ struct sctp_authhdr *auth_hdr;
struct sctp_chunk *err_chunk;
- sctp_ierror_t error;
+ enum sctp_ierror error;
/* Make sure that the peer has AUTH capable */
if (!asoc->peer.auth_capable)
@@ -4250,12 +4281,12 @@ sctp_disposition_t sctp_sf_eat_auth(struct net *net,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_unk_chunk(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_unk_chunk(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
struct sctp_chunk *unk_chunk = arg;
struct sctp_chunk *err_chunk;
@@ -4330,12 +4361,12 @@ sctp_disposition_t sctp_sf_unk_chunk(struct net *net,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_discard_chunk(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_discard_chunk(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
struct sctp_chunk *chunk = arg;
@@ -4370,12 +4401,11 @@ sctp_disposition_t sctp_sf_discard_chunk(struct net *net,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_pdiscard(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_pdiscard(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg, struct sctp_cmd_seq *commands)
{
SCTP_INC_STATS(net, SCTP_MIB_IN_PKT_DISCARDS);
sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET, SCTP_NULL());
@@ -4398,12 +4428,12 @@ sctp_disposition_t sctp_sf_pdiscard(struct net *net,
* We simply tag the chunk as a violation. The state machine will log
* the violation and continue.
*/
-sctp_disposition_t sctp_sf_violation(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_violation(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
struct sctp_chunk *chunk = arg;
@@ -4418,14 +4448,14 @@ sctp_disposition_t sctp_sf_violation(struct net *net,
/*
* Common function to handle a protocol violation.
*/
-static sctp_disposition_t sctp_sf_abort_violation(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- void *arg,
- sctp_cmd_seq_t *commands,
- const __u8 *payload,
- const size_t paylen)
+static enum sctp_disposition sctp_sf_abort_violation(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ void *arg,
+ struct sctp_cmd_seq *commands,
+ const __u8 *payload,
+ const size_t paylen)
{
struct sctp_packet *packet = NULL;
struct sctp_chunk *chunk = arg;
@@ -4454,11 +4484,10 @@ static sctp_disposition_t sctp_sf_abort_violation(
/* Treat INIT-ACK as a special case during COOKIE-WAIT. */
if (chunk->chunk_hdr->type == SCTP_CID_INIT_ACK &&
!asoc->peer.i.init_tag) {
- sctp_initack_chunk_t *initack;
+ struct sctp_initack_chunk *initack;
- initack = (sctp_initack_chunk_t *)chunk->chunk_hdr;
- if (!sctp_chunk_length_valid(chunk,
- sizeof(sctp_initack_chunk_t)))
+ initack = (struct sctp_initack_chunk *)chunk->chunk_hdr;
+ if (!sctp_chunk_length_valid(chunk, sizeof(*initack)))
abort->chunk_hdr->flags |= SCTP_CHUNK_FLAG_T;
else {
unsigned int inittag;
@@ -4521,7 +4550,7 @@ nomem:
* Handle a protocol violation when the chunk length is invalid.
* "Invalid" length is identified as smaller than the minimal length a
* given chunk can be. For example, a SACK chunk has invalid length
- * if its length is set to be smaller than the size of sctp_sack_chunk_t.
+ * if its length is set to be smaller than the size of struct sctp_sack_chunk.
*
* We inform the other end by sending an ABORT with a Protocol Violation
* error code.
@@ -4536,18 +4565,18 @@ nomem:
*
* Generate an ABORT chunk and terminate the association.
*/
-static sctp_disposition_t sctp_sf_violation_chunklen(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+static enum sctp_disposition sctp_sf_violation_chunklen(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
static const char err_str[] = "The following chunk had invalid length:";
return sctp_sf_abort_violation(net, ep, asoc, arg, commands, err_str,
- sizeof(err_str));
+ sizeof(err_str));
}
/*
@@ -4556,17 +4585,17 @@ static sctp_disposition_t sctp_sf_violation_chunklen(
* or accumulated length in multi parameters exceeds the end of the chunk,
* the length is considered as invalid.
*/
-static sctp_disposition_t sctp_sf_violation_paramlen(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg, void *ext,
- sctp_cmd_seq_t *commands)
+static enum sctp_disposition sctp_sf_violation_paramlen(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg, void *ext,
+ struct sctp_cmd_seq *commands)
{
- struct sctp_chunk *chunk = arg;
struct sctp_paramhdr *param = ext;
struct sctp_chunk *abort = NULL;
+ struct sctp_chunk *chunk = arg;
if (sctp_auth_recv_cid(SCTP_CID_ABORT, asoc))
goto discard;
@@ -4599,18 +4628,18 @@ nomem:
* We inform the other end by sending an ABORT with a Protocol Violation
* error code.
*/
-static sctp_disposition_t sctp_sf_violation_ctsn(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+static enum sctp_disposition sctp_sf_violation_ctsn(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
static const char err_str[] = "The cumulative tsn ack beyond the max tsn currently sent:";
return sctp_sf_abort_violation(net, ep, asoc, arg, commands, err_str,
- sizeof(err_str));
+ sizeof(err_str));
}
/* Handle protocol violation of an invalid chunk bundling. For example,
@@ -4619,13 +4648,13 @@ static sctp_disposition_t sctp_sf_violation_ctsn(
* statement from the specs. Additionally, there might be an attacker
* on the path and we may not want to continue this communication.
*/
-static sctp_disposition_t sctp_sf_violation_chunk(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+static enum sctp_disposition sctp_sf_violation_chunk(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
static const char err_str[] = "The following chunk violates protocol:";
@@ -4633,7 +4662,7 @@ static sctp_disposition_t sctp_sf_violation_chunk(
return sctp_sf_violation(net, ep, asoc, type, arg, commands);
return sctp_sf_abort_violation(net, ep, asoc, arg, commands, err_str,
- sizeof(err_str));
+ sizeof(err_str));
}
/***************************************************************************
* These are the state functions for handling primitive (Section 10) events.
@@ -4695,15 +4724,15 @@ static sctp_disposition_t sctp_sf_violation_chunk(
*
* The return value is a disposition.
*/
-sctp_disposition_t sctp_sf_do_prm_asoc(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_prm_asoc(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
- struct sctp_chunk *repl;
struct sctp_association *my_asoc;
+ struct sctp_chunk *repl;
/* The comment below says that we enter COOKIE-WAIT AFTER
* sending the INIT, but that doesn't actually work in our
@@ -4807,12 +4836,12 @@ nomem:
*
* The return value is the disposition.
*/
-sctp_disposition_t sctp_sf_do_prm_send(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_prm_send(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
struct sctp_datamsg *msg = arg;
@@ -4846,15 +4875,15 @@ sctp_disposition_t sctp_sf_do_prm_send(struct net *net,
*
* The return value is the disposition.
*/
-sctp_disposition_t sctp_sf_do_9_2_prm_shutdown(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_9_2_prm_shutdown(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
- int disposition;
+ enum sctp_disposition disposition;
/* From 9.2 Shutdown of an Association
* Upon receipt of the SHUTDOWN primitive from its upper
@@ -4872,6 +4901,7 @@ sctp_disposition_t sctp_sf_do_9_2_prm_shutdown(
disposition = sctp_sf_do_9_2_start_shutdown(net, ep, asoc, type,
arg, commands);
}
+
return disposition;
}
@@ -4902,13 +4932,13 @@ sctp_disposition_t sctp_sf_do_9_2_prm_shutdown(
*
* The return value is the disposition.
*/
-sctp_disposition_t sctp_sf_do_9_1_prm_abort(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_9_1_prm_abort(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
/* From 9.1 Abort of an Association
* Upon receipt of the ABORT primitive from its upper
@@ -4940,12 +4970,12 @@ sctp_disposition_t sctp_sf_do_9_1_prm_abort(
}
/* We tried an illegal operation on an association which is closed. */
-sctp_disposition_t sctp_sf_error_closed(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_error_closed(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_ERROR, SCTP_ERROR(-EINVAL));
return SCTP_DISPOSITION_CONSUME;
@@ -4954,12 +4984,13 @@ sctp_disposition_t sctp_sf_error_closed(struct net *net,
/* We tried an illegal operation on an association which is shutting
* down.
*/
-sctp_disposition_t sctp_sf_error_shutdown(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_error_shutdown(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_ERROR,
SCTP_ERROR(-ESHUTDOWN));
@@ -4980,13 +5011,13 @@ sctp_disposition_t sctp_sf_error_shutdown(struct net *net,
* Outputs
* (timers)
*/
-sctp_disposition_t sctp_sf_cookie_wait_prm_shutdown(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_cookie_wait_prm_shutdown(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT));
@@ -5015,12 +5046,13 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_shutdown(
* Outputs
* (timers)
*/
-sctp_disposition_t sctp_sf_cookie_echoed_prm_shutdown(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg, sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_cookie_echoed_prm_shutdown(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
/* There is a single T1 timer, so we should be able to use
* common function with the COOKIE-WAIT state.
@@ -5042,13 +5074,13 @@ sctp_disposition_t sctp_sf_cookie_echoed_prm_shutdown(
* Outputs
* (timers)
*/
-sctp_disposition_t sctp_sf_cookie_wait_prm_abort(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_cookie_wait_prm_abort(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
struct sctp_chunk *abort = arg;
@@ -5091,13 +5123,13 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_abort(
* Outputs
* (timers)
*/
-sctp_disposition_t sctp_sf_cookie_echoed_prm_abort(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_cookie_echoed_prm_abort(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
/* There is a single T1 timer, so we should be able to use
* common function with the COOKIE-WAIT state.
@@ -5117,13 +5149,13 @@ sctp_disposition_t sctp_sf_cookie_echoed_prm_abort(
* Outputs
* (timers)
*/
-sctp_disposition_t sctp_sf_shutdown_pending_prm_abort(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_shutdown_pending_prm_abort(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
/* Stop the T5-shutdown guard timer. */
sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
@@ -5144,13 +5176,13 @@ sctp_disposition_t sctp_sf_shutdown_pending_prm_abort(
* Outputs
* (timers)
*/
-sctp_disposition_t sctp_sf_shutdown_sent_prm_abort(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_shutdown_sent_prm_abort(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
/* Stop the T2-shutdown timer. */
sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
@@ -5175,13 +5207,13 @@ sctp_disposition_t sctp_sf_shutdown_sent_prm_abort(
* Outputs
* (timers)
*/
-sctp_disposition_t sctp_sf_shutdown_ack_sent_prm_abort(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_shutdown_ack_sent_prm_abort(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
/* The same T2 timer, so we should be able to use
* common function with the SHUTDOWN-SENT state.
@@ -5211,13 +5243,13 @@ sctp_disposition_t sctp_sf_shutdown_ack_sent_prm_abort(
* o destination transport address - the transport address of the
* association on which a heartbeat should be issued.
*/
-sctp_disposition_t sctp_sf_do_prm_requestheartbeat(
+enum sctp_disposition sctp_sf_do_prm_requestheartbeat(
struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
- const sctp_subtype_t type,
+ const union sctp_subtype type,
void *arg,
- sctp_cmd_seq_t *commands)
+ struct sctp_cmd_seq *commands)
{
if (SCTP_DISPOSITION_NOMEM == sctp_sf_heartbeat(ep, asoc, type,
(struct sctp_transport *)arg, commands))
@@ -5244,12 +5276,12 @@ sctp_disposition_t sctp_sf_do_prm_requestheartbeat(
* When an endpoint has an ASCONF signaled change to be sent to the
* remote endpoint it should do A1 to A9
*/
-sctp_disposition_t sctp_sf_do_prm_asconf(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_prm_asconf(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
struct sctp_chunk *chunk = arg;
@@ -5261,11 +5293,12 @@ sctp_disposition_t sctp_sf_do_prm_asconf(struct net *net,
}
/* RE-CONFIG Section 5.1 RECONF Chunk Procedures */
-sctp_disposition_t sctp_sf_do_prm_reconf(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg, sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_prm_reconf(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
struct sctp_chunk *chunk = arg;
@@ -5278,13 +5311,13 @@ sctp_disposition_t sctp_sf_do_prm_reconf(struct net *net,
*
* The return value is the disposition of the primitive.
*/
-sctp_disposition_t sctp_sf_ignore_primitive(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_ignore_primitive(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
pr_debug("%s: primitive type:%d is ignored\n", __func__,
type.primitive);
@@ -5302,13 +5335,13 @@ sctp_disposition_t sctp_sf_ignore_primitive(
* subscribes to this event, if there is no data to be sent or
* retransmit, the stack will immediately send up this notification.
*/
-sctp_disposition_t sctp_sf_do_no_pending_tsn(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_no_pending_tsn(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
struct sctp_ulpevent *event;
@@ -5334,13 +5367,13 @@ sctp_disposition_t sctp_sf_do_no_pending_tsn(
*
* The return value is the disposition.
*/
-sctp_disposition_t sctp_sf_do_9_2_start_shutdown(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_9_2_start_shutdown(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
struct sctp_chunk *reply;
@@ -5404,15 +5437,15 @@ nomem:
*
* The return value is the disposition.
*/
-sctp_disposition_t sctp_sf_do_9_2_shutdown_ack(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_9_2_shutdown_ack(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
- struct sctp_chunk *chunk = (struct sctp_chunk *) arg;
+ struct sctp_chunk *chunk = arg;
struct sctp_chunk *reply;
/* There are 2 ways of getting here:
@@ -5424,12 +5457,14 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown_ack(
*/
if (chunk) {
if (!sctp_vtag_verify(chunk, asoc))
- return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg,
+ commands);
/* Make sure that the SHUTDOWN chunk has a valid length. */
- if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_shutdown_chunk_t)))
- return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
- commands);
+ if (!sctp_chunk_length_valid(
+ chunk, sizeof(struct sctp_shutdown_chunk)))
+ return sctp_sf_violation_chunklen(net, ep, asoc, type,
+ arg, commands);
}
/* If it has no more outstanding DATA chunks, the SHUTDOWN receiver
@@ -5476,12 +5511,12 @@ nomem:
*
* The return value is the disposition of the event.
*/
-sctp_disposition_t sctp_sf_ignore_other(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_ignore_other(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
pr_debug("%s: the event other type:%d is ignored\n",
__func__, type.other);
@@ -5504,12 +5539,12 @@ sctp_disposition_t sctp_sf_ignore_other(struct net *net,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_do_6_3_3_rtx(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_6_3_3_rtx(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
struct sctp_transport *transport = arg;
@@ -5592,12 +5627,12 @@ sctp_disposition_t sctp_sf_do_6_3_3_rtx(struct net *net,
* allow. However, an SCTP transmitter MUST NOT be more aggressive than
* the following algorithms allow.
*/
-sctp_disposition_t sctp_sf_do_6_2_sack(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_6_2_sack(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
SCTP_INC_STATS(net, SCTP_MIB_DELAY_SACK_EXPIREDS);
sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_FORCE());
@@ -5623,16 +5658,17 @@ sctp_disposition_t sctp_sf_do_6_2_sack(struct net *net,
* (timers, events)
*
*/
-sctp_disposition_t sctp_sf_t1_init_timer_expire(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_t1_init_timer_expire(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
+ int attempts = asoc->init_err_counter + 1;
struct sctp_chunk *repl = NULL;
struct sctp_bind_addr *bp;
- int attempts = asoc->init_err_counter + 1;
pr_debug("%s: timer T1 expired (INIT)\n", __func__);
@@ -5687,15 +5723,16 @@ sctp_disposition_t sctp_sf_t1_init_timer_expire(struct net *net,
* (timers, events)
*
*/
-sctp_disposition_t sctp_sf_t1_cookie_timer_expire(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_t1_cookie_timer_expire(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
- struct sctp_chunk *repl = NULL;
int attempts = asoc->init_err_counter + 1;
+ struct sctp_chunk *repl = NULL;
pr_debug("%s: timer T1 expired (COOKIE-ECHO)\n", __func__);
@@ -5737,12 +5774,13 @@ sctp_disposition_t sctp_sf_t1_cookie_timer_expire(struct net *net,
* the T2-Shutdown timer, giving its peer ample opportunity to transmit
* all of its queued DATA chunks that have not yet been sent.
*/
-sctp_disposition_t sctp_sf_t2_timer_expire(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_t2_timer_expire(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
struct sctp_chunk *reply = NULL;
@@ -5807,13 +5845,13 @@ nomem:
* ADDIP Section 4.1 ASCONF CHunk Procedures
* If the T4 RTO timer expires the endpoint should do B1 to B5
*/
-sctp_disposition_t sctp_sf_t4_timer_expire(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_t4_timer_expire(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
struct sctp_chunk *chunk = asoc->addip_last_asconf;
struct sctp_transport *transport = chunk->transport;
@@ -5879,12 +5917,13 @@ sctp_disposition_t sctp_sf_t4_timer_expire(
* At the expiration of this timer the sender SHOULD abort the association
* by sending an ABORT chunk.
*/
-sctp_disposition_t sctp_sf_t5_timer_expire(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_t5_timer_expire(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
struct sctp_chunk *reply = NULL;
@@ -5915,15 +5954,15 @@ nomem:
* The work that needs to be done is same as when SHUTDOWN is initiated by
* the user. So this routine looks same as sctp_sf_do_9_2_prm_shutdown().
*/
-sctp_disposition_t sctp_sf_autoclose_timer_expire(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_autoclose_timer_expire(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
- int disposition;
+ enum sctp_disposition disposition;
SCTP_INC_STATS(net, SCTP_MIB_AUTOCLOSE_EXPIREDS);
@@ -5943,6 +5982,7 @@ sctp_disposition_t sctp_sf_autoclose_timer_expire(
disposition = sctp_sf_do_9_2_start_shutdown(net, ep, asoc, type,
arg, commands);
}
+
return disposition;
}
@@ -5958,12 +5998,11 @@ sctp_disposition_t sctp_sf_autoclose_timer_expire(
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_not_impl(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_not_impl(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg, struct sctp_cmd_seq *commands)
{
return SCTP_DISPOSITION_NOT_IMPL;
}
@@ -5976,12 +6015,11 @@ sctp_disposition_t sctp_sf_not_impl(struct net *net,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_bug(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_bug(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg, struct sctp_cmd_seq *commands)
{
return SCTP_DISPOSITION_BUG;
}
@@ -5997,12 +6035,12 @@ sctp_disposition_t sctp_sf_bug(struct net *net,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_timer_ignore(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_timer_ignore(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
pr_debug("%s: timer %d ignored\n", __func__, type.chunk);
@@ -6017,9 +6055,9 @@ sctp_disposition_t sctp_sf_timer_ignore(struct net *net,
static struct sctp_sackhdr *sctp_sm_pull_sack(struct sctp_chunk *chunk)
{
struct sctp_sackhdr *sack;
+ __u16 num_dup_tsns;
unsigned int len;
__u16 num_blocks;
- __u16 num_dup_tsns;
/* Protect ourselves from reading too far into
* the skb from a bogus sender.
@@ -6041,12 +6079,12 @@ static struct sctp_sackhdr *sctp_sm_pull_sack(struct sctp_chunk *chunk)
/* Create an ABORT packet to be sent as a response, with the specified
* error causes.
*/
-static struct sctp_packet *sctp_abort_pkt_new(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- struct sctp_chunk *chunk,
- const void *payload,
- size_t paylen)
+static struct sctp_packet *sctp_abort_pkt_new(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ struct sctp_chunk *chunk,
+ const void *payload, size_t paylen)
{
struct sctp_packet *packet;
struct sctp_chunk *abort;
@@ -6083,14 +6121,14 @@ static struct sctp_packet *sctp_abort_pkt_new(struct net *net,
}
/* Allocate a packet for responding in the OOTB conditions. */
-static struct sctp_packet *sctp_ootb_pkt_new(struct net *net,
- const struct sctp_association *asoc,
- const struct sctp_chunk *chunk)
+static struct sctp_packet *sctp_ootb_pkt_new(
+ struct net *net,
+ const struct sctp_association *asoc,
+ const struct sctp_chunk *chunk)
{
- struct sctp_packet *packet;
struct sctp_transport *transport;
- __u16 sport;
- __u16 dport;
+ struct sctp_packet *packet;
+ __u16 sport, dport;
__u32 vtag;
/* Get the source and destination port from the inbound packet. */
@@ -6107,9 +6145,9 @@ static struct sctp_packet *sctp_ootb_pkt_new(struct net *net,
switch (chunk->chunk_hdr->type) {
case SCTP_CID_INIT_ACK:
{
- sctp_initack_chunk_t *initack;
+ struct sctp_initack_chunk *initack;
- initack = (sctp_initack_chunk_t *)chunk->chunk_hdr;
+ initack = (struct sctp_initack_chunk *)chunk->chunk_hdr;
vtag = ntohl(initack->init_hdr.init_tag);
break;
}
@@ -6168,7 +6206,7 @@ static void sctp_send_stale_cookie_err(struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const struct sctp_chunk *chunk,
- sctp_cmd_seq_t *commands,
+ struct sctp_cmd_seq *commands,
struct sctp_chunk *err_chunk)
{
struct sctp_packet *packet;
@@ -6197,20 +6235,19 @@ static void sctp_send_stale_cookie_err(struct net *net,
/* Process a data chunk */
static int sctp_eat_data(const struct sctp_association *asoc,
struct sctp_chunk *chunk,
- sctp_cmd_seq_t *commands)
+ struct sctp_cmd_seq *commands)
{
- struct sctp_datahdr *data_hdr;
- struct sctp_chunk *err;
- size_t datalen;
- sctp_verb_t deliver;
- int tmp;
- __u32 tsn;
struct sctp_tsnmap *map = (struct sctp_tsnmap *)&asoc->peer.tsn_map;
struct sock *sk = asoc->base.sk;
struct net *net = sock_net(sk);
- u16 ssn;
- u16 sid;
+ struct sctp_datahdr *data_hdr;
+ struct sctp_chunk *err;
+ enum sctp_verb deliver;
+ size_t datalen;
u8 ordered = 0;
+ u16 ssn, sid;
+ __u32 tsn;
+ int tmp;
data_hdr = (struct sctp_datahdr *)chunk->skb->data;
chunk->subh.data_hdr = data_hdr;
diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c
index 3e958c1c4b95..79b6bee5b768 100644
--- a/net/sctp/sm_statetable.c
+++ b/net/sctp/sm_statetable.c
@@ -45,26 +45,27 @@
#include <net/sctp/sctp.h>
#include <net/sctp/sm.h>
-static const sctp_sm_table_entry_t
+static const struct sctp_sm_table_entry
primitive_event_table[SCTP_NUM_PRIMITIVE_TYPES][SCTP_STATE_NUM_STATES];
-static const sctp_sm_table_entry_t
+static const struct sctp_sm_table_entry
other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_STATE_NUM_STATES];
-static const sctp_sm_table_entry_t
+static const struct sctp_sm_table_entry
timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][SCTP_STATE_NUM_STATES];
-static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(struct net *net,
- enum sctp_cid cid,
- sctp_state_t state);
+static const struct sctp_sm_table_entry *sctp_chunk_event_lookup(
+ struct net *net,
+ enum sctp_cid cid,
+ enum sctp_state state);
-static const sctp_sm_table_entry_t bug = {
+static const struct sctp_sm_table_entry bug = {
.fn = sctp_sf_bug,
.name = "sctp_sf_bug"
};
#define DO_LOOKUP(_max, _type, _table) \
({ \
- const sctp_sm_table_entry_t *rtn; \
+ const struct sctp_sm_table_entry *rtn; \
\
if ((event_subtype._type > (_max))) { \
pr_warn("table %p possible attack: event %d exceeds max %d\n", \
@@ -76,10 +77,11 @@ static const sctp_sm_table_entry_t bug = {
rtn; \
})
-const sctp_sm_table_entry_t *sctp_sm_lookup_event(struct net *net,
- sctp_event_t event_type,
- sctp_state_t state,
- sctp_subtype_t event_subtype)
+const struct sctp_sm_table_entry *sctp_sm_lookup_event(
+ struct net *net,
+ enum sctp_event event_type,
+ enum sctp_state state,
+ union sctp_subtype event_subtype)
{
switch (event_type) {
case SCTP_EVENT_T_CHUNK:
@@ -392,7 +394,8 @@ const sctp_sm_table_entry_t *sctp_sm_lookup_event(struct net *net,
*
* For base protocol (RFC 2960).
*/
-static const sctp_sm_table_entry_t chunk_event_table[SCTP_NUM_BASE_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = {
+static const struct sctp_sm_table_entry
+chunk_event_table[SCTP_NUM_BASE_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = {
TYPE_SCTP_DATA,
TYPE_SCTP_INIT,
TYPE_SCTP_INIT_ACK,
@@ -451,7 +454,8 @@ static const sctp_sm_table_entry_t chunk_event_table[SCTP_NUM_BASE_CHUNK_TYPES][
/* The primary index for this table is the chunk type.
* The secondary index for this table is the state.
*/
-static const sctp_sm_table_entry_t addip_chunk_event_table[SCTP_NUM_ADDIP_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = {
+static const struct sctp_sm_table_entry
+addip_chunk_event_table[SCTP_NUM_ADDIP_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = {
TYPE_SCTP_ASCONF,
TYPE_SCTP_ASCONF_ACK,
}; /*state_fn_t addip_chunk_event_table[][] */
@@ -478,7 +482,8 @@ static const sctp_sm_table_entry_t addip_chunk_event_table[SCTP_NUM_ADDIP_CHUNK_
/* The primary index for this table is the chunk type.
* The secondary index for this table is the state.
*/
-static const sctp_sm_table_entry_t prsctp_chunk_event_table[SCTP_NUM_PRSCTP_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = {
+static const struct sctp_sm_table_entry
+prsctp_chunk_event_table[SCTP_NUM_PRSCTP_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = {
TYPE_SCTP_FWD_TSN,
}; /*state_fn_t prsctp_chunk_event_table[][] */
@@ -504,7 +509,8 @@ static const sctp_sm_table_entry_t prsctp_chunk_event_table[SCTP_NUM_PRSCTP_CHUN
/* The primary index for this table is the chunk type.
* The secondary index for this table is the state.
*/
-static const sctp_sm_table_entry_t reconf_chunk_event_table[SCTP_NUM_RECONF_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = {
+static const struct sctp_sm_table_entry
+reconf_chunk_event_table[SCTP_NUM_RECONF_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = {
TYPE_SCTP_RECONF,
}; /*state_fn_t reconf_chunk_event_table[][] */
@@ -530,11 +536,12 @@ static const sctp_sm_table_entry_t reconf_chunk_event_table[SCTP_NUM_RECONF_CHUN
/* The primary index for this table is the chunk type.
* The secondary index for this table is the state.
*/
-static const sctp_sm_table_entry_t auth_chunk_event_table[SCTP_NUM_AUTH_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = {
+static const struct sctp_sm_table_entry
+auth_chunk_event_table[SCTP_NUM_AUTH_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = {
TYPE_SCTP_AUTH,
}; /*state_fn_t auth_chunk_event_table[][] */
-static const sctp_sm_table_entry_t
+static const struct sctp_sm_table_entry
chunk_event_table_unknown[SCTP_STATE_NUM_STATES] = {
/* SCTP_STATE_CLOSED */
TYPE_SCTP_FUNC(sctp_sf_ootb),
@@ -691,7 +698,8 @@ chunk_event_table_unknown[SCTP_STATE_NUM_STATES] = {
/* The primary index for this table is the primitive type.
* The secondary index for this table is the state.
*/
-static const sctp_sm_table_entry_t primitive_event_table[SCTP_NUM_PRIMITIVE_TYPES][SCTP_STATE_NUM_STATES] = {
+static const struct sctp_sm_table_entry
+primitive_event_table[SCTP_NUM_PRIMITIVE_TYPES][SCTP_STATE_NUM_STATES] = {
TYPE_SCTP_PRIMITIVE_ASSOCIATE,
TYPE_SCTP_PRIMITIVE_SHUTDOWN,
TYPE_SCTP_PRIMITIVE_ABORT,
@@ -739,7 +747,8 @@ static const sctp_sm_table_entry_t primitive_event_table[SCTP_NUM_PRIMITIVE_TYPE
TYPE_SCTP_FUNC(sctp_sf_ignore_other), \
}
-static const sctp_sm_table_entry_t other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_STATE_NUM_STATES] = {
+static const struct sctp_sm_table_entry
+other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_STATE_NUM_STATES] = {
TYPE_SCTP_OTHER_NO_PENDING_TSN,
TYPE_SCTP_OTHER_ICMP_PROTO_UNREACH,
};
@@ -953,7 +962,8 @@ static const sctp_sm_table_entry_t other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_
TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
}
-static const sctp_sm_table_entry_t timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][SCTP_STATE_NUM_STATES] = {
+static const struct sctp_sm_table_entry
+timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][SCTP_STATE_NUM_STATES] = {
TYPE_SCTP_EVENT_TIMEOUT_NONE,
TYPE_SCTP_EVENT_TIMEOUT_T1_COOKIE,
TYPE_SCTP_EVENT_TIMEOUT_T1_INIT,
@@ -967,9 +977,10 @@ static const sctp_sm_table_entry_t timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][S
TYPE_SCTP_EVENT_TIMEOUT_AUTOCLOSE,
};
-static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(struct net *net,
- enum sctp_cid cid,
- sctp_state_t state)
+static const struct sctp_sm_table_entry *sctp_chunk_event_lookup(
+ struct net *net,
+ enum sctp_cid cid,
+ enum sctp_state state)
{
if (state > SCTP_STATE_MAX)
return &bug;
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 1db478e34520..3204a9b29407 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -79,12 +79,13 @@
#include <net/sock.h>
#include <net/sctp/sctp.h>
#include <net/sctp/sm.h>
+#include <net/sctp/stream_sched.h>
/* Forward declarations for internal helper functions. */
static int sctp_writeable(struct sock *sk);
static void sctp_wfree(struct sk_buff *skb);
-static int sctp_wait_for_sndbuf(struct sctp_association *, long *timeo_p,
- size_t msg_len);
+static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
+ size_t msg_len, struct sock **orig_sk);
static int sctp_wait_for_packet(struct sock *sk, int *err, long *timeo_p);
static int sctp_wait_for_connect(struct sctp_association *, long *timeo_p);
static int sctp_wait_for_accept(struct sock *sk, long timeo);
@@ -100,8 +101,9 @@ static int sctp_send_asconf(struct sctp_association *asoc,
struct sctp_chunk *chunk);
static int sctp_do_bind(struct sock *, union sctp_addr *, int);
static int sctp_autobind(struct sock *sk);
-static void sctp_sock_migrate(struct sock *, struct sock *,
- struct sctp_association *, sctp_socket_type_t);
+static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
+ struct sctp_association *assoc,
+ enum sctp_socket_type type);
static unsigned long sctp_memory_pressure;
static atomic_long_t sctp_memory_allocated;
@@ -169,6 +171,36 @@ static inline void sctp_set_owner_w(struct sctp_chunk *chunk)
sk_mem_charge(sk, chunk->skb->truesize);
}
+static void sctp_clear_owner_w(struct sctp_chunk *chunk)
+{
+ skb_orphan(chunk->skb);
+}
+
+static void sctp_for_each_tx_datachunk(struct sctp_association *asoc,
+ void (*cb)(struct sctp_chunk *))
+
+{
+ struct sctp_outq *q = &asoc->outqueue;
+ struct sctp_transport *t;
+ struct sctp_chunk *chunk;
+
+ list_for_each_entry(t, &asoc->peer.transport_addr_list, transports)
+ list_for_each_entry(chunk, &t->transmitted, transmitted_list)
+ cb(chunk);
+
+ list_for_each_entry(chunk, &q->retransmit, list)
+ cb(chunk);
+
+ list_for_each_entry(chunk, &q->sacked, list)
+ cb(chunk);
+
+ list_for_each_entry(chunk, &q->abandoned, list)
+ cb(chunk);
+
+ list_for_each_entry(chunk, &q->out_chunk_list, list)
+ cb(chunk);
+}
+
/* Verify that this is a valid address. */
static inline int sctp_verify_addr(struct sock *sk, union sctp_addr *addr,
int len)
@@ -1055,7 +1087,7 @@ static int __sctp_connect(struct sock *sk,
struct sctp_association *asoc2;
struct sctp_transport *transport;
union sctp_addr to;
- sctp_scope_t scope;
+ enum sctp_scope scope;
long timeo;
int err = 0;
int addrcnt = 0;
@@ -1593,7 +1625,8 @@ static int sctp_error(struct sock *sk, int flags, int err)
*/
/* BUG: We do not implement the equivalent of sk_stream_wait_memory(). */
-static int sctp_msghdr_parse(const struct msghdr *, sctp_cmsgs_t *);
+static int sctp_msghdr_parse(const struct msghdr *msg,
+ struct sctp_cmsgs *cmsgs);
static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
{
@@ -1609,8 +1642,8 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
struct sctp_sndrcvinfo *sinfo;
struct sctp_initmsg *sinit;
sctp_assoc_t associd = 0;
- sctp_cmsgs_t cmsgs = { NULL };
- sctp_scope_t scope;
+ struct sctp_cmsgs cmsgs = { NULL };
+ enum sctp_scope scope;
bool fill_sinfo_ttl = false, wait_connect = false;
struct sctp_datamsg *datamsg;
int msg_flags = msg->msg_flags;
@@ -1925,14 +1958,28 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
goto out_free;
}
+ /* Allocate sctp_stream_out_ext if not already done */
+ if (unlikely(!asoc->stream.out[sinfo->sinfo_stream].ext)) {
+ err = sctp_stream_init_ext(&asoc->stream, sinfo->sinfo_stream);
+ if (err)
+ goto out_free;
+ }
+
if (sctp_wspace(asoc) < msg_len)
sctp_prsctp_prune(asoc, sinfo, msg_len - sctp_wspace(asoc));
timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
if (!sctp_wspace(asoc)) {
- err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len);
- if (err)
+ /* sk can be changed by peel off when waiting for buf. */
+ err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len, &sk);
+ if (err) {
+ if (err == -ESRCH) {
+ /* asoc is already dead. */
+ new_asoc = NULL;
+ err = -EPIPE;
+ }
goto out_free;
+ }
}
/* If an address is passed with the sendto/sendmsg call, it is used
@@ -3093,9 +3140,9 @@ static int sctp_setsockopt_mappedv4(struct sock *sk, char __user *optval, unsign
*/
static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned int optlen)
{
+ struct sctp_sock *sp = sctp_sk(sk);
struct sctp_assoc_value params;
struct sctp_association *asoc;
- struct sctp_sock *sp = sctp_sk(sk);
int val;
if (optlen == sizeof(int)) {
@@ -3111,26 +3158,35 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned
if (copy_from_user(&params, optval, optlen))
return -EFAULT;
val = params.assoc_value;
- } else
+ } else {
return -EINVAL;
+ }
- if ((val != 0) && ((val < 8) || (val > SCTP_MAX_CHUNK_LEN)))
- return -EINVAL;
+ if (val) {
+ int min_len, max_len;
- asoc = sctp_id2assoc(sk, params.assoc_id);
- if (!asoc && params.assoc_id && sctp_style(sk, UDP))
- return -EINVAL;
+ min_len = SCTP_DEFAULT_MINSEGMENT - sp->pf->af->net_header_len;
+ min_len -= sizeof(struct sctphdr) +
+ sizeof(struct sctp_data_chunk);
+
+ max_len = SCTP_MAX_CHUNK_LEN - sizeof(struct sctp_data_chunk);
+
+ if (val < min_len || val > max_len)
+ return -EINVAL;
+ }
+ asoc = sctp_id2assoc(sk, params.assoc_id);
if (asoc) {
if (val == 0) {
- val = asoc->pathmtu;
- val -= sp->pf->af->net_header_len;
+ val = asoc->pathmtu - sp->pf->af->net_header_len;
val -= sizeof(struct sctphdr) +
- sizeof(struct sctp_data_chunk);
+ sizeof(struct sctp_data_chunk);
}
asoc->user_frag = val;
asoc->frag_point = sctp_frag_point(asoc, asoc->pathmtu);
} else {
+ if (params.assoc_id && sctp_style(sk, UDP))
+ return -EINVAL;
sp->user_frag = val;
}
@@ -3905,6 +3961,64 @@ out:
return retval;
}
+static int sctp_setsockopt_scheduler(struct sock *sk,
+ char __user *optval,
+ unsigned int optlen)
+{
+ struct sctp_association *asoc;
+ struct sctp_assoc_value params;
+ int retval = -EINVAL;
+
+ if (optlen < sizeof(params))
+ goto out;
+
+ optlen = sizeof(params);
+ if (copy_from_user(&params, optval, optlen)) {
+ retval = -EFAULT;
+ goto out;
+ }
+
+ if (params.assoc_value > SCTP_SS_MAX)
+ goto out;
+
+ asoc = sctp_id2assoc(sk, params.assoc_id);
+ if (!asoc)
+ goto out;
+
+ retval = sctp_sched_set_sched(asoc, params.assoc_value);
+
+out:
+ return retval;
+}
+
+static int sctp_setsockopt_scheduler_value(struct sock *sk,
+ char __user *optval,
+ unsigned int optlen)
+{
+ struct sctp_association *asoc;
+ struct sctp_stream_value params;
+ int retval = -EINVAL;
+
+ if (optlen < sizeof(params))
+ goto out;
+
+ optlen = sizeof(params);
+ if (copy_from_user(&params, optval, optlen)) {
+ retval = -EFAULT;
+ goto out;
+ }
+
+ asoc = sctp_id2assoc(sk, params.assoc_id);
+ if (!asoc)
+ goto out;
+
+ retval = sctp_sched_set_value(asoc, params.stream_id,
+ params.stream_value, GFP_KERNEL);
+
+out:
+ return retval;
+}
+
/* API 6.2 setsockopt(), getsockopt()
*
* Applications use setsockopt() and getsockopt() to set or retrieve
@@ -4086,6 +4200,12 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname,
case SCTP_ADD_STREAMS:
retval = sctp_setsockopt_add_streams(sk, optval, optlen);
break;
+ case SCTP_STREAM_SCHEDULER:
+ retval = sctp_setsockopt_scheduler(sk, optval, optlen);
+ break;
+ case SCTP_STREAM_SCHEDULER_VALUE:
+ retval = sctp_setsockopt_scheduler_value(sk, optval, optlen);
+ break;
default:
retval = -ENOPROTOOPT;
break;
@@ -4538,8 +4658,7 @@ int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc,
info->sctpi_ictrlchunks = asoc->stats.ictrlchunks;
prim = asoc->peer.primary_path;
- memcpy(&info->sctpi_p_address, &prim->ipaddr,
- sizeof(struct sockaddr_storage));
+ memcpy(&info->sctpi_p_address, &prim->ipaddr, sizeof(prim->ipaddr));
info->sctpi_p_state = prim->state;
info->sctpi_p_cwnd = prim->cwnd;
info->sctpi_p_srtt = prim->srtt;
@@ -4657,29 +4776,39 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *),
EXPORT_SYMBOL_GPL(sctp_transport_lookup_process);
int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *),
- struct net *net, int pos, void *p) {
+ int (*cb_done)(struct sctp_transport *, void *),
+ struct net *net, int *pos, void *p) {
struct rhashtable_iter hti;
- void *obj;
- int err;
-
- err = sctp_transport_walk_start(&hti);
- if (err)
- return err;
+ struct sctp_transport *tsp;
+ int ret;
- obj = sctp_transport_get_idx(net, &hti, pos + 1);
- for (; !IS_ERR_OR_NULL(obj); obj = sctp_transport_get_next(net, &hti)) {
- struct sctp_transport *transport = obj;
+again:
+ ret = sctp_transport_walk_start(&hti);
+ if (ret)
+ return ret;
- if (!sctp_transport_hold(transport))
+ tsp = sctp_transport_get_idx(net, &hti, *pos + 1);
+ for (; !IS_ERR_OR_NULL(tsp); tsp = sctp_transport_get_next(net, &hti)) {
+ if (!sctp_transport_hold(tsp))
continue;
- err = cb(transport, p);
- sctp_transport_put(transport);
- if (err)
+ ret = cb(tsp, p);
+ if (ret)
break;
+ (*pos)++;
+ sctp_transport_put(tsp);
}
sctp_transport_walk_stop(&hti);
- return err;
+ if (ret) {
+ if (cb_done && !cb_done(tsp, p)) {
+ (*pos)++;
+ sctp_transport_put(tsp);
+ goto again;
+ }
+ sctp_transport_put(tsp);
+ }
+
+ return ret;
}
EXPORT_SYMBOL_GPL(sctp_for_each_transport);
@@ -4895,14 +5024,12 @@ int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp)
struct socket *sock;
int err = 0;
- if (!asoc)
+ /* Do not peel off from one netns to another one. */
+ if (!net_eq(current->nsproxy->net_ns, sock_net(sk)))
return -EINVAL;
- /* If there is a thread waiting on more sndbuf space for
- * sending on this asoc, it cannot be peeled.
- */
- if (waitqueue_active(&asoc->wait))
- return -EBUSY;
+ if (!asoc)
+ return -EINVAL;
/* An association cannot be branched off from an already peeled-off
* socket, nor is this supported for tcp style sockets.
@@ -6634,7 +6761,7 @@ static int sctp_getsockopt_pr_streamstatus(struct sock *sk, int len,
char __user *optval,
int __user *optlen)
{
- struct sctp_stream_out *streamout;
+ struct sctp_stream_out_ext *streamoute;
struct sctp_association *asoc;
struct sctp_prstatus params;
int retval = -EINVAL;
@@ -6657,21 +6784,29 @@ static int sctp_getsockopt_pr_streamstatus(struct sock *sk, int len,
if (!asoc || params.sprstat_sid >= asoc->stream.outcnt)
goto out;
- streamout = &asoc->stream.out[params.sprstat_sid];
+ streamoute = asoc->stream.out[params.sprstat_sid].ext;
+ if (!streamoute) {
+ /* Not allocated yet, means all stats are 0 */
+ params.sprstat_abandoned_unsent = 0;
+ params.sprstat_abandoned_sent = 0;
+ retval = 0;
+ goto out;
+ }
+
if (policy == SCTP_PR_SCTP_NONE) {
params.sprstat_abandoned_unsent = 0;
params.sprstat_abandoned_sent = 0;
for (policy = 0; policy <= SCTP_PR_INDEX(MAX); policy++) {
params.sprstat_abandoned_unsent +=
- streamout->abandoned_unsent[policy];
+ streamoute->abandoned_unsent[policy];
params.sprstat_abandoned_sent +=
- streamout->abandoned_sent[policy];
+ streamoute->abandoned_sent[policy];
}
} else {
params.sprstat_abandoned_unsent =
- streamout->abandoned_unsent[__SCTP_PR_INDEX(policy)];
+ streamoute->abandoned_unsent[__SCTP_PR_INDEX(policy)];
params.sprstat_abandoned_sent =
- streamout->abandoned_sent[__SCTP_PR_INDEX(policy)];
+ streamoute->abandoned_sent[__SCTP_PR_INDEX(policy)];
}
if (put_user(len, optlen) || copy_to_user(optval, &params, len)) {
@@ -6767,6 +6902,85 @@ out:
return retval;
}
+static int sctp_getsockopt_scheduler(struct sock *sk, int len,
+ char __user *optval,
+ int __user *optlen)
+{
+ struct sctp_assoc_value params;
+ struct sctp_association *asoc;
+ int retval = -EFAULT;
+
+ if (len < sizeof(params)) {
+ retval = -EINVAL;
+ goto out;
+ }
+
+ len = sizeof(params);
+ if (copy_from_user(&params, optval, len))
+ goto out;
+
+ asoc = sctp_id2assoc(sk, params.assoc_id);
+ if (!asoc) {
+ retval = -EINVAL;
+ goto out;
+ }
+
+ params.assoc_value = sctp_sched_get_sched(asoc);
+
+ if (put_user(len, optlen))
+ goto out;
+
+ if (copy_to_user(optval, &params, len))
+ goto out;
+
+ retval = 0;
+
+out:
+ return retval;
+}
+
+static int sctp_getsockopt_scheduler_value(struct sock *sk, int len,
+ char __user *optval,
+ int __user *optlen)
+{
+ struct sctp_stream_value params;
+ struct sctp_association *asoc;
+ int retval = -EFAULT;
+
+ if (len < sizeof(params)) {
+ retval = -EINVAL;
+ goto out;
+ }
+
+ len = sizeof(params);
+ if (copy_from_user(&params, optval, len))
+ goto out;
+
+ asoc = sctp_id2assoc(sk, params.assoc_id);
+ if (!asoc) {
+ retval = -EINVAL;
+ goto out;
+ }
+
+ retval = sctp_sched_get_value(asoc, params.stream_id,
+ &params.stream_value);
+ if (retval)
+ goto out;
+
+ if (put_user(len, optlen)) {
+ retval = -EFAULT;
+ goto out;
+ }
+
+ if (copy_to_user(optval, &params, len)) {
+ retval = -EFAULT;
+ goto out;
+ }
+
+out:
+ return retval;
+}
+
static int sctp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen)
{
@@ -6949,6 +7163,14 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
retval = sctp_getsockopt_enable_strreset(sk, len, optval,
optlen);
break;
+ case SCTP_STREAM_SCHEDULER:
+ retval = sctp_getsockopt_scheduler(sk, len, optval,
+ optlen);
+ break;
+ case SCTP_STREAM_SCHEDULER_VALUE:
+ retval = sctp_getsockopt_scheduler_value(sk, len, optval,
+ optlen);
+ break;
default:
retval = -ENOPROTOOPT;
break;
@@ -7445,10 +7667,10 @@ static int sctp_autobind(struct sock *sk)
* msg_control
* points here
*/
-static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs)
+static int sctp_msghdr_parse(const struct msghdr *msg, struct sctp_cmsgs *cmsgs)
{
- struct cmsghdr *cmsg;
struct msghdr *my_msg = (struct msghdr *)msg;
+ struct cmsghdr *cmsg;
for_each_cmsghdr(cmsg, my_msg) {
if (!CMSG_OK(my_msg, cmsg))
@@ -7777,7 +7999,7 @@ void sctp_sock_rfree(struct sk_buff *skb)
/* Helper function to wait for space in the sndbuf. */
static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
- size_t msg_len)
+ size_t msg_len, struct sock **orig_sk)
{
struct sock *sk = asoc->base.sk;
int err = 0;
@@ -7794,10 +8016,11 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
for (;;) {
prepare_to_wait_exclusive(&asoc->wait, &wait,
TASK_INTERRUPTIBLE);
+ if (asoc->base.dead)
+ goto do_dead;
if (!*timeo_p)
goto do_nonblock;
- if (sk->sk_err || asoc->state >= SCTP_STATE_SHUTDOWN_PENDING ||
- asoc->base.dead)
+ if (sk->sk_err || asoc->state >= SCTP_STATE_SHUTDOWN_PENDING)
goto do_error;
if (signal_pending(current))
goto do_interrupted;
@@ -7810,11 +8033,17 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
release_sock(sk);
current_timeo = schedule_timeout(current_timeo);
lock_sock(sk);
+ if (sk != asoc->base.sk) {
+ release_sock(sk);
+ sk = asoc->base.sk;
+ lock_sock(sk);
+ }
*timeo_p = current_timeo;
}
out:
+ *orig_sk = sk;
finish_wait(&asoc->wait, &wait);
/* Release the association's refcnt. */
@@ -7822,6 +8051,10 @@ out:
return err;
+do_dead:
+ err = -ESRCH;
+ goto out;
+
do_error:
err = -EPIPE;
goto out;
@@ -8085,7 +8318,7 @@ static inline void sctp_copy_descendant(struct sock *sk_to,
*/
static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
struct sctp_association *assoc,
- sctp_socket_type_t type)
+ enum sctp_socket_type type)
{
struct sctp_sock *oldsp = sctp_sk(oldsk);
struct sctp_sock *newsp = sctp_sk(newsk);
@@ -8197,7 +8430,9 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
* paths won't try to lock it and then oldsk.
*/
lock_sock_nested(newsk, SINGLE_DEPTH_NESTING);
+ sctp_for_each_tx_datachunk(assoc, sctp_clear_owner_w);
sctp_assoc_migrate(assoc, newsk);
+ sctp_for_each_tx_datachunk(assoc, sctp_set_owner_w);
/* If the association on the newsk is already closed before accept()
* is called, set RCV_SHUTDOWN flag.
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index 63ea15503714..a11db21dc8a0 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -32,44 +32,181 @@
* Xin Long <lucien.xin@gmail.com>
*/
+#include <linux/list.h>
#include <net/sctp/sctp.h>
#include <net/sctp/sm.h>
+#include <net/sctp/stream_sched.h>
+
+/* Migrates chunks from stream queues to new stream queues if needed,
+ * but not across associations. Also, removes those chunks to streams
+ * higher than the new max.
+ */
+static void sctp_stream_outq_migrate(struct sctp_stream *stream,
+ struct sctp_stream *new, __u16 outcnt)
+{
+ struct sctp_association *asoc;
+ struct sctp_chunk *ch, *temp;
+ struct sctp_outq *outq;
+ int i;
+
+ asoc = container_of(stream, struct sctp_association, stream);
+ outq = &asoc->outqueue;
+
+ list_for_each_entry_safe(ch, temp, &outq->out_chunk_list, list) {
+ __u16 sid = sctp_chunk_stream_no(ch);
+
+ if (sid < outcnt)
+ continue;
+
+ sctp_sched_dequeue_common(outq, ch);
+ /* No need to call dequeue_done here because
+ * the chunks are not scheduled by now.
+ */
+
+ /* Mark as failed send. */
+ sctp_chunk_fail(ch, SCTP_ERROR_INV_STRM);
+ if (asoc->peer.prsctp_capable &&
+ SCTP_PR_PRIO_ENABLED(ch->sinfo.sinfo_flags))
+ asoc->sent_cnt_removable--;
+
+ sctp_chunk_free(ch);
+ }
+
+ if (new) {
+ /* Here we actually move the old ext stuff into the new
+ * buffer, because we want to keep it. Then
+ * sctp_stream_update will swap ->out pointers.
+ */
+ for (i = 0; i < outcnt; i++) {
+ kfree(new->out[i].ext);
+ new->out[i].ext = stream->out[i].ext;
+ stream->out[i].ext = NULL;
+ }
+ }
+
+ for (i = outcnt; i < stream->outcnt; i++)
+ kfree(stream->out[i].ext);
+}
+
+static int sctp_stream_alloc_out(struct sctp_stream *stream, __u16 outcnt,
+ gfp_t gfp)
+{
+ struct sctp_stream_out *out;
+
+ out = kmalloc_array(outcnt, sizeof(*out), gfp);
+ if (!out)
+ return -ENOMEM;
+
+ if (stream->out) {
+ memcpy(out, stream->out, min(outcnt, stream->outcnt) *
+ sizeof(*out));
+ kfree(stream->out);
+ }
+
+ if (outcnt > stream->outcnt)
+ memset(out + stream->outcnt, 0,
+ (outcnt - stream->outcnt) * sizeof(*out));
+
+ stream->out = out;
+
+ return 0;
+}
+
+static int sctp_stream_alloc_in(struct sctp_stream *stream, __u16 incnt,
+ gfp_t gfp)
+{
+ struct sctp_stream_in *in;
+
+ in = kmalloc_array(incnt, sizeof(*stream->in), gfp);
+
+ if (!in)
+ return -ENOMEM;
+
+ if (stream->in) {
+ memcpy(in, stream->in, min(incnt, stream->incnt) *
+ sizeof(*in));
+ kfree(stream->in);
+ }
+
+ if (incnt > stream->incnt)
+ memset(in + stream->incnt, 0,
+ (incnt - stream->incnt) * sizeof(*in));
+
+ stream->in = in;
+
+ return 0;
+}
int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
gfp_t gfp)
{
- int i;
+ struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream);
+ int i, ret = 0;
+
+ gfp |= __GFP_NOWARN;
/* Initial stream->out size may be very big, so free it and alloc
- * a new one with new outcnt to save memory.
+ * a new one with new outcnt to save memory if needed.
*/
- kfree(stream->out);
+ if (outcnt == stream->outcnt)
+ goto in;
- stream->out = kcalloc(outcnt, sizeof(*stream->out), gfp);
- if (!stream->out)
- return -ENOMEM;
+ /* Filter out chunks queued on streams that won't exist anymore */
+ sched->unsched_all(stream);
+ sctp_stream_outq_migrate(stream, NULL, outcnt);
+ sched->sched_all(stream);
+
+ i = sctp_stream_alloc_out(stream, outcnt, gfp);
+ if (i)
+ return i;
stream->outcnt = outcnt;
for (i = 0; i < stream->outcnt; i++)
stream->out[i].state = SCTP_STREAM_OPEN;
+ sched->init(stream);
+
+in:
if (!incnt)
- return 0;
+ goto out;
- stream->in = kcalloc(incnt, sizeof(*stream->in), gfp);
- if (!stream->in) {
- kfree(stream->out);
- stream->out = NULL;
- return -ENOMEM;
+ i = sctp_stream_alloc_in(stream, incnt, gfp);
+ if (i) {
+ ret = -ENOMEM;
+ goto free;
}
stream->incnt = incnt;
+ goto out;
- return 0;
+free:
+ sched->free(stream);
+ kfree(stream->out);
+ stream->out = NULL;
+out:
+ return ret;
+}
+
+int sctp_stream_init_ext(struct sctp_stream *stream, __u16 sid)
+{
+ struct sctp_stream_out_ext *soute;
+
+ soute = kzalloc(sizeof(*soute), GFP_KERNEL);
+ if (!soute)
+ return -ENOMEM;
+ stream->out[sid].ext = soute;
+
+ return sctp_sched_init_sid(stream, sid, GFP_KERNEL);
}
void sctp_stream_free(struct sctp_stream *stream)
{
+ struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream);
+ int i;
+
+ sched->free(stream);
+ for (i = 0; i < stream->outcnt; i++)
+ kfree(stream->out[i].ext);
kfree(stream->out);
kfree(stream->in);
}
@@ -87,6 +224,10 @@ void sctp_stream_clear(struct sctp_stream *stream)
void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new)
{
+ struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream);
+
+ sched->unsched_all(stream);
+ sctp_stream_outq_migrate(stream, new, new->outcnt);
sctp_stream_free(stream);
stream->out = new->out;
@@ -94,6 +235,8 @@ void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new)
stream->outcnt = new->outcnt;
stream->incnt = new->incnt;
+ sched->sched_all(stream);
+
new->out = NULL;
new->in = NULL;
}
@@ -118,6 +261,7 @@ int sctp_send_reset_streams(struct sctp_association *asoc,
__u16 i, str_nums, *str_list;
struct sctp_chunk *chunk;
int retval = -EINVAL;
+ __be16 *nstr_list;
bool out, in;
if (!asoc->peer.reconf_capable ||
@@ -138,23 +282,44 @@ int sctp_send_reset_streams(struct sctp_association *asoc,
str_nums = params->srs_number_streams;
str_list = params->srs_stream_list;
- if (out && str_nums)
- for (i = 0; i < str_nums; i++)
- if (str_list[i] >= stream->outcnt)
- goto out;
+ if (str_nums) {
+ int param_len = 0;
- if (in && str_nums)
- for (i = 0; i < str_nums; i++)
- if (str_list[i] >= stream->incnt)
- goto out;
+ if (out) {
+ for (i = 0; i < str_nums; i++)
+ if (str_list[i] >= stream->outcnt)
+ goto out;
- for (i = 0; i < str_nums; i++)
- str_list[i] = htons(str_list[i]);
+ param_len = str_nums * sizeof(__u16) +
+ sizeof(struct sctp_strreset_outreq);
+ }
+
+ if (in) {
+ for (i = 0; i < str_nums; i++)
+ if (str_list[i] >= stream->incnt)
+ goto out;
- chunk = sctp_make_strreset_req(asoc, str_nums, str_list, out, in);
+ param_len += str_nums * sizeof(__u16) +
+ sizeof(struct sctp_strreset_inreq);
+ }
+
+ if (param_len > SCTP_MAX_CHUNK_LEN -
+ sizeof(struct sctp_reconf_chunk))
+ goto out;
+ }
+
+ nstr_list = kcalloc(str_nums, sizeof(__be16), GFP_KERNEL);
+ if (!nstr_list) {
+ retval = -ENOMEM;
+ goto out;
+ }
for (i = 0; i < str_nums; i++)
- str_list[i] = ntohs(str_list[i]);
+ nstr_list[i] = htons(str_list[i]);
+
+ chunk = sctp_make_strreset_req(asoc, str_nums, nstr_list, out, in);
+
+ kfree(nstr_list);
if (!chunk) {
retval = -ENOMEM;
@@ -244,7 +409,7 @@ int sctp_send_add_streams(struct sctp_association *asoc,
{
struct sctp_stream *stream = &asoc->stream;
struct sctp_chunk *chunk = NULL;
- int retval = -ENOMEM;
+ int retval;
__u32 outcnt, incnt;
__u16 out, in;
@@ -270,20 +435,16 @@ int sctp_send_add_streams(struct sctp_association *asoc,
}
if (out) {
- struct sctp_stream_out *streamout;
-
- streamout = krealloc(stream->out, outcnt * sizeof(*streamout),
- GFP_KERNEL);
- if (!streamout)
+ retval = sctp_stream_alloc_out(stream, outcnt, GFP_KERNEL);
+ if (retval)
goto out;
-
- memset(streamout + stream->outcnt, 0, out * sizeof(*streamout));
- stream->out = streamout;
}
chunk = sctp_make_strreset_addstrm(asoc, out, in);
- if (!chunk)
+ if (!chunk) {
+ retval = -ENOMEM;
goto out;
+ }
asoc->strreset_chunk = chunk;
sctp_chunk_hold(asoc->strreset_chunk);
@@ -305,7 +466,7 @@ out:
}
static struct sctp_paramhdr *sctp_chunk_lookup_strreset_param(
- struct sctp_association *asoc, __u32 resp_seq,
+ struct sctp_association *asoc, __be32 resp_seq,
__be16 type)
{
struct sctp_chunk *chunk = asoc->strreset_chunk;
@@ -345,8 +506,9 @@ struct sctp_chunk *sctp_process_strreset_outreq(
{
struct sctp_strreset_outreq *outreq = param.v;
struct sctp_stream *stream = &asoc->stream;
- __u16 i, nums, flags = 0, *str_p = NULL;
__u32 result = SCTP_STRRESET_DENIED;
+ __u16 i, nums, flags = 0;
+ __be16 *str_p = NULL;
__u32 request_seq;
request_seq = ntohl(outreq->request_seq);
@@ -439,8 +601,9 @@ struct sctp_chunk *sctp_process_strreset_inreq(
struct sctp_stream *stream = &asoc->stream;
__u32 result = SCTP_STRRESET_DENIED;
struct sctp_chunk *chunk = NULL;
- __u16 i, nums, *str_p;
__u32 request_seq;
+ __u16 i, nums;
+ __be16 *str_p;
request_seq = ntohl(inreq->request_seq);
if (TSN_lt(asoc->strreset_inseq, request_seq) ||
@@ -601,7 +764,6 @@ struct sctp_chunk *sctp_process_strreset_addstrm_out(
struct sctp_strreset_addstrm *addstrm = param.v;
struct sctp_stream *stream = &asoc->stream;
__u32 result = SCTP_STRRESET_DENIED;
- struct sctp_stream_in *streamin;
__u32 request_seq, incnt;
__u16 in, i;
@@ -648,13 +810,9 @@ struct sctp_chunk *sctp_process_strreset_addstrm_out(
if (!in || incnt > SCTP_MAX_STREAM)
goto out;
- streamin = krealloc(stream->in, incnt * sizeof(*streamin),
- GFP_ATOMIC);
- if (!streamin)
+ if (sctp_stream_alloc_in(stream, incnt, GFP_ATOMIC))
goto out;
- memset(streamin + stream->incnt, 0, in * sizeof(*streamin));
- stream->in = streamin;
stream->incnt = incnt;
result = SCTP_STRRESET_PERFORMED;
@@ -676,10 +834,10 @@ struct sctp_chunk *sctp_process_strreset_addstrm_in(
struct sctp_strreset_addstrm *addstrm = param.v;
struct sctp_stream *stream = &asoc->stream;
__u32 result = SCTP_STRRESET_DENIED;
- struct sctp_stream_out *streamout;
struct sctp_chunk *chunk = NULL;
__u32 request_seq, outcnt;
__u16 out, i;
+ int ret;
request_seq = ntohl(addstrm->request_seq);
if (TSN_lt(asoc->strreset_inseq, request_seq) ||
@@ -708,14 +866,10 @@ struct sctp_chunk *sctp_process_strreset_addstrm_in(
if (!out || outcnt > SCTP_MAX_STREAM)
goto out;
- streamout = krealloc(stream->out, outcnt * sizeof(*streamout),
- GFP_ATOMIC);
- if (!streamout)
+ ret = sctp_stream_alloc_out(stream, outcnt, GFP_ATOMIC);
+ if (ret)
goto out;
- memset(streamout + stream->outcnt, 0, out * sizeof(*streamout));
- stream->out = streamout;
-
chunk = sctp_make_strreset_addstrm(asoc, out, 0);
if (!chunk)
goto out;
@@ -769,7 +923,7 @@ struct sctp_chunk *sctp_process_strreset_resp(
if (req->type == SCTP_PARAM_RESET_OUT_REQUEST) {
struct sctp_strreset_outreq *outreq;
- __u16 *str_p;
+ __be16 *str_p;
outreq = (struct sctp_strreset_outreq *)req;
str_p = outreq->list_of_streams;
@@ -794,7 +948,7 @@ struct sctp_chunk *sctp_process_strreset_resp(
nums, str_p, GFP_ATOMIC);
} else if (req->type == SCTP_PARAM_RESET_IN_REQUEST) {
struct sctp_strreset_inreq *inreq;
- __u16 *str_p;
+ __be16 *str_p;
/* if the result is performed, it's impossible for inreq */
if (result == SCTP_STRRESET_PERFORMED)
diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c
new file mode 100644
index 000000000000..0b83ec51e43b
--- /dev/null
+++ b/net/sctp/stream_sched.c
@@ -0,0 +1,275 @@
+/* SCTP kernel implementation
+ * (C) Copyright Red Hat Inc. 2017
+ *
+ * This file is part of the SCTP kernel implementation
+ *
+ * These functions manipulate sctp stream queue/scheduling.
+ *
+ * This SCTP implementation is free software;
+ * you can redistribute it and/or modify it under the terms of
+ * the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This SCTP implementation is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ * ************************
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING. If not, see
+ * <http://www.gnu.org/licenses/>.
+ *
+ * Please send any bug reports or fixes you make to the
+ * email addresched(es):
+ * lksctp developers <linux-sctp@vger.kernel.org>
+ *
+ * Written or modified by:
+ * Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+ */
+
+#include <linux/list.h>
+#include <net/sctp/sctp.h>
+#include <net/sctp/sm.h>
+#include <net/sctp/stream_sched.h>
+
+/* First Come First Serve (a.k.a. FIFO)
+ * RFC DRAFT ndata Section 3.1
+ */
+static int sctp_sched_fcfs_set(struct sctp_stream *stream, __u16 sid,
+ __u16 value, gfp_t gfp)
+{
+ return 0;
+}
+
+static int sctp_sched_fcfs_get(struct sctp_stream *stream, __u16 sid,
+ __u16 *value)
+{
+ *value = 0;
+ return 0;
+}
+
+static int sctp_sched_fcfs_init(struct sctp_stream *stream)
+{
+ return 0;
+}
+
+static int sctp_sched_fcfs_init_sid(struct sctp_stream *stream, __u16 sid,
+ gfp_t gfp)
+{
+ return 0;
+}
+
+static void sctp_sched_fcfs_free(struct sctp_stream *stream)
+{
+}
+
+static void sctp_sched_fcfs_enqueue(struct sctp_outq *q,
+ struct sctp_datamsg *msg)
+{
+}
+
+static struct sctp_chunk *sctp_sched_fcfs_dequeue(struct sctp_outq *q)
+{
+ struct sctp_stream *stream = &q->asoc->stream;
+ struct sctp_chunk *ch = NULL;
+ struct list_head *entry;
+
+ if (list_empty(&q->out_chunk_list))
+ goto out;
+
+ if (stream->out_curr) {
+ ch = list_entry(stream->out_curr->ext->outq.next,
+ struct sctp_chunk, stream_list);
+ } else {
+ entry = q->out_chunk_list.next;
+ ch = list_entry(entry, struct sctp_chunk, list);
+ }
+
+ sctp_sched_dequeue_common(q, ch);
+
+out:
+ return ch;
+}
+
+static void sctp_sched_fcfs_dequeue_done(struct sctp_outq *q,
+ struct sctp_chunk *chunk)
+{
+}
+
+static void sctp_sched_fcfs_sched_all(struct sctp_stream *stream)
+{
+}
+
+static void sctp_sched_fcfs_unsched_all(struct sctp_stream *stream)
+{
+}
+
+static struct sctp_sched_ops sctp_sched_fcfs = {
+ .set = sctp_sched_fcfs_set,
+ .get = sctp_sched_fcfs_get,
+ .init = sctp_sched_fcfs_init,
+ .init_sid = sctp_sched_fcfs_init_sid,
+ .free = sctp_sched_fcfs_free,
+ .enqueue = sctp_sched_fcfs_enqueue,
+ .dequeue = sctp_sched_fcfs_dequeue,
+ .dequeue_done = sctp_sched_fcfs_dequeue_done,
+ .sched_all = sctp_sched_fcfs_sched_all,
+ .unsched_all = sctp_sched_fcfs_unsched_all,
+};
+
+/* API to other parts of the stack */
+
+extern struct sctp_sched_ops sctp_sched_prio;
+extern struct sctp_sched_ops sctp_sched_rr;
+
+static struct sctp_sched_ops *sctp_sched_ops[] = {
+ &sctp_sched_fcfs,
+ &sctp_sched_prio,
+ &sctp_sched_rr,
+};
+
+int sctp_sched_set_sched(struct sctp_association *asoc,
+ enum sctp_sched_type sched)
+{
+ struct sctp_sched_ops *n = sctp_sched_ops[sched];
+ struct sctp_sched_ops *old = asoc->outqueue.sched;
+ struct sctp_datamsg *msg = NULL;
+ struct sctp_chunk *ch;
+ int i, ret = 0;
+
+ if (old == n)
+ return ret;
+
+ if (sched > SCTP_SS_MAX)
+ return -EINVAL;
+
+ if (old) {
+ old->free(&asoc->stream);
+
+ /* Give the next scheduler a clean slate. */
+ for (i = 0; i < asoc->stream.outcnt; i++) {
+ void *p = asoc->stream.out[i].ext;
+
+ if (!p)
+ continue;
+
+ p += offsetofend(struct sctp_stream_out_ext, outq);
+ memset(p, 0, sizeof(struct sctp_stream_out_ext) -
+ offsetofend(struct sctp_stream_out_ext, outq));
+ }
+ }
+
+ asoc->outqueue.sched = n;
+ n->init(&asoc->stream);
+ for (i = 0; i < asoc->stream.outcnt; i++) {
+ if (!asoc->stream.out[i].ext)
+ continue;
+
+ ret = n->init_sid(&asoc->stream, i, GFP_KERNEL);
+ if (ret)
+ goto err;
+ }
+
+ /* We have to requeue all chunks already queued. */
+ list_for_each_entry(ch, &asoc->outqueue.out_chunk_list, list) {
+ if (ch->msg == msg)
+ continue;
+ msg = ch->msg;
+ n->enqueue(&asoc->outqueue, msg);
+ }
+
+ return ret;
+
+err:
+ n->free(&asoc->stream);
+ asoc->outqueue.sched = &sctp_sched_fcfs; /* Always safe */
+
+ return ret;
+}
+
+int sctp_sched_get_sched(struct sctp_association *asoc)
+{
+ int i;
+
+ for (i = 0; i <= SCTP_SS_MAX; i++)
+ if (asoc->outqueue.sched == sctp_sched_ops[i])
+ return i;
+
+ return 0;
+}
+
+int sctp_sched_set_value(struct sctp_association *asoc, __u16 sid,
+ __u16 value, gfp_t gfp)
+{
+ if (sid >= asoc->stream.outcnt)
+ return -EINVAL;
+
+ if (!asoc->stream.out[sid].ext) {
+ int ret;
+
+ ret = sctp_stream_init_ext(&asoc->stream, sid);
+ if (ret)
+ return ret;
+ }
+
+ return asoc->outqueue.sched->set(&asoc->stream, sid, value, gfp);
+}
+
+int sctp_sched_get_value(struct sctp_association *asoc, __u16 sid,
+ __u16 *value)
+{
+ if (sid >= asoc->stream.outcnt)
+ return -EINVAL;
+
+ if (!asoc->stream.out[sid].ext)
+ return 0;
+
+ return asoc->outqueue.sched->get(&asoc->stream, sid, value);
+}
+
+void sctp_sched_dequeue_done(struct sctp_outq *q, struct sctp_chunk *ch)
+{
+ if (!list_is_last(&ch->frag_list, &ch->msg->chunks)) {
+ struct sctp_stream_out *sout;
+ __u16 sid;
+
+ /* datamsg is not finish, so save it as current one,
+ * in case application switch scheduler or a higher
+ * priority stream comes in.
+ */
+ sid = sctp_chunk_stream_no(ch);
+ sout = &q->asoc->stream.out[sid];
+ q->asoc->stream.out_curr = sout;
+ return;
+ }
+
+ q->asoc->stream.out_curr = NULL;
+ q->sched->dequeue_done(q, ch);
+}
+
+/* Auxiliary functions for the schedulers */
+void sctp_sched_dequeue_common(struct sctp_outq *q, struct sctp_chunk *ch)
+{
+ list_del_init(&ch->list);
+ list_del_init(&ch->stream_list);
+ q->out_qlen -= ch->skb->len;
+}
+
+int sctp_sched_init_sid(struct sctp_stream *stream, __u16 sid, gfp_t gfp)
+{
+ struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream);
+
+ INIT_LIST_HEAD(&stream->out[sid].ext->outq);
+ return sched->init_sid(stream, sid, gfp);
+}
+
+struct sctp_sched_ops *sctp_sched_ops_from_stream(struct sctp_stream *stream)
+{
+ struct sctp_association *asoc;
+
+ asoc = container_of(stream, struct sctp_association, stream);
+
+ return asoc->outqueue.sched;
+}
diff --git a/net/sctp/stream_sched_prio.c b/net/sctp/stream_sched_prio.c
new file mode 100644
index 000000000000..384dbf3c8760
--- /dev/null
+++ b/net/sctp/stream_sched_prio.c
@@ -0,0 +1,347 @@
+/* SCTP kernel implementation
+ * (C) Copyright Red Hat Inc. 2017
+ *
+ * This file is part of the SCTP kernel implementation
+ *
+ * These functions manipulate sctp stream queue/scheduling.
+ *
+ * This SCTP implementation is free software;
+ * you can redistribute it and/or modify it under the terms of
+ * the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This SCTP implementation is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ * ************************
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING. If not, see
+ * <http://www.gnu.org/licenses/>.
+ *
+ * Please send any bug reports or fixes you make to the
+ * email addresched(es):
+ * lksctp developers <linux-sctp@vger.kernel.org>
+ *
+ * Written or modified by:
+ * Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+ */
+
+#include <linux/list.h>
+#include <net/sctp/sctp.h>
+#include <net/sctp/sm.h>
+#include <net/sctp/stream_sched.h>
+
+/* Priority handling
+ * RFC DRAFT ndata section 3.4
+ */
+
+static void sctp_sched_prio_unsched_all(struct sctp_stream *stream);
+
+static struct sctp_stream_priorities *sctp_sched_prio_new_head(
+ struct sctp_stream *stream, int prio, gfp_t gfp)
+{
+ struct sctp_stream_priorities *p;
+
+ p = kmalloc(sizeof(*p), gfp);
+ if (!p)
+ return NULL;
+
+ INIT_LIST_HEAD(&p->prio_sched);
+ INIT_LIST_HEAD(&p->active);
+ p->next = NULL;
+ p->prio = prio;
+
+ return p;
+}
+
+static struct sctp_stream_priorities *sctp_sched_prio_get_head(
+ struct sctp_stream *stream, int prio, gfp_t gfp)
+{
+ struct sctp_stream_priorities *p;
+ int i;
+
+ /* Look into scheduled priorities first, as they are sorted and
+ * we can find it fast IF it's scheduled.
+ */
+ list_for_each_entry(p, &stream->prio_list, prio_sched) {
+ if (p->prio == prio)
+ return p;
+ if (p->prio > prio)
+ break;
+ }
+
+ /* No luck. So we search on all streams now. */
+ for (i = 0; i < stream->outcnt; i++) {
+ if (!stream->out[i].ext)
+ continue;
+
+ p = stream->out[i].ext->prio_head;
+ if (!p)
+ /* Means all other streams won't be initialized
+ * as well.
+ */
+ break;
+ if (p->prio == prio)
+ return p;
+ }
+
+ /* If not even there, allocate a new one. */
+ return sctp_sched_prio_new_head(stream, prio, gfp);
+}
+
+static void sctp_sched_prio_next_stream(struct sctp_stream_priorities *p)
+{
+ struct list_head *pos;
+
+ pos = p->next->prio_list.next;
+ if (pos == &p->active)
+ pos = pos->next;
+ p->next = list_entry(pos, struct sctp_stream_out_ext, prio_list);
+}
+
+static bool sctp_sched_prio_unsched(struct sctp_stream_out_ext *soute)
+{
+ bool scheduled = false;
+
+ if (!list_empty(&soute->prio_list)) {
+ struct sctp_stream_priorities *prio_head = soute->prio_head;
+
+ /* Scheduled */
+ scheduled = true;
+
+ if (prio_head->next == soute)
+ /* Try to move to the next stream */
+ sctp_sched_prio_next_stream(prio_head);
+
+ list_del_init(&soute->prio_list);
+
+ /* Also unsched the priority if this was the last stream */
+ if (list_empty(&prio_head->active)) {
+ list_del_init(&prio_head->prio_sched);
+ /* If there is no stream left, clear next */
+ prio_head->next = NULL;
+ }
+ }
+
+ return scheduled;
+}
+
+static void sctp_sched_prio_sched(struct sctp_stream *stream,
+ struct sctp_stream_out_ext *soute)
+{
+ struct sctp_stream_priorities *prio, *prio_head;
+
+ prio_head = soute->prio_head;
+
+ /* Nothing to do if already scheduled */
+ if (!list_empty(&soute->prio_list))
+ return;
+
+ /* Schedule the stream. If there is a next, we schedule the new
+ * one before it, so it's the last in round robin order.
+ * If there isn't, we also have to schedule the priority.
+ */
+ if (prio_head->next) {
+ list_add(&soute->prio_list, prio_head->next->prio_list.prev);
+ return;
+ }
+
+ list_add(&soute->prio_list, &prio_head->active);
+ prio_head->next = soute;
+
+ list_for_each_entry(prio, &stream->prio_list, prio_sched) {
+ if (prio->prio > prio_head->prio) {
+ list_add(&prio_head->prio_sched, prio->prio_sched.prev);
+ return;
+ }
+ }
+
+ list_add_tail(&prio_head->prio_sched, &stream->prio_list);
+}
+
+static int sctp_sched_prio_set(struct sctp_stream *stream, __u16 sid,
+ __u16 prio, gfp_t gfp)
+{
+ struct sctp_stream_out *sout = &stream->out[sid];
+ struct sctp_stream_out_ext *soute = sout->ext;
+ struct sctp_stream_priorities *prio_head, *old;
+ bool reschedule = false;
+ int i;
+
+ prio_head = sctp_sched_prio_get_head(stream, prio, gfp);
+ if (!prio_head)
+ return -ENOMEM;
+
+ reschedule = sctp_sched_prio_unsched(soute);
+ old = soute->prio_head;
+ soute->prio_head = prio_head;
+ if (reschedule)
+ sctp_sched_prio_sched(stream, soute);
+
+ if (!old)
+ /* Happens when we set the priority for the first time */
+ return 0;
+
+ for (i = 0; i < stream->outcnt; i++) {
+ soute = stream->out[i].ext;
+ if (soute && soute->prio_head == old)
+ /* It's still in use, nothing else to do here. */
+ return 0;
+ }
+
+ /* No hits, we are good to free it. */
+ kfree(old);
+
+ return 0;
+}
+
+static int sctp_sched_prio_get(struct sctp_stream *stream, __u16 sid,
+ __u16 *value)
+{
+ *value = stream->out[sid].ext->prio_head->prio;
+ return 0;
+}
+
+static int sctp_sched_prio_init(struct sctp_stream *stream)
+{
+ INIT_LIST_HEAD(&stream->prio_list);
+
+ return 0;
+}
+
+static int sctp_sched_prio_init_sid(struct sctp_stream *stream, __u16 sid,
+ gfp_t gfp)
+{
+ INIT_LIST_HEAD(&stream->out[sid].ext->prio_list);
+ return sctp_sched_prio_set(stream, sid, 0, gfp);
+}
+
+static void sctp_sched_prio_free(struct sctp_stream *stream)
+{
+ struct sctp_stream_priorities *prio, *n;
+ LIST_HEAD(list);
+ int i;
+
+ /* As we don't keep a list of priorities, to avoid multiple
+ * frees we have to do it in 3 steps:
+ * 1. unsched everyone, so the lists are free to use in 2.
+ * 2. build the list of the priorities
+ * 3. free the list
+ */
+ sctp_sched_prio_unsched_all(stream);
+ for (i = 0; i < stream->outcnt; i++) {
+ if (!stream->out[i].ext)
+ continue;
+ prio = stream->out[i].ext->prio_head;
+ if (prio && list_empty(&prio->prio_sched))
+ list_add(&prio->prio_sched, &list);
+ }
+ list_for_each_entry_safe(prio, n, &list, prio_sched) {
+ list_del_init(&prio->prio_sched);
+ kfree(prio);
+ }
+}
+
+static void sctp_sched_prio_enqueue(struct sctp_outq *q,
+ struct sctp_datamsg *msg)
+{
+ struct sctp_stream *stream;
+ struct sctp_chunk *ch;
+ __u16 sid;
+
+ ch = list_first_entry(&msg->chunks, struct sctp_chunk, frag_list);
+ sid = sctp_chunk_stream_no(ch);
+ stream = &q->asoc->stream;
+ sctp_sched_prio_sched(stream, stream->out[sid].ext);
+}
+
+static struct sctp_chunk *sctp_sched_prio_dequeue(struct sctp_outq *q)
+{
+ struct sctp_stream *stream = &q->asoc->stream;
+ struct sctp_stream_priorities *prio;
+ struct sctp_stream_out_ext *soute;
+ struct sctp_chunk *ch = NULL;
+
+ /* Bail out quickly if queue is empty */
+ if (list_empty(&q->out_chunk_list))
+ goto out;
+
+ /* Find which chunk is next. It's easy, it's either the current
+ * one or the first chunk on the next active stream.
+ */
+ if (stream->out_curr) {
+ soute = stream->out_curr->ext;
+ } else {
+ prio = list_entry(stream->prio_list.next,
+ struct sctp_stream_priorities, prio_sched);
+ soute = prio->next;
+ }
+ ch = list_entry(soute->outq.next, struct sctp_chunk, stream_list);
+ sctp_sched_dequeue_common(q, ch);
+
+out:
+ return ch;
+}
+
+static void sctp_sched_prio_dequeue_done(struct sctp_outq *q,
+ struct sctp_chunk *ch)
+{
+ struct sctp_stream_priorities *prio;
+ struct sctp_stream_out_ext *soute;
+ __u16 sid;
+
+ /* Last chunk on that msg, move to the next stream on
+ * this priority.
+ */
+ sid = sctp_chunk_stream_no(ch);
+ soute = q->asoc->stream.out[sid].ext;
+ prio = soute->prio_head;
+
+ sctp_sched_prio_next_stream(prio);
+
+ if (list_empty(&soute->outq))
+ sctp_sched_prio_unsched(soute);
+}
+
+static void sctp_sched_prio_sched_all(struct sctp_stream *stream)
+{
+ struct sctp_association *asoc;
+ struct sctp_stream_out *sout;
+ struct sctp_chunk *ch;
+
+ asoc = container_of(stream, struct sctp_association, stream);
+ list_for_each_entry(ch, &asoc->outqueue.out_chunk_list, list) {
+ __u16 sid;
+
+ sid = sctp_chunk_stream_no(ch);
+ sout = &stream->out[sid];
+ if (sout->ext)
+ sctp_sched_prio_sched(stream, sout->ext);
+ }
+}
+
+static void sctp_sched_prio_unsched_all(struct sctp_stream *stream)
+{
+ struct sctp_stream_priorities *p, *tmp;
+ struct sctp_stream_out_ext *soute, *souttmp;
+
+ list_for_each_entry_safe(p, tmp, &stream->prio_list, prio_sched)
+ list_for_each_entry_safe(soute, souttmp, &p->active, prio_list)
+ sctp_sched_prio_unsched(soute);
+}
+
+struct sctp_sched_ops sctp_sched_prio = {
+ .set = sctp_sched_prio_set,
+ .get = sctp_sched_prio_get,
+ .init = sctp_sched_prio_init,
+ .init_sid = sctp_sched_prio_init_sid,
+ .free = sctp_sched_prio_free,
+ .enqueue = sctp_sched_prio_enqueue,
+ .dequeue = sctp_sched_prio_dequeue,
+ .dequeue_done = sctp_sched_prio_dequeue_done,
+ .sched_all = sctp_sched_prio_sched_all,
+ .unsched_all = sctp_sched_prio_unsched_all,
+};
diff --git a/net/sctp/stream_sched_rr.c b/net/sctp/stream_sched_rr.c
new file mode 100644
index 000000000000..7612a438c5b9
--- /dev/null
+++ b/net/sctp/stream_sched_rr.c
@@ -0,0 +1,201 @@
+/* SCTP kernel implementation
+ * (C) Copyright Red Hat Inc. 2017
+ *
+ * This file is part of the SCTP kernel implementation
+ *
+ * These functions manipulate sctp stream queue/scheduling.
+ *
+ * This SCTP implementation is free software;
+ * you can redistribute it and/or modify it under the terms of
+ * the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This SCTP implementation is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ * ************************
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING. If not, see
+ * <http://www.gnu.org/licenses/>.
+ *
+ * Please send any bug reports or fixes you make to the
+ * email addresched(es):
+ * lksctp developers <linux-sctp@vger.kernel.org>
+ *
+ * Written or modified by:
+ * Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+ */
+
+#include <linux/list.h>
+#include <net/sctp/sctp.h>
+#include <net/sctp/sm.h>
+#include <net/sctp/stream_sched.h>
+
+/* Priority handling
+ * RFC DRAFT ndata section 3.2
+ */
+static void sctp_sched_rr_unsched_all(struct sctp_stream *stream);
+
+static void sctp_sched_rr_next_stream(struct sctp_stream *stream)
+{
+ struct list_head *pos;
+
+ pos = stream->rr_next->rr_list.next;
+ if (pos == &stream->rr_list)
+ pos = pos->next;
+ stream->rr_next = list_entry(pos, struct sctp_stream_out_ext, rr_list);
+}
+
+static void sctp_sched_rr_unsched(struct sctp_stream *stream,
+ struct sctp_stream_out_ext *soute)
+{
+ if (stream->rr_next == soute)
+ /* Try to move to the next stream */
+ sctp_sched_rr_next_stream(stream);
+
+ list_del_init(&soute->rr_list);
+
+ /* If we have no other stream queued, clear next */
+ if (list_empty(&stream->rr_list))
+ stream->rr_next = NULL;
+}
+
+static void sctp_sched_rr_sched(struct sctp_stream *stream,
+ struct sctp_stream_out_ext *soute)
+{
+ if (!list_empty(&soute->rr_list))
+ /* Already scheduled. */
+ return;
+
+ /* Schedule the stream */
+ list_add_tail(&soute->rr_list, &stream->rr_list);
+
+ if (!stream->rr_next)
+ stream->rr_next = soute;
+}
+
+static int sctp_sched_rr_set(struct sctp_stream *stream, __u16 sid,
+ __u16 prio, gfp_t gfp)
+{
+ return 0;
+}
+
+static int sctp_sched_rr_get(struct sctp_stream *stream, __u16 sid,
+ __u16 *value)
+{
+ return 0;
+}
+
+static int sctp_sched_rr_init(struct sctp_stream *stream)
+{
+ INIT_LIST_HEAD(&stream->rr_list);
+ stream->rr_next = NULL;
+
+ return 0;
+}
+
+static int sctp_sched_rr_init_sid(struct sctp_stream *stream, __u16 sid,
+ gfp_t gfp)
+{
+ INIT_LIST_HEAD(&stream->out[sid].ext->rr_list);
+
+ return 0;
+}
+
+static void sctp_sched_rr_free(struct sctp_stream *stream)
+{
+ sctp_sched_rr_unsched_all(stream);
+}
+
+static void sctp_sched_rr_enqueue(struct sctp_outq *q,
+ struct sctp_datamsg *msg)
+{
+ struct sctp_stream *stream;
+ struct sctp_chunk *ch;
+ __u16 sid;
+
+ ch = list_first_entry(&msg->chunks, struct sctp_chunk, frag_list);
+ sid = sctp_chunk_stream_no(ch);
+ stream = &q->asoc->stream;
+ sctp_sched_rr_sched(stream, stream->out[sid].ext);
+}
+
+static struct sctp_chunk *sctp_sched_rr_dequeue(struct sctp_outq *q)
+{
+ struct sctp_stream *stream = &q->asoc->stream;
+ struct sctp_stream_out_ext *soute;
+ struct sctp_chunk *ch = NULL;
+
+ /* Bail out quickly if queue is empty */
+ if (list_empty(&q->out_chunk_list))
+ goto out;
+
+ /* Find which chunk is next */
+ if (stream->out_curr)
+ soute = stream->out_curr->ext;
+ else
+ soute = stream->rr_next;
+ ch = list_entry(soute->outq.next, struct sctp_chunk, stream_list);
+
+ sctp_sched_dequeue_common(q, ch);
+
+out:
+ return ch;
+}
+
+static void sctp_sched_rr_dequeue_done(struct sctp_outq *q,
+ struct sctp_chunk *ch)
+{
+ struct sctp_stream_out_ext *soute;
+ __u16 sid;
+
+ /* Last chunk on that msg, move to the next stream */
+ sid = sctp_chunk_stream_no(ch);
+ soute = q->asoc->stream.out[sid].ext;
+
+ sctp_sched_rr_next_stream(&q->asoc->stream);
+
+ if (list_empty(&soute->outq))
+ sctp_sched_rr_unsched(&q->asoc->stream, soute);
+}
+
+static void sctp_sched_rr_sched_all(struct sctp_stream *stream)
+{
+ struct sctp_association *asoc;
+ struct sctp_stream_out_ext *soute;
+ struct sctp_chunk *ch;
+
+ asoc = container_of(stream, struct sctp_association, stream);
+ list_for_each_entry(ch, &asoc->outqueue.out_chunk_list, list) {
+ __u16 sid;
+
+ sid = sctp_chunk_stream_no(ch);
+ soute = stream->out[sid].ext;
+ if (soute)
+ sctp_sched_rr_sched(stream, soute);
+ }
+}
+
+static void sctp_sched_rr_unsched_all(struct sctp_stream *stream)
+{
+ struct sctp_stream_out_ext *soute, *tmp;
+
+ list_for_each_entry_safe(soute, tmp, &stream->rr_list, rr_list)
+ sctp_sched_rr_unsched(stream, soute);
+}
+
+struct sctp_sched_ops sctp_sched_rr = {
+ .set = sctp_sched_rr_set,
+ .get = sctp_sched_rr_get,
+ .init = sctp_sched_rr_init,
+ .init_sid = sctp_sched_rr_init_sid,
+ .free = sctp_sched_rr_free,
+ .enqueue = sctp_sched_rr_enqueue,
+ .dequeue = sctp_sched_rr_dequeue,
+ .dequeue_done = sctp_sched_rr_dequeue_done,
+ .sched_all = sctp_sched_rr_sched_all,
+ .unsched_all = sctp_sched_rr_unsched_all,
+};
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index 0e732f68c2bf..ef7ca44d6e6a 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -46,7 +46,7 @@ static int timer_max = 86400000; /* ms in one day */
static int int_max = INT_MAX;
static int sack_timer_min = 1;
static int sack_timer_max = 500;
-static int addr_scope_max = 3; /* check sctp_scope_policy_t in include/net/sctp/constants.h for max entries */
+static int addr_scope_max = SCTP_SCOPE_POLICY_MAX;
static int rwnd_scale_max = 16;
static int rto_alpha_min = 0;
static int rto_beta_min = 0;
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 80a97c8501a7..1e5a22430cf5 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -87,14 +87,11 @@ static struct sctp_transport *sctp_transport_init(struct net *net,
INIT_LIST_HEAD(&peer->send_ready);
INIT_LIST_HEAD(&peer->transports);
- setup_timer(&peer->T3_rtx_timer, sctp_generate_t3_rtx_event,
- (unsigned long)peer);
- setup_timer(&peer->hb_timer, sctp_generate_heartbeat_event,
- (unsigned long)peer);
- setup_timer(&peer->reconf_timer, sctp_generate_reconf_event,
- (unsigned long)peer);
- setup_timer(&peer->proto_unreach_timer,
- sctp_generate_proto_unreach_event, (unsigned long)peer);
+ timer_setup(&peer->T3_rtx_timer, sctp_generate_t3_rtx_event, 0);
+ timer_setup(&peer->hb_timer, sctp_generate_heartbeat_event, 0);
+ timer_setup(&peer->reconf_timer, sctp_generate_reconf_event, 0);
+ timer_setup(&peer->proto_unreach_timer,
+ sctp_generate_proto_unreach_event, 0);
/* Initialize the 64-bit random nonce sent with heartbeat. */
get_random_bytes(&peer->hb_nonce, sizeof(peer->hb_nonce));
@@ -490,7 +487,7 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport,
* detected.
*/
void sctp_transport_lower_cwnd(struct sctp_transport *transport,
- sctp_lower_cwnd_t reason)
+ enum sctp_lower_cwnd reason)
{
struct sctp_association *asoc = transport->asoc;
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index 5f86c5062a98..5447228bf1a0 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -371,19 +371,19 @@ sctp_ulpevent_make_remote_error(const struct sctp_association *asoc,
struct sctp_chunk *chunk, __u16 flags,
gfp_t gfp)
{
- struct sctp_ulpevent *event;
struct sctp_remote_error *sre;
+ struct sctp_ulpevent *event;
+ struct sctp_errhdr *ch;
struct sk_buff *skb;
- sctp_errhdr_t *ch;
__be16 cause;
int elen;
- ch = (sctp_errhdr_t *)(chunk->skb->data);
+ ch = (struct sctp_errhdr *)(chunk->skb->data);
cause = ch->cause;
- elen = SCTP_PAD4(ntohs(ch->length)) - sizeof(sctp_errhdr_t);
+ elen = SCTP_PAD4(ntohs(ch->length)) - sizeof(*ch);
/* Pull off the ERROR header. */
- skb_pull(chunk->skb, sizeof(sctp_errhdr_t));
+ skb_pull(chunk->skb, sizeof(*ch));
/* Copy the skb to a new skb with room for us to prepend
* notification with.
@@ -847,7 +847,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_sender_dry_event(
struct sctp_ulpevent *sctp_ulpevent_make_stream_reset_event(
const struct sctp_association *asoc, __u16 flags, __u16 stream_num,
- __u16 *stream_list, gfp_t gfp)
+ __be16 *stream_list, gfp_t gfp)
{
struct sctp_stream_reset_event *sreset;
struct sctp_ulpevent *event;
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index 0225d62a869f..a71be33f3afe 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -265,7 +265,8 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event)
sctp_ulpq_clear_pd(ulpq);
if (queue == &sk->sk_receive_queue && !sp->data_ready_signalled) {
- sp->data_ready_signalled = 1;
+ if (!sock_owned_by_user(sk))
+ sp->data_ready_signalled = 1;
sk->sk_data_ready(sk);
}
return 1;
diff --git a/net/smc/Kconfig b/net/smc/Kconfig
index 33954852f3f8..c717ef0896aa 100644
--- a/net/smc/Kconfig
+++ b/net/smc/Kconfig
@@ -8,10 +8,6 @@ config SMC
The Linux implementation of the SMC-R solution is designed as
a separate socket family SMC.
- Warning: SMC will expose all memory for remote reads and writes
- once a connection is established. Don't enable this option except
- for tightly controlled lab environment.
-
Select this option if you want to run SMC socket applications
config SMC_DIAG
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 6793d7348cc8..6451c5013e06 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -282,6 +282,7 @@ int smc_netinfo_by_tcpsk(struct socket *clcsock,
__be32 *subnet, u8 *prefix_len)
{
struct dst_entry *dst = sk_dst_get(clcsock->sk);
+ struct in_device *in_dev;
struct sockaddr_in addr;
int rc = -ENOENT;
int len;
@@ -298,14 +299,17 @@ int smc_netinfo_by_tcpsk(struct socket *clcsock,
/* get address to which the internal TCP socket is bound */
kernel_getsockname(clcsock, (struct sockaddr *)&addr, &len);
/* analyze IPv4 specific data of net_device belonging to TCP socket */
- for_ifa(dst->dev->ip_ptr) {
- if (ifa->ifa_address != addr.sin_addr.s_addr)
+ rcu_read_lock();
+ in_dev = __in_dev_get_rcu(dst->dev);
+ for_ifa(in_dev) {
+ if (!inet_ifa_match(addr.sin_addr.s_addr, ifa))
continue;
*prefix_len = inet_mask_len(ifa->ifa_mask);
*subnet = ifa->ifa_address & ifa->ifa_mask;
rc = 0;
break;
- } endfor_ifa(dst->dev->ip_ptr);
+ } endfor_ifa(in_dev);
+ rcu_read_unlock();
out_rel:
dst_release(dst);
@@ -338,6 +342,12 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid)
return SMC_CLC_DECL_INTERR;
smc_wr_remember_qp_attr(link);
+
+ rc = smc_wr_reg_send(link,
+ smc->conn.rmb_desc->mr_rx[SMC_SINGLE_LINK]);
+ if (rc)
+ return SMC_CLC_DECL_INTERR;
+
/* send CONFIRM LINK response over RoCE fabric */
rc = smc_llc_send_confirm_link(link,
link->smcibdev->mac[link->ibport - 1],
@@ -380,6 +390,12 @@ static int smc_connect_rdma(struct smc_sock *smc)
int rc = 0;
u8 ibport;
+ if (!tcp_sk(smc->clcsock->sk)->syn_smc) {
+ /* peer has not signalled SMC-capability */
+ smc->use_fallback = true;
+ goto out_connected;
+ }
+
/* IPSec connections opt out of SMC-R optimizations */
if (using_ipsec(smc)) {
reason_code = SMC_CLC_DECL_IPSEC;
@@ -430,12 +446,8 @@ static int smc_connect_rdma(struct smc_sock *smc)
smc_conn_save_peer_info(smc, &aclc);
- rc = smc_sndbuf_create(smc);
- if (rc) {
- reason_code = SMC_CLC_DECL_MEM;
- goto decline_rdma_unlock;
- }
- rc = smc_rmb_create(smc);
+ /* create send buffer and rmb */
+ rc = smc_buf_create(smc);
if (rc) {
reason_code = SMC_CLC_DECL_MEM;
goto decline_rdma_unlock;
@@ -459,7 +471,20 @@ static int smc_connect_rdma(struct smc_sock *smc)
reason_code = SMC_CLC_DECL_INTERR;
goto decline_rdma_unlock;
}
+ } else {
+ struct smc_buf_desc *buf_desc = smc->conn.rmb_desc;
+
+ if (!buf_desc->reused) {
+ /* register memory region for new rmb */
+ rc = smc_wr_reg_send(link,
+ buf_desc->mr_rx[SMC_SINGLE_LINK]);
+ if (rc) {
+ reason_code = SMC_CLC_DECL_INTERR;
+ goto decline_rdma_unlock;
+ }
+ }
}
+ smc_rmb_sync_sg_for_device(&smc->conn);
rc = smc_clc_send_confirm(smc);
if (rc)
@@ -494,7 +519,7 @@ decline_rdma:
/* RDMA setup failed, switch back to TCP */
smc->use_fallback = true;
if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) {
- rc = smc_clc_send_decline(smc, reason_code, 0);
+ rc = smc_clc_send_decline(smc, reason_code);
if (rc < sizeof(struct smc_clc_msg_decline))
goto out_err;
}
@@ -536,6 +561,7 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
}
smc_copy_sock_settings_to_clc(smc);
+ tcp_sk(smc->clcsock->sk)->syn_smc = 1;
rc = kernel_connect(smc->clcsock, addr, alen, flags);
if (rc)
goto out;
@@ -692,6 +718,12 @@ static int smc_serv_conf_first_link(struct smc_sock *smc)
int rc;
link = &lgr->lnk[SMC_SINGLE_LINK];
+
+ rc = smc_wr_reg_send(link,
+ smc->conn.rmb_desc->mr_rx[SMC_SINGLE_LINK]);
+ if (rc)
+ return SMC_CLC_DECL_INTERR;
+
/* send CONFIRM LINK request to client over the RoCE fabric */
rc = smc_llc_send_confirm_link(link,
link->smcibdev->mac[link->ibport - 1],
@@ -734,6 +766,12 @@ static void smc_listen_work(struct work_struct *work)
u8 prefix_len;
u8 ibport;
+ /* check if peer is smc capable */
+ if (!tcp_sk(newclcsock->sk)->syn_smc) {
+ new_smc->use_fallback = true;
+ goto out_connected;
+ }
+
/* do inband token exchange -
*wait for and receive SMC Proposal CLC message
*/
@@ -779,46 +817,50 @@ static void smc_listen_work(struct work_struct *work)
mutex_lock(&smc_create_lgr_pending);
local_contact = smc_conn_create(new_smc, peeraddr.sin_addr.s_addr,
smcibdev, ibport, &pclc.lcl, 0);
- if (local_contact == SMC_REUSE_CONTACT)
- /* lock no longer needed, free it due to following
- * smc_clc_wait_msg() call
- */
- mutex_unlock(&smc_create_lgr_pending);
if (local_contact < 0) {
rc = local_contact;
if (rc == -ENOMEM)
reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/
- else if (rc == -ENOLINK)
- reason_code = SMC_CLC_DECL_SYNCERR; /* synchr. error */
- goto decline_rdma;
+ goto decline_rdma_unlock;
}
link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];
- rc = smc_sndbuf_create(new_smc);
+ /* create send buffer and rmb */
+ rc = smc_buf_create(new_smc);
if (rc) {
reason_code = SMC_CLC_DECL_MEM;
- goto decline_rdma;
- }
- rc = smc_rmb_create(new_smc);
- if (rc) {
- reason_code = SMC_CLC_DECL_MEM;
- goto decline_rdma;
+ goto decline_rdma_unlock;
}
smc_close_init(new_smc);
smc_rx_init(new_smc);
+ if (local_contact != SMC_FIRST_CONTACT) {
+ struct smc_buf_desc *buf_desc = new_smc->conn.rmb_desc;
+
+ if (!buf_desc->reused) {
+ /* register memory region for new rmb */
+ rc = smc_wr_reg_send(link,
+ buf_desc->mr_rx[SMC_SINGLE_LINK]);
+ if (rc) {
+ reason_code = SMC_CLC_DECL_INTERR;
+ goto decline_rdma_unlock;
+ }
+ }
+ }
+ smc_rmb_sync_sg_for_device(&new_smc->conn);
+
rc = smc_clc_send_accept(new_smc, local_contact);
if (rc)
- goto out_err;
+ goto out_err_unlock;
/* receive SMC Confirm CLC message */
reason_code = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc),
SMC_CLC_CONFIRM);
if (reason_code < 0)
- goto out_err;
+ goto out_err_unlock;
if (reason_code > 0)
- goto decline_rdma;
+ goto decline_rdma_unlock;
smc_conn_save_peer_info(new_smc, &cclc);
if (local_contact == SMC_FIRST_CONTACT)
smc_link_save_peer_info(link, &cclc);
@@ -826,35 +868,34 @@ static void smc_listen_work(struct work_struct *work)
rc = smc_rmb_rtoken_handling(&new_smc->conn, &cclc);
if (rc) {
reason_code = SMC_CLC_DECL_INTERR;
- goto decline_rdma;
+ goto decline_rdma_unlock;
}
if (local_contact == SMC_FIRST_CONTACT) {
rc = smc_ib_ready_link(link);
if (rc) {
reason_code = SMC_CLC_DECL_INTERR;
- goto decline_rdma;
+ goto decline_rdma_unlock;
}
/* QP confirmation over RoCE fabric */
reason_code = smc_serv_conf_first_link(new_smc);
if (reason_code < 0) {
/* peer is not aware of a problem */
rc = reason_code;
- goto out_err;
+ goto out_err_unlock;
}
if (reason_code > 0)
- goto decline_rdma;
+ goto decline_rdma_unlock;
}
smc_tx_init(new_smc);
+ mutex_unlock(&smc_create_lgr_pending);
out_connected:
sk_refcnt_debug_inc(newsmcsk);
if (newsmcsk->sk_state == SMC_INIT)
newsmcsk->sk_state = SMC_ACTIVE;
enqueue:
- if (local_contact == SMC_FIRST_CONTACT)
- mutex_unlock(&smc_create_lgr_pending);
lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
if (lsmc->sk.sk_state == SMC_LISTEN) {
smc_accept_enqueue(&lsmc->sk, newsmcsk);
@@ -868,17 +909,21 @@ enqueue:
sock_put(&lsmc->sk); /* sock_hold in smc_tcp_listen_work */
return;
+decline_rdma_unlock:
+ mutex_unlock(&smc_create_lgr_pending);
decline_rdma:
/* RDMA setup failed, switch back to TCP */
smc_conn_free(&new_smc->conn);
new_smc->use_fallback = true;
if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) {
- rc = smc_clc_send_decline(new_smc, reason_code, 0);
+ rc = smc_clc_send_decline(new_smc, reason_code);
if (rc < sizeof(struct smc_clc_msg_decline))
goto out_err;
}
goto out_connected;
+out_err_unlock:
+ mutex_unlock(&smc_create_lgr_pending);
out_err:
newsmcsk->sk_state = SMC_CLOSED;
smc_conn_free(&new_smc->conn);
@@ -935,6 +980,7 @@ static int smc_listen(struct socket *sock, int backlog)
* them to the clc socket -- copy smc socket options to clc socket
*/
smc_copy_sock_settings_to_clc(smc);
+ tcp_sk(smc->clcsock->sk)->syn_smc = 1;
rc = kernel_listen(smc->clcsock, backlog);
if (rc)
@@ -1377,6 +1423,7 @@ static int __init smc_init(void)
goto out_sock;
}
+ static_branch_enable(&tcp_have_smc);
return 0;
out_sock:
@@ -1401,6 +1448,7 @@ static void __exit smc_exit(void)
list_del_init(&lgr->list);
smc_lgr_free(lgr); /* free link group */
}
+ static_branch_disable(&tcp_have_smc);
smc_ib_unregister_client();
sock_unregister(PF_SMC);
proto_unregister(&smc_proto);
diff --git a/net/smc/smc.h b/net/smc/smc.h
index 6e44313e4467..0bee9d16cf29 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Shared Memory Communications over RDMA (SMC-R) and RoCE
*
@@ -149,7 +150,7 @@ struct smc_connection {
atomic_t sndbuf_space; /* remaining space in sndbuf */
u16 tx_cdc_seq; /* sequence # for CDC send */
spinlock_t send_lock; /* protect wr_sends */
- struct work_struct tx_work; /* retry of smc_cdc_msg_send */
+ struct delayed_work tx_work; /* retry of smc_cdc_msg_send */
struct smc_host_cdc_msg local_rx_ctrl; /* filled during event_handl.
* .prod cf. TCP rcv_nxt
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index a7294edbc221..87f7bede6eab 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Shared Memory Communications over RDMA (SMC-R) and RoCE
*
@@ -62,10 +63,12 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd,
bh_unlock_sock(&smc->sk);
}
-int smc_cdc_get_free_slot(struct smc_link *link,
+int smc_cdc_get_free_slot(struct smc_connection *conn,
struct smc_wr_buf **wr_buf,
struct smc_cdc_tx_pend **pend)
{
+ struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];
+
return smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf,
(struct smc_wr_tx_pend_priv **)pend);
}
@@ -118,8 +121,7 @@ int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn)
struct smc_wr_buf *wr_buf;
int rc;
- rc = smc_cdc_get_free_slot(&conn->lgr->lnk[SMC_SINGLE_LINK], &wr_buf,
- &pend);
+ rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend);
if (rc)
return rc;
diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h
index 8e1d76f26007..149ceda1b088 100644
--- a/net/smc/smc_cdc.h
+++ b/net/smc/smc_cdc.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Shared Memory Communications over RDMA (SMC-R) and RoCE
*
@@ -206,7 +207,8 @@ static inline void smc_cdc_msg_to_host(struct smc_host_cdc_msg *local,
struct smc_cdc_tx_pend;
-int smc_cdc_get_free_slot(struct smc_link *link, struct smc_wr_buf **wr_buf,
+int smc_cdc_get_free_slot(struct smc_connection *conn,
+ struct smc_wr_buf **wr_buf,
struct smc_cdc_tx_pend **pend);
void smc_cdc_tx_dismiss_slots(struct smc_connection *conn);
int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf,
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index 03ec058d18df..1800e16b2a02 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Shared Memory Communications over RDMA (SMC-R) and RoCE
*
@@ -95,9 +96,10 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
}
if (clcm->type == SMC_CLC_DECLINE) {
reason_code = SMC_CLC_DECL_REPLY;
- if (ntohl(((struct smc_clc_msg_decline *)buf)->peer_diagnosis)
- == SMC_CLC_DECL_SYNCERR)
+ if (((struct smc_clc_msg_decline *)buf)->hdr.flag) {
smc->conn.lgr->sync_err = true;
+ smc_lgr_terminate(smc->conn.lgr);
+ }
}
out:
@@ -105,8 +107,7 @@ out:
}
/* send CLC DECLINE message across internal TCP socket */
-int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info,
- u8 out_of_sync)
+int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info)
{
struct smc_clc_msg_decline dclc;
struct msghdr msg;
@@ -118,7 +119,7 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info,
dclc.hdr.type = SMC_CLC_DECLINE;
dclc.hdr.length = htons(sizeof(struct smc_clc_msg_decline));
dclc.hdr.version = SMC_CLC_V1;
- dclc.hdr.flag = out_of_sync ? 1 : 0;
+ dclc.hdr.flag = (peer_diag_info == SMC_CLC_DECL_SYNCERR) ? 1 : 0;
memcpy(dclc.id_for_peer, local_systemid, sizeof(local_systemid));
dclc.peer_diagnosis = htonl(peer_diag_info);
memcpy(dclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
@@ -204,13 +205,13 @@ int smc_clc_send_confirm(struct smc_sock *smc)
memcpy(&cclc.lcl.mac, &link->smcibdev->mac[link->ibport - 1], ETH_ALEN);
hton24(cclc.qpn, link->roce_qp->qp_num);
cclc.rmb_rkey =
- htonl(conn->rmb_desc->rkey[SMC_SINGLE_LINK]);
+ htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
cclc.conn_idx = 1; /* for now: 1 RMB = 1 RMBE */
cclc.rmbe_alert_token = htonl(conn->alert_token_local);
cclc.qp_mtu = min(link->path_mtu, link->peer_mtu);
cclc.rmbe_size = conn->rmbe_size_short;
- cclc.rmb_dma_addr =
- cpu_to_be64((u64)conn->rmb_desc->dma_addr[SMC_SINGLE_LINK]);
+ cclc.rmb_dma_addr = cpu_to_be64(
+ (u64)sg_dma_address(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
hton24(cclc.psn, link->psn_initial);
memcpy(cclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
@@ -256,13 +257,13 @@ int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact)
memcpy(&aclc.lcl.mac, link->smcibdev->mac[link->ibport - 1], ETH_ALEN);
hton24(aclc.qpn, link->roce_qp->qp_num);
aclc.rmb_rkey =
- htonl(conn->rmb_desc->rkey[SMC_SINGLE_LINK]);
+ htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
aclc.conn_idx = 1; /* as long as 1 RMB = 1 RMBE */
aclc.rmbe_alert_token = htonl(conn->alert_token_local);
aclc.qp_mtu = link->path_mtu;
aclc.rmbe_size = conn->rmbe_size_short,
- aclc.rmb_dma_addr =
- cpu_to_be64((u64)conn->rmb_desc->dma_addr[SMC_SINGLE_LINK]);
+ aclc.rmb_dma_addr = cpu_to_be64(
+ (u64)sg_dma_address(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
hton24(aclc.psn, link->psn_initial);
memcpy(aclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index 13db8ce177c9..12a9af1539a2 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Shared Memory Communications over RDMA (SMC-R) and RoCE
*
@@ -106,8 +107,7 @@ struct smc_ib_device;
int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
u8 expected_type);
-int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info,
- u8 out_of_sync);
+int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info);
int smc_clc_send_proposal(struct smc_sock *smc, struct smc_ib_device *smcibdev,
u8 ibport);
int smc_clc_send_confirm(struct smc_sock *smc);
diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c
index 3c2e166b5d22..48615d2ac4aa 100644
--- a/net/smc/smc_close.c
+++ b/net/smc/smc_close.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Shared Memory Communications over RDMA (SMC-R) and RoCE
*
@@ -174,15 +175,15 @@ int smc_close_active(struct smc_sock *smc)
{
struct smc_cdc_conn_state_flags *txflags =
&smc->conn.local_tx_ctrl.conn_state_flags;
- long timeout = SMC_MAX_STREAM_WAIT_TIMEOUT;
struct smc_connection *conn = &smc->conn;
struct sock *sk = &smc->sk;
int old_state;
+ long timeout;
int rc = 0;
- if (sock_flag(sk, SOCK_LINGER) &&
- !(current->flags & PF_EXITING))
- timeout = sk->sk_lingertime;
+ timeout = current->flags & PF_EXITING ?
+ 0 : sock_flag(sk, SOCK_LINGER) ?
+ sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;
again:
old_state = sk->sk_state;
@@ -208,7 +209,7 @@ again:
case SMC_ACTIVE:
smc_close_stream_wait(smc, timeout);
release_sock(sk);
- cancel_work_sync(&conn->tx_work);
+ cancel_delayed_work_sync(&conn->tx_work);
lock_sock(sk);
if (sk->sk_state == SMC_ACTIVE) {
/* send close request */
@@ -234,7 +235,7 @@ again:
if (!smc_cdc_rxed_any_close(conn))
smc_close_stream_wait(smc, timeout);
release_sock(sk);
- cancel_work_sync(&conn->tx_work);
+ cancel_delayed_work_sync(&conn->tx_work);
lock_sock(sk);
if (sk->sk_err != ECONNABORTED) {
/* confirm close from peer */
@@ -263,7 +264,9 @@ again:
/* peer sending PeerConnectionClosed will cause transition */
break;
case SMC_PROCESSABORT:
- cancel_work_sync(&conn->tx_work);
+ release_sock(sk);
+ cancel_delayed_work_sync(&conn->tx_work);
+ lock_sock(sk);
smc_close_abort(conn);
sk->sk_state = SMC_CLOSED;
smc_close_wait_tx_pends(smc);
@@ -358,7 +361,8 @@ static void smc_close_passive_work(struct work_struct *work)
case SMC_PEERCLOSEWAIT1:
if (rxflags->peer_done_writing)
sk->sk_state = SMC_PEERCLOSEWAIT2;
- /* fall through to check for closing */
+ /* fall through */
+ /* to check for closing */
case SMC_PEERCLOSEWAIT2:
case SMC_PEERFINCLOSEWAIT:
if (!smc_cdc_rxed_any_close(&smc->conn))
@@ -411,13 +415,14 @@ void smc_close_sock_put_work(struct work_struct *work)
int smc_close_shutdown_write(struct smc_sock *smc)
{
struct smc_connection *conn = &smc->conn;
- long timeout = SMC_MAX_STREAM_WAIT_TIMEOUT;
struct sock *sk = &smc->sk;
int old_state;
+ long timeout;
int rc = 0;
- if (sock_flag(sk, SOCK_LINGER))
- timeout = sk->sk_lingertime;
+ timeout = current->flags & PF_EXITING ?
+ 0 : sock_flag(sk, SOCK_LINGER) ?
+ sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;
again:
old_state = sk->sk_state;
@@ -425,7 +430,7 @@ again:
case SMC_ACTIVE:
smc_close_stream_wait(smc, timeout);
release_sock(sk);
- cancel_work_sync(&conn->tx_work);
+ cancel_delayed_work_sync(&conn->tx_work);
lock_sock(sk);
/* send close wr request */
rc = smc_close_wr(conn);
@@ -439,7 +444,7 @@ again:
if (!smc_cdc_rxed_any_close(conn))
smc_close_stream_wait(smc, timeout);
release_sock(sk);
- cancel_work_sync(&conn->tx_work);
+ cancel_delayed_work_sync(&conn->tx_work);
lock_sock(sk);
/* confirm close from peer */
rc = smc_close_wr(conn);
diff --git a/net/smc/smc_close.h b/net/smc/smc_close.h
index 4a3d99a8d7cb..ed82506b1b0a 100644
--- a/net/smc/smc_close.h
+++ b/net/smc/smc_close.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Shared Memory Communications over RDMA (SMC-R) and RoCE
*
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 3ac09a629ea1..2578fbd95664 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Shared Memory Communications over RDMA (SMC-R) and RoCE
*
@@ -25,8 +26,9 @@
#include "smc_cdc.h"
#include "smc_close.h"
-#define SMC_LGR_NUM_INCR 256
-#define SMC_LGR_FREE_DELAY (600 * HZ)
+#define SMC_LGR_NUM_INCR 256
+#define SMC_LGR_FREE_DELAY_SERV (600 * HZ)
+#define SMC_LGR_FREE_DELAY_CLNT (SMC_LGR_FREE_DELAY_SERV + 10)
static u32 smc_lgr_num; /* unique link group number */
@@ -107,8 +109,15 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn)
__smc_lgr_unregister_conn(conn);
}
write_unlock_bh(&lgr->conns_lock);
- if (reduced && !lgr->conns_num)
- schedule_delayed_work(&lgr->free_work, SMC_LGR_FREE_DELAY);
+ if (!reduced || lgr->conns_num)
+ return;
+ /* client link group creation always follows the server link group
+ * creation. For client use a somewhat higher removal delay time,
+ * otherwise there is a risk of out-of-sync link groups.
+ */
+ mod_delayed_work(system_wq, &lgr->free_work,
+ lgr->role == SMC_CLNT ? SMC_LGR_FREE_DELAY_CLNT :
+ SMC_LGR_FREE_DELAY_SERV);
}
static void smc_lgr_free_work(struct work_struct *work)
@@ -175,7 +184,6 @@ static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr,
rc = smc_wr_alloc_link_mem(lnk);
if (rc)
goto free_lgr;
- init_waitqueue_head(&lnk->wr_tx_wait);
rc = smc_ib_create_protection_domain(lnk);
if (rc)
goto free_link_mem;
@@ -207,17 +215,14 @@ out:
return rc;
}
-static void smc_sndbuf_unuse(struct smc_connection *conn)
+static void smc_buf_unuse(struct smc_connection *conn)
{
if (conn->sndbuf_desc) {
conn->sndbuf_desc->used = 0;
conn->sndbuf_size = 0;
}
-}
-
-static void smc_rmb_unuse(struct smc_connection *conn)
-{
if (conn->rmb_desc) {
+ conn->rmb_desc->reused = true;
conn->rmb_desc->used = 0;
conn->rmbe_size = 0;
}
@@ -232,8 +237,7 @@ void smc_conn_free(struct smc_connection *conn)
return;
smc_cdc_tx_dismiss_slots(conn);
smc_lgr_unregister_conn(conn);
- smc_rmb_unuse(conn);
- smc_sndbuf_unuse(conn);
+ smc_buf_unuse(conn);
}
static void smc_link_clear(struct smc_link *lnk)
@@ -246,48 +250,57 @@ static void smc_link_clear(struct smc_link *lnk)
smc_wr_free_link_mem(lnk);
}
-static void smc_lgr_free_sndbufs(struct smc_link_group *lgr)
+static void smc_buf_free(struct smc_buf_desc *buf_desc, struct smc_link *lnk,
+ bool is_rmb)
{
- struct smc_buf_desc *sndbuf_desc, *bf_desc;
- int i;
-
- for (i = 0; i < SMC_RMBE_SIZES; i++) {
- list_for_each_entry_safe(sndbuf_desc, bf_desc, &lgr->sndbufs[i],
- list) {
- list_del(&sndbuf_desc->list);
- smc_ib_buf_unmap(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
- smc_uncompress_bufsize(i),
- sndbuf_desc, DMA_TO_DEVICE);
- kfree(sndbuf_desc->cpu_addr);
- kfree(sndbuf_desc);
- }
+ if (is_rmb) {
+ if (buf_desc->mr_rx[SMC_SINGLE_LINK])
+ smc_ib_put_memory_region(
+ buf_desc->mr_rx[SMC_SINGLE_LINK]);
+ smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc,
+ DMA_FROM_DEVICE);
+ } else {
+ smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc,
+ DMA_TO_DEVICE);
}
+ sg_free_table(&buf_desc->sgt[SMC_SINGLE_LINK]);
+ if (buf_desc->cpu_addr)
+ free_pages((unsigned long)buf_desc->cpu_addr, buf_desc->order);
+ kfree(buf_desc);
}
-static void smc_lgr_free_rmbs(struct smc_link_group *lgr)
+static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
{
- struct smc_buf_desc *rmb_desc, *bf_desc;
struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
+ struct smc_buf_desc *buf_desc, *bf_desc;
+ struct list_head *buf_list;
int i;
for (i = 0; i < SMC_RMBE_SIZES; i++) {
- list_for_each_entry_safe(rmb_desc, bf_desc, &lgr->rmbs[i],
+ if (is_rmb)
+ buf_list = &lgr->rmbs[i];
+ else
+ buf_list = &lgr->sndbufs[i];
+ list_for_each_entry_safe(buf_desc, bf_desc, buf_list,
list) {
- list_del(&rmb_desc->list);
- smc_ib_buf_unmap(lnk->smcibdev,
- smc_uncompress_bufsize(i),
- rmb_desc, DMA_FROM_DEVICE);
- kfree(rmb_desc->cpu_addr);
- kfree(rmb_desc);
+ list_del(&buf_desc->list);
+ smc_buf_free(buf_desc, lnk, is_rmb);
}
}
}
+static void smc_lgr_free_bufs(struct smc_link_group *lgr)
+{
+ /* free send buffers */
+ __smc_lgr_free_bufs(lgr, false);
+ /* free rmbs */
+ __smc_lgr_free_bufs(lgr, true);
+}
+
/* remove a link group */
void smc_lgr_free(struct smc_link_group *lgr)
{
- smc_lgr_free_rmbs(lgr);
- smc_lgr_free_sndbufs(lgr);
+ smc_lgr_free_bufs(lgr);
smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]);
kfree(lgr);
}
@@ -368,10 +381,14 @@ static int smc_link_determine_gid(struct smc_link_group *lgr)
if (ib_query_gid(lnk->smcibdev->ibdev, lnk->ibport, i, &gid,
&gattr))
continue;
- if (gattr.ndev &&
- (vlan_dev_vlan_id(gattr.ndev) == lgr->vlan_id)) {
- lnk->gid = gid;
- return 0;
+ if (gattr.ndev) {
+ if (is_vlan_dev(gattr.ndev) &&
+ vlan_dev_vlan_id(gattr.ndev) == lgr->vlan_id) {
+ lnk->gid = gid;
+ dev_put(gattr.ndev);
+ return 0;
+ }
+ dev_put(gattr.ndev);
}
}
return -ENODEV;
@@ -452,45 +469,25 @@ out:
return rc ? rc : local_contact;
}
-/* try to reuse a sndbuf description slot of the sndbufs list for a certain
- * buf_size; if not available, return NULL
+/* try to reuse a sndbuf or rmb description slot for a certain
+ * buffer size; if not available, return NULL
*/
static inline
-struct smc_buf_desc *smc_sndbuf_get_slot(struct smc_link_group *lgr,
- int compressed_bufsize)
+struct smc_buf_desc *smc_buf_get_slot(struct smc_link_group *lgr,
+ int compressed_bufsize,
+ rwlock_t *lock,
+ struct list_head *buf_list)
{
- struct smc_buf_desc *sndbuf_slot;
-
- read_lock_bh(&lgr->sndbufs_lock);
- list_for_each_entry(sndbuf_slot, &lgr->sndbufs[compressed_bufsize],
- list) {
- if (cmpxchg(&sndbuf_slot->used, 0, 1) == 0) {
- read_unlock_bh(&lgr->sndbufs_lock);
- return sndbuf_slot;
- }
- }
- read_unlock_bh(&lgr->sndbufs_lock);
- return NULL;
-}
+ struct smc_buf_desc *buf_slot;
-/* try to reuse an rmb description slot of the rmbs list for a certain
- * rmbe_size; if not available, return NULL
- */
-static inline
-struct smc_buf_desc *smc_rmb_get_slot(struct smc_link_group *lgr,
- int compressed_bufsize)
-{
- struct smc_buf_desc *rmb_slot;
-
- read_lock_bh(&lgr->rmbs_lock);
- list_for_each_entry(rmb_slot, &lgr->rmbs[compressed_bufsize],
- list) {
- if (cmpxchg(&rmb_slot->used, 0, 1) == 0) {
- read_unlock_bh(&lgr->rmbs_lock);
- return rmb_slot;
+ read_lock_bh(lock);
+ list_for_each_entry(buf_slot, buf_list, list) {
+ if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
+ read_unlock_bh(lock);
+ return buf_slot;
}
}
- read_unlock_bh(&lgr->rmbs_lock);
+ read_unlock_bh(lock);
return NULL;
}
@@ -503,136 +500,186 @@ static inline int smc_rmb_wnd_update_limit(int rmbe_size)
return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
}
-/* create the tx buffer for an SMC socket */
-int smc_sndbuf_create(struct smc_sock *smc)
+static struct smc_buf_desc *smc_new_buf_create(struct smc_link_group *lgr,
+ bool is_rmb, int bufsize)
{
- struct smc_connection *conn = &smc->conn;
- struct smc_link_group *lgr = conn->lgr;
- int tmp_bufsize, tmp_bufsize_short;
- struct smc_buf_desc *sndbuf_desc;
+ struct smc_buf_desc *buf_desc;
+ struct smc_link *lnk;
int rc;
- /* use socket send buffer size (w/o overhead) as start value */
- for (tmp_bufsize_short = smc_compress_bufsize(smc->sk.sk_sndbuf / 2);
- tmp_bufsize_short >= 0; tmp_bufsize_short--) {
- tmp_bufsize = smc_uncompress_bufsize(tmp_bufsize_short);
- /* check for reusable sndbuf_slot in the link group */
- sndbuf_desc = smc_sndbuf_get_slot(lgr, tmp_bufsize_short);
- if (sndbuf_desc) {
- memset(sndbuf_desc->cpu_addr, 0, tmp_bufsize);
- break; /* found reusable slot */
- }
- /* try to alloc a new send buffer */
- sndbuf_desc = kzalloc(sizeof(*sndbuf_desc), GFP_KERNEL);
- if (!sndbuf_desc)
- break; /* give up with -ENOMEM */
- sndbuf_desc->cpu_addr = kzalloc(tmp_bufsize,
- GFP_KERNEL | __GFP_NOWARN |
- __GFP_NOMEMALLOC |
- __GFP_NORETRY);
- if (!sndbuf_desc->cpu_addr) {
- kfree(sndbuf_desc);
- sndbuf_desc = NULL;
- /* if send buffer allocation has failed,
- * try a smaller one
- */
- continue;
- }
- rc = smc_ib_buf_map(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
- tmp_bufsize, sndbuf_desc,
- DMA_TO_DEVICE);
+ /* try to alloc a new buffer */
+ buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
+ if (!buf_desc)
+ return ERR_PTR(-ENOMEM);
+
+ buf_desc->cpu_addr =
+ (void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN |
+ __GFP_NOMEMALLOC |
+ __GFP_NORETRY | __GFP_ZERO,
+ get_order(bufsize));
+ if (!buf_desc->cpu_addr) {
+ kfree(buf_desc);
+ return ERR_PTR(-EAGAIN);
+ }
+ buf_desc->order = get_order(bufsize);
+
+ /* build the sg table from the pages */
+ lnk = &lgr->lnk[SMC_SINGLE_LINK];
+ rc = sg_alloc_table(&buf_desc->sgt[SMC_SINGLE_LINK], 1,
+ GFP_KERNEL);
+ if (rc) {
+ smc_buf_free(buf_desc, lnk, is_rmb);
+ return ERR_PTR(rc);
+ }
+ sg_set_buf(buf_desc->sgt[SMC_SINGLE_LINK].sgl,
+ buf_desc->cpu_addr, bufsize);
+
+ /* map sg table to DMA address */
+ rc = smc_ib_buf_map_sg(lnk->smcibdev, buf_desc,
+ is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
+ /* SMC protocol depends on mapping to one DMA address only */
+ if (rc != 1) {
+ smc_buf_free(buf_desc, lnk, is_rmb);
+ return ERR_PTR(-EAGAIN);
+ }
+
+ /* create a new memory region for the RMB */
+ if (is_rmb) {
+ rc = smc_ib_get_memory_region(lnk->roce_pd,
+ IB_ACCESS_REMOTE_WRITE |
+ IB_ACCESS_LOCAL_WRITE,
+ buf_desc);
if (rc) {
- kfree(sndbuf_desc->cpu_addr);
- kfree(sndbuf_desc);
- sndbuf_desc = NULL;
- continue; /* if mapping failed, try smaller one */
+ smc_buf_free(buf_desc, lnk, is_rmb);
+ return ERR_PTR(rc);
}
- sndbuf_desc->used = 1;
- write_lock_bh(&lgr->sndbufs_lock);
- list_add(&sndbuf_desc->list,
- &lgr->sndbufs[tmp_bufsize_short]);
- write_unlock_bh(&lgr->sndbufs_lock);
- break;
- }
- if (sndbuf_desc && sndbuf_desc->cpu_addr) {
- conn->sndbuf_desc = sndbuf_desc;
- conn->sndbuf_size = tmp_bufsize;
- smc->sk.sk_sndbuf = tmp_bufsize * 2;
- atomic_set(&conn->sndbuf_space, tmp_bufsize);
- return 0;
- } else {
- return -ENOMEM;
}
+
+ return buf_desc;
}
-/* create the RMB for an SMC socket (even though the SMC protocol
- * allows more than one RMB-element per RMB, the Linux implementation
- * uses just one RMB-element per RMB, i.e. uses an extra RMB for every
- * connection in a link group
- */
-int smc_rmb_create(struct smc_sock *smc)
+static int __smc_buf_create(struct smc_sock *smc, bool is_rmb)
{
struct smc_connection *conn = &smc->conn;
struct smc_link_group *lgr = conn->lgr;
- int tmp_bufsize, tmp_bufsize_short;
- struct smc_buf_desc *rmb_desc;
- int rc;
+ struct smc_buf_desc *buf_desc = NULL;
+ struct list_head *buf_list;
+ int bufsize, bufsize_short;
+ int sk_buf_size;
+ rwlock_t *lock;
+
+ if (is_rmb)
+ /* use socket recv buffer size (w/o overhead) as start value */
+ sk_buf_size = smc->sk.sk_rcvbuf / 2;
+ else
+ /* use socket send buffer size (w/o overhead) as start value */
+ sk_buf_size = smc->sk.sk_sndbuf / 2;
+
+ for (bufsize_short = smc_compress_bufsize(smc->sk.sk_sndbuf / 2);
+ bufsize_short >= 0; bufsize_short--) {
+
+ if (is_rmb) {
+ lock = &lgr->rmbs_lock;
+ buf_list = &lgr->rmbs[bufsize_short];
+ } else {
+ lock = &lgr->sndbufs_lock;
+ buf_list = &lgr->sndbufs[bufsize_short];
+ }
+ bufsize = smc_uncompress_bufsize(bufsize_short);
+ if ((1 << get_order(bufsize)) > SG_MAX_SINGLE_ALLOC)
+ continue;
- /* use socket recv buffer size (w/o overhead) as start value */
- for (tmp_bufsize_short = smc_compress_bufsize(smc->sk.sk_rcvbuf / 2);
- tmp_bufsize_short >= 0; tmp_bufsize_short--) {
- tmp_bufsize = smc_uncompress_bufsize(tmp_bufsize_short);
- /* check for reusable rmb_slot in the link group */
- rmb_desc = smc_rmb_get_slot(lgr, tmp_bufsize_short);
- if (rmb_desc) {
- memset(rmb_desc->cpu_addr, 0, tmp_bufsize);
+ /* check for reusable slot in the link group */
+ buf_desc = smc_buf_get_slot(lgr, bufsize_short, lock, buf_list);
+ if (buf_desc) {
+ memset(buf_desc->cpu_addr, 0, bufsize);
break; /* found reusable slot */
}
- /* try to alloc a new RMB */
- rmb_desc = kzalloc(sizeof(*rmb_desc), GFP_KERNEL);
- if (!rmb_desc)
- break; /* give up with -ENOMEM */
- rmb_desc->cpu_addr = kzalloc(tmp_bufsize,
- GFP_KERNEL | __GFP_NOWARN |
- __GFP_NOMEMALLOC |
- __GFP_NORETRY);
- if (!rmb_desc->cpu_addr) {
- kfree(rmb_desc);
- rmb_desc = NULL;
- /* if RMB allocation has failed,
- * try a smaller one
- */
+
+ buf_desc = smc_new_buf_create(lgr, is_rmb, bufsize);
+ if (PTR_ERR(buf_desc) == -ENOMEM)
+ break;
+ if (IS_ERR(buf_desc))
continue;
- }
- rc = smc_ib_buf_map(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
- tmp_bufsize, rmb_desc,
- DMA_FROM_DEVICE);
- if (rc) {
- kfree(rmb_desc->cpu_addr);
- kfree(rmb_desc);
- rmb_desc = NULL;
- continue; /* if mapping failed, try smaller one */
- }
- rmb_desc->rkey[SMC_SINGLE_LINK] =
- lgr->lnk[SMC_SINGLE_LINK].roce_pd->unsafe_global_rkey;
- rmb_desc->used = 1;
- write_lock_bh(&lgr->rmbs_lock);
- list_add(&rmb_desc->list,
- &lgr->rmbs[tmp_bufsize_short]);
- write_unlock_bh(&lgr->rmbs_lock);
- break;
+
+ buf_desc->used = 1;
+ write_lock_bh(lock);
+ list_add(&buf_desc->list, buf_list);
+ write_unlock_bh(lock);
+ break; /* found */
}
- if (rmb_desc && rmb_desc->cpu_addr) {
- conn->rmb_desc = rmb_desc;
- conn->rmbe_size = tmp_bufsize;
- conn->rmbe_size_short = tmp_bufsize_short;
- smc->sk.sk_rcvbuf = tmp_bufsize * 2;
+
+ if (IS_ERR(buf_desc))
+ return -ENOMEM;
+
+ if (is_rmb) {
+ conn->rmb_desc = buf_desc;
+ conn->rmbe_size = bufsize;
+ conn->rmbe_size_short = bufsize_short;
+ smc->sk.sk_rcvbuf = bufsize * 2;
atomic_set(&conn->bytes_to_rcv, 0);
- conn->rmbe_update_limit = smc_rmb_wnd_update_limit(tmp_bufsize);
- return 0;
+ conn->rmbe_update_limit = smc_rmb_wnd_update_limit(bufsize);
} else {
- return -ENOMEM;
+ conn->sndbuf_desc = buf_desc;
+ conn->sndbuf_size = bufsize;
+ smc->sk.sk_sndbuf = bufsize * 2;
+ atomic_set(&conn->sndbuf_space, bufsize);
}
+ return 0;
+}
+
+void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn)
+{
+ struct smc_link_group *lgr = conn->lgr;
+
+ smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
+ conn->sndbuf_desc, DMA_TO_DEVICE);
+}
+
+void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
+{
+ struct smc_link_group *lgr = conn->lgr;
+
+ smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
+ conn->sndbuf_desc, DMA_TO_DEVICE);
+}
+
+void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
+{
+ struct smc_link_group *lgr = conn->lgr;
+
+ smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
+ conn->rmb_desc, DMA_FROM_DEVICE);
+}
+
+void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
+{
+ struct smc_link_group *lgr = conn->lgr;
+
+ smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
+ conn->rmb_desc, DMA_FROM_DEVICE);
+}
+
+/* create the send and receive buffer for an SMC socket;
+ * receive buffers are called RMBs;
+ * (even though the SMC protocol allows more than one RMB-element per RMB,
+ * the Linux implementation uses just one RMB-element per RMB, i.e. uses an
+ * extra RMB for every connection in a link group
+ */
+int smc_buf_create(struct smc_sock *smc)
+{
+ int rc;
+
+ /* create send buffer */
+ rc = __smc_buf_create(smc, false);
+ if (rc)
+ return rc;
+ /* create rmb */
+ rc = __smc_buf_create(smc, true);
+ if (rc)
+ smc_buf_free(smc->conn.sndbuf_desc,
+ &smc->conn.lgr->lnk[SMC_SINGLE_LINK], false);
+ return rc;
}
static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index b013cb43a327..fe691bf9af91 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Shared Memory Communications over RDMA (SMC-R) and RoCE
*
@@ -37,6 +38,14 @@ struct smc_wr_buf {
u8 raw[SMC_WR_BUF_SIZE];
};
+#define SMC_WR_REG_MR_WAIT_TIME (5 * HZ)/* wait time for ib_wr_reg_mr result */
+
+enum smc_wr_reg_state {
+ POSTED, /* ib_wr_reg_mr request posted */
+ CONFIRMED, /* ib_wr_reg_mr response: successful */
+ FAILED /* ib_wr_reg_mr response: failure */
+};
+
struct smc_link {
struct smc_ib_device *smcibdev; /* ib-device */
u8 ibport; /* port - values 1 | 2 */
@@ -65,6 +74,10 @@ struct smc_link {
u64 wr_rx_id; /* seq # of last recv WR */
u32 wr_rx_cnt; /* number of WR recv buffers */
+ struct ib_reg_wr wr_reg; /* WR register memory region */
+ wait_queue_head_t wr_reg_wait; /* wait for wr_reg result */
+ enum smc_wr_reg_state wr_reg_state; /* state of wr_reg request */
+
union ib_gid gid; /* gid matching used vlan id */
u32 peer_qpn; /* QP number of peer */
enum ib_mtu path_mtu; /* used mtu */
@@ -90,14 +103,15 @@ struct smc_link {
/* tx/rx buffer list element for sndbufs list and rmbs list of a lgr */
struct smc_buf_desc {
struct list_head list;
- u64 dma_addr[SMC_LINKS_PER_LGR_MAX];
- /* mapped address of buffer */
void *cpu_addr; /* virtual address of buffer */
- u32 rkey[SMC_LINKS_PER_LGR_MAX];
- /* for rmb only:
- * rkey provided to peer
+ struct sg_table sgt[SMC_LINKS_PER_LGR_MAX];/* virtual buffer */
+ struct ib_mr *mr_rx[SMC_LINKS_PER_LGR_MAX];
+ /* for rmb only: memory region
+ * incl. rkey provided to peer
*/
+ u32 order; /* allocation order */
u32 used; /* currently used / unused */
+ bool reused; /* new created / reused */
};
struct smc_rtoken { /* address/key of remote RMB */
@@ -173,9 +187,11 @@ struct smc_clc_msg_accept_confirm;
void smc_lgr_free(struct smc_link_group *lgr);
void smc_lgr_terminate(struct smc_link_group *lgr);
-int smc_sndbuf_create(struct smc_sock *smc);
-int smc_rmb_create(struct smc_sock *smc);
+int smc_buf_create(struct smc_sock *smc);
int smc_rmb_rtoken_handling(struct smc_connection *conn,
struct smc_clc_msg_accept_confirm *clc);
-
+void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn);
+void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn);
+void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn);
+void smc_rmb_sync_sg_for_device(struct smc_connection *conn);
#endif
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index b31715505a35..90f1a7f9085c 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Shared Memory Communications over RDMA (SMC-R) and RoCE
*
@@ -13,6 +14,7 @@
#include <linux/random.h>
#include <linux/workqueue.h>
+#include <linux/scatterlist.h>
#include <rdma/ib_verbs.h>
#include "smc_pnet.h"
@@ -192,8 +194,7 @@ int smc_ib_create_protection_domain(struct smc_link *lnk)
{
int rc;
- lnk->roce_pd = ib_alloc_pd(lnk->smcibdev->ibdev,
- IB_PD_UNSAFE_GLOBAL_RKEY);
+ lnk->roce_pd = ib_alloc_pd(lnk->smcibdev->ibdev, 0);
rc = PTR_ERR_OR_ZERO(lnk->roce_pd);
if (IS_ERR(lnk->roce_pd))
lnk->roce_pd = NULL;
@@ -232,10 +233,10 @@ int smc_ib_create_queue_pair(struct smc_link *lnk)
.recv_cq = lnk->smcibdev->roce_cq_recv,
.srq = NULL,
.cap = {
- .max_send_wr = SMC_WR_BUF_CNT,
/* include unsolicited rdma_writes as well,
* there are max. 2 RDMA_WRITE per 1 WR_SEND
*/
+ .max_send_wr = SMC_WR_BUF_CNT * 3,
.max_recv_wr = SMC_WR_BUF_CNT * 3,
.max_send_sge = SMC_IB_MAX_SEND_SGE,
.max_recv_sge = 1,
@@ -254,56 +255,132 @@ int smc_ib_create_queue_pair(struct smc_link *lnk)
return rc;
}
-/* map a new TX or RX buffer to DMA */
-int smc_ib_buf_map(struct smc_ib_device *smcibdev, int buf_size,
- struct smc_buf_desc *buf_slot,
- enum dma_data_direction data_direction)
+void smc_ib_put_memory_region(struct ib_mr *mr)
{
- int rc = 0;
+ ib_dereg_mr(mr);
+}
- if (buf_slot->dma_addr[SMC_SINGLE_LINK])
- return rc; /* already mapped */
- buf_slot->dma_addr[SMC_SINGLE_LINK] =
- ib_dma_map_single(smcibdev->ibdev, buf_slot->cpu_addr,
- buf_size, data_direction);
- if (ib_dma_mapping_error(smcibdev->ibdev,
- buf_slot->dma_addr[SMC_SINGLE_LINK]))
- rc = -EIO;
- return rc;
+static int smc_ib_map_mr_sg(struct smc_buf_desc *buf_slot)
+{
+ unsigned int offset = 0;
+ int sg_num;
+
+ /* map the largest prefix of a dma mapped SG list */
+ sg_num = ib_map_mr_sg(buf_slot->mr_rx[SMC_SINGLE_LINK],
+ buf_slot->sgt[SMC_SINGLE_LINK].sgl,
+ buf_slot->sgt[SMC_SINGLE_LINK].orig_nents,
+ &offset, PAGE_SIZE);
+
+ return sg_num;
+}
+
+/* Allocate a memory region and map the dma mapped SG list of buf_slot */
+int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags,
+ struct smc_buf_desc *buf_slot)
+{
+ if (buf_slot->mr_rx[SMC_SINGLE_LINK])
+ return 0; /* already done */
+
+ buf_slot->mr_rx[SMC_SINGLE_LINK] =
+ ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, 1 << buf_slot->order);
+ if (IS_ERR(buf_slot->mr_rx[SMC_SINGLE_LINK])) {
+ int rc;
+
+ rc = PTR_ERR(buf_slot->mr_rx[SMC_SINGLE_LINK]);
+ buf_slot->mr_rx[SMC_SINGLE_LINK] = NULL;
+ return rc;
+ }
+
+ if (smc_ib_map_mr_sg(buf_slot) != 1)
+ return -EINVAL;
+
+ return 0;
+}
+
+/* synchronize buffer usage for cpu access */
+void smc_ib_sync_sg_for_cpu(struct smc_ib_device *smcibdev,
+ struct smc_buf_desc *buf_slot,
+ enum dma_data_direction data_direction)
+{
+ struct scatterlist *sg;
+ unsigned int i;
+
+ /* for now there is just one DMA address */
+ for_each_sg(buf_slot->sgt[SMC_SINGLE_LINK].sgl, sg,
+ buf_slot->sgt[SMC_SINGLE_LINK].nents, i) {
+ if (!sg_dma_len(sg))
+ break;
+ ib_dma_sync_single_for_cpu(smcibdev->ibdev,
+ sg_dma_address(sg),
+ sg_dma_len(sg),
+ data_direction);
+ }
+}
+
+/* synchronize buffer usage for device access */
+void smc_ib_sync_sg_for_device(struct smc_ib_device *smcibdev,
+ struct smc_buf_desc *buf_slot,
+ enum dma_data_direction data_direction)
+{
+ struct scatterlist *sg;
+ unsigned int i;
+
+ /* for now there is just one DMA address */
+ for_each_sg(buf_slot->sgt[SMC_SINGLE_LINK].sgl, sg,
+ buf_slot->sgt[SMC_SINGLE_LINK].nents, i) {
+ if (!sg_dma_len(sg))
+ break;
+ ib_dma_sync_single_for_device(smcibdev->ibdev,
+ sg_dma_address(sg),
+ sg_dma_len(sg),
+ data_direction);
+ }
}
-void smc_ib_buf_unmap(struct smc_ib_device *smcibdev, int buf_size,
+/* Map a new TX or RX buffer SG-table to DMA */
+int smc_ib_buf_map_sg(struct smc_ib_device *smcibdev,
struct smc_buf_desc *buf_slot,
enum dma_data_direction data_direction)
{
- if (!buf_slot->dma_addr[SMC_SINGLE_LINK])
+ int mapped_nents;
+
+ mapped_nents = ib_dma_map_sg(smcibdev->ibdev,
+ buf_slot->sgt[SMC_SINGLE_LINK].sgl,
+ buf_slot->sgt[SMC_SINGLE_LINK].orig_nents,
+ data_direction);
+ if (!mapped_nents)
+ return -ENOMEM;
+
+ return mapped_nents;
+}
+
+void smc_ib_buf_unmap_sg(struct smc_ib_device *smcibdev,
+ struct smc_buf_desc *buf_slot,
+ enum dma_data_direction data_direction)
+{
+ if (!buf_slot->sgt[SMC_SINGLE_LINK].sgl->dma_address)
return; /* already unmapped */
- ib_dma_unmap_single(smcibdev->ibdev, *buf_slot->dma_addr, buf_size,
- data_direction);
- buf_slot->dma_addr[SMC_SINGLE_LINK] = 0;
+
+ ib_dma_unmap_sg(smcibdev->ibdev,
+ buf_slot->sgt[SMC_SINGLE_LINK].sgl,
+ buf_slot->sgt[SMC_SINGLE_LINK].orig_nents,
+ data_direction);
+ buf_slot->sgt[SMC_SINGLE_LINK].sgl->dma_address = 0;
}
static int smc_ib_fill_gid_and_mac(struct smc_ib_device *smcibdev, u8 ibport)
{
- struct net_device *ndev;
+ struct ib_gid_attr gattr;
int rc;
rc = ib_query_gid(smcibdev->ibdev, ibport, 0,
- &smcibdev->gid[ibport - 1], NULL);
- /* the SMC protocol requires specification of the roce MAC address;
- * if net_device cannot be determined, it can be derived from gid 0
- */
- ndev = smcibdev->ibdev->get_netdev(smcibdev->ibdev, ibport);
- if (ndev) {
- memcpy(&smcibdev->mac, ndev->dev_addr, ETH_ALEN);
- } else if (!rc) {
- memcpy(&smcibdev->mac[ibport - 1][0],
- &smcibdev->gid[ibport - 1].raw[8], 3);
- memcpy(&smcibdev->mac[ibport - 1][3],
- &smcibdev->gid[ibport - 1].raw[13], 3);
- smcibdev->mac[ibport - 1][0] &= ~0x02;
- }
- return rc;
+ &smcibdev->gid[ibport - 1], &gattr);
+ if (rc || !gattr.ndev)
+ return -ENODEV;
+
+ memcpy(smcibdev->mac[ibport - 1], gattr.ndev->dev_addr, ETH_ALEN);
+ dev_put(gattr.ndev);
+ return 0;
}
/* Create an identifier unique for this instance of SMC-R.
@@ -334,6 +411,7 @@ int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport)
&smcibdev->pattr[ibport - 1]);
if (rc)
goto out;
+ /* the SMC protocol requires specification of the RoCE MAC address */
rc = smc_ib_fill_gid_and_mac(smcibdev, ibport);
if (rc)
goto out;
diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h
index b567152a526d..e90630dadf8e 100644
--- a/net/smc/smc_ib.h
+++ b/net/smc/smc_ib.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Shared Memory Communications over RDMA (SMC-R) and RoCE
*
@@ -51,12 +52,12 @@ int smc_ib_register_client(void) __init;
void smc_ib_unregister_client(void);
bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport);
int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport);
-int smc_ib_buf_map(struct smc_ib_device *smcibdev, int buf_size,
- struct smc_buf_desc *buf_slot,
- enum dma_data_direction data_direction);
-void smc_ib_buf_unmap(struct smc_ib_device *smcibdev, int bufsize,
+int smc_ib_buf_map_sg(struct smc_ib_device *smcibdev,
struct smc_buf_desc *buf_slot,
enum dma_data_direction data_direction);
+void smc_ib_buf_unmap_sg(struct smc_ib_device *smcibdev,
+ struct smc_buf_desc *buf_slot,
+ enum dma_data_direction data_direction);
void smc_ib_dealloc_protection_domain(struct smc_link *lnk);
int smc_ib_create_protection_domain(struct smc_link *lnk);
void smc_ib_destroy_queue_pair(struct smc_link *lnk);
@@ -65,6 +66,13 @@ int smc_ib_ready_link(struct smc_link *lnk);
int smc_ib_modify_qp_rts(struct smc_link *lnk);
int smc_ib_modify_qp_reset(struct smc_link *lnk);
long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev);
-
-
+int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags,
+ struct smc_buf_desc *buf_slot);
+void smc_ib_put_memory_region(struct ib_mr *mr);
+void smc_ib_sync_sg_for_cpu(struct smc_ib_device *smcibdev,
+ struct smc_buf_desc *buf_slot,
+ enum dma_data_direction data_direction);
+void smc_ib_sync_sg_for_device(struct smc_ib_device *smcibdev,
+ struct smc_buf_desc *buf_slot,
+ enum dma_data_direction data_direction);
#endif
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index c2f9165d13ef..92fe4cc8c82c 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Shared Memory Communications over RDMA (SMC-R) and RoCE
*
diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h
index b472f853953a..51b27ce90dbd 100644
--- a/net/smc/smc_llc.h
+++ b/net/smc/smc_llc.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Shared Memory Communications over RDMA (SMC-R) and RoCE
*
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index 78f7af28ae4f..74568cdbca70 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Shared Memory Communications over RDMA (SMC-R) and RoCE
*
@@ -181,8 +182,10 @@ static int smc_pnet_enter(struct smc_pnetentry *new_pnetelem)
sizeof(new_pnetelem->ndev->name)) ||
smc_pnet_same_ibname(pnetelem,
new_pnetelem->smcibdev->ibdev->name,
- new_pnetelem->ib_port))
+ new_pnetelem->ib_port)) {
+ dev_put(pnetelem->ndev);
goto found;
+ }
}
list_add_tail(&new_pnetelem->list, &smc_pnettable.pnetlist);
rc = 0;
diff --git a/net/smc/smc_pnet.h b/net/smc/smc_pnet.h
index c4f1bccd4358..5a29519db976 100644
--- a/net/smc/smc_pnet.h
+++ b/net/smc/smc_pnet.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Shared Memory Communications over RDMA (SMC-R) and RoCE
*
diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c
index f0c8b089f770..cbf58637ee14 100644
--- a/net/smc/smc_rx.c
+++ b/net/smc/smc_rx.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Shared Memory Communications over RDMA (SMC-R) and RoCE
*
@@ -148,6 +149,8 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg, size_t len,
read_done = sock_intr_errno(timeo);
break;
}
+ if (!timeo)
+ return -EAGAIN;
}
if (!atomic_read(&conn->bytes_to_rcv)) {
@@ -170,6 +173,7 @@ copy:
copylen, conn->rmbe_size - cons.count);
chunk_len_sum = chunk_len;
chunk_off = cons.count;
+ smc_rmb_sync_sg_for_cpu(conn);
for (chunk = 0; chunk < 2; chunk++) {
if (!(flags & MSG_TRUNC)) {
rc = memcpy_to_msg(msg, rcvbuf_base + chunk_off,
@@ -177,6 +181,7 @@ copy:
if (rc) {
if (!read_done)
read_done = -EFAULT;
+ smc_rmb_sync_sg_for_device(conn);
goto out;
}
}
@@ -190,6 +195,7 @@ copy:
chunk_len_sum += chunk_len;
chunk_off = 0; /* modulo offset in recv ring buffer */
}
+ smc_rmb_sync_sg_for_device(conn);
/* update cursors */
if (!(flags & MSG_PEEK)) {
diff --git a/net/smc/smc_rx.h b/net/smc/smc_rx.h
index b5b80e1f8b0f..3a32b59bf06c 100644
--- a/net/smc/smc_rx.h
+++ b/net/smc/smc_rx.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Shared Memory Communications over RDMA (SMC-R) and RoCE
*
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index 21ec1832ab51..c48dc2d5fd3a 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Shared Memory Communications over RDMA (SMC-R) and RoCE
*
@@ -24,6 +25,8 @@
#include "smc_cdc.h"
#include "smc_tx.h"
+#define SMC_TX_WORK_DELAY HZ
+
/***************************** sndbuf producer *******************************/
/* callback implementation for sk.sk_write_space()
@@ -174,10 +177,12 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len)
copylen, conn->sndbuf_size - tx_cnt_prep);
chunk_len_sum = chunk_len;
chunk_off = tx_cnt_prep;
+ smc_sndbuf_sync_sg_for_cpu(conn);
for (chunk = 0; chunk < 2; chunk++) {
rc = memcpy_from_msg(sndbuf_base + chunk_off,
msg, chunk_len);
if (rc) {
+ smc_sndbuf_sync_sg_for_device(conn);
if (send_done)
return send_done;
goto out_err;
@@ -192,6 +197,7 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len)
chunk_len_sum += chunk_len;
chunk_off = 0; /* modulo offset in send ring buffer */
}
+ smc_sndbuf_sync_sg_for_device(conn);
/* update cursors */
smc_curs_add(conn->sndbuf_size, &prep, copylen);
smc_curs_write(&conn->tx_curs_prep,
@@ -277,6 +283,7 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)
struct smc_link_group *lgr = conn->lgr;
int to_send, rmbespace;
struct smc_link *link;
+ dma_addr_t dma_addr;
int num_sges;
int rc;
@@ -334,12 +341,11 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)
src_len = conn->sndbuf_size - sent.count;
}
src_len_sum = src_len;
+ dma_addr = sg_dma_address(conn->sndbuf_desc->sgt[SMC_SINGLE_LINK].sgl);
for (dstchunk = 0; dstchunk < 2; dstchunk++) {
num_sges = 0;
for (srcchunk = 0; srcchunk < 2; srcchunk++) {
- sges[srcchunk].addr =
- conn->sndbuf_desc->dma_addr[SMC_SINGLE_LINK] +
- src_off;
+ sges[srcchunk].addr = dma_addr + src_off;
sges[srcchunk].length = src_len;
sges[srcchunk].lkey = link->roce_pd->local_dma_lkey;
num_sges++;
@@ -391,8 +397,7 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
int rc;
spin_lock_bh(&conn->send_lock);
- rc = smc_cdc_get_free_slot(&conn->lgr->lnk[SMC_SINGLE_LINK], &wr_buf,
- &pend);
+ rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend);
if (rc < 0) {
if (rc == -EBUSY) {
struct smc_sock *smc =
@@ -403,7 +408,8 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
goto out_unlock;
}
rc = 0;
- schedule_work(&conn->tx_work);
+ schedule_delayed_work(&conn->tx_work,
+ SMC_TX_WORK_DELAY);
}
goto out_unlock;
}
@@ -427,7 +433,7 @@ out_unlock:
*/
static void smc_tx_work(struct work_struct *work)
{
- struct smc_connection *conn = container_of(work,
+ struct smc_connection *conn = container_of(to_delayed_work(work),
struct smc_connection,
tx_work);
struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
@@ -460,12 +466,12 @@ void smc_tx_consumer_update(struct smc_connection *conn)
((to_confirm > conn->rmbe_update_limit) &&
((to_confirm > (conn->rmbe_size / 2)) ||
conn->local_rx_ctrl.prod_flags.write_blocked))) {
- rc = smc_cdc_get_free_slot(&conn->lgr->lnk[SMC_SINGLE_LINK],
- &wr_buf, &pend);
+ rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend);
if (!rc)
rc = smc_cdc_msg_send(conn, wr_buf, pend);
if (rc < 0) {
- schedule_work(&conn->tx_work);
+ schedule_delayed_work(&conn->tx_work,
+ SMC_TX_WORK_DELAY);
return;
}
smc_curs_write(&conn->rx_curs_confirmed,
@@ -484,6 +490,6 @@ void smc_tx_consumer_update(struct smc_connection *conn)
void smc_tx_init(struct smc_sock *smc)
{
smc->sk.sk_write_space = smc_tx_write_space;
- INIT_WORK(&smc->conn.tx_work, smc_tx_work);
+ INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work);
spin_lock_init(&smc->conn.send_lock);
}
diff --git a/net/smc/smc_tx.h b/net/smc/smc_tx.h
index 1d6a0dcdcfe6..78255964fa4d 100644
--- a/net/smc/smc_tx.h
+++ b/net/smc/smc_tx.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Shared Memory Communications over RDMA (SMC-R) and RoCE
*
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index 874ee9f9d796..de4537f66832 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Shared Memory Communications over RDMA (SMC-R) and RoCE
*
@@ -68,6 +69,16 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc)
int i;
link = wc->qp->qp_context;
+
+ if (wc->opcode == IB_WC_REG_MR) {
+ if (wc->status)
+ link->wr_reg_state = FAILED;
+ else
+ link->wr_reg_state = CONFIRMED;
+ wake_up(&link->wr_reg_wait);
+ return;
+ }
+
pnd_snd_idx = smc_wr_tx_find_pending_index(link, wc->wr_id);
if (pnd_snd_idx == link->wr_tx_cnt)
return;
@@ -234,7 +245,7 @@ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv)
int rc;
ib_req_notify_cq(link->smcibdev->roce_cq_send,
- IB_CQ_SOLICITED_MASK | IB_CQ_REPORT_MISSED_EVENTS);
+ IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
pend = container_of(priv, struct smc_wr_tx_pend, priv);
rc = ib_post_send(link->roce_qp, &link->wr_tx_ibs[pend->idx],
&failed_wr);
@@ -243,6 +254,52 @@ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv)
return rc;
}
+/* Register a memory region and wait for result. */
+int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr)
+{
+ struct ib_send_wr *failed_wr = NULL;
+ int rc;
+
+ ib_req_notify_cq(link->smcibdev->roce_cq_send,
+ IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
+ link->wr_reg_state = POSTED;
+ link->wr_reg.wr.wr_id = (u64)(uintptr_t)mr;
+ link->wr_reg.mr = mr;
+ link->wr_reg.key = mr->rkey;
+ failed_wr = &link->wr_reg.wr;
+ rc = ib_post_send(link->roce_qp, &link->wr_reg.wr, &failed_wr);
+ WARN_ON(failed_wr != &link->wr_reg.wr);
+ if (rc)
+ return rc;
+
+ rc = wait_event_interruptible_timeout(link->wr_reg_wait,
+ (link->wr_reg_state != POSTED),
+ SMC_WR_REG_MR_WAIT_TIME);
+ if (!rc) {
+ /* timeout - terminate connections */
+ struct smc_link_group *lgr;
+
+ lgr = container_of(link, struct smc_link_group,
+ lnk[SMC_SINGLE_LINK]);
+ smc_lgr_terminate(lgr);
+ return -EPIPE;
+ }
+ if (rc == -ERESTARTSYS)
+ return -EINTR;
+ switch (link->wr_reg_state) {
+ case CONFIRMED:
+ rc = 0;
+ break;
+ case FAILED:
+ rc = -EIO;
+ break;
+ case POSTED:
+ rc = -EPIPE;
+ break;
+ }
+ return rc;
+}
+
void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_rx_hdr_type,
smc_wr_tx_filter filter,
smc_wr_tx_dismisser dismisser,
@@ -458,6 +515,11 @@ static void smc_wr_init_sge(struct smc_link *lnk)
lnk->wr_rx_ibs[i].sg_list = &lnk->wr_rx_sges[i];
lnk->wr_rx_ibs[i].num_sge = 1;
}
+ lnk->wr_reg.wr.next = NULL;
+ lnk->wr_reg.wr.num_sge = 0;
+ lnk->wr_reg.wr.send_flags = IB_SEND_SIGNALED;
+ lnk->wr_reg.wr.opcode = IB_WR_REG_MR;
+ lnk->wr_reg.access = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE;
}
void smc_wr_free_link(struct smc_link *lnk)
@@ -602,6 +664,8 @@ int smc_wr_create_link(struct smc_link *lnk)
smc_wr_init_sge(lnk);
memset(lnk->wr_tx_mask, 0,
BITS_TO_LONGS(SMC_WR_BUF_CNT) * sizeof(*lnk->wr_tx_mask));
+ init_waitqueue_head(&lnk->wr_tx_wait);
+ init_waitqueue_head(&lnk->wr_reg_wait);
return rc;
dma_unmap:
diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h
index 0b9beeda6053..2acf12b06063 100644
--- a/net/smc/smc_wr.h
+++ b/net/smc/smc_wr.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Shared Memory Communications over RDMA (SMC-R) and RoCE
*
@@ -102,5 +103,6 @@ void smc_wr_tx_dismiss_slots(struct smc_link *lnk, u8 wr_rx_hdr_type,
int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler);
int smc_wr_rx_post_init(struct smc_link *link);
void smc_wr_rx_cq_handler(struct ib_cq *ib_cq, void *cq_context);
+int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr);
#endif /* SMC_WR_H */
diff --git a/net/socket.c b/net/socket.c
index ad22df1ffbd1..42d8e9c9ccd5 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -568,7 +568,6 @@ struct socket *sock_alloc(void)
sock = SOCKET_I(inode);
- kmemcheck_annotate_bitfield(sock, type);
inode->i_ino = get_next_ino();
inode->i_mode = S_IFSOCK | S_IRWXUGO;
inode->i_uid = current_fsuid();
@@ -652,6 +651,20 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
}
EXPORT_SYMBOL(kernel_sendmsg);
+int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
+ struct kvec *vec, size_t num, size_t size)
+{
+ struct socket *sock = sk->sk_socket;
+
+ if (!sock->ops->sendmsg_locked)
+ return sock_no_sendmsg_locked(sk, msg, size);
+
+ iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size);
+
+ return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg));
+}
+EXPORT_SYMBOL(kernel_sendmsg_locked);
+
static bool skb_is_err_queue(const struct sk_buff *skb)
{
/* pkt_type of skbs enqueued on the error queue are set to
@@ -3376,6 +3389,19 @@ int kernel_sendpage(struct socket *sock, struct page *page, int offset,
}
EXPORT_SYMBOL(kernel_sendpage);
+int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
+ size_t size, int flags)
+{
+ struct socket *sock = sk->sk_socket;
+
+ if (sock->ops->sendpage_locked)
+ return sock->ops->sendpage_locked(sk, page, offset, size,
+ flags);
+
+ return sock_no_sendpage_locked(sk, page, offset, size, flags);
+}
+EXPORT_SYMBOL(kernel_sendpage_locked);
+
int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
{
mm_segment_t oldfs = get_fs();
@@ -3405,7 +3431,6 @@ u32 kernel_sock_ip_overhead(struct sock *sk)
struct inet_sock *inet;
struct ip_options_rcu *opt;
u32 overhead = 0;
- bool owned_by_user;
#if IS_ENABLED(CONFIG_IPV6)
struct ipv6_pinfo *np;
struct ipv6_txoptions *optv6 = NULL;
@@ -3414,13 +3439,12 @@ u32 kernel_sock_ip_overhead(struct sock *sk)
if (!sk)
return overhead;
- owned_by_user = sock_owned_by_user(sk);
switch (sk->sk_family) {
case AF_INET:
inet = inet_sk(sk);
overhead += sizeof(struct iphdr);
opt = rcu_dereference_protected(inet->inet_opt,
- owned_by_user);
+ sock_owned_by_user(sk));
if (opt)
overhead += opt->opt.optlen;
return overhead;
@@ -3430,7 +3454,7 @@ u32 kernel_sock_ip_overhead(struct sock *sk)
overhead += sizeof(struct ipv6hdr);
if (np)
optv6 = rcu_dereference_protected(np->opt,
- owned_by_user);
+ sock_owned_by_user(sk));
if (optv6)
overhead += (optv6->opt_flen + optv6->opt_nflen);
return overhead;
diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c
index b5c279b22680..c5fda15ba319 100644
--- a/net/strparser/strparser.c
+++ b/net/strparser/strparser.c
@@ -29,44 +29,46 @@
static struct workqueue_struct *strp_wq;
-struct _strp_rx_msg {
- /* Internal cb structure. struct strp_rx_msg must be first for passing
+struct _strp_msg {
+ /* Internal cb structure. struct strp_msg must be first for passing
* to upper layer.
*/
- struct strp_rx_msg strp;
+ struct strp_msg strp;
int accum_len;
int early_eaten;
};
-static inline struct _strp_rx_msg *_strp_rx_msg(struct sk_buff *skb)
+static inline struct _strp_msg *_strp_msg(struct sk_buff *skb)
{
- return (struct _strp_rx_msg *)((void *)skb->cb +
+ return (struct _strp_msg *)((void *)skb->cb +
offsetof(struct qdisc_skb_cb, data));
}
/* Lower lock held */
-static void strp_abort_rx_strp(struct strparser *strp, int err)
+static void strp_abort_strp(struct strparser *strp, int err)
{
- struct sock *csk = strp->sk;
-
/* Unrecoverable error in receive */
- del_timer(&strp->rx_msg_timer);
+ cancel_delayed_work(&strp->msg_timer_work);
- if (strp->rx_stopped)
+ if (strp->stopped)
return;
- strp->rx_stopped = 1;
+ strp->stopped = 1;
+
+ if (strp->sk) {
+ struct sock *sk = strp->sk;
- /* Report an error on the lower socket */
- csk->sk_err = err;
- csk->sk_error_report(csk);
+ /* Report an error on the lower socket */
+ sk->sk_err = err;
+ sk->sk_error_report(sk);
+ }
}
-static void strp_start_rx_timer(struct strparser *strp)
+static void strp_start_timer(struct strparser *strp, long timeo)
{
- if (strp->sk->sk_rcvtimeo)
- mod_timer(&strp->rx_msg_timer, strp->sk->sk_rcvtimeo);
+ if (timeo)
+ mod_delayed_work(strp_wq, &strp->msg_timer_work, timeo);
}
/* Lower lock held */
@@ -74,46 +76,55 @@ static void strp_parser_err(struct strparser *strp, int err,
read_descriptor_t *desc)
{
desc->error = err;
- kfree_skb(strp->rx_skb_head);
- strp->rx_skb_head = NULL;
+ kfree_skb(strp->skb_head);
+ strp->skb_head = NULL;
strp->cb.abort_parser(strp, err);
}
static inline int strp_peek_len(struct strparser *strp)
{
- struct socket *sock = strp->sk->sk_socket;
+ if (strp->sk) {
+ struct socket *sock = strp->sk->sk_socket;
+
+ return sock->ops->peek_len(sock);
+ }
+
+ /* If we don't have an associated socket there's nothing to peek.
+ * Return int max to avoid stopping the strparser.
+ */
- return sock->ops->peek_len(sock);
+ return INT_MAX;
}
/* Lower socket lock held */
-static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
- unsigned int orig_offset, size_t orig_len)
+static int __strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
+ unsigned int orig_offset, size_t orig_len,
+ size_t max_msg_size, long timeo)
{
struct strparser *strp = (struct strparser *)desc->arg.data;
- struct _strp_rx_msg *rxm;
+ struct _strp_msg *stm;
struct sk_buff *head, *skb;
size_t eaten = 0, cand_len;
ssize_t extra;
int err;
bool cloned_orig = false;
- if (strp->rx_paused)
+ if (strp->paused)
return 0;
- head = strp->rx_skb_head;
+ head = strp->skb_head;
if (head) {
/* Message already in progress */
- rxm = _strp_rx_msg(head);
- if (unlikely(rxm->early_eaten)) {
+ stm = _strp_msg(head);
+ if (unlikely(stm->early_eaten)) {
/* Already some number of bytes on the receive sock
- * data saved in rx_skb_head, just indicate they
+ * data saved in skb_head, just indicate they
* are consumed.
*/
- eaten = orig_len <= rxm->early_eaten ?
- orig_len : rxm->early_eaten;
- rxm->early_eaten -= eaten;
+ eaten = orig_len <= stm->early_eaten ?
+ orig_len : stm->early_eaten;
+ stm->early_eaten -= eaten;
return eaten;
}
@@ -126,12 +137,12 @@ static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
*/
orig_skb = skb_clone(orig_skb, GFP_ATOMIC);
if (!orig_skb) {
- STRP_STATS_INCR(strp->stats.rx_mem_fail);
+ STRP_STATS_INCR(strp->stats.mem_fail);
desc->error = -ENOMEM;
return 0;
}
if (!pskb_pull(orig_skb, orig_offset)) {
- STRP_STATS_INCR(strp->stats.rx_mem_fail);
+ STRP_STATS_INCR(strp->stats.mem_fail);
kfree_skb(orig_skb);
desc->error = -ENOMEM;
return 0;
@@ -140,13 +151,13 @@ static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
orig_offset = 0;
}
- if (!strp->rx_skb_nextp) {
+ if (!strp->skb_nextp) {
/* We are going to append to the frags_list of head.
* Need to unshare the frag_list.
*/
err = skb_unclone(head, GFP_ATOMIC);
if (err) {
- STRP_STATS_INCR(strp->stats.rx_mem_fail);
+ STRP_STATS_INCR(strp->stats.mem_fail);
desc->error = err;
return 0;
}
@@ -165,20 +176,20 @@ static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
skb = alloc_skb(0, GFP_ATOMIC);
if (!skb) {
- STRP_STATS_INCR(strp->stats.rx_mem_fail);
+ STRP_STATS_INCR(strp->stats.mem_fail);
desc->error = -ENOMEM;
return 0;
}
skb->len = head->len;
skb->data_len = head->len;
skb->truesize = head->truesize;
- *_strp_rx_msg(skb) = *_strp_rx_msg(head);
- strp->rx_skb_nextp = &head->next;
+ *_strp_msg(skb) = *_strp_msg(head);
+ strp->skb_nextp = &head->next;
skb_shinfo(skb)->frag_list = head;
- strp->rx_skb_head = skb;
+ strp->skb_head = skb;
head = skb;
} else {
- strp->rx_skb_nextp =
+ strp->skb_nextp =
&skb_shinfo(head)->frag_list;
}
}
@@ -188,112 +199,112 @@ static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
/* Always clone since we will consume something */
skb = skb_clone(orig_skb, GFP_ATOMIC);
if (!skb) {
- STRP_STATS_INCR(strp->stats.rx_mem_fail);
+ STRP_STATS_INCR(strp->stats.mem_fail);
desc->error = -ENOMEM;
break;
}
cand_len = orig_len - eaten;
- head = strp->rx_skb_head;
+ head = strp->skb_head;
if (!head) {
head = skb;
- strp->rx_skb_head = head;
- /* Will set rx_skb_nextp on next packet if needed */
- strp->rx_skb_nextp = NULL;
- rxm = _strp_rx_msg(head);
- memset(rxm, 0, sizeof(*rxm));
- rxm->strp.offset = orig_offset + eaten;
+ strp->skb_head = head;
+ /* Will set skb_nextp on next packet if needed */
+ strp->skb_nextp = NULL;
+ stm = _strp_msg(head);
+ memset(stm, 0, sizeof(*stm));
+ stm->strp.offset = orig_offset + eaten;
} else {
/* Unclone since we may be appending to an skb that we
* already share a frag_list with.
*/
err = skb_unclone(skb, GFP_ATOMIC);
if (err) {
- STRP_STATS_INCR(strp->stats.rx_mem_fail);
+ STRP_STATS_INCR(strp->stats.mem_fail);
desc->error = err;
break;
}
- rxm = _strp_rx_msg(head);
- *strp->rx_skb_nextp = skb;
- strp->rx_skb_nextp = &skb->next;
+ stm = _strp_msg(head);
+ *strp->skb_nextp = skb;
+ strp->skb_nextp = &skb->next;
head->data_len += skb->len;
head->len += skb->len;
head->truesize += skb->truesize;
}
- if (!rxm->strp.full_len) {
+ if (!stm->strp.full_len) {
ssize_t len;
len = (*strp->cb.parse_msg)(strp, head);
if (!len) {
/* Need more header to determine length */
- if (!rxm->accum_len) {
+ if (!stm->accum_len) {
/* Start RX timer for new message */
- strp_start_rx_timer(strp);
+ strp_start_timer(strp, timeo);
}
- rxm->accum_len += cand_len;
+ stm->accum_len += cand_len;
eaten += cand_len;
- STRP_STATS_INCR(strp->stats.rx_need_more_hdr);
+ STRP_STATS_INCR(strp->stats.need_more_hdr);
WARN_ON(eaten != orig_len);
break;
} else if (len < 0) {
- if (len == -ESTRPIPE && rxm->accum_len) {
+ if (len == -ESTRPIPE && stm->accum_len) {
len = -ENODATA;
- strp->rx_unrecov_intr = 1;
+ strp->unrecov_intr = 1;
} else {
- strp->rx_interrupted = 1;
+ strp->interrupted = 1;
}
strp_parser_err(strp, len, desc);
break;
- } else if (len > strp->sk->sk_rcvbuf) {
+ } else if (len > max_msg_size) {
/* Message length exceeds maximum allowed */
- STRP_STATS_INCR(strp->stats.rx_msg_too_big);
+ STRP_STATS_INCR(strp->stats.msg_too_big);
strp_parser_err(strp, -EMSGSIZE, desc);
break;
} else if (len <= (ssize_t)head->len -
- skb->len - rxm->strp.offset) {
+ skb->len - stm->strp.offset) {
/* Length must be into new skb (and also
* greater than zero)
*/
- STRP_STATS_INCR(strp->stats.rx_bad_hdr_len);
+ STRP_STATS_INCR(strp->stats.bad_hdr_len);
strp_parser_err(strp, -EPROTO, desc);
break;
}
- rxm->strp.full_len = len;
+ stm->strp.full_len = len;
}
- extra = (ssize_t)(rxm->accum_len + cand_len) -
- rxm->strp.full_len;
+ extra = (ssize_t)(stm->accum_len + cand_len) -
+ stm->strp.full_len;
if (extra < 0) {
/* Message not complete yet. */
- if (rxm->strp.full_len - rxm->accum_len >
+ if (stm->strp.full_len - stm->accum_len >
strp_peek_len(strp)) {
- /* Don't have the whole messages in the socket
- * buffer. Set strp->rx_need_bytes to wait for
+ /* Don't have the whole message in the socket
+ * buffer. Set strp->need_bytes to wait for
* the rest of the message. Also, set "early
* eaten" since we've already buffered the skb
* but don't consume yet per strp_read_sock.
*/
- if (!rxm->accum_len) {
+ if (!stm->accum_len) {
/* Start RX timer for new message */
- strp_start_rx_timer(strp);
+ strp_start_timer(strp, timeo);
}
- strp->rx_need_bytes = rxm->strp.full_len -
- rxm->accum_len;
- rxm->accum_len += cand_len;
- rxm->early_eaten = cand_len;
- STRP_STATS_ADD(strp->stats.rx_bytes, cand_len);
+ strp->need_bytes = stm->strp.full_len -
+ stm->accum_len;
+ stm->accum_len += cand_len;
+ stm->early_eaten = cand_len;
+ STRP_STATS_ADD(strp->stats.bytes, cand_len);
desc->count = 0; /* Stop reading socket */
break;
}
- rxm->accum_len += cand_len;
+ stm->accum_len += cand_len;
eaten += cand_len;
WARN_ON(eaten != orig_len);
break;
@@ -308,14 +319,14 @@ static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
eaten += (cand_len - extra);
/* Hurray, we have a new message! */
- del_timer(&strp->rx_msg_timer);
- strp->rx_skb_head = NULL;
- STRP_STATS_INCR(strp->stats.rx_msgs);
+ cancel_delayed_work(&strp->msg_timer_work);
+ strp->skb_head = NULL;
+ STRP_STATS_INCR(strp->stats.msgs);
/* Give skb to upper layer */
strp->cb.rcv_msg(strp, head);
- if (unlikely(strp->rx_paused)) {
+ if (unlikely(strp->paused)) {
/* Upper layer paused strp */
break;
}
@@ -324,11 +335,33 @@ static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
if (cloned_orig)
kfree_skb(orig_skb);
- STRP_STATS_ADD(strp->stats.rx_bytes, eaten);
+ STRP_STATS_ADD(strp->stats.bytes, eaten);
return eaten;
}
+int strp_process(struct strparser *strp, struct sk_buff *orig_skb,
+ unsigned int orig_offset, size_t orig_len,
+ size_t max_msg_size, long timeo)
+{
+ read_descriptor_t desc; /* Dummy arg to strp_recv */
+
+ desc.arg.data = strp;
+
+ return __strp_recv(&desc, orig_skb, orig_offset, orig_len,
+ max_msg_size, timeo);
+}
+EXPORT_SYMBOL_GPL(strp_process);
+
+static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
+ unsigned int orig_offset, size_t orig_len)
+{
+ struct strparser *strp = (struct strparser *)desc->arg.data;
+
+ return __strp_recv(desc, orig_skb, orig_offset, orig_len,
+ strp->sk->sk_rcvbuf, strp->sk->sk_rcvtimeo);
+}
+
static int default_read_sock_done(struct strparser *strp, int err)
{
return err;
@@ -340,6 +373,9 @@ static int strp_read_sock(struct strparser *strp)
struct socket *sock = strp->sk->sk_socket;
read_descriptor_t desc;
+ if (unlikely(!sock || !sock->ops || !sock->ops->read_sock))
+ return -EBUSY;
+
desc.arg.data = strp;
desc.error = 0;
desc.count = 1; /* give more than one skb per call */
@@ -355,101 +391,123 @@ static int strp_read_sock(struct strparser *strp)
/* Lower sock lock held */
void strp_data_ready(struct strparser *strp)
{
- if (unlikely(strp->rx_stopped))
+ if (unlikely(strp->stopped))
return;
- /* This check is needed to synchronize with do_strp_rx_work.
- * do_strp_rx_work acquires a process lock (lock_sock) whereas
+ /* This check is needed to synchronize with do_strp_work.
+ * do_strp_work acquires a process lock (lock_sock) whereas
* the lock held here is bh_lock_sock. The two locks can be
* held by different threads at the same time, but bh_lock_sock
* allows a thread in BH context to safely check if the process
* lock is held. In this case, if the lock is held, queue work.
*/
if (sock_owned_by_user(strp->sk)) {
- queue_work(strp_wq, &strp->rx_work);
+ queue_work(strp_wq, &strp->work);
return;
}
- if (strp->rx_paused)
+ if (strp->paused)
return;
- if (strp->rx_need_bytes) {
- if (strp_peek_len(strp) >= strp->rx_need_bytes)
- strp->rx_need_bytes = 0;
+ if (strp->need_bytes) {
+ if (strp_peek_len(strp) >= strp->need_bytes)
+ strp->need_bytes = 0;
else
return;
}
if (strp_read_sock(strp) == -ENOMEM)
- queue_work(strp_wq, &strp->rx_work);
+ queue_work(strp_wq, &strp->work);
}
EXPORT_SYMBOL_GPL(strp_data_ready);
-static void do_strp_rx_work(struct strparser *strp)
+static void do_strp_work(struct strparser *strp)
{
read_descriptor_t rd_desc;
- struct sock *csk = strp->sk;
/* We need the read lock to synchronize with strp_data_ready. We
* need the socket lock for calling strp_read_sock.
*/
- lock_sock(csk);
+ strp->cb.lock(strp);
- if (unlikely(strp->rx_stopped))
+ if (unlikely(strp->stopped))
goto out;
- if (strp->rx_paused)
+ if (strp->paused)
goto out;
rd_desc.arg.data = strp;
if (strp_read_sock(strp) == -ENOMEM)
- queue_work(strp_wq, &strp->rx_work);
+ queue_work(strp_wq, &strp->work);
out:
- release_sock(csk);
+ strp->cb.unlock(strp);
}
-static void strp_rx_work(struct work_struct *w)
+static void strp_work(struct work_struct *w)
{
- do_strp_rx_work(container_of(w, struct strparser, rx_work));
+ do_strp_work(container_of(w, struct strparser, work));
}
-static void strp_rx_msg_timeout(unsigned long arg)
+static void strp_msg_timeout(struct work_struct *w)
{
- struct strparser *strp = (struct strparser *)arg;
+ struct strparser *strp = container_of(w, struct strparser,
+ msg_timer_work.work);
/* Message assembly timed out */
- STRP_STATS_INCR(strp->stats.rx_msg_timeouts);
- lock_sock(strp->sk);
+ STRP_STATS_INCR(strp->stats.msg_timeouts);
+ strp->cb.lock(strp);
strp->cb.abort_parser(strp, ETIMEDOUT);
+ strp->cb.unlock(strp);
+}
+
+static void strp_sock_lock(struct strparser *strp)
+{
+ lock_sock(strp->sk);
+}
+
+static void strp_sock_unlock(struct strparser *strp)
+{
release_sock(strp->sk);
}
-int strp_init(struct strparser *strp, struct sock *csk,
- struct strp_callbacks *cb)
+int strp_init(struct strparser *strp, struct sock *sk,
+ const struct strp_callbacks *cb)
{
- struct socket *sock = csk->sk_socket;
if (!cb || !cb->rcv_msg || !cb->parse_msg)
return -EINVAL;
- if (!sock->ops->read_sock || !sock->ops->peek_len)
- return -EAFNOSUPPORT;
-
- memset(strp, 0, sizeof(*strp));
+ /* The sk (sock) arg determines the mode of the stream parser.
+ *
+ * If the sock is set then the strparser is in receive callback mode.
+ * The upper layer calls strp_data_ready to kick receive processing
+ * and strparser calls the read_sock function on the socket to
+ * get packets.
+ *
+ * If the sock is not set then the strparser is in general mode.
+ * The upper layer calls strp_process for each skb to be parsed.
+ */
- strp->sk = csk;
+ if (!sk) {
+ if (!cb->lock || !cb->unlock)
+ return -EINVAL;
+ }
- setup_timer(&strp->rx_msg_timer, strp_rx_msg_timeout,
- (unsigned long)strp);
+ memset(strp, 0, sizeof(*strp));
- INIT_WORK(&strp->rx_work, strp_rx_work);
+ strp->sk = sk;
+ strp->cb.lock = cb->lock ? : strp_sock_lock;
+ strp->cb.unlock = cb->unlock ? : strp_sock_unlock;
strp->cb.rcv_msg = cb->rcv_msg;
strp->cb.parse_msg = cb->parse_msg;
strp->cb.read_sock_done = cb->read_sock_done ? : default_read_sock_done;
- strp->cb.abort_parser = cb->abort_parser ? : strp_abort_rx_strp;
+ strp->cb.abort_parser = cb->abort_parser ? : strp_abort_strp;
+
+ INIT_DELAYED_WORK(&strp->msg_timer_work, strp_msg_timeout);
+ INIT_WORK(&strp->work, strp_work);
return 0;
}
@@ -457,12 +515,12 @@ EXPORT_SYMBOL_GPL(strp_init);
void strp_unpause(struct strparser *strp)
{
- strp->rx_paused = 0;
+ strp->paused = 0;
- /* Sync setting rx_paused with RX work */
+ /* Sync setting paused with RX work */
smp_mb();
- queue_work(strp_wq, &strp->rx_work);
+ queue_work(strp_wq, &strp->work);
}
EXPORT_SYMBOL_GPL(strp_unpause);
@@ -471,27 +529,27 @@ EXPORT_SYMBOL_GPL(strp_unpause);
*/
void strp_done(struct strparser *strp)
{
- WARN_ON(!strp->rx_stopped);
+ WARN_ON(!strp->stopped);
- del_timer_sync(&strp->rx_msg_timer);
- cancel_work_sync(&strp->rx_work);
+ cancel_delayed_work_sync(&strp->msg_timer_work);
+ cancel_work_sync(&strp->work);
- if (strp->rx_skb_head) {
- kfree_skb(strp->rx_skb_head);
- strp->rx_skb_head = NULL;
+ if (strp->skb_head) {
+ kfree_skb(strp->skb_head);
+ strp->skb_head = NULL;
}
}
EXPORT_SYMBOL_GPL(strp_done);
void strp_stop(struct strparser *strp)
{
- strp->rx_stopped = 1;
+ strp->stopped = 1;
}
EXPORT_SYMBOL_GPL(strp_stop);
void strp_check_rcv(struct strparser *strp)
{
- queue_work(strp_wq, &strp->rx_work);
+ queue_work(strp_wq, &strp->work);
}
EXPORT_SYMBOL_GPL(strp_check_rcv);
diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile
index ea7ffa12e0f9..090658c3da12 100644
--- a/net/sunrpc/Makefile
+++ b/net/sunrpc/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for Linux kernel SUN RPC
#
diff --git a/net/sunrpc/auth_gss/Makefile b/net/sunrpc/auth_gss/Makefile
index 14e9e53e63d5..c374268b008f 100644
--- a/net/sunrpc/auth_gss/Makefile
+++ b/net/sunrpc/auth_gss/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for Linux kernel rpcsec_gss implementation
#
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 7b1ee5a0b03c..73165e9ca5bf 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -855,11 +855,13 @@ unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct g
return stat;
if (integ_len > buf->len)
return stat;
- if (xdr_buf_subsegment(buf, &integ_buf, 0, integ_len))
- BUG();
+ if (xdr_buf_subsegment(buf, &integ_buf, 0, integ_len)) {
+ WARN_ON_ONCE(1);
+ return stat;
+ }
/* copy out mic... */
if (read_u32_from_xdr_buf(buf, integ_len, &mic.len))
- BUG();
+ return stat;
if (mic.len > RPC_MAX_AUTH_SIZE)
return stat;
mic.data = kmalloc(mic.len, GFP_KERNEL);
@@ -1611,8 +1613,10 @@ svcauth_gss_wrap_resp_integ(struct svc_rqst *rqstp)
BUG_ON(integ_len % 4);
*p++ = htonl(integ_len);
*p++ = htonl(gc->gc_seq);
- if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset, integ_len))
- BUG();
+ if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset, integ_len)) {
+ WARN_ON_ONCE(1);
+ goto out_err;
+ }
if (resbuf->tail[0].iov_base == NULL) {
if (resbuf->head[0].iov_len + RPC_MAX_AUTH_SIZE > PAGE_SIZE)
goto out_err;
diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c
index 5f3d527dff65..75d72e109a04 100644
--- a/net/sunrpc/auth_null.c
+++ b/net/sunrpc/auth_null.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* linux/net/sunrpc/auth_null.c
*
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
index 82337e1ec9cd..dafd6b870ba3 100644
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* linux/net/sunrpc/auth_unix.c
*
diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c
index ac701c28f44f..c2c68a15b59d 100644
--- a/net/sunrpc/backchannel_rqst.c
+++ b/net/sunrpc/backchannel_rqst.c
@@ -171,10 +171,10 @@ int xprt_setup_bc(struct rpc_xprt *xprt, unsigned int min_reqs)
/*
* Add the temporary list to the backchannel preallocation list
*/
- spin_lock_bh(&xprt->bc_pa_lock);
+ spin_lock(&xprt->bc_pa_lock);
list_splice(&tmp_list, &xprt->bc_pa_list);
xprt_inc_alloc_count(xprt, min_reqs);
- spin_unlock_bh(&xprt->bc_pa_lock);
+ spin_unlock(&xprt->bc_pa_lock);
dprintk("RPC: setup backchannel transport done\n");
return 0;
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 2e49d1f892b7..a801da812f86 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1491,7 +1491,6 @@ rpc_restart_call(struct rpc_task *task)
}
EXPORT_SYMBOL_GPL(rpc_restart_call);
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
const char
*rpc_proc_name(const struct rpc_task *task)
{
@@ -1505,7 +1504,6 @@ const char
} else
return "no proc";
}
-#endif
/*
* 0. Initial state
@@ -1519,6 +1517,7 @@ call_start(struct rpc_task *task)
struct rpc_clnt *clnt = task->tk_client;
int idx = task->tk_msg.rpc_proc->p_statidx;
+ trace_rpc_request(task);
dprintk("RPC: %5u call_start %s%d proc %s (%s)\n", task->tk_pid,
clnt->cl_program->name, clnt->cl_vers,
rpc_proc_name(task),
@@ -1586,6 +1585,7 @@ call_reserveresult(struct rpc_task *task)
switch (status) {
case -ENOMEM:
rpc_delay(task, HZ >> 2);
+ /* fall through */
case -EAGAIN: /* woken up; retry */
task->tk_action = call_retry_reserve;
return;
@@ -1647,10 +1647,13 @@ call_refreshresult(struct rpc_task *task)
/* Use rate-limiting and a max number of retries if refresh
* had status 0 but failed to update the cred.
*/
+ /* fall through */
case -ETIMEDOUT:
rpc_delay(task, 3*HZ);
+ /* fall through */
case -EAGAIN:
status = -EACCES;
+ /* fall through */
case -EKEYEXPIRED:
if (!task->tk_cred_retry)
break;
@@ -1903,6 +1906,15 @@ call_connect_status(struct rpc_task *task)
task->tk_status = 0;
switch (status) {
case -ECONNREFUSED:
+ /* A positive refusal suggests a rebind is needed. */
+ if (RPC_IS_SOFTCONN(task))
+ break;
+ if (clnt->cl_autobind) {
+ rpc_force_rebind(clnt);
+ task->tk_action = call_bind;
+ return;
+ }
+ /* fall through */
case -ECONNRESET:
case -ECONNABORTED:
case -ENETUNREACH:
@@ -1916,6 +1928,7 @@ call_connect_status(struct rpc_task *task)
break;
/* retry with existing socket, after a delay */
rpc_delay(task, 3*HZ);
+ /* fall through */
case -EAGAIN:
/* Check for timeouts before looping back to call_bind */
case -ETIMEDOUT:
@@ -2017,6 +2030,7 @@ call_transmit_status(struct rpc_task *task)
rpc_exit(task, task->tk_status);
break;
}
+ /* fall through */
case -ECONNRESET:
case -ECONNABORTED:
case -EADDRINUSE:
@@ -2137,25 +2151,25 @@ call_status(struct rpc_task *task)
* were a timeout.
*/
rpc_delay(task, 3*HZ);
+ /* fall through */
case -ETIMEDOUT:
task->tk_action = call_timeout;
- if (!(task->tk_flags & RPC_TASK_NO_RETRANS_TIMEOUT)
- && task->tk_client->cl_discrtry)
- xprt_conditional_disconnect(req->rq_xprt,
- req->rq_connect_cookie);
break;
case -ECONNREFUSED:
case -ECONNRESET:
case -ECONNABORTED:
rpc_force_rebind(clnt);
+ /* fall through */
case -EADDRINUSE:
rpc_delay(task, 3*HZ);
+ /* fall through */
case -EPIPE:
case -ENOTCONN:
task->tk_action = call_bind;
break;
case -ENOBUFS:
rpc_delay(task, HZ>>2);
+ /* fall through */
case -EAGAIN:
task->tk_action = call_transmit;
break;
diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c
index c8fd0b6c1618..e980d2a493de 100644
--- a/net/sunrpc/debugfs.c
+++ b/net/sunrpc/debugfs.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/**
* debugfs interface for sunrpc
*
diff --git a/net/sunrpc/netns.h b/net/sunrpc/netns.h
index 394ce523174c..7ec10b92bea1 100644
--- a/net/sunrpc/netns.h
+++ b/net/sunrpc/netns.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __SUNRPC_NETNS_H__
#define __SUNRPC_NETNS_H__
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 61a504fb1ae2..7803f3b6aa53 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -1410,8 +1410,8 @@ rpc_fill_super(struct super_block *sb, void *data, int silent)
return PTR_ERR(gssd_dentry);
}
- dprintk("RPC: sending pipefs MOUNT notification for net %p%s\n",
- net, NET_NAME(net));
+ dprintk("RPC: sending pipefs MOUNT notification for net %x%s\n",
+ net->ns.inum, NET_NAME(net));
mutex_lock(&sn->pipefs_sb_lock);
sn->pipefs_sb = sb;
err = blocking_notifier_call_chain(&rpc_pipefs_notifier_list,
@@ -1462,8 +1462,8 @@ static void rpc_kill_sb(struct super_block *sb)
goto out;
}
sn->pipefs_sb = NULL;
- dprintk("RPC: sending pipefs UMOUNT notification for net %p%s\n",
- net, NET_NAME(net));
+ dprintk("RPC: sending pipefs UMOUNT notification for net %x%s\n",
+ net->ns.inum, NET_NAME(net));
blocking_notifier_call_chain(&rpc_pipefs_notifier_list,
RPC_PIPEFS_UMOUNT,
sb);
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index ea0676f199c8..c526f8fb37c9 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -216,9 +216,9 @@ static void rpcb_set_local(struct net *net, struct rpc_clnt *clnt,
smp_wmb();
sn->rpcb_users = 1;
dprintk("RPC: created new rpcb local clients (rpcb_local_clnt: "
- "%p, rpcb_local_clnt4: %p) for net %p%s\n",
- sn->rpcb_local_clnt, sn->rpcb_local_clnt4,
- net, (net == &init_net) ? " (init_net)" : "");
+ "%p, rpcb_local_clnt4: %p) for net %x%s\n",
+ sn->rpcb_local_clnt, sn->rpcb_local_clnt4,
+ net->ns.inum, (net == &init_net) ? " (init_net)" : "");
}
/*
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 0cc83839c13c..b1b49edd7c4d 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -44,7 +44,7 @@ static mempool_t *rpc_buffer_mempool __read_mostly;
static void rpc_async_schedule(struct work_struct *);
static void rpc_release_task(struct rpc_task *task);
-static void __rpc_queue_timer_fn(unsigned long ptr);
+static void __rpc_queue_timer_fn(struct timer_list *t);
/*
* RPC tasks sit here while waiting for conditions to improve.
@@ -228,7 +228,7 @@ static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const c
queue->maxpriority = nr_queues - 1;
rpc_reset_waitqueue_priority(queue);
queue->qlen = 0;
- setup_timer(&queue->timer_list.timer, __rpc_queue_timer_fn, (unsigned long)queue);
+ timer_setup(&queue->timer_list.timer, __rpc_queue_timer_fn, 0);
INIT_LIST_HEAD(&queue->timer_list.list);
rpc_assign_waitqueue_name(queue, qname);
}
@@ -274,10 +274,9 @@ static inline void rpc_task_set_debuginfo(struct rpc_task *task)
static void rpc_set_active(struct rpc_task *task)
{
- trace_rpc_task_begin(task->tk_client, task, NULL);
-
rpc_task_set_debuginfo(task);
set_bit(RPC_TASK_ACTIVE, &task->tk_runstate);
+ trace_rpc_task_begin(task->tk_client, task, NULL);
}
/*
@@ -635,9 +634,9 @@ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status)
}
EXPORT_SYMBOL_GPL(rpc_wake_up_status);
-static void __rpc_queue_timer_fn(unsigned long ptr)
+static void __rpc_queue_timer_fn(struct timer_list *t)
{
- struct rpc_wait_queue *queue = (struct rpc_wait_queue *)ptr;
+ struct rpc_wait_queue *queue = from_timer(queue, t, timer_list.timer);
struct rpc_task *task, *n;
unsigned long expires, now, timeo;
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index c73de181467a..56f9eff74150 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -65,10 +65,13 @@ err_proc:
static __net_exit void sunrpc_exit_net(struct net *net)
{
+ struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+
rpc_pipefs_exit_net(net);
unix_gid_cache_destroy(net);
ip_map_cache_destroy(net);
rpc_proc_exit(net);
+ WARN_ON_ONCE(!list_empty(&sn->all_clients));
}
static struct pernet_operations sunrpc_net_ops = {
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 85ce0db5b0a6..387cc4add6f6 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -50,7 +50,7 @@ EXPORT_SYMBOL_GPL(svc_pool_map);
static DEFINE_MUTEX(svc_pool_map_mutex);/* protects svc_pool_map.count only */
static int
-param_set_pool_mode(const char *val, struct kernel_param *kp)
+param_set_pool_mode(const char *val, const struct kernel_param *kp)
{
int *ip = (int *)kp->arg;
struct svc_pool_map *m = &svc_pool_map;
@@ -80,7 +80,7 @@ out:
}
static int
-param_get_pool_mode(char *buf, struct kernel_param *kp)
+param_get_pool_mode(char *buf, const struct kernel_param *kp)
{
int *ip = (int *)kp->arg;
@@ -421,7 +421,7 @@ __svc_init_bc(struct svc_serv *serv)
*/
static struct svc_serv *
__svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
- struct svc_serv_ops *ops)
+ const struct svc_serv_ops *ops)
{
struct svc_serv *serv;
unsigned int vers;
@@ -455,7 +455,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
serv->sv_xdrsize = xdrsize;
INIT_LIST_HEAD(&serv->sv_tempsocks);
INIT_LIST_HEAD(&serv->sv_permsocks);
- init_timer(&serv->sv_temptimer);
+ timer_setup(&serv->sv_temptimer, NULL, 0);
spin_lock_init(&serv->sv_lock);
__svc_init_bc(serv);
@@ -486,7 +486,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
struct svc_serv *
svc_create(struct svc_program *prog, unsigned int bufsize,
- struct svc_serv_ops *ops)
+ const struct svc_serv_ops *ops)
{
return __svc_create(prog, bufsize, /*npools*/1, ops);
}
@@ -494,7 +494,7 @@ EXPORT_SYMBOL_GPL(svc_create);
struct svc_serv *
svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
- struct svc_serv_ops *ops)
+ const struct svc_serv_ops *ops)
{
struct svc_serv *serv;
unsigned int npools = svc_pool_map_get();
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index d16a8b423c20..e8e0831229cf 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -28,7 +28,7 @@ module_param(svc_rpc_per_connection_limit, uint, 0644);
static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt);
static int svc_deferred_recv(struct svc_rqst *rqstp);
static struct cache_deferred_req *svc_defer(struct cache_req *req);
-static void svc_age_temp_xprts(unsigned long closure);
+static void svc_age_temp_xprts(struct timer_list *t);
static void svc_delete_xprt(struct svc_xprt *xprt);
/* apparently the "standard" is that clients close
@@ -250,9 +250,9 @@ void svc_add_new_perm_xprt(struct svc_serv *serv, struct svc_xprt *new)
svc_xprt_received(new);
}
-int _svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
- struct net *net, const int family,
- const unsigned short port, int flags)
+static int _svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
+ struct net *net, const int family,
+ const unsigned short port, int flags)
{
struct svc_xprt_class *xcl;
@@ -380,7 +380,6 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)
struct svc_pool *pool;
struct svc_rqst *rqstp = NULL;
int cpu;
- bool queued = false;
if (!svc_xprt_has_something_to_do(xprt))
goto out;
@@ -401,58 +400,25 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)
atomic_long_inc(&pool->sp_stats.packets);
-redo_search:
+ dprintk("svc: transport %p put into queue\n", xprt);
+ spin_lock_bh(&pool->sp_lock);
+ list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
+ pool->sp_stats.sockets_queued++;
+ spin_unlock_bh(&pool->sp_lock);
+
/* find a thread for this xprt */
rcu_read_lock();
list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) {
- /* Do a lockless check first */
- if (test_bit(RQ_BUSY, &rqstp->rq_flags))
+ if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags))
continue;
-
- /*
- * Once the xprt has been queued, it can only be dequeued by
- * the task that intends to service it. All we can do at that
- * point is to try to wake this thread back up so that it can
- * do so.
- */
- if (!queued) {
- spin_lock_bh(&rqstp->rq_lock);
- if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags)) {
- /* already busy, move on... */
- spin_unlock_bh(&rqstp->rq_lock);
- continue;
- }
-
- /* this one will do */
- rqstp->rq_xprt = xprt;
- svc_xprt_get(xprt);
- spin_unlock_bh(&rqstp->rq_lock);
- }
- rcu_read_unlock();
-
atomic_long_inc(&pool->sp_stats.threads_woken);
wake_up_process(rqstp->rq_task);
- put_cpu();
- goto out;
- }
- rcu_read_unlock();
-
- /*
- * We didn't find an idle thread to use, so we need to queue the xprt.
- * Do so and then search again. If we find one, we can't hook this one
- * up to it directly but we can wake the thread up in the hopes that it
- * will pick it up once it searches for a xprt to service.
- */
- if (!queued) {
- queued = true;
- dprintk("svc: transport %p put into queue\n", xprt);
- spin_lock_bh(&pool->sp_lock);
- list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
- pool->sp_stats.sockets_queued++;
- spin_unlock_bh(&pool->sp_lock);
- goto redo_search;
+ goto out_unlock;
}
+ set_bit(SP_CONGESTED, &pool->sp_flags);
rqstp = NULL;
+out_unlock:
+ rcu_read_unlock();
put_cpu();
out:
trace_svc_xprt_do_enqueue(xprt, rqstp);
@@ -721,38 +687,25 @@ rqst_should_sleep(struct svc_rqst *rqstp)
static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
{
- struct svc_xprt *xprt;
struct svc_pool *pool = rqstp->rq_pool;
long time_left = 0;
/* rq_xprt should be clear on entry */
WARN_ON_ONCE(rqstp->rq_xprt);
- /* Normally we will wait up to 5 seconds for any required
- * cache information to be provided.
- */
- rqstp->rq_chandle.thread_wait = 5*HZ;
-
- xprt = svc_xprt_dequeue(pool);
- if (xprt) {
- rqstp->rq_xprt = xprt;
-
- /* As there is a shortage of threads and this request
- * had to be queued, don't allow the thread to wait so
- * long for cache updates.
- */
- rqstp->rq_chandle.thread_wait = 1*HZ;
- clear_bit(SP_TASK_PENDING, &pool->sp_flags);
- return xprt;
- }
+ rqstp->rq_xprt = svc_xprt_dequeue(pool);
+ if (rqstp->rq_xprt)
+ goto out_found;
/*
* We have to be able to interrupt this wait
* to bring down the daemons ...
*/
set_current_state(TASK_INTERRUPTIBLE);
+ smp_mb__before_atomic();
+ clear_bit(SP_CONGESTED, &pool->sp_flags);
clear_bit(RQ_BUSY, &rqstp->rq_flags);
- smp_mb();
+ smp_mb__after_atomic();
if (likely(rqst_should_sleep(rqstp)))
time_left = schedule_timeout(timeout);
@@ -761,13 +714,11 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
try_to_freeze();
- spin_lock_bh(&rqstp->rq_lock);
set_bit(RQ_BUSY, &rqstp->rq_flags);
- spin_unlock_bh(&rqstp->rq_lock);
-
- xprt = rqstp->rq_xprt;
- if (xprt != NULL)
- return xprt;
+ smp_mb__after_atomic();
+ rqstp->rq_xprt = svc_xprt_dequeue(pool);
+ if (rqstp->rq_xprt)
+ goto out_found;
if (!time_left)
atomic_long_inc(&pool->sp_stats.threads_timedout);
@@ -775,6 +726,15 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
if (signalled() || kthread_should_stop())
return ERR_PTR(-EINTR);
return ERR_PTR(-EAGAIN);
+out_found:
+ /* Normally we will wait up to 5 seconds for any required
+ * cache information to be provided.
+ */
+ if (!test_bit(SP_CONGESTED, &pool->sp_flags))
+ rqstp->rq_chandle.thread_wait = 5*HZ;
+ else
+ rqstp->rq_chandle.thread_wait = 1*HZ;
+ return rqstp->rq_xprt;
}
static void svc_add_new_temp_xprt(struct svc_serv *serv, struct svc_xprt *newxpt)
@@ -785,8 +745,7 @@ static void svc_add_new_temp_xprt(struct svc_serv *serv, struct svc_xprt *newxpt
serv->sv_tmpcnt++;
if (serv->sv_temptimer.function == NULL) {
/* setup timer to age temp transports */
- setup_timer(&serv->sv_temptimer, svc_age_temp_xprts,
- (unsigned long)serv);
+ serv->sv_temptimer.function = (TIMER_FUNC_TYPE)svc_age_temp_xprts;
mod_timer(&serv->sv_temptimer,
jiffies + svc_conn_age_period * HZ);
}
@@ -960,9 +919,9 @@ out:
* Timer function to close old temporary transports, using
* a mark-and-sweep algorithm.
*/
-static void svc_age_temp_xprts(unsigned long closure)
+static void svc_age_temp_xprts(struct timer_list *t)
{
- struct svc_serv *serv = (struct svc_serv *)closure;
+ struct svc_serv *serv = from_timer(serv, t, sv_temptimer);
struct svc_xprt *xprt;
struct list_head *le, *next;
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 2b720fa35c4f..ff8e06cd067e 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -421,6 +421,9 @@ static void svc_data_ready(struct sock *sk)
dprintk("svc: socket %p(inet %p), busy=%d\n",
svsk, sk,
test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags));
+
+ /* Refer to svc_setup_socket() for details. */
+ rmb();
svsk->sk_odata(sk);
if (!test_and_set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags))
svc_xprt_enqueue(&svsk->sk_xprt);
@@ -437,6 +440,9 @@ static void svc_write_space(struct sock *sk)
if (svsk) {
dprintk("svc: socket %p(inet %p), write_space busy=%d\n",
svsk, sk, test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags));
+
+ /* Refer to svc_setup_socket() for details. */
+ rmb();
svsk->sk_owspace(sk);
svc_xprt_enqueue(&svsk->sk_xprt);
}
@@ -687,7 +693,7 @@ static struct svc_xprt *svc_udp_create(struct svc_serv *serv,
return svc_create_socket(serv, IPPROTO_UDP, net, sa, salen, flags);
}
-static struct svc_xprt_ops svc_udp_ops = {
+static const struct svc_xprt_ops svc_udp_ops = {
.xpo_create = svc_udp_create,
.xpo_recvfrom = svc_udp_recvfrom,
.xpo_sendto = svc_udp_sendto,
@@ -760,8 +766,12 @@ static void svc_tcp_listen_data_ready(struct sock *sk)
dprintk("svc: socket %p TCP (listen) state change %d\n",
sk, sk->sk_state);
- if (svsk)
+ if (svsk) {
+ /* Refer to svc_setup_socket() for details. */
+ rmb();
svsk->sk_odata(sk);
+ }
+
/*
* This callback may called twice when a new connection
* is established as a child socket inherits everything
@@ -794,6 +804,8 @@ static void svc_tcp_state_change(struct sock *sk)
if (!svsk)
printk("svc: socket %p: no user data\n", sk);
else {
+ /* Refer to svc_setup_socket() for details. */
+ rmb();
svsk->sk_ostate(sk);
if (sk->sk_state != TCP_ESTABLISHED) {
set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
@@ -1001,7 +1013,7 @@ static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp)
if (!bc_xprt)
return -EAGAIN;
- spin_lock_bh(&bc_xprt->transport_lock);
+ spin_lock(&bc_xprt->recv_lock);
req = xprt_lookup_rqst(bc_xprt, xid);
if (!req)
goto unlock_notfound;
@@ -1019,7 +1031,7 @@ static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp)
memcpy(dst->iov_base, src->iov_base, src->iov_len);
xprt_complete_rqst(req->rq_task, rqstp->rq_arg.len);
rqstp->rq_arg.len = 0;
- spin_unlock_bh(&bc_xprt->transport_lock);
+ spin_unlock(&bc_xprt->recv_lock);
return 0;
unlock_notfound:
printk(KERN_NOTICE
@@ -1028,7 +1040,7 @@ unlock_notfound:
__func__, ntohl(calldir),
bc_xprt, ntohl(xid));
unlock_eagain:
- spin_unlock_bh(&bc_xprt->transport_lock);
+ spin_unlock(&bc_xprt->recv_lock);
return -EAGAIN;
}
@@ -1229,7 +1241,7 @@ static void svc_bc_tcp_sock_detach(struct svc_xprt *xprt)
{
}
-static struct svc_xprt_ops svc_tcp_bc_ops = {
+static const struct svc_xprt_ops svc_tcp_bc_ops = {
.xpo_create = svc_bc_tcp_create,
.xpo_detach = svc_bc_tcp_sock_detach,
.xpo_free = svc_bc_sock_free,
@@ -1263,7 +1275,7 @@ static void svc_cleanup_bc_xprt_sock(void)
}
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
-static struct svc_xprt_ops svc_tcp_ops = {
+static const struct svc_xprt_ops svc_tcp_ops = {
.xpo_create = svc_tcp_create,
.xpo_recvfrom = svc_tcp_recvfrom,
.xpo_sendto = svc_tcp_sendto,
@@ -1381,12 +1393,18 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
return ERR_PTR(err);
}
- inet->sk_user_data = svsk;
svsk->sk_sock = sock;
svsk->sk_sk = inet;
svsk->sk_ostate = inet->sk_state_change;
svsk->sk_odata = inet->sk_data_ready;
svsk->sk_owspace = inet->sk_write_space;
+ /*
+ * This barrier is necessary in order to prevent race condition
+ * with svc_data_ready(), svc_listen_data_ready() and others
+ * when calling callbacks above.
+ */
+ wmb();
+ inet->sk_user_data = svsk;
/* Initialize the socket */
if (sock->type == SOCK_DGRAM)
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 4654a9934269..333b9d697ae5 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -696,9 +696,9 @@ xprt_schedule_autodisconnect(struct rpc_xprt *xprt)
}
static void
-xprt_init_autodisconnect(unsigned long data)
+xprt_init_autodisconnect(struct timer_list *t)
{
- struct rpc_xprt *xprt = (struct rpc_xprt *)data;
+ struct rpc_xprt *xprt = from_timer(xprt, t, timer);
spin_lock(&xprt->transport_lock);
if (!list_empty(&xprt->recv))
@@ -844,6 +844,50 @@ struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid)
}
EXPORT_SYMBOL_GPL(xprt_lookup_rqst);
+/**
+ * xprt_pin_rqst - Pin a request on the transport receive list
+ * @req: Request to pin
+ *
+ * Caller must ensure this is atomic with the call to xprt_lookup_rqst()
+ * so should be holding the xprt transport lock.
+ */
+void xprt_pin_rqst(struct rpc_rqst *req)
+{
+ set_bit(RPC_TASK_MSG_RECV, &req->rq_task->tk_runstate);
+}
+EXPORT_SYMBOL_GPL(xprt_pin_rqst);
+
+/**
+ * xprt_unpin_rqst - Unpin a request on the transport receive list
+ * @req: Request to pin
+ *
+ * Caller should be holding the xprt transport lock.
+ */
+void xprt_unpin_rqst(struct rpc_rqst *req)
+{
+ struct rpc_task *task = req->rq_task;
+
+ clear_bit(RPC_TASK_MSG_RECV, &task->tk_runstate);
+ if (test_bit(RPC_TASK_MSG_RECV_WAIT, &task->tk_runstate))
+ wake_up_bit(&task->tk_runstate, RPC_TASK_MSG_RECV);
+}
+EXPORT_SYMBOL_GPL(xprt_unpin_rqst);
+
+static void xprt_wait_on_pinned_rqst(struct rpc_rqst *req)
+__must_hold(&req->rq_xprt->recv_lock)
+{
+ struct rpc_task *task = req->rq_task;
+
+ if (task && test_bit(RPC_TASK_MSG_RECV, &task->tk_runstate)) {
+ spin_unlock(&req->rq_xprt->recv_lock);
+ set_bit(RPC_TASK_MSG_RECV_WAIT, &task->tk_runstate);
+ wait_on_bit(&task->tk_runstate, RPC_TASK_MSG_RECV,
+ TASK_UNINTERRUPTIBLE);
+ clear_bit(RPC_TASK_MSG_RECV_WAIT, &task->tk_runstate);
+ spin_lock(&req->rq_xprt->recv_lock);
+ }
+}
+
static void xprt_update_rtt(struct rpc_task *task)
{
struct rpc_rqst *req = task->tk_rqstp;
@@ -966,13 +1010,13 @@ void xprt_transmit(struct rpc_task *task)
/*
* Add to the list only if we're expecting a reply
*/
- spin_lock_bh(&xprt->transport_lock);
/* Update the softirq receive buffer */
memcpy(&req->rq_private_buf, &req->rq_rcv_buf,
sizeof(req->rq_private_buf));
/* Add request to the receive list */
+ spin_lock(&xprt->recv_lock);
list_add_tail(&req->rq_list, &xprt->recv);
- spin_unlock_bh(&xprt->transport_lock);
+ spin_unlock(&xprt->recv_lock);
xprt_reset_majortimeo(req);
/* Turn off autodisconnect */
del_singleshot_timer_sync(&xprt->timer);
@@ -1095,6 +1139,7 @@ void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
case -EAGAIN:
xprt_add_backlog(xprt, task);
dprintk("RPC: waiting for request slot\n");
+ /* fall through */
default:
task->tk_status = -EAGAIN;
}
@@ -1287,12 +1332,16 @@ void xprt_release(struct rpc_task *task)
task->tk_ops->rpc_count_stats(task, task->tk_calldata);
else if (task->tk_client)
rpc_count_iostats(task, task->tk_client->cl_metrics);
+ spin_lock(&xprt->recv_lock);
+ if (!list_empty(&req->rq_list)) {
+ list_del_init(&req->rq_list);
+ xprt_wait_on_pinned_rqst(req);
+ }
+ spin_unlock(&xprt->recv_lock);
spin_lock_bh(&xprt->transport_lock);
xprt->ops->release_xprt(xprt, task);
if (xprt->ops->release_request)
xprt->ops->release_request(task);
- if (!list_empty(&req->rq_list))
- list_del(&req->rq_list);
xprt->last_used = jiffies;
xprt_schedule_autodisconnect(xprt);
spin_unlock_bh(&xprt->transport_lock);
@@ -1318,6 +1367,7 @@ static void xprt_init(struct rpc_xprt *xprt, struct net *net)
spin_lock_init(&xprt->transport_lock);
spin_lock_init(&xprt->reserve_lock);
+ spin_lock_init(&xprt->recv_lock);
INIT_LIST_HEAD(&xprt->free);
INIT_LIST_HEAD(&xprt->recv);
@@ -1373,10 +1423,9 @@ found:
xprt->idle_timeout = 0;
INIT_WORK(&xprt->task_cleanup, xprt_autoclose);
if (xprt_has_timer(xprt))
- setup_timer(&xprt->timer, xprt_init_autodisconnect,
- (unsigned long)xprt);
+ timer_setup(&xprt->timer, xprt_init_autodisconnect, 0);
else
- init_timer(&xprt->timer);
+ timer_setup(&xprt->timer, NULL, 0);
if (strlen(args->servername) > RPC_MAXNETNAMELEN) {
xprt_destroy(xprt);
@@ -1396,6 +1445,23 @@ out:
return xprt;
}
+static void xprt_destroy_cb(struct work_struct *work)
+{
+ struct rpc_xprt *xprt =
+ container_of(work, struct rpc_xprt, task_cleanup);
+
+ rpc_xprt_debugfs_unregister(xprt);
+ rpc_destroy_wait_queue(&xprt->binding);
+ rpc_destroy_wait_queue(&xprt->pending);
+ rpc_destroy_wait_queue(&xprt->sending);
+ rpc_destroy_wait_queue(&xprt->backlog);
+ kfree(xprt->servername);
+ /*
+ * Tear down transport state and free the rpc_xprt
+ */
+ xprt->ops->destroy(xprt);
+}
+
/**
* xprt_destroy - destroy an RPC transport, killing off all requests.
* @xprt: transport to destroy
@@ -1405,22 +1471,19 @@ static void xprt_destroy(struct rpc_xprt *xprt)
{
dprintk("RPC: destroying transport %p\n", xprt);
- /* Exclude transport connect/disconnect handlers */
+ /*
+ * Exclude transport connect/disconnect handlers and autoclose
+ */
wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_UNINTERRUPTIBLE);
del_timer_sync(&xprt->timer);
- rpc_xprt_debugfs_unregister(xprt);
- rpc_destroy_wait_queue(&xprt->binding);
- rpc_destroy_wait_queue(&xprt->pending);
- rpc_destroy_wait_queue(&xprt->sending);
- rpc_destroy_wait_queue(&xprt->backlog);
- cancel_work_sync(&xprt->task_cleanup);
- kfree(xprt->servername);
/*
- * Tear down transport state and free the rpc_xprt
+ * Destroy sockets etc from the system workqueue so they can
+ * safely flush receive work running on rpciod.
*/
- xprt->ops->destroy(xprt);
+ INIT_WORK(&xprt->task_cleanup, xprt_destroy_cb);
+ schedule_work(&xprt->task_cleanup);
}
static void xprt_destroy_kref(struct kref *kref)
diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c
index ae92a9e9ba52..e2d64c7138c3 100644
--- a/net/sunrpc/xprtmultipath.c
+++ b/net/sunrpc/xprtmultipath.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Multipath support for RPC
*
diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile
index b8213ddce2f2..8bf19e142b6b 100644
--- a/net/sunrpc/xprtrdma/Makefile
+++ b/net/sunrpc/xprtrdma/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_SUNRPC_XPRT_RDMA) += rpcrdma.o
rpcrdma-y := transport.o rpc_rdma.o verbs.o \
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index 03f6b5840764..8b818bb3518a 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2015 Oracle. All rights reserved.
*
@@ -42,13 +43,14 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt,
req = rpcrdma_create_req(r_xprt);
if (IS_ERR(req))
return PTR_ERR(req);
- req->rl_backchannel = true;
+ __set_bit(RPCRDMA_REQ_F_BACKCHANNEL, &req->rl_flags);
rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE,
DMA_TO_DEVICE, GFP_KERNEL);
if (IS_ERR(rb))
goto out_fail;
req->rl_rdmabuf = rb;
+ xdr_buf_init(&req->rl_hdrbuf, rb->rg_base, rdmab_length(rb));
size = r_xprt->rx_data.inline_rsize;
rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, GFP_KERNEL);
@@ -202,23 +204,27 @@ size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *xprt)
*/
int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
{
- struct rpc_xprt *xprt = rqst->rq_xprt;
- struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+ struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
- struct rpcrdma_msg *headerp;
-
- headerp = rdmab_to_msg(req->rl_rdmabuf);
- headerp->rm_xid = rqst->rq_xid;
- headerp->rm_vers = rpcrdma_version;
- headerp->rm_credit =
- cpu_to_be32(r_xprt->rx_buf.rb_bc_srv_max_requests);
- headerp->rm_type = rdma_msg;
- headerp->rm_body.rm_chunks[0] = xdr_zero;
- headerp->rm_body.rm_chunks[1] = xdr_zero;
- headerp->rm_body.rm_chunks[2] = xdr_zero;
-
- if (!rpcrdma_prepare_send_sges(&r_xprt->rx_ia, req, RPCRDMA_HDRLEN_MIN,
- &rqst->rq_snd_buf, rpcrdma_noch))
+ __be32 *p;
+
+ rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0);
+ xdr_init_encode(&req->rl_stream, &req->rl_hdrbuf,
+ req->rl_rdmabuf->rg_base);
+
+ p = xdr_reserve_space(&req->rl_stream, 28);
+ if (unlikely(!p))
+ return -EIO;
+ *p++ = rqst->rq_xid;
+ *p++ = rpcrdma_version;
+ *p++ = cpu_to_be32(r_xprt->rx_buf.rb_bc_srv_max_requests);
+ *p++ = rdma_msg;
+ *p++ = xdr_zero;
+ *p++ = xdr_zero;
+ *p = xdr_zero;
+
+ if (rpcrdma_prepare_send_sges(r_xprt, req, RPCRDMA_HDRLEN_MIN,
+ &rqst->rq_snd_buf, rpcrdma_noch))
return -EIO;
return 0;
}
@@ -271,9 +277,6 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst)
* @xprt: transport receiving the call
* @rep: receive buffer containing the call
*
- * Called in the RPC reply handler, which runs in a tasklet.
- * Be quick about it.
- *
* Operational assumptions:
* o Backchannel credits are ignored, just as the NFS server
* forechannel currently does
@@ -284,7 +287,6 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_rep *rep)
{
struct rpc_xprt *xprt = &r_xprt->rx_xprt;
- struct rpcrdma_msg *headerp;
struct svc_serv *bc_serv;
struct rpcrdma_req *req;
struct rpc_rqst *rqst;
@@ -292,24 +294,15 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
size_t size;
__be32 *p;
- headerp = rdmab_to_msg(rep->rr_rdmabuf);
+ p = xdr_inline_decode(&rep->rr_stream, 0);
+ size = xdr_stream_remaining(&rep->rr_stream);
+
#ifdef RPCRDMA_BACKCHANNEL_DEBUG
pr_info("RPC: %s: callback XID %08x, length=%u\n",
- __func__, be32_to_cpu(headerp->rm_xid), rep->rr_len);
- pr_info("RPC: %s: %*ph\n", __func__, rep->rr_len, headerp);
+ __func__, be32_to_cpup(p), size);
+ pr_info("RPC: %s: %*ph\n", __func__, size, p);
#endif
- /* Sanity check:
- * Need at least enough bytes for RPC/RDMA header, as code
- * here references the header fields by array offset. Also,
- * backward calls are always inline, so ensure there
- * are some bytes beyond the RPC/RDMA header.
- */
- if (rep->rr_len < RPCRDMA_HDRLEN_MIN + 24)
- goto out_short;
- p = (__be32 *)((unsigned char *)headerp + RPCRDMA_HDRLEN_MIN);
- size = rep->rr_len - RPCRDMA_HDRLEN_MIN;
-
/* Grab a free bc rqst */
spin_lock(&xprt->bc_pa_lock);
if (list_empty(&xprt->bc_pa_list)) {
@@ -325,7 +318,7 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
/* Prepare rqst */
rqst->rq_reply_bytes_recvd = 0;
rqst->rq_bytes_sent = 0;
- rqst->rq_xid = headerp->rm_xid;
+ rqst->rq_xid = *p;
rqst->rq_private_buf.len = size;
set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
@@ -337,9 +330,9 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
buf->len = size;
/* The receive buffer has to be hooked to the rpcrdma_req
- * so that it can be reposted after the server is done
- * parsing it but just before sending the backward
- * direction reply.
+ * so that it is not released while the req is pointing
+ * to its buffer, and so that it can be reposted after
+ * the Upper Layer is done decoding it.
*/
req = rpcr_to_rdmar(rqst);
dprintk("RPC: %s: attaching rep %p to req %p\n",
@@ -367,13 +360,4 @@ out_overflow:
* when the connection is re-established.
*/
return;
-
-out_short:
- pr_warn("RPC/RDMA short backward direction call\n");
-
- if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, rep))
- xprt_disconnect_done(xprt);
- else
- pr_warn("RPC: %s: reposting rep %p\n",
- __func__, rep);
}
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index d3f84bb1d443..29fc84c7ff98 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2015 Oracle. All rights reserved.
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
@@ -177,7 +178,7 @@ fmr_op_maxpages(struct rpcrdma_xprt *r_xprt)
/* Use the ib_map_phys_fmr() verb to register a memory region
* for remote access via RDMA READ or RDMA WRITE.
*/
-static int
+static struct rpcrdma_mr_seg *
fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
int nsegs, bool writing, struct rpcrdma_mw **out)
{
@@ -188,7 +189,7 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
mw = rpcrdma_get_mw(r_xprt);
if (!mw)
- return -ENOBUFS;
+ return ERR_PTR(-ENOBUFS);
pageoff = offset_in_page(seg1->mr_offset);
seg1->mr_offset -= pageoff; /* start of page */
@@ -232,13 +233,13 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
mw->mw_offset = dma_pages[0] + pageoff;
*out = mw;
- return mw->mw_nents;
+ return seg;
out_dmamap_err:
pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n",
mw->mw_sg, i);
rpcrdma_put_mw(r_xprt, mw);
- return -EIO;
+ return ERR_PTR(-EIO);
out_maperr:
pr_err("rpcrdma: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n",
@@ -247,7 +248,7 @@ out_maperr:
ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
mw->mw_sg, mw->mw_nents, mw->mw_dir);
rpcrdma_put_mw(r_xprt, mw);
- return -EIO;
+ return ERR_PTR(-EIO);
}
/* Invalidate all memory regions that were registered for "req".
@@ -305,28 +306,9 @@ out_reset:
}
}
-/* Use a slow, safe mechanism to invalidate all memory regions
- * that were registered for "req".
- */
-static void
-fmr_op_unmap_safe(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
- bool sync)
-{
- struct rpcrdma_mw *mw;
-
- while (!list_empty(&req->rl_registered)) {
- mw = rpcrdma_pop_mw(&req->rl_registered);
- if (sync)
- fmr_op_recover_mr(mw);
- else
- rpcrdma_defer_mr_recovery(mw);
- }
-}
-
const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
.ro_map = fmr_op_map,
.ro_unmap_sync = fmr_op_unmap_sync,
- .ro_unmap_safe = fmr_op_unmap_safe,
.ro_recover_mr = fmr_op_recover_mr,
.ro_open = fmr_op_open,
.ro_maxpages = fmr_op_maxpages,
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 6aea36a38bfd..773e66e10a15 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2015 Oracle. All rights reserved.
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
@@ -344,7 +345,7 @@ frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
/* Post a REG_MR Work Request to register a memory region
* for remote access via RDMA READ or RDMA WRITE.
*/
-static int
+static struct rpcrdma_mr_seg *
frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
int nsegs, bool writing, struct rpcrdma_mw **out)
{
@@ -364,7 +365,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
rpcrdma_defer_mr_recovery(mw);
mw = rpcrdma_get_mw(r_xprt);
if (!mw)
- return -ENOBUFS;
+ return ERR_PTR(-ENOBUFS);
} while (mw->frmr.fr_state != FRMR_IS_INVALID);
frmr = &mw->frmr;
frmr->fr_state = FRMR_IS_VALID;
@@ -401,7 +402,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
if (unlikely(n != mw->mw_nents))
goto out_mapmr_err;
- dprintk("RPC: %s: Using frmr %p to map %u segments (%u bytes)\n",
+ dprintk("RPC: %s: Using frmr %p to map %u segments (%llu bytes)\n",
__func__, frmr, mw->mw_nents, mr->length);
key = (u8)(mr->rkey & 0x000000FF);
@@ -419,7 +420,6 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
IB_ACCESS_REMOTE_READ;
- rpcrdma_set_signaled(&r_xprt->rx_ep, &reg_wr->wr);
rc = ib_post_send(ia->ri_id->qp, &reg_wr->wr, &bad_wr);
if (rc)
goto out_senderr;
@@ -429,25 +429,25 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
mw->mw_offset = mr->iova;
*out = mw;
- return mw->mw_nents;
+ return seg;
out_dmamap_err:
pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n",
mw->mw_sg, i);
frmr->fr_state = FRMR_IS_INVALID;
rpcrdma_put_mw(r_xprt, mw);
- return -EIO;
+ return ERR_PTR(-EIO);
out_mapmr_err:
pr_err("rpcrdma: failed to map mr %p (%d/%d)\n",
frmr->fr_mr, n, mw->mw_nents);
rpcrdma_defer_mr_recovery(mw);
- return -EIO;
+ return ERR_PTR(-EIO);
out_senderr:
pr_err("rpcrdma: FRMR registration ib_post_send returned %i\n", rc);
rpcrdma_defer_mr_recovery(mw);
- return -ENOTCONN;
+ return ERR_PTR(-ENOTCONN);
}
/* Invalidate all memory regions that were registered for "req".
@@ -507,12 +507,6 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
f->fr_cqe.done = frwr_wc_localinv_wake;
reinit_completion(&f->fr_linv_done);
- /* Initialize CQ count, since there is always a signaled
- * WR being posted here. The new cqcount depends on how
- * many SQEs are about to be consumed.
- */
- rpcrdma_init_cqcount(&r_xprt->rx_ep, count);
-
/* Transport disconnect drains the receive CQ before it
* replaces the QP. The RPC reply handler won't call us
* unless ri_id->qp is a valid pointer.
@@ -545,7 +539,6 @@ reset_mrs:
/* Find and reset the MRs in the LOCAL_INV WRs that did not
* get posted.
*/
- rpcrdma_init_cqcount(&r_xprt->rx_ep, -count);
while (bad_wr) {
f = container_of(bad_wr, struct rpcrdma_frmr,
fr_invwr);
@@ -558,28 +551,9 @@ reset_mrs:
goto unmap;
}
-/* Use a slow, safe mechanism to invalidate all memory regions
- * that were registered for "req".
- */
-static void
-frwr_op_unmap_safe(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
- bool sync)
-{
- struct rpcrdma_mw *mw;
-
- while (!list_empty(&req->rl_registered)) {
- mw = rpcrdma_pop_mw(&req->rl_registered);
- if (sync)
- frwr_op_recover_mr(mw);
- else
- rpcrdma_defer_mr_recovery(mw);
- }
-}
-
const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
.ro_map = frwr_op_map,
.ro_unmap_sync = frwr_op_unmap_sync,
- .ro_unmap_safe = frwr_op_unmap_safe,
.ro_recover_mr = frwr_op_recover_mr,
.ro_open = frwr_op_open,
.ro_maxpages = frwr_op_maxpages,
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index ca4d6e4528f3..ed34dc0f144c 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2014-2017 Oracle. All rights reserved.
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -75,11 +76,11 @@ static unsigned int rpcrdma_max_call_header_size(unsigned int maxsegs)
/* Maximum Read list size */
maxsegs += 2; /* segment for head and tail buffers */
- size = maxsegs * sizeof(struct rpcrdma_read_chunk);
+ size = maxsegs * rpcrdma_readchunk_maxsz * sizeof(__be32);
/* Minimal Read chunk size */
size += sizeof(__be32); /* segment count */
- size += sizeof(struct rpcrdma_segment);
+ size += rpcrdma_segment_maxsz * sizeof(__be32);
size += sizeof(__be32); /* list discriminator */
dprintk("RPC: %s: max call header size = %u\n",
@@ -102,7 +103,7 @@ static unsigned int rpcrdma_max_reply_header_size(unsigned int maxsegs)
/* Maximum Write list size */
maxsegs += 2; /* segment for head and tail buffers */
size = sizeof(__be32); /* segment count */
- size += maxsegs * sizeof(struct rpcrdma_segment);
+ size += maxsegs * rpcrdma_segment_maxsz * sizeof(__be32);
size += sizeof(__be32); /* list discriminator */
dprintk("RPC: %s: max reply header size = %u\n",
@@ -169,40 +170,41 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
return rqst->rq_rcv_buf.buflen <= ia->ri_max_inline_read;
}
-/* Split "vec" on page boundaries into segments. FMR registers pages,
- * not a byte range. Other modes coalesce these segments into a single
- * MR when they can.
+/* Split @vec on page boundaries into SGEs. FMR registers pages, not
+ * a byte range. Other modes coalesce these SGEs into a single MR
+ * when they can.
+ *
+ * Returns pointer to next available SGE, and bumps the total number
+ * of SGEs consumed.
*/
-static int
-rpcrdma_convert_kvec(struct kvec *vec, struct rpcrdma_mr_seg *seg, int n)
+static struct rpcrdma_mr_seg *
+rpcrdma_convert_kvec(struct kvec *vec, struct rpcrdma_mr_seg *seg,
+ unsigned int *n)
{
- size_t page_offset;
- u32 remaining;
+ u32 remaining, page_offset;
char *base;
base = vec->iov_base;
page_offset = offset_in_page(base);
remaining = vec->iov_len;
- while (remaining && n < RPCRDMA_MAX_SEGS) {
- seg[n].mr_page = NULL;
- seg[n].mr_offset = base;
- seg[n].mr_len = min_t(u32, PAGE_SIZE - page_offset, remaining);
- remaining -= seg[n].mr_len;
- base += seg[n].mr_len;
- ++n;
+ while (remaining) {
+ seg->mr_page = NULL;
+ seg->mr_offset = base;
+ seg->mr_len = min_t(u32, PAGE_SIZE - page_offset, remaining);
+ remaining -= seg->mr_len;
+ base += seg->mr_len;
+ ++seg;
+ ++(*n);
page_offset = 0;
}
- return n;
+ return seg;
}
-/*
- * Chunk assembly from upper layer xdr_buf.
+/* Convert @xdrbuf into SGEs no larger than a page each. As they
+ * are registered, these SGEs are then coalesced into RDMA segments
+ * when the selected memreg mode supports it.
*
- * Prepare the passed-in xdr_buf into representation as RPC/RDMA chunk
- * elements. Segments are then coalesced when registered, if possible
- * within the selected memreg mode.
- *
- * Returns positive number of segments converted, or a negative errno.
+ * Returns positive number of SGEs consumed, or a negative errno.
*/
static int
@@ -210,47 +212,41 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
unsigned int pos, enum rpcrdma_chunktype type,
struct rpcrdma_mr_seg *seg)
{
- int len, n, p, page_base;
+ unsigned long page_base;
+ unsigned int len, n;
struct page **ppages;
n = 0;
- if (pos == 0) {
- n = rpcrdma_convert_kvec(&xdrbuf->head[0], seg, n);
- if (n == RPCRDMA_MAX_SEGS)
- goto out_overflow;
- }
+ if (pos == 0)
+ seg = rpcrdma_convert_kvec(&xdrbuf->head[0], seg, &n);
len = xdrbuf->page_len;
ppages = xdrbuf->pages + (xdrbuf->page_base >> PAGE_SHIFT);
page_base = offset_in_page(xdrbuf->page_base);
- p = 0;
- while (len && n < RPCRDMA_MAX_SEGS) {
- if (!ppages[p]) {
- /* alloc the pagelist for receiving buffer */
- ppages[p] = alloc_page(GFP_ATOMIC);
- if (!ppages[p])
+ while (len) {
+ if (unlikely(!*ppages)) {
+ /* XXX: Certain upper layer operations do
+ * not provide receive buffer pages.
+ */
+ *ppages = alloc_page(GFP_ATOMIC);
+ if (!*ppages)
return -EAGAIN;
}
- seg[n].mr_page = ppages[p];
- seg[n].mr_offset = (void *)(unsigned long) page_base;
- seg[n].mr_len = min_t(u32, PAGE_SIZE - page_base, len);
- if (seg[n].mr_len > PAGE_SIZE)
- goto out_overflow;
- len -= seg[n].mr_len;
+ seg->mr_page = *ppages;
+ seg->mr_offset = (char *)page_base;
+ seg->mr_len = min_t(u32, PAGE_SIZE - page_base, len);
+ len -= seg->mr_len;
+ ++ppages;
+ ++seg;
++n;
- ++p;
- page_base = 0; /* page offset only applies to first page */
+ page_base = 0;
}
- /* Message overflows the seg array */
- if (len && n == RPCRDMA_MAX_SEGS)
- goto out_overflow;
-
/* When encoding a Read chunk, the tail iovec contains an
* XDR pad and may be omitted.
*/
if (type == rpcrdma_readch && r_xprt->rx_ia.ri_implicit_roundup)
- return n;
+ goto out;
/* When encoding a Write chunk, some servers need to see an
* extra segment for non-XDR-aligned Write chunks. The upper
@@ -258,30 +254,81 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
* for this purpose.
*/
if (type == rpcrdma_writech && r_xprt->rx_ia.ri_implicit_roundup)
- return n;
+ goto out;
- if (xdrbuf->tail[0].iov_len) {
- n = rpcrdma_convert_kvec(&xdrbuf->tail[0], seg, n);
- if (n == RPCRDMA_MAX_SEGS)
- goto out_overflow;
- }
+ if (xdrbuf->tail[0].iov_len)
+ seg = rpcrdma_convert_kvec(&xdrbuf->tail[0], seg, &n);
+out:
+ if (unlikely(n > RPCRDMA_MAX_SEGS))
+ return -EIO;
return n;
+}
+
+static inline int
+encode_item_present(struct xdr_stream *xdr)
+{
+ __be32 *p;
-out_overflow:
- pr_err("rpcrdma: segment array overflow\n");
- return -EIO;
+ p = xdr_reserve_space(xdr, sizeof(*p));
+ if (unlikely(!p))
+ return -EMSGSIZE;
+
+ *p = xdr_one;
+ return 0;
}
-static inline __be32 *
+static inline int
+encode_item_not_present(struct xdr_stream *xdr)
+{
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, sizeof(*p));
+ if (unlikely(!p))
+ return -EMSGSIZE;
+
+ *p = xdr_zero;
+ return 0;
+}
+
+static void
xdr_encode_rdma_segment(__be32 *iptr, struct rpcrdma_mw *mw)
{
*iptr++ = cpu_to_be32(mw->mw_handle);
*iptr++ = cpu_to_be32(mw->mw_length);
- return xdr_encode_hyper(iptr, mw->mw_offset);
+ xdr_encode_hyper(iptr, mw->mw_offset);
+}
+
+static int
+encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw)
+{
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, 4 * sizeof(*p));
+ if (unlikely(!p))
+ return -EMSGSIZE;
+
+ xdr_encode_rdma_segment(p, mw);
+ return 0;
}
-/* XDR-encode the Read list. Supports encoding a list of read
+static int
+encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw,
+ u32 position)
+{
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, 6 * sizeof(*p));
+ if (unlikely(!p))
+ return -EMSGSIZE;
+
+ *p++ = xdr_one; /* Item present */
+ *p++ = cpu_to_be32(position);
+ xdr_encode_rdma_segment(p, mw);
+ return 0;
+}
+
+/* Register and XDR encode the Read list. Supports encoding a list of read
* segments that belong to a single read chunk.
*
* Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64):
@@ -290,23 +337,20 @@ xdr_encode_rdma_segment(__be32 *iptr, struct rpcrdma_mw *mw)
* N elements, position P (same P for all chunks of same arg!):
* 1 - PHLOO - 1 - PHLOO - ... - 1 - PHLOO - 0
*
- * Returns a pointer to the XDR word in the RDMA header following
- * the end of the Read list, or an error pointer.
+ * Returns zero on success, or a negative errno if a failure occurred.
+ * @xdr is advanced to the next position in the stream.
+ *
+ * Only a single @pos value is currently supported.
*/
-static __be32 *
-rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt,
- struct rpcrdma_req *req, struct rpc_rqst *rqst,
- __be32 *iptr, enum rpcrdma_chunktype rtype)
+static noinline int
+rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
+ struct rpc_rqst *rqst, enum rpcrdma_chunktype rtype)
{
+ struct xdr_stream *xdr = &req->rl_stream;
struct rpcrdma_mr_seg *seg;
struct rpcrdma_mw *mw;
unsigned int pos;
- int n, nsegs;
-
- if (rtype == rpcrdma_noch) {
- *iptr++ = xdr_zero; /* item not present */
- return iptr;
- }
+ int nsegs;
pos = rqst->rq_snd_buf.head[0].iov_len;
if (rtype == rpcrdma_areadch)
@@ -315,40 +359,33 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt,
nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_snd_buf, pos,
rtype, seg);
if (nsegs < 0)
- return ERR_PTR(nsegs);
+ return nsegs;
do {
- n = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
- false, &mw);
- if (n < 0)
- return ERR_PTR(n);
+ seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
+ false, &mw);
+ if (IS_ERR(seg))
+ return PTR_ERR(seg);
rpcrdma_push_mw(mw, &req->rl_registered);
- *iptr++ = xdr_one; /* item present */
-
- /* All read segments in this chunk
- * have the same "position".
- */
- *iptr++ = cpu_to_be32(pos);
- iptr = xdr_encode_rdma_segment(iptr, mw);
+ if (encode_read_segment(xdr, mw, pos) < 0)
+ return -EMSGSIZE;
dprintk("RPC: %5u %s: pos %u %u@0x%016llx:0x%08x (%s)\n",
rqst->rq_task->tk_pid, __func__, pos,
mw->mw_length, (unsigned long long)mw->mw_offset,
- mw->mw_handle, n < nsegs ? "more" : "last");
+ mw->mw_handle, mw->mw_nents < nsegs ? "more" : "last");
r_xprt->rx_stats.read_chunk_count++;
- seg += n;
- nsegs -= n;
+ nsegs -= mw->mw_nents;
} while (nsegs);
- /* Finish Read list */
- *iptr++ = xdr_zero; /* Next item not present */
- return iptr;
+ return 0;
}
-/* XDR-encode the Write list. Supports encoding a list containing
- * one array of plain segments that belong to a single write chunk.
+/* Register and XDR encode the Write list. Supports encoding a list
+ * containing one array of plain segments that belong to a single
+ * write chunk.
*
* Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64):
*
@@ -356,66 +393,65 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt,
* N elements:
* 1 - N - HLOO - HLOO - ... - HLOO - 0
*
- * Returns a pointer to the XDR word in the RDMA header following
- * the end of the Write list, or an error pointer.
+ * Returns zero on success, or a negative errno if a failure occurred.
+ * @xdr is advanced to the next position in the stream.
+ *
+ * Only a single Write chunk is currently supported.
*/
-static __be32 *
+static noinline int
rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
- struct rpc_rqst *rqst, __be32 *iptr,
- enum rpcrdma_chunktype wtype)
+ struct rpc_rqst *rqst, enum rpcrdma_chunktype wtype)
{
+ struct xdr_stream *xdr = &req->rl_stream;
struct rpcrdma_mr_seg *seg;
struct rpcrdma_mw *mw;
- int n, nsegs, nchunks;
+ int nsegs, nchunks;
__be32 *segcount;
- if (wtype != rpcrdma_writech) {
- *iptr++ = xdr_zero; /* no Write list present */
- return iptr;
- }
-
seg = req->rl_segments;
nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf,
rqst->rq_rcv_buf.head[0].iov_len,
wtype, seg);
if (nsegs < 0)
- return ERR_PTR(nsegs);
+ return nsegs;
- *iptr++ = xdr_one; /* Write list present */
- segcount = iptr++; /* save location of segment count */
+ if (encode_item_present(xdr) < 0)
+ return -EMSGSIZE;
+ segcount = xdr_reserve_space(xdr, sizeof(*segcount));
+ if (unlikely(!segcount))
+ return -EMSGSIZE;
+ /* Actual value encoded below */
nchunks = 0;
do {
- n = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
- true, &mw);
- if (n < 0)
- return ERR_PTR(n);
+ seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
+ true, &mw);
+ if (IS_ERR(seg))
+ return PTR_ERR(seg);
rpcrdma_push_mw(mw, &req->rl_registered);
- iptr = xdr_encode_rdma_segment(iptr, mw);
+ if (encode_rdma_segment(xdr, mw) < 0)
+ return -EMSGSIZE;
dprintk("RPC: %5u %s: %u@0x016%llx:0x%08x (%s)\n",
rqst->rq_task->tk_pid, __func__,
mw->mw_length, (unsigned long long)mw->mw_offset,
- mw->mw_handle, n < nsegs ? "more" : "last");
+ mw->mw_handle, mw->mw_nents < nsegs ? "more" : "last");
r_xprt->rx_stats.write_chunk_count++;
r_xprt->rx_stats.total_rdma_request += seg->mr_len;
nchunks++;
- seg += n;
- nsegs -= n;
+ nsegs -= mw->mw_nents;
} while (nsegs);
/* Update count of segments in this Write chunk */
*segcount = cpu_to_be32(nchunks);
- /* Finish Write list */
- *iptr++ = xdr_zero; /* Next item not present */
- return iptr;
+ return 0;
}
-/* XDR-encode the Reply chunk. Supports encoding an array of plain
- * segments that belong to a single write (reply) chunk.
+/* Register and XDR encode the Reply chunk. Supports encoding an array
+ * of plain segments that belong to a single write (reply) chunk.
*
* Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64):
*
@@ -423,81 +459,113 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
* N elements:
* 1 - N - HLOO - HLOO - ... - HLOO
*
- * Returns a pointer to the XDR word in the RDMA header following
- * the end of the Reply chunk, or an error pointer.
+ * Returns zero on success, or a negative errno if a failure occurred.
+ * @xdr is advanced to the next position in the stream.
*/
-static __be32 *
-rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt,
- struct rpcrdma_req *req, struct rpc_rqst *rqst,
- __be32 *iptr, enum rpcrdma_chunktype wtype)
+static noinline int
+rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
+ struct rpc_rqst *rqst, enum rpcrdma_chunktype wtype)
{
+ struct xdr_stream *xdr = &req->rl_stream;
struct rpcrdma_mr_seg *seg;
struct rpcrdma_mw *mw;
- int n, nsegs, nchunks;
+ int nsegs, nchunks;
__be32 *segcount;
- if (wtype != rpcrdma_replych) {
- *iptr++ = xdr_zero; /* no Reply chunk present */
- return iptr;
- }
-
seg = req->rl_segments;
nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf, 0, wtype, seg);
if (nsegs < 0)
- return ERR_PTR(nsegs);
+ return nsegs;
- *iptr++ = xdr_one; /* Reply chunk present */
- segcount = iptr++; /* save location of segment count */
+ if (encode_item_present(xdr) < 0)
+ return -EMSGSIZE;
+ segcount = xdr_reserve_space(xdr, sizeof(*segcount));
+ if (unlikely(!segcount))
+ return -EMSGSIZE;
+ /* Actual value encoded below */
nchunks = 0;
do {
- n = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
- true, &mw);
- if (n < 0)
- return ERR_PTR(n);
+ seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
+ true, &mw);
+ if (IS_ERR(seg))
+ return PTR_ERR(seg);
rpcrdma_push_mw(mw, &req->rl_registered);
- iptr = xdr_encode_rdma_segment(iptr, mw);
+ if (encode_rdma_segment(xdr, mw) < 0)
+ return -EMSGSIZE;
dprintk("RPC: %5u %s: %u@0x%016llx:0x%08x (%s)\n",
rqst->rq_task->tk_pid, __func__,
mw->mw_length, (unsigned long long)mw->mw_offset,
- mw->mw_handle, n < nsegs ? "more" : "last");
+ mw->mw_handle, mw->mw_nents < nsegs ? "more" : "last");
r_xprt->rx_stats.reply_chunk_count++;
r_xprt->rx_stats.total_rdma_request += seg->mr_len;
nchunks++;
- seg += n;
- nsegs -= n;
+ nsegs -= mw->mw_nents;
} while (nsegs);
/* Update count of segments in the Reply chunk */
*segcount = cpu_to_be32(nchunks);
- return iptr;
+ return 0;
+}
+
+/**
+ * rpcrdma_unmap_sendctx - DMA-unmap Send buffers
+ * @sc: sendctx containing SGEs to unmap
+ *
+ */
+void
+rpcrdma_unmap_sendctx(struct rpcrdma_sendctx *sc)
+{
+ struct rpcrdma_ia *ia = &sc->sc_xprt->rx_ia;
+ struct ib_sge *sge;
+ unsigned int count;
+
+ dprintk("RPC: %s: unmapping %u sges for sc=%p\n",
+ __func__, sc->sc_unmap_count, sc);
+
+ /* The first two SGEs contain the transport header and
+ * the inline buffer. These are always left mapped so
+ * they can be cheaply re-used.
+ */
+ sge = &sc->sc_sges[2];
+ for (count = sc->sc_unmap_count; count; ++sge, --count)
+ ib_dma_unmap_page(ia->ri_device,
+ sge->addr, sge->length, DMA_TO_DEVICE);
+
+ if (test_and_clear_bit(RPCRDMA_REQ_F_TX_RESOURCES, &sc->sc_req->rl_flags)) {
+ smp_mb__after_atomic();
+ wake_up_bit(&sc->sc_req->rl_flags, RPCRDMA_REQ_F_TX_RESOURCES);
+ }
}
-/* Prepare the RPC-over-RDMA header SGE.
+/* Prepare an SGE for the RPC-over-RDMA transport header.
*/
static bool
rpcrdma_prepare_hdr_sge(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
u32 len)
{
+ struct rpcrdma_sendctx *sc = req->rl_sendctx;
struct rpcrdma_regbuf *rb = req->rl_rdmabuf;
- struct ib_sge *sge = &req->rl_send_sge[0];
+ struct ib_sge *sge = sc->sc_sges;
- if (unlikely(!rpcrdma_regbuf_is_mapped(rb))) {
- if (!__rpcrdma_dma_map_regbuf(ia, rb))
- return false;
- sge->addr = rdmab_addr(rb);
- sge->lkey = rdmab_lkey(rb);
- }
+ if (!rpcrdma_dma_map_regbuf(ia, rb))
+ goto out_regbuf;
+ sge->addr = rdmab_addr(rb);
sge->length = len;
+ sge->lkey = rdmab_lkey(rb);
ib_dma_sync_single_for_device(rdmab_device(rb), sge->addr,
sge->length, DMA_TO_DEVICE);
- req->rl_send_wr.num_sge++;
+ sc->sc_wr.num_sge++;
return true;
+
+out_regbuf:
+ pr_err("rpcrdma: failed to DMA map a Send buffer\n");
+ return false;
}
/* Prepare the Send SGEs. The head and tail iovec, and each entry
@@ -507,10 +575,11 @@ static bool
rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
struct xdr_buf *xdr, enum rpcrdma_chunktype rtype)
{
+ struct rpcrdma_sendctx *sc = req->rl_sendctx;
unsigned int sge_no, page_base, len, remaining;
struct rpcrdma_regbuf *rb = req->rl_sendbuf;
struct ib_device *device = ia->ri_device;
- struct ib_sge *sge = req->rl_send_sge;
+ struct ib_sge *sge = sc->sc_sges;
u32 lkey = ia->ri_pd->local_dma_lkey;
struct page *page, **ppages;
@@ -518,7 +587,7 @@ rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
* DMA-mapped. Sync the content that has changed.
*/
if (!rpcrdma_dma_map_regbuf(ia, rb))
- return false;
+ goto out_regbuf;
sge_no = 1;
sge[sge_no].addr = rdmab_addr(rb);
sge[sge_no].length = xdr->head[0].iov_len;
@@ -573,7 +642,7 @@ rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
sge[sge_no].length = len;
sge[sge_no].lkey = lkey;
- req->rl_mapped_sges++;
+ sc->sc_unmap_count++;
ppages++;
remaining -= len;
page_base = 0;
@@ -599,89 +668,109 @@ map_tail:
goto out_mapping_err;
sge[sge_no].length = len;
sge[sge_no].lkey = lkey;
- req->rl_mapped_sges++;
+ sc->sc_unmap_count++;
}
out:
- req->rl_send_wr.num_sge = sge_no + 1;
+ sc->sc_wr.num_sge += sge_no;
+ if (sc->sc_unmap_count)
+ __set_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags);
return true;
+out_regbuf:
+ pr_err("rpcrdma: failed to DMA map a Send buffer\n");
+ return false;
+
out_mapping_overflow:
+ rpcrdma_unmap_sendctx(sc);
pr_err("rpcrdma: too many Send SGEs (%u)\n", sge_no);
return false;
out_mapping_err:
+ rpcrdma_unmap_sendctx(sc);
pr_err("rpcrdma: Send mapping error\n");
return false;
}
-bool
-rpcrdma_prepare_send_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
- u32 hdrlen, struct xdr_buf *xdr,
- enum rpcrdma_chunktype rtype)
+/**
+ * rpcrdma_prepare_send_sges - Construct SGEs for a Send WR
+ * @r_xprt: controlling transport
+ * @req: context of RPC Call being marshalled
+ * @hdrlen: size of transport header, in bytes
+ * @xdr: xdr_buf containing RPC Call
+ * @rtype: chunk type being encoded
+ *
+ * Returns 0 on success; otherwise a negative errno is returned.
+ */
+int
+rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
+ struct rpcrdma_req *req, u32 hdrlen,
+ struct xdr_buf *xdr, enum rpcrdma_chunktype rtype)
{
- req->rl_send_wr.num_sge = 0;
- req->rl_mapped_sges = 0;
+ req->rl_sendctx = rpcrdma_sendctx_get_locked(&r_xprt->rx_buf);
+ if (!req->rl_sendctx)
+ return -ENOBUFS;
+ req->rl_sendctx->sc_wr.num_sge = 0;
+ req->rl_sendctx->sc_unmap_count = 0;
+ req->rl_sendctx->sc_req = req;
+ __clear_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags);
- if (!rpcrdma_prepare_hdr_sge(ia, req, hdrlen))
- goto out_map;
+ if (!rpcrdma_prepare_hdr_sge(&r_xprt->rx_ia, req, hdrlen))
+ return -EIO;
if (rtype != rpcrdma_areadch)
- if (!rpcrdma_prepare_msg_sges(ia, req, xdr, rtype))
- goto out_map;
-
- return true;
-
-out_map:
- pr_err("rpcrdma: failed to DMA map a Send buffer\n");
- return false;
-}
-
-void
-rpcrdma_unmap_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
-{
- struct ib_device *device = ia->ri_device;
- struct ib_sge *sge;
- int count;
+ if (!rpcrdma_prepare_msg_sges(&r_xprt->rx_ia, req, xdr, rtype))
+ return -EIO;
- sge = &req->rl_send_sge[2];
- for (count = req->rl_mapped_sges; count--; sge++)
- ib_dma_unmap_page(device, sge->addr, sge->length,
- DMA_TO_DEVICE);
- req->rl_mapped_sges = 0;
+ return 0;
}
-/*
- * Marshal a request: the primary job of this routine is to choose
- * the transfer modes. See comments below.
+/**
+ * rpcrdma_marshal_req - Marshal and send one RPC request
+ * @r_xprt: controlling transport
+ * @rqst: RPC request to be marshaled
+ *
+ * For the RPC in "rqst", this function:
+ * - Chooses the transfer mode (eg., RDMA_MSG or RDMA_NOMSG)
+ * - Registers Read, Write, and Reply chunks
+ * - Constructs the transport header
+ * - Posts a Send WR to send the transport header and request
*
- * Returns zero on success, otherwise a negative errno.
+ * Returns:
+ * %0 if the RPC was sent successfully,
+ * %-ENOTCONN if the connection was lost,
+ * %-EAGAIN if not enough pages are available for on-demand reply buffer,
+ * %-ENOBUFS if no MRs are available to register chunks,
+ * %-EMSGSIZE if the transport header is too small,
+ * %-EIO if a permanent problem occurred while marshaling.
*/
-
int
-rpcrdma_marshal_req(struct rpc_rqst *rqst)
+rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
{
- struct rpc_xprt *xprt = rqst->rq_xprt;
- struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
+ struct xdr_stream *xdr = &req->rl_stream;
enum rpcrdma_chunktype rtype, wtype;
- struct rpcrdma_msg *headerp;
bool ddp_allowed;
- ssize_t hdrlen;
- size_t rpclen;
- __be32 *iptr;
+ __be32 *p;
+ int ret;
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
if (test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state))
return rpcrdma_bc_marshal_reply(rqst);
#endif
- headerp = rdmab_to_msg(req->rl_rdmabuf);
- /* don't byte-swap XID, it's already done in request */
- headerp->rm_xid = rqst->rq_xid;
- headerp->rm_vers = rpcrdma_version;
- headerp->rm_credit = cpu_to_be32(r_xprt->rx_buf.rb_max_requests);
- headerp->rm_type = rdma_msg;
+ rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0);
+ xdr_init_encode(xdr, &req->rl_hdrbuf,
+ req->rl_rdmabuf->rg_base);
+
+ /* Fixed header fields */
+ ret = -EMSGSIZE;
+ p = xdr_reserve_space(xdr, 4 * sizeof(*p));
+ if (!p)
+ goto out_err;
+ *p++ = rqst->rq_xid;
+ *p++ = rpcrdma_version;
+ *p++ = cpu_to_be32(r_xprt->rx_buf.rb_max_requests);
/* When the ULP employs a GSS flavor that guarantees integrity
* or privacy, direct data placement of individual data items
@@ -721,22 +810,17 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
* by themselves are larger than the inline threshold.
*/
if (rpcrdma_args_inline(r_xprt, rqst)) {
+ *p++ = rdma_msg;
rtype = rpcrdma_noch;
- rpclen = rqst->rq_snd_buf.len;
} else if (ddp_allowed && rqst->rq_snd_buf.flags & XDRBUF_WRITE) {
+ *p++ = rdma_msg;
rtype = rpcrdma_readch;
- rpclen = rqst->rq_snd_buf.head[0].iov_len +
- rqst->rq_snd_buf.tail[0].iov_len;
} else {
r_xprt->rx_stats.nomsg_call_count++;
- headerp->rm_type = htonl(RDMA_NOMSG);
+ *p++ = rdma_nomsg;
rtype = rpcrdma_areadch;
- rpclen = 0;
}
- req->rl_xid = rqst->rq_xid;
- rpcrdma_insert_req(&r_xprt->rx_buf, req);
-
/* This implementation supports the following combinations
* of chunk lists in one RPC-over-RDMA Call message:
*
@@ -759,79 +843,48 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
* send a Call message with a Position Zero Read chunk and a
* regular Read chunk at the same time.
*/
- iptr = headerp->rm_body.rm_chunks;
- iptr = rpcrdma_encode_read_list(r_xprt, req, rqst, iptr, rtype);
- if (IS_ERR(iptr))
+ if (rtype != rpcrdma_noch) {
+ ret = rpcrdma_encode_read_list(r_xprt, req, rqst, rtype);
+ if (ret)
+ goto out_err;
+ }
+ ret = encode_item_not_present(xdr);
+ if (ret)
goto out_err;
- iptr = rpcrdma_encode_write_list(r_xprt, req, rqst, iptr, wtype);
- if (IS_ERR(iptr))
+
+ if (wtype == rpcrdma_writech) {
+ ret = rpcrdma_encode_write_list(r_xprt, req, rqst, wtype);
+ if (ret)
+ goto out_err;
+ }
+ ret = encode_item_not_present(xdr);
+ if (ret)
goto out_err;
- iptr = rpcrdma_encode_reply_chunk(r_xprt, req, rqst, iptr, wtype);
- if (IS_ERR(iptr))
+
+ if (wtype != rpcrdma_replych)
+ ret = encode_item_not_present(xdr);
+ else
+ ret = rpcrdma_encode_reply_chunk(r_xprt, req, rqst, wtype);
+ if (ret)
goto out_err;
- hdrlen = (unsigned char *)iptr - (unsigned char *)headerp;
- dprintk("RPC: %5u %s: %s/%s: hdrlen %zd rpclen %zd\n",
+ dprintk("RPC: %5u %s: %s/%s: hdrlen %u rpclen\n",
rqst->rq_task->tk_pid, __func__,
transfertypes[rtype], transfertypes[wtype],
- hdrlen, rpclen);
+ xdr_stream_pos(xdr));
- if (!rpcrdma_prepare_send_sges(&r_xprt->rx_ia, req, hdrlen,
- &rqst->rq_snd_buf, rtype)) {
- iptr = ERR_PTR(-EIO);
+ ret = rpcrdma_prepare_send_sges(r_xprt, req, xdr_stream_pos(xdr),
+ &rqst->rq_snd_buf, rtype);
+ if (ret)
goto out_err;
- }
return 0;
out_err:
- if (PTR_ERR(iptr) != -ENOBUFS) {
- pr_err("rpcrdma: rpcrdma_marshal_req failed, status %ld\n",
- PTR_ERR(iptr));
+ if (ret != -ENOBUFS) {
+ pr_err("rpcrdma: header marshaling failed (%d)\n", ret);
r_xprt->rx_stats.failed_marshal_count++;
}
- return PTR_ERR(iptr);
-}
-
-/*
- * Chase down a received write or reply chunklist to get length
- * RDMA'd by server. See map at rpcrdma_create_chunks()! :-)
- */
-static int
-rpcrdma_count_chunks(struct rpcrdma_rep *rep, int wrchunk, __be32 **iptrp)
-{
- unsigned int i, total_len;
- struct rpcrdma_write_chunk *cur_wchunk;
- char *base = (char *)rdmab_to_msg(rep->rr_rdmabuf);
-
- i = be32_to_cpu(**iptrp);
- cur_wchunk = (struct rpcrdma_write_chunk *) (*iptrp + 1);
- total_len = 0;
- while (i--) {
- struct rpcrdma_segment *seg = &cur_wchunk->wc_target;
- ifdebug(FACILITY) {
- u64 off;
- xdr_decode_hyper((__be32 *)&seg->rs_offset, &off);
- dprintk("RPC: %s: chunk %d@0x%016llx:0x%08x\n",
- __func__,
- be32_to_cpu(seg->rs_length),
- (unsigned long long)off,
- be32_to_cpu(seg->rs_handle));
- }
- total_len += be32_to_cpu(seg->rs_length);
- ++cur_wchunk;
- }
- /* check and adjust for properly terminated write chunk */
- if (wrchunk) {
- __be32 *w = (__be32 *) cur_wchunk;
- if (*w++ != xdr_zero)
- return -1;
- cur_wchunk = (struct rpcrdma_write_chunk *) w;
- }
- if ((char *)cur_wchunk > base + rep->rr_len)
- return -1;
-
- *iptrp = (__be32 *) cur_wchunk;
- return total_len;
+ return ret;
}
/**
@@ -949,196 +1002,417 @@ rpcrdma_mark_remote_invalidation(struct list_head *mws,
}
}
-#if defined(CONFIG_SUNRPC_BACKCHANNEL)
/* By convention, backchannel calls arrive via rdma_msg type
* messages, and never populate the chunk lists. This makes
* the RPC/RDMA header small and fixed in size, so it is
* straightforward to check the RPC header's direction field.
*/
static bool
-rpcrdma_is_bcall(struct rpcrdma_msg *headerp)
+rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep)
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
{
- __be32 *p = (__be32 *)headerp;
+ struct xdr_stream *xdr = &rep->rr_stream;
+ __be32 *p;
- if (headerp->rm_type != rdma_msg)
+ if (rep->rr_proc != rdma_msg)
return false;
- if (headerp->rm_body.rm_chunks[0] != xdr_zero)
+
+ /* Peek at stream contents without advancing. */
+ p = xdr_inline_decode(xdr, 0);
+
+ /* Chunk lists */
+ if (*p++ != xdr_zero)
return false;
- if (headerp->rm_body.rm_chunks[1] != xdr_zero)
+ if (*p++ != xdr_zero)
return false;
- if (headerp->rm_body.rm_chunks[2] != xdr_zero)
+ if (*p++ != xdr_zero)
return false;
- /* sanity */
- if (p[7] != headerp->rm_xid)
+ /* RPC header */
+ if (*p++ != rep->rr_xid)
return false;
- /* call direction */
- if (p[8] != cpu_to_be32(RPC_CALL))
+ if (*p != cpu_to_be32(RPC_CALL))
return false;
+ /* Now that we are sure this is a backchannel call,
+ * advance to the RPC header.
+ */
+ p = xdr_inline_decode(xdr, 3 * sizeof(*p));
+ if (unlikely(!p))
+ goto out_short;
+
+ rpcrdma_bc_receive_call(r_xprt, rep);
+ return true;
+
+out_short:
+ pr_warn("RPC/RDMA short backward direction call\n");
+ if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, rep))
+ xprt_disconnect_done(&r_xprt->rx_xprt);
return true;
}
+#else /* CONFIG_SUNRPC_BACKCHANNEL */
+{
+ return false;
+}
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
+static int decode_rdma_segment(struct xdr_stream *xdr, u32 *length)
+{
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, 4 * sizeof(*p));
+ if (unlikely(!p))
+ return -EIO;
+
+ ifdebug(FACILITY) {
+ u64 offset;
+ u32 handle;
+
+ handle = be32_to_cpup(p++);
+ *length = be32_to_cpup(p++);
+ xdr_decode_hyper(p, &offset);
+ dprintk("RPC: %s: segment %u@0x%016llx:0x%08x\n",
+ __func__, *length, (unsigned long long)offset,
+ handle);
+ } else {
+ *length = be32_to_cpup(p + 1);
+ }
+
+ return 0;
+}
+
+static int decode_write_chunk(struct xdr_stream *xdr, u32 *length)
+{
+ u32 segcount, seglength;
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, sizeof(*p));
+ if (unlikely(!p))
+ return -EIO;
+
+ *length = 0;
+ segcount = be32_to_cpup(p);
+ while (segcount--) {
+ if (decode_rdma_segment(xdr, &seglength))
+ return -EIO;
+ *length += seglength;
+ }
+
+ dprintk("RPC: %s: segcount=%u, %u bytes\n",
+ __func__, be32_to_cpup(p), *length);
+ return 0;
+}
+
+/* In RPC-over-RDMA Version One replies, a Read list is never
+ * expected. This decoder is a stub that returns an error if
+ * a Read list is present.
+ */
+static int decode_read_list(struct xdr_stream *xdr)
+{
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, sizeof(*p));
+ if (unlikely(!p))
+ return -EIO;
+ if (unlikely(*p != xdr_zero))
+ return -EIO;
+ return 0;
+}
+
+/* Supports only one Write chunk in the Write list
+ */
+static int decode_write_list(struct xdr_stream *xdr, u32 *length)
+{
+ u32 chunklen;
+ bool first;
+ __be32 *p;
+
+ *length = 0;
+ first = true;
+ do {
+ p = xdr_inline_decode(xdr, sizeof(*p));
+ if (unlikely(!p))
+ return -EIO;
+ if (*p == xdr_zero)
+ break;
+ if (!first)
+ return -EIO;
+
+ if (decode_write_chunk(xdr, &chunklen))
+ return -EIO;
+ *length += chunklen;
+ first = false;
+ } while (true);
+ return 0;
+}
+
+static int decode_reply_chunk(struct xdr_stream *xdr, u32 *length)
+{
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, sizeof(*p));
+ if (unlikely(!p))
+ return -EIO;
+
+ *length = 0;
+ if (*p != xdr_zero)
+ if (decode_write_chunk(xdr, length))
+ return -EIO;
+ return 0;
+}
+
+static int
+rpcrdma_decode_msg(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep,
+ struct rpc_rqst *rqst)
+{
+ struct xdr_stream *xdr = &rep->rr_stream;
+ u32 writelist, replychunk, rpclen;
+ char *base;
+
+ /* Decode the chunk lists */
+ if (decode_read_list(xdr))
+ return -EIO;
+ if (decode_write_list(xdr, &writelist))
+ return -EIO;
+ if (decode_reply_chunk(xdr, &replychunk))
+ return -EIO;
+
+ /* RDMA_MSG sanity checks */
+ if (unlikely(replychunk))
+ return -EIO;
+
+ /* Build the RPC reply's Payload stream in rqst->rq_rcv_buf */
+ base = (char *)xdr_inline_decode(xdr, 0);
+ rpclen = xdr_stream_remaining(xdr);
+ r_xprt->rx_stats.fixup_copy_count +=
+ rpcrdma_inline_fixup(rqst, base, rpclen, writelist & 3);
+
+ r_xprt->rx_stats.total_rdma_reply += writelist;
+ return rpclen + xdr_align_size(writelist);
+}
+
+static noinline int
+rpcrdma_decode_nomsg(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep)
+{
+ struct xdr_stream *xdr = &rep->rr_stream;
+ u32 writelist, replychunk;
+
+ /* Decode the chunk lists */
+ if (decode_read_list(xdr))
+ return -EIO;
+ if (decode_write_list(xdr, &writelist))
+ return -EIO;
+ if (decode_reply_chunk(xdr, &replychunk))
+ return -EIO;
+
+ /* RDMA_NOMSG sanity checks */
+ if (unlikely(writelist))
+ return -EIO;
+ if (unlikely(!replychunk))
+ return -EIO;
+
+ /* Reply chunk buffer already is the reply vector */
+ r_xprt->rx_stats.total_rdma_reply += replychunk;
+ return replychunk;
+}
+
+static noinline int
+rpcrdma_decode_error(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep,
+ struct rpc_rqst *rqst)
+{
+ struct xdr_stream *xdr = &rep->rr_stream;
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, sizeof(*p));
+ if (unlikely(!p))
+ return -EIO;
+
+ switch (*p) {
+ case err_vers:
+ p = xdr_inline_decode(xdr, 2 * sizeof(*p));
+ if (!p)
+ break;
+ dprintk("RPC: %5u: %s: server reports version error (%u-%u)\n",
+ rqst->rq_task->tk_pid, __func__,
+ be32_to_cpup(p), be32_to_cpu(*(p + 1)));
+ break;
+ case err_chunk:
+ dprintk("RPC: %5u: %s: server reports header decoding error\n",
+ rqst->rq_task->tk_pid, __func__);
+ break;
+ default:
+ dprintk("RPC: %5u: %s: server reports unrecognized error %d\n",
+ rqst->rq_task->tk_pid, __func__, be32_to_cpup(p));
+ }
+
+ r_xprt->rx_stats.bad_reply_count++;
+ return -EREMOTEIO;
+}
+
+/* Perform XID lookup, reconstruction of the RPC reply, and
+ * RPC completion while holding the transport lock to ensure
+ * the rep, rqst, and rq_task pointers remain stable.
+ */
+void rpcrdma_complete_rqst(struct rpcrdma_rep *rep)
+{
+ struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
+ struct rpc_xprt *xprt = &r_xprt->rx_xprt;
+ struct rpc_rqst *rqst = rep->rr_rqst;
+ unsigned long cwnd;
+ int status;
+
+ xprt->reestablish_timeout = 0;
+
+ switch (rep->rr_proc) {
+ case rdma_msg:
+ status = rpcrdma_decode_msg(r_xprt, rep, rqst);
+ break;
+ case rdma_nomsg:
+ status = rpcrdma_decode_nomsg(r_xprt, rep);
+ break;
+ case rdma_error:
+ status = rpcrdma_decode_error(r_xprt, rep, rqst);
+ break;
+ default:
+ status = -EIO;
+ }
+ if (status < 0)
+ goto out_badheader;
+
+out:
+ spin_lock(&xprt->recv_lock);
+ cwnd = xprt->cwnd;
+ xprt->cwnd = r_xprt->rx_buf.rb_credits << RPC_CWNDSHIFT;
+ if (xprt->cwnd > cwnd)
+ xprt_release_rqst_cong(rqst->rq_task);
+
+ xprt_complete_rqst(rqst->rq_task, status);
+ xprt_unpin_rqst(rqst);
+ spin_unlock(&xprt->recv_lock);
+ return;
+
+/* If the incoming reply terminated a pending RPC, the next
+ * RPC call will post a replacement receive buffer as it is
+ * being marshaled.
+ */
+out_badheader:
+ dprintk("RPC: %5u %s: invalid rpcrdma reply (type %u)\n",
+ rqst->rq_task->tk_pid, __func__, be32_to_cpu(rep->rr_proc));
+ r_xprt->rx_stats.bad_reply_count++;
+ status = -EIO;
+ goto out;
+}
+
+void rpcrdma_release_rqst(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
+{
+ /* Invalidate and unmap the data payloads before waking
+ * the waiting application. This guarantees the memory
+ * regions are properly fenced from the server before the
+ * application accesses the data. It also ensures proper
+ * send flow control: waking the next RPC waits until this
+ * RPC has relinquished all its Send Queue entries.
+ */
+ if (!list_empty(&req->rl_registered))
+ r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt,
+ &req->rl_registered);
+
+ /* Ensure that any DMA mapped pages associated with
+ * the Send of the RPC Call have been unmapped before
+ * allowing the RPC to complete. This protects argument
+ * memory not controlled by the RPC client from being
+ * re-used before we're done with it.
+ */
+ if (test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) {
+ r_xprt->rx_stats.reply_waits_for_send++;
+ out_of_line_wait_on_bit(&req->rl_flags,
+ RPCRDMA_REQ_F_TX_RESOURCES,
+ bit_wait,
+ TASK_UNINTERRUPTIBLE);
+ }
+}
+
+/* Reply handling runs in the poll worker thread. Anything that
+ * might wait is deferred to a separate workqueue.
+ */
+void rpcrdma_deferred_completion(struct work_struct *work)
+{
+ struct rpcrdma_rep *rep =
+ container_of(work, struct rpcrdma_rep, rr_work);
+ struct rpcrdma_req *req = rpcr_to_rdmar(rep->rr_rqst);
+
+ rpcrdma_mark_remote_invalidation(&req->rl_registered, rep);
+ rpcrdma_release_rqst(rep->rr_rxprt, req);
+ rpcrdma_complete_rqst(rep);
+}
+
/* Process received RPC/RDMA messages.
*
* Errors must result in the RPC task either being awakened, or
* allowed to timeout, to discover the errors at that time.
*/
-void
-rpcrdma_reply_handler(struct work_struct *work)
+void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
{
- struct rpcrdma_rep *rep =
- container_of(work, struct rpcrdma_rep, rr_work);
struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
- struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpc_xprt *xprt = &r_xprt->rx_xprt;
- struct rpcrdma_msg *headerp;
+ struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_req *req;
struct rpc_rqst *rqst;
- __be32 *iptr;
- int rdmalen, status, rmerr;
- unsigned long cwnd;
- struct list_head mws;
+ u32 credits;
+ __be32 *p;
dprintk("RPC: %s: incoming rep %p\n", __func__, rep);
- if (rep->rr_len == RPCRDMA_BAD_LEN)
+ if (rep->rr_hdrbuf.head[0].iov_len == 0)
goto out_badstatus;
- if (rep->rr_len < RPCRDMA_HDRLEN_ERR)
+
+ xdr_init_decode(&rep->rr_stream, &rep->rr_hdrbuf,
+ rep->rr_hdrbuf.head[0].iov_base);
+
+ /* Fixed transport header fields */
+ p = xdr_inline_decode(&rep->rr_stream, 4 * sizeof(*p));
+ if (unlikely(!p))
goto out_shortreply;
+ rep->rr_xid = *p++;
+ rep->rr_vers = *p++;
+ credits = be32_to_cpu(*p++);
+ rep->rr_proc = *p++;
- headerp = rdmab_to_msg(rep->rr_rdmabuf);
-#if defined(CONFIG_SUNRPC_BACKCHANNEL)
- if (rpcrdma_is_bcall(headerp))
- goto out_bcall;
-#endif
+ if (rep->rr_vers != rpcrdma_version)
+ goto out_badversion;
+
+ if (rpcrdma_is_bcall(r_xprt, rep))
+ return;
/* Match incoming rpcrdma_rep to an rpcrdma_req to
* get context for handling any incoming chunks.
*/
- spin_lock(&buf->rb_lock);
- req = rpcrdma_lookup_req_locked(&r_xprt->rx_buf,
- headerp->rm_xid);
- if (!req)
- goto out_nomatch;
- if (req->rl_reply)
- goto out_duplicate;
-
- list_replace_init(&req->rl_registered, &mws);
- rpcrdma_mark_remote_invalidation(&mws, rep);
-
- /* Avoid races with signals and duplicate replies
- * by marking this req as matched.
- */
- req->rl_reply = rep;
- spin_unlock(&buf->rb_lock);
-
- dprintk("RPC: %s: reply %p completes request %p (xid 0x%08x)\n",
- __func__, rep, req, be32_to_cpu(headerp->rm_xid));
-
- /* Invalidate and unmap the data payloads before waking the
- * waiting application. This guarantees the memory regions
- * are properly fenced from the server before the application
- * accesses the data. It also ensures proper send flow control:
- * waking the next RPC waits until this RPC has relinquished
- * all its Send Queue entries.
- */
- if (!list_empty(&mws))
- r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt, &mws);
-
- /* Perform XID lookup, reconstruction of the RPC reply, and
- * RPC completion while holding the transport lock to ensure
- * the rep, rqst, and rq_task pointers remain stable.
- */
- spin_lock_bh(&xprt->transport_lock);
- rqst = xprt_lookup_rqst(xprt, headerp->rm_xid);
+ spin_lock(&xprt->recv_lock);
+ rqst = xprt_lookup_rqst(xprt, rep->rr_xid);
if (!rqst)
goto out_norqst;
- xprt->reestablish_timeout = 0;
- if (headerp->rm_vers != rpcrdma_version)
- goto out_badversion;
-
- /* check for expected message types */
- /* The order of some of these tests is important. */
- switch (headerp->rm_type) {
- case rdma_msg:
- /* never expect read chunks */
- /* never expect reply chunks (two ways to check) */
- if (headerp->rm_body.rm_chunks[0] != xdr_zero ||
- (headerp->rm_body.rm_chunks[1] == xdr_zero &&
- headerp->rm_body.rm_chunks[2] != xdr_zero))
- goto badheader;
- if (headerp->rm_body.rm_chunks[1] != xdr_zero) {
- /* count any expected write chunks in read reply */
- /* start at write chunk array count */
- iptr = &headerp->rm_body.rm_chunks[2];
- rdmalen = rpcrdma_count_chunks(rep, 1, &iptr);
- /* check for validity, and no reply chunk after */
- if (rdmalen < 0 || *iptr++ != xdr_zero)
- goto badheader;
- rep->rr_len -=
- ((unsigned char *)iptr - (unsigned char *)headerp);
- status = rep->rr_len + rdmalen;
- r_xprt->rx_stats.total_rdma_reply += rdmalen;
- /* special case - last chunk may omit padding */
- if (rdmalen &= 3) {
- rdmalen = 4 - rdmalen;
- status += rdmalen;
- }
- } else {
- /* else ordinary inline */
- rdmalen = 0;
- iptr = (__be32 *)((unsigned char *)headerp +
- RPCRDMA_HDRLEN_MIN);
- rep->rr_len -= RPCRDMA_HDRLEN_MIN;
- status = rep->rr_len;
- }
+ xprt_pin_rqst(rqst);
- r_xprt->rx_stats.fixup_copy_count +=
- rpcrdma_inline_fixup(rqst, (char *)iptr, rep->rr_len,
- rdmalen);
- break;
+ if (credits == 0)
+ credits = 1; /* don't deadlock */
+ else if (credits > buf->rb_max_requests)
+ credits = buf->rb_max_requests;
+ buf->rb_credits = credits;
- case rdma_nomsg:
- /* never expect read or write chunks, always reply chunks */
- if (headerp->rm_body.rm_chunks[0] != xdr_zero ||
- headerp->rm_body.rm_chunks[1] != xdr_zero ||
- headerp->rm_body.rm_chunks[2] != xdr_one)
- goto badheader;
- iptr = (__be32 *)((unsigned char *)headerp +
- RPCRDMA_HDRLEN_MIN);
- rdmalen = rpcrdma_count_chunks(rep, 0, &iptr);
- if (rdmalen < 0)
- goto badheader;
- r_xprt->rx_stats.total_rdma_reply += rdmalen;
- /* Reply chunk buffer already is the reply vector - no fixup. */
- status = rdmalen;
- break;
+ spin_unlock(&xprt->recv_lock);
- case rdma_error:
- goto out_rdmaerr;
-
-badheader:
- default:
- dprintk("RPC: %5u %s: invalid rpcrdma reply (type %u)\n",
- rqst->rq_task->tk_pid, __func__,
- be32_to_cpu(headerp->rm_type));
- status = -EIO;
- r_xprt->rx_stats.bad_reply_count++;
- break;
- }
+ req = rpcr_to_rdmar(rqst);
+ req->rl_reply = rep;
+ rep->rr_rqst = rqst;
+ clear_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags);
-out:
- cwnd = xprt->cwnd;
- xprt->cwnd = atomic_read(&r_xprt->rx_buf.rb_credits) << RPC_CWNDSHIFT;
- if (xprt->cwnd > cwnd)
- xprt_release_rqst_cong(rqst->rq_task);
+ dprintk("RPC: %s: reply %p completes request %p (xid 0x%08x)\n",
+ __func__, rep, req, be32_to_cpu(rep->rr_xid));
- xprt_complete_rqst(rqst->rq_task, status);
- spin_unlock_bh(&xprt->transport_lock);
- dprintk("RPC: %s: xprt_complete_rqst(0x%p, 0x%p, %d)\n",
- __func__, xprt, rqst, status);
+ if (list_empty(&req->rl_registered) &&
+ !test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags))
+ rpcrdma_complete_rqst(rep);
+ else
+ queue_work(rpcrdma_receive_wq, &rep->rr_work);
return;
out_badstatus:
@@ -1149,71 +1423,22 @@ out_badstatus:
}
return;
-#if defined(CONFIG_SUNRPC_BACKCHANNEL)
-out_bcall:
- rpcrdma_bc_receive_call(r_xprt, rep);
- return;
-#endif
-
-/* If the incoming reply terminated a pending RPC, the next
- * RPC call will post a replacement receive buffer as it is
- * being marshaled.
- */
out_badversion:
dprintk("RPC: %s: invalid version %d\n",
- __func__, be32_to_cpu(headerp->rm_vers));
- status = -EIO;
- r_xprt->rx_stats.bad_reply_count++;
- goto out;
-
-out_rdmaerr:
- rmerr = be32_to_cpu(headerp->rm_body.rm_error.rm_err);
- switch (rmerr) {
- case ERR_VERS:
- pr_err("%s: server reports header version error (%u-%u)\n",
- __func__,
- be32_to_cpu(headerp->rm_body.rm_error.rm_vers_low),
- be32_to_cpu(headerp->rm_body.rm_error.rm_vers_high));
- break;
- case ERR_CHUNK:
- pr_err("%s: server reports header decoding error\n",
- __func__);
- break;
- default:
- pr_err("%s: server reports unknown error %d\n",
- __func__, rmerr);
- }
- status = -EREMOTEIO;
- r_xprt->rx_stats.bad_reply_count++;
- goto out;
+ __func__, be32_to_cpu(rep->rr_vers));
+ goto repost;
-/* The req was still available, but by the time the transport_lock
- * was acquired, the rqst and task had been released. Thus the RPC
- * has already been terminated.
+/* The RPC transaction has already been terminated, or the header
+ * is corrupt.
*/
out_norqst:
- spin_unlock_bh(&xprt->transport_lock);
- rpcrdma_buffer_put(req);
- dprintk("RPC: %s: race, no rqst left for req %p\n",
- __func__, req);
- return;
+ spin_unlock(&xprt->recv_lock);
+ dprintk("RPC: %s: no match for incoming xid 0x%08x\n",
+ __func__, be32_to_cpu(rep->rr_xid));
+ goto repost;
out_shortreply:
dprintk("RPC: %s: short/invalid reply\n", __func__);
- goto repost;
-
-out_nomatch:
- spin_unlock(&buf->rb_lock);
- dprintk("RPC: %s: no match for incoming xid 0x%08x len %d\n",
- __func__, be32_to_cpu(headerp->rm_xid),
- rep->rr_len);
- goto repost;
-
-out_duplicate:
- spin_unlock(&buf->rb_lock);
- dprintk("RPC: %s: "
- "duplicate reply %p to RPC request %p: xid 0x%08x\n",
- __func__, rep, req, be32_to_cpu(headerp->rm_xid));
/* If no pending RPC transaction was matched, post a replacement
* receive buffer before returning.
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index c676ed0efb5a..af7893501e40 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2015 Oracle. All rights reserved.
*
@@ -52,7 +53,7 @@ int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, __be32 *rdma_resp,
if (src->iov_len < 24)
goto out_shortreply;
- spin_lock_bh(&xprt->transport_lock);
+ spin_lock(&xprt->recv_lock);
req = xprt_lookup_rqst(xprt, xid);
if (!req)
goto out_notfound;
@@ -69,17 +70,20 @@ int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, __be32 *rdma_resp,
else if (credits > r_xprt->rx_buf.rb_bc_max_requests)
credits = r_xprt->rx_buf.rb_bc_max_requests;
+ spin_lock_bh(&xprt->transport_lock);
cwnd = xprt->cwnd;
xprt->cwnd = credits << RPC_CWNDSHIFT;
if (xprt->cwnd > cwnd)
xprt_release_rqst_cong(req->rq_task);
+ spin_unlock_bh(&xprt->transport_lock);
+
ret = 0;
xprt_complete_rqst(req->rq_task, rcvbuf->len);
rcvbuf->len = 0;
out_unlock:
- spin_unlock_bh(&xprt->transport_lock);
+ spin_unlock(&xprt->recv_lock);
out:
return ret;
@@ -129,6 +133,10 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
if (ret)
goto out_err;
+ /* Bump page refcnt so Send completion doesn't release
+ * the rq_buffer before all retransmits are complete.
+ */
+ get_page(virt_to_page(rqst->rq_buffer));
ret = svc_rdma_post_send_wr(rdma, ctxt, 1, 0);
if (ret)
goto out_unmap;
@@ -161,7 +169,6 @@ xprt_rdma_bc_allocate(struct rpc_task *task)
return -EINVAL;
}
- /* svc_rdma_sendto releases this page */
page = alloc_page(RPCRDMA_DEF_GFP);
if (!page)
return -ENOMEM;
@@ -180,6 +187,7 @@ xprt_rdma_bc_free(struct rpc_task *task)
{
struct rpc_rqst *rqst = task->tk_rqstp;
+ put_page(virt_to_page(rqst->rq_buffer));
kfree(rqst->rq_rbuffer);
}
@@ -266,7 +274,7 @@ xprt_rdma_bc_put(struct rpc_xprt *xprt)
module_put(THIS_MODULE);
}
-static struct rpc_xprt_ops xprt_rdma_bc_procs = {
+static const struct rpc_xprt_ops xprt_rdma_bc_procs = {
.reserve_xprt = xprt_reserve_xprt_cong,
.release_xprt = xprt_release_xprt_cong,
.alloc_slot = xprt_alloc_slot,
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index 933f79bed270..9bd04549a1ad 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2016 Oracle. All rights reserved.
*
@@ -660,19 +661,21 @@ out_initerr:
return -EIO;
}
+/* Walk the segments in the Read chunk starting at @p and construct
+ * RDMA Read operations to pull the chunk to the server.
+ */
static int svc_rdma_build_read_chunk(struct svc_rqst *rqstp,
struct svc_rdma_read_info *info,
__be32 *p)
{
int ret;
+ ret = -EINVAL;
info->ri_chunklen = 0;
- while (*p++ != xdr_zero) {
+ while (*p++ != xdr_zero && be32_to_cpup(p++) == info->ri_position) {
u32 rs_handle, rs_length;
u64 rs_offset;
- if (be32_to_cpup(p++) != info->ri_position)
- break;
rs_handle = be32_to_cpup(p++);
rs_length = be32_to_cpup(p++);
p = xdr_decode_hyper(p, &rs_offset);
@@ -689,78 +692,6 @@ static int svc_rdma_build_read_chunk(struct svc_rqst *rqstp,
return ret;
}
-/* If there is inline content following the Read chunk, append it to
- * the page list immediately following the data payload. This has to
- * be done after the reader function has determined how many pages
- * were consumed for RDMA Read.
- *
- * On entry, ri_pageno and ri_pageoff point directly to the end of the
- * page list. On exit, both have been updated to the new "next byte".
- *
- * Assumptions:
- * - Inline content fits entirely in rq_pages[0]
- * - Trailing content is only a handful of bytes
- */
-static int svc_rdma_copy_tail(struct svc_rqst *rqstp,
- struct svc_rdma_read_info *info)
-{
- struct svc_rdma_op_ctxt *head = info->ri_readctxt;
- unsigned int tail_length, remaining;
- u8 *srcp, *destp;
-
- /* Assert that all inline content fits in page 0. This is an
- * implementation limit, not a protocol limit.
- */
- if (head->arg.head[0].iov_len > PAGE_SIZE) {
- pr_warn_once("svcrdma: too much trailing inline content\n");
- return -EINVAL;
- }
-
- srcp = head->arg.head[0].iov_base;
- srcp += info->ri_position;
- tail_length = head->arg.head[0].iov_len - info->ri_position;
- remaining = tail_length;
-
- /* If there is room on the last page in the page list, try to
- * fit the trailing content there.
- */
- if (info->ri_pageoff > 0) {
- unsigned int len;
-
- len = min_t(unsigned int, remaining,
- PAGE_SIZE - info->ri_pageoff);
- destp = page_address(rqstp->rq_pages[info->ri_pageno]);
- destp += info->ri_pageoff;
-
- memcpy(destp, srcp, len);
- srcp += len;
- destp += len;
- info->ri_pageoff += len;
- remaining -= len;
-
- if (info->ri_pageoff == PAGE_SIZE) {
- info->ri_pageno++;
- info->ri_pageoff = 0;
- }
- }
-
- /* Otherwise, a fresh page is needed. */
- if (remaining) {
- head->arg.pages[info->ri_pageno] =
- rqstp->rq_pages[info->ri_pageno];
- head->count++;
-
- destp = page_address(rqstp->rq_pages[info->ri_pageno]);
- memcpy(destp, srcp, remaining);
- info->ri_pageoff += remaining;
- }
-
- head->arg.page_len += tail_length;
- head->arg.len += tail_length;
- head->arg.buflen += tail_length;
- return 0;
-}
-
/* Construct RDMA Reads to pull over a normal Read chunk. The chunk
* data lands in the page list of head->arg.pages.
*
@@ -785,34 +716,28 @@ static int svc_rdma_build_normal_read_chunk(struct svc_rqst *rqstp,
if (ret < 0)
goto out;
- /* Read chunk may need XDR round-up (see RFC 5666, s. 3.7).
+ /* Split the Receive buffer between the head and tail
+ * buffers at Read chunk's position. XDR roundup of the
+ * chunk is not included in either the pagelist or in
+ * the tail.
*/
- if (info->ri_chunklen & 3) {
- u32 padlen = 4 - (info->ri_chunklen & 3);
-
- info->ri_chunklen += padlen;
+ head->arg.tail[0].iov_base =
+ head->arg.head[0].iov_base + info->ri_position;
+ head->arg.tail[0].iov_len =
+ head->arg.head[0].iov_len - info->ri_position;
+ head->arg.head[0].iov_len = info->ri_position;
- /* NB: data payload always starts on XDR alignment,
- * thus the pad can never contain a page boundary.
- */
- info->ri_pageoff += padlen;
- if (info->ri_pageoff == PAGE_SIZE) {
- info->ri_pageno++;
- info->ri_pageoff = 0;
- }
- }
+ /* Read chunk may need XDR roundup (see RFC 5666, s. 3.7).
+ *
+ * NFSv2/3 write decoders need the length of the tail to
+ * contain the size of the roundup padding.
+ */
+ head->arg.tail[0].iov_len += 4 - (info->ri_chunklen & 3);
head->arg.page_len = info->ri_chunklen;
head->arg.len += info->ri_chunklen;
head->arg.buflen += info->ri_chunklen;
- if (info->ri_position < head->arg.head[0].iov_len) {
- ret = svc_rdma_copy_tail(rqstp, info);
- if (ret < 0)
- goto out;
- }
- head->arg.head[0].iov_len = info->ri_position;
-
out:
return ret;
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index e660d4965b18..46ec069150d5 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -51,6 +51,7 @@
#include <linux/workqueue.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
+#include <rdma/rw.h>
#include <linux/sunrpc/svc_rdma.h>
#include <linux/export.h>
#include "xprt_rdma.h"
@@ -70,7 +71,7 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt);
static int svc_rdma_secure_port(struct svc_rqst *);
static void svc_rdma_kill_temp_xprt(struct svc_xprt *);
-static struct svc_xprt_ops svc_rdma_ops = {
+static const struct svc_xprt_ops svc_rdma_ops = {
.xpo_create = svc_rdma_create,
.xpo_recvfrom = svc_rdma_recvfrom,
.xpo_sendto = svc_rdma_sendto,
@@ -98,7 +99,7 @@ static struct svc_xprt *svc_rdma_bc_create(struct svc_serv *, struct net *,
static void svc_rdma_bc_detach(struct svc_xprt *);
static void svc_rdma_bc_free(struct svc_xprt *);
-static struct svc_xprt_ops svc_rdma_bc_ops = {
+static const struct svc_xprt_ops svc_rdma_bc_ops = {
.xpo_create = svc_rdma_bc_create,
.xpo_detach = svc_rdma_bc_detach,
.xpo_free = svc_rdma_bc_free,
@@ -167,8 +168,8 @@ static bool svc_rdma_prealloc_ctxts(struct svcxprt_rdma *xprt)
{
unsigned int i;
- /* Each RPC/RDMA credit can consume a number of send
- * and receive WQEs. One ctxt is allocated for each.
+ /* Each RPC/RDMA credit can consume one Receive and
+ * one Send WQE at the same time.
*/
i = xprt->sc_sq_depth + xprt->sc_rq_depth;
@@ -289,6 +290,7 @@ static void qp_event_handler(struct ib_event *event, void *context)
ib_event_msg(event->event), event->event,
event->element.qp);
set_bit(XPT_CLOSE, &xprt->xpt_flags);
+ svc_xprt_enqueue(xprt);
break;
}
}
@@ -321,8 +323,7 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
if (test_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags))
goto out;
- svc_xprt_enqueue(&xprt->sc_xprt);
- goto out;
+ goto out_enqueue;
flushed:
if (wc->status != IB_WC_WR_FLUSH_ERR)
@@ -332,6 +333,8 @@ flushed:
set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
svc_rdma_put_context(ctxt, 1);
+out_enqueue:
+ svc_xprt_enqueue(&xprt->sc_xprt);
out:
svc_xprt_put(&xprt->sc_xprt);
}
@@ -357,6 +360,7 @@ void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
if (unlikely(wc->status != IB_WC_SUCCESS)) {
set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
+ svc_xprt_enqueue(&xprt->sc_xprt);
if (wc->status != IB_WC_WR_FLUSH_ERR)
pr_err("svcrdma: Send: %s (%u/0x%x)\n",
ib_wc_status_msg(wc->status),
@@ -568,8 +572,10 @@ static int rdma_listen_handler(struct rdma_cm_id *cma_id,
case RDMA_CM_EVENT_DEVICE_REMOVAL:
dprintk("svcrdma: Device removal xprt=%p, cm_id=%p\n",
xprt, cma_id);
- if (xprt)
+ if (xprt) {
set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
+ svc_xprt_enqueue(&xprt->sc_xprt);
+ }
break;
default:
@@ -713,7 +719,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
struct ib_qp_init_attr qp_attr;
struct ib_device *dev;
struct sockaddr *sap;
- unsigned int i;
+ unsigned int i, ctxts;
int ret = 0;
listen_rdma = container_of(xprt, struct svcxprt_rdma, sc_xprt);
@@ -742,14 +748,26 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
newxprt->sc_max_sge = min((size_t)dev->attrs.max_sge,
(size_t)RPCSVC_MAXPAGES);
newxprt->sc_max_req_size = svcrdma_max_req_size;
- newxprt->sc_max_requests = min_t(u32, dev->attrs.max_qp_wr,
- svcrdma_max_requests);
- newxprt->sc_fc_credits = cpu_to_be32(newxprt->sc_max_requests);
- newxprt->sc_max_bc_requests = min_t(u32, dev->attrs.max_qp_wr,
- svcrdma_max_bc_requests);
+ newxprt->sc_max_requests = svcrdma_max_requests;
+ newxprt->sc_max_bc_requests = svcrdma_max_bc_requests;
newxprt->sc_rq_depth = newxprt->sc_max_requests +
newxprt->sc_max_bc_requests;
- newxprt->sc_sq_depth = newxprt->sc_rq_depth;
+ if (newxprt->sc_rq_depth > dev->attrs.max_qp_wr) {
+ pr_warn("svcrdma: reducing receive depth to %d\n",
+ dev->attrs.max_qp_wr);
+ newxprt->sc_rq_depth = dev->attrs.max_qp_wr;
+ newxprt->sc_max_requests = newxprt->sc_rq_depth - 2;
+ newxprt->sc_max_bc_requests = 2;
+ }
+ newxprt->sc_fc_credits = cpu_to_be32(newxprt->sc_max_requests);
+ ctxts = rdma_rw_mr_factor(dev, newxprt->sc_port_num, RPCSVC_MAXPAGES);
+ ctxts *= newxprt->sc_max_requests;
+ newxprt->sc_sq_depth = newxprt->sc_rq_depth + ctxts;
+ if (newxprt->sc_sq_depth > dev->attrs.max_qp_wr) {
+ pr_warn("svcrdma: reducing send depth to %d\n",
+ dev->attrs.max_qp_wr);
+ newxprt->sc_sq_depth = dev->attrs.max_qp_wr;
+ }
atomic_set(&newxprt->sc_sq_avail, newxprt->sc_sq_depth);
if (!svc_rdma_prealloc_ctxts(newxprt))
@@ -784,8 +802,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
qp_attr.event_handler = qp_event_handler;
qp_attr.qp_context = &newxprt->sc_xprt;
qp_attr.port_num = newxprt->sc_port_num;
- qp_attr.cap.max_rdma_ctxs = newxprt->sc_max_requests;
- qp_attr.cap.max_send_wr = newxprt->sc_sq_depth;
+ qp_attr.cap.max_rdma_ctxs = ctxts;
+ qp_attr.cap.max_send_wr = newxprt->sc_sq_depth - ctxts;
qp_attr.cap.max_recv_wr = newxprt->sc_rq_depth;
qp_attr.cap.max_send_sge = newxprt->sc_max_sge;
qp_attr.cap.max_recv_sge = newxprt->sc_max_sge;
@@ -853,6 +871,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
dprintk(" remote address : %pIS:%u\n", sap, rpc_get_port(sap));
dprintk(" max_sge : %d\n", newxprt->sc_max_sge);
dprintk(" sq_depth : %d\n", newxprt->sc_sq_depth);
+ dprintk(" rdma_rw_ctxs : %d\n", ctxts);
dprintk(" max_requests : %d\n", newxprt->sc_max_requests);
dprintk(" ord : %d\n", newxprt->sc_ord);
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index d1c458e5ec4d..646c24494ea7 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2014-2017 Oracle. All rights reserved.
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -149,7 +150,7 @@ static struct ctl_table sunrpc_table[] = {
#endif
-static struct rpc_xprt_ops xprt_rdma_procs; /*forward reference */
+static const struct rpc_xprt_ops xprt_rdma_procs;
static void
xprt_rdma_format_addresses4(struct rpc_xprt *xprt, struct sockaddr *sap)
@@ -559,6 +560,7 @@ rpcrdma_get_rdmabuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
r_xprt->rx_stats.hardway_register_count += size;
req->rl_rdmabuf = rb;
+ xdr_buf_init(&req->rl_hdrbuf, rb->rg_base, rdmab_length(rb));
return true;
}
@@ -677,17 +679,14 @@ xprt_rdma_free(struct rpc_task *task)
struct rpc_rqst *rqst = task->tk_rqstp;
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
- struct rpcrdma_ia *ia = &r_xprt->rx_ia;
- if (req->rl_backchannel)
+ if (test_bit(RPCRDMA_REQ_F_BACKCHANNEL, &req->rl_flags))
return;
dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply);
- rpcrdma_remove_req(&r_xprt->rx_buf, req);
- if (!list_empty(&req->rl_registered))
- ia->ri_ops->ro_unmap_safe(r_xprt, req, !RPC_IS_ASYNC(task));
- rpcrdma_unmap_sges(ia, req);
+ if (test_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags))
+ rpcrdma_release_rqst(r_xprt, req);
rpcrdma_buffer_put(req);
}
@@ -728,9 +727,10 @@ xprt_rdma_send_request(struct rpc_task *task)
/* On retransmit, remove any previously registered chunks */
if (unlikely(!list_empty(&req->rl_registered)))
- r_xprt->rx_ia.ri_ops->ro_unmap_safe(r_xprt, req, false);
+ r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt,
+ &req->rl_registered);
- rc = rpcrdma_marshal_req(rqst);
+ rc = rpcrdma_marshal_req(r_xprt, rqst);
if (rc < 0)
goto failed_marshal;
@@ -742,6 +742,7 @@ xprt_rdma_send_request(struct rpc_task *task)
goto drop_connection;
req->rl_connect_cookie = xprt->connect_cookie;
+ set_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags);
if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
goto drop_connection;
@@ -789,11 +790,13 @@ void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
r_xprt->rx_stats.failed_marshal_count,
r_xprt->rx_stats.bad_reply_count,
r_xprt->rx_stats.nomsg_call_count);
- seq_printf(seq, "%lu %lu %lu %lu\n",
+ seq_printf(seq, "%lu %lu %lu %lu %lu %lu\n",
r_xprt->rx_stats.mrs_recovered,
r_xprt->rx_stats.mrs_orphaned,
r_xprt->rx_stats.mrs_allocated,
- r_xprt->rx_stats.local_inv_needed);
+ r_xprt->rx_stats.local_inv_needed,
+ r_xprt->rx_stats.empty_sendctx_q,
+ r_xprt->rx_stats.reply_waits_for_send);
}
static int
@@ -811,7 +814,7 @@ xprt_rdma_disable_swap(struct rpc_xprt *xprt)
* Plumbing for rpc transport switch and kernel module
*/
-static struct rpc_xprt_ops xprt_rdma_procs = {
+static const struct rpc_xprt_ops xprt_rdma_procs = {
.reserve_xprt = xprt_reserve_xprt_cong,
.release_xprt = xprt_release_xprt_cong, /* sunrpc/xprt.c */
.alloc_slot = xprt_alloc_slot,
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index e4171f2abe37..710b3f77db82 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2014-2017 Oracle. All rights reserved.
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -49,9 +50,10 @@
#include <linux/interrupt.h>
#include <linux/slab.h>
-#include <linux/prefetch.h>
#include <linux/sunrpc/addr.h>
#include <linux/sunrpc/svc_rdma.h>
+
+#include <asm-generic/barrier.h>
#include <asm/bitops.h>
#include <rdma/ib_cm.h>
@@ -73,7 +75,7 @@ static void rpcrdma_create_mrs(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_destroy_mrs(struct rpcrdma_buffer *buf);
static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb);
-static struct workqueue_struct *rpcrdma_receive_wq __read_mostly;
+struct workqueue_struct *rpcrdma_receive_wq __read_mostly;
int
rpcrdma_alloc_wq(void)
@@ -126,33 +128,17 @@ rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
static void
rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
{
+ struct ib_cqe *cqe = wc->wr_cqe;
+ struct rpcrdma_sendctx *sc =
+ container_of(cqe, struct rpcrdma_sendctx, sc_cqe);
+
/* WARNING: Only wr_cqe and status are reliable at this point */
if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR)
pr_err("rpcrdma: Send: %s (%u/0x%x)\n",
ib_wc_status_msg(wc->status),
wc->status, wc->vendor_err);
-}
-
-/* Perform basic sanity checking to avoid using garbage
- * to update the credit grant value.
- */
-static void
-rpcrdma_update_granted_credits(struct rpcrdma_rep *rep)
-{
- struct rpcrdma_msg *rmsgp = rdmab_to_msg(rep->rr_rdmabuf);
- struct rpcrdma_buffer *buffer = &rep->rr_rxprt->rx_buf;
- u32 credits;
-
- if (rep->rr_len < RPCRDMA_HDRLEN_ERR)
- return;
-
- credits = be32_to_cpu(rmsgp->rm_credit);
- if (credits == 0)
- credits = 1; /* don't deadlock */
- else if (credits > buffer->rb_max_requests)
- credits = buffer->rb_max_requests;
- atomic_set(&buffer->rb_credits, credits);
+ rpcrdma_sendctx_put_locked(sc);
}
/**
@@ -173,24 +159,19 @@ rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
goto out_fail;
/* status == SUCCESS means all fields in wc are trustworthy */
- if (wc->opcode != IB_WC_RECV)
- return;
-
dprintk("RPC: %s: rep %p opcode 'recv', length %u: success\n",
__func__, rep, wc->byte_len);
- rep->rr_len = wc->byte_len;
+ rpcrdma_set_xdrlen(&rep->rr_hdrbuf, wc->byte_len);
rep->rr_wc_flags = wc->wc_flags;
rep->rr_inv_rkey = wc->ex.invalidate_rkey;
ib_dma_sync_single_for_cpu(rdmab_device(rep->rr_rdmabuf),
rdmab_addr(rep->rr_rdmabuf),
- rep->rr_len, DMA_FROM_DEVICE);
-
- rpcrdma_update_granted_credits(rep);
+ wc->byte_len, DMA_FROM_DEVICE);
out_schedule:
- queue_work(rpcrdma_receive_wq, &rep->rr_work);
+ rpcrdma_reply_handler(rep);
return;
out_fail:
@@ -198,7 +179,7 @@ out_fail:
pr_err("rpcrdma: Recv: %s (%u/0x%x)\n",
ib_wc_status_msg(wc->status),
wc->status, wc->vendor_err);
- rep->rr_len = RPCRDMA_BAD_LEN;
+ rpcrdma_set_xdrlen(&rep->rr_hdrbuf, 0);
goto out_schedule;
}
@@ -300,7 +281,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
case RDMA_CM_EVENT_DISCONNECTED:
connstate = -ECONNABORTED;
connected:
- atomic_set(&xprt->rx_buf.rb_credits, 1);
+ xprt->rx_buf.rb_credits = 1;
ep->rep_connected = connstate;
rpcrdma_conn_func(ep);
wake_up_all(&ep->rep_connect_wait);
@@ -569,16 +550,15 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
ep->rep_attr.cap.max_recv_sge);
/* set trigger for requesting send completion */
- ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1;
- if (ep->rep_cqinit <= 2)
- ep->rep_cqinit = 0; /* always signal? */
- rpcrdma_init_cqcount(ep, 0);
+ ep->rep_send_batch = min_t(unsigned int, RPCRDMA_MAX_SEND_BATCH,
+ cdata->max_requests >> 2);
+ ep->rep_send_count = ep->rep_send_batch;
init_waitqueue_head(&ep->rep_connect_wait);
INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);
sendcq = ib_alloc_cq(ia->ri_device, NULL,
ep->rep_attr.cap.max_send_wr + 1,
- 0, IB_POLL_SOFTIRQ);
+ 1, IB_POLL_WORKQUEUE);
if (IS_ERR(sendcq)) {
rc = PTR_ERR(sendcq);
dprintk("RPC: %s: failed to create send CQ: %i\n",
@@ -588,7 +568,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
recvcq = ib_alloc_cq(ia->ri_device, NULL,
ep->rep_attr.cap.max_recv_wr + 1,
- 0, IB_POLL_SOFTIRQ);
+ 0, IB_POLL_WORKQUEUE);
if (IS_ERR(recvcq)) {
rc = PTR_ERR(recvcq);
dprintk("RPC: %s: failed to create recv CQ: %i\n",
@@ -851,6 +831,168 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
ib_drain_qp(ia->ri_id->qp);
}
+/* Fixed-size circular FIFO queue. This implementation is wait-free and
+ * lock-free.
+ *
+ * Consumer is the code path that posts Sends. This path dequeues a
+ * sendctx for use by a Send operation. Multiple consumer threads
+ * are serialized by the RPC transport lock, which allows only one
+ * ->send_request call at a time.
+ *
+ * Producer is the code path that handles Send completions. This path
+ * enqueues a sendctx that has been completed. Multiple producer
+ * threads are serialized by the ib_poll_cq() function.
+ */
+
+/* rpcrdma_sendctxs_destroy() assumes caller has already quiesced
+ * queue activity, and ib_drain_qp has flushed all remaining Send
+ * requests.
+ */
+static void rpcrdma_sendctxs_destroy(struct rpcrdma_buffer *buf)
+{
+ unsigned long i;
+
+ for (i = 0; i <= buf->rb_sc_last; i++)
+ kfree(buf->rb_sc_ctxs[i]);
+ kfree(buf->rb_sc_ctxs);
+}
+
+static struct rpcrdma_sendctx *rpcrdma_sendctx_create(struct rpcrdma_ia *ia)
+{
+ struct rpcrdma_sendctx *sc;
+
+ sc = kzalloc(sizeof(*sc) +
+ ia->ri_max_send_sges * sizeof(struct ib_sge),
+ GFP_KERNEL);
+ if (!sc)
+ return NULL;
+
+ sc->sc_wr.wr_cqe = &sc->sc_cqe;
+ sc->sc_wr.sg_list = sc->sc_sges;
+ sc->sc_wr.opcode = IB_WR_SEND;
+ sc->sc_cqe.done = rpcrdma_wc_send;
+ return sc;
+}
+
+static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt)
+{
+ struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+ struct rpcrdma_sendctx *sc;
+ unsigned long i;
+
+ /* Maximum number of concurrent outstanding Send WRs. Capping
+ * the circular queue size stops Send Queue overflow by causing
+ * the ->send_request call to fail temporarily before too many
+ * Sends are posted.
+ */
+ i = buf->rb_max_requests + RPCRDMA_MAX_BC_REQUESTS;
+ dprintk("RPC: %s: allocating %lu send_ctxs\n", __func__, i);
+ buf->rb_sc_ctxs = kcalloc(i, sizeof(sc), GFP_KERNEL);
+ if (!buf->rb_sc_ctxs)
+ return -ENOMEM;
+
+ buf->rb_sc_last = i - 1;
+ for (i = 0; i <= buf->rb_sc_last; i++) {
+ sc = rpcrdma_sendctx_create(&r_xprt->rx_ia);
+ if (!sc)
+ goto out_destroy;
+
+ sc->sc_xprt = r_xprt;
+ buf->rb_sc_ctxs[i] = sc;
+ }
+
+ return 0;
+
+out_destroy:
+ rpcrdma_sendctxs_destroy(buf);
+ return -ENOMEM;
+}
+
+/* The sendctx queue is not guaranteed to have a size that is a
+ * power of two, thus the helpers in circ_buf.h cannot be used.
+ * The other option is to use modulus (%), which can be expensive.
+ */
+static unsigned long rpcrdma_sendctx_next(struct rpcrdma_buffer *buf,
+ unsigned long item)
+{
+ return likely(item < buf->rb_sc_last) ? item + 1 : 0;
+}
+
+/**
+ * rpcrdma_sendctx_get_locked - Acquire a send context
+ * @buf: transport buffers from which to acquire an unused context
+ *
+ * Returns pointer to a free send completion context; or NULL if
+ * the queue is empty.
+ *
+ * Usage: Called to acquire an SGE array before preparing a Send WR.
+ *
+ * The caller serializes calls to this function (per rpcrdma_buffer),
+ * and provides an effective memory barrier that flushes the new value
+ * of rb_sc_head.
+ */
+struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf)
+{
+ struct rpcrdma_xprt *r_xprt;
+ struct rpcrdma_sendctx *sc;
+ unsigned long next_head;
+
+ next_head = rpcrdma_sendctx_next(buf, buf->rb_sc_head);
+
+ if (next_head == READ_ONCE(buf->rb_sc_tail))
+ goto out_emptyq;
+
+ /* ORDER: item must be accessed _before_ head is updated */
+ sc = buf->rb_sc_ctxs[next_head];
+
+ /* Releasing the lock in the caller acts as a memory
+ * barrier that flushes rb_sc_head.
+ */
+ buf->rb_sc_head = next_head;
+
+ return sc;
+
+out_emptyq:
+ /* The queue is "empty" if there have not been enough Send
+ * completions recently. This is a sign the Send Queue is
+ * backing up. Cause the caller to pause and try again.
+ */
+ dprintk("RPC: %s: empty sendctx queue\n", __func__);
+ r_xprt = container_of(buf, struct rpcrdma_xprt, rx_buf);
+ r_xprt->rx_stats.empty_sendctx_q++;
+ return NULL;
+}
+
+/**
+ * rpcrdma_sendctx_put_locked - Release a send context
+ * @sc: send context to release
+ *
+ * Usage: Called from Send completion to return a sendctxt
+ * to the queue.
+ *
+ * The caller serializes calls to this function (per rpcrdma_buffer).
+ */
+void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc)
+{
+ struct rpcrdma_buffer *buf = &sc->sc_xprt->rx_buf;
+ unsigned long next_tail;
+
+ /* Unmap SGEs of previously completed by unsignaled
+ * Sends by walking up the queue until @sc is found.
+ */
+ next_tail = buf->rb_sc_tail;
+ do {
+ next_tail = rpcrdma_sendctx_next(buf, next_tail);
+
+ /* ORDER: item must be accessed _before_ tail is updated */
+ rpcrdma_unmap_sendctx(buf->rb_sc_ctxs[next_tail]);
+
+ } while (buf->rb_sc_ctxs[next_tail] != sc);
+
+ /* Paired with READ_ONCE */
+ smp_store_release(&buf->rb_sc_tail, next_tail);
+}
+
static void
rpcrdma_mr_recovery_worker(struct work_struct *work)
{
@@ -946,13 +1088,8 @@ rpcrdma_create_req(struct rpcrdma_xprt *r_xprt)
spin_lock(&buffer->rb_reqslock);
list_add(&req->rl_all, &buffer->rb_allreqs);
spin_unlock(&buffer->rb_reqslock);
- req->rl_cqe.done = rpcrdma_wc_send;
req->rl_buffer = &r_xprt->rx_buf;
INIT_LIST_HEAD(&req->rl_registered);
- req->rl_send_wr.next = NULL;
- req->rl_send_wr.wr_cqe = &req->rl_cqe;
- req->rl_send_wr.sg_list = req->rl_send_sge;
- req->rl_send_wr.opcode = IB_WR_SEND;
return req;
}
@@ -974,10 +1111,12 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
rc = PTR_ERR(rep->rr_rdmabuf);
goto out_free;
}
+ xdr_buf_init(&rep->rr_hdrbuf, rep->rr_rdmabuf->rg_base,
+ rdmab_length(rep->rr_rdmabuf));
rep->rr_cqe.done = rpcrdma_wc_receive;
rep->rr_rxprt = r_xprt;
- INIT_WORK(&rep->rr_work, rpcrdma_reply_handler);
+ INIT_WORK(&rep->rr_work, rpcrdma_deferred_completion);
rep->rr_recv_wr.next = NULL;
rep->rr_recv_wr.wr_cqe = &rep->rr_cqe;
rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
@@ -998,13 +1137,11 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
buf->rb_max_requests = r_xprt->rx_data.max_requests;
buf->rb_bc_srv_max_requests = 0;
- atomic_set(&buf->rb_credits, 1);
spin_lock_init(&buf->rb_mwlock);
spin_lock_init(&buf->rb_lock);
spin_lock_init(&buf->rb_recovery_lock);
INIT_LIST_HEAD(&buf->rb_mws);
INIT_LIST_HEAD(&buf->rb_all);
- INIT_LIST_HEAD(&buf->rb_pending);
INIT_LIST_HEAD(&buf->rb_stale_mrs);
INIT_DELAYED_WORK(&buf->rb_refresh_worker,
rpcrdma_mr_refresh_worker);
@@ -1026,7 +1163,6 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
rc = PTR_ERR(req);
goto out;
}
- req->rl_backchannel = false;
list_add(&req->rl_list, &buf->rb_send_bufs);
}
@@ -1044,6 +1180,10 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
list_add(&rep->rr_list, &buf->rb_recv_bufs);
}
+ rc = rpcrdma_sendctxs_create(r_xprt);
+ if (rc)
+ goto out;
+
return 0;
out:
rpcrdma_buffer_destroy(buf);
@@ -1120,6 +1260,8 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
cancel_delayed_work_sync(&buf->rb_recovery_worker);
cancel_delayed_work_sync(&buf->rb_refresh_worker);
+ rpcrdma_sendctxs_destroy(buf);
+
while (!list_empty(&buf->rb_recv_bufs)) {
struct rpcrdma_rep *rep;
@@ -1235,7 +1377,6 @@ rpcrdma_buffer_put(struct rpcrdma_req *req)
struct rpcrdma_buffer *buffers = req->rl_buffer;
struct rpcrdma_rep *rep = req->rl_reply;
- req->rl_send_wr.num_sge = 0;
req->rl_reply = NULL;
spin_lock(&buffers->rb_lock);
@@ -1367,7 +1508,7 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
struct rpcrdma_ep *ep,
struct rpcrdma_req *req)
{
- struct ib_send_wr *send_wr = &req->rl_send_wr;
+ struct ib_send_wr *send_wr = &req->rl_sendctx->sc_wr;
struct ib_send_wr *send_wr_fail;
int rc;
@@ -1381,7 +1522,14 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
dprintk("RPC: %s: posting %d s/g entries\n",
__func__, send_wr->num_sge);
- rpcrdma_set_signaled(ep, send_wr);
+ if (!ep->rep_send_count ||
+ test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) {
+ send_wr->send_flags |= IB_SEND_SIGNALED;
+ ep->rep_send_count = ep->rep_send_batch;
+ } else {
+ send_wr->send_flags &= ~IB_SEND_SIGNALED;
+ --ep->rep_send_count;
+ }
rc = ib_post_send(ia->ri_id->qp, send_wr, &send_wr_fail);
if (rc)
goto out_postsend_err;
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index b282d3f8cdd8..51686d9eac5f 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2014-2017 Oracle. All rights reserved.
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -93,8 +94,8 @@ enum {
*/
struct rpcrdma_ep {
- atomic_t rep_cqcount;
- int rep_cqinit;
+ unsigned int rep_send_count;
+ unsigned int rep_send_batch;
int rep_connected;
struct ib_qp_init_attr rep_attr;
wait_queue_head_t rep_connect_wait;
@@ -104,25 +105,6 @@ struct rpcrdma_ep {
struct delayed_work rep_connect_worker;
};
-static inline void
-rpcrdma_init_cqcount(struct rpcrdma_ep *ep, int count)
-{
- atomic_set(&ep->rep_cqcount, ep->rep_cqinit - count);
-}
-
-/* To update send queue accounting, provider must take a
- * send completion every now and then.
- */
-static inline void
-rpcrdma_set_signaled(struct rpcrdma_ep *ep, struct ib_send_wr *send_wr)
-{
- send_wr->send_flags = 0;
- if (unlikely(atomic_sub_return(1, &ep->rep_cqcount) <= 0)) {
- rpcrdma_init_cqcount(ep, 0);
- send_wr->send_flags = IB_SEND_SIGNALED;
- }
-}
-
/* Pre-allocate extra Work Requests for handling backward receives
* and sends. This is a fixed value because the Work Queues are
* allocated when the forward channel is set up.
@@ -164,12 +146,6 @@ rdmab_lkey(struct rpcrdma_regbuf *rb)
return rb->rg_iov.lkey;
}
-static inline struct rpcrdma_msg *
-rdmab_to_msg(struct rpcrdma_regbuf *rb)
-{
- return (struct rpcrdma_msg *)rb->rg_base;
-}
-
static inline struct ib_device *
rdmab_device(struct rpcrdma_regbuf *rb)
{
@@ -202,33 +178,58 @@ enum {
};
/*
- * struct rpcrdma_rep -- this structure encapsulates state required to recv
- * and complete a reply, asychronously. It needs several pieces of
- * state:
- * o recv buffer (posted to provider)
- * o ib_sge (also donated to provider)
- * o status of reply (length, success or not)
- * o bookkeeping state to get run by reply handler (list, etc)
+ * struct rpcrdma_rep -- this structure encapsulates state required
+ * to receive and complete an RPC Reply, asychronously. It needs
+ * several pieces of state:
*
- * These are allocated during initialization, per-transport instance.
+ * o receive buffer and ib_sge (donated to provider)
+ * o status of receive (success or not, length, inv rkey)
+ * o bookkeeping state to get run by reply handler (XDR stream)
*
- * N of these are associated with a transport instance, and stored in
- * struct rpcrdma_buffer. N is the max number of outstanding requests.
+ * These structures are allocated during transport initialization.
+ * N of these are associated with a transport instance, managed by
+ * struct rpcrdma_buffer. N is the max number of outstanding RPCs.
*/
struct rpcrdma_rep {
struct ib_cqe rr_cqe;
- unsigned int rr_len;
+ __be32 rr_xid;
+ __be32 rr_vers;
+ __be32 rr_proc;
int rr_wc_flags;
u32 rr_inv_rkey;
+ struct rpcrdma_regbuf *rr_rdmabuf;
struct rpcrdma_xprt *rr_rxprt;
struct work_struct rr_work;
+ struct xdr_buf rr_hdrbuf;
+ struct xdr_stream rr_stream;
+ struct rpc_rqst *rr_rqst;
struct list_head rr_list;
struct ib_recv_wr rr_recv_wr;
- struct rpcrdma_regbuf *rr_rdmabuf;
};
-#define RPCRDMA_BAD_LEN (~0U)
+/* struct rpcrdma_sendctx - DMA mapped SGEs to unmap after Send completes
+ */
+struct rpcrdma_req;
+struct rpcrdma_xprt;
+struct rpcrdma_sendctx {
+ struct ib_send_wr sc_wr;
+ struct ib_cqe sc_cqe;
+ struct rpcrdma_xprt *sc_xprt;
+ struct rpcrdma_req *sc_req;
+ unsigned int sc_unmap_count;
+ struct ib_sge sc_sges[];
+};
+
+/* Limit the number of SGEs that can be unmapped during one
+ * Send completion. This caps the amount of work a single
+ * completion can do before returning to the provider.
+ *
+ * Setting this to zero disables Send completion batching.
+ */
+enum {
+ RPCRDMA_MAX_SEND_BATCH = 7,
+};
/*
* struct rpcrdma_mw - external memory region metadata
@@ -341,25 +342,30 @@ enum {
struct rpcrdma_buffer;
struct rpcrdma_req {
struct list_head rl_list;
- __be32 rl_xid;
- unsigned int rl_mapped_sges;
unsigned int rl_connect_cookie;
struct rpcrdma_buffer *rl_buffer;
struct rpcrdma_rep *rl_reply;
- struct ib_send_wr rl_send_wr;
- struct ib_sge rl_send_sge[RPCRDMA_MAX_SEND_SGES];
+ struct xdr_stream rl_stream;
+ struct xdr_buf rl_hdrbuf;
+ struct rpcrdma_sendctx *rl_sendctx;
struct rpcrdma_regbuf *rl_rdmabuf; /* xprt header */
struct rpcrdma_regbuf *rl_sendbuf; /* rq_snd_buf */
struct rpcrdma_regbuf *rl_recvbuf; /* rq_rcv_buf */
- struct ib_cqe rl_cqe;
struct list_head rl_all;
- bool rl_backchannel;
+ unsigned long rl_flags;
struct list_head rl_registered; /* registered segments */
struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];
};
+/* rl_flags */
+enum {
+ RPCRDMA_REQ_F_BACKCHANNEL = 0,
+ RPCRDMA_REQ_F_PENDING,
+ RPCRDMA_REQ_F_TX_RESOURCES,
+};
+
static inline void
rpcrdma_set_xprtdata(struct rpc_rqst *rqst, struct rpcrdma_req *req)
{
@@ -399,13 +405,17 @@ struct rpcrdma_buffer {
struct list_head rb_mws;
struct list_head rb_all;
+ unsigned long rb_sc_head;
+ unsigned long rb_sc_tail;
+ unsigned long rb_sc_last;
+ struct rpcrdma_sendctx **rb_sc_ctxs;
+
spinlock_t rb_lock; /* protect buf lists */
int rb_send_count, rb_recv_count;
struct list_head rb_send_bufs;
struct list_head rb_recv_bufs;
- struct list_head rb_pending;
u32 rb_max_requests;
- atomic_t rb_credits; /* most recent credit grant */
+ u32 rb_credits; /* most recent credit grant */
u32 rb_bc_srv_max_requests;
spinlock_t rb_reqslock; /* protect rb_allreqs */
@@ -440,24 +450,29 @@ struct rpcrdma_create_data_internal {
* Statistics for RPCRDMA
*/
struct rpcrdma_stats {
+ /* accessed when sending a call */
unsigned long read_chunk_count;
unsigned long write_chunk_count;
unsigned long reply_chunk_count;
-
unsigned long long total_rdma_request;
- unsigned long long total_rdma_reply;
+ /* rarely accessed error counters */
unsigned long long pullup_copy_count;
- unsigned long long fixup_copy_count;
unsigned long hardway_register_count;
unsigned long failed_marshal_count;
unsigned long bad_reply_count;
- unsigned long nomsg_call_count;
- unsigned long bcall_count;
unsigned long mrs_recovered;
unsigned long mrs_orphaned;
unsigned long mrs_allocated;
+ unsigned long empty_sendctx_q;
+
+ /* accessed when receiving a reply */
+ unsigned long long total_rdma_reply;
+ unsigned long long fixup_copy_count;
+ unsigned long reply_waits_for_send;
unsigned long local_inv_needed;
+ unsigned long nomsg_call_count;
+ unsigned long bcall_count;
};
/*
@@ -465,13 +480,12 @@ struct rpcrdma_stats {
*/
struct rpcrdma_xprt;
struct rpcrdma_memreg_ops {
- int (*ro_map)(struct rpcrdma_xprt *,
+ struct rpcrdma_mr_seg *
+ (*ro_map)(struct rpcrdma_xprt *,
struct rpcrdma_mr_seg *, int, bool,
struct rpcrdma_mw **);
void (*ro_unmap_sync)(struct rpcrdma_xprt *,
struct list_head *);
- void (*ro_unmap_safe)(struct rpcrdma_xprt *,
- struct rpcrdma_req *, bool);
void (*ro_recover_mr)(struct rpcrdma_mw *);
int (*ro_open)(struct rpcrdma_ia *,
struct rpcrdma_ep *,
@@ -529,6 +543,8 @@ void rpcrdma_ia_close(struct rpcrdma_ia *);
bool frwr_is_supported(struct rpcrdma_ia *);
bool fmr_is_supported(struct rpcrdma_ia *);
+extern struct workqueue_struct *rpcrdma_receive_wq;
+
/*
* Endpoint calls - xprtrdma/verbs.c
*/
@@ -551,34 +567,8 @@ struct rpcrdma_rep *rpcrdma_create_rep(struct rpcrdma_xprt *);
void rpcrdma_destroy_req(struct rpcrdma_req *);
int rpcrdma_buffer_create(struct rpcrdma_xprt *);
void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
-
-static inline void
-rpcrdma_insert_req(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req)
-{
- spin_lock(&buffers->rb_lock);
- if (list_empty(&req->rl_list))
- list_add_tail(&req->rl_list, &buffers->rb_pending);
- spin_unlock(&buffers->rb_lock);
-}
-
-static inline struct rpcrdma_req *
-rpcrdma_lookup_req_locked(struct rpcrdma_buffer *buffers, __be32 xid)
-{
- struct rpcrdma_req *pos;
-
- list_for_each_entry(pos, &buffers->rb_pending, rl_list)
- if (pos->rl_xid == xid)
- return pos;
- return NULL;
-}
-
-static inline void
-rpcrdma_remove_req(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req)
-{
- spin_lock(&buffers->rb_lock);
- list_del(&req->rl_list);
- spin_unlock(&buffers->rb_lock);
-}
+struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf);
+void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc);
struct rpcrdma_mw *rpcrdma_get_mw(struct rpcrdma_xprt *);
void rpcrdma_put_mw(struct rpcrdma_xprt *, struct rpcrdma_mw *);
@@ -635,12 +625,24 @@ enum rpcrdma_chunktype {
rpcrdma_replych
};
-bool rpcrdma_prepare_send_sges(struct rpcrdma_ia *, struct rpcrdma_req *,
- u32, struct xdr_buf *, enum rpcrdma_chunktype);
-void rpcrdma_unmap_sges(struct rpcrdma_ia *, struct rpcrdma_req *);
-int rpcrdma_marshal_req(struct rpc_rqst *);
+int rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
+ struct rpcrdma_req *req, u32 hdrlen,
+ struct xdr_buf *xdr,
+ enum rpcrdma_chunktype rtype);
+void rpcrdma_unmap_sendctx(struct rpcrdma_sendctx *sc);
+int rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst);
void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *);
-void rpcrdma_reply_handler(struct work_struct *work);
+void rpcrdma_complete_rqst(struct rpcrdma_rep *rep);
+void rpcrdma_reply_handler(struct rpcrdma_rep *rep);
+void rpcrdma_release_rqst(struct rpcrdma_xprt *r_xprt,
+ struct rpcrdma_req *req);
+void rpcrdma_deferred_completion(struct work_struct *work);
+
+static inline void rpcrdma_set_xdrlen(struct xdr_buf *xdr, size_t len)
+{
+ xdr->head[0].iov_len = len;
+ xdr->len = len;
+}
/* RPC/RDMA module init - xprtrdma/transport.c
*/
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 4f154d388748..9cc850c2719e 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* linux/net/sunrpc/xprtsock.c
*
@@ -551,6 +552,7 @@ static int xs_local_send_request(struct rpc_task *task)
default:
dprintk("RPC: sendmsg returned unrecognized error %d\n",
-status);
+ /* fall through */
case -EPIPE:
xs_close(xprt);
status = -ENOTCONN;
@@ -969,10 +971,12 @@ static void xs_local_data_read_skb(struct rpc_xprt *xprt,
return;
/* Look up and lock the request corresponding to the given XID */
- spin_lock_bh(&xprt->transport_lock);
+ spin_lock(&xprt->recv_lock);
rovr = xprt_lookup_rqst(xprt, *xp);
if (!rovr)
goto out_unlock;
+ xprt_pin_rqst(rovr);
+ spin_unlock(&xprt->recv_lock);
task = rovr->rq_task;
copied = rovr->rq_private_buf.buflen;
@@ -981,13 +985,16 @@ static void xs_local_data_read_skb(struct rpc_xprt *xprt,
if (xs_local_copy_to_xdr(&rovr->rq_private_buf, skb)) {
dprintk("RPC: sk_buff copy failed\n");
- goto out_unlock;
+ spin_lock(&xprt->recv_lock);
+ goto out_unpin;
}
+ spin_lock(&xprt->recv_lock);
xprt_complete_rqst(task, copied);
-
+out_unpin:
+ xprt_unpin_rqst(rovr);
out_unlock:
- spin_unlock_bh(&xprt->transport_lock);
+ spin_unlock(&xprt->recv_lock);
}
static void xs_local_data_receive(struct sock_xprt *transport)
@@ -1050,10 +1057,12 @@ static void xs_udp_data_read_skb(struct rpc_xprt *xprt,
return;
/* Look up and lock the request corresponding to the given XID */
- spin_lock_bh(&xprt->transport_lock);
+ spin_lock(&xprt->recv_lock);
rovr = xprt_lookup_rqst(xprt, *xp);
if (!rovr)
goto out_unlock;
+ xprt_pin_rqst(rovr);
+ spin_unlock(&xprt->recv_lock);
task = rovr->rq_task;
if ((copied = rovr->rq_private_buf.buflen) > repsize)
@@ -1062,16 +1071,21 @@ static void xs_udp_data_read_skb(struct rpc_xprt *xprt,
/* Suck it into the iovec, verify checksum if not done by hw. */
if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb)) {
__UDPX_INC_STATS(sk, UDP_MIB_INERRORS);
- goto out_unlock;
+ spin_lock(&xprt->recv_lock);
+ goto out_unpin;
}
__UDPX_INC_STATS(sk, UDP_MIB_INDATAGRAMS);
+ spin_lock_bh(&xprt->transport_lock);
xprt_adjust_cwnd(xprt, task, copied);
+ spin_unlock_bh(&xprt->transport_lock);
+ spin_lock(&xprt->recv_lock);
xprt_complete_rqst(task, copied);
-
+out_unpin:
+ xprt_unpin_rqst(rovr);
out_unlock:
- spin_unlock_bh(&xprt->transport_lock);
+ spin_unlock(&xprt->recv_lock);
}
static void xs_udp_data_receive(struct sock_xprt *transport)
@@ -1277,25 +1291,12 @@ static inline void xs_tcp_read_common(struct rpc_xprt *xprt,
}
len = desc->count;
- if (len > transport->tcp_reclen - transport->tcp_offset) {
- struct xdr_skb_reader my_desc;
-
- len = transport->tcp_reclen - transport->tcp_offset;
- memcpy(&my_desc, desc, sizeof(my_desc));
- my_desc.count = len;
- r = xdr_partial_copy_from_skb(rcvbuf, transport->tcp_copied,
- &my_desc, xdr_skb_read_bits);
- desc->count -= r;
- desc->offset += r;
- } else
- r = xdr_partial_copy_from_skb(rcvbuf, transport->tcp_copied,
+ if (len > transport->tcp_reclen - transport->tcp_offset)
+ desc->count = transport->tcp_reclen - transport->tcp_offset;
+ r = xdr_partial_copy_from_skb(rcvbuf, transport->tcp_copied,
desc, xdr_skb_read_bits);
- if (r > 0) {
- transport->tcp_copied += r;
- transport->tcp_offset += r;
- }
- if (r != len) {
+ if (desc->count) {
/* Error when copying to the receive buffer,
* usually because we weren't able to allocate
* additional buffer pages. All we can do now
@@ -1315,6 +1316,10 @@ static inline void xs_tcp_read_common(struct rpc_xprt *xprt,
return;
}
+ transport->tcp_copied += r;
+ transport->tcp_offset += r;
+ desc->count = len - r;
+
dprintk("RPC: XID %08x read %zd bytes\n",
ntohl(transport->tcp_xid), r);
dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, "
@@ -1343,21 +1348,24 @@ static inline int xs_tcp_read_reply(struct rpc_xprt *xprt,
dprintk("RPC: read reply XID %08x\n", ntohl(transport->tcp_xid));
/* Find and lock the request corresponding to this xid */
- spin_lock_bh(&xprt->transport_lock);
+ spin_lock(&xprt->recv_lock);
req = xprt_lookup_rqst(xprt, transport->tcp_xid);
if (!req) {
dprintk("RPC: XID %08x request not found!\n",
ntohl(transport->tcp_xid));
- spin_unlock_bh(&xprt->transport_lock);
+ spin_unlock(&xprt->recv_lock);
return -1;
}
+ xprt_pin_rqst(req);
+ spin_unlock(&xprt->recv_lock);
xs_tcp_read_common(xprt, desc, req);
+ spin_lock(&xprt->recv_lock);
if (!(transport->tcp_flags & TCP_RCV_COPY_DATA))
xprt_complete_rqst(req->rq_task, transport->tcp_copied);
-
- spin_unlock_bh(&xprt->transport_lock);
+ xprt_unpin_rqst(req);
+ spin_unlock(&xprt->recv_lock);
return 0;
}
@@ -1376,11 +1384,9 @@ static int xs_tcp_read_callback(struct rpc_xprt *xprt,
container_of(xprt, struct sock_xprt, xprt);
struct rpc_rqst *req;
- /* Look up and lock the request corresponding to the given XID */
- spin_lock_bh(&xprt->transport_lock);
+ /* Look up the request corresponding to the given XID */
req = xprt_lookup_bc_request(xprt, transport->tcp_xid);
if (req == NULL) {
- spin_unlock_bh(&xprt->transport_lock);
printk(KERN_WARNING "Callback slot table overflowed\n");
xprt_force_disconnect(xprt);
return -1;
@@ -1391,7 +1397,6 @@ static int xs_tcp_read_callback(struct rpc_xprt *xprt,
if (!(transport->tcp_flags & TCP_RCV_COPY_DATA))
xprt_complete_bc_request(req, transport->tcp_copied);
- spin_unlock_bh(&xprt->transport_lock);
return 0;
}
@@ -1516,6 +1521,7 @@ static void xs_tcp_data_receive(struct sock_xprt *transport)
.arg.data = xprt,
};
unsigned long total = 0;
+ int loop;
int read = 0;
mutex_lock(&transport->recv_mutex);
@@ -1524,20 +1530,20 @@ static void xs_tcp_data_receive(struct sock_xprt *transport)
goto out;
/* We use rd_desc to pass struct xprt to xs_tcp_data_recv */
- for (;;) {
+ for (loop = 0; loop < 64; loop++) {
lock_sock(sk);
read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv);
if (read <= 0) {
clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
release_sock(sk);
- if (!test_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
- break;
- } else {
- release_sock(sk);
- total += read;
+ break;
}
+ release_sock(sk);
+ total += read;
rd_desc.count = 65536;
}
+ if (test_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
+ queue_work(xprtiod_workqueue, &transport->recv_worker);
out:
mutex_unlock(&transport->recv_mutex);
trace_xs_tcp_data_ready(xprt, read, total);
@@ -1606,6 +1612,7 @@ static void xs_tcp_state_change(struct sock *sk)
xprt->connect_cookie++;
clear_bit(XPRT_CONNECTED, &xprt->state);
xs_tcp_force_close(xprt);
+ /* fall through */
case TCP_CLOSING:
/*
* If the server closed down the connection, make sure that
@@ -2199,7 +2206,7 @@ static void xs_udp_setup_socket(struct work_struct *work)
struct sock_xprt *transport =
container_of(work, struct sock_xprt, connect_worker.work);
struct rpc_xprt *xprt = &transport->xprt;
- struct socket *sock = transport->sock;
+ struct socket *sock;
int status = -EIO;
sock = xs_create_sock(xprt, transport,
@@ -2363,6 +2370,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
switch (ret) {
case 0:
xs_set_srcport(transport, sock);
+ /* fall through */
case -EINPROGRESS:
/* SYN_SENT! */
if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
@@ -2414,6 +2422,7 @@ static void xs_tcp_setup_socket(struct work_struct *work)
default:
printk("%s: connect returned unhandled error %d\n",
__func__, status);
+ /* fall through */
case -EADDRNOTAVAIL:
/* We're probably in TIME_WAIT. Get rid of existing socket,
* and retry
@@ -2724,7 +2733,7 @@ static void bc_destroy(struct rpc_xprt *xprt)
module_put(THIS_MODULE);
}
-static struct rpc_xprt_ops xs_local_ops = {
+static const struct rpc_xprt_ops xs_local_ops = {
.reserve_xprt = xprt_reserve_xprt,
.release_xprt = xs_tcp_release_xprt,
.alloc_slot = xprt_alloc_slot,
@@ -2742,7 +2751,7 @@ static struct rpc_xprt_ops xs_local_ops = {
.disable_swap = xs_disable_swap,
};
-static struct rpc_xprt_ops xs_udp_ops = {
+static const struct rpc_xprt_ops xs_udp_ops = {
.set_buffer_size = xs_udp_set_buffer_size,
.reserve_xprt = xprt_reserve_xprt_cong,
.release_xprt = xprt_release_xprt_cong,
@@ -2764,7 +2773,7 @@ static struct rpc_xprt_ops xs_udp_ops = {
.inject_disconnect = xs_inject_disconnect,
};
-static struct rpc_xprt_ops xs_tcp_ops = {
+static const struct rpc_xprt_ops xs_tcp_ops = {
.reserve_xprt = xprt_reserve_xprt,
.release_xprt = xs_tcp_release_xprt,
.alloc_slot = xprt_lock_and_alloc_slot,
@@ -2795,7 +2804,7 @@ static struct rpc_xprt_ops xs_tcp_ops = {
* The rpc_xprt_ops for the server backchannel
*/
-static struct rpc_xprt_ops bc_tcp_ops = {
+static const struct rpc_xprt_ops bc_tcp_ops = {
.reserve_xprt = xprt_reserve_xprt,
.release_xprt = xprt_release_xprt,
.alloc_slot = xprt_alloc_slot,
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 25dc67ef9d37..74b9d916a58b 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -343,10 +343,10 @@ static size_t switchdev_obj_size(const struct switchdev_obj *obj)
switch (obj->id) {
case SWITCHDEV_OBJ_ID_PORT_VLAN:
return sizeof(struct switchdev_obj_port_vlan);
- case SWITCHDEV_OBJ_ID_PORT_FDB:
- return sizeof(struct switchdev_obj_port_fdb);
case SWITCHDEV_OBJ_ID_PORT_MDB:
return sizeof(struct switchdev_obj_port_mdb);
+ case SWITCHDEV_OBJ_ID_HOST_MDB:
+ return sizeof(struct switchdev_obj_port_mdb);
default:
BUG();
}
@@ -534,43 +534,6 @@ int switchdev_port_obj_del(struct net_device *dev,
}
EXPORT_SYMBOL_GPL(switchdev_port_obj_del);
-/**
- * switchdev_port_obj_dump - Dump port objects
- *
- * @dev: port device
- * @id: object ID
- * @obj: object to dump
- * @cb: function to call with a filled object
- *
- * rtnl_lock must be held.
- */
-int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj,
- switchdev_obj_dump_cb_t *cb)
-{
- const struct switchdev_ops *ops = dev->switchdev_ops;
- struct net_device *lower_dev;
- struct list_head *iter;
- int err = -EOPNOTSUPP;
-
- ASSERT_RTNL();
-
- if (ops && ops->switchdev_port_obj_dump)
- return ops->switchdev_port_obj_dump(dev, obj, cb);
-
- /* Switch device port(s) may be stacked under
- * bond/team/vlan dev, so recurse down to dump objects on
- * first port at bottom of stack.
- */
-
- netdev_for_each_lower_dev(dev, lower_dev, iter) {
- err = switchdev_port_obj_dump(lower_dev, obj, cb);
- break;
- }
-
- return err;
-}
-EXPORT_SYMBOL_GPL(switchdev_port_obj_dump);
-
static ATOMIC_NOTIFIER_HEAD(switchdev_notif_chain);
/**
@@ -613,486 +576,6 @@ int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
}
EXPORT_SYMBOL_GPL(call_switchdev_notifiers);
-struct switchdev_vlan_dump {
- struct switchdev_obj_port_vlan vlan;
- struct sk_buff *skb;
- u32 filter_mask;
- u16 flags;
- u16 begin;
- u16 end;
-};
-
-static int switchdev_port_vlan_dump_put(struct switchdev_vlan_dump *dump)
-{
- struct bridge_vlan_info vinfo;
-
- vinfo.flags = dump->flags;
-
- if (dump->begin == 0 && dump->end == 0) {
- return 0;
- } else if (dump->begin == dump->end) {
- vinfo.vid = dump->begin;
- if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
- sizeof(vinfo), &vinfo))
- return -EMSGSIZE;
- } else {
- vinfo.vid = dump->begin;
- vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_BEGIN;
- if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
- sizeof(vinfo), &vinfo))
- return -EMSGSIZE;
- vinfo.vid = dump->end;
- vinfo.flags &= ~BRIDGE_VLAN_INFO_RANGE_BEGIN;
- vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_END;
- if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
- sizeof(vinfo), &vinfo))
- return -EMSGSIZE;
- }
-
- return 0;
-}
-
-static int switchdev_port_vlan_dump_cb(struct switchdev_obj *obj)
-{
- struct switchdev_obj_port_vlan *vlan = SWITCHDEV_OBJ_PORT_VLAN(obj);
- struct switchdev_vlan_dump *dump =
- container_of(vlan, struct switchdev_vlan_dump, vlan);
- int err = 0;
-
- if (vlan->vid_begin > vlan->vid_end)
- return -EINVAL;
-
- if (dump->filter_mask & RTEXT_FILTER_BRVLAN) {
- dump->flags = vlan->flags;
- for (dump->begin = dump->end = vlan->vid_begin;
- dump->begin <= vlan->vid_end;
- dump->begin++, dump->end++) {
- err = switchdev_port_vlan_dump_put(dump);
- if (err)
- return err;
- }
- } else if (dump->filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED) {
- if (dump->begin > vlan->vid_begin &&
- dump->begin >= vlan->vid_end) {
- if ((dump->begin - 1) == vlan->vid_end &&
- dump->flags == vlan->flags) {
- /* prepend */
- dump->begin = vlan->vid_begin;
- } else {
- err = switchdev_port_vlan_dump_put(dump);
- dump->flags = vlan->flags;
- dump->begin = vlan->vid_begin;
- dump->end = vlan->vid_end;
- }
- } else if (dump->end <= vlan->vid_begin &&
- dump->end < vlan->vid_end) {
- if ((dump->end + 1) == vlan->vid_begin &&
- dump->flags == vlan->flags) {
- /* append */
- dump->end = vlan->vid_end;
- } else {
- err = switchdev_port_vlan_dump_put(dump);
- dump->flags = vlan->flags;
- dump->begin = vlan->vid_begin;
- dump->end = vlan->vid_end;
- }
- } else {
- err = -EINVAL;
- }
- }
-
- return err;
-}
-
-static int switchdev_port_vlan_fill(struct sk_buff *skb, struct net_device *dev,
- u32 filter_mask)
-{
- struct switchdev_vlan_dump dump = {
- .vlan.obj.orig_dev = dev,
- .vlan.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
- .skb = skb,
- .filter_mask = filter_mask,
- };
- int err = 0;
-
- if ((filter_mask & RTEXT_FILTER_BRVLAN) ||
- (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) {
- err = switchdev_port_obj_dump(dev, &dump.vlan.obj,
- switchdev_port_vlan_dump_cb);
- if (err)
- goto err_out;
- if (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)
- /* last one */
- err = switchdev_port_vlan_dump_put(&dump);
- }
-
-err_out:
- return err == -EOPNOTSUPP ? 0 : err;
-}
-
-/**
- * switchdev_port_bridge_getlink - Get bridge port attributes
- *
- * @dev: port device
- *
- * Called for SELF on rtnl_bridge_getlink to get bridge port
- * attributes.
- */
-int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
- struct net_device *dev, u32 filter_mask,
- int nlflags)
-{
- struct switchdev_attr attr = {
- .orig_dev = dev,
- .id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS,
- };
- u16 mode = BRIDGE_MODE_UNDEF;
- u32 mask = BR_LEARNING | BR_LEARNING_SYNC | BR_FLOOD;
- int err;
-
- if (!netif_is_bridge_port(dev))
- return -EOPNOTSUPP;
-
- err = switchdev_port_attr_get(dev, &attr);
- if (err && err != -EOPNOTSUPP)
- return err;
-
- return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode,
- attr.u.brport_flags, mask, nlflags,
- filter_mask, switchdev_port_vlan_fill);
-}
-EXPORT_SYMBOL_GPL(switchdev_port_bridge_getlink);
-
-static int switchdev_port_br_setflag(struct net_device *dev,
- struct nlattr *nlattr,
- unsigned long brport_flag)
-{
- struct switchdev_attr attr = {
- .orig_dev = dev,
- .id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS,
- };
- u8 flag = nla_get_u8(nlattr);
- int err;
-
- err = switchdev_port_attr_get(dev, &attr);
- if (err)
- return err;
-
- if (flag)
- attr.u.brport_flags |= brport_flag;
- else
- attr.u.brport_flags &= ~brport_flag;
-
- return switchdev_port_attr_set(dev, &attr);
-}
-
-static const struct nla_policy
-switchdev_port_bridge_policy[IFLA_BRPORT_MAX + 1] = {
- [IFLA_BRPORT_STATE] = { .type = NLA_U8 },
- [IFLA_BRPORT_COST] = { .type = NLA_U32 },
- [IFLA_BRPORT_PRIORITY] = { .type = NLA_U16 },
- [IFLA_BRPORT_MODE] = { .type = NLA_U8 },
- [IFLA_BRPORT_GUARD] = { .type = NLA_U8 },
- [IFLA_BRPORT_PROTECT] = { .type = NLA_U8 },
- [IFLA_BRPORT_FAST_LEAVE] = { .type = NLA_U8 },
- [IFLA_BRPORT_LEARNING] = { .type = NLA_U8 },
- [IFLA_BRPORT_LEARNING_SYNC] = { .type = NLA_U8 },
- [IFLA_BRPORT_UNICAST_FLOOD] = { .type = NLA_U8 },
-};
-
-static int switchdev_port_br_setlink_protinfo(struct net_device *dev,
- struct nlattr *protinfo)
-{
- struct nlattr *attr;
- int rem;
- int err;
-
- err = nla_validate_nested(protinfo, IFLA_BRPORT_MAX,
- switchdev_port_bridge_policy, NULL);
- if (err)
- return err;
-
- nla_for_each_nested(attr, protinfo, rem) {
- switch (nla_type(attr)) {
- case IFLA_BRPORT_LEARNING:
- err = switchdev_port_br_setflag(dev, attr,
- BR_LEARNING);
- break;
- case IFLA_BRPORT_LEARNING_SYNC:
- err = switchdev_port_br_setflag(dev, attr,
- BR_LEARNING_SYNC);
- break;
- case IFLA_BRPORT_UNICAST_FLOOD:
- err = switchdev_port_br_setflag(dev, attr, BR_FLOOD);
- break;
- default:
- err = -EOPNOTSUPP;
- break;
- }
- if (err)
- return err;
- }
-
- return 0;
-}
-
-static int switchdev_port_br_afspec(struct net_device *dev,
- struct nlattr *afspec,
- int (*f)(struct net_device *dev,
- const struct switchdev_obj *obj))
-{
- struct nlattr *attr;
- struct bridge_vlan_info *vinfo;
- struct switchdev_obj_port_vlan vlan = {
- .obj.orig_dev = dev,
- .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
- };
- int rem;
- int err;
-
- nla_for_each_nested(attr, afspec, rem) {
- if (nla_type(attr) != IFLA_BRIDGE_VLAN_INFO)
- continue;
- if (nla_len(attr) != sizeof(struct bridge_vlan_info))
- return -EINVAL;
- vinfo = nla_data(attr);
- if (!vinfo->vid || vinfo->vid >= VLAN_VID_MASK)
- return -EINVAL;
- vlan.flags = vinfo->flags;
- if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) {
- if (vlan.vid_begin)
- return -EINVAL;
- vlan.vid_begin = vinfo->vid;
- /* don't allow range of pvids */
- if (vlan.flags & BRIDGE_VLAN_INFO_PVID)
- return -EINVAL;
- } else if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_END) {
- if (!vlan.vid_begin)
- return -EINVAL;
- vlan.vid_end = vinfo->vid;
- if (vlan.vid_end <= vlan.vid_begin)
- return -EINVAL;
- err = f(dev, &vlan.obj);
- if (err)
- return err;
- vlan.vid_begin = 0;
- } else {
- if (vlan.vid_begin)
- return -EINVAL;
- vlan.vid_begin = vinfo->vid;
- vlan.vid_end = vinfo->vid;
- err = f(dev, &vlan.obj);
- if (err)
- return err;
- vlan.vid_begin = 0;
- }
- }
-
- return 0;
-}
-
-/**
- * switchdev_port_bridge_setlink - Set bridge port attributes
- *
- * @dev: port device
- * @nlh: netlink header
- * @flags: netlink flags
- *
- * Called for SELF on rtnl_bridge_setlink to set bridge port
- * attributes.
- */
-int switchdev_port_bridge_setlink(struct net_device *dev,
- struct nlmsghdr *nlh, u16 flags)
-{
- struct nlattr *protinfo;
- struct nlattr *afspec;
- int err = 0;
-
- if (!netif_is_bridge_port(dev))
- return -EOPNOTSUPP;
-
- protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
- IFLA_PROTINFO);
- if (protinfo) {
- err = switchdev_port_br_setlink_protinfo(dev, protinfo);
- if (err)
- return err;
- }
-
- afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
- IFLA_AF_SPEC);
- if (afspec)
- err = switchdev_port_br_afspec(dev, afspec,
- switchdev_port_obj_add);
-
- return err;
-}
-EXPORT_SYMBOL_GPL(switchdev_port_bridge_setlink);
-
-/**
- * switchdev_port_bridge_dellink - Set bridge port attributes
- *
- * @dev: port device
- * @nlh: netlink header
- * @flags: netlink flags
- *
- * Called for SELF on rtnl_bridge_dellink to set bridge port
- * attributes.
- */
-int switchdev_port_bridge_dellink(struct net_device *dev,
- struct nlmsghdr *nlh, u16 flags)
-{
- struct nlattr *afspec;
-
- if (!netif_is_bridge_port(dev))
- return -EOPNOTSUPP;
-
- afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
- IFLA_AF_SPEC);
- if (afspec)
- return switchdev_port_br_afspec(dev, afspec,
- switchdev_port_obj_del);
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(switchdev_port_bridge_dellink);
-
-/**
- * switchdev_port_fdb_add - Add FDB (MAC/VLAN) entry to port
- *
- * @ndmsg: netlink hdr
- * @nlattr: netlink attributes
- * @dev: port device
- * @addr: MAC address to add
- * @vid: VLAN to add
- *
- * Add FDB entry to switch device.
- */
-int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
- struct net_device *dev, const unsigned char *addr,
- u16 vid, u16 nlm_flags)
-{
- struct switchdev_obj_port_fdb fdb = {
- .obj.orig_dev = dev,
- .obj.id = SWITCHDEV_OBJ_ID_PORT_FDB,
- .vid = vid,
- };
-
- ether_addr_copy(fdb.addr, addr);
- return switchdev_port_obj_add(dev, &fdb.obj);
-}
-EXPORT_SYMBOL_GPL(switchdev_port_fdb_add);
-
-/**
- * switchdev_port_fdb_del - Delete FDB (MAC/VLAN) entry from port
- *
- * @ndmsg: netlink hdr
- * @nlattr: netlink attributes
- * @dev: port device
- * @addr: MAC address to delete
- * @vid: VLAN to delete
- *
- * Delete FDB entry from switch device.
- */
-int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
- struct net_device *dev, const unsigned char *addr,
- u16 vid)
-{
- struct switchdev_obj_port_fdb fdb = {
- .obj.orig_dev = dev,
- .obj.id = SWITCHDEV_OBJ_ID_PORT_FDB,
- .vid = vid,
- };
-
- ether_addr_copy(fdb.addr, addr);
- return switchdev_port_obj_del(dev, &fdb.obj);
-}
-EXPORT_SYMBOL_GPL(switchdev_port_fdb_del);
-
-struct switchdev_fdb_dump {
- struct switchdev_obj_port_fdb fdb;
- struct net_device *dev;
- struct sk_buff *skb;
- struct netlink_callback *cb;
- int idx;
-};
-
-static int switchdev_port_fdb_dump_cb(struct switchdev_obj *obj)
-{
- struct switchdev_obj_port_fdb *fdb = SWITCHDEV_OBJ_PORT_FDB(obj);
- struct switchdev_fdb_dump *dump =
- container_of(fdb, struct switchdev_fdb_dump, fdb);
- u32 portid = NETLINK_CB(dump->cb->skb).portid;
- u32 seq = dump->cb->nlh->nlmsg_seq;
- struct nlmsghdr *nlh;
- struct ndmsg *ndm;
-
- if (dump->idx < dump->cb->args[2])
- goto skip;
-
- nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH,
- sizeof(*ndm), NLM_F_MULTI);
- if (!nlh)
- return -EMSGSIZE;
-
- ndm = nlmsg_data(nlh);
- ndm->ndm_family = AF_BRIDGE;
- ndm->ndm_pad1 = 0;
- ndm->ndm_pad2 = 0;
- ndm->ndm_flags = NTF_SELF;
- ndm->ndm_type = 0;
- ndm->ndm_ifindex = dump->dev->ifindex;
- ndm->ndm_state = fdb->ndm_state;
-
- if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, fdb->addr))
- goto nla_put_failure;
-
- if (fdb->vid && nla_put_u16(dump->skb, NDA_VLAN, fdb->vid))
- goto nla_put_failure;
-
- nlmsg_end(dump->skb, nlh);
-
-skip:
- dump->idx++;
- return 0;
-
-nla_put_failure:
- nlmsg_cancel(dump->skb, nlh);
- return -EMSGSIZE;
-}
-
-/**
- * switchdev_port_fdb_dump - Dump port FDB (MAC/VLAN) entries
- *
- * @skb: netlink skb
- * @cb: netlink callback
- * @dev: port device
- * @filter_dev: filter device
- * @idx:
- *
- * Dump FDB entries from switch device.
- */
-int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
- struct net_device *dev,
- struct net_device *filter_dev, int *idx)
-{
- struct switchdev_fdb_dump dump = {
- .fdb.obj.orig_dev = dev,
- .fdb.obj.id = SWITCHDEV_OBJ_ID_PORT_FDB,
- .dev = dev,
- .skb = skb,
- .cb = cb,
- .idx = *idx,
- };
- int err;
-
- err = switchdev_port_obj_dump(dev, &dump.fdb.obj,
- switchdev_port_fdb_dump_cb);
- *idx = dump.idx;
- return err;
-}
-EXPORT_SYMBOL_GPL(switchdev_port_fdb_dump);
-
bool switchdev_port_same_parent_id(struct net_device *a,
struct net_device *b)
{
diff --git a/net/tipc/Makefile b/net/tipc/Makefile
index 31b9f9c52974..37bb0bfbd936 100644
--- a/net/tipc/Makefile
+++ b/net/tipc/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the Linux TIPC layer
#
@@ -8,7 +9,7 @@ tipc-y += addr.o bcast.o bearer.o \
core.o link.o discover.o msg.o \
name_distr.o subscr.o monitor.o name_table.o net.o \
netlink.o netlink_compat.o node.o socket.o eth_media.o \
- server.o socket.o
+ server.o socket.o group.o
tipc-$(CONFIG_TIPC_MEDIA_UDP) += udp_media.o
tipc-$(CONFIG_TIPC_MEDIA_IB) += ib_media.o
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 7d99029df342..329325bd553e 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -233,7 +233,7 @@ static int tipc_bcast_xmit(struct net *net, struct sk_buff_head *pkts,
struct sk_buff_head xmitq;
int rc = 0;
- __skb_queue_head_init(&xmitq);
+ skb_queue_head_init(&xmitq);
tipc_bcast_lock(net);
if (tipc_link_bc_peers(l))
rc = tipc_link_xmit(l, pkts, &xmitq);
@@ -258,20 +258,20 @@ static int tipc_bcast_xmit(struct net *net, struct sk_buff_head *pkts,
static int tipc_rcast_xmit(struct net *net, struct sk_buff_head *pkts,
struct tipc_nlist *dests, u16 *cong_link_cnt)
{
+ struct tipc_dest *dst, *tmp;
struct sk_buff_head _pkts;
- struct u32_item *n, *tmp;
- u32 dst, selector;
+ u32 dnode, selector;
selector = msg_link_selector(buf_msg(skb_peek(pkts)));
- __skb_queue_head_init(&_pkts);
+ skb_queue_head_init(&_pkts);
- list_for_each_entry_safe(n, tmp, &dests->list, list) {
- dst = n->value;
- if (!tipc_msg_pskb_copy(dst, pkts, &_pkts))
+ list_for_each_entry_safe(dst, tmp, &dests->list, list) {
+ dnode = dst->node;
+ if (!tipc_msg_pskb_copy(dnode, pkts, &_pkts))
return -ENOMEM;
/* Any other return value than -ELINKCONG is ignored */
- if (tipc_node_xmit(net, &_pkts, dst, selector) == -ELINKCONG)
+ if (tipc_node_xmit(net, &_pkts, dnode, selector) == -ELINKCONG)
(*cong_link_cnt)++;
}
return 0;
@@ -554,7 +554,7 @@ void tipc_nlist_add(struct tipc_nlist *nl, u32 node)
{
if (node == nl->self)
nl->local = true;
- else if (u32_push(&nl->list, node))
+ else if (tipc_dest_push(&nl->list, node, 0))
nl->remote++;
}
@@ -562,13 +562,13 @@ void tipc_nlist_del(struct tipc_nlist *nl, u32 node)
{
if (node == nl->self)
nl->local = false;
- else if (u32_del(&nl->list, node))
+ else if (tipc_dest_del(&nl->list, node, 0))
nl->remote--;
}
void tipc_nlist_purge(struct tipc_nlist *nl)
{
- u32_list_purge(&nl->list);
+ tipc_dest_list_purge(&nl->list);
nl->remote = 0;
nl->local = 0;
}
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 767e0537dde5..47ec121574ce 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -65,6 +65,8 @@ static struct tipc_bearer *bearer_get(struct net *net, int bearer_id)
}
static void bearer_disable(struct net *net, struct tipc_bearer *b);
+static int tipc_l2_rcv_msg(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt, struct net_device *orig_dev);
/**
* tipc_media_find - locates specified media object by name
@@ -365,30 +367,6 @@ static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b)
return 0;
}
-/* tipc_bearer_reset_all - reset all links on all bearers
- */
-void tipc_bearer_reset_all(struct net *net)
-{
- struct tipc_bearer *b;
- int i;
-
- for (i = 0; i < MAX_BEARERS; i++) {
- b = bearer_get(net, i);
- if (b)
- clear_bit_unlock(0, &b->up);
- }
- for (i = 0; i < MAX_BEARERS; i++) {
- b = bearer_get(net, i);
- if (b)
- tipc_reset_bearer(net, b);
- }
- for (i = 0; i < MAX_BEARERS; i++) {
- b = bearer_get(net, i);
- if (b)
- test_and_set_bit_lock(0, &b->up);
- }
-}
-
/**
* bearer_disable
*
@@ -428,6 +406,10 @@ int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b,
/* Associate TIPC bearer with L2 bearer */
rcu_assign_pointer(b->media_ptr, dev);
+ b->pt.dev = dev;
+ b->pt.type = htons(ETH_P_TIPC);
+ b->pt.func = tipc_l2_rcv_msg;
+ dev_add_pack(&b->pt);
memset(&b->bcast_addr, 0, sizeof(b->bcast_addr));
memcpy(b->bcast_addr.value, dev->broadcast, b->media->hwaddr_len);
b->bcast_addr.media_id = b->media->type_id;
@@ -447,6 +429,7 @@ void tipc_disable_l2_media(struct tipc_bearer *b)
struct net_device *dev;
dev = (struct net_device *)rtnl_dereference(b->media_ptr);
+ dev_remove_pack(&b->pt);
RCU_INIT_POINTER(dev->tipc_ptr, NULL);
synchronize_net();
dev_put(dev);
@@ -594,11 +577,12 @@ static int tipc_l2_rcv_msg(struct sk_buff *skb, struct net_device *dev,
struct tipc_bearer *b;
rcu_read_lock();
- b = rcu_dereference_rtnl(dev->tipc_ptr);
+ b = rcu_dereference_rtnl(dev->tipc_ptr) ?:
+ rcu_dereference_rtnl(orig_dev->tipc_ptr);
if (likely(b && test_bit(0, &b->up) &&
(skb->pkt_type <= PACKET_MULTICAST))) {
skb->next = NULL;
- tipc_rcv(dev_net(dev), skb, b);
+ tipc_rcv(dev_net(b->pt.dev), skb, b);
rcu_read_unlock();
return NET_RX_SUCCESS;
}
@@ -653,17 +637,12 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt,
break;
case NETDEV_UNREGISTER:
case NETDEV_CHANGENAME:
- bearer_disable(dev_net(dev), b);
+ bearer_disable(net, b);
break;
}
return NOTIFY_OK;
}
-static struct packet_type tipc_packet_type __read_mostly = {
- .type = htons(ETH_P_TIPC),
- .func = tipc_l2_rcv_msg,
-};
-
static struct notifier_block notifier = {
.notifier_call = tipc_l2_device_event,
.priority = 0,
@@ -671,19 +650,12 @@ static struct notifier_block notifier = {
int tipc_bearer_setup(void)
{
- int err;
-
- err = register_netdevice_notifier(&notifier);
- if (err)
- return err;
- dev_add_pack(&tipc_packet_type);
- return 0;
+ return register_netdevice_notifier(&notifier);
}
void tipc_bearer_cleanup(void)
{
unregister_netdevice_notifier(&notifier);
- dev_remove_pack(&tipc_packet_type);
}
void tipc_bearer_stop(struct net *net)
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index 635c9086e19a..42d6eeeb646d 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -131,6 +131,7 @@ struct tipc_media {
* @name: bearer name (format = media:interface)
* @media: ptr to media structure associated with bearer
* @bcast_addr: media address used in broadcasting
+ * @pt: packet type for bearer
* @rcu: rcu struct for tipc_bearer
* @priority: default link priority for bearer
* @window: default window size for bearer
@@ -151,6 +152,7 @@ struct tipc_bearer {
char name[TIPC_MAX_BEARER_NAME];
struct tipc_media *media;
struct tipc_media_addr bcast_addr;
+ struct packet_type pt;
struct rcu_head rcu;
u32 priority;
u32 window;
@@ -210,7 +212,6 @@ void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest);
struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name);
int tipc_bearer_get_name(struct net *net, char *name, u32 bearer_id);
struct tipc_media *tipc_media_find(const char *name);
-void tipc_bearer_reset_all(struct net *net);
int tipc_bearer_setup(void);
void tipc_bearer_cleanup(void);
void tipc_bearer_stop(struct net *net);
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 5cc5398be722..964342689f2c 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -132,6 +132,11 @@ static inline struct list_head *tipc_nodes(struct net *net)
return &tipc_net(net)->node_list;
}
+static inline struct tipc_server *tipc_topsrv(struct net *net)
+{
+ return tipc_net(net)->topsrv;
+}
+
static inline unsigned int tipc_hashfn(u32 addr)
{
return addr & (NODE_HTABLE_SIZE - 1);
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index 02462d67d191..92e4828c6b09 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -224,9 +224,9 @@ void tipc_disc_remove_dest(struct tipc_link_req *req)
*
* Called whenever a link setup request timer associated with a bearer expires.
*/
-static void disc_timeout(unsigned long data)
+static void disc_timeout(struct timer_list *t)
{
- struct tipc_link_req *req = (struct tipc_link_req *)data;
+ struct tipc_link_req *req = from_timer(req, t, timer);
struct sk_buff *skb;
int max_delay;
@@ -292,7 +292,7 @@ int tipc_disc_create(struct net *net, struct tipc_bearer *b,
req->num_nodes = 0;
req->timer_intv = TIPC_LINK_REQ_INIT;
spin_lock_init(&req->lock);
- setup_timer(&req->timer, disc_timeout, (unsigned long)req);
+ timer_setup(&req->timer, disc_timeout, 0);
mod_timer(&req->timer, jiffies + req->timer_intv);
b->link_req = req;
*skb = skb_clone(req->buf, GFP_ATOMIC);
diff --git a/net/tipc/group.c b/net/tipc/group.c
new file mode 100644
index 000000000000..7821085a7dd8
--- /dev/null
+++ b/net/tipc/group.c
@@ -0,0 +1,871 @@
+/*
+ * net/tipc/group.c: TIPC group messaging code
+ *
+ * Copyright (c) 2017, Ericsson AB
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "core.h"
+#include "addr.h"
+#include "group.h"
+#include "bcast.h"
+#include "server.h"
+#include "msg.h"
+#include "socket.h"
+#include "node.h"
+#include "name_table.h"
+#include "subscr.h"
+
+#define ADV_UNIT (((MAX_MSG_SIZE + MAX_H_SIZE) / FLOWCTL_BLK_SZ) + 1)
+#define ADV_IDLE ADV_UNIT
+#define ADV_ACTIVE (ADV_UNIT * 12)
+
+enum mbr_state {
+ MBR_QUARANTINED,
+ MBR_DISCOVERED,
+ MBR_JOINING,
+ MBR_PUBLISHED,
+ MBR_JOINED,
+ MBR_PENDING,
+ MBR_ACTIVE,
+ MBR_RECLAIMING,
+ MBR_REMITTED,
+ MBR_LEAVING
+};
+
+struct tipc_member {
+ struct rb_node tree_node;
+ struct list_head list;
+ struct list_head congested;
+ struct sk_buff *event_msg;
+ struct sk_buff_head deferredq;
+ struct tipc_group *group;
+ u32 node;
+ u32 port;
+ u32 instance;
+ enum mbr_state state;
+ u16 advertised;
+ u16 window;
+ u16 bc_rcv_nxt;
+ u16 bc_syncpt;
+ u16 bc_acked;
+ bool usr_pending;
+};
+
+struct tipc_group {
+ struct rb_root members;
+ struct list_head congested;
+ struct list_head pending;
+ struct list_head active;
+ struct list_head reclaiming;
+ struct tipc_nlist dests;
+ struct net *net;
+ int subid;
+ u32 type;
+ u32 instance;
+ u32 domain;
+ u32 scope;
+ u32 portid;
+ u16 member_cnt;
+ u16 active_cnt;
+ u16 max_active;
+ u16 bc_snd_nxt;
+ u16 bc_ackers;
+ bool loopback;
+ bool events;
+};
+
+static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m,
+ int mtyp, struct sk_buff_head *xmitq);
+
+static void tipc_group_decr_active(struct tipc_group *grp,
+ struct tipc_member *m)
+{
+ if (m->state == MBR_ACTIVE || m->state == MBR_RECLAIMING)
+ grp->active_cnt--;
+}
+
+static int tipc_group_rcvbuf_limit(struct tipc_group *grp)
+{
+ int max_active, active_pool, idle_pool;
+ int mcnt = grp->member_cnt + 1;
+
+ /* Limit simultaneous reception from other members */
+ max_active = min(mcnt / 8, 64);
+ max_active = max(max_active, 16);
+ grp->max_active = max_active;
+
+ /* Reserve blocks for active and idle members */
+ active_pool = max_active * ADV_ACTIVE;
+ idle_pool = (mcnt - max_active) * ADV_IDLE;
+
+ /* Scale to bytes, considering worst-case truesize/msgsize ratio */
+ return (active_pool + idle_pool) * FLOWCTL_BLK_SZ * 4;
+}
+
+u16 tipc_group_bc_snd_nxt(struct tipc_group *grp)
+{
+ return grp->bc_snd_nxt;
+}
+
+static bool tipc_group_is_enabled(struct tipc_member *m)
+{
+ return m->state != MBR_QUARANTINED && m->state != MBR_LEAVING;
+}
+
+static bool tipc_group_is_receiver(struct tipc_member *m)
+{
+ return m && m->state >= MBR_JOINED;
+}
+
+u32 tipc_group_exclude(struct tipc_group *grp)
+{
+ if (!grp->loopback)
+ return grp->portid;
+ return 0;
+}
+
+int tipc_group_size(struct tipc_group *grp)
+{
+ return grp->member_cnt;
+}
+
+struct tipc_group *tipc_group_create(struct net *net, u32 portid,
+ struct tipc_group_req *mreq)
+{
+ struct tipc_group *grp;
+ u32 type = mreq->type;
+
+ grp = kzalloc(sizeof(*grp), GFP_ATOMIC);
+ if (!grp)
+ return NULL;
+ tipc_nlist_init(&grp->dests, tipc_own_addr(net));
+ INIT_LIST_HEAD(&grp->congested);
+ INIT_LIST_HEAD(&grp->active);
+ INIT_LIST_HEAD(&grp->pending);
+ INIT_LIST_HEAD(&grp->reclaiming);
+ grp->members = RB_ROOT;
+ grp->net = net;
+ grp->portid = portid;
+ grp->domain = addr_domain(net, mreq->scope);
+ grp->type = type;
+ grp->instance = mreq->instance;
+ grp->scope = mreq->scope;
+ grp->loopback = mreq->flags & TIPC_GROUP_LOOPBACK;
+ grp->events = mreq->flags & TIPC_GROUP_MEMBER_EVTS;
+ if (tipc_topsrv_kern_subscr(net, portid, type, 0, ~0, &grp->subid))
+ return grp;
+ kfree(grp);
+ return NULL;
+}
+
+void tipc_group_delete(struct net *net, struct tipc_group *grp)
+{
+ struct rb_root *tree = &grp->members;
+ struct tipc_member *m, *tmp;
+ struct sk_buff_head xmitq;
+
+ __skb_queue_head_init(&xmitq);
+
+ rbtree_postorder_for_each_entry_safe(m, tmp, tree, tree_node) {
+ tipc_group_proto_xmit(grp, m, GRP_LEAVE_MSG, &xmitq);
+ list_del(&m->list);
+ kfree(m);
+ }
+ tipc_node_distr_xmit(net, &xmitq);
+ tipc_nlist_purge(&grp->dests);
+ tipc_topsrv_kern_unsubscr(net, grp->subid);
+ kfree(grp);
+}
+
+struct tipc_member *tipc_group_find_member(struct tipc_group *grp,
+ u32 node, u32 port)
+{
+ struct rb_node *n = grp->members.rb_node;
+ u64 nkey, key = (u64)node << 32 | port;
+ struct tipc_member *m;
+
+ while (n) {
+ m = container_of(n, struct tipc_member, tree_node);
+ nkey = (u64)m->node << 32 | m->port;
+ if (key < nkey)
+ n = n->rb_left;
+ else if (key > nkey)
+ n = n->rb_right;
+ else
+ return m;
+ }
+ return NULL;
+}
+
+static struct tipc_member *tipc_group_find_dest(struct tipc_group *grp,
+ u32 node, u32 port)
+{
+ struct tipc_member *m;
+
+ m = tipc_group_find_member(grp, node, port);
+ if (m && tipc_group_is_enabled(m))
+ return m;
+ return NULL;
+}
+
+static struct tipc_member *tipc_group_find_node(struct tipc_group *grp,
+ u32 node)
+{
+ struct tipc_member *m;
+ struct rb_node *n;
+
+ for (n = rb_first(&grp->members); n; n = rb_next(n)) {
+ m = container_of(n, struct tipc_member, tree_node);
+ if (m->node == node)
+ return m;
+ }
+ return NULL;
+}
+
+static void tipc_group_add_to_tree(struct tipc_group *grp,
+ struct tipc_member *m)
+{
+ u64 nkey, key = (u64)m->node << 32 | m->port;
+ struct rb_node **n, *parent = NULL;
+ struct tipc_member *tmp;
+
+ n = &grp->members.rb_node;
+ while (*n) {
+ tmp = container_of(*n, struct tipc_member, tree_node);
+ parent = *n;
+ tmp = container_of(parent, struct tipc_member, tree_node);
+ nkey = (u64)tmp->node << 32 | tmp->port;
+ if (key < nkey)
+ n = &(*n)->rb_left;
+ else if (key > nkey)
+ n = &(*n)->rb_right;
+ else
+ return;
+ }
+ rb_link_node(&m->tree_node, parent, n);
+ rb_insert_color(&m->tree_node, &grp->members);
+}
+
+static struct tipc_member *tipc_group_create_member(struct tipc_group *grp,
+ u32 node, u32 port,
+ int state)
+{
+ struct tipc_member *m;
+
+ m = kzalloc(sizeof(*m), GFP_ATOMIC);
+ if (!m)
+ return NULL;
+ INIT_LIST_HEAD(&m->list);
+ INIT_LIST_HEAD(&m->congested);
+ __skb_queue_head_init(&m->deferredq);
+ m->group = grp;
+ m->node = node;
+ m->port = port;
+ m->bc_acked = grp->bc_snd_nxt - 1;
+ grp->member_cnt++;
+ tipc_group_add_to_tree(grp, m);
+ tipc_nlist_add(&grp->dests, m->node);
+ m->state = state;
+ return m;
+}
+
+void tipc_group_add_member(struct tipc_group *grp, u32 node, u32 port)
+{
+ tipc_group_create_member(grp, node, port, MBR_DISCOVERED);
+}
+
+static void tipc_group_delete_member(struct tipc_group *grp,
+ struct tipc_member *m)
+{
+ rb_erase(&m->tree_node, &grp->members);
+ grp->member_cnt--;
+
+ /* Check if we were waiting for replicast ack from this member */
+ if (grp->bc_ackers && less(m->bc_acked, grp->bc_snd_nxt - 1))
+ grp->bc_ackers--;
+
+ list_del_init(&m->list);
+ list_del_init(&m->congested);
+ tipc_group_decr_active(grp, m);
+
+ /* If last member on a node, remove node from dest list */
+ if (!tipc_group_find_node(grp, m->node))
+ tipc_nlist_del(&grp->dests, m->node);
+
+ kfree(m);
+}
+
+struct tipc_nlist *tipc_group_dests(struct tipc_group *grp)
+{
+ return &grp->dests;
+}
+
+void tipc_group_self(struct tipc_group *grp, struct tipc_name_seq *seq,
+ int *scope)
+{
+ seq->type = grp->type;
+ seq->lower = grp->instance;
+ seq->upper = grp->instance;
+ *scope = grp->scope;
+}
+
+void tipc_group_update_member(struct tipc_member *m, int len)
+{
+ struct tipc_group *grp = m->group;
+ struct tipc_member *_m, *tmp;
+
+ if (!tipc_group_is_enabled(m))
+ return;
+
+ m->window -= len;
+
+ if (m->window >= ADV_IDLE)
+ return;
+
+ if (!list_empty(&m->congested))
+ return;
+
+ /* Sort member into congested members' list */
+ list_for_each_entry_safe(_m, tmp, &grp->congested, congested) {
+ if (m->window > _m->window)
+ continue;
+ list_add_tail(&m->congested, &_m->congested);
+ return;
+ }
+ list_add_tail(&m->congested, &grp->congested);
+}
+
+void tipc_group_update_bc_members(struct tipc_group *grp, int len, bool ack)
+{
+ u16 prev = grp->bc_snd_nxt - 1;
+ struct tipc_member *m;
+ struct rb_node *n;
+
+ for (n = rb_first(&grp->members); n; n = rb_next(n)) {
+ m = container_of(n, struct tipc_member, tree_node);
+ if (tipc_group_is_enabled(m)) {
+ tipc_group_update_member(m, len);
+ m->bc_acked = prev;
+ }
+ }
+
+ /* Mark number of acknowledges to expect, if any */
+ if (ack)
+ grp->bc_ackers = grp->member_cnt;
+ grp->bc_snd_nxt++;
+}
+
+bool tipc_group_cong(struct tipc_group *grp, u32 dnode, u32 dport,
+ int len, struct tipc_member **mbr)
+{
+ struct sk_buff_head xmitq;
+ struct tipc_member *m;
+ int adv, state;
+
+ m = tipc_group_find_dest(grp, dnode, dport);
+ *mbr = m;
+ if (!m)
+ return false;
+ if (m->usr_pending)
+ return true;
+ if (m->window >= len)
+ return false;
+ m->usr_pending = true;
+
+ /* If not fully advertised, do it now to prevent mutual blocking */
+ adv = m->advertised;
+ state = m->state;
+ if (state < MBR_JOINED)
+ return true;
+ if (state == MBR_JOINED && adv == ADV_IDLE)
+ return true;
+ if (state == MBR_ACTIVE && adv == ADV_ACTIVE)
+ return true;
+ if (state == MBR_PENDING && adv == ADV_IDLE)
+ return true;
+ skb_queue_head_init(&xmitq);
+ tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, &xmitq);
+ tipc_node_distr_xmit(grp->net, &xmitq);
+ return true;
+}
+
+bool tipc_group_bc_cong(struct tipc_group *grp, int len)
+{
+ struct tipc_member *m = NULL;
+
+ /* If prev bcast was replicast, reject until all receivers have acked */
+ if (grp->bc_ackers)
+ return true;
+
+ if (list_empty(&grp->congested))
+ return false;
+
+ m = list_first_entry(&grp->congested, struct tipc_member, congested);
+ if (m->window >= len)
+ return false;
+
+ return tipc_group_cong(grp, m->node, m->port, len, &m);
+}
+
+/* tipc_group_sort_msg() - sort msg into queue by bcast sequence number
+ */
+static void tipc_group_sort_msg(struct sk_buff *skb, struct sk_buff_head *defq)
+{
+ struct tipc_msg *_hdr, *hdr = buf_msg(skb);
+ u16 bc_seqno = msg_grp_bc_seqno(hdr);
+ struct sk_buff *_skb, *tmp;
+ int mtyp = msg_type(hdr);
+
+ /* Bcast/mcast may be bypassed by ucast or other bcast, - sort it in */
+ if (mtyp == TIPC_GRP_BCAST_MSG || mtyp == TIPC_GRP_MCAST_MSG) {
+ skb_queue_walk_safe(defq, _skb, tmp) {
+ _hdr = buf_msg(_skb);
+ if (!less(bc_seqno, msg_grp_bc_seqno(_hdr)))
+ continue;
+ __skb_queue_before(defq, _skb, skb);
+ return;
+ }
+ /* Bcast was not bypassed, - add to tail */
+ }
+ /* Unicasts are never bypassed, - always add to tail */
+ __skb_queue_tail(defq, skb);
+}
+
+/* tipc_group_filter_msg() - determine if we should accept arriving message
+ */
+void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq,
+ struct sk_buff_head *xmitq)
+{
+ struct sk_buff *skb = __skb_dequeue(inputq);
+ bool ack, deliver, update, leave = false;
+ struct sk_buff_head *defq;
+ struct tipc_member *m;
+ struct tipc_msg *hdr;
+ u32 node, port;
+ int mtyp, blks;
+
+ if (!skb)
+ return;
+
+ hdr = buf_msg(skb);
+ node = msg_orignode(hdr);
+ port = msg_origport(hdr);
+
+ if (!msg_in_group(hdr))
+ goto drop;
+
+ m = tipc_group_find_member(grp, node, port);
+ if (!tipc_group_is_receiver(m))
+ goto drop;
+
+ if (less(msg_grp_bc_seqno(hdr), m->bc_rcv_nxt))
+ goto drop;
+
+ TIPC_SKB_CB(skb)->orig_member = m->instance;
+ defq = &m->deferredq;
+ tipc_group_sort_msg(skb, defq);
+
+ while ((skb = skb_peek(defq))) {
+ hdr = buf_msg(skb);
+ mtyp = msg_type(hdr);
+ deliver = true;
+ ack = false;
+ update = false;
+
+ if (more(msg_grp_bc_seqno(hdr), m->bc_rcv_nxt))
+ break;
+
+ /* Decide what to do with message */
+ switch (mtyp) {
+ case TIPC_GRP_MCAST_MSG:
+ if (msg_nameinst(hdr) != grp->instance) {
+ update = true;
+ deliver = false;
+ }
+ /* Fall thru */
+ case TIPC_GRP_BCAST_MSG:
+ m->bc_rcv_nxt++;
+ ack = msg_grp_bc_ack_req(hdr);
+ break;
+ case TIPC_GRP_UCAST_MSG:
+ break;
+ case TIPC_GRP_MEMBER_EVT:
+ if (m->state == MBR_LEAVING)
+ leave = true;
+ if (!grp->events)
+ deliver = false;
+ break;
+ default:
+ break;
+ }
+
+ /* Execute decisions */
+ __skb_dequeue(defq);
+ if (deliver)
+ __skb_queue_tail(inputq, skb);
+ else
+ kfree_skb(skb);
+
+ if (ack)
+ tipc_group_proto_xmit(grp, m, GRP_ACK_MSG, xmitq);
+
+ if (leave) {
+ tipc_group_delete_member(grp, m);
+ __skb_queue_purge(defq);
+ break;
+ }
+ if (!update)
+ continue;
+
+ blks = msg_blocks(hdr);
+ tipc_group_update_rcv_win(grp, blks, node, port, xmitq);
+ }
+ return;
+drop:
+ kfree_skb(skb);
+}
+
+void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node,
+ u32 port, struct sk_buff_head *xmitq)
+{
+ struct list_head *active = &grp->active;
+ int max_active = grp->max_active;
+ int reclaim_limit = max_active * 3 / 4;
+ int active_cnt = grp->active_cnt;
+ struct tipc_member *m, *rm;
+
+ m = tipc_group_find_member(grp, node, port);
+ if (!m)
+ return;
+
+ m->advertised -= blks;
+
+ switch (m->state) {
+ case MBR_JOINED:
+ /* Reclaim advertised space from least active member */
+ if (!list_empty(active) && active_cnt >= reclaim_limit) {
+ rm = list_first_entry(active, struct tipc_member, list);
+ rm->state = MBR_RECLAIMING;
+ list_move_tail(&rm->list, &grp->reclaiming);
+ tipc_group_proto_xmit(grp, rm, GRP_RECLAIM_MSG, xmitq);
+ }
+ /* If max active, become pending and wait for reclaimed space */
+ if (active_cnt >= max_active) {
+ m->state = MBR_PENDING;
+ list_add_tail(&m->list, &grp->pending);
+ break;
+ }
+ /* Otherwise become active */
+ m->state = MBR_ACTIVE;
+ list_add_tail(&m->list, &grp->active);
+ grp->active_cnt++;
+ /* Fall through */
+ case MBR_ACTIVE:
+ if (!list_is_last(&m->list, &grp->active))
+ list_move_tail(&m->list, &grp->active);
+ if (m->advertised > (ADV_ACTIVE * 3 / 4))
+ break;
+ tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
+ break;
+ case MBR_REMITTED:
+ if (m->advertised > ADV_IDLE)
+ break;
+ m->state = MBR_JOINED;
+ if (m->advertised < ADV_IDLE) {
+ pr_warn_ratelimited("Rcv unexpected msg after REMIT\n");
+ tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
+ }
+ break;
+ case MBR_RECLAIMING:
+ case MBR_DISCOVERED:
+ case MBR_JOINING:
+ case MBR_LEAVING:
+ default:
+ break;
+ }
+}
+
+static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m,
+ int mtyp, struct sk_buff_head *xmitq)
+{
+ struct tipc_msg *hdr;
+ struct sk_buff *skb;
+ int adv = 0;
+
+ skb = tipc_msg_create(GROUP_PROTOCOL, mtyp, INT_H_SIZE, 0,
+ m->node, tipc_own_addr(grp->net),
+ m->port, grp->portid, 0);
+ if (!skb)
+ return;
+
+ if (m->state == MBR_ACTIVE)
+ adv = ADV_ACTIVE - m->advertised;
+ else if (m->state == MBR_JOINED || m->state == MBR_PENDING)
+ adv = ADV_IDLE - m->advertised;
+
+ hdr = buf_msg(skb);
+
+ if (mtyp == GRP_JOIN_MSG) {
+ msg_set_grp_bc_syncpt(hdr, grp->bc_snd_nxt);
+ msg_set_adv_win(hdr, adv);
+ m->advertised += adv;
+ } else if (mtyp == GRP_LEAVE_MSG) {
+ msg_set_grp_bc_syncpt(hdr, grp->bc_snd_nxt);
+ } else if (mtyp == GRP_ADV_MSG) {
+ msg_set_adv_win(hdr, adv);
+ m->advertised += adv;
+ } else if (mtyp == GRP_ACK_MSG) {
+ msg_set_grp_bc_acked(hdr, m->bc_rcv_nxt);
+ } else if (mtyp == GRP_REMIT_MSG) {
+ msg_set_grp_remitted(hdr, m->window);
+ }
+ __skb_queue_tail(xmitq, skb);
+}
+
+void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
+ struct tipc_msg *hdr, struct sk_buff_head *inputq,
+ struct sk_buff_head *xmitq)
+{
+ u32 node = msg_orignode(hdr);
+ u32 port = msg_origport(hdr);
+ struct tipc_member *m, *pm;
+ struct tipc_msg *ehdr;
+ u16 remitted, in_flight;
+
+ if (!grp)
+ return;
+
+ m = tipc_group_find_member(grp, node, port);
+
+ switch (msg_type(hdr)) {
+ case GRP_JOIN_MSG:
+ if (!m)
+ m = tipc_group_create_member(grp, node, port,
+ MBR_QUARANTINED);
+ if (!m)
+ return;
+ m->bc_syncpt = msg_grp_bc_syncpt(hdr);
+ m->bc_rcv_nxt = m->bc_syncpt;
+ m->window += msg_adv_win(hdr);
+
+ /* Wait until PUBLISH event is received */
+ if (m->state == MBR_DISCOVERED) {
+ m->state = MBR_JOINING;
+ } else if (m->state == MBR_PUBLISHED) {
+ m->state = MBR_JOINED;
+ *usr_wakeup = true;
+ m->usr_pending = false;
+ tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
+ ehdr = buf_msg(m->event_msg);
+ msg_set_grp_bc_seqno(ehdr, m->bc_syncpt);
+ __skb_queue_tail(inputq, m->event_msg);
+ }
+ if (m->window < ADV_IDLE)
+ tipc_group_update_member(m, 0);
+ else
+ list_del_init(&m->congested);
+ return;
+ case GRP_LEAVE_MSG:
+ if (!m)
+ return;
+ m->bc_syncpt = msg_grp_bc_syncpt(hdr);
+
+ /* Wait until WITHDRAW event is received */
+ if (m->state != MBR_LEAVING) {
+ tipc_group_decr_active(grp, m);
+ m->state = MBR_LEAVING;
+ return;
+ }
+ /* Otherwise deliver already received WITHDRAW event */
+ ehdr = buf_msg(m->event_msg);
+ msg_set_grp_bc_seqno(ehdr, m->bc_syncpt);
+ __skb_queue_tail(inputq, m->event_msg);
+ *usr_wakeup = true;
+ list_del_init(&m->congested);
+ return;
+ case GRP_ADV_MSG:
+ if (!m)
+ return;
+ m->window += msg_adv_win(hdr);
+ *usr_wakeup = m->usr_pending;
+ m->usr_pending = false;
+ list_del_init(&m->congested);
+ return;
+ case GRP_ACK_MSG:
+ if (!m)
+ return;
+ m->bc_acked = msg_grp_bc_acked(hdr);
+ if (--grp->bc_ackers)
+ break;
+ *usr_wakeup = true;
+ m->usr_pending = false;
+ return;
+ case GRP_RECLAIM_MSG:
+ if (!m)
+ return;
+ *usr_wakeup = m->usr_pending;
+ m->usr_pending = false;
+ tipc_group_proto_xmit(grp, m, GRP_REMIT_MSG, xmitq);
+ m->window = ADV_IDLE;
+ return;
+ case GRP_REMIT_MSG:
+ if (!m || m->state != MBR_RECLAIMING)
+ return;
+
+ list_del_init(&m->list);
+ grp->active_cnt--;
+ remitted = msg_grp_remitted(hdr);
+
+ /* Messages preceding the REMIT still in receive queue */
+ if (m->advertised > remitted) {
+ m->state = MBR_REMITTED;
+ in_flight = m->advertised - remitted;
+ }
+ /* All messages preceding the REMIT have been read */
+ if (m->advertised <= remitted) {
+ m->state = MBR_JOINED;
+ in_flight = 0;
+ }
+ /* ..and the REMIT overtaken by more messages => re-advertise */
+ if (m->advertised < remitted)
+ tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
+
+ m->advertised = ADV_IDLE + in_flight;
+
+ /* Set oldest pending member to active and advertise */
+ if (list_empty(&grp->pending))
+ return;
+ pm = list_first_entry(&grp->pending, struct tipc_member, list);
+ pm->state = MBR_ACTIVE;
+ list_move_tail(&pm->list, &grp->active);
+ grp->active_cnt++;
+ if (pm->advertised <= (ADV_ACTIVE * 3 / 4))
+ tipc_group_proto_xmit(grp, pm, GRP_ADV_MSG, xmitq);
+ return;
+ default:
+ pr_warn("Received unknown GROUP_PROTO message\n");
+ }
+}
+
+/* tipc_group_member_evt() - receive and handle a member up/down event
+ */
+void tipc_group_member_evt(struct tipc_group *grp,
+ bool *usr_wakeup,
+ int *sk_rcvbuf,
+ struct sk_buff *skb,
+ struct sk_buff_head *inputq,
+ struct sk_buff_head *xmitq)
+{
+ struct tipc_msg *hdr = buf_msg(skb);
+ struct tipc_event *evt = (void *)msg_data(hdr);
+ u32 instance = evt->found_lower;
+ u32 node = evt->port.node;
+ u32 port = evt->port.ref;
+ int event = evt->event;
+ struct tipc_member *m;
+ struct net *net;
+ bool node_up;
+ u32 self;
+
+ if (!grp)
+ goto drop;
+
+ net = grp->net;
+ self = tipc_own_addr(net);
+ if (!grp->loopback && node == self && port == grp->portid)
+ goto drop;
+
+ /* Convert message before delivery to user */
+ msg_set_hdr_sz(hdr, GROUP_H_SIZE);
+ msg_set_user(hdr, TIPC_CRITICAL_IMPORTANCE);
+ msg_set_type(hdr, TIPC_GRP_MEMBER_EVT);
+ msg_set_origport(hdr, port);
+ msg_set_orignode(hdr, node);
+ msg_set_nametype(hdr, grp->type);
+ msg_set_grp_evt(hdr, event);
+
+ m = tipc_group_find_member(grp, node, port);
+
+ if (event == TIPC_PUBLISHED) {
+ if (!m)
+ m = tipc_group_create_member(grp, node, port,
+ MBR_DISCOVERED);
+ if (!m)
+ goto drop;
+
+ /* Hold back event if JOIN message not yet received */
+ if (m->state == MBR_DISCOVERED) {
+ m->event_msg = skb;
+ m->state = MBR_PUBLISHED;
+ } else {
+ msg_set_grp_bc_seqno(hdr, m->bc_syncpt);
+ __skb_queue_tail(inputq, skb);
+ m->state = MBR_JOINED;
+ *usr_wakeup = true;
+ m->usr_pending = false;
+ }
+ m->instance = instance;
+ TIPC_SKB_CB(skb)->orig_member = m->instance;
+ tipc_group_proto_xmit(grp, m, GRP_JOIN_MSG, xmitq);
+ if (m->window < ADV_IDLE)
+ tipc_group_update_member(m, 0);
+ else
+ list_del_init(&m->congested);
+ } else if (event == TIPC_WITHDRAWN) {
+ if (!m)
+ goto drop;
+
+ TIPC_SKB_CB(skb)->orig_member = m->instance;
+
+ *usr_wakeup = true;
+ m->usr_pending = false;
+ node_up = tipc_node_is_up(net, node);
+
+ /* Hold back event if more messages might be expected */
+ if (m->state != MBR_LEAVING && node_up) {
+ m->event_msg = skb;
+ tipc_group_decr_active(grp, m);
+ m->state = MBR_LEAVING;
+ } else {
+ if (node_up)
+ msg_set_grp_bc_seqno(hdr, m->bc_syncpt);
+ else
+ msg_set_grp_bc_seqno(hdr, m->bc_rcv_nxt);
+ __skb_queue_tail(inputq, skb);
+ }
+ list_del_init(&m->congested);
+ }
+ *sk_rcvbuf = tipc_group_rcvbuf_limit(grp);
+ return;
+drop:
+ kfree_skb(skb);
+}
diff --git a/net/tipc/group.h b/net/tipc/group.h
new file mode 100644
index 000000000000..d525e1cd7de5
--- /dev/null
+++ b/net/tipc/group.h
@@ -0,0 +1,73 @@
+/*
+ * net/tipc/group.h: Include file for TIPC group unicast/multicast functions
+ *
+ * Copyright (c) 2017, Ericsson AB
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _TIPC_GROUP_H
+#define _TIPC_GROUP_H
+
+#include "core.h"
+
+struct tipc_group;
+struct tipc_member;
+struct tipc_msg;
+
+struct tipc_group *tipc_group_create(struct net *net, u32 portid,
+ struct tipc_group_req *mreq);
+void tipc_group_delete(struct net *net, struct tipc_group *grp);
+void tipc_group_add_member(struct tipc_group *grp, u32 node, u32 port);
+struct tipc_nlist *tipc_group_dests(struct tipc_group *grp);
+void tipc_group_self(struct tipc_group *grp, struct tipc_name_seq *seq,
+ int *scope);
+u32 tipc_group_exclude(struct tipc_group *grp);
+void tipc_group_filter_msg(struct tipc_group *grp,
+ struct sk_buff_head *inputq,
+ struct sk_buff_head *xmitq);
+void tipc_group_member_evt(struct tipc_group *grp, bool *wakeup,
+ int *sk_rcvbuf, struct sk_buff *skb,
+ struct sk_buff_head *inputq,
+ struct sk_buff_head *xmitq);
+void tipc_group_proto_rcv(struct tipc_group *grp, bool *wakeup,
+ struct tipc_msg *hdr,
+ struct sk_buff_head *inputq,
+ struct sk_buff_head *xmitq);
+void tipc_group_update_bc_members(struct tipc_group *grp, int len, bool ack);
+bool tipc_group_cong(struct tipc_group *grp, u32 dnode, u32 dport,
+ int len, struct tipc_member **m);
+bool tipc_group_bc_cong(struct tipc_group *grp, int len);
+void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node,
+ u32 port, struct sk_buff_head *xmitq);
+u16 tipc_group_bc_snd_nxt(struct tipc_group *grp);
+void tipc_group_update_member(struct tipc_member *m, int len);
+int tipc_group_size(struct tipc_group *grp);
+#endif
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 60820dc35a08..6bce0b1117bd 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -239,7 +239,8 @@ static int link_is_up(struct tipc_link *l)
static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
struct sk_buff_head *xmitq);
static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
- u16 rcvgap, int tolerance, int priority,
+ bool probe_reply, u16 rcvgap,
+ int tolerance, int priority,
struct sk_buff_head *xmitq);
static void link_print(struct tipc_link *l, const char *str);
static int tipc_link_build_nack_msg(struct tipc_link *l,
@@ -773,7 +774,7 @@ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq)
}
if (state || probe || setup)
- tipc_link_build_proto_msg(l, mtyp, probe, 0, 0, 0, xmitq);
+ tipc_link_build_proto_msg(l, mtyp, probe, 0, 0, 0, 0, xmitq);
return rc;
}
@@ -978,15 +979,15 @@ static void link_retransmit_failure(struct tipc_link *l, struct sk_buff *skb)
struct tipc_msg *hdr = buf_msg(skb);
pr_warn("Retransmission failure on link <%s>\n", l->name);
- link_print(l, "Resetting link ");
+ link_print(l, "State of link ");
pr_info("Failed msg: usr %u, typ %u, len %u, err %u\n",
msg_user(hdr), msg_type(hdr), msg_size(hdr), msg_errcode(hdr));
pr_info("sqno %u, prev: %x, src: %x\n",
msg_seqno(hdr), msg_prevnode(hdr), msg_orignode(hdr));
}
-int tipc_link_retrans(struct tipc_link *l, u16 from, u16 to,
- struct sk_buff_head *xmitq)
+int tipc_link_retrans(struct tipc_link *l, struct tipc_link *nacker,
+ u16 from, u16 to, struct sk_buff_head *xmitq)
{
struct sk_buff *_skb, *skb = skb_peek(&l->transmq);
struct tipc_msg *hdr;
@@ -997,11 +998,14 @@ int tipc_link_retrans(struct tipc_link *l, u16 from, u16 to,
return 0;
/* Detect repeated retransmit failures on same packet */
- if (likely(l->last_retransm != buf_seqno(skb))) {
- l->last_retransm = buf_seqno(skb);
- l->stale_count = 1;
- } else if (++l->stale_count > 100) {
+ if (nacker->last_retransm != buf_seqno(skb)) {
+ nacker->last_retransm = buf_seqno(skb);
+ nacker->stale_count = 1;
+ } else if (++nacker->stale_count > 100) {
link_retransmit_failure(l, skb);
+ nacker->stale_count = 0;
+ if (link_is_bc_sndlink(l))
+ return TIPC_LINK_DOWN_EVT;
return tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
}
@@ -1036,6 +1040,7 @@ int tipc_link_retrans(struct tipc_link *l, u16 from, u16 to,
static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb,
struct sk_buff_head *inputq)
{
+ struct sk_buff_head *mc_inputq = l->bc_rcvlink->inputq;
struct tipc_msg *hdr = buf_msg(skb);
switch (msg_user(hdr)) {
@@ -1043,13 +1048,16 @@ static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb,
case TIPC_MEDIUM_IMPORTANCE:
case TIPC_HIGH_IMPORTANCE:
case TIPC_CRITICAL_IMPORTANCE:
- if (unlikely(msg_type(hdr) == TIPC_MCAST_MSG)) {
- skb_queue_tail(l->bc_rcvlink->inputq, skb);
+ if (unlikely(msg_in_group(hdr) || msg_mcast(hdr))) {
+ skb_queue_tail(mc_inputq, skb);
return true;
}
case CONN_MANAGER:
skb_queue_tail(inputq, skb);
return true;
+ case GROUP_PROTOCOL:
+ skb_queue_tail(mc_inputq, skb);
+ return true;
case NAME_DISTRIBUTOR:
l->bc_rcvlink->state = LINK_ESTABLISHED;
skb_queue_tail(l->namedq, skb);
@@ -1167,7 +1175,7 @@ int tipc_link_build_state_msg(struct tipc_link *l, struct sk_buff_head *xmitq)
/* Unicast ACK */
l->rcv_unacked = 0;
l->stats.sent_acks++;
- tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, xmitq);
+ tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, 0, xmitq);
return 0;
}
@@ -1181,7 +1189,7 @@ void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq)
if (l->state == LINK_ESTABLISHING)
mtyp = ACTIVATE_MSG;
- tipc_link_build_proto_msg(l, mtyp, 0, 0, 0, 0, xmitq);
+ tipc_link_build_proto_msg(l, mtyp, 0, 0, 0, 0, 0, xmitq);
/* Inform peer that this endpoint is going down if applicable */
skb = skb_peek_tail(xmitq);
@@ -1208,7 +1216,7 @@ static int tipc_link_build_nack_msg(struct tipc_link *l,
}
if ((skb_queue_len(&l->deferdq) == 1) || !(def_cnt % TIPC_NACK_INTV))
- tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, xmitq);
+ tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, 0, xmitq);
return 0;
}
@@ -1282,7 +1290,8 @@ drop:
}
static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
- u16 rcvgap, int tolerance, int priority,
+ bool probe_reply, u16 rcvgap,
+ int tolerance, int priority,
struct sk_buff_head *xmitq)
{
struct tipc_link *bcl = l->bc_rcvlink;
@@ -1330,6 +1339,7 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
msg_set_seq_gap(hdr, rcvgap);
msg_set_bc_gap(hdr, link_bc_rcv_gap(bcl));
msg_set_probe(hdr, probe);
+ msg_set_is_keepalive(hdr, probe || probe_reply);
tipc_mon_prep(l->net, data, &dlen, mstate, l->bearer_id);
msg_set_size(hdr, INT_H_SIZE + dlen);
skb_trim(skb, INT_H_SIZE + dlen);
@@ -1435,6 +1445,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
u16 rcv_nxt = l->rcv_nxt;
u16 dlen = msg_data_sz(hdr);
int mtyp = msg_type(hdr);
+ bool reply = msg_probe(hdr);
void *data;
char *if_name;
int rc = 0;
@@ -1521,14 +1532,14 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
/* Send NACK if peer has sent pkts we haven't received yet */
if (more(peers_snd_nxt, rcv_nxt) && !tipc_link_is_synching(l))
rcvgap = peers_snd_nxt - l->rcv_nxt;
- if (rcvgap || (msg_probe(hdr)))
- tipc_link_build_proto_msg(l, STATE_MSG, 0, rcvgap,
- 0, 0, xmitq);
+ if (rcvgap || reply)
+ tipc_link_build_proto_msg(l, STATE_MSG, 0, reply,
+ rcvgap, 0, 0, xmitq);
tipc_link_release_pkts(l, ack);
/* If NACK, retransmit will now start at right position */
if (gap) {
- rc = tipc_link_retrans(l, ack + 1, ack + gap, xmitq);
+ rc = tipc_link_retrans(l, l, ack + 1, ack + gap, xmitq);
l->stats.recv_nacks++;
}
@@ -1680,7 +1691,7 @@ int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr,
return rc;
if (link_bc_retr_eval(snd_l, &from, &to))
- rc = tipc_link_retrans(snd_l, from, to, xmitq);
+ rc = tipc_link_retrans(snd_l, l, from, to, xmitq);
l->snd_nxt = peers_snd_nxt;
if (link_bc_rcv_gap(l))
@@ -1775,7 +1786,7 @@ int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb,
if (dnode == tipc_own_addr(l->net)) {
tipc_link_bc_ack_rcv(l, acked, xmitq);
- rc = tipc_link_retrans(l->bc_sndlink, from, to, xmitq);
+ rc = tipc_link_retrans(l->bc_sndlink, l, from, to, xmitq);
l->stats.recv_nacks++;
return rc;
}
@@ -2115,14 +2126,14 @@ void tipc_link_set_tolerance(struct tipc_link *l, u32 tol,
struct sk_buff_head *xmitq)
{
l->tolerance = tol;
- tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, tol, 0, xmitq);
+ tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, tol, 0, xmitq);
}
void tipc_link_set_prio(struct tipc_link *l, u32 prio,
struct sk_buff_head *xmitq)
{
l->priority = prio;
- tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, prio, xmitq);
+ tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, prio, xmitq);
}
void tipc_link_set_abort_limit(struct tipc_link *l, u32 limit)
diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c
index 9e109bb1a207..8e884ed06d4b 100644
--- a/net/tipc/monitor.c
+++ b/net/tipc/monitor.c
@@ -530,8 +530,11 @@ void tipc_mon_prep(struct net *net, void *data, int *dlen,
u16 gen = mon->dom_gen;
u16 len;
- if (!tipc_mon_is_active(net, mon))
+ /* Send invalid record if not active */
+ if (!tipc_mon_is_active(net, mon)) {
+ dom->len = 0;
return;
+ }
/* Send only a dummy record with ack if peer has acked our last sent */
if (likely(state->acked_gen == gen)) {
@@ -559,6 +562,12 @@ void tipc_mon_get_state(struct net *net, u32 addr,
struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
struct tipc_peer *peer;
+ if (!tipc_mon_is_active(net, mon)) {
+ state->probing = false;
+ state->monitoring = true;
+ return;
+ }
+
/* Used cached state if table has not changed */
if (!state->probing &&
(state->list_gen == mon->list_gen) &&
@@ -578,9 +587,9 @@ void tipc_mon_get_state(struct net *net, u32 addr,
read_unlock_bh(&mon->lock);
}
-static void mon_timeout(unsigned long m)
+static void mon_timeout(struct timer_list *t)
{
- struct tipc_monitor *mon = (void *)m;
+ struct tipc_monitor *mon = from_timer(mon, t, timer);
struct tipc_peer *self;
int best_member_cnt = dom_size(mon->peer_cnt) - 1;
@@ -623,7 +632,7 @@ int tipc_mon_create(struct net *net, int bearer_id)
self->is_up = true;
self->is_head = true;
INIT_LIST_HEAD(&self->list);
- setup_timer(&mon->timer, mon_timeout, (unsigned long)mon);
+ timer_setup(&mon->timer, mon_timeout, 0);
mon->timer_intv = msecs_to_jiffies(MON_TIMEOUT + (tn->random & 0xffff));
mod_timer(&mon->timer, jiffies + mon->timer_intv);
return 0;
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index dcd90e6fa7c3..b0d07b35909d 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -174,7 +174,7 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
if (fragid == LAST_FRAGMENT) {
TIPC_SKB_CB(head)->validated = false;
- if (unlikely(!tipc_msg_validate(head)))
+ if (unlikely(!tipc_msg_validate(&head)))
goto err;
*buf = head;
TIPC_SKB_CB(head)->tail = NULL;
@@ -201,11 +201,21 @@ err:
* TIPC will ignore the excess, under the assumption that it is optional info
* introduced by a later release of the protocol.
*/
-bool tipc_msg_validate(struct sk_buff *skb)
+bool tipc_msg_validate(struct sk_buff **_skb)
{
- struct tipc_msg *msg;
+ struct sk_buff *skb = *_skb;
+ struct tipc_msg *hdr;
int msz, hsz;
+ /* Ensure that flow control ratio condition is satisfied */
+ if (unlikely(skb->truesize / buf_roundup_len(skb) > 4)) {
+ skb = skb_copy(skb, GFP_ATOMIC);
+ if (!skb)
+ return false;
+ kfree_skb(*_skb);
+ *_skb = skb;
+ }
+
if (unlikely(TIPC_SKB_CB(skb)->validated))
return true;
if (unlikely(!pskb_may_pull(skb, MIN_H_SIZE)))
@@ -217,11 +227,11 @@ bool tipc_msg_validate(struct sk_buff *skb)
if (unlikely(!pskb_may_pull(skb, hsz)))
return false;
- msg = buf_msg(skb);
- if (unlikely(msg_version(msg) != TIPC_VERSION))
+ hdr = buf_msg(skb);
+ if (unlikely(msg_version(hdr) != TIPC_VERSION))
return false;
- msz = msg_size(msg);
+ msz = msg_size(hdr);
if (unlikely(msz < hsz))
return false;
if (unlikely((msz - hsz) > TIPC_MAX_USER_MSG_SIZE))
@@ -411,7 +421,7 @@ bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos)
skb_pull(*iskb, offset);
imsz = msg_size(buf_msg(*iskb));
skb_trim(*iskb, imsz);
- if (unlikely(!tipc_msg_validate(*iskb)))
+ if (unlikely(!tipc_msg_validate(iskb)))
goto none;
*pos += align(imsz);
return true;
@@ -479,13 +489,14 @@ bool tipc_msg_make_bundle(struct sk_buff **skb, struct tipc_msg *msg,
bool tipc_msg_reverse(u32 own_node, struct sk_buff **skb, int err)
{
struct sk_buff *_skb = *skb;
- struct tipc_msg *hdr = buf_msg(_skb);
+ struct tipc_msg *hdr;
struct tipc_msg ohdr;
- int dlen = min_t(uint, msg_data_sz(hdr), MAX_FORWARD_SIZE);
+ int dlen;
if (skb_linearize(_skb))
goto exit;
hdr = buf_msg(_skb);
+ dlen = min_t(uint, msg_data_sz(hdr), MAX_FORWARD_SIZE);
if (msg_dest_droppable(hdr))
goto exit;
if (msg_errcode(hdr))
@@ -511,6 +522,8 @@ bool tipc_msg_reverse(u32 own_node, struct sk_buff **skb, int err)
pskb_expand_head(_skb, BUF_HEADROOM, BUF_TAILROOM, GFP_ATOMIC))
goto exit;
+ /* reassign after skb header modifications */
+ hdr = buf_msg(_skb);
/* Now reverse the concerned fields */
msg_set_errcode(hdr, err);
msg_set_non_seq(hdr, 0);
@@ -548,7 +561,7 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err)
return false;
if (msg_errcode(msg))
return false;
- *err = -TIPC_ERR_NO_NAME;
+ *err = TIPC_ERR_NO_NAME;
if (skb_linearize(skb))
return false;
msg = buf_msg(skb);
@@ -565,6 +578,14 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err)
msg_set_destnode(msg, dnode);
msg_set_destport(msg, dport);
*err = TIPC_OK;
+
+ if (!skb_cloned(skb))
+ return true;
+
+ /* Unclone buffer in case it was bundled */
+ if (pskb_expand_head(skb, BUF_HEADROOM, BUF_TAILROOM, GFP_ATOMIC))
+ return false;
+
return true;
}
@@ -655,3 +676,10 @@ void __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno,
}
kfree_skb(skb);
}
+
+void tipc_skb_reject(struct net *net, int err, struct sk_buff *skb,
+ struct sk_buff_head *xmitq)
+{
+ if (tipc_msg_reverse(tipc_own_addr(net), &skb, err))
+ __skb_queue_tail(xmitq, skb);
+}
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index c843fd2bc48d..3e4384c222f7 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -1,7 +1,7 @@
/*
* net/tipc/msg.h: Include file for TIPC message header routines
*
- * Copyright (c) 2000-2007, 2014-2015 Ericsson AB
+ * Copyright (c) 2000-2007, 2014-2017 Ericsson AB
* Copyright (c) 2005-2008, 2010-2011, Wind River Systems
* All rights reserved.
*
@@ -61,10 +61,14 @@ struct plist;
/*
* Payload message types
*/
-#define TIPC_CONN_MSG 0
-#define TIPC_MCAST_MSG 1
-#define TIPC_NAMED_MSG 2
-#define TIPC_DIRECT_MSG 3
+#define TIPC_CONN_MSG 0
+#define TIPC_MCAST_MSG 1
+#define TIPC_NAMED_MSG 2
+#define TIPC_DIRECT_MSG 3
+#define TIPC_GRP_MEMBER_EVT 4
+#define TIPC_GRP_BCAST_MSG 5
+#define TIPC_GRP_MCAST_MSG 6
+#define TIPC_GRP_UCAST_MSG 7
/*
* Internal message users
@@ -73,11 +77,13 @@ struct plist;
#define MSG_BUNDLER 6
#define LINK_PROTOCOL 7
#define CONN_MANAGER 8
+#define GROUP_PROTOCOL 9
#define TUNNEL_PROTOCOL 10
#define NAME_DISTRIBUTOR 11
#define MSG_FRAGMENTER 12
#define LINK_CONFIG 13
#define SOCK_WAKEUP 14 /* pseudo user */
+#define TOP_SRV 15 /* pseudo user */
/*
* Message header sizes
@@ -86,6 +92,7 @@ struct plist;
#define BASIC_H_SIZE 32 /* Basic payload message */
#define NAMED_H_SIZE 40 /* Named payload message */
#define MCAST_H_SIZE 44 /* Multicast payload message */
+#define GROUP_H_SIZE 44 /* Group payload message */
#define INT_H_SIZE 40 /* Internal messages */
#define MIN_H_SIZE 24 /* Smallest legal TIPC header size */
#define MAX_H_SIZE 60 /* Largest possible TIPC header size */
@@ -96,6 +103,7 @@ struct plist;
struct tipc_skb_cb {
u32 bytes_read;
+ u32 orig_member;
struct sk_buff *tail;
bool validated;
u16 chain_imp;
@@ -188,6 +196,11 @@ static inline u32 msg_size(struct tipc_msg *m)
return msg_bits(m, 0, 0, 0x1ffff);
}
+static inline u32 msg_blocks(struct tipc_msg *m)
+{
+ return (msg_size(m) / 1024) + 1;
+}
+
static inline u32 msg_data_sz(struct tipc_msg *m)
{
return msg_size(m) - msg_hdr_sz(m);
@@ -213,6 +226,16 @@ static inline void msg_set_dest_droppable(struct tipc_msg *m, u32 d)
msg_set_bits(m, 0, 19, 1, d);
}
+static inline int msg_is_keepalive(struct tipc_msg *m)
+{
+ return msg_bits(m, 0, 19, 1);
+}
+
+static inline void msg_set_is_keepalive(struct tipc_msg *m, u32 d)
+{
+ msg_set_bits(m, 0, 19, 1, d);
+}
+
static inline int msg_src_droppable(struct tipc_msg *m)
{
return msg_bits(m, 0, 18, 1);
@@ -251,6 +274,18 @@ static inline void msg_set_type(struct tipc_msg *m, u32 n)
msg_set_bits(m, 1, 29, 0x7, n);
}
+static inline int msg_in_group(struct tipc_msg *m)
+{
+ int mtyp = msg_type(m);
+
+ return mtyp >= TIPC_GRP_MEMBER_EVT && mtyp <= TIPC_GRP_UCAST_MSG;
+}
+
+static inline bool msg_is_grp_evt(struct tipc_msg *m)
+{
+ return msg_type(m) == TIPC_GRP_MEMBER_EVT;
+}
+
static inline u32 msg_named(struct tipc_msg *m)
{
return msg_type(m) == TIPC_NAMED_MSG;
@@ -258,7 +293,10 @@ static inline u32 msg_named(struct tipc_msg *m)
static inline u32 msg_mcast(struct tipc_msg *m)
{
- return msg_type(m) == TIPC_MCAST_MSG;
+ int mtyp = msg_type(m);
+
+ return ((mtyp == TIPC_MCAST_MSG) || (mtyp == TIPC_GRP_BCAST_MSG) ||
+ (mtyp == TIPC_GRP_MCAST_MSG));
}
static inline u32 msg_connected(struct tipc_msg *m)
@@ -514,6 +552,16 @@ static inline void msg_set_nameupper(struct tipc_msg *m, u32 n)
#define DSC_RESP_MSG 1
/*
+ * Group protocol message types
+ */
+#define GRP_JOIN_MSG 0
+#define GRP_LEAVE_MSG 1
+#define GRP_ADV_MSG 2
+#define GRP_ACK_MSG 3
+#define GRP_RECLAIM_MSG 4
+#define GRP_REMIT_MSG 5
+
+/*
* Word 1
*/
static inline u32 msg_seq_gap(struct tipc_msg *m)
@@ -764,12 +812,12 @@ static inline void msg_set_conn_ack(struct tipc_msg *m, u32 n)
msg_set_bits(m, 9, 16, 0xffff, n);
}
-static inline u32 msg_adv_win(struct tipc_msg *m)
+static inline u16 msg_adv_win(struct tipc_msg *m)
{
return msg_bits(m, 9, 0, 0xffff);
}
-static inline void msg_set_adv_win(struct tipc_msg *m, u32 n)
+static inline void msg_set_adv_win(struct tipc_msg *m, u16 n)
{
msg_set_bits(m, 9, 0, 0xffff, n);
}
@@ -794,6 +842,68 @@ static inline void msg_set_link_tolerance(struct tipc_msg *m, u32 n)
msg_set_bits(m, 9, 0, 0xffff, n);
}
+static inline u16 msg_grp_bc_syncpt(struct tipc_msg *m)
+{
+ return msg_bits(m, 9, 16, 0xffff);
+}
+
+static inline void msg_set_grp_bc_syncpt(struct tipc_msg *m, u16 n)
+{
+ msg_set_bits(m, 9, 16, 0xffff, n);
+}
+
+static inline u16 msg_grp_bc_acked(struct tipc_msg *m)
+{
+ return msg_bits(m, 9, 16, 0xffff);
+}
+
+static inline void msg_set_grp_bc_acked(struct tipc_msg *m, u16 n)
+{
+ msg_set_bits(m, 9, 16, 0xffff, n);
+}
+
+static inline u16 msg_grp_remitted(struct tipc_msg *m)
+{
+ return msg_bits(m, 9, 16, 0xffff);
+}
+
+static inline void msg_set_grp_remitted(struct tipc_msg *m, u16 n)
+{
+ msg_set_bits(m, 9, 16, 0xffff, n);
+}
+
+/* Word 10
+ */
+static inline u16 msg_grp_evt(struct tipc_msg *m)
+{
+ return msg_bits(m, 10, 0, 0x3);
+}
+
+static inline void msg_set_grp_evt(struct tipc_msg *m, int n)
+{
+ msg_set_bits(m, 10, 0, 0x3, n);
+}
+
+static inline u16 msg_grp_bc_ack_req(struct tipc_msg *m)
+{
+ return msg_bits(m, 10, 0, 0x1);
+}
+
+static inline void msg_set_grp_bc_ack_req(struct tipc_msg *m, bool n)
+{
+ msg_set_bits(m, 10, 0, 0x1, n);
+}
+
+static inline u16 msg_grp_bc_seqno(struct tipc_msg *m)
+{
+ return msg_bits(m, 10, 16, 0xffff);
+}
+
+static inline void msg_set_grp_bc_seqno(struct tipc_msg *m, u32 n)
+{
+ msg_set_bits(m, 10, 16, 0xffff, n);
+}
+
static inline bool msg_peer_link_is_up(struct tipc_msg *m)
{
if (likely(msg_user(m) != LINK_PROTOCOL))
@@ -816,8 +926,10 @@ static inline bool msg_is_reset(struct tipc_msg *hdr)
}
struct sk_buff *tipc_buf_acquire(u32 size, gfp_t gfp);
-bool tipc_msg_validate(struct sk_buff *skb);
+bool tipc_msg_validate(struct sk_buff **_skb);
bool tipc_msg_reverse(u32 own_addr, struct sk_buff **skb, int err);
+void tipc_skb_reject(struct net *net, int err, struct sk_buff *skb,
+ struct sk_buff_head *xmitq);
void tipc_msg_init(u32 own_addr, struct tipc_msg *m, u32 user, u32 type,
u32 hsize, u32 destnode);
struct sk_buff *tipc_msg_create(uint user, uint type, uint hdr_sz,
@@ -842,6 +954,11 @@ static inline u16 buf_seqno(struct sk_buff *skb)
return msg_seqno(buf_msg(skb));
}
+static inline int buf_roundup_len(struct sk_buff *skb)
+{
+ return (skb->len / 1024 + 1) * 1024;
+}
+
/* tipc_skb_peek(): peek and reserve first buffer in list
* @list: list to be peeked in
* Returns pointer to first buffer in list, if any
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index bd0aac87b41a..b3829bcf63c7 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -43,6 +43,7 @@
#include "bcast.h"
#include "addr.h"
#include "node.h"
+#include "group.h"
#include <net/genetlink.h>
#define TIPC_NAMETBL_SIZE 1024 /* must be a power of 2 */
@@ -596,18 +597,47 @@ not_found:
return ref;
}
-/**
- * tipc_nametbl_mc_translate - find multicast destinations
- *
- * Creates list of all local ports that overlap the given multicast address;
- * also determines if any off-node ports overlap.
- *
- * Note: Publications with a scope narrower than 'limit' are ignored.
- * (i.e. local node-scope publications mustn't receive messages arriving
- * from another node, even if the multcast link brought it here)
- *
- * Returns non-zero if any off-node ports overlap
- */
+bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 domain,
+ struct list_head *dsts, int *dstcnt, u32 exclude,
+ bool all)
+{
+ u32 self = tipc_own_addr(net);
+ struct publication *publ;
+ struct name_info *info;
+ struct name_seq *seq;
+ struct sub_seq *sseq;
+
+ if (!tipc_in_scope(domain, self))
+ return false;
+
+ *dstcnt = 0;
+ rcu_read_lock();
+ seq = nametbl_find_seq(net, type);
+ if (unlikely(!seq))
+ goto exit;
+ spin_lock_bh(&seq->lock);
+ sseq = nameseq_find_subseq(seq, instance);
+ if (likely(sseq)) {
+ info = sseq->info;
+ list_for_each_entry(publ, &info->zone_list, zone_list) {
+ if (!tipc_in_scope(domain, publ->node))
+ continue;
+ if (publ->ref == exclude && publ->node == self)
+ continue;
+ tipc_dest_push(dsts, publ->node, publ->ref);
+ (*dstcnt)++;
+ if (all)
+ continue;
+ list_move_tail(&publ->zone_list, &info->zone_list);
+ break;
+ }
+ }
+ spin_unlock_bh(&seq->lock);
+exit:
+ rcu_read_unlock();
+ return !list_empty(dsts);
+}
+
int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper,
u32 limit, struct list_head *dports)
{
@@ -634,7 +664,7 @@ int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper,
info = sseq->info;
list_for_each_entry(publ, &info->node_list, node_list) {
if (publ->scope <= limit)
- u32_push(dports, publ->ref);
+ tipc_dest_push(dports, 0, publ->ref);
}
if (info->cluster_list_size != info->node_list_size)
@@ -667,7 +697,7 @@ void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower,
spin_lock_bh(&seq->lock);
sseq = seq->sseqs + nameseq_locate_subseq(seq, lower);
stop = seq->sseqs + seq->first_free;
- for (; sseq->lower <= upper && sseq != stop; sseq++) {
+ for (; sseq != stop && sseq->lower <= upper; sseq++) {
info = sseq->info;
list_for_each_entry(publ, &info->zone_list, zone_list) {
if (tipc_in_scope(domain, publ->node))
@@ -679,6 +709,37 @@ exit:
rcu_read_unlock();
}
+/* tipc_nametbl_build_group - build list of communication group members
+ */
+void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp,
+ u32 type, u32 domain)
+{
+ struct sub_seq *sseq, *stop;
+ struct name_info *info;
+ struct publication *p;
+ struct name_seq *seq;
+
+ rcu_read_lock();
+ seq = nametbl_find_seq(net, type);
+ if (!seq)
+ goto exit;
+
+ spin_lock_bh(&seq->lock);
+ sseq = seq->sseqs;
+ stop = seq->sseqs + seq->first_free;
+ for (; sseq != stop; sseq++) {
+ info = sseq->info;
+ list_for_each_entry(p, &info->zone_list, zone_list) {
+ if (!tipc_in_scope(domain, p->node))
+ continue;
+ tipc_group_add_member(grp, p->node, p->ref);
+ }
+ }
+ spin_unlock_bh(&seq->lock);
+exit:
+ rcu_read_unlock();
+}
+
/*
* tipc_nametbl_publish - add name publication to network name tables
*/
@@ -1057,78 +1118,79 @@ int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len;
}
-bool u32_find(struct list_head *l, u32 value)
+struct tipc_dest *tipc_dest_find(struct list_head *l, u32 node, u32 port)
{
- struct u32_item *item;
+ u64 value = (u64)node << 32 | port;
+ struct tipc_dest *dst;
- list_for_each_entry(item, l, list) {
- if (item->value == value)
- return true;
+ list_for_each_entry(dst, l, list) {
+ if (dst->value != value)
+ continue;
+ return dst;
}
- return false;
+ return NULL;
}
-bool u32_push(struct list_head *l, u32 value)
+bool tipc_dest_push(struct list_head *l, u32 node, u32 port)
{
- struct u32_item *item;
+ u64 value = (u64)node << 32 | port;
+ struct tipc_dest *dst;
- list_for_each_entry(item, l, list) {
- if (item->value == value)
- return false;
- }
- item = kmalloc(sizeof(*item), GFP_ATOMIC);
- if (unlikely(!item))
+ if (tipc_dest_find(l, node, port))
return false;
- item->value = value;
- list_add(&item->list, l);
+ dst = kmalloc(sizeof(*dst), GFP_ATOMIC);
+ if (unlikely(!dst))
+ return false;
+ dst->value = value;
+ list_add(&dst->list, l);
return true;
}
-u32 u32_pop(struct list_head *l)
+bool tipc_dest_pop(struct list_head *l, u32 *node, u32 *port)
{
- struct u32_item *item;
- u32 value = 0;
+ struct tipc_dest *dst;
if (list_empty(l))
- return 0;
- item = list_first_entry(l, typeof(*item), list);
- value = item->value;
- list_del(&item->list);
- kfree(item);
- return value;
+ return false;
+ dst = list_first_entry(l, typeof(*dst), list);
+ if (port)
+ *port = dst->port;
+ if (node)
+ *node = dst->node;
+ list_del(&dst->list);
+ kfree(dst);
+ return true;
}
-bool u32_del(struct list_head *l, u32 value)
+bool tipc_dest_del(struct list_head *l, u32 node, u32 port)
{
- struct u32_item *item, *tmp;
+ struct tipc_dest *dst;
- list_for_each_entry_safe(item, tmp, l, list) {
- if (item->value != value)
- continue;
- list_del(&item->list);
- kfree(item);
- return true;
- }
- return false;
+ dst = tipc_dest_find(l, node, port);
+ if (!dst)
+ return false;
+ list_del(&dst->list);
+ kfree(dst);
+ return true;
}
-void u32_list_purge(struct list_head *l)
+void tipc_dest_list_purge(struct list_head *l)
{
- struct u32_item *item, *tmp;
+ struct tipc_dest *dst, *tmp;
- list_for_each_entry_safe(item, tmp, l, list) {
- list_del(&item->list);
- kfree(item);
+ list_for_each_entry_safe(dst, tmp, l, list) {
+ list_del(&dst->list);
+ kfree(dst);
}
}
-int u32_list_len(struct list_head *l)
+int tipc_dest_list_len(struct list_head *l)
{
- struct u32_item *item;
+ struct tipc_dest *dst;
int i = 0;
- list_for_each_entry(item, l, list) {
+ list_for_each_entry(dst, l, list) {
i++;
}
return i;
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index 6ebdeb1d84a5..71926e429446 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -40,6 +40,7 @@
struct tipc_subscription;
struct tipc_plist;
struct tipc_nlist;
+struct tipc_group;
/*
* TIPC name types reserved for internal TIPC use (both current and planned)
@@ -101,9 +102,14 @@ int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb);
u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 *node);
int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper,
u32 limit, struct list_head *dports);
+void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp,
+ u32 type, u32 domain);
void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower,
u32 upper, u32 domain,
struct tipc_nlist *nodes);
+bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 domain,
+ struct list_head *dsts, int *dstcnt, u32 exclude,
+ bool all);
struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower,
u32 upper, u32 scope, u32 port_ref,
u32 key);
@@ -120,16 +126,22 @@ void tipc_nametbl_unsubscribe(struct tipc_subscription *s);
int tipc_nametbl_init(struct net *net);
void tipc_nametbl_stop(struct net *net);
-struct u32_item {
+struct tipc_dest {
struct list_head list;
- u32 value;
+ union {
+ struct {
+ u32 port;
+ u32 node;
+ };
+ u64 value;
+ };
};
-bool u32_push(struct list_head *l, u32 value);
-u32 u32_pop(struct list_head *l);
-bool u32_find(struct list_head *l, u32 value);
-bool u32_del(struct list_head *l, u32 value);
-void u32_list_purge(struct list_head *l);
-int u32_list_len(struct list_head *l);
+struct tipc_dest *tipc_dest_find(struct list_head *l, u32 node, u32 port);
+bool tipc_dest_push(struct list_head *l, u32 node, u32 port);
+bool tipc_dest_pop(struct list_head *l, u32 *node, u32 *port);
+bool tipc_dest_del(struct list_head *l, u32 node, u32 port);
+void tipc_dest_list_purge(struct list_head *l);
+int tipc_dest_list_len(struct list_head *l);
#endif
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index 9bfe886ab330..e48f0b2c01b9 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -258,13 +258,15 @@ static int tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd,
arg = nlmsg_new(0, GFP_KERNEL);
if (!arg) {
kfree_skb(msg->rep);
+ msg->rep = NULL;
return -ENOMEM;
}
err = __tipc_nl_compat_dumpit(cmd, msg, arg);
- if (err)
+ if (err) {
kfree_skb(msg->rep);
-
+ msg->rep = NULL;
+ }
kfree_skb(arg);
return err;
@@ -1215,7 +1217,7 @@ send:
return err;
}
-static struct genl_ops tipc_genl_compat_ops[] = {
+static const struct genl_ops tipc_genl_compat_ops[] = {
{
.cmd = TIPC_GENL_CMD,
.doit = tipc_nl_compat_recv,
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 9b4dcb6a16b5..507017fe0f1b 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -153,11 +153,11 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id,
bool delete);
static void node_lost_contact(struct tipc_node *n, struct sk_buff_head *inputq);
static void tipc_node_delete(struct tipc_node *node);
-static void tipc_node_timeout(unsigned long data);
+static void tipc_node_timeout(struct timer_list *t);
static void tipc_node_fsm_evt(struct tipc_node *n, int evt);
static struct tipc_node *tipc_node_find(struct net *net, u32 addr);
static void tipc_node_put(struct tipc_node *node);
-static bool tipc_node_is_up(struct tipc_node *n);
+static bool node_is_up(struct tipc_node *n);
struct tipc_sock_conn {
u32 port;
@@ -361,7 +361,7 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities)
goto exit;
}
tipc_node_get(n);
- setup_timer(&n->timer, tipc_node_timeout, (unsigned long)n);
+ timer_setup(&n->timer, tipc_node_timeout, 0);
n->keepalive_intv = U32_MAX;
hlist_add_head_rcu(&n->hash, &tn->node_htable[tipc_hashfn(addr)]);
list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
@@ -500,9 +500,9 @@ void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port)
/* tipc_node_timeout - handle expiration of node timer
*/
-static void tipc_node_timeout(unsigned long data)
+static void tipc_node_timeout(struct timer_list *t)
{
- struct tipc_node *n = (struct tipc_node *)data;
+ struct tipc_node *n = from_timer(n, t, timer);
struct tipc_link_entry *le;
struct sk_buff_head xmitq;
int bearer_id;
@@ -657,7 +657,7 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id,
*slot1 = i;
}
- if (!tipc_node_is_up(n)) {
+ if (!node_is_up(n)) {
if (tipc_link_peer_is_down(l))
tipc_node_fsm_evt(n, PEER_LOST_CONTACT_EVT);
tipc_node_fsm_evt(n, SELF_LOST_CONTACT_EVT);
@@ -717,11 +717,27 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete)
tipc_sk_rcv(n->net, &le->inputq);
}
-static bool tipc_node_is_up(struct tipc_node *n)
+static bool node_is_up(struct tipc_node *n)
{
return n->active_links[0] != INVALID_BEARER_ID;
}
+bool tipc_node_is_up(struct net *net, u32 addr)
+{
+ struct tipc_node *n;
+ bool retval = false;
+
+ if (in_own_node(net, addr))
+ return true;
+
+ n = tipc_node_find(net, addr);
+ if (!n)
+ return false;
+ retval = node_is_up(n);
+ tipc_node_put(n);
+ return retval;
+}
+
void tipc_node_check_dest(struct net *net, u32 onode,
struct tipc_bearer *b,
u16 capabilities, u32 signature,
@@ -1126,8 +1142,8 @@ int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 addr,
strncpy(linkname, tipc_link_name(link), len);
err = 0;
}
-exit:
tipc_node_read_unlock(node);
+exit:
tipc_node_put(node);
return err;
}
@@ -1149,7 +1165,7 @@ static int __tipc_nl_add_node(struct tipc_nl_msg *msg, struct tipc_node *node)
if (nla_put_u32(msg->skb, TIPC_NLA_NODE_ADDR, node->addr))
goto attr_msg_full;
- if (tipc_node_is_up(node))
+ if (node_is_up(node))
if (nla_put_flag(msg->skb, TIPC_NLA_NODE_UP))
goto attr_msg_full;
@@ -1238,6 +1254,22 @@ int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dnode,
return 0;
}
+/* tipc_node_distr_xmit(): send single buffer msgs to individual destinations
+ * Note: this is only for SYSTEM_IMPORTANCE messages, which cannot be rejected
+ */
+int tipc_node_distr_xmit(struct net *net, struct sk_buff_head *xmitq)
+{
+ struct sk_buff *skb;
+ u32 selector, dnode;
+
+ while ((skb = __skb_dequeue(xmitq))) {
+ selector = msg_origport(buf_msg(skb));
+ dnode = msg_destnode(buf_msg(skb));
+ tipc_node_xmit_skb(net, skb, dnode, selector);
+ }
+ return 0;
+}
+
void tipc_node_broadcast(struct net *net, struct sk_buff *skb)
{
struct sk_buff *txskb;
@@ -1249,7 +1281,7 @@ void tipc_node_broadcast(struct net *net, struct sk_buff *skb)
dst = n->addr;
if (in_own_node(net, dst))
continue;
- if (!tipc_node_is_up(n))
+ if (!node_is_up(n))
continue;
txskb = pskb_copy(skb, GFP_ATOMIC);
if (!txskb)
@@ -1284,7 +1316,7 @@ static void tipc_node_bc_sync_rcv(struct tipc_node *n, struct tipc_msg *hdr,
rc = tipc_bcast_sync_rcv(n->net, n->bc_entry.link, hdr);
if (rc & TIPC_LINK_DOWN_EVT) {
- tipc_bearer_reset_all(n->net);
+ tipc_node_reset_links(n);
return;
}
@@ -1351,15 +1383,9 @@ static void tipc_node_bc_rcv(struct net *net, struct sk_buff *skb, int bearer_id
if (!skb_queue_empty(&be->inputq1))
tipc_node_mcast_rcv(n);
- if (rc & TIPC_LINK_DOWN_EVT) {
- /* Reception reassembly failure => reset all links to peer */
- if (!tipc_link_is_up(be->link))
- tipc_node_reset_links(n);
-
- /* Retransmission failure => reset all links to all peers */
- if (!tipc_link_is_up(tipc_bc_sndlink(net)))
- tipc_bearer_reset_all(net);
- }
+ /* If reassembly or retransmission failure => reset all links to peer */
+ if (rc & TIPC_LINK_DOWN_EVT)
+ tipc_node_reset_links(n);
tipc_node_put(n);
}
@@ -1513,7 +1539,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b)
__skb_queue_head_init(&xmitq);
/* Ensure message is well-formed before touching the header */
- if (unlikely(!tipc_msg_validate(skb)))
+ if (unlikely(!tipc_msg_validate(&skb)))
goto discard;
hdr = buf_msg(skb);
usr = msg_user(hdr);
@@ -1557,6 +1583,8 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b)
/* Check/update node state before receiving */
if (unlikely(skb)) {
+ if (unlikely(skb_linearize(skb)))
+ goto discard;
tipc_node_write_lock(n);
if (tipc_node_check_state(n, skb, bearer_id, &xmitq)) {
if (le->link) {
diff --git a/net/tipc/node.h b/net/tipc/node.h
index 898c22916984..acd58d23a70e 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -48,7 +48,8 @@ enum {
TIPC_BCAST_SYNCH = (1 << 1),
TIPC_BCAST_STATE_NACK = (1 << 2),
TIPC_BLOCK_FLOWCTL = (1 << 3),
- TIPC_BCAST_RCAST = (1 << 4)
+ TIPC_BCAST_RCAST = (1 << 4),
+ TIPC_MCAST_GROUPS = (1 << 5)
};
#define TIPC_NODE_CAPABILITIES (TIPC_BCAST_SYNCH | \
@@ -68,6 +69,7 @@ int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 node,
char *linkname, size_t len);
int tipc_node_xmit(struct net *net, struct sk_buff_head *list, u32 dnode,
int selector);
+int tipc_node_distr_xmit(struct net *net, struct sk_buff_head *list);
int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dest,
u32 selector);
void tipc_node_subscribe(struct net *net, struct list_head *subscr, u32 addr);
@@ -76,6 +78,7 @@ void tipc_node_broadcast(struct net *net, struct sk_buff *skb);
int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port);
void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port);
int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel);
+bool tipc_node_is_up(struct net *net, u32 addr);
u16 tipc_node_get_capabilities(struct net *net, u32 addr);
int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb);
int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb);
diff --git a/net/tipc/server.c b/net/tipc/server.c
index 3cd6402e812c..acaef80fb88c 100644
--- a/net/tipc/server.c
+++ b/net/tipc/server.c
@@ -36,6 +36,8 @@
#include "server.h"
#include "core.h"
#include "socket.h"
+#include "addr.h"
+#include "msg.h"
#include <net/sock.h>
#include <linux/module.h>
@@ -105,13 +107,11 @@ static void tipc_conn_kref_release(struct kref *kref)
kernel_bind(sock, (struct sockaddr *)saddr, sizeof(*saddr));
sock_release(sock);
con->sock = NULL;
-
- spin_lock_bh(&s->idr_lock);
- idr_remove(&s->conn_idr, con->conid);
- s->idr_in_use--;
- spin_unlock_bh(&s->idr_lock);
}
-
+ spin_lock_bh(&s->idr_lock);
+ idr_remove(&s->conn_idr, con->conid);
+ s->idr_in_use--;
+ spin_unlock_bh(&s->idr_lock);
tipc_clean_outqueues(con);
kfree(con);
}
@@ -197,7 +197,8 @@ static void tipc_close_conn(struct tipc_conn *con)
struct tipc_server *s = con->server;
if (test_and_clear_bit(CF_CONNECTED, &con->flags)) {
- tipc_unregister_callbacks(con);
+ if (con->sock)
+ tipc_unregister_callbacks(con);
if (con->conid)
s->tipc_conn_release(con->conid, con->usr_data);
@@ -207,8 +208,8 @@ static void tipc_close_conn(struct tipc_conn *con)
* are harmless for us here as we have already deleted this
* connection from server connection list.
*/
- kernel_sock_shutdown(con->sock, SHUT_RDWR);
-
+ if (con->sock)
+ kernel_sock_shutdown(con->sock, SHUT_RDWR);
conn_put(con);
}
}
@@ -487,38 +488,104 @@ void tipc_conn_terminate(struct tipc_server *s, int conid)
}
}
+bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type,
+ u32 lower, u32 upper, int *conid)
+{
+ struct tipc_subscriber *scbr;
+ struct tipc_subscr sub;
+ struct tipc_server *s;
+ struct tipc_conn *con;
+
+ sub.seq.type = type;
+ sub.seq.lower = lower;
+ sub.seq.upper = upper;
+ sub.timeout = TIPC_WAIT_FOREVER;
+ sub.filter = TIPC_SUB_PORTS;
+ *(u32 *)&sub.usr_handle = port;
+
+ con = tipc_alloc_conn(tipc_topsrv(net));
+ if (IS_ERR(con))
+ return false;
+
+ *conid = con->conid;
+ s = con->server;
+ scbr = s->tipc_conn_new(*conid);
+ if (!scbr) {
+ tipc_close_conn(con);
+ return false;
+ }
+
+ con->usr_data = scbr;
+ con->sock = NULL;
+ s->tipc_conn_recvmsg(net, *conid, NULL, scbr, &sub, sizeof(sub));
+ return true;
+}
+
+void tipc_topsrv_kern_unsubscr(struct net *net, int conid)
+{
+ struct tipc_conn *con;
+
+ con = tipc_conn_lookup(tipc_topsrv(net), conid);
+ if (!con)
+ return;
+ tipc_close_conn(con);
+ conn_put(con);
+}
+
+static void tipc_send_kern_top_evt(struct net *net, struct tipc_event *evt)
+{
+ u32 port = *(u32 *)&evt->s.usr_handle;
+ u32 self = tipc_own_addr(net);
+ struct sk_buff_head evtq;
+ struct sk_buff *skb;
+
+ skb = tipc_msg_create(TOP_SRV, 0, INT_H_SIZE, sizeof(*evt),
+ self, self, port, port, 0);
+ if (!skb)
+ return;
+ msg_set_dest_droppable(buf_msg(skb), true);
+ memcpy(msg_data(buf_msg(skb)), evt, sizeof(*evt));
+ skb_queue_head_init(&evtq);
+ __skb_queue_tail(&evtq, skb);
+ tipc_sk_rcv(net, &evtq);
+}
+
static void tipc_send_to_sock(struct tipc_conn *con)
{
- int count = 0;
struct tipc_server *s = con->server;
struct outqueue_entry *e;
+ struct tipc_event *evt;
struct msghdr msg;
+ int count = 0;
int ret;
spin_lock_bh(&con->outqueue_lock);
while (test_bit(CF_CONNECTED, &con->flags)) {
- e = list_entry(con->outqueue.next, struct outqueue_entry,
- list);
+ e = list_entry(con->outqueue.next, struct outqueue_entry, list);
if ((struct list_head *) e == &con->outqueue)
break;
- spin_unlock_bh(&con->outqueue_lock);
- memset(&msg, 0, sizeof(msg));
- msg.msg_flags = MSG_DONTWAIT;
+ spin_unlock_bh(&con->outqueue_lock);
- if (s->type == SOCK_DGRAM || s->type == SOCK_RDM) {
- msg.msg_name = &e->dest;
- msg.msg_namelen = sizeof(struct sockaddr_tipc);
- }
- ret = kernel_sendmsg(con->sock, &msg, &e->iov, 1,
- e->iov.iov_len);
- if (ret == -EWOULDBLOCK || ret == 0) {
- cond_resched();
- goto out;
- } else if (ret < 0) {
- goto send_err;
+ if (con->sock) {
+ memset(&msg, 0, sizeof(msg));
+ msg.msg_flags = MSG_DONTWAIT;
+ if (s->type == SOCK_DGRAM || s->type == SOCK_RDM) {
+ msg.msg_name = &e->dest;
+ msg.msg_namelen = sizeof(struct sockaddr_tipc);
+ }
+ ret = kernel_sendmsg(con->sock, &msg, &e->iov, 1,
+ e->iov.iov_len);
+ if (ret == -EWOULDBLOCK || ret == 0) {
+ cond_resched();
+ goto out;
+ } else if (ret < 0) {
+ goto send_err;
+ }
+ } else {
+ evt = e->iov.iov_base;
+ tipc_send_kern_top_evt(s->net, evt);
}
-
/* Don't starve users filling buffers */
if (++count >= MAX_SEND_MSG_COUNT) {
cond_resched();
diff --git a/net/tipc/server.h b/net/tipc/server.h
index 34f8055afa3b..2113c9192633 100644
--- a/net/tipc/server.h
+++ b/net/tipc/server.h
@@ -83,13 +83,16 @@ struct tipc_server {
int tipc_conn_sendmsg(struct tipc_server *s, int conid,
struct sockaddr_tipc *addr, void *data, size_t len);
+bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type,
+ u32 lower, u32 upper, int *conid);
+void tipc_topsrv_kern_unsubscr(struct net *net, int conid);
+
/**
* tipc_conn_terminate - terminate connection with server
*
* Note: Must call it in process context since it might sleep
*/
void tipc_conn_terminate(struct tipc_server *s, int conid);
-
int tipc_server_start(struct tipc_server *s);
void tipc_server_stop(struct tipc_server *s);
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 101e3597338f..5d18c0caa92b 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1,7 +1,7 @@
/*
* net/tipc/socket.c: TIPC socket API
*
- * Copyright (c) 2001-2007, 2012-2016, Ericsson AB
+ * Copyright (c) 2001-2007, 2012-2017, Ericsson AB
* Copyright (c) 2004-2008, 2010-2013, Wind River Systems
* All rights reserved.
*
@@ -45,9 +45,10 @@
#include "socket.h"
#include "bcast.h"
#include "netlink.h"
+#include "group.h"
#define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */
-#define CONN_PROBING_INTERVAL msecs_to_jiffies(3600000) /* [ms] => 1 h */
+#define CONN_PROBING_INTV msecs_to_jiffies(3600000) /* [ms] => 1 h */
#define TIPC_FWD_MSG 1
#define TIPC_MAX_PORT 0xffffffff
#define TIPC_MIN_PORT 1
@@ -61,6 +62,11 @@ enum {
TIPC_CONNECTING = TCP_SYN_SENT,
};
+struct sockaddr_pair {
+ struct sockaddr_tipc sock;
+ struct sockaddr_tipc member;
+};
+
/**
* struct tipc_sock - TIPC socket structure
* @sk: socket - interacts with 'port' and with user via the socket API
@@ -78,7 +84,7 @@ enum {
* @conn_timeout: the time we can wait for an unresponded setup request
* @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue
* @cong_link_cnt: number of congested links
- * @sent_unacked: # messages sent by socket, and not yet acked by peer
+ * @snt_unacked: # messages sent by socket, and not yet acked by peer
* @rcv_unacked: # messages read by user, but not yet acked back to peer
* @peer: 'connected' peer for dgram/rdm
* @node: hash table node
@@ -109,20 +115,22 @@ struct tipc_sock {
struct rhash_head node;
struct tipc_mc_method mc_method;
struct rcu_head rcu;
+ struct tipc_group *group;
};
-static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb);
+static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb);
static void tipc_data_ready(struct sock *sk);
static void tipc_write_space(struct sock *sk);
static void tipc_sock_destruct(struct sock *sk);
static int tipc_release(struct socket *sock);
static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
bool kern);
-static void tipc_sk_timeout(unsigned long data);
+static void tipc_sk_timeout(struct timer_list *t);
static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
struct tipc_name_seq const *seq);
static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
struct tipc_name_seq const *seq);
+static int tipc_sk_leave(struct tipc_sock *tsk);
static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid);
static int tipc_sk_insert(struct tipc_sock *tsk);
static void tipc_sk_remove(struct tipc_sock *tsk);
@@ -193,6 +201,11 @@ static bool tsk_conn_cong(struct tipc_sock *tsk)
return tsk->snt_unacked > tsk->snd_win;
}
+static u16 tsk_blocks(int len)
+{
+ return ((len / FLOWCTL_BLK_SZ) + 1);
+}
+
/* tsk_blocks(): translate a buffer size in bytes to number of
* advertisable blocks, taking into account the ratio truesize(len)/len
* We can trust that this ratio is always < 4 for len >= FLOWCTL_BLK_SZ
@@ -451,9 +464,9 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
NAMED_H_SIZE, 0);
msg_set_origport(msg, tsk->portid);
- setup_timer(&sk->sk_timer, tipc_sk_timeout, (unsigned long)tsk);
+ timer_setup(&sk->sk_timer, tipc_sk_timeout, 0);
sk->sk_shutdown = 0;
- sk->sk_backlog_rcv = tipc_backlog_rcv;
+ sk->sk_backlog_rcv = tipc_sk_backlog_rcv;
sk->sk_rcvbuf = sysctl_tipc_rmem[1];
sk->sk_data_ready = tipc_data_ready;
sk->sk_write_space = tipc_write_space;
@@ -559,13 +572,14 @@ static int tipc_release(struct socket *sock)
__tipc_shutdown(sock, TIPC_ERR_NO_PORT);
sk->sk_shutdown = SHUTDOWN_MASK;
+ tipc_sk_leave(tsk);
tipc_sk_withdraw(tsk, 0, NULL);
sk_stop_timer(sk, &sk->sk_timer);
tipc_sk_remove(tsk);
/* Reject any messages that accumulated in backlog queue */
release_sock(sk);
- u32_list_purge(&tsk->cong_links);
+ tipc_dest_list_purge(&tsk->cong_links);
tsk->cong_link_cnt = 0;
call_rcu(&tsk->rcu, tipc_sk_callback);
sock->sk = NULL;
@@ -601,7 +615,10 @@ static int tipc_bind(struct socket *sock, struct sockaddr *uaddr,
res = tipc_sk_withdraw(tsk, 0, NULL);
goto exit;
}
-
+ if (tsk->group) {
+ res = -EACCES;
+ goto exit;
+ }
if (uaddr_len < sizeof(struct sockaddr_tipc)) {
res = -EINVAL;
goto exit;
@@ -698,38 +715,41 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock,
{
struct sock *sk = sock->sk;
struct tipc_sock *tsk = tipc_sk(sk);
- u32 mask = 0;
+ struct tipc_group *grp = tsk->group;
+ u32 revents = 0;
sock_poll_wait(file, sk_sleep(sk), wait);
if (sk->sk_shutdown & RCV_SHUTDOWN)
- mask |= POLLRDHUP | POLLIN | POLLRDNORM;
+ revents |= POLLRDHUP | POLLIN | POLLRDNORM;
if (sk->sk_shutdown == SHUTDOWN_MASK)
- mask |= POLLHUP;
+ revents |= POLLHUP;
switch (sk->sk_state) {
case TIPC_ESTABLISHED:
if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk))
- mask |= POLLOUT;
+ revents |= POLLOUT;
/* fall thru' */
case TIPC_LISTEN:
case TIPC_CONNECTING:
if (!skb_queue_empty(&sk->sk_receive_queue))
- mask |= (POLLIN | POLLRDNORM);
+ revents |= POLLIN | POLLRDNORM;
break;
case TIPC_OPEN:
- if (!tsk->cong_link_cnt)
- mask |= POLLOUT;
- if (tipc_sk_type_connectionless(sk) &&
- (!skb_queue_empty(&sk->sk_receive_queue)))
- mask |= (POLLIN | POLLRDNORM);
+ if (!grp || tipc_group_size(grp))
+ if (!tsk->cong_link_cnt)
+ revents |= POLLOUT;
+ if (!tipc_sk_type_connectionless(sk))
+ break;
+ if (skb_queue_empty(&sk->sk_receive_queue))
+ break;
+ revents |= POLLIN | POLLRDNORM;
break;
case TIPC_DISCONNECTING:
- mask = (POLLIN | POLLRDNORM | POLLHUP);
+ revents = POLLIN | POLLRDNORM | POLLHUP;
break;
}
-
- return mask;
+ return revents;
}
/**
@@ -757,6 +777,9 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq,
struct tipc_nlist dsts;
int rc;
+ if (tsk->group)
+ return -EACCES;
+
/* Block or return if any destination link is congested */
rc = tipc_wait_for_cond(sock, &timeout, !tsk->cong_link_cnt);
if (unlikely(rc))
@@ -794,6 +817,296 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq,
}
/**
+ * tipc_send_group_msg - send a message to a member in the group
+ * @net: network namespace
+ * @m: message to send
+ * @mb: group member
+ * @dnode: destination node
+ * @dport: destination port
+ * @dlen: total length of message data
+ */
+static int tipc_send_group_msg(struct net *net, struct tipc_sock *tsk,
+ struct msghdr *m, struct tipc_member *mb,
+ u32 dnode, u32 dport, int dlen)
+{
+ u16 bc_snd_nxt = tipc_group_bc_snd_nxt(tsk->group);
+ struct tipc_mc_method *method = &tsk->mc_method;
+ int blks = tsk_blocks(GROUP_H_SIZE + dlen);
+ struct tipc_msg *hdr = &tsk->phdr;
+ struct sk_buff_head pkts;
+ int mtu, rc;
+
+ /* Complete message header */
+ msg_set_type(hdr, TIPC_GRP_UCAST_MSG);
+ msg_set_hdr_sz(hdr, GROUP_H_SIZE);
+ msg_set_destport(hdr, dport);
+ msg_set_destnode(hdr, dnode);
+ msg_set_grp_bc_seqno(hdr, bc_snd_nxt);
+
+ /* Build message as chain of buffers */
+ skb_queue_head_init(&pkts);
+ mtu = tipc_node_get_mtu(net, dnode, tsk->portid);
+ rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
+ if (unlikely(rc != dlen))
+ return rc;
+
+ /* Send message */
+ rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid);
+ if (unlikely(rc == -ELINKCONG)) {
+ tipc_dest_push(&tsk->cong_links, dnode, 0);
+ tsk->cong_link_cnt++;
+ }
+
+ /* Update send window */
+ tipc_group_update_member(mb, blks);
+
+ /* A broadcast sent within next EXPIRE period must follow same path */
+ method->rcast = true;
+ method->mandatory = true;
+ return dlen;
+}
+
+/**
+ * tipc_send_group_unicast - send message to a member in the group
+ * @sock: socket structure
+ * @m: message to send
+ * @dlen: total length of message data
+ * @timeout: timeout to wait for wakeup
+ *
+ * Called from function tipc_sendmsg(), which has done all sanity checks
+ * Returns the number of bytes sent on success, or errno
+ */
+static int tipc_send_group_unicast(struct socket *sock, struct msghdr *m,
+ int dlen, long timeout)
+{
+ struct sock *sk = sock->sk;
+ DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
+ int blks = tsk_blocks(GROUP_H_SIZE + dlen);
+ struct tipc_sock *tsk = tipc_sk(sk);
+ struct tipc_group *grp = tsk->group;
+ struct net *net = sock_net(sk);
+ struct tipc_member *mb = NULL;
+ u32 node, port;
+ int rc;
+
+ node = dest->addr.id.node;
+ port = dest->addr.id.ref;
+ if (!port && !node)
+ return -EHOSTUNREACH;
+
+ /* Block or return if destination link or member is congested */
+ rc = tipc_wait_for_cond(sock, &timeout,
+ !tipc_dest_find(&tsk->cong_links, node, 0) &&
+ !tipc_group_cong(grp, node, port, blks, &mb));
+ if (unlikely(rc))
+ return rc;
+
+ if (unlikely(!mb))
+ return -EHOSTUNREACH;
+
+ rc = tipc_send_group_msg(net, tsk, m, mb, node, port, dlen);
+
+ return rc ? rc : dlen;
+}
+
+/**
+ * tipc_send_group_anycast - send message to any member with given identity
+ * @sock: socket structure
+ * @m: message to send
+ * @dlen: total length of message data
+ * @timeout: timeout to wait for wakeup
+ *
+ * Called from function tipc_sendmsg(), which has done all sanity checks
+ * Returns the number of bytes sent on success, or errno
+ */
+static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m,
+ int dlen, long timeout)
+{
+ DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
+ struct sock *sk = sock->sk;
+ struct tipc_sock *tsk = tipc_sk(sk);
+ struct list_head *cong_links = &tsk->cong_links;
+ int blks = tsk_blocks(GROUP_H_SIZE + dlen);
+ struct tipc_group *grp = tsk->group;
+ struct tipc_member *first = NULL;
+ struct tipc_member *mbr = NULL;
+ struct net *net = sock_net(sk);
+ u32 node, port, exclude;
+ u32 type, inst, domain;
+ struct list_head dsts;
+ int lookups = 0;
+ int dstcnt, rc;
+ bool cong;
+
+ INIT_LIST_HEAD(&dsts);
+
+ type = dest->addr.name.name.type;
+ inst = dest->addr.name.name.instance;
+ domain = addr_domain(net, dest->scope);
+ exclude = tipc_group_exclude(grp);
+
+ while (++lookups < 4) {
+ first = NULL;
+
+ /* Look for a non-congested destination member, if any */
+ while (1) {
+ if (!tipc_nametbl_lookup(net, type, inst, domain, &dsts,
+ &dstcnt, exclude, false))
+ return -EHOSTUNREACH;
+ tipc_dest_pop(&dsts, &node, &port);
+ cong = tipc_group_cong(grp, node, port, blks, &mbr);
+ if (!cong)
+ break;
+ if (mbr == first)
+ break;
+ if (!first)
+ first = mbr;
+ }
+
+ /* Start over if destination was not in member list */
+ if (unlikely(!mbr))
+ continue;
+
+ if (likely(!cong && !tipc_dest_find(cong_links, node, 0)))
+ break;
+
+ /* Block or return if destination link or member is congested */
+ rc = tipc_wait_for_cond(sock, &timeout,
+ !tipc_dest_find(cong_links, node, 0) &&
+ !tipc_group_cong(grp, node, port,
+ blks, &mbr));
+ if (unlikely(rc))
+ return rc;
+
+ /* Send, unless destination disappeared while waiting */
+ if (likely(mbr))
+ break;
+ }
+
+ if (unlikely(lookups >= 4))
+ return -EHOSTUNREACH;
+
+ rc = tipc_send_group_msg(net, tsk, m, mbr, node, port, dlen);
+
+ return rc ? rc : dlen;
+}
+
+/**
+ * tipc_send_group_bcast - send message to all members in communication group
+ * @sk: socket structure
+ * @m: message to send
+ * @dlen: total length of message data
+ * @timeout: timeout to wait for wakeup
+ *
+ * Called from function tipc_sendmsg(), which has done all sanity checks
+ * Returns the number of bytes sent on success, or errno
+ */
+static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m,
+ int dlen, long timeout)
+{
+ DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
+ struct sock *sk = sock->sk;
+ struct net *net = sock_net(sk);
+ struct tipc_sock *tsk = tipc_sk(sk);
+ struct tipc_group *grp = tsk->group;
+ struct tipc_nlist *dsts = tipc_group_dests(grp);
+ struct tipc_mc_method *method = &tsk->mc_method;
+ bool ack = method->mandatory && method->rcast;
+ int blks = tsk_blocks(MCAST_H_SIZE + dlen);
+ struct tipc_msg *hdr = &tsk->phdr;
+ int mtu = tipc_bcast_get_mtu(net);
+ struct sk_buff_head pkts;
+ int rc = -EHOSTUNREACH;
+
+ if (!dsts->local && !dsts->remote)
+ return -EHOSTUNREACH;
+
+ /* Block or return if any destination link or member is congested */
+ rc = tipc_wait_for_cond(sock, &timeout, !tsk->cong_link_cnt &&
+ !tipc_group_bc_cong(grp, blks));
+ if (unlikely(rc))
+ return rc;
+
+ /* Complete message header */
+ if (dest) {
+ msg_set_type(hdr, TIPC_GRP_MCAST_MSG);
+ msg_set_nameinst(hdr, dest->addr.name.name.instance);
+ } else {
+ msg_set_type(hdr, TIPC_GRP_BCAST_MSG);
+ msg_set_nameinst(hdr, 0);
+ }
+ msg_set_hdr_sz(hdr, GROUP_H_SIZE);
+ msg_set_destport(hdr, 0);
+ msg_set_destnode(hdr, 0);
+ msg_set_grp_bc_seqno(hdr, tipc_group_bc_snd_nxt(grp));
+
+ /* Avoid getting stuck with repeated forced replicasts */
+ msg_set_grp_bc_ack_req(hdr, ack);
+
+ /* Build message as chain of buffers */
+ skb_queue_head_init(&pkts);
+ rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
+ if (unlikely(rc != dlen))
+ return rc;
+
+ /* Send message */
+ rc = tipc_mcast_xmit(net, &pkts, method, dsts, &tsk->cong_link_cnt);
+ if (unlikely(rc))
+ return rc;
+
+ /* Update broadcast sequence number and send windows */
+ tipc_group_update_bc_members(tsk->group, blks, ack);
+
+ /* Broadcast link is now free to choose method for next broadcast */
+ method->mandatory = false;
+ method->expires = jiffies;
+
+ return dlen;
+}
+
+/**
+ * tipc_send_group_mcast - send message to all members with given identity
+ * @sock: socket structure
+ * @m: message to send
+ * @dlen: total length of message data
+ * @timeout: timeout to wait for wakeup
+ *
+ * Called from function tipc_sendmsg(), which has done all sanity checks
+ * Returns the number of bytes sent on success, or errno
+ */
+static int tipc_send_group_mcast(struct socket *sock, struct msghdr *m,
+ int dlen, long timeout)
+{
+ struct sock *sk = sock->sk;
+ DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
+ struct tipc_name_seq *seq = &dest->addr.nameseq;
+ struct tipc_sock *tsk = tipc_sk(sk);
+ struct tipc_group *grp = tsk->group;
+ struct net *net = sock_net(sk);
+ u32 domain, exclude, dstcnt;
+ struct list_head dsts;
+
+ INIT_LIST_HEAD(&dsts);
+
+ if (seq->lower != seq->upper)
+ return -ENOTSUPP;
+
+ domain = addr_domain(net, dest->scope);
+ exclude = tipc_group_exclude(grp);
+ if (!tipc_nametbl_lookup(net, seq->type, seq->lower, domain,
+ &dsts, &dstcnt, exclude, true))
+ return -EHOSTUNREACH;
+
+ if (dstcnt == 1) {
+ tipc_dest_pop(&dsts, &dest->addr.id.node, &dest->addr.id.ref);
+ return tipc_send_group_unicast(sock, m, dlen, timeout);
+ }
+
+ tipc_dest_list_purge(&dsts);
+ return tipc_send_group_bcast(sock, m, dlen, timeout);
+}
+
+/**
* tipc_sk_mcast_rcv - Deliver multicast messages to all destination sockets
* @arrvq: queue with arriving messages, to be cloned after destination lookup
* @inputq: queue with cloned messages, delivered to socket after dest lookup
@@ -803,13 +1116,15 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq,
void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
struct sk_buff_head *inputq)
{
- struct tipc_msg *msg;
- struct list_head dports;
- u32 portid;
u32 scope = TIPC_CLUSTER_SCOPE;
- struct sk_buff_head tmpq;
- uint hsz;
+ u32 self = tipc_own_addr(net);
struct sk_buff *skb, *_skb;
+ u32 lower = 0, upper = ~0;
+ struct sk_buff_head tmpq;
+ u32 portid, oport, onode;
+ struct list_head dports;
+ struct tipc_msg *msg;
+ int user, mtyp, hsz;
__skb_queue_head_init(&tmpq);
INIT_LIST_HEAD(&dports);
@@ -817,17 +1132,32 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
skb = tipc_skb_peek(arrvq, &inputq->lock);
for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) {
msg = buf_msg(skb);
+ user = msg_user(msg);
+ mtyp = msg_type(msg);
+ if (mtyp == TIPC_GRP_UCAST_MSG || user == GROUP_PROTOCOL) {
+ spin_lock_bh(&inputq->lock);
+ if (skb_peek(arrvq) == skb) {
+ __skb_dequeue(arrvq);
+ __skb_queue_tail(inputq, skb);
+ }
+ refcount_dec(&skb->users);
+ spin_unlock_bh(&inputq->lock);
+ continue;
+ }
hsz = skb_headroom(skb) + msg_hdr_sz(msg);
-
- if (in_own_node(net, msg_orignode(msg)))
+ oport = msg_origport(msg);
+ onode = msg_orignode(msg);
+ if (onode == self)
scope = TIPC_NODE_SCOPE;
/* Create destination port list and message clones: */
- tipc_nametbl_mc_translate(net,
- msg_nametype(msg), msg_namelower(msg),
- msg_nameupper(msg), scope, &dports);
- portid = u32_pop(&dports);
- for (; portid; portid = u32_pop(&dports)) {
+ if (!msg_in_group(msg)) {
+ lower = msg_namelower(msg);
+ upper = msg_nameupper(msg);
+ }
+ tipc_nametbl_mc_translate(net, msg_nametype(msg), lower, upper,
+ scope, &dports);
+ while (tipc_dest_pop(&dports, NULL, &portid)) {
_skb = __pskb_copy(skb, hsz, GFP_ATOMIC);
if (_skb) {
msg_set_destport(buf_msg(_skb), portid);
@@ -850,16 +1180,16 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
}
/**
- * tipc_sk_proto_rcv - receive a connection mng protocol message
+ * tipc_sk_conn_proto_rcv - receive a connection mng protocol message
* @tsk: receiving socket
* @skb: pointer to message buffer.
*/
-static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb,
- struct sk_buff_head *xmitq)
+static void tipc_sk_conn_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb,
+ struct sk_buff_head *xmitq)
{
- struct sock *sk = &tsk->sk;
- u32 onode = tsk_own_node(tsk);
struct tipc_msg *hdr = buf_msg(skb);
+ u32 onode = tsk_own_node(tsk);
+ struct sock *sk = &tsk->sk;
int mtyp = msg_type(hdr);
bool conn_cong;
@@ -931,6 +1261,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
long timeout = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
struct list_head *clinks = &tsk->cong_links;
bool syn = !tipc_sk_type_connectionless(sk);
+ struct tipc_group *grp = tsk->group;
struct tipc_msg *hdr = &tsk->phdr;
struct tipc_name_seq *seq;
struct sk_buff_head pkts;
@@ -941,18 +1272,31 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
if (unlikely(dlen > TIPC_MAX_USER_MSG_SIZE))
return -EMSGSIZE;
+ if (likely(dest)) {
+ if (unlikely(m->msg_namelen < sizeof(*dest)))
+ return -EINVAL;
+ if (unlikely(dest->family != AF_TIPC))
+ return -EINVAL;
+ }
+
+ if (grp) {
+ if (!dest)
+ return tipc_send_group_bcast(sock, m, dlen, timeout);
+ if (dest->addrtype == TIPC_ADDR_NAME)
+ return tipc_send_group_anycast(sock, m, dlen, timeout);
+ if (dest->addrtype == TIPC_ADDR_ID)
+ return tipc_send_group_unicast(sock, m, dlen, timeout);
+ if (dest->addrtype == TIPC_ADDR_MCAST)
+ return tipc_send_group_mcast(sock, m, dlen, timeout);
+ return -EINVAL;
+ }
+
if (unlikely(!dest)) {
dest = &tsk->peer;
if (!syn || dest->family != AF_TIPC)
return -EDESTADDRREQ;
}
- if (unlikely(m->msg_namelen < sizeof(*dest)))
- return -EINVAL;
-
- if (unlikely(dest->family != AF_TIPC))
- return -EINVAL;
-
if (unlikely(syn)) {
if (sk->sk_state == TIPC_LISTEN)
return -EPIPE;
@@ -985,7 +1329,6 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
msg_set_destport(hdr, dport);
if (unlikely(!dport && !dnode))
return -EHOSTUNREACH;
-
} else if (dest->addrtype == TIPC_ADDR_ID) {
dnode = dest->addr.id.node;
msg_set_type(hdr, TIPC_DIRECT_MSG);
@@ -996,7 +1339,8 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
}
/* Block or return if destination link is congested */
- rc = tipc_wait_for_cond(sock, &timeout, !u32_find(clinks, dnode));
+ rc = tipc_wait_for_cond(sock, &timeout,
+ !tipc_dest_find(clinks, dnode, 0));
if (unlikely(rc))
return rc;
@@ -1008,7 +1352,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid);
if (unlikely(rc == -ELINKCONG)) {
- u32_push(clinks, dnode);
+ tipc_dest_push(clinks, dnode, 0);
tsk->cong_link_cnt++;
rc = 0;
}
@@ -1128,7 +1472,7 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port,
msg_set_lookup_scope(msg, 0);
msg_set_hdr_sz(msg, SHORT_H_SIZE);
- sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTERVAL);
+ sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTV);
tipc_set_sk_state(sk, TIPC_ESTABLISHED);
tipc_node_add_conn(net, peer_node, tsk->portid, peer_port);
tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid);
@@ -1142,26 +1486,38 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port,
}
/**
- * set_orig_addr - capture sender's address for received message
+ * tipc_sk_set_orig_addr - capture sender's address for received message
* @m: descriptor for message info
- * @msg: received message header
+ * @hdr: received message header
*
* Note: Address is not captured if not requested by receiver.
*/
-static void set_orig_addr(struct msghdr *m, struct tipc_msg *msg)
+static void tipc_sk_set_orig_addr(struct msghdr *m, struct sk_buff *skb)
{
- DECLARE_SOCKADDR(struct sockaddr_tipc *, addr, m->msg_name);
+ DECLARE_SOCKADDR(struct sockaddr_pair *, srcaddr, m->msg_name);
+ struct tipc_msg *hdr = buf_msg(skb);
- if (addr) {
- addr->family = AF_TIPC;
- addr->addrtype = TIPC_ADDR_ID;
- memset(&addr->addr, 0, sizeof(addr->addr));
- addr->addr.id.ref = msg_origport(msg);
- addr->addr.id.node = msg_orignode(msg);
- addr->addr.name.domain = 0; /* could leave uninitialized */
- addr->scope = 0; /* could leave uninitialized */
- m->msg_namelen = sizeof(struct sockaddr_tipc);
- }
+ if (!srcaddr)
+ return;
+
+ srcaddr->sock.family = AF_TIPC;
+ srcaddr->sock.addrtype = TIPC_ADDR_ID;
+ srcaddr->sock.addr.id.ref = msg_origport(hdr);
+ srcaddr->sock.addr.id.node = msg_orignode(hdr);
+ srcaddr->sock.addr.name.domain = 0;
+ srcaddr->sock.scope = 0;
+ m->msg_namelen = sizeof(struct sockaddr_tipc);
+
+ if (!msg_in_group(hdr))
+ return;
+
+ /* Group message users may also want to know sending member's id */
+ srcaddr->member.family = AF_TIPC;
+ srcaddr->member.addrtype = TIPC_ADDR_NAME;
+ srcaddr->member.addr.name.name.type = msg_nametype(hdr);
+ srcaddr->member.addr.name.name.instance = TIPC_SKB_CB(skb)->orig_member;
+ srcaddr->member.addr.name.domain = 0;
+ m->msg_namelen = sizeof(*srcaddr);
}
/**
@@ -1318,11 +1674,13 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
size_t buflen, int flags)
{
struct sock *sk = sock->sk;
- struct tipc_sock *tsk = tipc_sk(sk);
- struct sk_buff *skb;
- struct tipc_msg *hdr;
bool connected = !tipc_sk_type_connectionless(sk);
+ struct tipc_sock *tsk = tipc_sk(sk);
int rc, err, hlen, dlen, copy;
+ struct sk_buff_head xmitq;
+ struct tipc_msg *hdr;
+ struct sk_buff *skb;
+ bool grp_evt;
long timeout;
/* Catch invalid receive requests */
@@ -1336,8 +1694,8 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
}
timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+ /* Step rcv queue to first msg with data or error; wait if necessary */
do {
- /* Look at first msg in receive queue; wait if necessary */
rc = tipc_wait_for_rcvmsg(sock, &timeout);
if (unlikely(rc))
goto exit;
@@ -1346,13 +1704,14 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
dlen = msg_data_sz(hdr);
hlen = msg_hdr_sz(hdr);
err = msg_errcode(hdr);
+ grp_evt = msg_is_grp_evt(hdr);
if (likely(dlen || err))
break;
tsk_advance_rx_queue(sk);
} while (1);
/* Collect msg meta data, including error code and rejected data */
- set_orig_addr(m, hdr);
+ tipc_sk_set_orig_addr(m, skb);
rc = tipc_sk_anc_data_recv(m, hdr, tsk);
if (unlikely(rc))
goto exit;
@@ -1372,15 +1731,33 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
if (unlikely(rc))
goto exit;
+ /* Mark message as group event if applicable */
+ if (unlikely(grp_evt)) {
+ if (msg_grp_evt(hdr) == TIPC_WITHDRAWN)
+ m->msg_flags |= MSG_EOR;
+ m->msg_flags |= MSG_OOB;
+ copy = 0;
+ }
+
/* Caption of data or error code/rejected data was successful */
if (unlikely(flags & MSG_PEEK))
goto exit;
+ /* Send group flow control advertisement when applicable */
+ if (tsk->group && msg_in_group(hdr) && !grp_evt) {
+ skb_queue_head_init(&xmitq);
+ tipc_group_update_rcv_win(tsk->group, tsk_blocks(hlen + dlen),
+ msg_orignode(hdr), msg_origport(hdr),
+ &xmitq);
+ tipc_node_distr_xmit(sock_net(sk), &xmitq);
+ }
+
tsk_advance_rx_queue(sk);
+
if (likely(!connected))
goto exit;
- /* Send connection flow control ack when applicable */
+ /* Send connection flow control advertisement when applicable */
tsk->rcv_unacked += tsk_inc(tsk, hlen + dlen);
if (tsk->rcv_unacked >= tsk->rcv_win / TIPC_ACK_RATE)
tipc_sk_send_ack(tsk);
@@ -1446,7 +1823,7 @@ static int tipc_recvstream(struct socket *sock, struct msghdr *m,
/* Collect msg meta data, incl. error code and rejected data */
if (!copied) {
- set_orig_addr(m, hdr);
+ tipc_sk_set_orig_addr(m, skb);
rc = tipc_sk_anc_data_recv(m, hdr, tsk);
if (rc)
break;
@@ -1532,14 +1909,51 @@ static void tipc_sock_destruct(struct sock *sk)
__skb_queue_purge(&sk->sk_receive_queue);
}
+static void tipc_sk_proto_rcv(struct sock *sk,
+ struct sk_buff_head *inputq,
+ struct sk_buff_head *xmitq)
+{
+ struct sk_buff *skb = __skb_dequeue(inputq);
+ struct tipc_sock *tsk = tipc_sk(sk);
+ struct tipc_msg *hdr = buf_msg(skb);
+ struct tipc_group *grp = tsk->group;
+ bool wakeup = false;
+
+ switch (msg_user(hdr)) {
+ case CONN_MANAGER:
+ tipc_sk_conn_proto_rcv(tsk, skb, xmitq);
+ return;
+ case SOCK_WAKEUP:
+ tipc_dest_del(&tsk->cong_links, msg_orignode(hdr), 0);
+ tsk->cong_link_cnt--;
+ wakeup = true;
+ break;
+ case GROUP_PROTOCOL:
+ tipc_group_proto_rcv(grp, &wakeup, hdr, inputq, xmitq);
+ break;
+ case TOP_SRV:
+ tipc_group_member_evt(tsk->group, &wakeup, &sk->sk_rcvbuf,
+ skb, inputq, xmitq);
+ skb = NULL;
+ break;
+ default:
+ break;
+ }
+
+ if (wakeup)
+ sk->sk_write_space(sk);
+
+ kfree_skb(skb);
+}
+
/**
- * filter_connect - Handle all incoming messages for a connection-based socket
+ * tipc_filter_connect - Handle incoming message for a connection-based socket
* @tsk: TIPC socket
* @skb: pointer to message buffer. Set to NULL if buffer is consumed
*
* Returns true if everything ok, false otherwise
*/
-static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb)
+static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb)
{
struct sock *sk = &tsk->sk;
struct net *net = sock_net(sk);
@@ -1643,6 +2057,9 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb)
struct tipc_sock *tsk = tipc_sk(sk);
struct tipc_msg *hdr = buf_msg(skb);
+ if (unlikely(msg_in_group(hdr)))
+ return sk->sk_rcvbuf;
+
if (unlikely(!msg_connected(hdr)))
return sk->sk_rcvbuf << msg_importance(hdr);
@@ -1653,7 +2070,7 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb)
}
/**
- * filter_rcv - validate incoming message
+ * tipc_sk_filter_rcv - validate incoming message
* @sk: socket
* @skb: pointer to message.
*
@@ -1662,99 +2079,71 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb)
*
* Called with socket lock already taken
*
- * Returns true if message was added to socket receive queue, otherwise false
*/
-static bool filter_rcv(struct sock *sk, struct sk_buff *skb,
- struct sk_buff_head *xmitq)
+static void tipc_sk_filter_rcv(struct sock *sk, struct sk_buff *skb,
+ struct sk_buff_head *xmitq)
{
+ bool sk_conn = !tipc_sk_type_connectionless(sk);
struct tipc_sock *tsk = tipc_sk(sk);
+ struct tipc_group *grp = tsk->group;
struct tipc_msg *hdr = buf_msg(skb);
- unsigned int limit = rcvbuf_limit(sk, skb);
- int err = TIPC_OK;
- int usr = msg_user(hdr);
- u32 onode;
+ struct net *net = sock_net(sk);
+ struct sk_buff_head inputq;
+ int limit, err = TIPC_OK;
- if (unlikely(msg_user(hdr) == CONN_MANAGER)) {
- tipc_sk_proto_rcv(tsk, skb, xmitq);
- return false;
- }
+ TIPC_SKB_CB(skb)->bytes_read = 0;
+ __skb_queue_head_init(&inputq);
+ __skb_queue_tail(&inputq, skb);
- if (unlikely(usr == SOCK_WAKEUP)) {
- onode = msg_orignode(hdr);
- kfree_skb(skb);
- u32_del(&tsk->cong_links, onode);
- tsk->cong_link_cnt--;
- sk->sk_write_space(sk);
- return false;
- }
+ if (unlikely(!msg_isdata(hdr)))
+ tipc_sk_proto_rcv(sk, &inputq, xmitq);
- /* Drop if illegal message type */
- if (unlikely(msg_type(hdr) > TIPC_DIRECT_MSG)) {
- kfree_skb(skb);
- return false;
- }
+ if (unlikely(grp))
+ tipc_group_filter_msg(grp, &inputq, xmitq);
- /* Reject if wrong message type for current socket state */
- if (tipc_sk_type_connectionless(sk)) {
- if (msg_connected(hdr)) {
+ /* Validate and add to receive buffer if there is space */
+ while ((skb = __skb_dequeue(&inputq))) {
+ hdr = buf_msg(skb);
+ limit = rcvbuf_limit(sk, skb);
+ if ((sk_conn && !tipc_sk_filter_connect(tsk, skb)) ||
+ (!sk_conn && msg_connected(hdr)) ||
+ (!grp && msg_in_group(hdr)))
err = TIPC_ERR_NO_PORT;
- goto reject;
- }
- } else if (unlikely(!filter_connect(tsk, skb))) {
- err = TIPC_ERR_NO_PORT;
- goto reject;
- }
+ else if (sk_rmem_alloc_get(sk) + skb->truesize >= limit)
+ err = TIPC_ERR_OVERLOAD;
- /* Reject message if there isn't room to queue it */
- if (unlikely(sk_rmem_alloc_get(sk) + skb->truesize >= limit)) {
- err = TIPC_ERR_OVERLOAD;
- goto reject;
+ if (unlikely(err)) {
+ tipc_skb_reject(net, err, skb, xmitq);
+ err = TIPC_OK;
+ continue;
+ }
+ __skb_queue_tail(&sk->sk_receive_queue, skb);
+ skb_set_owner_r(skb, sk);
+ sk->sk_data_ready(sk);
}
-
- /* Enqueue message */
- TIPC_SKB_CB(skb)->bytes_read = 0;
- __skb_queue_tail(&sk->sk_receive_queue, skb);
- skb_set_owner_r(skb, sk);
-
- sk->sk_data_ready(sk);
- return true;
-
-reject:
- if (tipc_msg_reverse(tsk_own_node(tsk), &skb, err))
- __skb_queue_tail(xmitq, skb);
- return false;
}
/**
- * tipc_backlog_rcv - handle incoming message from backlog queue
+ * tipc_sk_backlog_rcv - handle incoming message from backlog queue
* @sk: socket
* @skb: message
*
* Caller must hold socket lock
- *
- * Returns 0
*/
-static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb)
+static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
{
- unsigned int truesize = skb->truesize;
+ unsigned int before = sk_rmem_alloc_get(sk);
struct sk_buff_head xmitq;
- u32 dnode, selector;
+ unsigned int added;
__skb_queue_head_init(&xmitq);
- if (likely(filter_rcv(sk, skb, &xmitq))) {
- atomic_add(truesize, &tipc_sk(sk)->dupl_rcvcnt);
- return 0;
- }
-
- if (skb_queue_empty(&xmitq))
- return 0;
+ tipc_sk_filter_rcv(sk, skb, &xmitq);
+ added = sk_rmem_alloc_get(sk) - before;
+ atomic_add(added, &tipc_sk(sk)->dupl_rcvcnt);
- /* Send response/rejected message */
- skb = __skb_dequeue(&xmitq);
- dnode = msg_destnode(buf_msg(skb));
- selector = msg_origport(buf_msg(skb));
- tipc_node_xmit_skb(sock_net(sk), skb, dnode, selector);
+ /* Send pending response/rejected messages, if any */
+ tipc_node_distr_xmit(sock_net(sk), &xmitq);
return 0;
}
@@ -1786,7 +2175,7 @@ static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk,
/* Add message directly to receive queue if possible */
if (!sock_owned_by_user(sk)) {
- filter_rcv(sk, skb, xmitq);
+ tipc_sk_filter_rcv(sk, skb, xmitq);
continue;
}
@@ -1833,14 +2222,10 @@ void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq)
spin_unlock_bh(&sk->sk_lock.slock);
}
/* Send pending response/rejected messages, if any */
- while ((skb = __skb_dequeue(&xmitq))) {
- dnode = msg_destnode(buf_msg(skb));
- tipc_node_xmit_skb(net, skb, dnode, dport);
- }
+ tipc_node_distr_xmit(sock_net(sk), &xmitq);
sock_put(sk);
continue;
}
-
/* No destination socket => dequeue skb if still there */
skb = tipc_skb_dequeue(inputq, dport);
if (!skb)
@@ -1903,28 +2288,32 @@ static int tipc_connect(struct socket *sock, struct sockaddr *dest,
int previous;
int res = 0;
+ if (destlen != sizeof(struct sockaddr_tipc))
+ return -EINVAL;
+
lock_sock(sk);
- /* DGRAM/RDM connect(), just save the destaddr */
- if (tipc_sk_type_connectionless(sk)) {
- if (dst->family == AF_UNSPEC) {
- memset(&tsk->peer, 0, sizeof(struct sockaddr_tipc));
- } else if (destlen != sizeof(struct sockaddr_tipc)) {
- res = -EINVAL;
- } else {
- memcpy(&tsk->peer, dest, destlen);
- }
+ if (tsk->group) {
+ res = -EINVAL;
goto exit;
}
- /*
- * Reject connection attempt using multicast address
- *
- * Note: send_msg() validates the rest of the address fields,
- * so there's no need to do it here
- */
- if (dst->addrtype == TIPC_ADDR_MCAST) {
+ if (dst->family == AF_UNSPEC) {
+ memset(&tsk->peer, 0, sizeof(struct sockaddr_tipc));
+ if (!tipc_sk_type_connectionless(sk))
+ res = -EINVAL;
+ goto exit;
+ } else if (dst->family != AF_TIPC) {
+ res = -EINVAL;
+ }
+ if (dst->addrtype != TIPC_ADDR_ID && dst->addrtype != TIPC_ADDR_NAME)
res = -EINVAL;
+ if (res)
+ goto exit;
+
+ /* DGRAM/RDM connect(), just save the destaddr */
+ if (tipc_sk_type_connectionless(sk)) {
+ memcpy(&tsk->peer, dest, destlen);
goto exit;
}
@@ -2141,46 +2530,43 @@ static int tipc_shutdown(struct socket *sock, int how)
return res;
}
-static void tipc_sk_timeout(unsigned long data)
+static void tipc_sk_timeout(struct timer_list *t)
{
- struct tipc_sock *tsk = (struct tipc_sock *)data;
- struct sock *sk = &tsk->sk;
- struct sk_buff *skb = NULL;
- u32 peer_port, peer_node;
+ struct sock *sk = from_timer(sk, t, sk_timer);
+ struct tipc_sock *tsk = tipc_sk(sk);
+ u32 peer_port = tsk_peer_port(tsk);
+ u32 peer_node = tsk_peer_node(tsk);
u32 own_node = tsk_own_node(tsk);
+ u32 own_port = tsk->portid;
+ struct net *net = sock_net(sk);
+ struct sk_buff *skb = NULL;
bh_lock_sock(sk);
- if (!tipc_sk_connected(sk)) {
- bh_unlock_sock(sk);
+ if (!tipc_sk_connected(sk))
+ goto exit;
+
+ /* Try again later if socket is busy */
+ if (sock_owned_by_user(sk)) {
+ sk_reset_timer(sk, &sk->sk_timer, jiffies + HZ / 20);
goto exit;
}
- peer_port = tsk_peer_port(tsk);
- peer_node = tsk_peer_node(tsk);
if (tsk->probe_unacked) {
- if (!sock_owned_by_user(sk)) {
- tipc_set_sk_state(sk, TIPC_DISCONNECTING);
- tipc_node_remove_conn(sock_net(sk), tsk_peer_node(tsk),
- tsk_peer_port(tsk));
- sk->sk_state_change(sk);
- } else {
- /* Try again later */
- sk_reset_timer(sk, &sk->sk_timer, (HZ / 20));
- }
-
- bh_unlock_sock(sk);
+ tipc_set_sk_state(sk, TIPC_DISCONNECTING);
+ tipc_node_remove_conn(net, peer_node, peer_port);
+ sk->sk_state_change(sk);
goto exit;
}
-
- skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE,
- INT_H_SIZE, 0, peer_node, own_node,
- peer_port, tsk->portid, TIPC_OK);
+ /* Send new probe */
+ skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, INT_H_SIZE, 0,
+ peer_node, own_node, peer_port, own_port,
+ TIPC_OK);
tsk->probe_unacked = true;
- sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTERVAL);
+ sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTV);
+exit:
bh_unlock_sock(sk);
if (skb)
- tipc_node_xmit_skb(sock_net(sk), skb, peer_node, tsk->portid);
-exit:
+ tipc_node_xmit_skb(net, skb, peer_node, own_port);
sock_put(sk);
}
@@ -2255,8 +2641,8 @@ void tipc_sk_reinit(struct net *net)
do {
tsk = ERR_PTR(rhashtable_walk_start(&iter));
- if (tsk)
- continue;
+ if (IS_ERR(tsk))
+ goto walk_stop;
while ((tsk = rhashtable_walk_next(&iter)) && !IS_ERR(tsk)) {
spin_lock_bh(&tsk->sk.sk_lock.slock);
@@ -2265,7 +2651,7 @@ void tipc_sk_reinit(struct net *net)
msg_set_orignode(msg, tn->own_addr);
spin_unlock_bh(&tsk->sk.sk_lock.slock);
}
-
+walk_stop:
rhashtable_walk_stop(&iter);
} while (tsk == ERR_PTR(-EAGAIN));
}
@@ -2345,6 +2731,58 @@ void tipc_sk_rht_destroy(struct net *net)
rhashtable_destroy(&tn->sk_rht);
}
+static int tipc_sk_join(struct tipc_sock *tsk, struct tipc_group_req *mreq)
+{
+ struct net *net = sock_net(&tsk->sk);
+ u32 domain = addr_domain(net, mreq->scope);
+ struct tipc_group *grp = tsk->group;
+ struct tipc_msg *hdr = &tsk->phdr;
+ struct tipc_name_seq seq;
+ int rc;
+
+ if (mreq->type < TIPC_RESERVED_TYPES)
+ return -EACCES;
+ if (grp)
+ return -EACCES;
+ grp = tipc_group_create(net, tsk->portid, mreq);
+ if (!grp)
+ return -ENOMEM;
+ tsk->group = grp;
+ msg_set_lookup_scope(hdr, mreq->scope);
+ msg_set_nametype(hdr, mreq->type);
+ msg_set_dest_droppable(hdr, true);
+ seq.type = mreq->type;
+ seq.lower = mreq->instance;
+ seq.upper = seq.lower;
+ tipc_nametbl_build_group(net, grp, mreq->type, domain);
+ rc = tipc_sk_publish(tsk, mreq->scope, &seq);
+ if (rc) {
+ tipc_group_delete(net, grp);
+ tsk->group = NULL;
+ }
+
+ /* Eliminate any risk that a broadcast overtakes the sent JOIN */
+ tsk->mc_method.rcast = true;
+ tsk->mc_method.mandatory = true;
+ return rc;
+}
+
+static int tipc_sk_leave(struct tipc_sock *tsk)
+{
+ struct net *net = sock_net(&tsk->sk);
+ struct tipc_group *grp = tsk->group;
+ struct tipc_name_seq seq;
+ int scope;
+
+ if (!grp)
+ return -EINVAL;
+ tipc_group_self(grp, &seq, &scope);
+ tipc_group_delete(net, grp);
+ tsk->group = NULL;
+ tipc_sk_withdraw(tsk, scope, &seq);
+ return 0;
+}
+
/**
* tipc_setsockopt - set socket option
* @sock: socket structure
@@ -2363,6 +2801,7 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
{
struct sock *sk = sock->sk;
struct tipc_sock *tsk = tipc_sk(sk);
+ struct tipc_group_req mreq;
u32 value = 0;
int res = 0;
@@ -2378,9 +2817,14 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
case TIPC_CONN_TIMEOUT:
if (ol < sizeof(value))
return -EINVAL;
- res = get_user(value, (u32 __user *)ov);
- if (res)
- return res;
+ if (get_user(value, (u32 __user *)ov))
+ return -EFAULT;
+ break;
+ case TIPC_GROUP_JOIN:
+ if (ol < sizeof(mreq))
+ return -EINVAL;
+ if (copy_from_user(&mreq, ov, sizeof(mreq)))
+ return -EFAULT;
break;
default:
if (ov || ol)
@@ -2413,6 +2857,12 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
tsk->mc_method.rcast = true;
tsk->mc_method.mandatory = true;
break;
+ case TIPC_GROUP_JOIN:
+ res = tipc_sk_join(tsk, &mreq);
+ break;
+ case TIPC_GROUP_LEAVE:
+ res = tipc_sk_leave(tsk);
+ break;
default:
res = -EINVAL;
}
@@ -2440,7 +2890,8 @@ static int tipc_getsockopt(struct socket *sock, int lvl, int opt,
{
struct sock *sk = sock->sk;
struct tipc_sock *tsk = tipc_sk(sk);
- int len;
+ struct tipc_name_seq seq;
+ int len, scope;
u32 value;
int res;
@@ -2474,6 +2925,12 @@ static int tipc_getsockopt(struct socket *sock, int lvl, int opt,
case TIPC_SOCK_RECVQ_DEPTH:
value = skb_queue_len(&sk->sk_receive_queue);
break;
+ case TIPC_GROUP_JOIN:
+ seq.type = 0;
+ if (tsk->group)
+ tipc_group_self(tsk->group, &seq, &scope);
+ value = seq.type;
+ break;
default:
res = -EINVAL;
}
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 0bf91cd3733c..251065dfd8df 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -52,7 +52,6 @@ struct tipc_subscriber {
struct list_head subscrp_list;
};
-static void tipc_subscrp_delete(struct tipc_subscription *sub);
static void tipc_subscrb_put(struct tipc_subscriber *subscriber);
/**
@@ -134,9 +133,9 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower,
node);
}
-static void tipc_subscrp_timeout(unsigned long data)
+static void tipc_subscrp_timeout(struct timer_list *t)
{
- struct tipc_subscription *sub = (struct tipc_subscription *)data;
+ struct tipc_subscription *sub = from_timer(sub, t, timer);
struct tipc_subscriber *subscriber = sub->subscriber;
spin_lock_bh(&subscriber->lock);
@@ -197,15 +196,19 @@ static void tipc_subscrb_subscrp_delete(struct tipc_subscriber *subscriber,
{
struct list_head *subscription_list = &subscriber->subscrp_list;
struct tipc_subscription *sub, *temp;
+ u32 timeout;
spin_lock_bh(&subscriber->lock);
list_for_each_entry_safe(sub, temp, subscription_list, subscrp_list) {
if (s && memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr)))
continue;
- tipc_nametbl_unsubscribe(sub);
- list_del(&sub->subscrp_list);
- tipc_subscrp_delete(sub);
+ timeout = htohl(sub->evt.s.timeout, sub->swap);
+ if (timeout == TIPC_WAIT_FOREVER || del_timer(&sub->timer)) {
+ tipc_nametbl_unsubscribe(sub);
+ list_del(&sub->subscrp_list);
+ tipc_subscrp_put(sub);
+ }
if (s)
break;
@@ -236,18 +239,12 @@ static void tipc_subscrb_delete(struct tipc_subscriber *subscriber)
tipc_subscrb_put(subscriber);
}
-static void tipc_subscrp_delete(struct tipc_subscription *sub)
-{
- u32 timeout = htohl(sub->evt.s.timeout, sub->swap);
-
- if (timeout == TIPC_WAIT_FOREVER || del_timer(&sub->timer))
- tipc_subscrp_put(sub);
-}
-
static void tipc_subscrp_cancel(struct tipc_subscr *s,
struct tipc_subscriber *subscriber)
{
+ tipc_subscrb_get(subscriber);
tipc_subscrb_subscrp_delete(subscriber, s);
+ tipc_subscrb_put(subscriber);
}
static struct tipc_subscription *tipc_subscrp_create(struct net *net,
@@ -306,7 +303,7 @@ static void tipc_subscrp_subscribe(struct net *net, struct tipc_subscr *s,
tipc_subscrb_get(subscriber);
spin_unlock_bh(&subscriber->lock);
- setup_timer(&sub->timer, tipc_subscrp_timeout, (unsigned long)sub);
+ timer_setup(&sub->timer, tipc_subscrp_timeout, 0);
timeout = htohl(sub->evt.s.timeout, swap);
if (timeout != TIPC_WAIT_FOREVER)
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 60aff60e30ad..e07ee3ae0023 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -45,8 +45,18 @@ MODULE_AUTHOR("Mellanox Technologies");
MODULE_DESCRIPTION("Transport Layer Security Support");
MODULE_LICENSE("Dual BSD/GPL");
-static struct proto tls_base_prot;
-static struct proto tls_sw_prot;
+enum {
+ TLS_BASE_TX,
+ TLS_SW_TX,
+ TLS_NUM_CONFIG,
+};
+
+static struct proto tls_prots[TLS_NUM_CONFIG];
+
+static inline void update_sk_prot(struct sock *sk, struct tls_context *ctx)
+{
+ sk->sk_prot = &tls_prots[ctx->tx_conf];
+}
int wait_on_pending_writer(struct sock *sk, long *timeo)
{
@@ -216,6 +226,12 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
void (*sk_proto_close)(struct sock *sk, long timeout);
lock_sock(sk);
+ sk_proto_close = ctx->sk_proto_close;
+
+ if (ctx->tx_conf == TLS_BASE_TX) {
+ kfree(ctx);
+ goto skip_tx_cleanup;
+ }
if (!tls_complete_pending_work(sk, ctx, 0, &timeo))
tls_handle_open_record(sk, 0);
@@ -232,13 +248,14 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
sg++;
}
}
- ctx->free_resources(sk);
+
kfree(ctx->rec_seq);
kfree(ctx->iv);
- sk_proto_close = ctx->sk_proto_close;
- kfree(ctx);
+ if (ctx->tx_conf == TLS_SW_TX)
+ tls_sw_free_tx_resources(sk);
+skip_tx_cleanup:
release_sock(sk);
sk_proto_close(sk, timeout);
}
@@ -338,46 +355,41 @@ static int tls_getsockopt(struct sock *sk, int level, int optname,
static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval,
unsigned int optlen)
{
- struct tls_crypto_info *crypto_info, tmp_crypto_info;
+ struct tls_crypto_info *crypto_info;
struct tls_context *ctx = tls_get_ctx(sk);
- struct proto *prot = NULL;
int rc = 0;
+ int tx_conf;
if (!optval || (optlen < sizeof(*crypto_info))) {
rc = -EINVAL;
goto out;
}
- rc = copy_from_user(&tmp_crypto_info, optval, sizeof(*crypto_info));
+ crypto_info = &ctx->crypto_send;
+ /* Currently we don't support set crypto info more than one time */
+ if (TLS_CRYPTO_INFO_READY(crypto_info))
+ goto out;
+
+ rc = copy_from_user(crypto_info, optval, sizeof(*crypto_info));
if (rc) {
rc = -EFAULT;
goto out;
}
/* check version */
- if (tmp_crypto_info.version != TLS_1_2_VERSION) {
+ if (crypto_info->version != TLS_1_2_VERSION) {
rc = -ENOTSUPP;
- goto out;
+ goto err_crypto_info;
}
- /* get user crypto info */
- crypto_info = &ctx->crypto_send;
-
- /* Currently we don't support set crypto info more than one time */
- if (TLS_CRYPTO_INFO_READY(crypto_info))
- goto out;
-
- switch (tmp_crypto_info.cipher_type) {
+ switch (crypto_info->cipher_type) {
case TLS_CIPHER_AES_GCM_128: {
if (optlen != sizeof(struct tls12_crypto_info_aes_gcm_128)) {
rc = -EINVAL;
goto out;
}
- rc = copy_from_user(
- crypto_info,
- optval,
- sizeof(struct tls12_crypto_info_aes_gcm_128));
-
+ rc = copy_from_user(crypto_info + 1, optval + sizeof(*crypto_info),
+ optlen - sizeof(*crypto_info));
if (rc) {
rc = -EFAULT;
goto err_crypto_info;
@@ -389,18 +401,16 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval,
goto out;
}
- ctx->sk_write_space = sk->sk_write_space;
- sk->sk_write_space = tls_write_space;
-
- ctx->sk_proto_close = sk->sk_prot->close;
-
/* currently SW is default, we will have ethtool in future */
rc = tls_set_sw_offload(sk, ctx);
- prot = &tls_sw_prot;
+ tx_conf = TLS_SW_TX;
if (rc)
goto err_crypto_info;
- sk->sk_prot = prot;
+ ctx->tx_conf = tx_conf;
+ update_sk_prot(sk, ctx);
+ ctx->sk_write_space = sk->sk_write_space;
+ sk->sk_write_space = tls_write_space;
goto out;
err_crypto_info:
@@ -453,7 +463,10 @@ static int tls_init(struct sock *sk)
icsk->icsk_ulp_data = ctx;
ctx->setsockopt = sk->sk_prot->setsockopt;
ctx->getsockopt = sk->sk_prot->getsockopt;
- sk->sk_prot = &tls_base_prot;
+ ctx->sk_proto_close = sk->sk_prot->close;
+
+ ctx->tx_conf = TLS_BASE_TX;
+ update_sk_prot(sk, ctx);
out:
return rc;
}
@@ -464,16 +477,21 @@ static struct tcp_ulp_ops tcp_tls_ulp_ops __read_mostly = {
.init = tls_init,
};
+static void build_protos(struct proto *prot, struct proto *base)
+{
+ prot[TLS_BASE_TX] = *base;
+ prot[TLS_BASE_TX].setsockopt = tls_setsockopt;
+ prot[TLS_BASE_TX].getsockopt = tls_getsockopt;
+ prot[TLS_BASE_TX].close = tls_sk_proto_close;
+
+ prot[TLS_SW_TX] = prot[TLS_BASE_TX];
+ prot[TLS_SW_TX].sendmsg = tls_sw_sendmsg;
+ prot[TLS_SW_TX].sendpage = tls_sw_sendpage;
+}
+
static int __init tls_register(void)
{
- tls_base_prot = tcp_prot;
- tls_base_prot.setsockopt = tls_setsockopt;
- tls_base_prot.getsockopt = tls_getsockopt;
-
- tls_sw_prot = tls_base_prot;
- tls_sw_prot.sendmsg = tls_sw_sendmsg;
- tls_sw_prot.sendpage = tls_sw_sendpage;
- tls_sw_prot.close = tls_sk_proto_close;
+ build_protos(tls_prots, &tcp_prot);
tcp_register_ulp(&tcp_tls_ulp_ops);
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index fa596fa71ba7..73d19210dd49 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -39,22 +39,6 @@
#include <net/tls.h>
-static inline void tls_make_aad(int recv,
- char *buf,
- size_t size,
- char *record_sequence,
- int record_sequence_size,
- unsigned char record_type)
-{
- memcpy(buf, record_sequence, record_sequence_size);
-
- buf[8] = record_type;
- buf[9] = TLS_1_2_VERSION_MAJOR;
- buf[10] = TLS_1_2_VERSION_MINOR;
- buf[11] = size >> 8;
- buf[12] = size & 0xFF;
-}
-
static void trim_sg(struct sock *sk, struct scatterlist *sg,
int *sg_num_elem, unsigned int *sg_size, int target_size)
{
@@ -219,7 +203,7 @@ static int tls_do_encryption(struct tls_context *tls_ctx,
struct aead_request *aead_req;
int rc;
- aead_req = kmalloc(req_size, flags);
+ aead_req = kzalloc(req_size, flags);
if (!aead_req)
return -ENOMEM;
@@ -249,7 +233,7 @@ static int tls_push_record(struct sock *sk, int flags,
sg_mark_end(ctx->sg_plaintext_data + ctx->sg_plaintext_num_elem - 1);
sg_mark_end(ctx->sg_encrypted_data + ctx->sg_encrypted_num_elem - 1);
- tls_make_aad(0, ctx->aad_space, ctx->sg_plaintext_size,
+ tls_make_aad(ctx->aad_space, ctx->sg_plaintext_size,
tls_ctx->rec_seq, tls_ctx->rec_seq_size,
record_type);
@@ -639,7 +623,7 @@ sendpage_end:
return ret;
}
-void tls_sw_free_resources(struct sock *sk)
+void tls_sw_free_tx_resources(struct sock *sk)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
@@ -650,6 +634,7 @@ void tls_sw_free_resources(struct sock *sk)
tls_free_both_sg(sk);
kfree(ctx);
+ kfree(tls_ctx);
}
int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx)
@@ -679,7 +664,6 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx)
}
ctx->priv_ctx = (struct tls_offload_context *)sw_ctx;
- ctx->free_resources = tls_sw_free_resources;
crypto_info = &ctx->crypto_send;
switch (crypto_info->cipher_type) {
diff --git a/net/unix/Makefile b/net/unix/Makefile
index b663c607b1c6..ffd0a275c3a7 100644
--- a/net/unix/Makefile
+++ b/net/unix/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the Linux unix domain socket layer.
#
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 7b52a380d710..a9ee634f3c42 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -814,6 +814,7 @@ static int unix_create(struct net *net, struct socket *sock, int protocol,
*/
case SOCK_RAW:
sock->type = SOCK_DGRAM;
+ /* fall through */
case SOCK_DGRAM:
sock->ops = &unix_dgram_ops;
break;
@@ -1528,26 +1529,13 @@ static inline bool too_many_unix_fds(struct task_struct *p)
return false;
}
-#define MAX_RECURSION_LEVEL 4
-
static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
int i;
- unsigned char max_level = 0;
if (too_many_unix_fds(current))
return -ETOOMANYREFS;
- for (i = scm->fp->count - 1; i >= 0; i--) {
- struct sock *sk = unix_get_socket(scm->fp->fp[i]);
-
- if (sk)
- max_level = max(max_level,
- unix_sk(sk)->recursion_level);
- }
- if (unlikely(max_level > MAX_RECURSION_LEVEL))
- return -ETOOMANYREFS;
-
/*
* Need to duplicate file references for the sake of garbage
* collection. Otherwise a socket in the fps might become a
@@ -1559,7 +1547,7 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
for (i = scm->fp->count - 1; i >= 0; i--)
unix_inflight(scm->fp->user, scm->fp->fp[i]);
- return max_level;
+ return 0;
}
static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
@@ -1649,7 +1637,6 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
struct sk_buff *skb;
long timeo;
struct scm_cookie scm;
- int max_level;
int data_len = 0;
int sk_locked;
@@ -1701,7 +1688,6 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
err = unix_scm_to_skb(&scm, skb, true);
if (err < 0)
goto out_free;
- max_level = err + 1;
skb_put(skb, len - data_len);
skb->data_len = data_len;
@@ -1819,8 +1805,6 @@ restart_locked:
__net_timestamp(skb);
maybe_add_creds(skb, sock, other);
skb_queue_tail(&other->sk_receive_queue, skb);
- if (max_level > unix_sk(other)->recursion_level)
- unix_sk(other)->recursion_level = max_level;
unix_state_unlock(other);
other->sk_data_ready(other);
sock_put(other);
@@ -1855,7 +1839,6 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
int sent = 0;
struct scm_cookie scm;
bool fds_sent = false;
- int max_level;
int data_len;
wait_for_unix_gc();
@@ -1905,7 +1888,6 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
kfree_skb(skb);
goto out_err;
}
- max_level = err + 1;
fds_sent = true;
skb_put(skb, size - data_len);
@@ -1925,8 +1907,6 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
maybe_add_creds(skb, sock, other);
skb_queue_tail(&other->sk_receive_queue, skb);
- if (max_level > unix_sk(other)->recursion_level)
- unix_sk(other)->recursion_level = max_level;
unix_state_unlock(other);
other->sk_data_ready(other);
sent += size;
@@ -2304,10 +2284,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state,
*/
mutex_lock(&u->iolock);
- if (flags & MSG_PEEK)
- skip = sk_peek_offset(sk, flags);
- else
- skip = 0;
+ skip = max(sk_peek_offset(sk, flags), 0);
do {
int chunk;
@@ -2324,7 +2301,6 @@ redo:
last_len = last ? last->len : 0;
again:
if (skb == NULL) {
- unix_sk(sk)->recursion_level = 0;
if (copied >= target)
goto unlock;
diff --git a/net/unix/diag.c b/net/unix/diag.c
index 4d9679701a6d..384c84e83462 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -257,6 +257,8 @@ static int unix_diag_get_exact(struct sk_buff *in_skb,
err = -ENOENT;
if (sk == NULL)
goto out_nosk;
+ if (!net_eq(sock_net(sk), net))
+ goto out;
err = sock_diag_check_cookie(sk, req->udiag_cookie);
if (err)
diff --git a/net/vmw_vsock/Kconfig b/net/vmw_vsock/Kconfig
index 8831e7c42167..970f96489fe7 100644
--- a/net/vmw_vsock/Kconfig
+++ b/net/vmw_vsock/Kconfig
@@ -15,6 +15,16 @@ config VSOCKETS
To compile this driver as a module, choose M here: the module
will be called vsock. If unsure, say N.
+config VSOCKETS_DIAG
+ tristate "Virtual Sockets monitoring interface"
+ depends on VSOCKETS
+ default y
+ help
+ Support for PF_VSOCK sockets monitoring interface used by the ss tool.
+ If unsure, say Y.
+
+ Enable this module so userspace applications can query open sockets.
+
config VMWARE_VMCI_VSOCKETS
tristate "VMware VMCI transport for Virtual Sockets"
depends on VSOCKETS && VMWARE_VMCI
@@ -46,3 +56,15 @@ config VIRTIO_VSOCKETS_COMMON
This option is selected by any driver which needs to access
the virtio_vsock. The module will be called
vmw_vsock_virtio_transport_common.
+
+config HYPERV_VSOCKETS
+ tristate "Hyper-V transport for Virtual Sockets"
+ depends on VSOCKETS && HYPERV
+ help
+ This module implements a Hyper-V transport for Virtual Sockets.
+
+ Enable this transport if your Virtual Machine host supports Virtual
+ Sockets over Hyper-V VMBus.
+
+ To compile this driver as a module, choose M here: the module will be
+ called hv_sock. If unsure, say N.
diff --git a/net/vmw_vsock/Makefile b/net/vmw_vsock/Makefile
index 09fc2eb29dc8..7c6f9a0b67b0 100644
--- a/net/vmw_vsock/Makefile
+++ b/net/vmw_vsock/Makefile
@@ -1,13 +1,20 @@
+# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_VSOCKETS) += vsock.o
+obj-$(CONFIG_VSOCKETS_DIAG) += vsock_diag.o
obj-$(CONFIG_VMWARE_VMCI_VSOCKETS) += vmw_vsock_vmci_transport.o
obj-$(CONFIG_VIRTIO_VSOCKETS) += vmw_vsock_virtio_transport.o
obj-$(CONFIG_VIRTIO_VSOCKETS_COMMON) += vmw_vsock_virtio_transport_common.o
+obj-$(CONFIG_HYPERV_VSOCKETS) += hv_sock.o
vsock-y += af_vsock.o af_vsock_tap.o vsock_addr.o
+vsock_diag-y += diag.o
+
vmw_vsock_vmci_transport-y += vmci_transport.o vmci_transport_notify.o \
vmci_transport_notify_qstate.o
vmw_vsock_virtio_transport-y += virtio_transport.o
vmw_vsock_virtio_transport_common-y += virtio_transport_common.o
+
+hv_sock-y += hyperv_transport.o
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index dfc8c51e4d74..5d28abf87fbf 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -36,7 +36,7 @@
* not support simultaneous connects (two "client" sockets connecting).
*
* - "Server" sockets are referred to as listener sockets throughout this
- * implementation because they are in the VSOCK_SS_LISTEN state. When a
+ * implementation because they are in the TCP_LISTEN state. When a
* connection request is received (the second kind of socket mentioned above),
* we create a new socket and refer to it as a pending socket. These pending
* sockets are placed on the pending connection list of the listener socket.
@@ -82,6 +82,15 @@
* argument, we must ensure the reference count is increased to ensure the
* socket isn't freed before the function is run; the deferred function will
* then drop the reference.
+ *
+ * - sk->sk_state uses the TCP state constants because they are widely used by
+ * other address families and exposed to userspace tools like ss(8):
+ *
+ * TCP_CLOSE - unconnected
+ * TCP_SYN_SENT - connecting
+ * TCP_ESTABLISHED - connected
+ * TCP_CLOSING - disconnecting
+ * TCP_LISTEN - listening
*/
#include <linux/types.h>
@@ -153,7 +162,6 @@ EXPORT_SYMBOL_GPL(vm_sockets_get_local_cid);
* vsock_bind_table[VSOCK_HASH_SIZE] is for unbound sockets. The hash function
* mods with VSOCK_HASH_SIZE to ensure this.
*/
-#define VSOCK_HASH_SIZE 251
#define MAX_PORT_RETRIES 24
#define VSOCK_HASH(addr) ((addr)->svm_port % VSOCK_HASH_SIZE)
@@ -168,9 +176,12 @@ EXPORT_SYMBOL_GPL(vm_sockets_get_local_cid);
#define vsock_connected_sockets_vsk(vsk) \
vsock_connected_sockets(&(vsk)->remote_addr, &(vsk)->local_addr)
-static struct list_head vsock_bind_table[VSOCK_HASH_SIZE + 1];
-static struct list_head vsock_connected_table[VSOCK_HASH_SIZE];
-static DEFINE_SPINLOCK(vsock_table_lock);
+struct list_head vsock_bind_table[VSOCK_HASH_SIZE + 1];
+EXPORT_SYMBOL_GPL(vsock_bind_table);
+struct list_head vsock_connected_table[VSOCK_HASH_SIZE];
+EXPORT_SYMBOL_GPL(vsock_connected_table);
+DEFINE_SPINLOCK(vsock_table_lock);
+EXPORT_SYMBOL_GPL(vsock_table_lock);
/* Autobind this socket to the local address if necessary. */
static int vsock_auto_bind(struct vsock_sock *vsk)
@@ -184,7 +195,7 @@ static int vsock_auto_bind(struct vsock_sock *vsk)
return __vsock_bind(sk, &local_addr);
}
-static void vsock_init_tables(void)
+static int __init vsock_init_tables(void)
{
int i;
@@ -193,6 +204,7 @@ static void vsock_init_tables(void)
for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++)
INIT_LIST_HEAD(&vsock_connected_table[i]);
+ return 0;
}
static void __vsock_insert_bound(struct list_head *list,
@@ -248,16 +260,6 @@ static struct sock *__vsock_find_connected_socket(struct sockaddr_vm *src,
return NULL;
}
-static bool __vsock_in_bound_table(struct vsock_sock *vsk)
-{
- return !list_empty(&vsk->bound_table);
-}
-
-static bool __vsock_in_connected_table(struct vsock_sock *vsk)
-{
- return !list_empty(&vsk->connected_table);
-}
-
static void vsock_insert_unbound(struct vsock_sock *vsk)
{
spin_lock_bh(&vsock_table_lock);
@@ -485,7 +487,7 @@ void vsock_pending_work(struct work_struct *work)
if (vsock_in_connected_table(vsk))
vsock_remove_connected(vsk);
- sk->sk_state = SS_FREE;
+ sk->sk_state = TCP_CLOSE;
out:
release_sock(sk);
@@ -625,7 +627,6 @@ struct sock *__vsock_create(struct net *net,
sk->sk_destruct = vsock_sk_destruct;
sk->sk_backlog_rcv = vsock_queue_rcv_skb;
- sk->sk_state = 0;
sock_reset_flag(sk, SOCK_DONE);
INIT_LIST_HEAD(&vsk->bound_table);
@@ -899,7 +900,7 @@ static unsigned int vsock_poll(struct file *file, struct socket *sock,
/* Listening sockets that have connections in their accept
* queue can be read.
*/
- if (sk->sk_state == VSOCK_SS_LISTEN
+ if (sk->sk_state == TCP_LISTEN
&& !vsock_is_accept_queue_empty(sk))
mask |= POLLIN | POLLRDNORM;
@@ -928,7 +929,7 @@ static unsigned int vsock_poll(struct file *file, struct socket *sock,
}
/* Connected sockets that can produce data can be written. */
- if (sk->sk_state == SS_CONNECTED) {
+ if (sk->sk_state == TCP_ESTABLISHED) {
if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
bool space_avail_now = false;
int ret = transport->notify_poll_out(
@@ -950,7 +951,7 @@ static unsigned int vsock_poll(struct file *file, struct socket *sock,
* POLLOUT|POLLWRNORM when peer is closed and nothing to read,
* but local send is not shutdown.
*/
- if (sk->sk_state == SS_UNCONNECTED) {
+ if (sk->sk_state == TCP_CLOSE) {
if (!(sk->sk_shutdown & SEND_SHUTDOWN))
mask |= POLLOUT | POLLWRNORM;
@@ -1120,9 +1121,9 @@ static void vsock_connect_timeout(struct work_struct *work)
sk = sk_vsock(vsk);
lock_sock(sk);
- if (sk->sk_state == SS_CONNECTING &&
+ if (sk->sk_state == TCP_SYN_SENT &&
(sk->sk_shutdown != SHUTDOWN_MASK)) {
- sk->sk_state = SS_UNCONNECTED;
+ sk->sk_state = TCP_CLOSE;
sk->sk_err = ETIMEDOUT;
sk->sk_error_report(sk);
cancel = 1;
@@ -1168,7 +1169,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
err = -EALREADY;
break;
default:
- if ((sk->sk_state == VSOCK_SS_LISTEN) ||
+ if ((sk->sk_state == TCP_LISTEN) ||
vsock_addr_cast(addr, addr_len, &remote_addr) != 0) {
err = -EINVAL;
goto out;
@@ -1191,7 +1192,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
if (err)
goto out;
- sk->sk_state = SS_CONNECTING;
+ sk->sk_state = TCP_SYN_SENT;
err = transport->connect(vsk);
if (err < 0)
@@ -1211,7 +1212,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
timeout = vsk->connect_timeout;
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
- while (sk->sk_state != SS_CONNECTED && sk->sk_err == 0) {
+ while (sk->sk_state != TCP_ESTABLISHED && sk->sk_err == 0) {
if (flags & O_NONBLOCK) {
/* If we're not going to block, we schedule a timeout
* function to generate a timeout on the connection
@@ -1234,13 +1235,13 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
if (signal_pending(current)) {
err = sock_intr_errno(timeout);
- sk->sk_state = SS_UNCONNECTED;
+ sk->sk_state = TCP_CLOSE;
sock->state = SS_UNCONNECTED;
vsock_transport_cancel_pkt(vsk);
goto out_wait;
} else if (timeout == 0) {
err = -ETIMEDOUT;
- sk->sk_state = SS_UNCONNECTED;
+ sk->sk_state = TCP_CLOSE;
sock->state = SS_UNCONNECTED;
vsock_transport_cancel_pkt(vsk);
goto out_wait;
@@ -1251,7 +1252,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
if (sk->sk_err) {
err = -sk->sk_err;
- sk->sk_state = SS_UNCONNECTED;
+ sk->sk_state = TCP_CLOSE;
sock->state = SS_UNCONNECTED;
} else {
err = 0;
@@ -1284,7 +1285,7 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags,
goto out;
}
- if (listener->sk_state != VSOCK_SS_LISTEN) {
+ if (listener->sk_state != TCP_LISTEN) {
err = -EINVAL;
goto out;
}
@@ -1374,7 +1375,7 @@ static int vsock_listen(struct socket *sock, int backlog)
}
sk->sk_max_ack_backlog = backlog;
- sk->sk_state = VSOCK_SS_LISTEN;
+ sk->sk_state = TCP_LISTEN;
err = 0;
@@ -1554,7 +1555,7 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
/* Callers should not provide a destination with stream sockets. */
if (msg->msg_namelen) {
- err = sk->sk_state == SS_CONNECTED ? -EISCONN : -EOPNOTSUPP;
+ err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
goto out;
}
@@ -1565,7 +1566,7 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
goto out;
}
- if (sk->sk_state != SS_CONNECTED ||
+ if (sk->sk_state != TCP_ESTABLISHED ||
!vsock_addr_bound(&vsk->local_addr)) {
err = -ENOTCONN;
goto out;
@@ -1689,7 +1690,7 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
lock_sock(sk);
- if (sk->sk_state != SS_CONNECTED) {
+ if (sk->sk_state != TCP_ESTABLISHED) {
/* Recvmsg is supposed to return 0 if a peer performs an
* orderly shutdown. Differentiate between that case and when a
* peer has not connected or a local shutdown occured with the
@@ -1957,8 +1958,6 @@ int __vsock_core_init(const struct vsock_transport *t, struct module *owner)
vsock_proto.owner = owner;
transport = t;
- vsock_init_tables();
-
vsock_device.minor = MISC_DYNAMIC_MINOR;
err = misc_register(&vsock_device);
if (err) {
@@ -2019,6 +2018,8 @@ const struct vsock_transport *vsock_core_get_transport(void)
}
EXPORT_SYMBOL_GPL(vsock_core_get_transport);
+module_init(vsock_init_tables);
+
MODULE_AUTHOR("VMware, Inc.");
MODULE_DESCRIPTION("VMware Virtual Socket Family");
MODULE_VERSION("1.0.2.0-k");
diff --git a/net/vmw_vsock/diag.c b/net/vmw_vsock/diag.c
new file mode 100644
index 000000000000..31b567652250
--- /dev/null
+++ b/net/vmw_vsock/diag.c
@@ -0,0 +1,186 @@
+/*
+ * vsock sock_diag(7) module
+ *
+ * Copyright (C) 2017 Red Hat, Inc.
+ * Author: Stefan Hajnoczi <stefanha@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/module.h>
+#include <linux/sock_diag.h>
+#include <linux/vm_sockets_diag.h>
+#include <net/af_vsock.h>
+
+static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
+ u32 portid, u32 seq, u32 flags)
+{
+ struct vsock_sock *vsk = vsock_sk(sk);
+ struct vsock_diag_msg *rep;
+ struct nlmsghdr *nlh;
+
+ nlh = nlmsg_put(skb, portid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rep),
+ flags);
+ if (!nlh)
+ return -EMSGSIZE;
+
+ rep = nlmsg_data(nlh);
+ rep->vdiag_family = AF_VSOCK;
+
+ /* Lock order dictates that sk_lock is acquired before
+ * vsock_table_lock, so we cannot lock here. Simply don't take
+ * sk_lock; sk is guaranteed to stay alive since vsock_table_lock is
+ * held.
+ */
+ rep->vdiag_type = sk->sk_type;
+ rep->vdiag_state = sk->sk_state;
+ rep->vdiag_shutdown = sk->sk_shutdown;
+ rep->vdiag_src_cid = vsk->local_addr.svm_cid;
+ rep->vdiag_src_port = vsk->local_addr.svm_port;
+ rep->vdiag_dst_cid = vsk->remote_addr.svm_cid;
+ rep->vdiag_dst_port = vsk->remote_addr.svm_port;
+ rep->vdiag_ino = sock_i_ino(sk);
+
+ sock_diag_save_cookie(sk, rep->vdiag_cookie);
+
+ return 0;
+}
+
+static int vsock_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct vsock_diag_req *req;
+ struct vsock_sock *vsk;
+ unsigned int bucket;
+ unsigned int last_i;
+ unsigned int table;
+ struct net *net;
+ unsigned int i;
+
+ req = nlmsg_data(cb->nlh);
+ net = sock_net(skb->sk);
+
+ /* State saved between calls: */
+ table = cb->args[0];
+ bucket = cb->args[1];
+ i = last_i = cb->args[2];
+
+ /* TODO VMCI pending sockets? */
+
+ spin_lock_bh(&vsock_table_lock);
+
+ /* Bind table (locally created sockets) */
+ if (table == 0) {
+ while (bucket < ARRAY_SIZE(vsock_bind_table)) {
+ struct list_head *head = &vsock_bind_table[bucket];
+
+ i = 0;
+ list_for_each_entry(vsk, head, bound_table) {
+ struct sock *sk = sk_vsock(vsk);
+
+ if (!net_eq(sock_net(sk), net))
+ continue;
+ if (i < last_i)
+ goto next_bind;
+ if (!(req->vdiag_states & (1 << sk->sk_state)))
+ goto next_bind;
+ if (sk_diag_fill(sk, skb,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI) < 0)
+ goto done;
+next_bind:
+ i++;
+ }
+ last_i = 0;
+ bucket++;
+ }
+
+ table++;
+ bucket = 0;
+ }
+
+ /* Connected table (accepted connections) */
+ while (bucket < ARRAY_SIZE(vsock_connected_table)) {
+ struct list_head *head = &vsock_connected_table[bucket];
+
+ i = 0;
+ list_for_each_entry(vsk, head, connected_table) {
+ struct sock *sk = sk_vsock(vsk);
+
+ /* Skip sockets we've already seen above */
+ if (__vsock_in_bound_table(vsk))
+ continue;
+
+ if (!net_eq(sock_net(sk), net))
+ continue;
+ if (i < last_i)
+ goto next_connected;
+ if (!(req->vdiag_states & (1 << sk->sk_state)))
+ goto next_connected;
+ if (sk_diag_fill(sk, skb,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI) < 0)
+ goto done;
+next_connected:
+ i++;
+ }
+ last_i = 0;
+ bucket++;
+ }
+
+done:
+ spin_unlock_bh(&vsock_table_lock);
+
+ cb->args[0] = table;
+ cb->args[1] = bucket;
+ cb->args[2] = i;
+
+ return skb->len;
+}
+
+static int vsock_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
+{
+ int hdrlen = sizeof(struct vsock_diag_req);
+ struct net *net = sock_net(skb->sk);
+
+ if (nlmsg_len(h) < hdrlen)
+ return -EINVAL;
+
+ if (h->nlmsg_flags & NLM_F_DUMP) {
+ struct netlink_dump_control c = {
+ .dump = vsock_diag_dump,
+ };
+ return netlink_dump_start(net->diag_nlsk, skb, h, &c);
+ }
+
+ return -EOPNOTSUPP;
+}
+
+static const struct sock_diag_handler vsock_diag_handler = {
+ .family = AF_VSOCK,
+ .dump = vsock_diag_handler_dump,
+};
+
+static int __init vsock_diag_init(void)
+{
+ return sock_diag_register(&vsock_diag_handler);
+}
+
+static void __exit vsock_diag_exit(void)
+{
+ sock_diag_unregister(&vsock_diag_handler);
+}
+
+module_init(vsock_diag_init);
+module_exit(vsock_diag_exit);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG,
+ 40 /* AF_VSOCK */);
diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
new file mode 100644
index 000000000000..5583df708b8c
--- /dev/null
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -0,0 +1,917 @@
+/*
+ * Hyper-V transport for vsock
+ *
+ * Hyper-V Sockets supplies a byte-stream based communication mechanism
+ * between the host and the VM. This driver implements the necessary
+ * support in the VM by introducing the new vsock transport.
+ *
+ * Copyright (c) 2017, Microsoft Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/hyperv.h>
+#include <net/sock.h>
+#include <net/af_vsock.h>
+
+/* The host side's design of the feature requires 6 exact 4KB pages for
+ * recv/send rings respectively -- this is suboptimal considering memory
+ * consumption, however unluckily we have to live with it, before the
+ * host comes up with a better design in the future.
+ */
+#define PAGE_SIZE_4K 4096
+#define RINGBUFFER_HVS_RCV_SIZE (PAGE_SIZE_4K * 6)
+#define RINGBUFFER_HVS_SND_SIZE (PAGE_SIZE_4K * 6)
+
+/* The MTU is 16KB per the host side's design */
+#define HVS_MTU_SIZE (1024 * 16)
+
+struct vmpipe_proto_header {
+ u32 pkt_type;
+ u32 data_size;
+};
+
+/* For recv, we use the VMBus in-place packet iterator APIs to directly copy
+ * data from the ringbuffer into the userspace buffer.
+ */
+struct hvs_recv_buf {
+ /* The header before the payload data */
+ struct vmpipe_proto_header hdr;
+
+ /* The payload */
+ u8 data[HVS_MTU_SIZE];
+};
+
+/* We can send up to HVS_MTU_SIZE bytes of payload to the host, but let's use
+ * a small size, i.e. HVS_SEND_BUF_SIZE, to minimize the dynamically-allocated
+ * buffer, because tests show there is no significant performance difference.
+ *
+ * Note: the buffer can be eliminated in the future when we add new VMBus
+ * ringbuffer APIs that allow us to directly copy data from userspace buffer
+ * to VMBus ringbuffer.
+ */
+#define HVS_SEND_BUF_SIZE (PAGE_SIZE_4K - sizeof(struct vmpipe_proto_header))
+
+struct hvs_send_buf {
+ /* The header before the payload data */
+ struct vmpipe_proto_header hdr;
+
+ /* The payload */
+ u8 data[HVS_SEND_BUF_SIZE];
+};
+
+#define HVS_HEADER_LEN (sizeof(struct vmpacket_descriptor) + \
+ sizeof(struct vmpipe_proto_header))
+
+/* See 'prev_indices' in hv_ringbuffer_read(), hv_ringbuffer_write(), and
+ * __hv_pkt_iter_next().
+ */
+#define VMBUS_PKT_TRAILER_SIZE (sizeof(u64))
+
+#define HVS_PKT_LEN(payload_len) (HVS_HEADER_LEN + \
+ ALIGN((payload_len), 8) + \
+ VMBUS_PKT_TRAILER_SIZE)
+
+union hvs_service_id {
+ uuid_le srv_id;
+
+ struct {
+ unsigned int svm_port;
+ unsigned char b[sizeof(uuid_le) - sizeof(unsigned int)];
+ };
+};
+
+/* Per-socket state (accessed via vsk->trans) */
+struct hvsock {
+ struct vsock_sock *vsk;
+
+ uuid_le vm_srv_id;
+ uuid_le host_srv_id;
+
+ struct vmbus_channel *chan;
+ struct vmpacket_descriptor *recv_desc;
+
+ /* The length of the payload not delivered to userland yet */
+ u32 recv_data_len;
+ /* The offset of the payload */
+ u32 recv_data_off;
+
+ /* Have we sent the zero-length packet (FIN)? */
+ bool fin_sent;
+};
+
+/* In the VM, we support Hyper-V Sockets with AF_VSOCK, and the endpoint is
+ * <cid, port> (see struct sockaddr_vm). Note: cid is not really used here:
+ * when we write apps to connect to the host, we can only use VMADDR_CID_ANY
+ * or VMADDR_CID_HOST (both are equivalent) as the remote cid, and when we
+ * write apps to bind() & listen() in the VM, we can only use VMADDR_CID_ANY
+ * as the local cid.
+ *
+ * On the host, Hyper-V Sockets are supported by Winsock AF_HYPERV:
+ * https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/user-
+ * guide/make-integration-service, and the endpoint is <VmID, ServiceId> with
+ * the below sockaddr:
+ *
+ * struct SOCKADDR_HV
+ * {
+ * ADDRESS_FAMILY Family;
+ * USHORT Reserved;
+ * GUID VmId;
+ * GUID ServiceId;
+ * };
+ * Note: VmID is not used by Linux VM and actually it isn't transmitted via
+ * VMBus, because here it's obvious the host and the VM can easily identify
+ * each other. Though the VmID is useful on the host, especially in the case
+ * of Windows container, Linux VM doesn't need it at all.
+ *
+ * To make use of the AF_VSOCK infrastructure in Linux VM, we have to limit
+ * the available GUID space of SOCKADDR_HV so that we can create a mapping
+ * between AF_VSOCK port and SOCKADDR_HV Service GUID. The rule of writing
+ * Hyper-V Sockets apps on the host and in Linux VM is:
+ *
+ ****************************************************************************
+ * The only valid Service GUIDs, from the perspectives of both the host and *
+ * Linux VM, that can be connected by the other end, must conform to this *
+ * format: <port>-facb-11e6-bd58-64006a7986d3, and the "port" must be in *
+ * this range [0, 0x7FFFFFFF]. *
+ ****************************************************************************
+ *
+ * When we write apps on the host to connect(), the GUID ServiceID is used.
+ * When we write apps in Linux VM to connect(), we only need to specify the
+ * port and the driver will form the GUID and use that to request the host.
+ *
+ * From the perspective of Linux VM:
+ * 1. the local ephemeral port (i.e. the local auto-bound port when we call
+ * connect() without explicit bind()) is generated by __vsock_bind_stream(),
+ * and the range is [1024, 0xFFFFFFFF).
+ * 2. the remote ephemeral port (i.e. the auto-generated remote port for
+ * a connect request initiated by the host's connect()) is generated by
+ * hvs_remote_addr_init() and the range is [0x80000000, 0xFFFFFFFF).
+ */
+
+#define MAX_LISTEN_PORT ((u32)0x7FFFFFFF)
+#define MAX_VM_LISTEN_PORT MAX_LISTEN_PORT
+#define MAX_HOST_LISTEN_PORT MAX_LISTEN_PORT
+#define MIN_HOST_EPHEMERAL_PORT (MAX_HOST_LISTEN_PORT + 1)
+
+/* 00000000-facb-11e6-bd58-64006a7986d3 */
+static const uuid_le srv_id_template =
+ UUID_LE(0x00000000, 0xfacb, 0x11e6, 0xbd, 0x58,
+ 0x64, 0x00, 0x6a, 0x79, 0x86, 0xd3);
+
+static bool is_valid_srv_id(const uuid_le *id)
+{
+ return !memcmp(&id->b[4], &srv_id_template.b[4], sizeof(uuid_le) - 4);
+}
+
+static unsigned int get_port_by_srv_id(const uuid_le *svr_id)
+{
+ return *((unsigned int *)svr_id);
+}
+
+static void hvs_addr_init(struct sockaddr_vm *addr, const uuid_le *svr_id)
+{
+ unsigned int port = get_port_by_srv_id(svr_id);
+
+ vsock_addr_init(addr, VMADDR_CID_ANY, port);
+}
+
+static void hvs_remote_addr_init(struct sockaddr_vm *remote,
+ struct sockaddr_vm *local)
+{
+ static u32 host_ephemeral_port = MIN_HOST_EPHEMERAL_PORT;
+ struct sock *sk;
+
+ vsock_addr_init(remote, VMADDR_CID_ANY, VMADDR_PORT_ANY);
+
+ while (1) {
+ /* Wrap around ? */
+ if (host_ephemeral_port < MIN_HOST_EPHEMERAL_PORT ||
+ host_ephemeral_port == VMADDR_PORT_ANY)
+ host_ephemeral_port = MIN_HOST_EPHEMERAL_PORT;
+
+ remote->svm_port = host_ephemeral_port++;
+
+ sk = vsock_find_connected_socket(remote, local);
+ if (!sk) {
+ /* Found an available ephemeral port */
+ return;
+ }
+
+ /* Release refcnt got in vsock_find_connected_socket */
+ sock_put(sk);
+ }
+}
+
+static void hvs_set_channel_pending_send_size(struct vmbus_channel *chan)
+{
+ set_channel_pending_send_size(chan,
+ HVS_PKT_LEN(HVS_SEND_BUF_SIZE));
+
+ /* See hvs_stream_has_space(): we must make sure the host has seen
+ * the new pending send size, before we can re-check the writable
+ * bytes.
+ */
+ virt_mb();
+}
+
+static void hvs_clear_channel_pending_send_size(struct vmbus_channel *chan)
+{
+ set_channel_pending_send_size(chan, 0);
+
+ /* Ditto */
+ virt_mb();
+}
+
+static bool hvs_channel_readable(struct vmbus_channel *chan)
+{
+ u32 readable = hv_get_bytes_to_read(&chan->inbound);
+
+ /* 0-size payload means FIN */
+ return readable >= HVS_PKT_LEN(0);
+}
+
+static int hvs_channel_readable_payload(struct vmbus_channel *chan)
+{
+ u32 readable = hv_get_bytes_to_read(&chan->inbound);
+
+ if (readable > HVS_PKT_LEN(0)) {
+ /* At least we have 1 byte to read. We don't need to return
+ * the exact readable bytes: see vsock_stream_recvmsg() ->
+ * vsock_stream_has_data().
+ */
+ return 1;
+ }
+
+ if (readable == HVS_PKT_LEN(0)) {
+ /* 0-size payload means FIN */
+ return 0;
+ }
+
+ /* No payload or FIN */
+ return -1;
+}
+
+static size_t hvs_channel_writable_bytes(struct vmbus_channel *chan)
+{
+ u32 writeable = hv_get_bytes_to_write(&chan->outbound);
+ size_t ret;
+
+ /* The ringbuffer mustn't be 100% full, and we should reserve a
+ * zero-length-payload packet for the FIN: see hv_ringbuffer_write()
+ * and hvs_shutdown().
+ */
+ if (writeable <= HVS_PKT_LEN(1) + HVS_PKT_LEN(0))
+ return 0;
+
+ ret = writeable - HVS_PKT_LEN(1) - HVS_PKT_LEN(0);
+
+ return round_down(ret, 8);
+}
+
+static int hvs_send_data(struct vmbus_channel *chan,
+ struct hvs_send_buf *send_buf, size_t to_write)
+{
+ send_buf->hdr.pkt_type = 1;
+ send_buf->hdr.data_size = to_write;
+ return vmbus_sendpacket(chan, &send_buf->hdr,
+ sizeof(send_buf->hdr) + to_write,
+ 0, VM_PKT_DATA_INBAND, 0);
+}
+
+static void hvs_channel_cb(void *ctx)
+{
+ struct sock *sk = (struct sock *)ctx;
+ struct vsock_sock *vsk = vsock_sk(sk);
+ struct hvsock *hvs = vsk->trans;
+ struct vmbus_channel *chan = hvs->chan;
+
+ if (hvs_channel_readable(chan))
+ sk->sk_data_ready(sk);
+
+ /* See hvs_stream_has_space(): when we reach here, the writable bytes
+ * may be already less than HVS_PKT_LEN(HVS_SEND_BUF_SIZE).
+ */
+ if (hv_get_bytes_to_write(&chan->outbound) > 0)
+ sk->sk_write_space(sk);
+}
+
+static void hvs_close_connection(struct vmbus_channel *chan)
+{
+ struct sock *sk = get_per_channel_state(chan);
+ struct vsock_sock *vsk = vsock_sk(sk);
+
+ lock_sock(sk);
+
+ sk->sk_state = TCP_CLOSE;
+ sock_set_flag(sk, SOCK_DONE);
+ vsk->peer_shutdown |= SEND_SHUTDOWN | RCV_SHUTDOWN;
+
+ sk->sk_state_change(sk);
+
+ release_sock(sk);
+}
+
+static void hvs_open_connection(struct vmbus_channel *chan)
+{
+ uuid_le *if_instance, *if_type;
+ unsigned char conn_from_host;
+
+ struct sockaddr_vm addr;
+ struct sock *sk, *new = NULL;
+ struct vsock_sock *vnew;
+ struct hvsock *hvs, *hvs_new;
+ int ret;
+
+ if_type = &chan->offermsg.offer.if_type;
+ if_instance = &chan->offermsg.offer.if_instance;
+ conn_from_host = chan->offermsg.offer.u.pipe.user_def[0];
+
+ /* The host or the VM should only listen on a port in
+ * [0, MAX_LISTEN_PORT]
+ */
+ if (!is_valid_srv_id(if_type) ||
+ get_port_by_srv_id(if_type) > MAX_LISTEN_PORT)
+ return;
+
+ hvs_addr_init(&addr, conn_from_host ? if_type : if_instance);
+ sk = vsock_find_bound_socket(&addr);
+ if (!sk)
+ return;
+
+ lock_sock(sk);
+ if ((conn_from_host && sk->sk_state != TCP_LISTEN) ||
+ (!conn_from_host && sk->sk_state != TCP_SYN_SENT))
+ goto out;
+
+ if (conn_from_host) {
+ if (sk->sk_ack_backlog >= sk->sk_max_ack_backlog)
+ goto out;
+
+ new = __vsock_create(sock_net(sk), NULL, sk, GFP_KERNEL,
+ sk->sk_type, 0);
+ if (!new)
+ goto out;
+
+ new->sk_state = TCP_SYN_SENT;
+ vnew = vsock_sk(new);
+ hvs_new = vnew->trans;
+ hvs_new->chan = chan;
+ } else {
+ hvs = vsock_sk(sk)->trans;
+ hvs->chan = chan;
+ }
+
+ set_channel_read_mode(chan, HV_CALL_DIRECT);
+ ret = vmbus_open(chan, RINGBUFFER_HVS_SND_SIZE,
+ RINGBUFFER_HVS_RCV_SIZE, NULL, 0,
+ hvs_channel_cb, conn_from_host ? new : sk);
+ if (ret != 0) {
+ if (conn_from_host) {
+ hvs_new->chan = NULL;
+ sock_put(new);
+ } else {
+ hvs->chan = NULL;
+ }
+ goto out;
+ }
+
+ set_per_channel_state(chan, conn_from_host ? new : sk);
+ vmbus_set_chn_rescind_callback(chan, hvs_close_connection);
+
+ if (conn_from_host) {
+ new->sk_state = TCP_ESTABLISHED;
+ sk->sk_ack_backlog++;
+
+ hvs_addr_init(&vnew->local_addr, if_type);
+ hvs_remote_addr_init(&vnew->remote_addr, &vnew->local_addr);
+
+ hvs_new->vm_srv_id = *if_type;
+ hvs_new->host_srv_id = *if_instance;
+
+ vsock_insert_connected(vnew);
+
+ vsock_enqueue_accept(sk, new);
+ } else {
+ sk->sk_state = TCP_ESTABLISHED;
+ sk->sk_socket->state = SS_CONNECTED;
+
+ vsock_insert_connected(vsock_sk(sk));
+ }
+
+ sk->sk_state_change(sk);
+
+out:
+ /* Release refcnt obtained when we called vsock_find_bound_socket() */
+ sock_put(sk);
+
+ release_sock(sk);
+}
+
+static u32 hvs_get_local_cid(void)
+{
+ return VMADDR_CID_ANY;
+}
+
+static int hvs_sock_init(struct vsock_sock *vsk, struct vsock_sock *psk)
+{
+ struct hvsock *hvs;
+
+ hvs = kzalloc(sizeof(*hvs), GFP_KERNEL);
+ if (!hvs)
+ return -ENOMEM;
+
+ vsk->trans = hvs;
+ hvs->vsk = vsk;
+
+ return 0;
+}
+
+static int hvs_connect(struct vsock_sock *vsk)
+{
+ union hvs_service_id vm, host;
+ struct hvsock *h = vsk->trans;
+
+ vm.srv_id = srv_id_template;
+ vm.svm_port = vsk->local_addr.svm_port;
+ h->vm_srv_id = vm.srv_id;
+
+ host.srv_id = srv_id_template;
+ host.svm_port = vsk->remote_addr.svm_port;
+ h->host_srv_id = host.srv_id;
+
+ return vmbus_send_tl_connect_request(&h->vm_srv_id, &h->host_srv_id);
+}
+
+static int hvs_shutdown(struct vsock_sock *vsk, int mode)
+{
+ struct sock *sk = sk_vsock(vsk);
+ struct vmpipe_proto_header hdr;
+ struct hvs_send_buf *send_buf;
+ struct hvsock *hvs;
+
+ if (!(mode & SEND_SHUTDOWN))
+ return 0;
+
+ lock_sock(sk);
+
+ hvs = vsk->trans;
+ if (hvs->fin_sent)
+ goto out;
+
+ send_buf = (struct hvs_send_buf *)&hdr;
+
+ /* It can't fail: see hvs_channel_writable_bytes(). */
+ (void)hvs_send_data(hvs->chan, send_buf, 0);
+
+ hvs->fin_sent = true;
+out:
+ release_sock(sk);
+ return 0;
+}
+
+static void hvs_release(struct vsock_sock *vsk)
+{
+ struct sock *sk = sk_vsock(vsk);
+ struct hvsock *hvs = vsk->trans;
+ struct vmbus_channel *chan;
+
+ lock_sock(sk);
+
+ sk->sk_state = SS_DISCONNECTING;
+ vsock_remove_sock(vsk);
+
+ release_sock(sk);
+
+ chan = hvs->chan;
+ if (chan)
+ hvs_shutdown(vsk, RCV_SHUTDOWN | SEND_SHUTDOWN);
+
+}
+
+static void hvs_destruct(struct vsock_sock *vsk)
+{
+ struct hvsock *hvs = vsk->trans;
+ struct vmbus_channel *chan = hvs->chan;
+
+ if (chan)
+ vmbus_hvsock_device_unregister(chan);
+
+ kfree(hvs);
+}
+
+static int hvs_dgram_bind(struct vsock_sock *vsk, struct sockaddr_vm *addr)
+{
+ return -EOPNOTSUPP;
+}
+
+static int hvs_dgram_dequeue(struct vsock_sock *vsk, struct msghdr *msg,
+ size_t len, int flags)
+{
+ return -EOPNOTSUPP;
+}
+
+static int hvs_dgram_enqueue(struct vsock_sock *vsk,
+ struct sockaddr_vm *remote, struct msghdr *msg,
+ size_t dgram_len)
+{
+ return -EOPNOTSUPP;
+}
+
+static bool hvs_dgram_allow(u32 cid, u32 port)
+{
+ return false;
+}
+
+static int hvs_update_recv_data(struct hvsock *hvs)
+{
+ struct hvs_recv_buf *recv_buf;
+ u32 payload_len;
+
+ recv_buf = (struct hvs_recv_buf *)(hvs->recv_desc + 1);
+ payload_len = recv_buf->hdr.data_size;
+
+ if (payload_len > HVS_MTU_SIZE)
+ return -EIO;
+
+ if (payload_len == 0)
+ hvs->vsk->peer_shutdown |= SEND_SHUTDOWN;
+
+ hvs->recv_data_len = payload_len;
+ hvs->recv_data_off = 0;
+
+ return 0;
+}
+
+static ssize_t hvs_stream_dequeue(struct vsock_sock *vsk, struct msghdr *msg,
+ size_t len, int flags)
+{
+ struct hvsock *hvs = vsk->trans;
+ bool need_refill = !hvs->recv_desc;
+ struct hvs_recv_buf *recv_buf;
+ u32 to_read;
+ int ret;
+
+ if (flags & MSG_PEEK)
+ return -EOPNOTSUPP;
+
+ if (need_refill) {
+ hvs->recv_desc = hv_pkt_iter_first(hvs->chan);
+ ret = hvs_update_recv_data(hvs);
+ if (ret)
+ return ret;
+ }
+
+ recv_buf = (struct hvs_recv_buf *)(hvs->recv_desc + 1);
+ to_read = min_t(u32, len, hvs->recv_data_len);
+ ret = memcpy_to_msg(msg, recv_buf->data + hvs->recv_data_off, to_read);
+ if (ret != 0)
+ return ret;
+
+ hvs->recv_data_len -= to_read;
+ if (hvs->recv_data_len == 0) {
+ hvs->recv_desc = hv_pkt_iter_next(hvs->chan, hvs->recv_desc);
+ if (hvs->recv_desc) {
+ ret = hvs_update_recv_data(hvs);
+ if (ret)
+ return ret;
+ }
+ } else {
+ hvs->recv_data_off += to_read;
+ }
+
+ return to_read;
+}
+
+static ssize_t hvs_stream_enqueue(struct vsock_sock *vsk, struct msghdr *msg,
+ size_t len)
+{
+ struct hvsock *hvs = vsk->trans;
+ struct vmbus_channel *chan = hvs->chan;
+ struct hvs_send_buf *send_buf;
+ ssize_t to_write, max_writable, ret;
+
+ BUILD_BUG_ON(sizeof(*send_buf) != PAGE_SIZE_4K);
+
+ send_buf = kmalloc(sizeof(*send_buf), GFP_KERNEL);
+ if (!send_buf)
+ return -ENOMEM;
+
+ max_writable = hvs_channel_writable_bytes(chan);
+ to_write = min_t(ssize_t, len, max_writable);
+ to_write = min_t(ssize_t, to_write, HVS_SEND_BUF_SIZE);
+
+ ret = memcpy_from_msg(send_buf->data, msg, to_write);
+ if (ret < 0)
+ goto out;
+
+ ret = hvs_send_data(hvs->chan, send_buf, to_write);
+ if (ret < 0)
+ goto out;
+
+ ret = to_write;
+out:
+ kfree(send_buf);
+ return ret;
+}
+
+static s64 hvs_stream_has_data(struct vsock_sock *vsk)
+{
+ struct hvsock *hvs = vsk->trans;
+ s64 ret;
+
+ if (hvs->recv_data_len > 0)
+ return 1;
+
+ switch (hvs_channel_readable_payload(hvs->chan)) {
+ case 1:
+ ret = 1;
+ break;
+ case 0:
+ vsk->peer_shutdown |= SEND_SHUTDOWN;
+ ret = 0;
+ break;
+ default: /* -1 */
+ ret = 0;
+ break;
+ }
+
+ return ret;
+}
+
+static s64 hvs_stream_has_space(struct vsock_sock *vsk)
+{
+ struct hvsock *hvs = vsk->trans;
+ struct vmbus_channel *chan = hvs->chan;
+ s64 ret;
+
+ ret = hvs_channel_writable_bytes(chan);
+ if (ret > 0) {
+ hvs_clear_channel_pending_send_size(chan);
+ } else {
+ /* See hvs_channel_cb() */
+ hvs_set_channel_pending_send_size(chan);
+
+ /* Re-check the writable bytes to avoid race */
+ ret = hvs_channel_writable_bytes(chan);
+ if (ret > 0)
+ hvs_clear_channel_pending_send_size(chan);
+ }
+
+ return ret;
+}
+
+static u64 hvs_stream_rcvhiwat(struct vsock_sock *vsk)
+{
+ return HVS_MTU_SIZE + 1;
+}
+
+static bool hvs_stream_is_active(struct vsock_sock *vsk)
+{
+ struct hvsock *hvs = vsk->trans;
+
+ return hvs->chan != NULL;
+}
+
+static bool hvs_stream_allow(u32 cid, u32 port)
+{
+ /* The host's port range [MIN_HOST_EPHEMERAL_PORT, 0xFFFFFFFF) is
+ * reserved as ephemeral ports, which are used as the host's ports
+ * when the host initiates connections.
+ *
+ * Perform this check in the guest so an immediate error is produced
+ * instead of a timeout.
+ */
+ if (port > MAX_HOST_LISTEN_PORT)
+ return false;
+
+ if (cid == VMADDR_CID_HOST)
+ return true;
+
+ return false;
+}
+
+static
+int hvs_notify_poll_in(struct vsock_sock *vsk, size_t target, bool *readable)
+{
+ struct hvsock *hvs = vsk->trans;
+
+ *readable = hvs_channel_readable(hvs->chan);
+ return 0;
+}
+
+static
+int hvs_notify_poll_out(struct vsock_sock *vsk, size_t target, bool *writable)
+{
+ *writable = hvs_stream_has_space(vsk) > 0;
+
+ return 0;
+}
+
+static
+int hvs_notify_recv_init(struct vsock_sock *vsk, size_t target,
+ struct vsock_transport_recv_notify_data *d)
+{
+ return 0;
+}
+
+static
+int hvs_notify_recv_pre_block(struct vsock_sock *vsk, size_t target,
+ struct vsock_transport_recv_notify_data *d)
+{
+ return 0;
+}
+
+static
+int hvs_notify_recv_pre_dequeue(struct vsock_sock *vsk, size_t target,
+ struct vsock_transport_recv_notify_data *d)
+{
+ return 0;
+}
+
+static
+int hvs_notify_recv_post_dequeue(struct vsock_sock *vsk, size_t target,
+ ssize_t copied, bool data_read,
+ struct vsock_transport_recv_notify_data *d)
+{
+ return 0;
+}
+
+static
+int hvs_notify_send_init(struct vsock_sock *vsk,
+ struct vsock_transport_send_notify_data *d)
+{
+ return 0;
+}
+
+static
+int hvs_notify_send_pre_block(struct vsock_sock *vsk,
+ struct vsock_transport_send_notify_data *d)
+{
+ return 0;
+}
+
+static
+int hvs_notify_send_pre_enqueue(struct vsock_sock *vsk,
+ struct vsock_transport_send_notify_data *d)
+{
+ return 0;
+}
+
+static
+int hvs_notify_send_post_enqueue(struct vsock_sock *vsk, ssize_t written,
+ struct vsock_transport_send_notify_data *d)
+{
+ return 0;
+}
+
+static void hvs_set_buffer_size(struct vsock_sock *vsk, u64 val)
+{
+ /* Ignored. */
+}
+
+static void hvs_set_min_buffer_size(struct vsock_sock *vsk, u64 val)
+{
+ /* Ignored. */
+}
+
+static void hvs_set_max_buffer_size(struct vsock_sock *vsk, u64 val)
+{
+ /* Ignored. */
+}
+
+static u64 hvs_get_buffer_size(struct vsock_sock *vsk)
+{
+ return -ENOPROTOOPT;
+}
+
+static u64 hvs_get_min_buffer_size(struct vsock_sock *vsk)
+{
+ return -ENOPROTOOPT;
+}
+
+static u64 hvs_get_max_buffer_size(struct vsock_sock *vsk)
+{
+ return -ENOPROTOOPT;
+}
+
+static struct vsock_transport hvs_transport = {
+ .get_local_cid = hvs_get_local_cid,
+
+ .init = hvs_sock_init,
+ .destruct = hvs_destruct,
+ .release = hvs_release,
+ .connect = hvs_connect,
+ .shutdown = hvs_shutdown,
+
+ .dgram_bind = hvs_dgram_bind,
+ .dgram_dequeue = hvs_dgram_dequeue,
+ .dgram_enqueue = hvs_dgram_enqueue,
+ .dgram_allow = hvs_dgram_allow,
+
+ .stream_dequeue = hvs_stream_dequeue,
+ .stream_enqueue = hvs_stream_enqueue,
+ .stream_has_data = hvs_stream_has_data,
+ .stream_has_space = hvs_stream_has_space,
+ .stream_rcvhiwat = hvs_stream_rcvhiwat,
+ .stream_is_active = hvs_stream_is_active,
+ .stream_allow = hvs_stream_allow,
+
+ .notify_poll_in = hvs_notify_poll_in,
+ .notify_poll_out = hvs_notify_poll_out,
+ .notify_recv_init = hvs_notify_recv_init,
+ .notify_recv_pre_block = hvs_notify_recv_pre_block,
+ .notify_recv_pre_dequeue = hvs_notify_recv_pre_dequeue,
+ .notify_recv_post_dequeue = hvs_notify_recv_post_dequeue,
+ .notify_send_init = hvs_notify_send_init,
+ .notify_send_pre_block = hvs_notify_send_pre_block,
+ .notify_send_pre_enqueue = hvs_notify_send_pre_enqueue,
+ .notify_send_post_enqueue = hvs_notify_send_post_enqueue,
+
+ .set_buffer_size = hvs_set_buffer_size,
+ .set_min_buffer_size = hvs_set_min_buffer_size,
+ .set_max_buffer_size = hvs_set_max_buffer_size,
+ .get_buffer_size = hvs_get_buffer_size,
+ .get_min_buffer_size = hvs_get_min_buffer_size,
+ .get_max_buffer_size = hvs_get_max_buffer_size,
+};
+
+static int hvs_probe(struct hv_device *hdev,
+ const struct hv_vmbus_device_id *dev_id)
+{
+ struct vmbus_channel *chan = hdev->channel;
+
+ hvs_open_connection(chan);
+
+ /* Always return success to suppress the unnecessary error message
+ * in vmbus_probe(): on error the host will rescind the device in
+ * 30 seconds and we can do cleanup at that time in
+ * vmbus_onoffer_rescind().
+ */
+ return 0;
+}
+
+static int hvs_remove(struct hv_device *hdev)
+{
+ struct vmbus_channel *chan = hdev->channel;
+
+ vmbus_close(chan);
+
+ return 0;
+}
+
+/* This isn't really used. See vmbus_match() and vmbus_probe() */
+static const struct hv_vmbus_device_id id_table[] = {
+ {},
+};
+
+static struct hv_driver hvs_drv = {
+ .name = "hv_sock",
+ .hvsock = true,
+ .id_table = id_table,
+ .probe = hvs_probe,
+ .remove = hvs_remove,
+};
+
+static int __init hvs_init(void)
+{
+ int ret;
+
+ if (vmbus_proto_version < VERSION_WIN10)
+ return -ENODEV;
+
+ ret = vmbus_driver_register(&hvs_drv);
+ if (ret != 0)
+ return ret;
+
+ ret = vsock_core_init(&hvs_transport);
+ if (ret) {
+ vmbus_driver_unregister(&hvs_drv);
+ return ret;
+ }
+
+ return 0;
+}
+
+static void __exit hvs_exit(void)
+{
+ vsock_core_exit();
+ vmbus_driver_unregister(&hvs_drv);
+}
+
+module_init(hvs_init);
+module_exit(hvs_exit);
+
+MODULE_DESCRIPTION("Hyper-V Sockets");
+MODULE_VERSION("1.0.0");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NETPROTO(PF_VSOCK);
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 403d86e80162..8e03bd3f3668 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -414,7 +414,7 @@ static void virtio_vsock_event_fill(struct virtio_vsock *vsock)
static void virtio_vsock_reset_sock(struct sock *sk)
{
lock_sock(sk);
- sk->sk_state = SS_UNCONNECTED;
+ sk->sk_state = TCP_CLOSE;
sk->sk_err = ECONNRESET;
sk->sk_error_report(sk);
release_sock(sk);
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index edba7ab97563..3ae3a33da70b 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -708,7 +708,7 @@ static void virtio_transport_do_close(struct vsock_sock *vsk,
sock_set_flag(sk, SOCK_DONE);
vsk->peer_shutdown = SHUTDOWN_MASK;
if (vsock_stream_has_data(vsk) <= 0)
- sk->sk_state = SS_DISCONNECTING;
+ sk->sk_state = TCP_CLOSING;
sk->sk_state_change(sk);
if (vsk->close_work_scheduled &&
@@ -748,8 +748,8 @@ static bool virtio_transport_close(struct vsock_sock *vsk)
{
struct sock *sk = &vsk->sk;
- if (!(sk->sk_state == SS_CONNECTED ||
- sk->sk_state == SS_DISCONNECTING))
+ if (!(sk->sk_state == TCP_ESTABLISHED ||
+ sk->sk_state == TCP_CLOSING))
return true;
/* Already received SHUTDOWN from peer, reply with RST */
@@ -801,7 +801,7 @@ virtio_transport_recv_connecting(struct sock *sk,
switch (le16_to_cpu(pkt->hdr.op)) {
case VIRTIO_VSOCK_OP_RESPONSE:
- sk->sk_state = SS_CONNECTED;
+ sk->sk_state = TCP_ESTABLISHED;
sk->sk_socket->state = SS_CONNECTED;
vsock_insert_connected(vsk);
sk->sk_state_change(sk);
@@ -821,7 +821,7 @@ virtio_transport_recv_connecting(struct sock *sk,
destroy:
virtio_transport_reset(vsk, pkt);
- sk->sk_state = SS_UNCONNECTED;
+ sk->sk_state = TCP_CLOSE;
sk->sk_err = skerr;
sk->sk_error_report(sk);
return err;
@@ -857,7 +857,7 @@ virtio_transport_recv_connected(struct sock *sk,
vsk->peer_shutdown |= SEND_SHUTDOWN;
if (vsk->peer_shutdown == SHUTDOWN_MASK &&
vsock_stream_has_data(vsk) <= 0)
- sk->sk_state = SS_DISCONNECTING;
+ sk->sk_state = TCP_CLOSING;
if (le32_to_cpu(pkt->hdr.flags))
sk->sk_state_change(sk);
break;
@@ -928,7 +928,7 @@ virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt)
lock_sock_nested(child, SINGLE_DEPTH_NESTING);
- child->sk_state = SS_CONNECTED;
+ child->sk_state = TCP_ESTABLISHED;
vchild = vsock_sk(child);
vsock_addr_init(&vchild->local_addr, le64_to_cpu(pkt->hdr.dst_cid),
@@ -1016,18 +1016,18 @@ void virtio_transport_recv_pkt(struct virtio_vsock_pkt *pkt)
sk->sk_write_space(sk);
switch (sk->sk_state) {
- case VSOCK_SS_LISTEN:
+ case TCP_LISTEN:
virtio_transport_recv_listen(sk, pkt);
virtio_transport_free_pkt(pkt);
break;
- case SS_CONNECTING:
+ case TCP_SYN_SENT:
virtio_transport_recv_connecting(sk, pkt);
virtio_transport_free_pkt(pkt);
break;
- case SS_CONNECTED:
+ case TCP_ESTABLISHED:
virtio_transport_recv_connected(sk, pkt);
break;
- case SS_DISCONNECTING:
+ case TCP_CLOSING:
virtio_transport_recv_disconnecting(sk, pkt);
virtio_transport_free_pkt(pkt);
break;
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 10ae7823a19d..391775e3575c 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -21,7 +21,6 @@
#include <linux/kernel.h>
#include <linux/kmod.h>
#include <linux/list.h>
-#include <linux/miscdevice.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/net.h>
@@ -743,7 +742,7 @@ static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg)
/* The local context ID may be out of date, update it. */
vsk->local_addr.svm_cid = dst.svm_cid;
- if (sk->sk_state == SS_CONNECTED)
+ if (sk->sk_state == TCP_ESTABLISHED)
vmci_trans(vsk)->notify_ops->handle_notify_pkt(
sk, pkt, true, &dst, &src,
&bh_process_pkt);
@@ -801,7 +800,9 @@ static void vmci_transport_handle_detach(struct sock *sk)
* left in our consume queue.
*/
if (vsock_stream_has_data(vsk) <= 0) {
- if (sk->sk_state == SS_CONNECTING) {
+ sk->sk_state = TCP_CLOSE;
+
+ if (sk->sk_state == TCP_SYN_SENT) {
/* The peer may detach from a queue pair while
* we are still in the connecting state, i.e.,
* if the peer VM is killed after attaching to
@@ -810,12 +811,10 @@ static void vmci_transport_handle_detach(struct sock *sk)
* event like a reset.
*/
- sk->sk_state = SS_UNCONNECTED;
sk->sk_err = ECONNRESET;
sk->sk_error_report(sk);
return;
}
- sk->sk_state = SS_UNCONNECTED;
}
sk->sk_state_change(sk);
}
@@ -883,17 +882,17 @@ static void vmci_transport_recv_pkt_work(struct work_struct *work)
vsock_sk(sk)->local_addr.svm_cid = pkt->dg.dst.context;
switch (sk->sk_state) {
- case VSOCK_SS_LISTEN:
+ case TCP_LISTEN:
vmci_transport_recv_listen(sk, pkt);
break;
- case SS_CONNECTING:
+ case TCP_SYN_SENT:
/* Processing of pending connections for servers goes through
* the listening socket, so see vmci_transport_recv_listen()
* for that path.
*/
vmci_transport_recv_connecting_client(sk, pkt);
break;
- case SS_CONNECTED:
+ case TCP_ESTABLISHED:
vmci_transport_recv_connected(sk, pkt);
break;
default:
@@ -942,7 +941,7 @@ static int vmci_transport_recv_listen(struct sock *sk,
vsock_sk(pending)->local_addr.svm_cid = pkt->dg.dst.context;
switch (pending->sk_state) {
- case SS_CONNECTING:
+ case TCP_SYN_SENT:
err = vmci_transport_recv_connecting_server(sk,
pending,
pkt);
@@ -1072,7 +1071,7 @@ static int vmci_transport_recv_listen(struct sock *sk,
vsock_add_pending(sk, pending);
sk->sk_ack_backlog++;
- pending->sk_state = SS_CONNECTING;
+ pending->sk_state = TCP_SYN_SENT;
vmci_trans(vpending)->produce_size =
vmci_trans(vpending)->consume_size = qp_size;
vmci_trans(vpending)->queue_pair_size = qp_size;
@@ -1197,11 +1196,11 @@ vmci_transport_recv_connecting_server(struct sock *listener,
* the socket will be valid until it is removed from the queue.
*
* If we fail sending the attach below, we remove the socket from the
- * connected list and move the socket to SS_UNCONNECTED before
+ * connected list and move the socket to TCP_CLOSE before
* releasing the lock, so a pending slow path processing of an incoming
* packet will not see the socket in the connected state in that case.
*/
- pending->sk_state = SS_CONNECTED;
+ pending->sk_state = TCP_ESTABLISHED;
vsock_insert_connected(vpending);
@@ -1232,7 +1231,7 @@ vmci_transport_recv_connecting_server(struct sock *listener,
destroy:
pending->sk_err = skerr;
- pending->sk_state = SS_UNCONNECTED;
+ pending->sk_state = TCP_CLOSE;
/* As long as we drop our reference, all necessary cleanup will handle
* when the cleanup function drops its reference and our destruct
* implementation is called. Note that since the listen handler will
@@ -1270,7 +1269,7 @@ vmci_transport_recv_connecting_client(struct sock *sk,
* accounting (it can already be found since it's in the bound
* table).
*/
- sk->sk_state = SS_CONNECTED;
+ sk->sk_state = TCP_ESTABLISHED;
sk->sk_socket->state = SS_CONNECTED;
vsock_insert_connected(vsk);
sk->sk_state_change(sk);
@@ -1338,7 +1337,7 @@ vmci_transport_recv_connecting_client(struct sock *sk,
destroy:
vmci_transport_send_reset(sk, pkt);
- sk->sk_state = SS_UNCONNECTED;
+ sk->sk_state = TCP_CLOSE;
sk->sk_err = skerr;
sk->sk_error_report(sk);
return err;
@@ -1526,7 +1525,7 @@ static int vmci_transport_recv_connected(struct sock *sk,
sock_set_flag(sk, SOCK_DONE);
vsk->peer_shutdown = SHUTDOWN_MASK;
if (vsock_stream_has_data(vsk) <= 0)
- sk->sk_state = SS_DISCONNECTING;
+ sk->sk_state = TCP_CLOSING;
sk->sk_state_change(sk);
break;
@@ -1790,7 +1789,7 @@ static int vmci_transport_connect(struct vsock_sock *vsk)
err = vmci_transport_send_conn_request(
sk, vmci_trans(vsk)->queue_pair_size);
if (err < 0) {
- sk->sk_state = SS_UNCONNECTED;
+ sk->sk_state = TCP_CLOSE;
return err;
}
} else {
@@ -1800,7 +1799,7 @@ static int vmci_transport_connect(struct vsock_sock *vsk)
sk, vmci_trans(vsk)->queue_pair_size,
supported_proto_versions);
if (err < 0) {
- sk->sk_state = SS_UNCONNECTED;
+ sk->sk_state = TCP_CLOSE;
return err;
}
diff --git a/net/vmw_vsock/vmci_transport_notify.c b/net/vmw_vsock/vmci_transport_notify.c
index 1406db4d97d1..41fb427f150a 100644
--- a/net/vmw_vsock/vmci_transport_notify.c
+++ b/net/vmw_vsock/vmci_transport_notify.c
@@ -355,7 +355,7 @@ vmci_transport_notify_pkt_poll_in(struct sock *sk,
* queue. Ask for notifications when there is something to
* read.
*/
- if (sk->sk_state == SS_CONNECTED) {
+ if (sk->sk_state == TCP_ESTABLISHED) {
if (!send_waiting_read(sk, 1))
return -1;
diff --git a/net/vmw_vsock/vmci_transport_notify_qstate.c b/net/vmw_vsock/vmci_transport_notify_qstate.c
index f3a0afc46208..0cc84f2bb05e 100644
--- a/net/vmw_vsock/vmci_transport_notify_qstate.c
+++ b/net/vmw_vsock/vmci_transport_notify_qstate.c
@@ -176,7 +176,7 @@ vmci_transport_notify_pkt_poll_in(struct sock *sk,
* queue. Ask for notifications when there is something to
* read.
*/
- if (sk->sk_state == SS_CONNECTED)
+ if (sk->sk_state == TCP_ESTABLISHED)
vsock_block_update_write_window(sk);
*data_ready_now = false;
}
diff --git a/net/wimax/Makefile b/net/wimax/Makefile
index 8f1510d0cc2b..eb2db0d3b880 100644
--- a/net/wimax/Makefile
+++ b/net/wimax/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_WIMAX) += wimax.o
diff --git a/net/wireless/.gitignore b/net/wireless/.gitignore
index c33451b896d9..61cbc304a3d3 100644
--- a/net/wireless/.gitignore
+++ b/net/wireless/.gitignore
@@ -1 +1,2 @@
-regdb.c
+shipped-certs.c
+extra-certs.c
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index 6c606120abfe..da91bb547db3 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -19,6 +19,7 @@ config WEXT_PRIV
config CFG80211
tristate "cfg80211 - wireless configuration API"
depends on RFKILL || !RFKILL
+ select FW_LOADER
---help---
cfg80211 is the Linux wireless LAN (802.11) configuration API.
Enable this if you have a wireless device.
@@ -82,6 +83,36 @@ config CFG80211_CERTIFICATION_ONUS
you are a wireless researcher and are working in a controlled
and approved environment by your local regulatory agency.
+config CFG80211_REQUIRE_SIGNED_REGDB
+ bool "require regdb signature" if CFG80211_CERTIFICATION_ONUS
+ default y
+ select SYSTEM_DATA_VERIFICATION
+ help
+ Require that in addition to the "regulatory.db" file a
+ "regulatory.db.p7s" can be loaded with a valid PKCS#7
+ signature for the regulatory.db file made by one of the
+ keys in the certs/ directory.
+
+config CFG80211_USE_KERNEL_REGDB_KEYS
+ bool "allow regdb keys shipped with the kernel" if CFG80211_CERTIFICATION_ONUS
+ default y
+ depends on CFG80211_REQUIRE_SIGNED_REGDB
+ help
+ Allow the regulatory database to be signed by one of the keys for
+ which certificates are part of the kernel sources
+ (in net/wireless/certs/).
+
+ This is currently only Seth Forshee's key, who is the regulatory
+ database maintainer.
+
+config CFG80211_EXTRA_REGDB_KEYDIR
+ string "additional regdb key directory" if CFG80211_CERTIFICATION_ONUS
+ depends on CFG80211_REQUIRE_SIGNED_REGDB
+ help
+ If selected, point to a directory with DER-encoded X.509
+ certificates like in the kernel sources (net/wireless/certs/)
+ that shall be accepted for a signed regulatory database.
+
config CFG80211_REG_CELLULAR_HINTS
bool "cfg80211 regulatory support for cellular base station hints"
depends on CFG80211_CERTIFICATION_ONUS
@@ -139,35 +170,14 @@ config CFG80211_DEBUGFS
If unsure, say N.
-config CFG80211_INTERNAL_REGDB
- bool "use statically compiled regulatory rules database" if EXPERT
- default n
- depends on CFG80211
- ---help---
- This option generates an internal data structure representing
- the wireless regulatory rules described in net/wireless/db.txt
- and includes code to query that database. This is an alternative
- to using CRDA for defining regulatory rules for the kernel.
-
- Using this option requires some parsing of the db.txt at build time,
- the parser will be upkept with the latest wireless-regdb updates but
- older wireless-regdb formats will be ignored. The parser may later
- be replaced to avoid issues with conflicts on versions of
- wireless-regdb.
-
- For details see:
-
- http://wireless.kernel.org/en/developers/Regulatory
-
- Most distributions have a CRDA package. So if unsure, say N.
-
config CFG80211_CRDA_SUPPORT
- bool "support CRDA" if CFG80211_INTERNAL_REGDB
+ bool "support CRDA" if EXPERT
default y
depends on CFG80211
help
You should enable this option unless you know for sure you have no
- need for it, for example when using internal regdb (above.)
+ need for it, for example when using internal regdb (above) or the
+ database loaded as a firmware file.
If unsure, say Y.
diff --git a/net/wireless/Makefile b/net/wireless/Makefile
index d06e5015751a..278d979c211a 100644
--- a/net/wireless/Makefile
+++ b/net/wireless/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_CFG80211) += cfg80211.o
obj-$(CONFIG_LIB80211) += lib80211.o
obj-$(CONFIG_LIB80211_CRYPT_WEP) += lib80211_crypt_wep.o
@@ -14,11 +15,27 @@ cfg80211-y += mlme.o ibss.o sme.o chan.o ethtool.o mesh.o ap.o trace.o ocb.o
cfg80211-$(CONFIG_OF) += of.o
cfg80211-$(CONFIG_CFG80211_DEBUGFS) += debugfs.o
cfg80211-$(CONFIG_CFG80211_WEXT) += wext-compat.o wext-sme.o
-cfg80211-$(CONFIG_CFG80211_INTERNAL_REGDB) += regdb.o
CFLAGS_trace.o := -I$(src)
-$(obj)/regdb.c: $(src)/db.txt $(src)/genregdb.awk
- @$(AWK) -f $(srctree)/$(src)/genregdb.awk < $< > $@
+cfg80211-$(CONFIG_CFG80211_USE_KERNEL_REGDB_KEYS) += shipped-certs.o
+ifneq ($(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR),)
+cfg80211-y += extra-certs.o
+endif
-clean-files := regdb.c
+$(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.x509)
+ @$(kecho) " GEN $@"
+ @echo '#include "reg.h"' > $@
+ @echo 'const u8 shipped_regdb_certs[] = {' >> $@
+ @for f in $^ ; do hexdump -v -e '1/1 "0x%.2x," "\n"' < $$f >> $@ ; done
+ @echo '};' >> $@
+ @echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);' >> $@
+
+$(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%) \
+ $(wildcard $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%)/*.x509)
+ @$(kecho) " GEN $@"
+ @echo '#include "reg.h"' > $@
+ @echo 'const u8 extra_regdb_certs[] = {' >> $@
+ @for f in $^ ; do test -f $$f && hexdump -v -e '1/1 "0x%.2x," "\n"' < $$f >> $@ || true ; done
+ @echo '};' >> $@
+ @echo 'unsigned int extra_regdb_certs_len = sizeof(extra_regdb_certs);' >> $@
diff --git a/net/wireless/ap.c b/net/wireless/ap.c
index 25666d3009be..63682176c96c 100644
--- a/net/wireless/ap.c
+++ b/net/wireless/ap.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/ieee80211.h>
#include <linux/export.h>
#include <net/cfg80211.h>
diff --git a/net/wireless/certs/sforshee.x509 b/net/wireless/certs/sforshee.x509
new file mode 100644
index 000000000000..c6f8f9d6b988
--- /dev/null
+++ b/net/wireless/certs/sforshee.x509
Binary files differ
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index b8aa5a7d5c77..a48859982a32 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* This file contains helper code to handle channel
* settings and keeping track of what is possible at
@@ -464,7 +465,7 @@ bool cfg80211_is_sub_chan(struct cfg80211_chan_def *chandef,
struct ieee80211_channel *chan)
{
int width;
- u32 cf_offset, freq;
+ u32 freq;
if (chandef->chan->center_freq == chan->center_freq)
return true;
@@ -473,8 +474,6 @@ bool cfg80211_is_sub_chan(struct cfg80211_chan_def *chandef,
if (width <= 20)
return false;
- cf_offset = width / 2 - 10;
-
for (freq = chandef->center_freq1 - width / 2 + 10;
freq <= chandef->center_freq1 + width / 2 - 10; freq += 20) {
if (chan->center_freq == freq)
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 7b33e8c366bc..fdde0d98fde1 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -1384,7 +1384,7 @@ out_fail_sysfs:
out_fail_pernet:
return err;
}
-subsys_initcall(cfg80211_init);
+fs_initcall(cfg80211_init);
static void __exit cfg80211_exit(void)
{
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 6e809325af3b..d2f7e8b8a097 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Wireless configuration interface internals.
*
@@ -216,6 +217,7 @@ enum cfg80211_event_type {
EVENT_DISCONNECTED,
EVENT_IBSS_JOINED,
EVENT_STOPPED,
+ EVENT_PORT_AUTHORIZED,
};
struct cfg80211_event {
@@ -235,6 +237,9 @@ struct cfg80211_event {
u8 bssid[ETH_ALEN];
struct ieee80211_channel *channel;
} ij;
+ struct {
+ u8 bssid[ETH_ALEN];
+ } pa;
};
};
@@ -385,6 +390,7 @@ int cfg80211_disconnect(struct cfg80211_registered_device *rdev,
bool wextev);
void __cfg80211_roamed(struct wireless_dev *wdev,
struct cfg80211_roam_info *info);
+void __cfg80211_port_authorized(struct wireless_dev *wdev, const u8 *bssid);
int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev,
struct wireless_dev *wdev);
void cfg80211_autodisconnect_wk(struct work_struct *work);
diff --git a/net/wireless/db.txt b/net/wireless/db.txt
deleted file mode 100644
index a2fc3a09ccdc..000000000000
--- a/net/wireless/db.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-#
-# This file is a placeholder to prevent accidental build breakage if someone
-# enables CONFIG_CFG80211_INTERNAL_REGDB. Almost no one actually needs to
-# enable that build option.
-#
-# You should be using CRDA instead. It is even better if you use the CRDA
-# package provided by your distribution, since they will probably keep it
-# up-to-date on your behalf.
-#
-# If you _really_ intend to use CONFIG_CFG80211_INTERNAL_REGDB then you will
-# need to replace this file with one containing appropriately formatted
-# regulatory rules that cover the regulatory domains you will be using. Your
-# best option is to extract the db.txt file from the wireless-regdb git
-# repository:
-#
-# git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-regdb.git
-#
diff --git a/net/wireless/debugfs.h b/net/wireless/debugfs.h
index 74fdd3811427..a8a135d94ab5 100644
--- a/net/wireless/debugfs.h
+++ b/net/wireless/debugfs.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __CFG80211_DEBUGFS_H
#define __CFG80211_DEBUGFS_H
diff --git a/net/wireless/ethtool.c b/net/wireless/ethtool.c
index e9e91298c70d..a9c0f368db5d 100644
--- a/net/wireless/ethtool.c
+++ b/net/wireless/ethtool.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/utsname.h>
#include <net/cfg80211.h>
#include "core.h"
diff --git a/net/wireless/genregdb.awk b/net/wireless/genregdb.awk
deleted file mode 100644
index baf2426b555a..000000000000
--- a/net/wireless/genregdb.awk
+++ /dev/null
@@ -1,158 +0,0 @@
-#!/usr/bin/awk -f
-#
-# genregdb.awk -- generate regdb.c from db.txt
-#
-# Actually, it reads from stdin (presumed to be db.txt) and writes
-# to stdout (presumed to be regdb.c), but close enough...
-#
-# Copyright 2009 John W. Linville <linville@tuxdriver.com>
-#
-# Permission to use, copy, modify, and/or distribute this software for any
-# purpose with or without fee is hereby granted, provided that the above
-# copyright notice and this permission notice appear in all copies.
-#
-# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
-# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-
-BEGIN {
- active = 0
- rules = 0;
- print "/*"
- print " * DO NOT EDIT -- file generated from data in db.txt"
- print " */"
- print ""
- print "#include <linux/nl80211.h>"
- print "#include <net/cfg80211.h>"
- print "#include \"regdb.h\""
- print ""
- regdb = "const struct ieee80211_regdomain *reg_regdb[] = {\n"
-}
-
-function parse_country_head() {
- country=$2
- sub(/:/, "", country)
- printf "static const struct ieee80211_regdomain regdom_%s = {\n", country
- printf "\t.alpha2 = \"%s\",\n", country
- if ($NF ~ /DFS-ETSI/)
- printf "\t.dfs_region = NL80211_DFS_ETSI,\n"
- else if ($NF ~ /DFS-FCC/)
- printf "\t.dfs_region = NL80211_DFS_FCC,\n"
- else if ($NF ~ /DFS-JP/)
- printf "\t.dfs_region = NL80211_DFS_JP,\n"
- printf "\t.reg_rules = {\n"
- active = 1
- regdb = regdb "\t&regdom_" country ",\n"
-}
-
-function parse_reg_rule()
-{
- flag_starts_at = 7
-
- start = $1
- sub(/\(/, "", start)
- end = $3
- bw = $5
- sub(/\),/, "", bw)
- gain = 0
- power = $6
- # power might be in mW...
- units = $7
- dfs_cac = 0
-
- sub(/\(/, "", power)
- sub(/\),/, "", power)
- sub(/\),/, "", units)
- sub(/\)/, "", units)
-
- if (units == "mW") {
- flag_starts_at = 8
- power = 10 * log(power)/log(10)
- if ($8 ~ /[[:digit:]]/) {
- flag_starts_at = 9
- dfs_cac = $8
- }
- } else {
- if ($7 ~ /[[:digit:]]/) {
- flag_starts_at = 8
- dfs_cac = $7
- }
- }
- sub(/\(/, "", dfs_cac)
- sub(/\),/, "", dfs_cac)
- flagstr = ""
- for (i=flag_starts_at; i<=NF; i++)
- flagstr = flagstr $i
- split(flagstr, flagarray, ",")
- flags = ""
- for (arg in flagarray) {
- if (flagarray[arg] == "NO-OFDM") {
- flags = flags "\n\t\t\tNL80211_RRF_NO_OFDM | "
- } else if (flagarray[arg] == "NO-CCK") {
- flags = flags "\n\t\t\tNL80211_RRF_NO_CCK | "
- } else if (flagarray[arg] == "NO-INDOOR") {
- flags = flags "\n\t\t\tNL80211_RRF_NO_INDOOR | "
- } else if (flagarray[arg] == "NO-OUTDOOR") {
- flags = flags "\n\t\t\tNL80211_RRF_NO_OUTDOOR | "
- } else if (flagarray[arg] == "DFS") {
- flags = flags "\n\t\t\tNL80211_RRF_DFS | "
- } else if (flagarray[arg] == "PTP-ONLY") {
- flags = flags "\n\t\t\tNL80211_RRF_PTP_ONLY | "
- } else if (flagarray[arg] == "PTMP-ONLY") {
- flags = flags "\n\t\t\tNL80211_RRF_PTMP_ONLY | "
- } else if (flagarray[arg] == "PASSIVE-SCAN") {
- flags = flags "\n\t\t\tNL80211_RRF_NO_IR | "
- } else if (flagarray[arg] == "NO-IBSS") {
- flags = flags "\n\t\t\tNL80211_RRF_NO_IR | "
- } else if (flagarray[arg] == "NO-IR") {
- flags = flags "\n\t\t\tNL80211_RRF_NO_IR | "
- } else if (flagarray[arg] == "AUTO-BW") {
- flags = flags "\n\t\t\tNL80211_RRF_AUTO_BW | "
- }
-
- }
- flags = flags "0"
- printf "\t\tREG_RULE_EXT(%d, %d, %d, %d, %.0f, %d, %s),\n", start, end, bw, gain, power, dfs_cac, flags
- rules++
-}
-
-function print_tail_country()
-{
- active = 0
- printf "\t},\n"
- printf "\t.n_reg_rules = %d\n", rules
- printf "};\n\n"
- rules = 0;
-}
-
-/^[ \t]*#/ {
- # Ignore
-}
-
-!active && /^[ \t]*$/ {
- # Ignore
-}
-
-!active && /country/ {
- parse_country_head()
-}
-
-active && /^[ \t]*\(/ {
- parse_reg_rule()
-}
-
-active && /^[ \t]*$/ {
- print_tail_country()
-}
-
-END {
- if (active)
- print_tail_country()
- print regdb "};"
- print ""
- print "int reg_regdb_size = ARRAY_SIZE(reg_regdb);"
-}
diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c
index 10bf040a0982..413d4f4e6334 100644
--- a/net/wireless/ibss.c
+++ b/net/wireless/ibss.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Some IBSS support code for cfg80211.
*
diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c
index 421a6b80ec62..51aa55618ef7 100644
--- a/net/wireless/mesh.c
+++ b/net/wireless/mesh.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/ieee80211.h>
#include <linux/export.h>
#include <net/cfg80211.h>
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index d8df7a5180a0..e7c64a8dce54 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* cfg80211 MLME SAP interface
*
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 8ce85420ecb0..a0e1951227fa 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -549,6 +549,14 @@ nl80211_nan_srf_policy[NL80211_NAN_SRF_ATTR_MAX + 1] = {
[NL80211_NAN_SRF_MAC_ADDRS] = { .type = NLA_NESTED },
};
+/* policy for packet pattern attributes */
+static const struct nla_policy
+nl80211_packet_pattern_policy[MAX_NL80211_PKTPAT + 1] = {
+ [NL80211_PKTPAT_MASK] = { .type = NLA_BINARY, },
+ [NL80211_PKTPAT_PATTERN] = { .type = NLA_BINARY, },
+ [NL80211_PKTPAT_OFFSET] = { .type = NLA_U32 },
+};
+
static int nl80211_prepare_wdev_dump(struct sk_buff *skb,
struct netlink_callback *cb,
struct cfg80211_registered_device **rdev,
@@ -2122,6 +2130,15 @@ static int nl80211_parse_chandef(struct cfg80211_registered_device *rdev,
case NL80211_CHAN_HT40MINUS:
cfg80211_chandef_create(chandef, chandef->chan,
chantype);
+ /* user input for center_freq is incorrect */
+ if (info->attrs[NL80211_ATTR_CENTER_FREQ1] &&
+ chandef->center_freq1 != nla_get_u32(
+ info->attrs[NL80211_ATTR_CENTER_FREQ1]))
+ return -EINVAL;
+ /* center_freq2 must be zero */
+ if (info->attrs[NL80211_ATTR_CENTER_FREQ2] &&
+ nla_get_u32(info->attrs[NL80211_ATTR_CENTER_FREQ2]))
+ return -EINVAL;
break;
default:
return -EINVAL;
@@ -3791,8 +3808,8 @@ static void nl80211_check_ap_rate_selectors(struct cfg80211_ap_settings *params,
static void nl80211_calculate_ap_params(struct cfg80211_ap_settings *params)
{
const struct cfg80211_beacon_data *bcn = &params->beacon;
- size_t ies_len = bcn->beacon_ies_len;
- const u8 *ies = bcn->beacon_ies;
+ size_t ies_len = bcn->tail_len;
+ const u8 *ies = bcn->tail;
const u8 *rates;
const u8 *cap;
@@ -5669,6 +5686,11 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info)
}
}
+static int nl80211_reload_regdb(struct sk_buff *skb, struct genl_info *info)
+{
+ return reg_reload_regdb();
+}
+
static int nl80211_get_mesh_config(struct sk_buff *skb,
struct genl_info *info)
{
@@ -6269,7 +6291,7 @@ static int nl80211_send_regdom(struct sk_buff *msg, struct netlink_callback *cb,
if (!hdr)
return -1;
- genl_dump_check_consistent(cb, hdr, &nl80211_fam);
+ genl_dump_check_consistent(cb, hdr);
if (nl80211_put_regdom(regdom, msg))
goto nla_put_failure;
@@ -6610,6 +6632,77 @@ static bool cfg80211_off_channel_oper_allowed(struct wireless_dev *wdev)
return regulatory_pre_cac_allowed(wdev->wiphy);
}
+static int
+nl80211_check_scan_flags(struct wiphy *wiphy, struct wireless_dev *wdev,
+ void *request, struct nlattr **attrs,
+ bool is_sched_scan)
+{
+ u8 *mac_addr, *mac_addr_mask;
+ u32 *flags;
+ enum nl80211_feature_flags randomness_flag;
+
+ if (!attrs[NL80211_ATTR_SCAN_FLAGS])
+ return 0;
+
+ if (is_sched_scan) {
+ struct cfg80211_sched_scan_request *req = request;
+
+ randomness_flag = wdev ?
+ NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR :
+ NL80211_FEATURE_ND_RANDOM_MAC_ADDR;
+ flags = &req->flags;
+ mac_addr = req->mac_addr;
+ mac_addr_mask = req->mac_addr_mask;
+ } else {
+ struct cfg80211_scan_request *req = request;
+
+ randomness_flag = NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR;
+ flags = &req->flags;
+ mac_addr = req->mac_addr;
+ mac_addr_mask = req->mac_addr_mask;
+ }
+
+ *flags = nla_get_u32(attrs[NL80211_ATTR_SCAN_FLAGS]);
+
+ if ((*flags & NL80211_SCAN_FLAG_LOW_PRIORITY) &&
+ !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN))
+ return -EOPNOTSUPP;
+
+ if (*flags & NL80211_SCAN_FLAG_RANDOM_ADDR) {
+ int err;
+
+ if (!(wiphy->features & randomness_flag) ||
+ (wdev && wdev->current_bss))
+ return -EOPNOTSUPP;
+
+ err = nl80211_parse_random_mac(attrs, mac_addr, mac_addr_mask);
+ if (err)
+ return err;
+ }
+
+ if ((*flags & NL80211_SCAN_FLAG_FILS_MAX_CHANNEL_TIME) &&
+ !wiphy_ext_feature_isset(wiphy,
+ NL80211_EXT_FEATURE_FILS_MAX_CHANNEL_TIME))
+ return -EOPNOTSUPP;
+
+ if ((*flags & NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP) &&
+ !wiphy_ext_feature_isset(wiphy,
+ NL80211_EXT_FEATURE_ACCEPT_BCAST_PROBE_RESP))
+ return -EOPNOTSUPP;
+
+ if ((*flags & NL80211_SCAN_FLAG_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION) &&
+ !wiphy_ext_feature_isset(wiphy,
+ NL80211_EXT_FEATURE_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION))
+ return -EOPNOTSUPP;
+
+ if ((*flags & NL80211_SCAN_FLAG_OCE_PROBE_REQ_HIGH_TX_RATE) &&
+ !wiphy_ext_feature_isset(wiphy,
+ NL80211_EXT_FEATURE_OCE_PROBE_REQ_HIGH_TX_RATE))
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
{
struct cfg80211_registered_device *rdev = info->user_ptr[0];
@@ -6815,34 +6908,10 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
nla_get_flag(info->attrs[NL80211_ATTR_MEASUREMENT_DURATION_MANDATORY]);
}
- if (info->attrs[NL80211_ATTR_SCAN_FLAGS]) {
- request->flags = nla_get_u32(
- info->attrs[NL80211_ATTR_SCAN_FLAGS]);
- if ((request->flags & NL80211_SCAN_FLAG_LOW_PRIORITY) &&
- !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) {
- err = -EOPNOTSUPP;
- goto out_free;
- }
-
- if (request->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) {
- if (!(wiphy->features &
- NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR)) {
- err = -EOPNOTSUPP;
- goto out_free;
- }
-
- if (wdev->current_bss) {
- err = -EOPNOTSUPP;
- goto out_free;
- }
-
- err = nl80211_parse_random_mac(info->attrs,
- request->mac_addr,
- request->mac_addr_mask);
- if (err)
- goto out_free;
- }
- }
+ err = nl80211_check_scan_flags(wiphy, wdev, request, info->attrs,
+ false);
+ if (err)
+ goto out_free;
request->no_cck =
nla_get_flag(info->attrs[NL80211_ATTR_TX_NO_CCK_RATE]);
@@ -7290,37 +7359,9 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev,
request->ie_len);
}
- if (attrs[NL80211_ATTR_SCAN_FLAGS]) {
- request->flags = nla_get_u32(
- attrs[NL80211_ATTR_SCAN_FLAGS]);
- if ((request->flags & NL80211_SCAN_FLAG_LOW_PRIORITY) &&
- !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) {
- err = -EOPNOTSUPP;
- goto out_free;
- }
-
- if (request->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) {
- u32 flg = NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR;
-
- if (!wdev) /* must be net-detect */
- flg = NL80211_FEATURE_ND_RANDOM_MAC_ADDR;
-
- if (!(wiphy->features & flg)) {
- err = -EOPNOTSUPP;
- goto out_free;
- }
-
- if (wdev && wdev->current_bss) {
- err = -EOPNOTSUPP;
- goto out_free;
- }
-
- err = nl80211_parse_random_mac(attrs, request->mac_addr,
- request->mac_addr_mask);
- if (err)
- goto out_free;
- }
- }
+ err = nl80211_check_scan_flags(wiphy, wdev, request, attrs, true);
+ if (err)
+ goto out_free;
if (attrs[NL80211_ATTR_SCHED_SCAN_DELAY])
request->delay =
@@ -7681,7 +7722,7 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb,
if (!hdr)
return -1;
- genl_dump_check_consistent(cb, hdr, &nl80211_fam);
+ genl_dump_check_consistent(cb, hdr);
if (nla_put_u32(msg, NL80211_ATTR_GENERATION, rdev->bss_generation))
goto nla_put_failure;
@@ -8924,8 +8965,14 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
if (info->attrs[NL80211_ATTR_USE_MFP]) {
connect.mfp = nla_get_u32(info->attrs[NL80211_ATTR_USE_MFP]);
+ if (connect.mfp == NL80211_MFP_OPTIONAL &&
+ !wiphy_ext_feature_isset(&rdev->wiphy,
+ NL80211_EXT_FEATURE_MFP_OPTIONAL))
+ return -EOPNOTSUPP;
+
if (connect.mfp != NL80211_MFP_REQUIRED &&
- connect.mfp != NL80211_MFP_NO)
+ connect.mfp != NL80211_MFP_NO &&
+ connect.mfp != NL80211_MFP_OPTIONAL)
return -EINVAL;
} else {
connect.mfp = NL80211_MFP_NO;
@@ -9987,6 +10034,9 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info)
if (err)
return err;
+ if (!setup.chandef.chan)
+ return -EINVAL;
+
err = validate_beacon_tx_rate(rdev, setup.chandef.chan->band,
&setup.beacon_rate);
if (err)
@@ -10529,7 +10579,8 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info)
u8 *mask_pat;
nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat,
- NULL, info->extack);
+ nl80211_packet_pattern_policy,
+ info->extack);
err = -EINVAL;
if (!pat_tb[NL80211_PKTPAT_MASK] ||
!pat_tb[NL80211_PKTPAT_PATTERN])
@@ -10778,7 +10829,8 @@ static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev,
rem) {
u8 *mask_pat;
- nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat, NULL, NULL);
+ nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat,
+ nl80211_packet_pattern_policy, NULL);
if (!pat_tb[NL80211_PKTPAT_MASK] ||
!pat_tb[NL80211_PKTPAT_PATTERN])
return -EINVAL;
@@ -10903,6 +10955,9 @@ static int nl80211_set_rekey_data(struct sk_buff *skb, struct genl_info *info)
if (err)
return err;
+ if (!tb[NL80211_REKEY_DATA_REPLAY_CTR] || !tb[NL80211_REKEY_DATA_KEK] ||
+ !tb[NL80211_REKEY_DATA_KCK])
+ return -EINVAL;
if (nla_len(tb[NL80211_REKEY_DATA_REPLAY_CTR]) != NL80211_REPLAY_CTR_LEN)
return -ERANGE;
if (nla_len(tb[NL80211_REKEY_DATA_KEK]) != NL80211_KEK_LEN)
@@ -12669,6 +12724,12 @@ static const struct genl_ops nl80211_ops[] = {
.flags = GENL_ADMIN_PERM,
},
{
+ .cmd = NL80211_CMD_RELOAD_REGDB,
+ .doit = nl80211_reload_regdb,
+ .policy = nl80211_policy,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
.cmd = NL80211_CMD_GET_MESH_CONFIG,
.doit = nl80211_get_mesh_config,
.policy = nl80211_policy,
@@ -13796,9 +13857,7 @@ void nl80211_send_roamed(struct cfg80211_registered_device *rdev,
info->req_ie)) ||
(info->resp_ie &&
nla_put(msg, NL80211_ATTR_RESP_IE, info->resp_ie_len,
- info->resp_ie)) ||
- (info->authorized &&
- nla_put_flag(msg, NL80211_ATTR_PORT_AUTHORIZED)))
+ info->resp_ie)))
goto nla_put_failure;
genlmsg_end(msg, hdr);
@@ -13812,6 +13871,36 @@ void nl80211_send_roamed(struct cfg80211_registered_device *rdev,
nlmsg_free(msg);
}
+void nl80211_send_port_authorized(struct cfg80211_registered_device *rdev,
+ struct net_device *netdev, const u8 *bssid)
+{
+ struct sk_buff *msg;
+ void *hdr;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return;
+
+ hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_PORT_AUTHORIZED);
+ if (!hdr) {
+ nlmsg_free(msg);
+ return;
+ }
+
+ if (nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, bssid))
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+
+ genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
+ NL80211_MCGRP_MLME, GFP_KERNEL);
+ return;
+
+ nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ nlmsg_free(msg);
+}
+
void nl80211_send_disconnected(struct cfg80211_registered_device *rdev,
struct net_device *netdev, u16 reason,
const u8 *ie, size_t ie_len, bool from_ap)
@@ -14185,7 +14274,7 @@ static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd,
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
struct sk_buff *msg;
void *hdr;
- u32 nlportid = ACCESS_ONCE(wdev->ap_unexpected_nlportid);
+ u32 nlportid = READ_ONCE(wdev->ap_unexpected_nlportid);
if (!nlportid)
return false;
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index b96933322077..79e47fe60c35 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __NET_WIRELESS_NL80211_H
#define __NET_WIRELESS_NL80211_H
@@ -58,6 +59,8 @@ void nl80211_send_connect_result(struct cfg80211_registered_device *rdev,
void nl80211_send_roamed(struct cfg80211_registered_device *rdev,
struct net_device *netdev,
struct cfg80211_roam_info *info, gfp_t gfp);
+void nl80211_send_port_authorized(struct cfg80211_registered_device *rdev,
+ struct net_device *netdev, const u8 *bssid);
void nl80211_send_disconnected(struct cfg80211_registered_device *rdev,
struct net_device *netdev, u16 reason,
const u8 *ie, size_t ie_len, bool from_ap);
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index ce23d7d49960..0c06240d25af 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __CFG80211_RDEV_OPS
#define __CFG80211_RDEV_OPS
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 5fae296a6a58..3871998059de 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -4,6 +4,7 @@
* Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2008-2011 Luis R. Rodriguez <mcgrof@qca.qualcomm.com>
* Copyright 2013-2014 Intel Mobile Communications GmbH
+ * Copyright 2017 Intel Deutschland GmbH
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -52,12 +53,13 @@
#include <linux/ctype.h>
#include <linux/nl80211.h>
#include <linux/platform_device.h>
+#include <linux/verification.h>
#include <linux/moduleparam.h>
+#include <linux/firmware.h>
#include <net/cfg80211.h>
#include "core.h"
#include "reg.h"
#include "rdev-ops.h"
-#include "regdb.h"
#include "nl80211.h"
/*
@@ -99,7 +101,7 @@ static struct regulatory_request core_request_world = {
static struct regulatory_request __rcu *last_request =
(void __force __rcu *)&core_request_world;
-/* To trigger userspace events */
+/* To trigger userspace events and load firmware */
static struct platform_device *reg_pdev;
/*
@@ -442,7 +444,6 @@ reg_copy_regd(const struct ieee80211_regdomain *src_regd)
return regd;
}
-#ifdef CONFIG_CFG80211_INTERNAL_REGDB
struct reg_regdb_apply_request {
struct list_head list;
const struct ieee80211_regdomain *regdom;
@@ -474,55 +475,26 @@ static void reg_regdb_apply(struct work_struct *work)
static DECLARE_WORK(reg_regdb_work, reg_regdb_apply);
-static int reg_query_builtin(const char *alpha2)
+static int reg_schedule_apply(const struct ieee80211_regdomain *regdom)
{
- const struct ieee80211_regdomain *regdom = NULL;
struct reg_regdb_apply_request *request;
- unsigned int i;
-
- for (i = 0; i < reg_regdb_size; i++) {
- if (alpha2_equal(alpha2, reg_regdb[i]->alpha2)) {
- regdom = reg_regdb[i];
- break;
- }
- }
-
- if (!regdom)
- return -ENODATA;
request = kzalloc(sizeof(struct reg_regdb_apply_request), GFP_KERNEL);
- if (!request)
- return -ENOMEM;
-
- request->regdom = reg_copy_regd(regdom);
- if (IS_ERR_OR_NULL(request->regdom)) {
- kfree(request);
+ if (!request) {
+ kfree(regdom);
return -ENOMEM;
}
+ request->regdom = regdom;
+
mutex_lock(&reg_regdb_apply_mutex);
list_add_tail(&request->list, &reg_regdb_apply_list);
mutex_unlock(&reg_regdb_apply_mutex);
schedule_work(&reg_regdb_work);
-
return 0;
}
-/* Feel free to add any other sanity checks here */
-static void reg_regdb_size_check(void)
-{
- /* We should ideally BUILD_BUG_ON() but then random builds would fail */
- WARN_ONCE(!reg_regdb_size, "db.txt is empty, you should update it...");
-}
-#else
-static inline void reg_regdb_size_check(void) {}
-static inline int reg_query_builtin(const char *alpha2)
-{
- return -ENODATA;
-}
-#endif /* CONFIG_CFG80211_INTERNAL_REGDB */
-
#ifdef CONFIG_CFG80211_CRDA_SUPPORT
/* Max number of consecutive attempts to communicate with CRDA */
#define REG_MAX_CRDA_TIMEOUTS 10
@@ -598,10 +570,402 @@ static inline int call_crda(const char *alpha2)
}
#endif /* CONFIG_CFG80211_CRDA_SUPPORT */
+/* code to directly load a firmware database through request_firmware */
+static const struct fwdb_header *regdb;
+
+struct fwdb_country {
+ u8 alpha2[2];
+ __be16 coll_ptr;
+ /* this struct cannot be extended */
+} __packed __aligned(4);
+
+struct fwdb_collection {
+ u8 len;
+ u8 n_rules;
+ u8 dfs_region;
+ /* no optional data yet */
+ /* aligned to 2, then followed by __be16 array of rule pointers */
+} __packed __aligned(4);
+
+enum fwdb_flags {
+ FWDB_FLAG_NO_OFDM = BIT(0),
+ FWDB_FLAG_NO_OUTDOOR = BIT(1),
+ FWDB_FLAG_DFS = BIT(2),
+ FWDB_FLAG_NO_IR = BIT(3),
+ FWDB_FLAG_AUTO_BW = BIT(4),
+};
+
+struct fwdb_rule {
+ u8 len;
+ u8 flags;
+ __be16 max_eirp;
+ __be32 start, end, max_bw;
+ /* start of optional data */
+ __be16 cac_timeout;
+} __packed __aligned(4);
+
+#define FWDB_MAGIC 0x52474442
+#define FWDB_VERSION 20
+
+struct fwdb_header {
+ __be32 magic;
+ __be32 version;
+ struct fwdb_country country[];
+} __packed __aligned(4);
+
+static bool valid_rule(const u8 *data, unsigned int size, u16 rule_ptr)
+{
+ struct fwdb_rule *rule = (void *)(data + (rule_ptr << 2));
+
+ if ((u8 *)rule + sizeof(rule->len) > data + size)
+ return false;
+
+ /* mandatory fields */
+ if (rule->len < offsetofend(struct fwdb_rule, max_bw))
+ return false;
+
+ return true;
+}
+
+static bool valid_country(const u8 *data, unsigned int size,
+ const struct fwdb_country *country)
+{
+ unsigned int ptr = be16_to_cpu(country->coll_ptr) << 2;
+ struct fwdb_collection *coll = (void *)(data + ptr);
+ __be16 *rules_ptr;
+ unsigned int i;
+
+ /* make sure we can read len/n_rules */
+ if ((u8 *)coll + offsetofend(typeof(*coll), n_rules) > data + size)
+ return false;
+
+ /* make sure base struct and all rules fit */
+ if ((u8 *)coll + ALIGN(coll->len, 2) +
+ (coll->n_rules * 2) > data + size)
+ return false;
+
+ /* mandatory fields must exist */
+ if (coll->len < offsetofend(struct fwdb_collection, dfs_region))
+ return false;
+
+ rules_ptr = (void *)((u8 *)coll + ALIGN(coll->len, 2));
+
+ for (i = 0; i < coll->n_rules; i++) {
+ u16 rule_ptr = be16_to_cpu(rules_ptr[i]);
+
+ if (!valid_rule(data, size, rule_ptr))
+ return false;
+ }
+
+ return true;
+}
+
+#ifdef CONFIG_CFG80211_REQUIRE_SIGNED_REGDB
+static struct key *builtin_regdb_keys;
+
+static void __init load_keys_from_buffer(const u8 *p, unsigned int buflen)
+{
+ const u8 *end = p + buflen;
+ size_t plen;
+ key_ref_t key;
+
+ while (p < end) {
+ /* Each cert begins with an ASN.1 SEQUENCE tag and must be more
+ * than 256 bytes in size.
+ */
+ if (end - p < 4)
+ goto dodgy_cert;
+ if (p[0] != 0x30 &&
+ p[1] != 0x82)
+ goto dodgy_cert;
+ plen = (p[2] << 8) | p[3];
+ plen += 4;
+ if (plen > end - p)
+ goto dodgy_cert;
+
+ key = key_create_or_update(make_key_ref(builtin_regdb_keys, 1),
+ "asymmetric", NULL, p, plen,
+ ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
+ KEY_USR_VIEW | KEY_USR_READ),
+ KEY_ALLOC_NOT_IN_QUOTA |
+ KEY_ALLOC_BUILT_IN |
+ KEY_ALLOC_BYPASS_RESTRICTION);
+ if (IS_ERR(key)) {
+ pr_err("Problem loading in-kernel X.509 certificate (%ld)\n",
+ PTR_ERR(key));
+ } else {
+ pr_notice("Loaded X.509 cert '%s'\n",
+ key_ref_to_ptr(key)->description);
+ key_ref_put(key);
+ }
+ p += plen;
+ }
+
+ return;
+
+dodgy_cert:
+ pr_err("Problem parsing in-kernel X.509 certificate list\n");
+}
+
+static int __init load_builtin_regdb_keys(void)
+{
+ builtin_regdb_keys =
+ keyring_alloc(".builtin_regdb_keys",
+ KUIDT_INIT(0), KGIDT_INIT(0), current_cred(),
+ ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
+ KEY_USR_VIEW | KEY_USR_READ | KEY_USR_SEARCH),
+ KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL);
+ if (IS_ERR(builtin_regdb_keys))
+ return PTR_ERR(builtin_regdb_keys);
+
+ pr_notice("Loading compiled-in X.509 certificates for regulatory database\n");
+
+#ifdef CONFIG_CFG80211_USE_KERNEL_REGDB_KEYS
+ load_keys_from_buffer(shipped_regdb_certs, shipped_regdb_certs_len);
+#endif
+#ifdef CONFIG_CFG80211_EXTRA_REGDB_KEYDIR
+ if (CONFIG_CFG80211_EXTRA_REGDB_KEYDIR[0] != '\0')
+ load_keys_from_buffer(extra_regdb_certs, extra_regdb_certs_len);
+#endif
+
+ return 0;
+}
+
+static bool regdb_has_valid_signature(const u8 *data, unsigned int size)
+{
+ const struct firmware *sig;
+ bool result;
+
+ if (request_firmware(&sig, "regulatory.db.p7s", &reg_pdev->dev))
+ return false;
+
+ result = verify_pkcs7_signature(data, size, sig->data, sig->size,
+ builtin_regdb_keys,
+ VERIFYING_UNSPECIFIED_SIGNATURE,
+ NULL, NULL) == 0;
+
+ release_firmware(sig);
+
+ return result;
+}
+
+static void free_regdb_keyring(void)
+{
+ key_put(builtin_regdb_keys);
+}
+#else
+static int load_builtin_regdb_keys(void)
+{
+ return 0;
+}
+
+static bool regdb_has_valid_signature(const u8 *data, unsigned int size)
+{
+ return true;
+}
+
+static void free_regdb_keyring(void)
+{
+}
+#endif /* CONFIG_CFG80211_REQUIRE_SIGNED_REGDB */
+
+static bool valid_regdb(const u8 *data, unsigned int size)
+{
+ const struct fwdb_header *hdr = (void *)data;
+ const struct fwdb_country *country;
+
+ if (size < sizeof(*hdr))
+ return false;
+
+ if (hdr->magic != cpu_to_be32(FWDB_MAGIC))
+ return false;
+
+ if (hdr->version != cpu_to_be32(FWDB_VERSION))
+ return false;
+
+ if (!regdb_has_valid_signature(data, size))
+ return false;
+
+ country = &hdr->country[0];
+ while ((u8 *)(country + 1) <= data + size) {
+ if (!country->coll_ptr)
+ break;
+ if (!valid_country(data, size, country))
+ return false;
+ country++;
+ }
+
+ return true;
+}
+
+static int regdb_query_country(const struct fwdb_header *db,
+ const struct fwdb_country *country)
+{
+ unsigned int ptr = be16_to_cpu(country->coll_ptr) << 2;
+ struct fwdb_collection *coll = (void *)((u8 *)db + ptr);
+ struct ieee80211_regdomain *regdom;
+ unsigned int size_of_regd;
+ unsigned int i;
+
+ size_of_regd =
+ sizeof(struct ieee80211_regdomain) +
+ coll->n_rules * sizeof(struct ieee80211_reg_rule);
+
+ regdom = kzalloc(size_of_regd, GFP_KERNEL);
+ if (!regdom)
+ return -ENOMEM;
+
+ regdom->n_reg_rules = coll->n_rules;
+ regdom->alpha2[0] = country->alpha2[0];
+ regdom->alpha2[1] = country->alpha2[1];
+ regdom->dfs_region = coll->dfs_region;
+
+ for (i = 0; i < regdom->n_reg_rules; i++) {
+ __be16 *rules_ptr = (void *)((u8 *)coll + ALIGN(coll->len, 2));
+ unsigned int rule_ptr = be16_to_cpu(rules_ptr[i]) << 2;
+ struct fwdb_rule *rule = (void *)((u8 *)db + rule_ptr);
+ struct ieee80211_reg_rule *rrule = &regdom->reg_rules[i];
+
+ rrule->freq_range.start_freq_khz = be32_to_cpu(rule->start);
+ rrule->freq_range.end_freq_khz = be32_to_cpu(rule->end);
+ rrule->freq_range.max_bandwidth_khz = be32_to_cpu(rule->max_bw);
+
+ rrule->power_rule.max_antenna_gain = 0;
+ rrule->power_rule.max_eirp = be16_to_cpu(rule->max_eirp);
+
+ rrule->flags = 0;
+ if (rule->flags & FWDB_FLAG_NO_OFDM)
+ rrule->flags |= NL80211_RRF_NO_OFDM;
+ if (rule->flags & FWDB_FLAG_NO_OUTDOOR)
+ rrule->flags |= NL80211_RRF_NO_OUTDOOR;
+ if (rule->flags & FWDB_FLAG_DFS)
+ rrule->flags |= NL80211_RRF_DFS;
+ if (rule->flags & FWDB_FLAG_NO_IR)
+ rrule->flags |= NL80211_RRF_NO_IR;
+ if (rule->flags & FWDB_FLAG_AUTO_BW)
+ rrule->flags |= NL80211_RRF_AUTO_BW;
+
+ rrule->dfs_cac_ms = 0;
+
+ /* handle optional data */
+ if (rule->len >= offsetofend(struct fwdb_rule, cac_timeout))
+ rrule->dfs_cac_ms =
+ 1000 * be16_to_cpu(rule->cac_timeout);
+ }
+
+ return reg_schedule_apply(regdom);
+}
+
+static int query_regdb(const char *alpha2)
+{
+ const struct fwdb_header *hdr = regdb;
+ const struct fwdb_country *country;
+
+ ASSERT_RTNL();
+
+ if (IS_ERR(regdb))
+ return PTR_ERR(regdb);
+
+ country = &hdr->country[0];
+ while (country->coll_ptr) {
+ if (alpha2_equal(alpha2, country->alpha2))
+ return regdb_query_country(regdb, country);
+ country++;
+ }
+
+ return -ENODATA;
+}
+
+static void regdb_fw_cb(const struct firmware *fw, void *context)
+{
+ int set_error = 0;
+ bool restore = true;
+ void *db;
+
+ if (!fw) {
+ pr_info("failed to load regulatory.db\n");
+ set_error = -ENODATA;
+ } else if (!valid_regdb(fw->data, fw->size)) {
+ pr_info("loaded regulatory.db is malformed or signature is missing/invalid\n");
+ set_error = -EINVAL;
+ }
+
+ rtnl_lock();
+ if (WARN_ON(regdb && !IS_ERR(regdb))) {
+ /* just restore and free new db */
+ } else if (set_error) {
+ regdb = ERR_PTR(set_error);
+ } else if (fw) {
+ db = kmemdup(fw->data, fw->size, GFP_KERNEL);
+ if (db) {
+ regdb = db;
+ restore = context && query_regdb(context);
+ } else {
+ restore = true;
+ }
+ }
+
+ if (restore)
+ restore_regulatory_settings(true);
+
+ rtnl_unlock();
+
+ kfree(context);
+
+ release_firmware(fw);
+}
+
+static int query_regdb_file(const char *alpha2)
+{
+ ASSERT_RTNL();
+
+ if (regdb)
+ return query_regdb(alpha2);
+
+ alpha2 = kmemdup(alpha2, 2, GFP_KERNEL);
+ if (!alpha2)
+ return -ENOMEM;
+
+ return request_firmware_nowait(THIS_MODULE, true, "regulatory.db",
+ &reg_pdev->dev, GFP_KERNEL,
+ (void *)alpha2, regdb_fw_cb);
+}
+
+int reg_reload_regdb(void)
+{
+ const struct firmware *fw;
+ void *db;
+ int err;
+
+ err = request_firmware(&fw, "regulatory.db", &reg_pdev->dev);
+ if (err)
+ return err;
+
+ if (!valid_regdb(fw->data, fw->size)) {
+ err = -ENODATA;
+ goto out;
+ }
+
+ db = kmemdup(fw->data, fw->size, GFP_KERNEL);
+ if (!db) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ rtnl_lock();
+ if (!IS_ERR_OR_NULL(regdb))
+ kfree(regdb);
+ regdb = db;
+ rtnl_unlock();
+
+ out:
+ release_firmware(fw);
+ return err;
+}
+
static bool reg_query_database(struct regulatory_request *request)
{
- /* query internal regulatory database (if it exists) */
- if (reg_query_builtin(request->alpha2) == 0)
+ if (query_regdb_file(request->alpha2) == 0)
return true;
if (call_crda(request->alpha2) == 0)
@@ -1483,7 +1847,9 @@ static void reg_process_ht_flags_channel(struct wiphy *wiphy,
{
struct ieee80211_supported_band *sband = wiphy->bands[channel->band];
struct ieee80211_channel *channel_before = NULL, *channel_after = NULL;
+ const struct ieee80211_regdomain *regd;
unsigned int i;
+ u32 flags;
if (!is_ht40_allowed(channel)) {
channel->flags |= IEEE80211_CHAN_NO_HT40;
@@ -1503,17 +1869,30 @@ static void reg_process_ht_flags_channel(struct wiphy *wiphy,
channel_after = c;
}
+ flags = 0;
+ regd = get_wiphy_regdom(wiphy);
+ if (regd) {
+ const struct ieee80211_reg_rule *reg_rule =
+ freq_reg_info_regd(MHZ_TO_KHZ(channel->center_freq),
+ regd, MHZ_TO_KHZ(20));
+
+ if (!IS_ERR(reg_rule))
+ flags = reg_rule->flags;
+ }
+
/*
* Please note that this assumes target bandwidth is 20 MHz,
* if that ever changes we also need to change the below logic
* to include that as well.
*/
- if (!is_ht40_allowed(channel_before))
+ if (!is_ht40_allowed(channel_before) ||
+ flags & NL80211_RRF_NO_HT40MINUS)
channel->flags |= IEEE80211_CHAN_NO_HT40MINUS;
else
channel->flags &= ~IEEE80211_CHAN_NO_HT40MINUS;
- if (!is_ht40_allowed(channel_after))
+ if (!is_ht40_allowed(channel_after) ||
+ flags & NL80211_RRF_NO_HT40PLUS)
channel->flags |= IEEE80211_CHAN_NO_HT40PLUS;
else
channel->flags &= ~IEEE80211_CHAN_NO_HT40PLUS;
@@ -3269,6 +3648,10 @@ int __init regulatory_init(void)
{
int err = 0;
+ err = load_builtin_regdb_keys();
+ if (err)
+ return err;
+
reg_pdev = platform_device_register_simple("regulatory", 0, NULL, 0);
if (IS_ERR(reg_pdev))
return PTR_ERR(reg_pdev);
@@ -3277,8 +3660,6 @@ int __init regulatory_init(void)
spin_lock_init(&reg_pending_beacons_lock);
spin_lock_init(&reg_indoor_lock);
- reg_regdb_size_check();
-
rcu_assign_pointer(cfg80211_regdomain, cfg80211_world_regdom);
user_alpha2[0] = '9';
@@ -3344,4 +3725,9 @@ void regulatory_exit(void)
list_del(&reg_request->list);
kfree(reg_request);
}
+
+ if (!IS_ERR_OR_NULL(regdb))
+ kfree(regdb);
+
+ free_regdb_keyring();
}
diff --git a/net/wireless/reg.h b/net/wireless/reg.h
index ca7fedf2e7a1..9ceeb5f3a7cb 100644
--- a/net/wireless/reg.h
+++ b/net/wireless/reg.h
@@ -1,5 +1,8 @@
#ifndef __NET_WIRELESS_REG_H
#define __NET_WIRELESS_REG_H
+
+#include <net/cfg80211.h>
+
/*
* Copyright 2008-2011 Luis R. Rodriguez <mcgrof@qca.qualcomm.com>
*
@@ -179,4 +182,15 @@ void regulatory_propagate_dfs_state(struct wiphy *wiphy,
* @wiphy2 - wiphy it's dfs_region to be checked against that of wiphy1
*/
bool reg_dfs_domain_same(struct wiphy *wiphy1, struct wiphy *wiphy2);
+
+/**
+ * reg_reload_regdb - reload the regulatory.db firmware file
+ */
+int reg_reload_regdb(void);
+
+extern const u8 shipped_regdb_certs[];
+extern unsigned int shipped_regdb_certs_len;
+extern const u8 extra_regdb_certs[];
+extern unsigned int extra_regdb_certs_len;
+
#endif /* __NET_WIRELESS_REG_H */
diff --git a/net/wireless/regdb.h b/net/wireless/regdb.h
deleted file mode 100644
index 3279cfcefb0c..000000000000
--- a/net/wireless/regdb.h
+++ /dev/null
@@ -1,23 +0,0 @@
-#ifndef __REGDB_H__
-#define __REGDB_H__
-
-/*
- * Copyright 2009 John W. Linville <linville@tuxdriver.com>
- *
- * Permission to use, copy, modify, and/or distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-extern const struct ieee80211_regdomain *reg_regdb[];
-extern int reg_regdb_size;
-
-#endif /* __REGDB_H__ */
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 9f0901f3e42b..f6c5fe482506 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* cfg80211 scan result handling
*
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index 0a49b88070d0..fdb3646274a5 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* SME code for cfg80211
* both driver SME event handling and the SME implementation
@@ -522,11 +523,6 @@ static int cfg80211_sme_connect(struct wireless_dev *wdev,
return -EOPNOTSUPP;
if (wdev->current_bss) {
- if (!prev_bssid)
- return -EALREADY;
- if (prev_bssid &&
- !ether_addr_equal(prev_bssid, wdev->current_bss->pub.bssid))
- return -ENOTCONN;
cfg80211_unhold_bss(wdev->current_bss);
cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub);
wdev->current_bss = NULL;
@@ -960,7 +956,6 @@ void cfg80211_roamed(struct net_device *dev, struct cfg80211_roam_info *info,
ev->rm.resp_ie_len = info->resp_ie_len;
memcpy((void *)ev->rm.resp_ie, info->resp_ie, info->resp_ie_len);
ev->rm.bss = info->bss;
- ev->rm.authorized = info->authorized;
spin_lock_irqsave(&wdev->event_lock, flags);
list_add_tail(&ev->list, &wdev->event_list);
@@ -969,6 +964,50 @@ void cfg80211_roamed(struct net_device *dev, struct cfg80211_roam_info *info,
}
EXPORT_SYMBOL(cfg80211_roamed);
+void __cfg80211_port_authorized(struct wireless_dev *wdev, const u8 *bssid)
+{
+ ASSERT_WDEV_LOCK(wdev);
+
+ if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION))
+ return;
+
+ if (WARN_ON(!wdev->current_bss) ||
+ WARN_ON(!ether_addr_equal(wdev->current_bss->pub.bssid, bssid)))
+ return;
+
+ nl80211_send_port_authorized(wiphy_to_rdev(wdev->wiphy), wdev->netdev,
+ bssid);
+}
+
+void cfg80211_port_authorized(struct net_device *dev, const u8 *bssid,
+ gfp_t gfp)
+{
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
+ struct cfg80211_event *ev;
+ unsigned long flags;
+
+ if (WARN_ON(!bssid))
+ return;
+
+ ev = kzalloc(sizeof(*ev), gfp);
+ if (!ev)
+ return;
+
+ ev->type = EVENT_PORT_AUTHORIZED;
+ memcpy(ev->pa.bssid, bssid, ETH_ALEN);
+
+ /*
+ * Use the wdev event list so that if there are pending
+ * connected/roamed events, they will be reported first.
+ */
+ spin_lock_irqsave(&wdev->event_lock, flags);
+ list_add_tail(&ev->list, &wdev->event_list);
+ spin_unlock_irqrestore(&wdev->event_lock, flags);
+ queue_work(cfg80211_wq, &rdev->event_work);
+}
+EXPORT_SYMBOL(cfg80211_port_authorized);
+
void __cfg80211_disconnected(struct net_device *dev, const u8 *ie,
size_t ie_len, u16 reason, bool from_ap)
{
@@ -1063,11 +1102,35 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev,
ASSERT_WDEV_LOCK(wdev);
- if (WARN_ON(wdev->connect_keys)) {
- kzfree(wdev->connect_keys);
- wdev->connect_keys = NULL;
+ /*
+ * If we have an ssid_len, we're trying to connect or are
+ * already connected, so reject a new SSID unless it's the
+ * same (which is the case for re-association.)
+ */
+ if (wdev->ssid_len &&
+ (wdev->ssid_len != connect->ssid_len ||
+ memcmp(wdev->ssid, connect->ssid, wdev->ssid_len)))
+ return -EALREADY;
+
+ /*
+ * If connected, reject (re-)association unless prev_bssid
+ * matches the current BSSID.
+ */
+ if (wdev->current_bss) {
+ if (!prev_bssid)
+ return -EALREADY;
+ if (!ether_addr_equal(prev_bssid, wdev->current_bss->pub.bssid))
+ return -ENOTCONN;
}
+ /*
+ * Reject if we're in the process of connecting with WEP,
+ * this case isn't very interesting and trying to handle
+ * it would make the code much more complex.
+ */
+ if (wdev->connect_keys)
+ return -EINPROGRESS;
+
cfg80211_oper_and_ht_capa(&connect->ht_capa_mask,
rdev->wiphy.ht_capa_mod_mask);
@@ -1118,7 +1181,12 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev,
if (err) {
wdev->connect_keys = NULL;
- wdev->ssid_len = 0;
+ /*
+ * This could be reassoc getting refused, don't clear
+ * ssid_len in that case.
+ */
+ if (!wdev->current_bss)
+ wdev->ssid_len = 0;
return err;
}
@@ -1145,6 +1213,14 @@ int cfg80211_disconnect(struct cfg80211_registered_device *rdev,
else if (wdev->ssid_len)
err = rdev_disconnect(rdev, dev, reason);
+ /*
+ * Clear ssid_len unless we actually were fully connected,
+ * in which case cfg80211_disconnected() will take care of
+ * this later.
+ */
+ if (!wdev->current_bss)
+ wdev->ssid_len = 0;
+
return err;
}
diff --git a/net/wireless/sysfs.h b/net/wireless/sysfs.h
index b533ed71daff..7b454c2de9b7 100644
--- a/net/wireless/sysfs.h
+++ b/net/wireless/sysfs.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __WIRELESS_SYSFS_H
#define __WIRELESS_SYSFS_H
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 0f8db41eaddb..f3353fe5b35b 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM cfg80211
diff --git a/net/wireless/util.c b/net/wireless/util.c
index bcb1284c3415..c69160694b6c 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Wireless utility functions
*
@@ -157,32 +158,30 @@ static void set_mandatory_flags_band(struct ieee80211_supported_band *sband)
case NL80211_BAND_2GHZ:
want = 7;
for (i = 0; i < sband->n_bitrates; i++) {
- if (sband->bitrates[i].bitrate == 10) {
+ switch (sband->bitrates[i].bitrate) {
+ case 10:
+ case 20:
+ case 55:
+ case 110:
sband->bitrates[i].flags |=
IEEE80211_RATE_MANDATORY_B |
IEEE80211_RATE_MANDATORY_G;
want--;
- }
-
- if (sband->bitrates[i].bitrate == 20 ||
- sband->bitrates[i].bitrate == 55 ||
- sband->bitrates[i].bitrate == 110 ||
- sband->bitrates[i].bitrate == 60 ||
- sband->bitrates[i].bitrate == 120 ||
- sband->bitrates[i].bitrate == 240) {
+ break;
+ case 60:
+ case 120:
+ case 240:
sband->bitrates[i].flags |=
IEEE80211_RATE_MANDATORY_G;
want--;
- }
-
- if (sband->bitrates[i].bitrate != 10 &&
- sband->bitrates[i].bitrate != 20 &&
- sband->bitrates[i].bitrate != 55 &&
- sband->bitrates[i].bitrate != 110)
+ /* fall through */
+ default:
sband->bitrates[i].flags |=
IEEE80211_RATE_ERP_G;
+ break;
+ }
}
- WARN_ON(want != 0 && want != 3 && want != 6);
+ WARN_ON(want != 0 && want != 3);
break;
case NL80211_BAND_60GHZ:
/* check for mandatory HT MCS 1..4 */
@@ -529,121 +528,6 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
}
EXPORT_SYMBOL(ieee80211_data_to_8023_exthdr);
-int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr,
- enum nl80211_iftype iftype,
- const u8 *bssid, bool qos)
-{
- struct ieee80211_hdr hdr;
- u16 hdrlen, ethertype;
- __le16 fc;
- const u8 *encaps_data;
- int encaps_len, skip_header_bytes;
- int nh_pos, h_pos;
- int head_need;
-
- if (unlikely(skb->len < ETH_HLEN))
- return -EINVAL;
-
- nh_pos = skb_network_header(skb) - skb->data;
- h_pos = skb_transport_header(skb) - skb->data;
-
- /* convert Ethernet header to proper 802.11 header (based on
- * operation mode) */
- ethertype = (skb->data[12] << 8) | skb->data[13];
- fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA);
-
- switch (iftype) {
- case NL80211_IFTYPE_AP:
- case NL80211_IFTYPE_AP_VLAN:
- case NL80211_IFTYPE_P2P_GO:
- fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS);
- /* DA BSSID SA */
- memcpy(hdr.addr1, skb->data, ETH_ALEN);
- memcpy(hdr.addr2, addr, ETH_ALEN);
- memcpy(hdr.addr3, skb->data + ETH_ALEN, ETH_ALEN);
- hdrlen = 24;
- break;
- case NL80211_IFTYPE_STATION:
- case NL80211_IFTYPE_P2P_CLIENT:
- fc |= cpu_to_le16(IEEE80211_FCTL_TODS);
- /* BSSID SA DA */
- memcpy(hdr.addr1, bssid, ETH_ALEN);
- memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN);
- memcpy(hdr.addr3, skb->data, ETH_ALEN);
- hdrlen = 24;
- break;
- case NL80211_IFTYPE_OCB:
- case NL80211_IFTYPE_ADHOC:
- /* DA SA BSSID */
- memcpy(hdr.addr1, skb->data, ETH_ALEN);
- memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN);
- memcpy(hdr.addr3, bssid, ETH_ALEN);
- hdrlen = 24;
- break;
- default:
- return -EOPNOTSUPP;
- }
-
- if (qos) {
- fc |= cpu_to_le16(IEEE80211_STYPE_QOS_DATA);
- hdrlen += 2;
- }
-
- hdr.frame_control = fc;
- hdr.duration_id = 0;
- hdr.seq_ctrl = 0;
-
- skip_header_bytes = ETH_HLEN;
- if (ethertype == ETH_P_AARP || ethertype == ETH_P_IPX) {
- encaps_data = bridge_tunnel_header;
- encaps_len = sizeof(bridge_tunnel_header);
- skip_header_bytes -= 2;
- } else if (ethertype >= ETH_P_802_3_MIN) {
- encaps_data = rfc1042_header;
- encaps_len = sizeof(rfc1042_header);
- skip_header_bytes -= 2;
- } else {
- encaps_data = NULL;
- encaps_len = 0;
- }
-
- skb_pull(skb, skip_header_bytes);
- nh_pos -= skip_header_bytes;
- h_pos -= skip_header_bytes;
-
- head_need = hdrlen + encaps_len - skb_headroom(skb);
-
- if (head_need > 0 || skb_cloned(skb)) {
- head_need = max(head_need, 0);
- if (head_need)
- skb_orphan(skb);
-
- if (pskb_expand_head(skb, head_need, 0, GFP_ATOMIC))
- return -ENOMEM;
- }
-
- if (encaps_data) {
- memcpy(skb_push(skb, encaps_len), encaps_data, encaps_len);
- nh_pos += encaps_len;
- h_pos += encaps_len;
- }
-
- memcpy(skb_push(skb, hdrlen), &hdr, hdrlen);
-
- nh_pos += hdrlen;
- h_pos += hdrlen;
-
- /* Update skb pointers to various headers since this modified frame
- * is going to go through Linux networking code that may potentially
- * need things like pointer to IP header. */
- skb_reset_mac_header(skb);
- skb_set_network_header(skb, nh_pos);
- skb_set_transport_header(skb, h_pos);
-
- return 0;
-}
-EXPORT_SYMBOL(ieee80211_data_from_8023);
-
static void
__frame_add_frag(struct sk_buff *skb, struct page *page,
void *ptr, int len, int size)
@@ -963,6 +847,9 @@ void cfg80211_process_wdev_events(struct wireless_dev *wdev)
case EVENT_STOPPED:
__cfg80211_leave(wiphy_to_rdev(wdev->wiphy), wdev);
break;
+ case EVENT_PORT_AUTHORIZED:
+ __cfg80211_port_authorized(wdev, ev->pa.bssid);
+ break;
}
wdev_unlock(wdev);
@@ -1367,13 +1254,29 @@ int cfg80211_get_p2p_attr(const u8 *ies, unsigned int len,
}
EXPORT_SYMBOL(cfg80211_get_p2p_attr);
-static bool ieee80211_id_in_list(const u8 *ids, int n_ids, u8 id)
+static bool ieee80211_id_in_list(const u8 *ids, int n_ids, u8 id, bool id_ext)
{
int i;
- for (i = 0; i < n_ids; i++)
- if (ids[i] == id)
+ /* Make sure array values are legal */
+ if (WARN_ON(ids[n_ids - 1] == WLAN_EID_EXTENSION))
+ return false;
+
+ i = 0;
+ while (i < n_ids) {
+ if (ids[i] == WLAN_EID_EXTENSION) {
+ if (id_ext && (ids[i + 1] == id))
+ return true;
+
+ i += 2;
+ continue;
+ }
+
+ if (ids[i] == id && !id_ext)
return true;
+
+ i++;
+ }
return false;
}
@@ -1403,14 +1306,36 @@ size_t ieee80211_ie_split_ric(const u8 *ies, size_t ielen,
{
size_t pos = offset;
- while (pos < ielen && ieee80211_id_in_list(ids, n_ids, ies[pos])) {
+ while (pos < ielen) {
+ u8 ext = 0;
+
+ if (ies[pos] == WLAN_EID_EXTENSION)
+ ext = 2;
+ if ((pos + ext) >= ielen)
+ break;
+
+ if (!ieee80211_id_in_list(ids, n_ids, ies[pos + ext],
+ ies[pos] == WLAN_EID_EXTENSION))
+ break;
+
if (ies[pos] == WLAN_EID_RIC_DATA && n_after_ric) {
pos = skip_ie(ies, ielen, pos);
- while (pos < ielen &&
- !ieee80211_id_in_list(after_ric, n_after_ric,
- ies[pos]))
- pos = skip_ie(ies, ielen, pos);
+ while (pos < ielen) {
+ if (ies[pos] == WLAN_EID_EXTENSION)
+ ext = 2;
+ else
+ ext = 0;
+
+ if ((pos + ext) >= ielen)
+ break;
+
+ if (!ieee80211_id_in_list(after_ric,
+ n_after_ric,
+ ies[pos + ext],
+ ext == 2))
+ pos = skip_ie(ies, ielen, pos);
+ }
} else {
pos = skip_ie(ies, ielen, pos);
}
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index 5d4a02c7979b..7ca04a7de85a 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* cfg80211 - wext compat code
*
diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c
index c434f193f39a..c67d7a82ab13 100644
--- a/net/wireless/wext-sme.c
+++ b/net/wireless/wext-sme.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* cfg80211 wext compat for managed mode.
*
diff --git a/net/x25/Makefile b/net/x25/Makefile
index a2c34ab6f194..5dd544a231f2 100644
--- a/net/x25/Makefile
+++ b/net/x25/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the Linux X.25 Packet layer.
#
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 5a1a98df3499..ea87143314f3 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -74,7 +74,7 @@ DEFINE_RWLOCK(x25_list_lock);
static const struct proto_ops x25_proto_ops;
-static struct x25_address null_x25_address = {" "};
+static const struct x25_address null_x25_address = {" "};
#ifdef CONFIG_COMPAT
struct compat_x25_subscrip_struct {
@@ -374,9 +374,11 @@ static void __x25_destroy_socket(struct sock *);
/*
* handler for deferred kills.
*/
-static void x25_destroy_timer(unsigned long data)
+static void x25_destroy_timer(struct timer_list *t)
{
- x25_destroy_socket_from_timer((struct sock *)data);
+ struct sock *sk = from_timer(sk, t, sk_timer);
+
+ x25_destroy_socket_from_timer(sk);
}
/*
@@ -413,8 +415,7 @@ static void __x25_destroy_socket(struct sock *sk)
if (sk_has_allocations(sk)) {
/* Defer: outstanding buffers */
sk->sk_timer.expires = jiffies + 10 * HZ;
- sk->sk_timer.function = x25_destroy_timer;
- sk->sk_timer.data = (unsigned long)sk;
+ sk->sk_timer.function = (TIMER_FUNC_TYPE)x25_destroy_timer;
add_timer(&sk->sk_timer);
} else {
/* drop last reference so sock_put will free */
diff --git a/net/x25/sysctl_net_x25.c b/net/x25/sysctl_net_x25.c
index ba078c85f0a1..e9802afa43d0 100644
--- a/net/x25/sysctl_net_x25.c
+++ b/net/x25/sysctl_net_x25.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* -*- linux-c -*-
* sysctl_net_x25.c: sysctl interface to net X.25 subsystem.
*
diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c
index 997ff7b2509b..ad1734d36ed7 100644
--- a/net/x25/x25_facilities.c
+++ b/net/x25/x25_facilities.c
@@ -103,7 +103,7 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities,
*vc_fac_mask |= X25_MASK_REVERSE;
break;
}
-
+ /*fall through */
case X25_FAC_THROUGHPUT:
facilities->throughput = p[1];
*vc_fac_mask |= X25_MASK_THROUGHPUT;
diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c
index 7ac50098a375..3c12cae32001 100644
--- a/net/x25/x25_in.c
+++ b/net/x25/x25_in.c
@@ -345,6 +345,7 @@ static int x25_state4_machine(struct sock *sk, struct sk_buff *skb, int frametyp
case X25_RESET_REQUEST:
x25_write_internal(sk, X25_RESET_CONFIRMATION);
+ /* fall through */
case X25_RESET_CONFIRMATION: {
x25_stop_timer(sk);
x25->condition = 0x00;
diff --git a/net/x25/x25_timer.c b/net/x25/x25_timer.c
index 5c5db1a36399..1dfba3c23459 100644
--- a/net/x25/x25_timer.c
+++ b/net/x25/x25_timer.c
@@ -26,18 +26,17 @@
#include <net/tcp_states.h>
#include <net/x25.h>
-static void x25_heartbeat_expiry(unsigned long);
-static void x25_timer_expiry(unsigned long);
+static void x25_heartbeat_expiry(struct timer_list *t);
+static void x25_timer_expiry(struct timer_list *t);
void x25_init_timers(struct sock *sk)
{
struct x25_sock *x25 = x25_sk(sk);
- setup_timer(&x25->timer, x25_timer_expiry, (unsigned long)sk);
+ timer_setup(&x25->timer, x25_timer_expiry, 0);
/* initialized by sock_init_data */
- sk->sk_timer.data = (unsigned long)sk;
- sk->sk_timer.function = &x25_heartbeat_expiry;
+ sk->sk_timer.function = (TIMER_FUNC_TYPE)x25_heartbeat_expiry;
}
void x25_start_heartbeat(struct sock *sk)
@@ -93,9 +92,9 @@ unsigned long x25_display_timer(struct sock *sk)
return x25->timer.expires - jiffies;
}
-static void x25_heartbeat_expiry(unsigned long param)
+static void x25_heartbeat_expiry(struct timer_list *t)
{
- struct sock *sk = (struct sock *)param;
+ struct sock *sk = from_timer(sk, t, sk_timer);
bh_lock_sock(sk);
if (sock_owned_by_user(sk)) /* can currently only occur in state 3 */
@@ -160,9 +159,10 @@ static inline void x25_do_timer_expiry(struct sock * sk)
}
}
-static void x25_timer_expiry(unsigned long param)
+static void x25_timer_expiry(struct timer_list *t)
{
- struct sock *sk = (struct sock *)param;
+ struct x25_sock *x25 = from_timer(x25, t, timer);
+ struct sock *sk = &x25->sk;
bh_lock_sock(sk);
if (sock_owned_by_user(sk)) { /* can currently only occur in state 3 */
diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile
index 55b2ac300995..0bd2465a8c5a 100644
--- a/net/xfrm/Makefile
+++ b/net/xfrm/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the XFRM subsystem.
#
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 5f7e8bfa0c2d..30e5746085b8 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -63,7 +63,7 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
xfrm_address_t *daddr;
if (!x->type_offload)
- return 0;
+ return -EINVAL;
/* We don't yet support UDP encapsulation, TFC padding and ESN. */
if (x->encap || x->tfcpad || (x->props.flags & XFRM_STATE_ESN))
@@ -79,7 +79,8 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
daddr = &x->props.saddr;
}
- dst = __xfrm_dst_lookup(net, 0, 0, saddr, daddr, x->props.family);
+ dst = __xfrm_dst_lookup(net, 0, 0, saddr, daddr,
+ x->props.family, x->props.output_mark);
if (IS_ERR(dst))
return 0;
@@ -90,6 +91,7 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
}
if (!dev->xfrmdev_ops || !dev->xfrmdev_ops->xdo_dev_state_add) {
+ xso->dev = NULL;
dev_put(dev);
return 0;
}
@@ -153,6 +155,7 @@ static int xfrm_dev_register(struct net_device *dev)
static int xfrm_dev_unregister(struct net_device *dev)
{
+ xfrm_policy_cache_flush();
return NOTIFY_DONE;
}
@@ -175,8 +178,7 @@ static int xfrm_dev_down(struct net_device *dev)
if (dev->features & NETIF_F_HW_ESP)
xfrm_dev_state_flush(dev_net(dev), dev, true);
- xfrm_garbage_collect(dev_net(dev));
-
+ xfrm_policy_cache_flush();
return NOTIFY_DONE;
}
diff --git a/net/xfrm/xfrm_hash.c b/net/xfrm/xfrm_hash.c
index 1e98bc0fe0a5..2ad33ce1ea17 100644
--- a/net/xfrm/xfrm_hash.c
+++ b/net/xfrm/xfrm_hash.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* xfrm_hash.c: Common hash table code.
*
* Copyright (C) 2006 David S. Miller (davem@davemloft.net)
diff --git a/net/xfrm/xfrm_hash.h b/net/xfrm/xfrm_hash.h
index eaea9c4fb3b0..61be810389d8 100644
--- a/net/xfrm/xfrm_hash.h
+++ b/net/xfrm/xfrm_hash.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _XFRM_HASH_H
#define _XFRM_HASH_H
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 923205e279f7..347ab31574d5 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* xfrm_input.c
*
@@ -247,6 +248,11 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
goto drop;
}
+ if (xo->status & CRYPTO_INVALID_PROTOCOL) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR);
+ goto drop;
+ }
+
XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
goto drop;
}
@@ -260,8 +266,6 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
goto lock;
}
- daddr = (xfrm_address_t *)(skb_network_header(skb) +
- XFRM_SPI_SKB_CB(skb)->daddroff);
family = XFRM_SPI_SKB_CB(skb)->family;
/* if tunnel is present override skb->mark value with tunnel i_key */
@@ -288,6 +292,8 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
goto drop;
}
+ daddr = (xfrm_address_t *)(skb_network_header(skb) +
+ XFRM_SPI_SKB_CB(skb)->daddroff);
do {
if (skb->sp->len == XFRM_MAX_DEPTH) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
@@ -424,6 +430,8 @@ resume:
nf_reset(skb);
if (decaps) {
+ if (skb->sp)
+ skb->sp->olen = 0;
skb_dst_drop(skb);
gro_cells_receive(&gro_cells, skb);
return 0;
@@ -434,6 +442,8 @@ resume:
err = x->inner_mode->afinfo->transport_finish(skb, xfrm_gro || async);
if (xfrm_gro) {
+ if (skb->sp)
+ skb->sp->olen = 0;
skb_dst_drop(skb);
gro_cells_receive(&gro_cells, skb);
return err;
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 8c0b6722aaa8..73ad8c8ef344 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -66,6 +66,9 @@ static int xfrm_output_one(struct sk_buff *skb, int err)
goto error_nolock;
}
+ if (x->props.output_mark)
+ skb->mark = x->props.output_mark;
+
err = x->outer_mode->output(x, skb);
if (err) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEMODEERROR);
@@ -102,6 +105,9 @@ static int xfrm_output_one(struct sk_buff *skb, int err)
if (xfrm_offload(skb)) {
x->type_offload->encap(x, skb);
} else {
+ /* Inner headers are invalid now. */
+ skb->encapsulation = 0;
+
err = x->type->output(x, skb);
if (err == -EINPROGRESS)
goto out;
@@ -205,7 +211,6 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb)
int err;
secpath_reset(skb);
- skb->encapsulation = 0;
if (xfrm_dev_offload_ok(skb, x)) {
struct sec_path *sp;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index ff61d8557929..9542975eb2f9 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -24,6 +24,7 @@
#include <linux/netfilter.h>
#include <linux/module.h>
#include <linux/cache.h>
+#include <linux/cpu.h>
#include <linux/audit.h>
#include <net/dst.h>
#include <net/flow.h>
@@ -44,6 +45,8 @@ struct xfrm_flo {
u8 flags;
};
+static DEFINE_PER_CPU(struct xfrm_dst *, xfrm_last_dst);
+static struct work_struct *xfrm_pcpu_work __read_mostly;
static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock);
static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1]
__read_mostly;
@@ -54,7 +57,7 @@ static __read_mostly seqcount_t xfrm_policy_hash_generation;
static void xfrm_init_pmtu(struct dst_entry *dst);
static int stale_bundle(struct dst_entry *dst);
static int xfrm_bundle_ok(struct xfrm_dst *xdst);
-static void xfrm_policy_queue_process(unsigned long arg);
+static void xfrm_policy_queue_process(struct timer_list *t);
static void __xfrm_policy_link(struct xfrm_policy *pol, int dir);
static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
@@ -119,7 +122,7 @@ static const struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short fa
struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif,
const xfrm_address_t *saddr,
const xfrm_address_t *daddr,
- int family)
+ int family, u32 mark)
{
const struct xfrm_policy_afinfo *afinfo;
struct dst_entry *dst;
@@ -128,7 +131,7 @@ struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif,
if (unlikely(afinfo == NULL))
return ERR_PTR(-EAFNOSUPPORT);
- dst = afinfo->dst_lookup(net, tos, oif, saddr, daddr);
+ dst = afinfo->dst_lookup(net, tos, oif, saddr, daddr, mark);
rcu_read_unlock();
@@ -140,7 +143,7 @@ static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x,
int tos, int oif,
xfrm_address_t *prev_saddr,
xfrm_address_t *prev_daddr,
- int family)
+ int family, u32 mark)
{
struct net *net = xs_net(x);
xfrm_address_t *saddr = &x->props.saddr;
@@ -156,7 +159,7 @@ static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x,
daddr = x->coaddr;
}
- dst = __xfrm_dst_lookup(net, tos, oif, saddr, daddr, family);
+ dst = __xfrm_dst_lookup(net, tos, oif, saddr, daddr, family, mark);
if (!IS_ERR(dst)) {
if (prev_saddr != saddr)
@@ -176,9 +179,9 @@ static inline unsigned long make_jiffies(long secs)
return secs*HZ;
}
-static void xfrm_policy_timer(unsigned long data)
+static void xfrm_policy_timer(struct timer_list *t)
{
- struct xfrm_policy *xp = (struct xfrm_policy *)data;
+ struct xfrm_policy *xp = from_timer(xp, t, timer);
unsigned long now = get_seconds();
long next = LONG_MAX;
int warn = 0;
@@ -246,36 +249,6 @@ expired:
xfrm_pol_put(xp);
}
-static struct flow_cache_object *xfrm_policy_flo_get(struct flow_cache_object *flo)
-{
- struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo);
-
- if (unlikely(pol->walk.dead))
- flo = NULL;
- else
- xfrm_pol_hold(pol);
-
- return flo;
-}
-
-static int xfrm_policy_flo_check(struct flow_cache_object *flo)
-{
- struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo);
-
- return !pol->walk.dead;
-}
-
-static void xfrm_policy_flo_delete(struct flow_cache_object *flo)
-{
- xfrm_pol_put(container_of(flo, struct xfrm_policy, flo));
-}
-
-static const struct flow_cache_ops xfrm_policy_fc_ops = {
- .get = xfrm_policy_flo_get,
- .check = xfrm_policy_flo_check,
- .delete = xfrm_policy_flo_delete,
-};
-
/* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2
* SPD calls.
*/
@@ -294,11 +267,9 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
rwlock_init(&policy->lock);
refcount_set(&policy->refcnt, 1);
skb_queue_head_init(&policy->polq.hold_queue);
- setup_timer(&policy->timer, xfrm_policy_timer,
- (unsigned long)policy);
- setup_timer(&policy->polq.hold_timer, xfrm_policy_queue_process,
- (unsigned long)policy);
- policy->flo.ops = &xfrm_policy_fc_ops;
+ timer_setup(&policy->timer, xfrm_policy_timer, 0);
+ timer_setup(&policy->polq.hold_timer,
+ xfrm_policy_queue_process, 0);
}
return policy;
}
@@ -798,7 +769,6 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
else
hlist_add_head(&policy->bydst, chain);
__xfrm_policy_link(policy, dir);
- atomic_inc(&net->xfrm.flow_cache_genid);
/* After previous checking, family can either be AF_INET or AF_INET6 */
if (policy->family == AF_INET)
@@ -1004,6 +974,8 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
}
if (!cnt)
err = -ESRCH;
+ else
+ xfrm_policy_cache_flush();
out:
spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
return err;
@@ -1175,7 +1147,7 @@ fail:
}
static struct xfrm_policy *
-__xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir)
+xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir)
{
#ifdef CONFIG_XFRM_SUB_POLICY
struct xfrm_policy *pol;
@@ -1187,61 +1159,6 @@ __xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir
return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
}
-static int flow_to_policy_dir(int dir)
-{
- if (XFRM_POLICY_IN == FLOW_DIR_IN &&
- XFRM_POLICY_OUT == FLOW_DIR_OUT &&
- XFRM_POLICY_FWD == FLOW_DIR_FWD)
- return dir;
-
- switch (dir) {
- default:
- case FLOW_DIR_IN:
- return XFRM_POLICY_IN;
- case FLOW_DIR_OUT:
- return XFRM_POLICY_OUT;
- case FLOW_DIR_FWD:
- return XFRM_POLICY_FWD;
- }
-}
-
-static struct flow_cache_object *
-xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family,
- u8 dir, struct flow_cache_object *old_obj, void *ctx)
-{
- struct xfrm_policy *pol;
-
- if (old_obj)
- xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo));
-
- pol = __xfrm_policy_lookup(net, fl, family, flow_to_policy_dir(dir));
- if (IS_ERR_OR_NULL(pol))
- return ERR_CAST(pol);
-
- /* Resolver returns two references:
- * one for cache and one for caller of flow_cache_lookup() */
- xfrm_pol_hold(pol);
-
- return &pol->flo;
-}
-
-static inline int policy_to_flow_dir(int dir)
-{
- if (XFRM_POLICY_IN == FLOW_DIR_IN &&
- XFRM_POLICY_OUT == FLOW_DIR_OUT &&
- XFRM_POLICY_FWD == FLOW_DIR_FWD)
- return dir;
- switch (dir) {
- default:
- case XFRM_POLICY_IN:
- return FLOW_DIR_IN;
- case XFRM_POLICY_OUT:
- return FLOW_DIR_OUT;
- case XFRM_POLICY_FWD:
- return FLOW_DIR_FWD;
- }
-}
-
static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
const struct flowi *fl, u16 family)
{
@@ -1261,7 +1178,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
}
err = security_xfrm_policy_lookup(pol->security,
fl->flowi_secid,
- policy_to_flow_dir(dir));
+ dir);
if (!err) {
if (!xfrm_pol_hold_rcu(pol))
goto again;
@@ -1388,6 +1305,7 @@ static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir)
newp->xfrm_nr = old->xfrm_nr;
newp->index = old->index;
newp->type = old->type;
+ newp->family = old->family;
memcpy(newp->xfrm_vec, old->xfrm_vec,
newp->xfrm_nr*sizeof(struct xfrm_tmpl));
spin_lock_bh(&net->xfrm.xfrm_policy_lock);
@@ -1422,14 +1340,14 @@ int __xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk)
static int
xfrm_get_saddr(struct net *net, int oif, xfrm_address_t *local,
- xfrm_address_t *remote, unsigned short family)
+ xfrm_address_t *remote, unsigned short family, u32 mark)
{
int err;
const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
if (unlikely(afinfo == NULL))
return -EINVAL;
- err = afinfo->get_saddr(net, oif, local, remote);
+ err = afinfo->get_saddr(net, oif, local, remote, mark);
rcu_read_unlock();
return err;
}
@@ -1460,7 +1378,7 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl,
if (xfrm_addr_any(local, tmpl->encap_family)) {
error = xfrm_get_saddr(net, fl->flowi_oif,
&tmp, remote,
- tmpl->encap_family);
+ tmpl->encap_family, 0);
if (error)
goto fail;
local = &tmp;
@@ -1545,58 +1463,6 @@ static int xfrm_get_tos(const struct flowi *fl, int family)
return tos;
}
-static struct flow_cache_object *xfrm_bundle_flo_get(struct flow_cache_object *flo)
-{
- struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
- struct dst_entry *dst = &xdst->u.dst;
-
- if (xdst->route == NULL) {
- /* Dummy bundle - if it has xfrms we were not
- * able to build bundle as template resolution failed.
- * It means we need to try again resolving. */
- if (xdst->num_xfrms > 0)
- return NULL;
- } else if (dst->flags & DST_XFRM_QUEUE) {
- return NULL;
- } else {
- /* Real bundle */
- if (stale_bundle(dst))
- return NULL;
- }
-
- dst_hold(dst);
- return flo;
-}
-
-static int xfrm_bundle_flo_check(struct flow_cache_object *flo)
-{
- struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
- struct dst_entry *dst = &xdst->u.dst;
-
- if (!xdst->route)
- return 0;
- if (stale_bundle(dst))
- return 0;
-
- return 1;
-}
-
-static void xfrm_bundle_flo_delete(struct flow_cache_object *flo)
-{
- struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
- struct dst_entry *dst = &xdst->u.dst;
-
- /* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */
- dst->obsolete = DST_OBSOLETE_DEAD;
- dst_release_immediate(dst);
-}
-
-static const struct flow_cache_ops xfrm_bundle_fc_ops = {
- .get = xfrm_bundle_flo_get,
- .check = xfrm_bundle_flo_check,
- .delete = xfrm_bundle_flo_delete,
-};
-
static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
{
const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
@@ -1624,7 +1490,6 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
struct dst_entry *dst = &xdst->u.dst;
memset(dst + 1, 0, sizeof(*xdst) - sizeof(*dst));
- xdst->flo.ops = &xfrm_bundle_fc_ops;
} else
xdst = ERR_PTR(-ENOBUFS);
@@ -1708,6 +1573,14 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
goto put_states;
}
+ if (!dst_prev)
+ dst0 = dst1;
+ else
+ /* Ref count is taken during xfrm_alloc_dst()
+ * No need to do dst_clone() on dst1
+ */
+ dst_prev->child = dst1;
+
if (xfrm[i]->sel.family == AF_UNSPEC) {
inner_mode = xfrm_ip2inner_mode(xfrm[i],
xfrm_af2proto(family));
@@ -1719,21 +1592,14 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
} else
inner_mode = xfrm[i]->inner_mode;
- if (!dst_prev)
- dst0 = dst1;
- else
- /* Ref count is taken during xfrm_alloc_dst()
- * No need to do dst_clone() on dst1
- */
- dst_prev->child = dst1;
-
xdst->route = dst;
dst_copy_metrics(dst1, dst);
if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
family = xfrm[i]->props.family;
dst = xfrm_dst_lookup(xfrm[i], tos, fl->flowi_oif,
- &saddr, &daddr, family);
+ &saddr, &daddr, family,
+ xfrm[i]->props.output_mark);
err = PTR_ERR(dst);
if (IS_ERR(dst))
goto put_states;
@@ -1840,6 +1706,106 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family,
}
+static void xfrm_last_dst_update(struct xfrm_dst *xdst, struct xfrm_dst *old)
+{
+ this_cpu_write(xfrm_last_dst, xdst);
+ if (old)
+ dst_release(&old->u.dst);
+}
+
+static void __xfrm_pcpu_work_fn(void)
+{
+ struct xfrm_dst *old;
+
+ old = this_cpu_read(xfrm_last_dst);
+ if (old && !xfrm_bundle_ok(old))
+ xfrm_last_dst_update(NULL, old);
+}
+
+static void xfrm_pcpu_work_fn(struct work_struct *work)
+{
+ local_bh_disable();
+ rcu_read_lock();
+ __xfrm_pcpu_work_fn();
+ rcu_read_unlock();
+ local_bh_enable();
+}
+
+void xfrm_policy_cache_flush(void)
+{
+ struct xfrm_dst *old;
+ bool found = 0;
+ int cpu;
+
+ local_bh_disable();
+ rcu_read_lock();
+ for_each_possible_cpu(cpu) {
+ old = per_cpu(xfrm_last_dst, cpu);
+ if (old && !xfrm_bundle_ok(old)) {
+ if (smp_processor_id() == cpu) {
+ __xfrm_pcpu_work_fn();
+ continue;
+ }
+ found = true;
+ break;
+ }
+ }
+
+ rcu_read_unlock();
+ local_bh_enable();
+
+ if (!found)
+ return;
+
+ get_online_cpus();
+
+ for_each_possible_cpu(cpu) {
+ bool bundle_release;
+
+ rcu_read_lock();
+ old = per_cpu(xfrm_last_dst, cpu);
+ bundle_release = old && !xfrm_bundle_ok(old);
+ rcu_read_unlock();
+
+ if (!bundle_release)
+ continue;
+
+ if (cpu_online(cpu)) {
+ schedule_work_on(cpu, &xfrm_pcpu_work[cpu]);
+ continue;
+ }
+
+ rcu_read_lock();
+ old = per_cpu(xfrm_last_dst, cpu);
+ if (old && !xfrm_bundle_ok(old)) {
+ per_cpu(xfrm_last_dst, cpu) = NULL;
+ dst_release(&old->u.dst);
+ }
+ rcu_read_unlock();
+ }
+
+ put_online_cpus();
+}
+
+static bool xfrm_xdst_can_reuse(struct xfrm_dst *xdst,
+ struct xfrm_state * const xfrm[],
+ int num)
+{
+ const struct dst_entry *dst = &xdst->u.dst;
+ int i;
+
+ if (xdst->num_xfrms != num)
+ return false;
+
+ for (i = 0; i < num; i++) {
+ if (!dst || dst->xfrm != xfrm[i])
+ return false;
+ dst = dst->child;
+ }
+
+ return xfrm_bundle_ok(xdst);
+}
+
static struct xfrm_dst *
xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
const struct flowi *fl, u16 family,
@@ -1847,8 +1813,8 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
{
struct net *net = xp_net(pols[0]);
struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
+ struct xfrm_dst *xdst, *old;
struct dst_entry *dst;
- struct xfrm_dst *xdst;
int err;
/* Try to instantiate a bundle */
@@ -1859,6 +1825,21 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
return ERR_PTR(err);
}
+ xdst = this_cpu_read(xfrm_last_dst);
+ if (xdst &&
+ xdst->u.dst.dev == dst_orig->dev &&
+ xdst->num_pols == num_pols &&
+ memcmp(xdst->pols, pols,
+ sizeof(struct xfrm_policy *) * num_pols) == 0 &&
+ xfrm_xdst_can_reuse(xdst, xfrm, err)) {
+ dst_hold(&xdst->u.dst);
+ while (err > 0)
+ xfrm_state_put(xfrm[--err]);
+ return xdst;
+ }
+
+ old = xdst;
+
dst = xfrm_bundle_create(pols[0], xfrm, err, fl, dst_orig);
if (IS_ERR(dst)) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR);
@@ -1871,15 +1852,18 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
xdst->policy_genid = atomic_read(&pols[0]->genid);
+ atomic_set(&xdst->u.dst.__refcnt, 2);
+ xfrm_last_dst_update(xdst, old);
+
return xdst;
}
-static void xfrm_policy_queue_process(unsigned long arg)
+static void xfrm_policy_queue_process(struct timer_list *t)
{
struct sk_buff *skb;
struct sock *sk;
struct dst_entry *dst;
- struct xfrm_policy *pol = (struct xfrm_policy *)arg;
+ struct xfrm_policy *pol = from_timer(pol, t, polq.hold_timer);
struct net *net = xp_net(pol);
struct xfrm_policy_queue *pq = &pol->polq;
struct flowi fl;
@@ -2051,86 +2035,39 @@ free_dst:
goto out;
}
-static struct flow_cache_object *
-xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
- struct flow_cache_object *oldflo, void *ctx)
+static struct xfrm_dst *
+xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, struct xfrm_flo *xflo)
{
- struct xfrm_flo *xflo = (struct xfrm_flo *)ctx;
struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
- struct xfrm_dst *xdst, *new_xdst;
- int num_pols = 0, num_xfrms = 0, i, err, pol_dead;
-
- /* Check if the policies from old bundle are usable */
- xdst = NULL;
- if (oldflo) {
- xdst = container_of(oldflo, struct xfrm_dst, flo);
- num_pols = xdst->num_pols;
- num_xfrms = xdst->num_xfrms;
- pol_dead = 0;
- for (i = 0; i < num_pols; i++) {
- pols[i] = xdst->pols[i];
- pol_dead |= pols[i]->walk.dead;
- }
- if (pol_dead) {
- /* Mark DST_OBSOLETE_DEAD to fail the next
- * xfrm_dst_check()
- */
- xdst->u.dst.obsolete = DST_OBSOLETE_DEAD;
- dst_release_immediate(&xdst->u.dst);
- xdst = NULL;
- num_pols = 0;
- num_xfrms = 0;
- oldflo = NULL;
- }
- }
+ int num_pols = 0, num_xfrms = 0, err;
+ struct xfrm_dst *xdst;
/* Resolve policies to use if we couldn't get them from
* previous cache entry */
- if (xdst == NULL) {
- num_pols = 1;
- pols[0] = __xfrm_policy_lookup(net, fl, family,
- flow_to_policy_dir(dir));
- err = xfrm_expand_policies(fl, family, pols,
+ num_pols = 1;
+ pols[0] = xfrm_policy_lookup(net, fl, family, dir);
+ err = xfrm_expand_policies(fl, family, pols,
&num_pols, &num_xfrms);
- if (err < 0)
- goto inc_error;
- if (num_pols == 0)
- return NULL;
- if (num_xfrms <= 0)
- goto make_dummy_bundle;
- }
+ if (err < 0)
+ goto inc_error;
+ if (num_pols == 0)
+ return NULL;
+ if (num_xfrms <= 0)
+ goto make_dummy_bundle;
- new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family,
+ xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family,
xflo->dst_orig);
- if (IS_ERR(new_xdst)) {
- err = PTR_ERR(new_xdst);
+ if (IS_ERR(xdst)) {
+ err = PTR_ERR(xdst);
if (err != -EAGAIN)
goto error;
- if (oldflo == NULL)
- goto make_dummy_bundle;
- dst_hold(&xdst->u.dst);
- return oldflo;
- } else if (new_xdst == NULL) {
+ goto make_dummy_bundle;
+ } else if (xdst == NULL) {
num_xfrms = 0;
- if (oldflo == NULL)
- goto make_dummy_bundle;
- xdst->num_xfrms = 0;
- dst_hold(&xdst->u.dst);
- return oldflo;
- }
-
- /* Kill the previous bundle */
- if (xdst) {
- /* The policies were stolen for newly generated bundle */
- xdst->num_pols = 0;
- /* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */
- xdst->u.dst.obsolete = DST_OBSOLETE_DEAD;
- dst_release_immediate(&xdst->u.dst);
+ goto make_dummy_bundle;
}
- /* We do need to return one reference for original caller */
- dst_hold(&new_xdst->u.dst);
- return &new_xdst->flo;
+ return xdst;
make_dummy_bundle:
/* We found policies, but there's no bundles to instantiate:
@@ -2145,18 +2082,12 @@ make_dummy_bundle:
xdst->num_xfrms = num_xfrms;
memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
- dst_hold(&xdst->u.dst);
- return &xdst->flo;
+ return xdst;
inc_error:
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
error:
- if (xdst != NULL) {
- /* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */
- xdst->u.dst.obsolete = DST_OBSOLETE_DEAD;
- dst_release_immediate(&xdst->u.dst);
- } else
- xfrm_pols_put(pols, num_pols);
+ xfrm_pols_put(pols, num_pols);
return ERR_PTR(err);
}
@@ -2187,11 +2118,10 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
const struct sock *sk, int flags)
{
struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
- struct flow_cache_object *flo;
struct xfrm_dst *xdst;
struct dst_entry *dst, *route;
u16 family = dst_orig->ops->family;
- u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
+ u8 dir = XFRM_POLICY_OUT;
int i, err, num_pols, num_xfrms = 0, drop_pols = 0;
dst = NULL;
@@ -2226,7 +2156,6 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
goto no_transform;
}
- dst_hold(&xdst->u.dst);
route = xdst->route;
}
}
@@ -2242,15 +2171,13 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
!net->xfrm.policy_count[XFRM_POLICY_OUT])
goto nopol;
- flo = flow_cache_lookup(net, fl, family, dir,
- xfrm_bundle_lookup, &xflo);
- if (flo == NULL)
+ xdst = xfrm_bundle_lookup(net, fl, family, dir, &xflo);
+ if (xdst == NULL)
goto nopol;
- if (IS_ERR(flo)) {
- err = PTR_ERR(flo);
+ if (IS_ERR(xdst)) {
+ err = PTR_ERR(xdst);
goto dropdst;
}
- xdst = container_of(flo, struct xfrm_dst, flo);
num_pols = xdst->num_pols;
num_xfrms = xdst->num_xfrms;
@@ -2449,12 +2376,10 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
int pi;
int reverse;
struct flowi fl;
- u8 fl_dir;
int xerr_idx = -1;
reverse = dir & ~XFRM_POLICY_MASK;
dir &= XFRM_POLICY_MASK;
- fl_dir = policy_to_flow_dir(dir);
if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
@@ -2486,16 +2411,8 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
}
}
- if (!pol) {
- struct flow_cache_object *flo;
-
- flo = flow_cache_lookup(net, &fl, family, fl_dir,
- xfrm_policy_lookup, NULL);
- if (IS_ERR_OR_NULL(flo))
- pol = ERR_CAST(flo);
- else
- pol = container_of(flo, struct xfrm_policy, flo);
- }
+ if (!pol)
+ pol = xfrm_policy_lookup(net, &fl, family, dir);
if (IS_ERR(pol)) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
@@ -2641,11 +2558,9 @@ static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
* notice. That's what we are validating here via the
* stale_bundle() check.
*
- * When an xdst is removed from flow cache, DST_OBSOLETE_DEAD will
- * be marked on it.
* When a dst is removed from the fib tree, DST_OBSOLETE_DEAD will
* be marked on it.
- * Both will force stable_bundle() to fail on any xdst bundle with
+ * This will force stale_bundle() to fail on any xdst bundle with
* this dst linked in it.
*/
if (dst->obsolete < 0 && !stale_bundle(dst))
@@ -2685,18 +2600,6 @@ static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
return dst;
}
-void xfrm_garbage_collect(struct net *net)
-{
- flow_cache_flush(net);
-}
-EXPORT_SYMBOL(xfrm_garbage_collect);
-
-void xfrm_garbage_collect_deferred(struct net *net)
-{
- flow_cache_flush_deferred(net);
-}
-EXPORT_SYMBOL(xfrm_garbage_collect_deferred);
-
static void xfrm_init_pmtu(struct dst_entry *dst)
{
do {
@@ -3034,14 +2937,9 @@ static int __net_init xfrm_net_init(struct net *net)
rv = xfrm_sysctl_init(net);
if (rv < 0)
goto out_sysctl;
- rv = flow_cache_init(net);
- if (rv < 0)
- goto out;
return 0;
-out:
- xfrm_sysctl_fini(net);
out_sysctl:
xfrm_policy_fini(net);
out_policy:
@@ -3054,7 +2952,6 @@ out_statistics:
static void __net_exit xfrm_net_exit(struct net *net)
{
- flow_cache_fini(net);
xfrm_sysctl_fini(net);
xfrm_policy_fini(net);
xfrm_state_fini(net);
@@ -3068,7 +2965,15 @@ static struct pernet_operations __net_initdata xfrm_net_ops = {
void __init xfrm_init(void)
{
- flow_cache_hp_init();
+ int i;
+
+ xfrm_pcpu_work = kmalloc_array(NR_CPUS, sizeof(*xfrm_pcpu_work),
+ GFP_KERNEL);
+ BUG_ON(!xfrm_pcpu_work);
+
+ for (i = 0; i < NR_CPUS; i++)
+ INIT_WORK(&xfrm_pcpu_work[i], xfrm_pcpu_work_fn);
+
register_pernet_subsys(&xfrm_net_ops);
seqcount_init(&xfrm_policy_hash_generation);
xfrm_input_init();
@@ -3308,9 +3213,15 @@ int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
struct xfrm_state *x_new[XFRM_MAX_DEPTH];
struct xfrm_migrate *mp;
+ /* Stage 0 - sanity checks */
if ((err = xfrm_migrate_check(m, num_migrate)) < 0)
goto out;
+ if (dir >= XFRM_POLICY_MAX) {
+ err = -EINVAL;
+ goto out;
+ }
+
/* Stage 1 - find policy */
if ((pol = xfrm_migrate_policy_find(sel, dir, type, net)) == NULL) {
err = -ENOENT;
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 6c0956d10db6..1f5cee2269af 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -296,12 +296,14 @@ int xfrm_unregister_type_offload(const struct xfrm_type_offload *type,
}
EXPORT_SYMBOL(xfrm_unregister_type_offload);
-static const struct xfrm_type_offload *xfrm_get_type_offload(u8 proto, unsigned short family)
+static const struct xfrm_type_offload *
+xfrm_get_type_offload(u8 proto, unsigned short family, bool try_load)
{
struct xfrm_state_afinfo *afinfo;
const struct xfrm_type_offload **typemap;
const struct xfrm_type_offload *type;
+retry:
afinfo = xfrm_state_get_afinfo(family);
if (unlikely(afinfo == NULL))
return NULL;
@@ -311,6 +313,12 @@ static const struct xfrm_type_offload *xfrm_get_type_offload(u8 proto, unsigned
if ((type && !try_module_get(type->owner)))
type = NULL;
+ if (!type && try_load) {
+ request_module("xfrm-offload-%d-%d", family, proto);
+ try_load = 0;
+ goto retry;
+ }
+
rcu_read_unlock();
return type;
}
@@ -724,11 +732,12 @@ restart:
}
}
}
- if (cnt)
- err = 0;
-
out:
spin_unlock_bh(&net->xfrm.xfrm_state_lock);
+ if (cnt) {
+ err = 0;
+ xfrm_policy_cache_flush();
+ }
return err;
}
EXPORT_SYMBOL(xfrm_state_flush);
@@ -1620,6 +1629,7 @@ int
xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
unsigned short family, struct net *net)
{
+ int i;
int err = 0;
struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
if (!afinfo)
@@ -1628,6 +1638,9 @@ xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
spin_lock_bh(&net->xfrm.xfrm_state_lock); /*FIXME*/
if (afinfo->tmpl_sort)
err = afinfo->tmpl_sort(dst, src, n);
+ else
+ for (i = 0; i < n; i++)
+ dst[i] = src[i];
spin_unlock_bh(&net->xfrm.xfrm_state_lock);
rcu_read_unlock();
return err;
@@ -1638,6 +1651,7 @@ int
xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
unsigned short family)
{
+ int i;
int err = 0;
struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
struct net *net = xs_net(*src);
@@ -1648,6 +1662,9 @@ xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
spin_lock_bh(&net->xfrm.xfrm_state_lock);
if (afinfo->state_sort)
err = afinfo->state_sort(dst, src, n);
+ else
+ for (i = 0; i < n; i++)
+ dst[i] = src[i];
spin_unlock_bh(&net->xfrm.xfrm_state_lock);
rcu_read_unlock();
return err;
@@ -2052,6 +2069,7 @@ int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen
if (err >= 0) {
xfrm_sk_policy_insert(sk, err, pol);
xfrm_pol_put(pol);
+ __sk_dst_reset(sk);
err = 0;
}
@@ -2164,7 +2182,7 @@ int xfrm_state_mtu(struct xfrm_state *x, int mtu)
return mtu - x->props.header_len;
}
-int __xfrm_init_state(struct xfrm_state *x, bool init_replay)
+int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload)
{
struct xfrm_state_afinfo *afinfo;
struct xfrm_mode *inner_mode;
@@ -2229,7 +2247,7 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay)
if (x->type == NULL)
goto error;
- x->type_offload = xfrm_get_type_offload(x->id.proto, family);
+ x->type_offload = xfrm_get_type_offload(x->id.proto, family, offload);
err = x->type->init_state(x);
if (err)
@@ -2257,7 +2275,7 @@ EXPORT_SYMBOL(__xfrm_init_state);
int xfrm_init_state(struct xfrm_state *x)
{
- return __xfrm_init_state(x, true);
+ return __xfrm_init_state(x, true, false);
}
EXPORT_SYMBOL(xfrm_init_state);
diff --git a/net/xfrm/xfrm_sysctl.c b/net/xfrm/xfrm_sysctl.c
index 35a7e794ad04..0c6c5ef65f9d 100644
--- a/net/xfrm/xfrm_sysctl.c
+++ b/net/xfrm/xfrm_sysctl.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/sysctl.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 2be4c6af008a..983b0233767b 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -42,7 +42,7 @@ static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type)
return 0;
algp = nla_data(rt);
- if (nla_len(rt) < xfrm_alg_len(algp))
+ if (nla_len(rt) < (int)xfrm_alg_len(algp))
return -EINVAL;
switch (type) {
@@ -68,7 +68,7 @@ static int verify_auth_trunc(struct nlattr **attrs)
return 0;
algp = nla_data(rt);
- if (nla_len(rt) < xfrm_alg_auth_len(algp))
+ if (nla_len(rt) < (int)xfrm_alg_auth_len(algp))
return -EINVAL;
algp->alg_name[sizeof(algp->alg_name) - 1] = '\0';
@@ -84,7 +84,7 @@ static int verify_aead(struct nlattr **attrs)
return 0;
algp = nla_data(rt);
- if (nla_len(rt) < aead_len(algp))
+ if (nla_len(rt) < (int)aead_len(algp))
return -EINVAL;
algp->alg_name[sizeof(algp->alg_name) - 1] = '\0';
@@ -130,7 +130,7 @@ static inline int verify_replay(struct xfrm_usersa_info *p,
if (rs->bmp_len > XFRMA_REPLAY_ESN_MAX / sizeof(rs->bmp[0]) / 8)
return -EINVAL;
- if (nla_len(rt) < xfrm_replay_state_esn_len(rs) &&
+ if (nla_len(rt) < (int)xfrm_replay_state_esn_len(rs) &&
nla_len(rt) != sizeof(*rs))
return -EINVAL;
}
@@ -404,7 +404,7 @@ static inline int xfrm_replay_verify_len(struct xfrm_replay_state_esn *replay_es
struct nlattr *rp)
{
struct xfrm_replay_state_esn *up;
- int ulen;
+ unsigned int ulen;
if (!replay_esn || !rp)
return 0;
@@ -414,7 +414,7 @@ static inline int xfrm_replay_verify_len(struct xfrm_replay_state_esn *replay_es
/* Check the overall length and the internal bitmap length to avoid
* potential overflow. */
- if (nla_len(rp) < ulen ||
+ if (nla_len(rp) < (int)ulen ||
xfrm_replay_state_esn_len(replay_esn) != ulen ||
replay_esn->bmp_len != up->bmp_len)
return -EINVAL;
@@ -430,14 +430,14 @@ static int xfrm_alloc_replay_state_esn(struct xfrm_replay_state_esn **replay_esn
struct nlattr *rta)
{
struct xfrm_replay_state_esn *p, *pp, *up;
- int klen, ulen;
+ unsigned int klen, ulen;
if (!rta)
return 0;
up = nla_data(rta);
klen = xfrm_replay_state_esn_len(up);
- ulen = nla_len(rta) >= klen ? klen : sizeof(*up);
+ ulen = nla_len(rta) >= (int)klen ? klen : sizeof(*up);
p = kzalloc(klen, GFP_KERNEL);
if (!p)
@@ -458,9 +458,9 @@ static int xfrm_alloc_replay_state_esn(struct xfrm_replay_state_esn **replay_esn
return 0;
}
-static inline int xfrm_user_sec_ctx_size(struct xfrm_sec_ctx *xfrm_ctx)
+static inline unsigned int xfrm_user_sec_ctx_size(struct xfrm_sec_ctx *xfrm_ctx)
{
- int len = 0;
+ unsigned int len = 0;
if (xfrm_ctx) {
len += sizeof(struct xfrm_user_sec_ctx);
@@ -584,7 +584,10 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
xfrm_mark_get(attrs, &x->mark);
- err = __xfrm_init_state(x, false);
+ if (attrs[XFRMA_OUTPUT_MARK])
+ x->props.output_mark = nla_get_u32(attrs[XFRMA_OUTPUT_MARK]);
+
+ err = __xfrm_init_state(x, false, attrs[XFRMA_OFFLOAD_DEV]);
if (err)
goto error;
@@ -654,6 +657,7 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err < 0) {
x->km.state = XFRM_STATE_DEAD;
+ xfrm_dev_state_delete(x);
__xfrm_state_put(x);
goto out;
}
@@ -796,7 +800,7 @@ static int copy_user_offload(struct xfrm_state_offload *xso, struct sk_buff *skb
return -EMSGSIZE;
xuo = nla_data(attr);
-
+ memset(xuo, 0, sizeof(*xuo));
xuo->ifindex = xso->dev->ifindex;
xuo->flags = xso->flags;
@@ -897,6 +901,11 @@ static int copy_to_user_state_extra(struct xfrm_state *x,
ret = copy_user_offload(&x->xso, skb);
if (ret)
goto out;
+ if (x->props.output_mark) {
+ ret = nla_put_u32(skb, XFRMA_OUTPUT_MARK, x->props.output_mark);
+ if (ret)
+ goto out;
+ }
if (x->security)
ret = copy_sec_ctx(x->security, skb);
out:
@@ -1023,7 +1032,7 @@ static inline int xfrm_nlmsg_multicast(struct net *net, struct sk_buff *skb,
return -1;
}
-static inline size_t xfrm_spdinfo_msgsize(void)
+static inline unsigned int xfrm_spdinfo_msgsize(void)
{
return NLMSG_ALIGN(4)
+ nla_total_size(sizeof(struct xfrmu_spdinfo))
@@ -1138,18 +1147,19 @@ static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh,
u32 *flags = nlmsg_data(nlh);
u32 sportid = NETLINK_CB(skb).portid;
u32 seq = nlh->nlmsg_seq;
+ int err;
r_skb = nlmsg_new(xfrm_spdinfo_msgsize(), GFP_ATOMIC);
if (r_skb == NULL)
return -ENOMEM;
- if (build_spdinfo(r_skb, net, sportid, seq, *flags) < 0)
- BUG();
+ err = build_spdinfo(r_skb, net, sportid, seq, *flags);
+ BUG_ON(err < 0);
return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid);
}
-static inline size_t xfrm_sadinfo_msgsize(void)
+static inline unsigned int xfrm_sadinfo_msgsize(void)
{
return NLMSG_ALIGN(4)
+ nla_total_size(sizeof(struct xfrmu_sadhinfo))
@@ -1196,13 +1206,14 @@ static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh,
u32 *flags = nlmsg_data(nlh);
u32 sportid = NETLINK_CB(skb).portid;
u32 seq = nlh->nlmsg_seq;
+ int err;
r_skb = nlmsg_new(xfrm_sadinfo_msgsize(), GFP_ATOMIC);
if (r_skb == NULL)
return -ENOMEM;
- if (build_sadinfo(r_skb, net, sportid, seq, *flags) < 0)
- BUG();
+ err = build_sadinfo(r_skb, net, sportid, seq, *flags);
+ BUG_ON(err < 0);
return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid);
}
@@ -1625,7 +1636,7 @@ static inline int copy_to_user_sec_ctx(struct xfrm_policy *xp, struct sk_buff *s
return copy_sec_ctx(xp->security, skb);
return 0;
}
-static inline size_t userpolicy_type_attrsize(void)
+static inline unsigned int userpolicy_type_attrsize(void)
{
#ifdef CONFIG_XFRM_SUB_POLICY
return nla_total_size(sizeof(struct xfrm_userpolicy_type));
@@ -1684,32 +1695,34 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr
static int xfrm_dump_policy_done(struct netlink_callback *cb)
{
- struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *) &cb->args[1];
+ struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *)cb->args;
struct net *net = sock_net(cb->skb->sk);
xfrm_policy_walk_done(walk, net);
return 0;
}
+static int xfrm_dump_policy_start(struct netlink_callback *cb)
+{
+ struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *)cb->args;
+
+ BUILD_BUG_ON(sizeof(*walk) > sizeof(cb->args));
+
+ xfrm_policy_walk_init(walk, XFRM_POLICY_TYPE_ANY);
+ return 0;
+}
+
static int xfrm_dump_policy(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
- struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *) &cb->args[1];
+ struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *)cb->args;
struct xfrm_dump_info info;
- BUILD_BUG_ON(sizeof(struct xfrm_policy_walk) >
- sizeof(cb->args) - sizeof(cb->args[0]));
-
info.in_skb = cb->skb;
info.out_skb = skb;
info.nlmsg_seq = cb->nlh->nlmsg_seq;
info.nlmsg_flags = NLM_F_MULTI;
- if (!cb->args[0]) {
- cb->args[0] = 1;
- xfrm_policy_walk_init(walk, XFRM_POLICY_TYPE_ANY);
- }
-
(void) xfrm_policy_walk(net, walk, dump_one_policy, &info);
return skb->len;
@@ -1815,8 +1828,6 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
out:
xfrm_pol_put(xp);
- if (delete && err == 0)
- xfrm_garbage_collect(net);
return err;
}
@@ -1844,9 +1855,9 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
return 0;
}
-static inline size_t xfrm_aevent_msgsize(struct xfrm_state *x)
+static inline unsigned int xfrm_aevent_msgsize(struct xfrm_state *x)
{
- size_t replay_size = x->replay_esn ?
+ unsigned int replay_size = x->replay_esn ?
xfrm_replay_state_esn_len(x->replay_esn) :
sizeof(struct xfrm_replay_state);
@@ -1869,6 +1880,7 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct
return -EMSGSIZE;
id = nlmsg_data(nlh);
+ memset(&id->sa_id, 0, sizeof(id->sa_id));
memcpy(&id->sa_id.daddr, &x->id.daddr, sizeof(x->id.daddr));
id->sa_id.spi = x->id.spi;
id->sa_id.family = x->props.family;
@@ -1950,8 +1962,9 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh,
c.seq = nlh->nlmsg_seq;
c.portid = nlh->nlmsg_pid;
- if (build_aevent(r_skb, x, &c) < 0)
- BUG();
+ err = build_aevent(r_skb, x, &c);
+ BUG_ON(err < 0);
+
err = nlmsg_unicast(net->xfrm.nlsk, r_skb, NETLINK_CB(skb).portid);
spin_unlock_bh(&x->lock);
xfrm_state_put(x);
@@ -2027,7 +2040,6 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
return 0;
return err;
}
- xfrm_garbage_collect(net);
c.data.type = type;
c.event = nlh->nlmsg_type;
@@ -2315,8 +2327,8 @@ static int copy_to_user_kmaddress(const struct xfrm_kmaddress *k, struct sk_buff
return nla_put(skb, XFRMA_KMADDRESS, sizeof(uk), &uk);
}
-static inline size_t xfrm_migrate_msgsize(int num_migrate, int with_kma,
- int with_encp)
+static inline unsigned int xfrm_migrate_msgsize(int num_migrate, int with_kma,
+ int with_encp)
{
return NLMSG_ALIGN(sizeof(struct xfrm_userpolicy_id))
+ (with_kma ? nla_total_size(sizeof(struct xfrm_kmaddress)) : 0)
@@ -2379,6 +2391,7 @@ static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
{
struct net *net = &init_net;
struct sk_buff *skb;
+ int err;
skb = nlmsg_new(xfrm_migrate_msgsize(num_migrate, !!k, !!encap),
GFP_ATOMIC);
@@ -2386,8 +2399,8 @@ static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
return -ENOMEM;
/* build migrate */
- if (build_migrate(skb, m, num_migrate, k, sel, encap, dir, type) < 0)
- BUG();
+ err = build_migrate(skb, m, num_migrate, k, sel, encap, dir, type);
+ BUG_ON(err < 0);
return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_MIGRATE);
}
@@ -2457,6 +2470,7 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
[XFRMA_PROTO] = { .type = NLA_U8 },
[XFRMA_ADDRESS_FILTER] = { .len = sizeof(struct xfrm_address_filter) },
[XFRMA_OFFLOAD_DEV] = { .len = sizeof(struct xfrm_user_offload) },
+ [XFRMA_OUTPUT_MARK] = { .len = NLA_U32 },
};
static const struct nla_policy xfrma_spd_policy[XFRMA_SPD_MAX+1] = {
@@ -2466,6 +2480,7 @@ static const struct nla_policy xfrma_spd_policy[XFRMA_SPD_MAX+1] = {
static const struct xfrm_link {
int (*doit)(struct sk_buff *, struct nlmsghdr *, struct nlattr **);
+ int (*start)(struct netlink_callback *);
int (*dump)(struct sk_buff *, struct netlink_callback *);
int (*done)(struct netlink_callback *);
const struct nla_policy *nla_pol;
@@ -2479,6 +2494,7 @@ static const struct xfrm_link {
[XFRM_MSG_NEWPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_add_policy },
[XFRM_MSG_DELPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_get_policy },
[XFRM_MSG_GETPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_get_policy,
+ .start = xfrm_dump_policy_start,
.dump = xfrm_dump_policy,
.done = xfrm_dump_policy_done },
[XFRM_MSG_ALLOCSPI - XFRM_MSG_BASE] = { .doit = xfrm_alloc_userspi },
@@ -2531,6 +2547,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
{
struct netlink_dump_control c = {
+ .start = link->start,
.dump = link->dump,
.done = link->done,
};
@@ -2559,7 +2576,7 @@ static void xfrm_netlink_rcv(struct sk_buff *skb)
mutex_unlock(&net->xfrm.xfrm_cfg_mutex);
}
-static inline size_t xfrm_expire_msgsize(void)
+static inline unsigned int xfrm_expire_msgsize(void)
{
return NLMSG_ALIGN(sizeof(struct xfrm_user_expire))
+ nla_total_size(sizeof(struct xfrm_mark));
@@ -2578,6 +2595,8 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct
ue = nlmsg_data(nlh);
copy_to_user_state(x, &ue->state);
ue->hard = (c->data.hard != 0) ? 1 : 0;
+ /* clear the padding bytes */
+ memset(&ue->hard + 1, 0, sizeof(*ue) - offsetofend(typeof(*ue), hard));
err = xfrm_mark_put(skb, &x->mark);
if (err)
@@ -2608,13 +2627,14 @@ static int xfrm_aevent_state_notify(struct xfrm_state *x, const struct km_event
{
struct net *net = xs_net(x);
struct sk_buff *skb;
+ int err;
skb = nlmsg_new(xfrm_aevent_msgsize(x), GFP_ATOMIC);
if (skb == NULL)
return -ENOMEM;
- if (build_aevent(skb, x, c) < 0)
- BUG();
+ err = build_aevent(skb, x, c);
+ BUG_ON(err < 0);
return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_AEVENTS);
}
@@ -2645,9 +2665,9 @@ static int xfrm_notify_sa_flush(const struct km_event *c)
return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_SA);
}
-static inline size_t xfrm_sa_len(struct xfrm_state *x)
+static inline unsigned int xfrm_sa_len(struct xfrm_state *x)
{
- size_t l = 0;
+ unsigned int l = 0;
if (x->aead)
l += nla_total_size(aead_len(x->aead));
if (x->aalg) {
@@ -2676,6 +2696,8 @@ static inline size_t xfrm_sa_len(struct xfrm_state *x)
l += nla_total_size(sizeof(x->props.extra_flags));
if (x->xso.dev)
l += nla_total_size(sizeof(x->xso));
+ if (x->props.output_mark)
+ l += nla_total_size(sizeof(x->props.output_mark));
/* Must count x->lastused as it may become non-zero behind our back. */
l += nla_total_size_64bit(sizeof(u64));
@@ -2690,8 +2712,9 @@ static int xfrm_notify_sa(struct xfrm_state *x, const struct km_event *c)
struct xfrm_usersa_id *id;
struct nlmsghdr *nlh;
struct sk_buff *skb;
- int len = xfrm_sa_len(x);
- int headlen, err;
+ unsigned int len = xfrm_sa_len(x);
+ unsigned int headlen;
+ int err;
headlen = sizeof(*p);
if (c->event == XFRM_MSG_DELSA) {
@@ -2715,6 +2738,7 @@ static int xfrm_notify_sa(struct xfrm_state *x, const struct km_event *c)
struct nlattr *attr;
id = nlmsg_data(nlh);
+ memset(id, 0, sizeof(*id));
memcpy(&id->daddr, &x->id.daddr, sizeof(id->daddr));
id->spi = x->id.spi;
id->family = x->props.family;
@@ -2764,8 +2788,8 @@ static int xfrm_send_state_notify(struct xfrm_state *x, const struct km_event *c
}
-static inline size_t xfrm_acquire_msgsize(struct xfrm_state *x,
- struct xfrm_policy *xp)
+static inline unsigned int xfrm_acquire_msgsize(struct xfrm_state *x,
+ struct xfrm_policy *xp)
{
return NLMSG_ALIGN(sizeof(struct xfrm_user_acquire))
+ nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr)
@@ -2817,13 +2841,14 @@ static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt,
{
struct net *net = xs_net(x);
struct sk_buff *skb;
+ int err;
skb = nlmsg_new(xfrm_acquire_msgsize(x, xp), GFP_ATOMIC);
if (skb == NULL)
return -ENOMEM;
- if (build_acquire(skb, x, xt, xp) < 0)
- BUG();
+ err = build_acquire(skb, x, xt, xp);
+ BUG_ON(err < 0);
return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_ACQUIRE);
}
@@ -2888,7 +2913,7 @@ static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt,
return xp;
}
-static inline size_t xfrm_polexpire_msgsize(struct xfrm_policy *xp)
+static inline unsigned int xfrm_polexpire_msgsize(struct xfrm_policy *xp)
{
return NLMSG_ALIGN(sizeof(struct xfrm_user_polexpire))
+ nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr)
@@ -2932,26 +2957,28 @@ static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, const struct
{
struct net *net = xp_net(xp);
struct sk_buff *skb;
+ int err;
skb = nlmsg_new(xfrm_polexpire_msgsize(xp), GFP_ATOMIC);
if (skb == NULL)
return -ENOMEM;
- if (build_polexpire(skb, xp, dir, c) < 0)
- BUG();
+ err = build_polexpire(skb, xp, dir, c);
+ BUG_ON(err < 0);
return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_EXPIRE);
}
static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, const struct km_event *c)
{
- int len = nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr);
+ unsigned int len = nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr);
struct net *net = xp_net(xp);
struct xfrm_userpolicy_info *p;
struct xfrm_userpolicy_id *id;
struct nlmsghdr *nlh;
struct sk_buff *skb;
- int headlen, err;
+ unsigned int headlen;
+ int err;
headlen = sizeof(*p);
if (c->event == XFRM_MSG_DELPOLICY) {
@@ -3058,7 +3085,7 @@ static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, const struct
}
-static inline size_t xfrm_report_msgsize(void)
+static inline unsigned int xfrm_report_msgsize(void)
{
return NLMSG_ALIGN(sizeof(struct xfrm_user_report));
}
@@ -3092,18 +3119,19 @@ static int xfrm_send_report(struct net *net, u8 proto,
struct xfrm_selector *sel, xfrm_address_t *addr)
{
struct sk_buff *skb;
+ int err;
skb = nlmsg_new(xfrm_report_msgsize(), GFP_ATOMIC);
if (skb == NULL)
return -ENOMEM;
- if (build_report(skb, proto, sel, addr) < 0)
- BUG();
+ err = build_report(skb, proto, sel, addr);
+ BUG_ON(err < 0);
return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_REPORT);
}
-static inline size_t xfrm_mapping_msgsize(void)
+static inline unsigned int xfrm_mapping_msgsize(void)
{
return NLMSG_ALIGN(sizeof(struct xfrm_user_mapping));
}
@@ -3139,6 +3167,7 @@ static int xfrm_send_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr,
{
struct net *net = xs_net(x);
struct sk_buff *skb;
+ int err;
if (x->id.proto != IPPROTO_ESP)
return -EINVAL;
@@ -3150,8 +3179,8 @@ static int xfrm_send_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr,
if (skb == NULL)
return -ENOMEM;
- if (build_mapping(skb, x, ipaddr, sport) < 0)
- BUG();
+ err = build_mapping(skb, x, ipaddr, sport);
+ BUG_ON(err < 0);
return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_MAPPING);
}